From b234c5601dd11a2ed354e1dffb80d12c7c1f02cc Mon Sep 17 00:00:00 2001
From: dantengsky <dantengsky@gmail.com>
Date: Wed, 13 Nov 2024 22:22:49 +0800
Subject: [PATCH] fix: incorrect table data disk cache key

should use `offset` and `len` of column meta as corresponding parts of
table data cache key
---
 .../read/block/block_reader_merge_io_async.rs | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs
index 0f2b2bcf8cb71..71c3552077d98 100644
--- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs
+++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs
@@ -94,12 +94,19 @@ impl BlockReader {
             let table_data_cache = CacheManager::instance().get_table_data_cache();
             // add raw data (compressed raw bytes) to column cache
             for (column_id, (chunk_idx, range)) in &merge_io_result.columns_chunk_offsets {
-                let cache_key = TableDataCacheKey::new(
-                    &merge_io_result.block_path,
-                    *column_id,
-                    range.start as u64,
-                    (range.end - range.start) as u64,
-                );
+                // Safe to unwrap here: since this column has been fetched, its meta must be present.
+                let column_meta = columns_meta.get(column_id).unwrap();
+                let (offset, len) = column_meta.offset_length();
+
+                // Should NOT use `range.start`, `(range.end - range.start)` as parts of the cache key,
+                // as they are not stable and can vary for the same column depending on the query's projection.
+                // For instance:
+                //  - `SELECT col1, col2 FROM t;`
+                //  - `SELECT col2 FROM t;`
+                // may result in different ranges for `col2`
+                // This can lead to cache misses or INCONSISTENCIES.
+
+                let cache_key = TableDataCacheKey::new(location, *column_id, offset, len);
                 let chunk_data = merge_io_result
                     .owner_memory
                     .get_chunk(*chunk_idx, &merge_io_result.block_path)?;