From 18fd5dc5c9dcb14610f744ffb3a7e97a84426f22 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 1 Aug 2024 10:41:13 +0200 Subject: [PATCH 1/2] fix incorrectly mapping timestamp types --- src/delta_utils.cpp | 4 +-- test/sql/delta_kernel_rs/timestamp_ntz.test | 29 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 test/sql/delta_kernel_rs/timestamp_ntz.test diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index f6f8b3d..9648481 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -27,8 +27,8 @@ unique_ptr SchemaVisitor::VisitSnapshotSchema(ffi::Sha visitor.visit_boolean = VisitSimpleType(); visitor.visit_binary = VisitSimpleType(); visitor.visit_date = VisitSimpleType(); - visitor.visit_timestamp = VisitSimpleType(); - visitor.visit_timestamp_ntz = VisitSimpleType(); + visitor.visit_timestamp = VisitSimpleType(); + visitor.visit_timestamp_ntz = VisitSimpleType(); uintptr_t result = visit_schema(snapshot, &visitor); return state.TakeFieldList(result); diff --git a/test/sql/delta_kernel_rs/timestamp_ntz.test b/test/sql/delta_kernel_rs/timestamp_ntz.test new file mode 100644 index 0000000..8386ace --- /dev/null +++ b/test/sql/delta_kernel_rs/timestamp_ntz.test @@ -0,0 +1,29 @@ +# name: test/sql/delta_kernel_rs/timestamp_ntz.test +# description: table with timestamp_ntz type (timestamp without timezone) +# group: [delta_kernel_rs] + +require parquet + +require delta + +require-env DELTA_KERNEL_TESTS_PATH + +query III +DESCRIBE FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/data-reader-timestamp_ntz') order by id +---- +id INTEGER YES NULL NULL NULL +tsNtz TIMESTAMP YES NULL NULL NULL +tsNtzPartition TIMESTAMP YES NULL NULL NULL + +query III +FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/data-reader-timestamp_ntz') order by id +---- +0 2021-11-18 02:30:00.123456 2021-11-18 02:30:00.123456 +1 2013-07-05 17:01:00.123456 2021-11-18 02:30:00.123456 +2 NULL 2021-11-18 02:30:00.123456 +3 2021-11-18 02:30:00.123456 2013-07-05 
17:01:00.123456 +4 2013-07-05 17:01:00.123456 2013-07-05 17:01:00.123456 +5 NULL 2013-07-05 17:01:00.123456 +6 2021-11-18 02:30:00.123456 NULL +7 2013-07-05 17:01:00.123456 NULL +8 NULL NULL From d320bb2c9915e09bd832c9e3e2e41f296d95323f Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 1 Aug 2024 11:29:34 +0200 Subject: [PATCH 2/2] fix incorrect column mapping for delta constant columns --- src/functions/delta_scan.cpp | 2 +- test/sql/dat/all.test | 29 ++++++++++++++++++--- test/sql/delta_kernel_rs/timestamp_ntz.test | 2 +- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index d4a30fd..28ea597 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -563,7 +563,7 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio auto col_partition_entry = file_metadata->partition_map.find(global_names[col_id]); if (col_partition_entry != file_metadata->partition_map.end()) { // Todo: use https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization - auto maybe_value = Value(col_partition_entry->second).DefaultCastAs(global_types[i]); + auto maybe_value = Value(col_partition_entry->second).DefaultCastAs(global_types[col_id]); reader_data.constant_map.emplace_back(i, maybe_value); } } diff --git a/test/sql/dat/all.test b/test/sql/dat/all.test index fc7c6c5..96f3870 100644 --- a/test/sql/dat/all.test +++ b/test/sql/dat/all.test @@ -12,15 +12,26 @@ require-env DAT_PATH # all_primitive_types query I rowsort all_primitive_types -SELECT * +SELECT * EXCLUDE(timestamp) FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delta') ---- +# TODO: exclude is necessary due to issue with the golden tables, this should be fixed upstream query I rowsort all_primitive_types -SELECT * +SELECT * EXCLUDE(timestamp) FROM 
parquet_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/expected/latest/**/*.parquet') ---- +query I +SELECT timestamp +FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delta') +---- +1970-01-01 00:00:00+00 +1970-01-01 01:00:00+00 +1970-01-01 02:00:00+00 +1970-01-01 03:00:00+00 +1970-01-01 04:00:00+00 + # nested_types query I rowsort nested_types SELECT * @@ -120,15 +131,25 @@ require notwindows # multi_partitioned_2 query I rowsort multi_partitioned_2 -SELECT * +SELECT * EXCLUDE(time) FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/delta') ---- +# TODO: exclude is necessary due to issue with the golden tables, this should be fixed upstream query I rowsort multi_partitioned_2 -SELECT * +SELECT * EXCLUDE(time) FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/expected/latest/**/*.parquet') ---- +query I +SELECT time +FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/delta') +order by time +---- +1970-01-01 00:00:00+00 +1970-01-01 12:30:00+00 +1970-01-02 08:45:00+00 + # no_replay query I rowsort no_replay SELECT * diff --git a/test/sql/delta_kernel_rs/timestamp_ntz.test b/test/sql/delta_kernel_rs/timestamp_ntz.test index 8386ace..1f89b09 100644 --- a/test/sql/delta_kernel_rs/timestamp_ntz.test +++ b/test/sql/delta_kernel_rs/timestamp_ntz.test @@ -8,7 +8,7 @@ require delta require-env DELTA_KERNEL_TESTS_PATH -query III +query IIIIII DESCRIBE FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/data-reader-timestamp_ntz') order by id ---- id INTEGER YES NULL NULL NULL