Skip to content

Commit

Permalink
Merge pull request #68 from samansmink/fix-incorrect-timestamp-map
Browse files Browse the repository at this point in the history
Fix 2 small bugs
  • Loading branch information
samansmink authored Aug 1, 2024
2 parents 47724ba + d320bb2 commit fa8a295
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/delta_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ unique_ptr<SchemaVisitor::FieldList> SchemaVisitor::VisitSnapshotSchema(ffi::Sha
visitor.visit_boolean = VisitSimpleType<LogicalType::BOOLEAN>();
visitor.visit_binary = VisitSimpleType<LogicalType::VARCHAR>();
visitor.visit_date = VisitSimpleType<LogicalType::DATE>();
visitor.visit_timestamp = VisitSimpleType<LogicalType::TIMESTAMP>();
visitor.visit_timestamp_ntz = VisitSimpleType<LogicalType::TIMESTAMP_TZ>();
visitor.visit_timestamp = VisitSimpleType<LogicalType::TIMESTAMP_TZ>();
visitor.visit_timestamp_ntz = VisitSimpleType<LogicalType::TIMESTAMP>();

uintptr_t result = visit_schema(snapshot, &visitor);
return state.TakeFieldList(result);
Expand Down
2 changes: 1 addition & 1 deletion src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio
auto col_partition_entry = file_metadata->partition_map.find(global_names[col_id]);
if (col_partition_entry != file_metadata->partition_map.end()) {
// Todo: use https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization
auto maybe_value = Value(col_partition_entry->second).DefaultCastAs(global_types[i]);
auto maybe_value = Value(col_partition_entry->second).DefaultCastAs(global_types[col_id]);
reader_data.constant_map.emplace_back(i, maybe_value);
}
}
Expand Down
29 changes: 25 additions & 4 deletions test/sql/dat/all.test
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,26 @@ require-env DAT_PATH

# all_primitive_types
query I rowsort all_primitive_types
SELECT *
SELECT * EXCLUDE(timestamp)
FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delta')
----

# TODO: the EXCLUDE is necessary due to an issue with the golden tables; this should be fixed upstream
query I rowsort all_primitive_types
SELECT *
SELECT * EXCLUDE(timestamp)
FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/expected/latest/**/*.parquet')
----

query I
SELECT timestamp
FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delta')
----
1970-01-01 00:00:00+00
1970-01-01 01:00:00+00
1970-01-01 02:00:00+00
1970-01-01 03:00:00+00
1970-01-01 04:00:00+00

# nested_types
query I rowsort nested_types
SELECT *
Expand Down Expand Up @@ -120,15 +131,25 @@ require notwindows

# multi_partitioned_2
query I rowsort multi_partitioned_2
SELECT *
SELECT * EXCLUDE(time)
FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/delta')
----

# TODO: the EXCLUDE is necessary due to an issue with the golden tables; this should be fixed upstream
query I rowsort multi_partitioned_2
SELECT *
SELECT * EXCLUDE(time)
FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/expected/latest/**/*.parquet')
----

query I
SELECT time
FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/multi_partitioned_2/delta')
order by time
----
1970-01-01 00:00:00+00
1970-01-01 12:30:00+00
1970-01-02 08:45:00+00

# no_replay
query I rowsort no_replay
SELECT *
Expand Down
29 changes: 29 additions & 0 deletions test/sql/delta_kernel_rs/timestamp_ntz.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# name: test/sql/delta_kernel_rs/timestamp_ntz.test
# description: table with timestamp_ntz type (timestamp without timezone)
# group: [delta_kernel_rs]

require parquet

require delta

require-env DELTA_KERNEL_TESTS_PATH

query IIIIII
DESCRIBE FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/data-reader-timestamp_ntz') order by id
----
id INTEGER YES NULL NULL NULL
tsNtz TIMESTAMP YES NULL NULL NULL
tsNtzPartition TIMESTAMP YES NULL NULL NULL

query III
FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/data-reader-timestamp_ntz') order by id
----
0 2021-11-18 02:30:00.123456 2021-11-18 02:30:00.123456
1 2013-07-05 17:01:00.123456 2021-11-18 02:30:00.123456
2 NULL 2021-11-18 02:30:00.123456
3 2021-11-18 02:30:00.123456 2013-07-05 17:01:00.123456
4 2013-07-05 17:01:00.123456 2013-07-05 17:01:00.123456
5 NULL 2013-07-05 17:01:00.123456
6 2021-11-18 02:30:00.123456 NULL
7 2013-07-05 17:01:00.123456 NULL
8 NULL NULL

0 comments on commit fa8a295

Please sign in to comment.