From 510fecec54ddb809d3c4f764b962e3623b8bee32 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 29 May 2024 17:20:53 +0200 Subject: [PATCH] add missing nested types --- src/include/delta_utils.hpp | 27 ++++++++++++++++++++++----- test/sql/dat/nested_types.test | 29 +++++++++++++++++++++++++++++ test/sql/dat/primitive_types.test | 11 +++++++++++ 3 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 test/sql/dat/nested_types.test diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 461668a..fe298fd 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -20,6 +20,8 @@ class SchemaVisitor { visitor.data = &state; visitor.make_field_list = (uintptr_t (*)(void*, uintptr_t)) &MakeFieldList; visitor.visit_struct = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, uintptr_t)) &VisitStruct; + visitor.visit_array = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) &VisitArray; + visitor.visit_map = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) &VisitMap; visitor.visit_decimal = (void (*)(void*, uintptr_t, ffi::KernelStringSlice, uint8_t , uint8_t)) &VisitDecimal; visitor.visit_string = VisitSimpleType(); visitor.visit_long = VisitSimpleType(); @@ -29,10 +31,10 @@ class SchemaVisitor { visitor.visit_float = VisitSimpleType(); visitor.visit_double = VisitSimpleType(); visitor.visit_boolean = VisitSimpleType(); - visitor.visit_binary = VisitSimpleType(); // TODO: check - visitor.visit_date = VisitSimpleType(); // TODO: check - visitor.visit_timestamp = VisitSimpleType(); // TODO: check - visitor.visit_timestamp_ntz = VisitSimpleType(); // TODO: check + visitor.visit_binary = VisitSimpleType(); + visitor.visit_date = VisitSimpleType(); + visitor.visit_timestamp = VisitSimpleType(); + visitor.visit_timestamp_ntz = VisitSimpleType(); uintptr_t result = visit_schema(snapshot, &visitor); return state.TakeFieldList(result); @@ -66,6 +68,20 @@ class SchemaVisitor { 
state->AppendToList(sibling_list_id, name, LogicalType::STRUCT(std::move(*children))); } + static void VisitArray(SchemaVisitor* state, uintptr_t sibling_list_id, ffi::KernelStringSlice name, bool contains_null, uintptr_t child_list_id) { + auto children = state->TakeFieldList(child_list_id); + + D_ASSERT(children->size() == 1); + state->AppendToList(sibling_list_id, name, LogicalType::LIST(children->front().second)); + } + + static void VisitMap(SchemaVisitor* state, uintptr_t sibling_list_id, ffi::KernelStringSlice name, bool contains_null, uintptr_t child_list_id) { + auto children = state->TakeFieldList(child_list_id); + + D_ASSERT(children->size() == 2); + state->AppendToList(sibling_list_id, name, LogicalType::MAP(LogicalType::STRUCT(std::move(*children)))); + } + uintptr_t MakeFieldListImpl(uintptr_t capacity_hint) { uintptr_t id = next_id++; auto list = make_uniq(); @@ -80,6 +96,7 @@ class SchemaVisitor { auto it = inflight_lists.find(id); if (it == inflight_lists.end()) { // TODO... some error... + throw InternalException("WEIRD SHIT"); } else { it->second->emplace_back(std::make_pair(string(name.ptr, name.len), std::move(child))); } @@ -89,7 +106,7 @@ class SchemaVisitor { auto it = inflight_lists.find(id); if (it == inflight_lists.end()) { // TODO: Raise some kind of error. 
- return {}; // not present + throw InternalException("WEIRD SHIT 2"); } auto rval = std::move(it->second); inflight_lists.erase(it); diff --git a/test/sql/dat/nested_types.test b/test/sql/dat/nested_types.test new file mode 100644 index 0000000..18ec9cb --- /dev/null +++ b/test/sql/dat/nested_types.test @@ -0,0 +1,29 @@ +# name: test/sql/dat/nested_types.test +# description: DAT test suite: nested types +# group: [delta] + +require parquet + +require delta + +require-env DAT_PATH + +query IIII +SELECT * +FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/delta') +---- +0 {'float64': 0.0, 'bool': true} [0] {} +1 {'float64': 1.0, 'bool': false} [0, 1] {0=0} +2 {'float64': 2.0, 'bool': true} [0, 1, 2] {0=0, 1=1} +3 {'float64': 3.0, 'bool': false} [0, 1, 2, 3] {0=0, 1=1, 2=2} +4 {'float64': 4.0, 'bool': true} [0, 1, 2, 3, 4] {0=0, 1=1, 2=2, 3=3} + +query IIII +SELECT * +FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/nested_types/expected/**/*.parquet') +---- +0 {'float64': 0.0, 'bool': true} [0] {} +1 {'float64': 1.0, 'bool': false} [0, 1] {0=0} +2 {'float64': 2.0, 'bool': true} [0, 1, 2] {0=0, 1=1} +3 {'float64': 3.0, 'bool': false} [0, 1, 2, 3] {0=0, 1=1, 2=2} +4 {'float64': 4.0, 'bool': true} [0, 1, 2, 3, 4] {0=0, 1=1, 2=2, 3=3} diff --git a/test/sql/dat/primitive_types.test b/test/sql/dat/primitive_types.test index 2c2fd03..ae89f5e 100644 --- a/test/sql/dat/primitive_types.test +++ b/test/sql/dat/primitive_types.test @@ -17,3 +17,14 @@ FROM delta_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delt 2 2 2 2 2 2.0 2.0 true \x00\x00 12.000 1970-01-03 1970-01-01 02:00:00 3 3 3 3 3 3.0 3.0 false \x00\x00\x00 13.000 1970-01-04 1970-01-01 03:00:00 4 4 4 4 4 4.0 4.0 true \x00\x00\x00\x00 14.000 1970-01-05 1970-01-01 04:00:00 + +query IIIIIIIIIIII +SELECT * +FROM parquet_scan('${DAT_PATH}/out/reader_tests/generated/all_primitive_types/expected/**/*.parquet') +---- +0 0 0 0 0 0.0 0.0 true (empty) 10.000 1970-01-01 
1970-01-01 00:00:00 +1 1 1 1 1 1.0 1.0 false \x00 11.000 1970-01-02 1970-01-01 01:00:00 +2 2 2 2 2 2.0 2.0 true \x00\x00 12.000 1970-01-03 1970-01-01 02:00:00 +3 3 3 3 3 3.0 3.0 false \x00\x00\x00 13.000 1970-01-04 1970-01-01 03:00:00 +4 4 4 4 4 4.0 4.0 true \x00\x00\x00\x00 14.000 1970-01-05 1970-01-01 04:00:00 +