diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/count.csv b/data/iceberg/generated_spec1_0_001/expected_results/last/count.csv deleted file mode 100644 index 3b35899..0000000 --- a/data/iceberg/generated_spec1_0_001/expected_results/last/count.csv +++ /dev/null @@ -1,2 +0,0 @@ -count -7690 \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/data/._SUCCESS.crc b/data/iceberg/generated_spec1_0_001/expected_results/last/data/._SUCCESS.crc deleted file mode 100644 index 3b7b044..0000000 Binary files a/data/iceberg/generated_spec1_0_001/expected_results/last/data/._SUCCESS.crc and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/data/.part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet.crc b/data/iceberg/generated_spec1_0_001/expected_results/last/data/.part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet.crc deleted file mode 100644 index 62fa3b7..0000000 Binary files a/data/iceberg/generated_spec1_0_001/expected_results/last/data/.part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet.crc and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/data/part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet b/data/iceberg/generated_spec1_0_001/expected_results/last/data/part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet deleted file mode 100644 index 54fb77d..0000000 Binary files a/data/iceberg/generated_spec1_0_001/expected_results/last/data/part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/query.sql b/data/iceberg/generated_spec1_0_001/expected_results/last/query.sql deleted file mode 100644 index 6c7f91c..0000000 --- a/data/iceberg/generated_spec1_0_001/expected_results/last/query.sql +++ /dev/null @@ -1,3 +0,0 @@ --- The query executed at this step: -ALTER TABLE iceberg_catalog.pyspark_iceberg_table -ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-1-bcb5bbb9-a993-41f7-95e1-09e0c2475f4a-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-1-bcb5bbb9-a993-41f7-95e1-09e0c2475f4a-00001.parquet deleted file mode 100644 index 97ac7e8..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-1-bcb5bbb9-a993-41f7-95e1-09e0c2475f4a-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-14-db28d572-b7de-4568-9b00-8c55c69cb179-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-14-db28d572-b7de-4568-9b00-8c55c69cb179-00001.parquet deleted file mode 100644 index 9b8b181..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-14-db28d572-b7de-4568-9b00-8c55c69cb179-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-26-f6481588-9ba4-4a7b-b3dd-f188d41fa5b8-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-26-f6481588-9ba4-4a7b-b3dd-f188d41fa5b8-00001.parquet deleted file mode 100644 index 3755277..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-26-f6481588-9ba4-4a7b-b3dd-f188d41fa5b8-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-36-cf35a788-d8c2-4ded-a9f7-5239797e80b8-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-36-cf35a788-d8c2-4ded-a9f7-5239797e80b8-00001.parquet deleted file mode 100644 index facc504..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-36-cf35a788-d8c2-4ded-a9f7-5239797e80b8-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-5-bd5417f5-f28c-46b1-b1ab-39ee9c191368-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-5-bd5417f5-f28c-46b1-b1ab-39ee9c191368-00001.parquet deleted file mode 100644 index 5998bc2..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-5-bd5417f5-f28c-46b1-b1ab-39ee9c191368-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-9-6cdc0135-4256-4772-8c3e-3f4803ded842-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-9-6cdc0135-4256-4772-8c3e-3f4803ded842-00001.parquet deleted file mode 100644 index 8f8a093..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-9-6cdc0135-4256-4772-8c3e-3f4803ded842-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m0.avro deleted file mode 100644 index 90e2897..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m1.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m1.avro deleted file mode 100644 index 584671d..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m0.avro deleted file mode 100644 index 7dd57c8..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m1.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m1.avro deleted file mode 100644 index 80355e7..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/7723fb1b-ae48-49de-9e77-cd7945667cb9-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/7723fb1b-ae48-49de-9e77-cd7945667cb9-m0.avro deleted file mode 100644 index 54b6edf..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/7723fb1b-ae48-49de-9e77-cd7945667cb9-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m0.avro deleted file mode 100644 index 2974743..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m1.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m1.avro deleted file mode 100644 index 2ecadc1..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m0.avro deleted file mode 100644 index 925a068..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m1.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m1.avro deleted file mode 100644 index 3666460..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m2.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m2.avro deleted file mode 100644 index feb60ea..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m2.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/e3febcc2-7f11-44b9-80af-571fb1c0463a-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/e3febcc2-7f11-44b9-80af-571fb1c0463a-m0.avro deleted file mode 100644 index b812dca..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/e3febcc2-7f11-44b9-80af-571fb1c0463a-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro deleted file mode 100644 index 9a7b32f..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro deleted file mode 100644 index 45cc87a..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro deleted file mode 100644 index c0715d7..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro deleted file mode 100644 index e6edc42..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro deleted file mode 100644 index be74220..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro deleted file mode 100644 index fde9231..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro deleted file mode 100644 index 63e2d76..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json deleted file mode 100644 index ec7f17f..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json +++ /dev/null @@ -1,217 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580919873, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 9145725745960929259, - "refs" : { - "main" : { - "snapshot-id" : 9145725745960929259, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - } ], - "metadata-log" : [ ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json deleted file mode 100644 index 8eb6b17..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json +++ /dev/null @@ -1,246 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580920785, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 8671490307245765264, - "refs" : { - "main" : { - "snapshot-id" : 8671490307245765264, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json deleted file mode 100644 index ae141f5..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json +++ /dev/null @@ -1,272 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580921348, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 4543110679664799316, - "refs" : { - "main" : { - "snapshot-id" : 4543110679664799316, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json deleted file mode 100644 index b44942a..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json +++ /dev/null @@ -1,301 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580921764, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 6238750566879819059, - "refs" : { - "main" : { - "snapshot-id" : 6238750566879819059, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json deleted file mode 100644 index 1aabc15..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json +++ /dev/null @@ -1,324 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580922113, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 2276968461870063565, - "refs" : { - "main" : { - "snapshot-id" : 2276968461870063565, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json deleted file mode 100644 index 9162f1b..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580922559, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 1692767036460164714, - "refs" : { - "main" : { - "snapshot-id" : 1692767036460164714, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json deleted file mode 100644 index 744af3d..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json +++ /dev/null @@ -1,445 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580922734, - "last-column-id" : 16, - "schema" : { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - }, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 1692767036460164714, - "refs" : { - "main" : { - "snapshot-id" : 1692767036460164714, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580922559, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json deleted file mode 100644 index ede9cca..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json +++ /dev/null @@ -1,474 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580923120, - "last-column-id" : 16, - "schema" : { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - }, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 4407328776463037310, - "refs" : { - "main" : { - "snapshot-id" : 4407328776463037310, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4407328776463037310, - "parent-snapshot-id" : 1692767036460164714, - "timestamp-ms" : 1719580923120, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "400831", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "400831", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - }, { - "timestamp-ms" : 1719580923120, - "snapshot-id" : 4407328776463037310 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580922559, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580922734, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json deleted file mode 100644 index 6d60244..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json +++ /dev/null @@ -1,561 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580923295, - "last-column-id" : 16, - "schema" : { - "type" : "struct", - "schema-id" : 2, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "long" - } ] - }, - "current-schema-id" : 2, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - }, { - "type" : "struct", - "schema-id" : 2, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "long" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 4407328776463037310, - "refs" : { - "main" : { - "snapshot-id" : 4407328776463037310, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4407328776463037310, - "parent-snapshot-id" : 1692767036460164714, - "timestamp-ms" : 1719580923120, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "400831", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "400831", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - }, { - "timestamp-ms" : 1719580923120, - "snapshot-id" : 4407328776463037310 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580922559, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580922734, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - }, { - "timestamp-ms" : 1719580923120, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/version-hint.text b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/version-hint.text deleted file mode 100644 index f11c82a..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/version-hint.text +++ /dev/null @@ -1 +0,0 @@ -9 \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/count.csv b/data/iceberg/generated_spec2_0_001/expected_results/last/count.csv deleted file mode 100644 index 1221195..0000000 --- a/data/iceberg/generated_spec2_0_001/expected_results/last/count.csv +++ /dev/null @@ -1,2 +0,0 @@ -count -6592 \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/._SUCCESS.crc b/data/iceberg/generated_spec2_0_001/expected_results/last/data/._SUCCESS.crc deleted file mode 100644 index 3b7b044..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/._SUCCESS.crc and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc b/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc deleted file mode 100644 index a0d706d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc b/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc deleted file mode 100644 index 2f43db6..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet b/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet deleted file mode 100644 index 53283bf..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet b/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet deleted file mode 100644 index 6d6fe6d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/query.sql b/data/iceberg/generated_spec2_0_001/expected_results/last/query.sql deleted file mode 100644 index 6c7f91c..0000000 --- a/data/iceberg/generated_spec2_0_001/expected_results/last/query.sql +++ /dev/null @@ -1,3 +0,0 @@ --- The query executed at this step: -ALTER TABLE iceberg_catalog.pyspark_iceberg_table -ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-1-3e88ec3a-0596-440f-9ce6-3debf172be49-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-1-3e88ec3a-0596-440f-9ce6-3debf172be49-00001.parquet deleted file mode 100644 index 6837339..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-1-3e88ec3a-0596-440f-9ce6-3debf172be49-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001-deletes.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001-deletes.parquet deleted file mode 100644 index 4d44760..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001-deletes.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001.parquet deleted file mode 100644 index b3c12c7..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-24-3a7a66b3-bd3a-4417-b6a9-45cb309eddc2-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-24-3a7a66b3-bd3a-4417-b6a9-45cb309eddc2-00001.parquet deleted file mode 100644 index 98c2d49..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-24-3a7a66b3-bd3a-4417-b6a9-45cb309eddc2-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001-deletes.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001-deletes.parquet deleted file mode 100644 index 47329b2..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001-deletes.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001.parquet deleted file mode 100644 index 979ad92..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001-deletes.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001-deletes.parquet deleted file mode 100644 index 99aafa1..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001-deletes.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001.parquet deleted file mode 100644 index c242949..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-7-3be35a72-224f-475b-a0eb-34cea92784b4-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-7-3be35a72-224f-475b-a0eb-34cea92784b4-00001.parquet deleted file mode 100644 index 285069d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-7-3be35a72-224f-475b-a0eb-34cea92784b4-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/26871791-3133-4757-9cbc-b356c613c83a-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/26871791-3133-4757-9cbc-b356c613c83a-m0.avro deleted file mode 100644 index a922251..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/26871791-3133-4757-9cbc-b356c613c83a-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m0.avro deleted file mode 100644 index f0b73f1..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m1.avro deleted file mode 100644 index d7b9a64..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m0.avro deleted file mode 100644 index 19d600f..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m1.avro deleted file mode 100644 index c8d664d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/9ae37730-f1aa-4609-8b39-3f0ded6f78cf-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/9ae37730-f1aa-4609-8b39-3f0ded6f78cf-m0.avro deleted file mode 100644 index 92c4470..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/9ae37730-f1aa-4609-8b39-3f0ded6f78cf-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m0.avro deleted file mode 100644 index e28081d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m1.avro deleted file mode 100644 index 9101316..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m0.avro deleted file mode 100644 index 9a6b6d5..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m1.avro deleted file mode 100644 index 31716e1..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro deleted file mode 100644 index fafc511..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro deleted file mode 100644 index 3df6c65..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro deleted file mode 100644 index f4da2fc..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro deleted file mode 100644 index 0da1497..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro deleted file mode 100644 index 8f34f30..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro deleted file mode 100644 index 3e9ee6b..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro deleted file mode 100644 index aab9ece..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json deleted file mode 100644 index 3c9acfc..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json +++ /dev/null @@ -1,139 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 1, - "last-updated-ms" : 1719580927570, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 764624380497366583, - "refs" : { - "main" : { - "snapshot-id" : 764624380497366583, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - } ], - "metadata-log" : [ ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json deleted file mode 100644 index bae2e24..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json +++ /dev/null @@ -1,169 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 2, - "last-updated-ms" : 1719580928275, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4037069315291880534, - "refs" : { - "main" : { - "snapshot-id" : 4037069315291880534, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json deleted file mode 100644 index 7db94f1..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json +++ /dev/null @@ -1,196 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 3, - "last-updated-ms" : 1719580929047, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 6287117141668015642, - "refs" : { - "main" : { - "snapshot-id" : 6287117141668015642, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json deleted file mode 100644 index edcbb4b..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json +++ /dev/null @@ -1,226 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 4, - "last-updated-ms" : 1719580929661, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 6585012225877417653, - "refs" : { - "main" : { - "snapshot-id" : 6585012225877417653, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json deleted file mode 100644 index 61b94fc..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json +++ /dev/null @@ -1,256 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 5, - "last-updated-ms" : 1719580930402, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4440319347650982524, - "refs" : { - "main" : { - "snapshot-id" : 4440319347650982524, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json deleted file mode 100644 index 2f8a8fb..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json +++ /dev/null @@ -1,280 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 6, - "last-updated-ms" : 1719580930749, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 3119545726281138740, - "refs" : { - "main" : { - "snapshot-id" : 3119545726281138740, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - "operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json deleted file mode 100644 index 7dd8062..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json +++ /dev/null @@ -1,367 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 6, - "last-updated-ms" : 1719580930997, - "last-column-id" : 16, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 3119545726281138740, - "refs" : { - "main" : { - "snapshot-id" : 3119545726281138740, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - "operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580930749, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json deleted file mode 100644 index 56d07a9..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json +++ /dev/null @@ -1,397 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 7, - "last-updated-ms" : 1719580931465, - "last-column-id" : 16, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4786266686210019019, - "refs" : { - "main" : { - "snapshot-id" : 4786266686210019019, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - "operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - }, { - "sequence-number" : 7, - "snapshot-id" : 4786266686210019019, - "parent-snapshot-id" : 3119545726281138740, - "timestamp-ms" : 1719580931465, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "685", - "added-files-size" : "51653", - "added-position-deletes" : "685", - "changed-partition-count" : "1", - "total-records" : "18044", - "total-files-size" : "1096091", - "total-data-files" : "5", - "total-delete-files" : "3", - "total-position-deletes" : "11452", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - }, { - "timestamp-ms" : 1719580931465, - "snapshot-id" : 4786266686210019019 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580930749, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580930997, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json deleted file mode 100644 index 85c01bd..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json +++ /dev/null @@ -1,484 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 7, - "last-updated-ms" : 1719580931691, - "last-column-id" : 16, - "current-schema-id" : 2, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - }, { - "type" : "struct", - "schema-id" : 2, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "long" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4786266686210019019, - "refs" : { - "main" : { - "snapshot-id" : 4786266686210019019, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - "operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - }, { - "sequence-number" : 7, - "snapshot-id" : 4786266686210019019, - "parent-snapshot-id" : 3119545726281138740, - "timestamp-ms" : 1719580931465, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "685", - "added-files-size" : "51653", - "added-position-deletes" : "685", - "changed-partition-count" : "1", - "total-records" : "18044", - "total-files-size" : "1096091", - "total-data-files" : "5", - "total-delete-files" : "3", - "total-position-deletes" : "11452", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - }, { - "timestamp-ms" : 1719580931465, - "snapshot-id" : 4786266686210019019 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580930749, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580930997, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - }, { - "timestamp-ms" : 1719580931465, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/version-hint.text b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/version-hint.text deleted file mode 100644 index f11c82a..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/version-hint.text +++ /dev/null @@ -1 +0,0 @@ -9 \ No newline at end of file diff --git a/duckdb b/duckdb index af39bd0..1f98600 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit af39bd0dcf66876e09ac2a7c3baa28fe1b301151 +Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc diff --git a/extension-ci-tools b/extension-ci-tools index 00831df..278ca51 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 00831df06713072df217d3fb2f6b5e0fae78742f +Subproject commit 278ca51098ac492a3108915e72fe3076254ae80f diff --git a/src/common/iceberg.cpp b/src/common/iceberg.cpp index 6e63969..784583b 100644 --- a/src/common/iceberg.cpp +++ b/src/common/iceberg.cpp @@ -80,8 +80,8 @@ vector IcebergTable::ReadManifestEntries(const string &pat } } else { auto schema = avro::compileJsonSchemaFromString(MANIFEST_ENTRY_SCHEMA); - avro::DataFileReader dfr(std::move(stream), schema); - c::manifest_entry manifest_entry; + avro::DataFileReader dfr(std::move(stream), schema); + manifest_entry manifest_entry; while (dfr.read(manifest_entry)) { ret.emplace_back(IcebergManifestEntry(manifest_entry)); } diff --git a/src/iceberg_functions/iceberg_metadata.cpp b/src/iceberg_functions/iceberg_metadata.cpp index 1ffff2c..3141809 100644 --- a/src/iceberg_functions/iceberg_metadata.cpp +++ b/src/iceberg_functions/iceberg_metadata.cpp @@ -125,12 +125,12 @@ static void IcebergMetaDataFunction(ClientContext &context, TableFunctionInput & FlatVector::GetData(output.data[0])[out] = StringVector::AddString(output.data[0], string_t(manifest.manifest.manifest_path)); - ; + FlatVector::GetData(output.data[1])[out] = manifest.manifest.sequence_number; FlatVector::GetData(output.data[2])[out] = StringVector::AddString( output.data[2], string_t(IcebergManifestContentTypeToString(manifest.manifest.content))); - FlatVector::GetData(output.data[3])[out] = StringVector::AddString( + FlatVector::GetData(output.data[3])[out] = StringVector::AddString( output.data[3], string_t(IcebergManifestEntryStatusTypeToString(manifest_entry.status))); FlatVector::GetData(output.data[4])[out] = StringVector::AddString( output.data[4], string_t(IcebergManifestEntryContentTypeToString(manifest_entry.content))); diff --git a/src/iceberg_functions/iceberg_scan.cpp b/src/iceberg_functions/iceberg_scan.cpp index 4e0b5cc..afb7059 100644 --- a/src/iceberg_functions/iceberg_scan.cpp +++ b/src/iceberg_functions/iceberg_scan.cpp @@ -15,10 +15,14 @@ #include "duckdb/parser/expression/star_expression.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/parser/tableref/emptytableref.hpp" +#include "duckdb/parser/statement/select_statement.hpp" +#include "duckdb/parser/query_node/select_node.hpp" +#include "duckdb/parser/expression/conjunction_expression.hpp" #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" #include "duckdb/common/file_opener.hpp" #include "duckdb/common/file_system.hpp" +#include "duckdb/common/printer.hpp" #include "iceberg_metadata.hpp" #include "iceberg_utils.hpp" #include "iceberg_functions.hpp" @@ -26,291 +30,855 @@ #include #include +#include +#include // For std::setw namespace duckdb { +// Helper function to log indentation +static void LogIndent(int indent_level) { + for (int i = 0; i < indent_level; ++i) { + std::cout << " "; + } +} + +// Utility function to convert ExpressionType enum to string using DuckDB's function +static std::string GetExpressionTypeString(ExpressionType type) { + return ExpressionTypeToString(type); // Use DuckDB's existing function +} + +// Recursive function to log details of a ParsedExpression +static void LogExpressionDetails(const ParsedExpression &expr, int indent_level = 0) { + LogIndent(indent_level); + std::cout << "Expression Type: " << GetExpressionTypeString(expr.type) << std::endl; + + switch (expr.type) { + case ExpressionType::CONJUNCTION_AND: + case ExpressionType::CONJUNCTION_OR: { + auto &conj = (ConjunctionExpression &)expr; + std::cout << "Conjunction Type: " << GetExpressionTypeString(conj.type) << std::endl; + std::cout << "Number of Children: " << conj.children.size() << std::endl; + for (const auto &child : conj.children) { + LogExpressionDetails(*child, indent_level + 1); + } + break; + } + case ExpressionType::COMPARE_EQUAL: + case ExpressionType::COMPARE_GREATERTHAN: + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + case ExpressionType::COMPARE_LESSTHAN: + case ExpressionType::COMPARE_LESSTHANOREQUALTO: { + auto &comp = (ComparisonExpression &)expr; + std::cout << "Comparison Operator: " << GetExpressionTypeString(comp.type) << std::endl; + LogIndent(indent_level); + std::cout << "Left Operand:" << std::endl; + LogExpressionDetails(*comp.left, indent_level + 1); + LogIndent(indent_level); + std::cout << "Right Operand:" << std::endl; + LogExpressionDetails(*comp.right, indent_level + 1); + break; + } + case ExpressionType::COLUMN_REF: { + auto &col_ref = (ColumnRefExpression &)expr; + std::cout << "Column Name: " << col_ref.column_names[0] << std::endl; + break; + } + case ExpressionType::VALUE_CONSTANT: { // Corrected + auto &const_expr = (ConstantExpression &)expr; + std::cout << "Constant Value: " << const_expr.value.ToString() << std::endl; + break; + } + case ExpressionType::FUNCTION: { + auto &func_expr = (FunctionExpression &)expr; + std::cout << "Function Name: " << func_expr.function_name << std::endl; + std::cout << "Number of Arguments: " << func_expr.children.size() << std::endl; + for (const auto &child : func_expr.children) { + LogExpressionDetails(*child, indent_level + 1); + } + break; + } + // Add more cases as needed for other expression types + default: + LogIndent(indent_level); + std::cout << "Unhandled Expression Type: " << GetExpressionTypeString(expr.type) << std::endl; + break; + } +} + +// === Derived TableFunctionInfo to hold constraints === +struct IcebergTableFunctionInfo : public TableFunctionInfo { + vector> constraints; + + IcebergTableFunctionInfo(vector> &&constraints_p) + : constraints(std::move(constraints_p)) {} +}; + +// === Helper function to recursively extract comparison predicates from expressions === +static void ExtractPredicates(ParsedExpression &expr, vector> &predicates) { + if (expr.type == ExpressionType::CONJUNCTION_AND) { + auto &conj = (ConjunctionExpression &)expr; + // Access children instead of left and right + if (conj.children.size() >= 2) { + ExtractPredicates(*conj.children[0], predicates); + ExtractPredicates(*conj.children[1], predicates); + } + } else if (expr.type == ExpressionType::COMPARE_EQUAL || + expr.type == ExpressionType::COMPARE_GREATERTHAN || + expr.type == ExpressionType::COMPARE_GREATERTHANOREQUALTO || + expr.type == ExpressionType::COMPARE_LESSTHAN || + expr.type == ExpressionType::COMPARE_LESSTHANOREQUALTO) { + // Clone the expression and add to predicates + predicates.emplace_back(expr.Copy()); + } + // Add more conditions here if you want to handle OR or other expressions +} + struct IcebergScanGlobalTableFunctionState : public GlobalTableFunctionState { public: - static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { - return make_uniq(); - } + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + return make_uniq(); + } }; static unique_ptr GetFilenameExpr(unique_ptr colref_expr) { - vector> split_children; - split_children.push_back(std::move(colref_expr)); - split_children.push_back(make_uniq(Value("/"))); - auto data_split = make_uniq("string_split", std::move(split_children)); + vector> split_children; + split_children.emplace_back(std::move(colref_expr)); + split_children.emplace_back(make_uniq(Value("/"))); + auto data_split = make_uniq("string_split", std::move(split_children)); - vector> list_extract_children; - list_extract_children.push_back(std::move(data_split)); - list_extract_children.push_back(make_uniq(Value(-1))); - auto list_extract_expr = make_uniq("list_extract", std::move(list_extract_children)); + vector> list_extract_children; + list_extract_children.emplace_back(std::move(data_split)); + list_extract_children.emplace_back(make_uniq(Value(-1))); + auto list_extract_expr = make_uniq("list_extract", std::move(list_extract_children)); - return std::move(list_extract_expr); + return std::move(list_extract_expr); } static unique_ptr GetFilenameMatchExpr() { - auto data_colref_expr = make_uniq("filename", "iceberg_scan_data"); - auto delete_colref_expr = make_uniq("file_path", "iceberg_scan_deletes"); + auto data_colref_expr = make_uniq("filename", "iceberg_scan_data"); + auto delete_colref_expr = make_uniq("file_path", "iceberg_scan_deletes"); - auto data_filename_expr = GetFilenameExpr(std::move(data_colref_expr)); - auto delete_filename_expr = GetFilenameExpr(std::move(delete_colref_expr)); + auto data_filename_expr = GetFilenameExpr(std::move(data_colref_expr)); + auto delete_filename_expr = GetFilenameExpr(std::move(delete_colref_expr)); - return make_uniq(ExpressionType::COMPARE_NOT_DISTINCT_FROM, std::move(data_filename_expr), - std::move(delete_filename_expr)); + return make_uniq(ExpressionType::COMPARE_NOT_DISTINCT_FROM, std::move(data_filename_expr), + std::move(delete_filename_expr)); }; -//! Uses recursive unnest on list of structs to return a table with all data and delete files -//! TODO: refactor, probably. +// Uses recursive unnest on list of structs to return a table with all data and delete files static unique_ptr MakeListFilesExpression(vector &data_file_values, vector &delete_file_values) { - vector structs; - for (const auto &file : data_file_values) { - child_list_t child; - child.emplace_back(make_pair("file", file)); - child.emplace_back(make_pair("type", Value("data"))); - structs.push_back(Value::STRUCT(child)); - } - for (const auto &file : delete_file_values) { - child_list_t child; - child.emplace_back(make_pair("file", file)); - child.emplace_back(make_pair("type", Value("delete"))); - structs.push_back(Value::STRUCT(child)); - } - - // Unnest - vector> unnest_children; - unnest_children.push_back(make_uniq(Value::LIST(structs))); - auto recursive_named_param = make_uniq(Value::BOOLEAN(true)); - recursive_named_param->alias = "recursive"; - unnest_children.push_back(std::move(recursive_named_param)); - - // Select node - auto select_node = make_uniq(); - vector> select_exprs; - select_exprs.emplace_back(make_uniq("unnest", std::move(unnest_children))); - select_node->select_list = std::move(select_exprs); - select_node->from_table = make_uniq(); - - // Select statement - auto select_statement = make_uniq(); - select_statement->node = std::move(select_node); - return make_uniq(std::move(select_statement), "iceberg_scan"); + vector structs; + for (const auto &file : data_file_values) { + child_list_t child; + child.emplace_back(make_pair("file", file)); + child.emplace_back(make_pair("type", Value("data"))); + structs.emplace_back(Value::STRUCT(child)); + } + for (const auto &file : delete_file_values) { + child_list_t child; + child.emplace_back(make_pair("file", file)); + child.emplace_back(make_pair("type", Value("delete"))); + structs.emplace_back(Value::STRUCT(child)); + } + + // Unnest + vector> unnest_children; + unnest_children.emplace_back(make_uniq(Value::LIST(structs))); + auto recursive_named_param = make_uniq(Value::BOOLEAN(true)); + recursive_named_param->alias = "recursive"; + unnest_children.emplace_back(std::move(recursive_named_param)); + + // Select node + auto select_node = make_uniq(); + vector> select_exprs; + select_exprs.emplace_back(make_uniq("unnest", std::move(unnest_children))); + select_node->select_list = std::move(select_exprs); + select_node->from_table = make_uniq(); + + // Select statement + auto select_statement = make_uniq(); + select_statement->node = std::move(select_node); + return make_uniq(std::move(select_statement), "iceberg_scan"); } // Create the param for passing the iceberg schema to the parquet reader as a DuckDB map static Value GetParquetSchemaParam(vector &schema) { - vector map_entries; - - for (auto &schema_entry : schema) { - child_list_t map_value_children; - map_value_children.push_back(make_pair("name", Value(schema_entry.name))); - map_value_children.push_back(make_pair("type", Value(schema_entry.type.ToString()))); - map_value_children.push_back(make_pair("default_value", schema_entry.default_value)); - auto map_value = Value::STRUCT(map_value_children); - - child_list_t map_entry_children; - map_entry_children.push_back(make_pair("key", schema_entry.id)); - map_entry_children.push_back(make_pair("values", map_value)); - auto map_entry = Value::STRUCT(map_entry_children); - - map_entries.push_back(map_entry); - } - - auto param_type = - LogicalType::STRUCT({{"key", LogicalType::INTEGER}, - {"value", LogicalType::STRUCT({{{"name", LogicalType::VARCHAR}, - {"type", LogicalType::VARCHAR}, - {"default_value", LogicalType::VARCHAR}}})}}); - auto ret = Value::MAP(param_type, map_entries); - return ret; + vector map_entries; + + for (auto &schema_entry : schema) { + child_list_t map_value_children; + map_value_children.emplace_back(make_pair("name", Value(schema_entry.name))); + map_value_children.emplace_back(make_pair("type", Value(schema_entry.type.ToString()))); + map_value_children.emplace_back(make_pair("default_value", schema_entry.default_value)); + auto map_value = Value::STRUCT(map_value_children); + + child_list_t map_entry_children; + map_entry_children.emplace_back(make_pair("key", Value(schema_entry.id))); + map_entry_children.emplace_back(make_pair("value", map_value)); + auto map_entry = Value::STRUCT(map_entry_children); + + map_entries.emplace_back(map_entry); + } + + auto param_type = + LogicalType::STRUCT({{"key", LogicalType::INTEGER}, + {"value", LogicalType::STRUCT({{{"name", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"default_value", LogicalType::VARCHAR}}})}}); + auto ret = Value::MAP(param_type, map_entries); + return ret; } -//! Build the Parquet Scan expression for the files we need to scan -static unique_ptr MakeScanExpression(vector &data_file_values, vector &delete_file_values, - vector &schema, bool allow_moved_paths, string metadata_compression_codec, bool skip_schema_inference) { - // No deletes, just return a TableFunctionRef for a parquet scan of the data files - if (delete_file_values.empty()) { - auto table_function_ref_data = make_uniq(); - table_function_ref_data->alias = "iceberg_scan_data"; - vector> left_children; - left_children.push_back(make_uniq(Value::LIST(data_file_values))); - if (!skip_schema_inference) { - left_children.push_back( - make_uniq(ExpressionType::COMPARE_EQUAL, make_uniq("schema"), - make_uniq(GetParquetSchemaParam(schema)))); - } - table_function_ref_data->function = make_uniq("parquet_scan", std::move(left_children)); - return std::move(table_function_ref_data); - } - - // Join - auto join_node = make_uniq(JoinRefType::REGULAR); - auto filename_match_expr = - allow_moved_paths - ? GetFilenameMatchExpr() - : make_uniq(ExpressionType::COMPARE_NOT_DISTINCT_FROM, - make_uniq("filename", "iceberg_scan_data"), - make_uniq("file_path", "iceberg_scan_deletes")); - join_node->type = JoinType::ANTI; - join_node->condition = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(filename_match_expr), - make_uniq(ExpressionType::COMPARE_NOT_DISTINCT_FROM, - make_uniq("file_row_number", "iceberg_scan_data"), - make_uniq("pos", "iceberg_scan_deletes"))); - - // LHS: data - auto table_function_ref_data = make_uniq(); - table_function_ref_data->alias = "iceberg_scan_data"; - vector> left_children; - left_children.push_back(make_uniq(Value::LIST(data_file_values))); - left_children.push_back(make_uniq(ExpressionType::COMPARE_EQUAL, - make_uniq("filename"), - make_uniq(Value(1)))); - left_children.push_back(make_uniq(ExpressionType::COMPARE_EQUAL, - make_uniq("file_row_number"), - make_uniq(Value(1)))); - if (!skip_schema_inference) { - left_children.push_back( - make_uniq(ExpressionType::COMPARE_EQUAL, make_uniq("schema"), - make_uniq(GetParquetSchemaParam(schema)))); - } - table_function_ref_data->function = make_uniq("parquet_scan", std::move(left_children)); - join_node->left = std::move(table_function_ref_data); - - // RHS: deletes - auto table_function_ref_deletes = make_uniq(); - table_function_ref_deletes->alias = "iceberg_scan_deletes"; - vector> right_children; - right_children.push_back(make_uniq(Value::LIST(delete_file_values))); - table_function_ref_deletes->function = make_uniq("parquet_scan", std::move(right_children)); - join_node->right = std::move(table_function_ref_deletes); - - // Wrap the join in a select, exclude the filename and file_row_number cols - auto select_statement = make_uniq(); - - // Construct Select node - auto select_node = make_uniq(); - select_node->from_table = std::move(join_node); - auto select_expr = make_uniq(); - select_expr->exclude_list = {"filename", "file_row_number"}; - vector> select_exprs; - select_exprs.push_back(std::move(select_expr)); - select_node->select_list = std::move(select_exprs); - select_statement->node = std::move(select_node); - - return make_uniq(std::move(select_statement), "iceberg_scan"); +// Utility function to convert byte vector to hex string for logging +static std::string ByteArrayToHexString(const std::vector &bytes) { + std::ostringstream oss; + for (auto byte : bytes) { + oss << std::hex << std::setw(2) << std::setfill('0') << (int)byte; + } + return oss.str(); +} + +// Updated DeserializeBound function with detailed logging +static Value DeserializeBound(const std::vector &bound_value, const LogicalType &type) { + // Log the type ID and raw bound value + std::cout << " DeserializeBound called with Type: " + << LogicalTypeIdToString(type.id()) + << ", Raw Bound Value (Hex): " << ByteArrayToHexString(bound_value) << std::endl; + + Value deserialized_value; + try { + switch (type.id()) { + case LogicalTypeId::INTEGER: { + if (bound_value.size() < sizeof(int32_t)) { + throw std::runtime_error("Invalid bound size for INTEGER type"); + } + int32_t val; + std::memcpy(&val, bound_value.data(), sizeof(int32_t)); + deserialized_value = Value::INTEGER(val); + break; + } + case LogicalTypeId::BIGINT: { + if (bound_value.size() < sizeof(int64_t)) { + throw std::runtime_error("Invalid bound size for BIGINT type"); + } + int64_t val; + std::memcpy(&val, bound_value.data(), sizeof(int64_t)); + deserialized_value = Value::BIGINT(val); + break; + } + case LogicalTypeId::DATE: { + if (bound_value.size() < sizeof(int32_t)) { // Dates are typically stored as int32 (days since epoch) + throw std::runtime_error("Invalid bound size for DATE type"); + } + int32_t days_since_epoch; + std::memcpy(&days_since_epoch, bound_value.data(), sizeof(int32_t)); + // Convert to DuckDB date + date_t date = Date::EpochDaysToDate(days_since_epoch); + deserialized_value = Value::DATE(date); + break; + } + case LogicalTypeId::TIMESTAMP: { + if (bound_value.size() < sizeof(int64_t)) { // Timestamps are typically stored as int64 (microseconds since epoch) + throw std::runtime_error("Invalid bound size for TIMESTAMP type"); + } + int64_t micros_since_epoch; + std::memcpy(µs_since_epoch, bound_value.data(), sizeof(int64_t)); + std::cout << " TIMESTAMP bound value (microseconds since epoch): " << micros_since_epoch << std::endl; + // Convert to DuckDB timestamp using microseconds + timestamp_t timestamp = Timestamp::FromEpochMicroSeconds(micros_since_epoch); + deserialized_value = Value::TIMESTAMP(timestamp); + std::cout << " TIMESTAMP bound value (converted): " << deserialized_value.ToString() << std::endl; + break; + } + case LogicalTypeId::TIMESTAMP_TZ: { // Added support for TIMESTAMP WITH TIME ZONE + if (bound_value.size() < sizeof(int64_t)) { // Assuming stored as int64 (microseconds since epoch) + throw std::runtime_error("Invalid bound size for TIMESTAMP_TZ type"); + } + int64_t micros_since_epoch; + std::memcpy(µs_since_epoch, bound_value.data(), sizeof(int64_t)); + std::cout << " TIMESTAMP_TZ bound value (microseconds since epoch): " << micros_since_epoch << std::endl; + // Convert to DuckDB timestamp using microseconds + timestamp_t timestamp = Timestamp::FromEpochMicroSeconds(micros_since_epoch); + // Create a TIMESTAMPTZ Value + deserialized_value = Value::TIMESTAMPTZ(timestamp); + std::cout << " TIMESTAMP_TZ bound value (converted): " << deserialized_value.ToString() << std::endl; + break; + } + case LogicalTypeId::DOUBLE: { + if (bound_value.size() < sizeof(double)) { + throw std::runtime_error("Invalid bound size for DOUBLE type"); + } + double val; + std::memcpy(&val, bound_value.data(), sizeof(double)); + deserialized_value = Value::DOUBLE(val); + break; + } + case LogicalTypeId::VARCHAR: { + // Assume the bytes represent a UTF-8 string + std::string str(bound_value.begin(), bound_value.end()); + deserialized_value = Value(str); + break; + } + // Add more types as needed + default: + throw std::runtime_error("Unsupported type for DeserializeBound"); + } + + // Log the final deserialized value + std::cout << " Deserialized Value: " << deserialized_value.ToString() << std::endl; + } catch (const std::exception &e) { + std::cout << " Error during deserialization: " << e.what() << std::endl; + // Depending on your error handling strategy, you might want to rethrow or handle it here + throw; + } + + return deserialized_value; +} + + +static bool EvaluatePredicateAgainstStatistics(const IcebergManifestEntry &entry, + const vector> &predicates, + const std::vector &schema) { + // Create a mapping from column names to field IDs and their LogicalTypes + std::unordered_map> column_to_field_info; + for (const auto &col_def : schema) { + column_to_field_info[col_def.name] = {col_def.id, col_def.type}; // Assuming col_def.type is LogicalType + } + + for (const auto &predicate : predicates) { + if (auto comparison = dynamic_cast(predicate.get())) { + // Assume predicates are on columns, possibly transformed + std::string column_name; + if (auto colref = dynamic_cast(comparison->left.get())) { + column_name = colref->GetColumnName(); + } else { + // Unsupported predicate structure + std::cout << " Unsupported predicate structure. Skipping predicate." << std::endl; + continue; + } + + // Retrieve field ID and type + auto it = column_to_field_info.find(column_name); + if (it == column_to_field_info.end()) { + // Column not found in schema, cannot evaluate predicate + std::cout << " Column '" << column_name << "' not found in schema. Skipping predicate." << std::endl; + continue; + } + int field_id = it->second.first; + LogicalType field_type = it->second.second; + + // Convert field_id to string for lookup + std::string field_id_str = std::to_string(field_id); + + // Get lower and upper bounds + auto lower_it = entry.lower_bounds.find(field_id_str); + auto upper_it = entry.upper_bounds.find(field_id_str); + + if (lower_it == entry.lower_bounds.end() || upper_it == entry.upper_bounds.end()) { + std::cout << " No bounds found for field ID: " << field_id_str << ". Cannot evaluate predicate." << std::endl; + continue; // Cannot filter based on missing bounds + } + + // Deserialize bounds + Value lower_bound, upper_bound; + try { + lower_bound = DeserializeBound(lower_it->second, field_type); + upper_bound = DeserializeBound(upper_it->second, field_type); + } catch (const std::exception &e) { + std::cout << " Failed to deserialize bounds for field ID " << field_id_str << ": " << e.what() << std::endl; + continue; + } + + // Extract the constant value from the predicate + Value constant_value; + if (auto const_expr = dynamic_cast(comparison->right.get())) { + constant_value = const_expr->value; + } else { + // Unsupported predicate structure + std::cout << " Unsupported predicate structure on right operand. Skipping predicate." << std::endl; + continue; + } + + std::cout << " Evaluating predicate: " << predicate->ToString() << std::endl; + std::cout << " Mapped Field ID: " << field_id_str << ", Value: " << constant_value.ToString() << std::endl; + std::cout << " IcebergManifestEntry bounds for field ID '" << field_id_str << "':" << std::endl; + std::cout << " Lower bound: " << lower_bound.ToString() << std::endl; + std::cout << " Upper bound: " << upper_bound.ToString() << std::endl; + + // Evaluate the predicate against the bounds + bool result = true; + switch (comparison->type) { + case ExpressionType::COMPARE_EQUAL: + result = (constant_value >= lower_bound && constant_value <= upper_bound); + break; + case ExpressionType::COMPARE_GREATERTHAN: + result = (constant_value <= upper_bound); + break; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + result = (constant_value <= upper_bound); + break; + case ExpressionType::COMPARE_LESSTHAN: + result = (constant_value >= lower_bound); + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + result = (constant_value >= lower_bound); + break; + default: + // For other types of comparisons, we can't make a decision based on bounds + result = true; // Conservative approach + break; + } + + std::cout << " Predicate evaluation result: " << (result ? "true" : "false") << std::endl; + if (!result) { + return false; // If any predicate fails, exclude the file + } + } + } + return true; // All predicates passed +} + +// Build the Parquet Scan expression for the files we need to scan +static unique_ptr MakeScanExpression(const string &iceberg_path, FileSystem &fs, + vector &data_file_entries, + vector &delete_file_values, + vector &schema, bool allow_moved_paths, + string metadata_compression_codec, bool skip_schema_inference, + const IcebergTableFunctionInfo *iceberg_info = nullptr) { + // Log the total number of files before filtering + std::cout << "Iceberg scan: Total data files before filtering: " << data_file_entries.size() << std::endl; + + if (iceberg_info) { + std::cout << "Iceberg scan: iceberg_info provided." << std::endl; + std::cout << "Iceberg scan: Number of constraints: " << iceberg_info->constraints.size() << std::endl; + } else { + std::cout << "Iceberg scan: iceberg_info is nullptr." << std::endl; + } + + // Log predicates if they exist + if (iceberg_info && !iceberg_info->constraints.empty()) { + std::cout << "Iceberg scan: Predicates applied:" << std::endl; + int predicate_index = 1; + for (const auto &predicate : iceberg_info->constraints) { + std::cout << " Predicate " << predicate_index++ << ":" << std::endl; + LogExpressionDetails(*predicate, 2); // Indent level 2 for predicates + } + } else { + std::cout << "Iceberg scan: No predicates applied" << std::endl; + } + + vector filtered_data_file_values; + if (iceberg_info && !iceberg_info->constraints.empty()) { + for (const auto &entry : data_file_entries) { + std::cout << "Evaluating file: " << entry.file_path << std::endl; + if (EvaluatePredicateAgainstStatistics(entry, iceberg_info->constraints, schema)) { + auto full_path = allow_moved_paths ? IcebergUtils::GetFullPath(iceberg_path, entry.file_path, fs) : entry.file_path; + filtered_data_file_values.emplace_back(full_path); + std::cout << " Iceberg scan: Data file included after filtering: " << full_path << std::endl; + } else { + std::cout << " Iceberg scan: Data file excluded after filtering: " << entry.file_path << std::endl; + } + } + std::cout << "Iceberg scan: Data files after filtering: " << filtered_data_file_values.size() << std::endl; + } else { + for (const auto &entry : data_file_entries) { + auto full_path = allow_moved_paths ? IcebergUtils::GetFullPath(iceberg_path, entry.file_path, fs) : entry.file_path; + filtered_data_file_values.emplace_back(full_path); + } + std::cout << "Iceberg scan: No predicates applied, all " << filtered_data_file_values.size() << " files included" << std::endl; + } + + // Log delete files + std::cout << "Iceberg scan: Delete files: " << delete_file_values.size() << std::endl; + + // No deletes, just return a TableFunctionRef for a parquet scan of the data files + // No deletes, just return a TableFunctionRef for a parquet scan of the data files + if (delete_file_values.empty()) { + if (!filtered_data_file_values.empty()) { + // Existing parquet_scan code + auto table_function_ref_data = make_uniq(); + table_function_ref_data->alias = "iceberg_scan_data"; + vector> left_children; + LogicalType child_type = LogicalType::VARCHAR; + left_children.emplace_back( + make_uniq( + Value::LIST(child_type, filtered_data_file_values) + ) + ); + if (!skip_schema_inference) { + left_children.emplace_back( + make_uniq( + ExpressionType::COMPARE_EQUAL, + make_uniq("schema"), + make_uniq(GetParquetSchemaParam(schema)) + ) + ); + } + table_function_ref_data->function = make_uniq("parquet_scan", std::move(left_children)); + return std::move(table_function_ref_data); + } else { + // **BEGIN: Handling Empty Filtered Data Files** + auto select_node = make_uniq(); + select_node->where_clause = make_uniq(Value::BOOLEAN(false)); + + // Add select expressions for each column based on the schema + for (const auto &col : schema) { + // Create a NULL constant of the appropriate type + auto null_expr = make_uniq(Value(col.type)); + // Alias it to the column name + null_expr->alias = col.name; + select_node->select_list.emplace_back(std::move(null_expr)); + } + + // **Add the FROM clause as EmptyTableRef** + select_node->from_table = make_uniq(); + + // Create a SelectStatement + auto select_statement = make_uniq(); + select_statement->node = std::move(select_node); + + // Create a SubqueryRef with the SelectStatement + auto table_ref_empty = make_uniq(std::move(select_statement), "empty_scan"); + + // Log that we are returning an empty table + std::cout << "Iceberg scan: No files to scan after filtering. Returning empty table." << std::endl; + + return std::move(table_ref_empty); + // **END: Handling Empty Filtered Data Files** + } + } + + + // Join + auto join_node = make_uniq(JoinRefType::REGULAR); + auto filename_match_expr = + allow_moved_paths + ? GetFilenameMatchExpr() + : make_uniq(ExpressionType::COMPARE_NOT_DISTINCT_FROM, + make_uniq("filename", "iceberg_scan_data"), + make_uniq("file_path", "iceberg_scan_deletes")); + join_node->type = JoinType::ANTI; + join_node->condition = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(filename_match_expr), + make_uniq(ExpressionType::COMPARE_NOT_DISTINCT_FROM, + make_uniq("file_row_number", "iceberg_scan_data"), + make_uniq("pos", "iceberg_scan_deletes"))); + + // LHS: data + auto table_function_ref_data = make_uniq(); + table_function_ref_data->alias = "iceberg_scan_data"; + vector> left_children; + left_children.emplace_back(make_uniq(Value::LIST(filtered_data_file_values))); + left_children.emplace_back(make_uniq(ExpressionType::COMPARE_EQUAL, + make_uniq("filename"), + make_uniq(Value(1)))); + left_children.emplace_back(make_uniq(ExpressionType::COMPARE_EQUAL, + make_uniq("file_row_number"), + make_uniq(Value(1)))); + if (!skip_schema_inference) { + left_children.emplace_back( + make_uniq(ExpressionType::COMPARE_EQUAL, make_uniq("schema"), + make_uniq(GetParquetSchemaParam(schema)))); + } + table_function_ref_data->function = make_uniq("parquet_scan", std::move(left_children)); + join_node->left = std::move(table_function_ref_data); + + // RHS: deletes + auto table_function_ref_deletes = make_uniq(); + table_function_ref_deletes->alias = "iceberg_scan_deletes"; + vector> right_children; + right_children.emplace_back(make_uniq(Value::LIST(delete_file_values))); + table_function_ref_deletes->function = make_uniq("parquet_scan", std::move(right_children)); + join_node->right = std::move(table_function_ref_deletes); + + // Wrap the join in a select, exclude the filename and file_row_number cols + auto select_statement = make_uniq(); + + // Construct Select node + auto select_node = make_uniq(); + select_node->from_table = std::move(join_node); + auto select_expr = make_uniq(); + select_expr->exclude_list = {"filename", "file_row_number"}; + vector> select_exprs; + select_exprs.emplace_back(std::move(select_expr)); + select_node->select_list = std::move(select_exprs); + select_statement->node = std::move(select_node); + + return make_uniq(std::move(select_statement), "iceberg_scan"); } static unique_ptr IcebergScanBindReplace(ClientContext &context, TableFunctionBindInput &input) { - FileSystem &fs = FileSystem::GetFileSystem(context); - auto iceberg_path = input.inputs[0].ToString(); - - // Enabling this will ensure the ANTI Join with the deletes only looks at filenames, instead of full paths - // this allows hive tables to be moved and have mismatching paths, usefull for testing, but will have worse - // performance - bool allow_moved_paths = false; - bool skip_schema_inference = false; - string mode = "default"; - string metadata_compression_codec = "none"; - string table_version = DEFAULT_VERSION_HINT_FILE; - string version_name_format = DEFAULT_TABLE_VERSION_FORMAT; - - for (auto &kv : input.named_parameters) { - auto loption = StringUtil::Lower(kv.first); - if (loption == "allow_moved_paths") { - allow_moved_paths = BooleanValue::Get(kv.second); - if (StringUtil::EndsWith(iceberg_path, ".json")) { - throw InvalidInputException( - "Enabling allow_moved_paths is not enabled for directly scanning metadata files."); - } - } else if (loption == "mode") { - mode = StringValue::Get(kv.second); - } else if (loption == "metadata_compression_codec") { - metadata_compression_codec = StringValue::Get(kv.second); - } else if (loption == "skip_schema_inference") { - skip_schema_inference = BooleanValue::Get(kv.second); - } else if (loption == "version") { - table_version = StringValue::Get(kv.second); - } else if (loption == "version_name_format") { - version_name_format = StringValue::Get(kv.second); - } - } - auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(iceberg_path, fs, metadata_compression_codec, table_version, version_name_format); - IcebergSnapshot snapshot_to_scan; - if (input.inputs.size() > 1) { - if (input.inputs[1].type() == LogicalType::UBIGINT) { - snapshot_to_scan = IcebergSnapshot::GetSnapshotById(iceberg_meta_path, fs, input.inputs[1].GetValue(), metadata_compression_codec, skip_schema_inference); - } else if (input.inputs[1].type() == LogicalType::TIMESTAMP) { - snapshot_to_scan = - IcebergSnapshot::GetSnapshotByTimestamp(iceberg_meta_path, fs, input.inputs[1].GetValue(), metadata_compression_codec, skip_schema_inference); - } else { - throw InvalidInputException("Unknown argument type in IcebergScanBindReplace."); - } - } else { - snapshot_to_scan = IcebergSnapshot::GetLatestSnapshot(iceberg_meta_path, fs, metadata_compression_codec, skip_schema_inference); - } - - IcebergTable iceberg_table = IcebergTable::Load(iceberg_path, snapshot_to_scan, fs, allow_moved_paths, metadata_compression_codec); - auto data_files = iceberg_table.GetPaths(); - auto delete_files = iceberg_table.GetPaths(); - vector data_file_values; - for (auto &data_file : data_files) { - data_file_values.push_back( - {allow_moved_paths ? IcebergUtils::GetFullPath(iceberg_path, data_file, fs) : data_file}); - } - vector delete_file_values; - for (auto &delete_file : delete_files) { - delete_file_values.push_back( - {allow_moved_paths ? IcebergUtils::GetFullPath(iceberg_path, delete_file, fs) : delete_file}); - } - - if (mode == "list_files") { - return MakeListFilesExpression(data_file_values, delete_file_values); - } else if (mode == "default") { - return MakeScanExpression(data_file_values, delete_file_values, snapshot_to_scan.schema, allow_moved_paths, metadata_compression_codec, skip_schema_inference); - } else { - throw NotImplementedException("Unknown mode type for ICEBERG_SCAN bind : '" + mode + "'"); - } + FileSystem &fs = FileSystem::GetFileSystem(context); + auto iceberg_path = input.inputs[0].ToString(); + + // Log the input path + std::cout << "Iceberg scan: Input path: " << iceberg_path << std::endl; + + // Parse named parameters + bool allow_moved_paths = false; + bool skip_schema_inference = false; + string mode = "default"; + string metadata_compression_codec = "none"; + string table_version = DEFAULT_VERSION_HINT_FILE; + string version_name_format = DEFAULT_TABLE_VERSION_FORMAT; + + for (auto &kv : input.named_parameters) { + auto loption = StringUtil::Lower(kv.first); + if (loption == "allow_moved_paths") { + allow_moved_paths = BooleanValue::Get(kv.second); + if (StringUtil::EndsWith(iceberg_path, ".json")) { + throw InvalidInputException( + "Enabling allow_moved_paths is not enabled for directly scanning metadata files."); + } + } else if (loption == "mode") { + mode = StringValue::Get(kv.second); + } else if (loption == "metadata_compression_codec") { + metadata_compression_codec = StringValue::Get(kv.second); + } else if (loption == "skip_schema_inference") { + skip_schema_inference = BooleanValue::Get(kv.second); + } else if (loption == "version") { + table_version = StringValue::Get(kv.second); + } else if (loption == "version_name_format") { + version_name_format = StringValue::Get(kv.second); + } + } + auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(iceberg_path, fs, metadata_compression_codec, table_version, version_name_format); + + // Log the metadata path + std::cout << "Iceberg scan: Metadata path: " << iceberg_meta_path << std::endl; + + IcebergSnapshot snapshot_to_scan; + if (input.inputs.size() > 1) { + if (input.inputs[1].type() == LogicalType::UBIGINT) { + snapshot_to_scan = IcebergSnapshot::GetSnapshotById(iceberg_meta_path, fs, input.inputs[1].GetValue(), metadata_compression_codec, skip_schema_inference); + } else if (input.inputs[1].type() == LogicalType::TIMESTAMP) { + snapshot_to_scan = + IcebergSnapshot::GetSnapshotByTimestamp(iceberg_meta_path, fs, input.inputs[1].GetValue(), metadata_compression_codec, skip_schema_inference); + } else { + throw InvalidInputException("Unknown argument type in IcebergScanBindReplace."); + } + } else { + snapshot_to_scan = IcebergSnapshot::GetLatestSnapshot(iceberg_meta_path, fs, metadata_compression_codec, skip_schema_inference); + } + + std::cout << "Iceberg scan: Got Snapshot" << std::endl; + + IcebergTable iceberg_table = IcebergTable::Load(iceberg_path, snapshot_to_scan, fs, allow_moved_paths, metadata_compression_codec); + + // Log some information about the loaded table + std::cout << "Iceberg scan: Loaded table with " << iceberg_table.entries.size() << " entries" << std::endl; + + auto data_entries = iceberg_table.GetEntries(); + auto delete_files = iceberg_table.GetPaths(); + + // Log information about data entries and delete files + std::cout << "Iceberg scan: Found " << data_entries.size() << " data entries" << std::endl; + std::cout << "Iceberg scan: Found " << delete_files.size() << " delete files" << std::endl; + + vector delete_file_values; + for (auto &delete_file : delete_files) { + auto full_path = allow_moved_paths ? IcebergUtils::GetFullPath(iceberg_path, delete_file, fs) : delete_file; + delete_file_values.emplace_back(full_path); + + // Log each delete file path + std::cout << "Iceberg scan: Delete file: " << full_path << std::endl; + } + + // === Extract predicates from input.binder === + vector> extracted_predicates; + if (input.binder) { + std::cout << "Iceberg scan: input.binder: " << input.binder << std::endl; + + // Access the where_clause from the binder + auto statement = input.binder->GetRootStatement(); + if (statement && statement->type == StatementType::SELECT_STATEMENT) { + auto &select_statement = (SelectStatement &)*statement; + if (select_statement.node->type == QueryNodeType::SELECT_NODE) { + auto &select_node = (SelectNode &)*select_statement.node; + if (select_node.where_clause) { + std::cout << "Iceberg scan: select_node.where_clause: " << select_node.where_clause->ToString() << std::endl; + ExtractPredicates(*select_node.where_clause, extracted_predicates); + } + } + } + + // Log the number of extracted predicates + std::cout << "Iceberg scan: Extracted " << extracted_predicates.size() << " predicates" << std::endl; + + // Optionally, you can log details of each extracted predicate + for (size_t i = 0; i < extracted_predicates.size(); ++i) { + std::cout << "Predicate " << i + 1 << ": " << extracted_predicates[i]->ToString() << std::endl; + } + } + + // Log extracted predicates for debugging + std::cout << "Iceberg scan: Extracted Predicates:" << std::endl; + int pred_index = 1; + for (const auto &pred : extracted_predicates) { + std::cout << " Predicate " << pred_index++ << ": " << pred->ToString() << std::endl; + } + + // Create IcebergTableFunctionInfo with extracted predicates + auto iceberg_info = make_uniq(std::move(extracted_predicates)); + input.info = iceberg_info.release(); // Assign raw pointer + + if (mode == "list_files") { + vector data_file_values; + for (const auto &entry : data_entries) { + auto full_path = allow_moved_paths ? IcebergUtils::GetFullPath(iceberg_path, entry.file_path, fs) : entry.file_path; + data_file_values.emplace_back(full_path); + } + return MakeListFilesExpression(data_file_values, delete_file_values); + } else if (mode == "default") { + // Pass the extracted predicates to MakeScanExpression + // Cast input.info to IcebergTableFunctionInfo to access constraints + IcebergTableFunctionInfo *iceberg_info_cast = dynamic_cast(input.info.get()); + if (!iceberg_info_cast) { + throw std::bad_cast(); // Handle the error appropriately + } + return MakeScanExpression(iceberg_path, fs, data_entries, delete_file_values, snapshot_to_scan.schema, allow_moved_paths, + metadata_compression_codec, skip_schema_inference, iceberg_info_cast); + } else { + throw NotImplementedException("Unknown mode type for ICEBERG_SCAN bind : '" + mode + "'"); + } +} + +struct IcebergFunctionData : public FunctionData { + unique_ptr pushdown_predicate; + + IcebergFunctionData() : pushdown_predicate(nullptr) {} + + // Required: Implement the Copy method + unique_ptr Copy() const override { + if (pushdown_predicate) { + return make_uniq(*pushdown_predicate); + } else { + return make_uniq(); + } + } + + // Optional: Implement the Equals method if needed + bool Equals(const FunctionData &other_p) const override { + auto &other = dynamic_cast(other_p); + if (!pushdown_predicate && !other.pushdown_predicate) { + return true; + } + if (pushdown_predicate && other.pushdown_predicate) { + return pushdown_predicate->Equals(*other.pushdown_predicate); + } + return false; + } + + // Copy constructor for the Copy method + IcebergFunctionData(const ParsedExpression &expr) { + pushdown_predicate = expr.Copy(); + } + + IcebergFunctionData(const unique_ptr &expr) { + if (expr) { + pushdown_predicate = expr->Copy(); + } + } +}; + + +// === Implement the pushdown_complex_filter callback === +static void IcebergPushdownFilter(ClientContext &context, LogicalGet &get, FunctionData *bind_data, vector> &filters) { + // Combine all filters into a single conjunction (AND) + unique_ptr combined_filter; + if (filters.empty()) { + combined_filter = nullptr; + } else if (filters.size() == 1) { + auto iceberg_data = dynamic_cast(bind_data); + if (!iceberg_data) { + throw InternalException("Invalid bind_data in pushdown_complex_filter"); + } + combined_filter = std::move(iceberg_data->pushdown_predicate); + std::cout << "Iceberg scan: pushdown_complex_filter: 1 filter" << std::endl; + } else { + auto conj = make_uniq(ExpressionType::CONJUNCTION_AND); + for (auto &filter : filters) { + std::cout << "Iceberg scan: pushdown_complex_filter: adding filter" << std::endl; + // conj->children.emplace_back(std::move(filter)); + } + combined_filter = std::move(conj); + std::cout << "Iceberg scan: pushdown_complex_filter: added " << filters.size() << " filters" << std::endl; + } + + // Store the combined filter in FunctionData + auto iceberg_data = dynamic_cast(bind_data); + if (!iceberg_data) { + throw InternalException("Invalid bind_data in pushdown_complex_filter"); + } + + iceberg_data->pushdown_predicate = std::move(combined_filter); + + // Optionally, log the received predicate + if (iceberg_data->pushdown_predicate) { + std::cout << "Iceberg scan: Received pushdown predicate: " << iceberg_data->pushdown_predicate->ToString() << std::endl; + } else { + std::cout << "Iceberg scan: No pushdown predicate received." << std::endl; + } + + // By returning nullptr, we indicate that the predicate has been handled + // If you wish DuckDB to handle additional transformations, modify here } +// === Register the TableFunction with Filter Pushdown === TableFunctionSet IcebergFunctions::GetIcebergScanFunction() { - TableFunctionSet function_set("iceberg_scan"); - - auto fun = TableFunction({LogicalType::VARCHAR}, nullptr, nullptr, IcebergScanGlobalTableFunctionState::Init); - fun.bind_replace = IcebergScanBindReplace; - fun.named_parameters["skip_schema_inference"] = LogicalType::BOOLEAN; - fun.named_parameters["allow_moved_paths"] = LogicalType::BOOLEAN; - fun.named_parameters["mode"] = LogicalType::VARCHAR; - fun.named_parameters["metadata_compression_codec"] = LogicalType::VARCHAR; - fun.named_parameters["version"] = LogicalType::VARCHAR; - fun.named_parameters["version_name_format"] = LogicalType::VARCHAR; - function_set.AddFunction(fun); - - fun = TableFunction({LogicalType::VARCHAR, LogicalType::UBIGINT}, nullptr, nullptr, - IcebergScanGlobalTableFunctionState::Init); - fun.bind_replace = IcebergScanBindReplace; - fun.named_parameters["skip_schema_inference"] = LogicalType::BOOLEAN; - fun.named_parameters["allow_moved_paths"] = LogicalType::BOOLEAN; - fun.named_parameters["mode"] = LogicalType::VARCHAR; - fun.named_parameters["metadata_compression_codec"] = LogicalType::VARCHAR; - fun.named_parameters["version"] = LogicalType::VARCHAR; - fun.named_parameters["version_name_format"] = LogicalType::VARCHAR; - function_set.AddFunction(fun); - - fun = TableFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, nullptr, nullptr, - IcebergScanGlobalTableFunctionState::Init); - fun.bind_replace = IcebergScanBindReplace; - fun.named_parameters["skip_schema_inference"] = LogicalType::BOOLEAN; - fun.named_parameters["allow_moved_paths"] = LogicalType::BOOLEAN; - fun.named_parameters["mode"] = LogicalType::VARCHAR; - fun.named_parameters["metadata_compression_codec"] = LogicalType::VARCHAR; - fun.named_parameters["version"] = LogicalType::VARCHAR; - fun.named_parameters["version_name_format"] = LogicalType::VARCHAR; - function_set.AddFunction(fun); - - return function_set; + TableFunctionSet function_set("iceberg_scan"); + + // Default mode: list all files and apply predicate pushdown + auto fun = TableFunction({LogicalType::VARCHAR}, nullptr, nullptr, IcebergScanGlobalTableFunctionState::Init); + fun.bind_replace = IcebergScanBindReplace; + + // Enable filter pushdown + fun.filter_pushdown = true; + + // Implement the pushdown_complex_filter callback + fun.pushdown_complex_filter = IcebergPushdownFilter; + + // Register named parameters + fun.named_parameters["skip_schema_inference"] = LogicalType::BOOLEAN; + fun.named_parameters["allow_moved_paths"] = LogicalType::BOOLEAN; + fun.named_parameters["mode"] = LogicalType::VARCHAR; + fun.named_parameters["metadata_compression_codec"] = LogicalType::VARCHAR; + fun.named_parameters["version"] = LogicalType::VARCHAR; + fun.named_parameters["version_name_format"] = LogicalType::VARCHAR; + + function_set.AddFunction(fun); + + // Register additional modes as needed (e.g., with UBIGINT, TIMESTAMP) + // Example for UBIGINT mode + fun = TableFunction({LogicalType::VARCHAR, LogicalType::UBIGINT}, nullptr, nullptr, + IcebergScanGlobalTableFunctionState::Init); + fun.bind_replace = IcebergScanBindReplace; + fun.filter_pushdown = true; + fun.pushdown_complex_filter = IcebergPushdownFilter; + fun.named_parameters["skip_schema_inference"] = LogicalType::BOOLEAN; + fun.named_parameters["allow_moved_paths"] = LogicalType::BOOLEAN; + fun.named_parameters["mode"] = LogicalType::VARCHAR; + fun.named_parameters["metadata_compression_codec"] = LogicalType::VARCHAR; + fun.named_parameters["version"] = LogicalType::VARCHAR; + fun.named_parameters["version_name_format"] = LogicalType::VARCHAR; + function_set.AddFunction(fun); + + // Example for TIMESTAMP mode + fun = TableFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, nullptr, nullptr, + IcebergScanGlobalTableFunctionState::Init); + fun.bind_replace = IcebergScanBindReplace; + fun.filter_pushdown = true; + fun.pushdown_complex_filter = IcebergPushdownFilter; + fun.named_parameters["skip_schema_inference"] = LogicalType::BOOLEAN; + fun.named_parameters["allow_moved_paths"] = LogicalType::BOOLEAN; + fun.named_parameters["mode"] = LogicalType::VARCHAR; + fun.named_parameters["metadata_compression_codec"] = LogicalType::VARCHAR; + fun.named_parameters["version"] = LogicalType::VARCHAR; + fun.named_parameters["version_name_format"] = LogicalType::VARCHAR; + function_set.AddFunction(fun); + + return function_set; } } // namespace duckdb diff --git a/src/iceberg_functions/iceberg_utils.cpp b/src/iceberg_functions/iceberg_utils.cpp new file mode 100644 index 0000000..eb41c6f --- /dev/null +++ b/src/iceberg_functions/iceberg_utils.cpp @@ -0,0 +1,6 @@ +string IcebergUtils::GetFullPath(const string &base_path, const string &relative_path, FileSystem &fs) { + if (fs.IsAbsolutePath(relative_path)) { + return relative_path; + } + return fs.JoinPath(base_path, relative_path); +} \ No newline at end of file diff --git a/src/include/avro_codegen/iceberg_manifest_entry_partial.hpp b/src/include/avro_codegen/iceberg_manifest_entry_partial.hpp index e38d4b8..7a69715 100644 --- a/src/include/avro_codegen/iceberg_manifest_entry_partial.hpp +++ b/src/include/avro_codegen/iceberg_manifest_entry_partial.hpp @@ -1,124 +1,388 @@ /** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* https://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - - -#ifndef CPX2_HH_2561633724__H_ -#define CPX2_HH_2561633724__H_ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This code was generated by avrogencpp 1.13.0-SNAPSHOT. Do not edit.*/ + +#ifndef MANIFEST_ENTRY_HH_2043678367_H +#define MANIFEST_ENTRY_HH_2043678367_H #include -#include "boost/any.hpp" +#include +#include #include "avro/Specific.hh" #include "avro/Encoder.hh" #include "avro/Decoder.hh" -namespace c { -struct data_file { - int32_t content; - std::string file_path; - std::string file_format; - int64_t record_count; - data_file() : - content(int32_t()), - file_path(std::string()), - file_format(std::string()), - record_count(int64_t()) - { } +struct k126_v127 { + int32_t key; + std::vector value; + k126_v127() : + key(int32_t()), + value(std::vector()) + { } +}; + +struct manifest_entry_json_Union__0__ { +private: + size_t idx_; + std::any value_; +public: + /** enum representing union branches as returned by the idx() function */ + enum class Branch: size_t { + null = 0, + array = 1, + }; + size_t idx() const { return idx_; } + Branch branch() const { return static_cast(idx_); } + bool is_null() const { + return (idx_ == 0); + } + void set_null() { + idx_ = 0; + value_ = std::any(); + } + const std::vector& get_array() const; + std::vector& get_array(); + void set_array(const std::vector& v); + void set_array(std::vector&& v); + manifest_entry_json_Union__0__(); +}; + +struct k129_v130 { + int32_t key; + std::vector value; + k129_v130() : + key(int32_t()), + value(std::vector()) + { } +}; + +struct manifest_entry_json_Union__1__ { +private: + size_t idx_; + std::any value_; +public: + /** enum representing union branches as returned by the idx() function */ + enum class Branch: size_t { + null = 0, + array = 1, + }; + size_t idx() const { return idx_; } + Branch branch() const { return static_cast(idx_); } + bool is_null() const { + return (idx_ == 0); + } + void set_null() { + idx_ = 0; + value_ = std::any(); + } + const std::vector& get_array() const; + std::vector& get_array(); + void set_array(const std::vector& v); + void set_array(std::vector&& v); + manifest_entry_json_Union__1__(); +}; + +struct r2 { + typedef manifest_entry_json_Union__0__ lower_bounds_t; + typedef manifest_entry_json_Union__1__ upper_bounds_t; + int32_t content; + std::string file_path; + std::string file_format; + int64_t record_count; + lower_bounds_t lower_bounds; + upper_bounds_t upper_bounds; + r2() : + content(int32_t()), + file_path(std::string()), + file_format(std::string()), + record_count(int64_t()), + lower_bounds(lower_bounds_t()), + upper_bounds(upper_bounds_t()) + { } }; struct manifest_entry { - int32_t status; - data_file data_file_; // NOTE: as generated, this is called data_file, but this causes issues with GCC - manifest_entry() : - status(int32_t()), - data_file_() - { } + int32_t status; + r2 data_file; + manifest_entry() : + status(int32_t()), + data_file(r2()) + { } }; +inline +const std::vector& manifest_entry_json_Union__0__::get_array() const { + if (idx_ != 1) { + throw avro::Exception("Invalid type for union manifest_entry_json_Union__0__"); + } + return *std::any_cast >(&value_); +} + +inline +std::vector& manifest_entry_json_Union__0__::get_array() { + if (idx_ != 1) { + throw avro::Exception("Invalid type for union manifest_entry_json_Union__0__"); + } + return *std::any_cast >(&value_); +} + +inline +void manifest_entry_json_Union__0__::set_array(const std::vector& v) { + idx_ = 1; + value_ = v; +} + +inline +void manifest_entry_json_Union__0__::set_array(std::vector&& v) { + idx_ = 1; + value_ = std::move(v); } + +inline +const std::vector& manifest_entry_json_Union__1__::get_array() const { + if (idx_ != 1) { + throw avro::Exception("Invalid type for union manifest_entry_json_Union__1__"); + } + return *std::any_cast >(&value_); +} + +inline +std::vector& manifest_entry_json_Union__1__::get_array() { + if (idx_ != 1) { + throw avro::Exception("Invalid type for union manifest_entry_json_Union__1__"); + } + return *std::any_cast >(&value_); +} + +inline +void manifest_entry_json_Union__1__::set_array(const std::vector& v) { + idx_ = 1; + value_ = v; +} + +inline +void manifest_entry_json_Union__1__::set_array(std::vector&& v) { + idx_ = 1; + value_ = std::move(v); +} + +inline manifest_entry_json_Union__0__::manifest_entry_json_Union__0__() : idx_(0) { } +inline manifest_entry_json_Union__1__::manifest_entry_json_Union__1__() : idx_(0) { } namespace avro { -template<> struct codec_traits { - static void encode(Encoder& e, const c::data_file& v) { - avro::encode(e, v.content); - avro::encode(e, v.file_path); - avro::encode(e, v.file_format); - avro::encode(e, v.record_count); - } - static void decode(Decoder& d, c::data_file& v) { - if (avro::ResolvingDecoder *rd = - dynamic_cast(&d)) { - const std::vector fo = rd->fieldOrder(); - for (std::vector::const_iterator it = fo.begin(); - it != fo.end(); ++it) { - switch (*it) { - case 0: - avro::decode(d, v.content); - break; - case 1: - avro::decode(d, v.file_path); - break; - case 2: - avro::decode(d, v.file_format); - break; - case 3: - avro::decode(d, v.record_count); - break; - default: - break; - } - } - } else { - avro::decode(d, v.content); - avro::decode(d, v.file_path); - avro::decode(d, v.file_format); - avro::decode(d, v.record_count); - } - } +template<> struct codec_traits { + static void encode(Encoder& e, const k126_v127& v) { + avro::encode(e, v.key); + avro::encode(e, v.value); + } + static void decode(Decoder& d, k126_v127& v) { + if (avro::ResolvingDecoder *rd = + dynamic_cast(&d)) { + const std::vector fo = rd->fieldOrder(); + for (std::vector::const_iterator it = fo.begin(); + it != fo.end(); ++it) { + switch (*it) { + case 0: + avro::decode(d, v.key); + break; + case 1: + avro::decode(d, v.value); + break; + default: + break; + } + } + } else { + avro::decode(d, v.key); + avro::decode(d, v.value); + } + } +}; + +template<> struct codec_traits { + static void encode(Encoder& e, manifest_entry_json_Union__0__ v) { + e.encodeUnionIndex(v.idx()); + switch (v.idx()) { + case 0: + e.encodeNull(); + break; + case 1: + avro::encode(e, v.get_array()); + break; + } + } + static void decode(Decoder& d, manifest_entry_json_Union__0__& v) { + size_t n = d.decodeUnionIndex(); + if (n >= 2) { throw avro::Exception("Union index too big"); } + switch (n) { + case 0: + d.decodeNull(); + v.set_null(); + break; + case 1: + { + std::vector vv; + avro::decode(d, vv); + v.set_array(std::move(vv)); + } + break; + } + } +}; + +template<> struct codec_traits { + static void encode(Encoder& e, const k129_v130& v) { + avro::encode(e, v.key); + avro::encode(e, v.value); + } + static void decode(Decoder& d, k129_v130& v) { + if (avro::ResolvingDecoder *rd = + dynamic_cast(&d)) { + const std::vector fo = rd->fieldOrder(); + for (std::vector::const_iterator it = fo.begin(); + it != fo.end(); ++it) { + switch (*it) { + case 0: + avro::decode(d, v.key); + break; + case 1: + avro::decode(d, v.value); + break; + default: + break; + } + } + } else { + avro::decode(d, v.key); + avro::decode(d, v.value); + } + } +}; + +template<> struct codec_traits { + static void encode(Encoder& e, manifest_entry_json_Union__1__ v) { + e.encodeUnionIndex(v.idx()); + switch (v.idx()) { + case 0: + e.encodeNull(); + break; + case 1: + avro::encode(e, v.get_array()); + break; + } + } + static void decode(Decoder& d, manifest_entry_json_Union__1__& v) { + size_t n = d.decodeUnionIndex(); + if (n >= 2) { throw avro::Exception("Union index too big"); } + switch (n) { + case 0: + d.decodeNull(); + v.set_null(); + break; + case 1: + { + std::vector vv; + avro::decode(d, vv); + v.set_array(std::move(vv)); + } + break; + } + } +}; + +template<> struct codec_traits { + static void encode(Encoder& e, const r2& v) { + avro::encode(e, v.content); + avro::encode(e, v.file_path); + avro::encode(e, v.file_format); + avro::encode(e, v.record_count); + avro::encode(e, v.lower_bounds); + avro::encode(e, v.upper_bounds); + } + static void decode(Decoder& d, r2& v) { + if (avro::ResolvingDecoder *rd = + dynamic_cast(&d)) { + const std::vector fo = rd->fieldOrder(); + for (std::vector::const_iterator it = fo.begin(); + it != fo.end(); ++it) { + switch (*it) { + case 0: + avro::decode(d, v.content); + break; + case 1: + avro::decode(d, v.file_path); + break; + case 2: + avro::decode(d, v.file_format); + break; + case 3: + avro::decode(d, v.record_count); + break; + case 4: + avro::decode(d, v.lower_bounds); + break; + case 5: + avro::decode(d, v.upper_bounds); + break; + default: + break; + } + } + } else { + avro::decode(d, v.content); + avro::decode(d, v.file_path); + avro::decode(d, v.file_format); + avro::decode(d, v.record_count); + avro::decode(d, v.lower_bounds); + avro::decode(d, v.upper_bounds); + } + } }; -template<> struct codec_traits { - static void encode(Encoder& e, const c::manifest_entry& v) { - avro::encode(e, v.status); - avro::encode(e, v.data_file_); - } - static void decode(Decoder& d, c::manifest_entry& v) { - if (avro::ResolvingDecoder *rd = - dynamic_cast(&d)) { - const std::vector fo = rd->fieldOrder(); - for (std::vector::const_iterator it = fo.begin(); - it != fo.end(); ++it) { - switch (*it) { - case 0: - avro::decode(d, v.status); - break; - case 1: - avro::decode(d, v.data_file_); - break; - default: - break; - } - } - } else { - avro::decode(d, v.status); - avro::decode(d, v.data_file_); - } - } +template<> struct codec_traits { + static void encode(Encoder& e, const manifest_entry& v) { + avro::encode(e, v.status); + avro::encode(e, v.data_file); + } + static void decode(Decoder& d, manifest_entry& v) { + if (avro::ResolvingDecoder *rd = + dynamic_cast(&d)) { + const std::vector fo = rd->fieldOrder(); + for (std::vector::const_iterator it = fo.begin(); + it != fo.end(); ++it) { + switch (*it) { + case 0: + avro::decode(d, v.status); + break; + case 1: + avro::decode(d, v.data_file); + break; + default: + break; + } + } + } else { + avro::decode(d, v.status); + avro::decode(d, v.data_file); + } + } }; } -#endif +#endif \ No newline at end of file diff --git a/src/include/iceberg_metadata.hpp b/src/include/iceberg_metadata.hpp index d7d4478..cbfae6a 100644 --- a/src/include/iceberg_metadata.hpp +++ b/src/include/iceberg_metadata.hpp @@ -113,6 +113,24 @@ struct IcebergTable { return ret; } + //! Returns all IcebergManifestEntry objects to be scanned for the given IcebergManifestContentType + template + vector GetEntries() { + vector ret; + for (auto &entry : entries) { + if (entry.manifest.content != TYPE) { + continue; + } + for (auto &manifest_entry : entry.manifest_entries) { + if (manifest_entry.status == IcebergManifestEntryStatusType::DELETED) { + continue; + } + ret.push_back(manifest_entry); + } + } + return ret; + } + void Print() { Printer::Print("Iceberg table (" + path + ")"); for (auto &entry : entries) { diff --git a/src/include/iceberg_types.hpp b/src/include/iceberg_types.hpp index 16ec1ca..daa92de 100644 --- a/src/include/iceberg_types.hpp +++ b/src/include/iceberg_types.hpp @@ -8,6 +8,7 @@ #pragma once +#include // Add this line for std::setfill and std::setw #include "avro_codegen/iceberg_manifest_entry_partial.hpp" #include "avro_codegen/iceberg_manifest_entry_partial_v1.hpp" #include "avro_codegen/iceberg_manifest_file_partial.hpp" @@ -119,23 +120,97 @@ struct IcebergManifest { //! The schema containing the fields from the manifest entry. //! this schema should match the generated cpp header from src/include/avro_codegen/iceberg_manifest_entry_partial.hpp -static string MANIFEST_ENTRY_SCHEMA = "{\n" - " \"type\": \"record\",\n" - " \"name\": \"manifest_entry\",\n" - " \"fields\" : [\n" - " {\"name\": \"status\", \"type\" : \"int\"},\n" - " {\"name\": \"data_file\", \"type\": {\n" - " \"type\": \"record\",\n" - " \"name\": \"r2\",\n" - " \"fields\" : [\n" - " {\"name\": \"content\", \"type\": \"int\"},\n" - " {\"name\": \"file_path\", \"type\": \"string\"},\n" - " {\"name\": \"file_format\", \"type\": \"string\"},\n" - " {\"name\": \"record_count\", \"type\" : \"long\"}\n" - " ]}\n" - " }\n" - " ]\n" - " }"; +// static string MANIFEST_ENTRY_SCHEMA = "{\n" +// " \"type\": \"record\",\n" +// " \"name\": \"manifest_entry\",\n" +// " \"fields\" : [\n" +// " {\"name\": \"status\", \"type\" : \"int\"},\n" +// " {\"name\": \"data_file\", \"type\": {\n" +// " \"type\": \"record\",\n" +// " \"name\": \"r2\",\n" +// " \"fields\" : [\n" +// " {\"name\": \"content\", \"type\": \"int\"},\n" +// " {\"name\": \"file_path\", \"type\": \"string\"},\n" +// " {\"name\": \"file_format\", \"type\": \"string\"},\n" +// " {\"name\": \"record_count\", \"type\" : \"long\"},\n" +// " {\"name\": \"lower_bounds\", \"type\": [\"null\", {\n" +// " \"type\": \"array\",\n" +// " \"items\": {\n" +// " \"type\": \"record\",\n" +// " \"name\": \"k126_v127\",\n" +// " \"fields\": [\n" +// " {\"name\": \"key\", \"type\": \"int\"},\n" +// " {\"name\": \"value\", \"type\": \"bytes\"}\n" +// " ]\n" +// " }\n" +// " }], \"default\": null},\n" +// " {\"name\": \"upper_bounds\", \"type\": [\"null\", {\n" +// " \"type\": \"array\",\n" +// " \"items\": {\n" +// " \"type\": \"record\",\n" +// " \"name\": \"k129_v130\",\n" +// " \"fields\": [\n" +// " {\"name\": \"key\", \"type\": \"int\"},\n" +// " {\"name\": \"value\", \"type\": \"bytes\"}\n" +// " ]\n" +// " }\n" +// " }], \"default\": null}\n" +// " ]}\n" +// " }\n" +// " ]\n" +// " }"; + +static string MANIFEST_ENTRY_SCHEMA = R"( +{ + "type": "record", + "name": "manifest_entry", + "fields": [ + { "name": "status", "type": "int", "field-id": 0 }, + { "name": "data_file", "type": { + "type": "record", + "name": "r2", + "fields": [ + { "name": "content", "type": "int", "field-id": 134 }, + { "name": "file_path", "type": "string", "field-id": 100 }, + { "name": "file_format", "type": "string", "field-id": 101 }, + { "name": "record_count", "type": "long", "field-id": 103 }, + { "name": "lower_bounds", "type": [ + "null", + { + "type": "array", + "items": { + "type": "record", + "name": "k126_v127", + "fields": [ + { "name": "key", "type": "int", "field-id": 126 }, + { "name": "value", "type": "bytes", "field-id": 127 } + ] + } + } + ], + "default": null, + "field-id": 125 + }, + { "name": "upper_bounds", "type": [ + "null", + { + "type": "array", + "items": { + "type": "record", + "name": "k129_v130", + "fields": [ + { "name": "key", "type": "int", "field-id": 129 }, + { "name": "value", "type": "bytes", "field-id": 130 } + ] + } + } + ], "default": null, "field-id": 128 } + ] + }, "field-id": 2 } + ] +} +)"; + static string MANIFEST_ENTRY_SCHEMA_V1 = "{\n" " \"type\": \"record\",\n" @@ -157,45 +232,79 @@ static string MANIFEST_ENTRY_SCHEMA_V1 = "{\n" //! An entry in a manifest file struct IcebergManifestEntry { - explicit IcebergManifestEntry(const c::manifest_entry &schema) { - status = (IcebergManifestEntryStatusType)schema.status; - content = (IcebergManifestEntryContentType)schema.data_file_.content; - file_path = schema.data_file_.file_path; - file_format = schema.data_file_.file_format; - record_count = schema.data_file_.record_count; - } + explicit IcebergManifestEntry(const manifest_entry &schema) { + status = (IcebergManifestEntryStatusType)schema.status; + content = (IcebergManifestEntryContentType)schema.data_file.content; + const auto &data_file = schema.data_file; + file_path = data_file.file_path; + file_format = data_file.file_format; + record_count = data_file.record_count; + lower_bounds.clear(); + upper_bounds.clear(); - explicit IcebergManifestEntry(const c::manifest_entry_v1 &schema) { - status = (IcebergManifestEntryStatusType)schema.status; - content = IcebergManifestEntryContentType::DATA; - file_path = schema.data_file_.file_path; - file_format = schema.data_file_.file_format; - record_count = schema.data_file_.record_count; - } + // Handle lower_bounds + if (data_file.lower_bounds.idx() == static_cast(manifest_entry_json_Union__0__::Branch::array)) { + const auto &bounds_array = data_file.lower_bounds.get_array(); + for (const auto &lb : bounds_array) { + lower_bounds[std::to_string(lb.key)] = lb.value; + } + } else { + std::cout << "Lower bounds is null" << std::endl; + } - IcebergManifestEntryStatusType status; + // Handle upper_bounds + if (data_file.upper_bounds.idx() == static_cast(manifest_entry_json_Union__1__::Branch::array)) { + const auto &bounds_array = data_file.upper_bounds.get_array(); + for (const auto &ub : bounds_array) { + upper_bounds[std::to_string(ub.key)] = ub.value; + } + } else { + std::cout << "Upper bounds is null" << std::endl; + } + } - //! ----- Data File Struct ------ - IcebergManifestEntryContentType content; - string file_path; - string file_format; - int64_t record_count; + explicit IcebergManifestEntry(const c::manifest_entry_v1 &schema) { + status = (IcebergManifestEntryStatusType)schema.status; + content = IcebergManifestEntryContentType::DATA; + file_path = schema.data_file_.file_path; + file_format = schema.data_file_.file_format; + record_count = schema.data_file_.record_count; - void Print() { - Printer::Print(" -> ManifestEntry = { type: " + IcebergManifestEntryStatusTypeToString(status) + - ", content: " + IcebergManifestEntryContentTypeToString(content) + ", file: " + file_path + - ", record_count: " + to_string(record_count) + "}"); - } + // Initialize bounds as empty maps + lower_bounds.clear(); + upper_bounds.clear(); + } - static vector Types() { - return { - LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::BIGINT, - }; - } - static vector Names() { - return {"status", "content", "file_path", "file_format", "record_count"}; - } + + + IcebergManifestEntryStatusType status; + + //! ----- Data File Struct ------ + IcebergManifestEntryContentType content; + string file_path; + string file_format; + int64_t record_count; + + // Add new members for bounds + std::unordered_map> lower_bounds; + std::unordered_map> upper_bounds; + + void Print() { + Printer::Print(" -> ManifestEntry = { type: " + IcebergManifestEntryStatusTypeToString(status) + + ", content: " + IcebergManifestEntryContentTypeToString(content) + ", file: " + file_path + + ", record_count: " + to_string(record_count) + "}"); + } + + static vector Types() { + return { + LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::BIGINT, + }; + } + + static vector Names() { + return {"status", "content", "file_path", "file_format", "record_count"}; + } }; struct IcebergTableEntry {