diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 3a530b9..eefa263 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,20 +14,21 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.2 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.3 with: extension_name: iceberg - duckdb_version: v1.1.2 - ci_tools_version: v1.1.2 + duckdb_version: v1.1.3 + ci_tools_version: v1.1.3 exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' duckdb-stable-deploy: name: Deploy extension binaries needs: duckdb-stable-build - uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.2 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.3 secrets: inherit with: extension_name: iceberg - duckdb_version: v1.1.2 + duckdb_version: v1.1.3 + ci_tools_version: v1.1.3 exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} diff --git a/.gitmodules b/.gitmodules index 3efaa06..dd490ea 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "duckdb"] path = duckdb url = https://github.com/duckdb/duckdb - branch = master + branch = main [submodule "extension-ci-tools"] path = extension-ci-tools url = https://github.com/duckdb/extension-ci-tools.git diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc b/data/iceberg/lineitem_iceberg_gz_no_hint/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc new file mode 100644 index 0000000..f88d18d Binary files /dev/null and 
b/data/iceberg/lineitem_iceberg_gz_no_hint/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet b/data/iceberg/lineitem_iceberg_gz_no_hint/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet new file mode 100644 index 0000000..63b23d3 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc new file mode 100644 index 0000000..c2fab3e Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc new file mode 100644 index 0000000..a27fd90 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v1.gz.metadata.json.crc b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v1.gz.metadata.json.crc new file mode 100644 index 0000000..48de20c Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v1.gz.metadata.json.crc differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v2.gz.metadata.json.crc b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v2.gz.metadata.json.crc new file mode 100644 index 0000000..915a750 Binary files /dev/null and 
b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v2.gz.metadata.json.crc differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.version-hint.text.crc b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.version-hint.text.crc new file mode 100644 index 0000000..2003120 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.version-hint.text.crc differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro new file mode 100644 index 0000000..8425c93 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro new file mode 100644 index 0000000..9e26d01 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v1.gz.metadata.json b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v1.gz.metadata.json new file mode 100644 index 0000000..efc8150 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v1.gz.metadata.json differ diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v2.gz.metadata.json b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v2.gz.metadata.json new file mode 100644 index 0000000..a48a7c9 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v2.gz.metadata.json differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/README.md b/data/iceberg/lineitem_iceberg_no_hint/README.md new file mode 100644 index 0000000..d564b5f --- 
/dev/null +++ b/data/iceberg/lineitem_iceberg_no_hint/README.md @@ -0,0 +1,20 @@ +# README +this iceberg table is generated by using DuckDB (v0.7.0) to generate TPC-H lineitem +SF0.01 then storing that to a parquet file. + +Then pyspark (3.3.1) was used with the iceberg extension from https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.0.0/iceberg-spark-runtime-3.3_2.12-1.0.0.jar +to write the iceberg table. + +finally, using pyspark, a delete query was performed on this iceberg table: + +``` +DELETE FROM iceberg_catalog.lineitem_iceberg where l_extendedprice < 10000 +``` + +The result for Q06 of TPC-H on this table according to pyspark is now: +``` +[Row(revenue=Decimal('1077536.9101'))] +``` + +Note: it appears that there are no deletes present in this iceberg table, the whole thing was rewritten. +this is likely due to the fact that the table is so small? \ No newline at end of file diff --git a/data/iceberg/lineitem_iceberg_no_hint/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc b/data/iceberg/lineitem_iceberg_no_hint/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc new file mode 100644 index 0000000..9c4bdc8 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc b/data/iceberg/lineitem_iceberg_no_hint/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc new file mode 100644 index 0000000..d00535f Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet b/data/iceberg/lineitem_iceberg_no_hint/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet
new file mode 100644 index 0000000..0042379 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet b/data/iceberg/lineitem_iceberg_no_hint/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet new file mode 100644 index 0000000..d254ec4 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc b/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc new file mode 100644 index 0000000..bf060e8 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc b/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc new file mode 100644 index 0000000..671a7d1 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc b/data/iceberg/lineitem_iceberg_no_hint/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc new file mode 100644 index 0000000..e9dc69a Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc 
b/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc new file mode 100644 index 0000000..ed617a4 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc b/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc new file mode 100644 index 0000000..a74ad2f Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.v1.metadata.json.crc b/data/iceberg/lineitem_iceberg_no_hint/metadata/.v1.metadata.json.crc new file mode 100644 index 0000000..af5bbe4 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.v1.metadata.json.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.v2.metadata.json.crc b/data/iceberg/lineitem_iceberg_no_hint/metadata/.v2.metadata.json.crc new file mode 100644 index 0000000..856dc24 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.v2.metadata.json.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.version-hint.text.crc b/data/iceberg/lineitem_iceberg_no_hint/metadata/.version-hint.text.crc new file mode 100644 index 0000000..2003120 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/.version-hint.text.crc differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro b/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro new file mode 100644 index 0000000..5cd8724 Binary files /dev/null and 
b/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro b/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro new file mode 100644 index 0000000..24aabbb Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro b/data/iceberg/lineitem_iceberg_no_hint/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro new file mode 100644 index 0000000..0322f43 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro b/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro new file mode 100644 index 0000000..42ac35b Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro b/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro new file mode 100644 index 0000000..bfc7489 Binary files /dev/null and b/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro differ diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json b/data/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json new file mode 100644 index 0000000..0439057 --- /dev/null +++ b/data/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json 
@@ -0,0 +1,142 @@ +{ + "format-version" : 2, + "table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b", + "location" : "./lineitem_iceberg", + "last-sequence-number" : 1, + "last-updated-ms" : 1676473674504, + "last-column-id" : 16, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "l_orderkey", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "l_partkey", + "required" : false, + "type" : "int" + }, { + "id" : 3, + "name" : "l_suppkey", + "required" : false, + "type" : "int" + }, { + "id" : 4, + "name" : "l_linenumber", + "required" : false, + "type" : "int" + }, { + "id" : 5, + "name" : "l_quantity", + "required" : false, + "type" : "int" + }, { + "id" : 6, + "name" : "l_extendedprice", + "required" : false, + "type" : "decimal(15, 2)" + }, { + "id" : 7, + "name" : "l_discount", + "required" : false, + "type" : "decimal(15, 2)" + }, { + "id" : 8, + "name" : "l_tax", + "required" : false, + "type" : "decimal(15, 2)" + }, { + "id" : 9, + "name" : "l_returnflag", + "required" : false, + "type" : "string" + }, { + "id" : 10, + "name" : "l_linestatus", + "required" : false, + "type" : "string" + }, { + "id" : 11, + "name" : "l_shipdate", + "required" : false, + "type" : "date" + }, { + "id" : 12, + "name" : "l_commitdate", + "required" : false, + "type" : "date" + }, { + "id" : 13, + "name" : "l_receiptdate", + "required" : false, + "type" : "date" + }, { + "id" : 14, + "name" : "l_shipinstruct", + "required" : false, + "type" : "string" + }, { + "id" : 15, + "name" : "l_shipmode", + "required" : false, + "type" : "string" + }, { + "id" : 16, + "name" : "l_comment", + "required" : false, + "type" : "string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + "last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "owner" : "root", + 
"write.update.mode" : "merge-on-read" + }, + "current-snapshot-id" : 3776207205136740581, + "refs" : { + "main" : { + "snapshot-id" : 3776207205136740581, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 3776207205136740581, + "timestamp-ms" : 1676473674504, + "summary" : { + "operation" : "append", + "spark.app.id" : "local-1676472783435", + "added-data-files" : "1", + "added-records" : "60175", + "added-files-size" : "1390176", + "changed-partition-count" : "1", + "total-records" : "60175", + "total-files-size" : "1390176", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1676473674504, + "snapshot-id" : 3776207205136740581 + } ], + "metadata-log" : [ ] +} \ No newline at end of file diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json b/data/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json new file mode 100644 index 0000000..ffd0d60 --- /dev/null +++ b/data/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json @@ -0,0 +1,172 @@ +{ + "format-version" : 2, + "table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b", + "location" : "./lineitem_iceberg", + "last-sequence-number" : 2, + "last-updated-ms" : 1676473694730, + "last-column-id" : 16, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "l_orderkey", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "l_partkey", + "required" : false, + "type" : "int" + }, { + "id" : 3, + "name" : "l_suppkey", + "required" : false, + "type" : "int" + }, { + "id" : 4, + "name" : "l_linenumber", + "required" : false, + "type" : "int" + }, { + "id" : 5, + "name" : "l_quantity", + "required" : false, 
+ "type" : "int" + }, { + "id" : 6, + "name" : "l_extendedprice", + "required" : false, + "type" : "decimal(15, 2)" + }, { + "id" : 7, + "name" : "l_discount", + "required" : false, + "type" : "decimal(15, 2)" + }, { + "id" : 8, + "name" : "l_tax", + "required" : false, + "type" : "decimal(15, 2)" + }, { + "id" : 9, + "name" : "l_returnflag", + "required" : false, + "type" : "string" + }, { + "id" : 10, + "name" : "l_linestatus", + "required" : false, + "type" : "string" + }, { + "id" : 11, + "name" : "l_shipdate", + "required" : false, + "type" : "date" + }, { + "id" : 12, + "name" : "l_commitdate", + "required" : false, + "type" : "date" + }, { + "id" : 13, + "name" : "l_receiptdate", + "required" : false, + "type" : "date" + }, { + "id" : 14, + "name" : "l_shipinstruct", + "required" : false, + "type" : "string" + }, { + "id" : 15, + "name" : "l_shipmode", + "required" : false, + "type" : "string" + }, { + "id" : 16, + "name" : "l_comment", + "required" : false, + "type" : "string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + "last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "owner" : "root", + "write.update.mode" : "merge-on-read" + }, + "current-snapshot-id" : 7635660646343998149, + "refs" : { + "main" : { + "snapshot-id" : 7635660646343998149, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 3776207205136740581, + "timestamp-ms" : 1676473674504, + "summary" : { + "operation" : "append", + "spark.app.id" : "local-1676472783435", + "added-data-files" : "1", + "added-records" : "60175", + "added-files-size" : "1390176", + "changed-partition-count" : "1", + "total-records" : "60175", + "total-files-size" : "1390176", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : 
"lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 7635660646343998149, + "parent-snapshot-id" : 3776207205136740581, + "timestamp-ms" : 1676473694730, + "summary" : { + "operation" : "overwrite", + "spark.app.id" : "local-1676472783435", + "added-data-files" : "1", + "deleted-data-files" : "1", + "added-records" : "51793", + "deleted-records" : "60175", + "added-files-size" : "1208539", + "removed-files-size" : "1390176", + "changed-partition-count" : "1", + "total-records" : "51793", + "total-files-size" : "1208539", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1676473674504, + "snapshot-id" : 3776207205136740581 + }, { + "timestamp-ms" : 1676473694730, + "snapshot-id" : 7635660646343998149 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1676473674504, + "metadata-file" : "lineitem_iceberg/metadata/v1.metadata.json" + } ] +} \ No newline at end of file diff --git a/duckdb b/duckdb index f680b7d..1986445 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit f680b7d08f56183391b581077d4baf589e1cc8bd +Subproject commit 19864453f7d0ed095256d848b46e7b8630989bac diff --git a/extension-ci-tools b/extension-ci-tools index f5594c6..5121955 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit f5594c61803daee122a5245afb817966e1a4545c +Subproject commit 5121955637acac20ba5ef7abf0b8957b0f0c849b diff --git a/src/common/iceberg.cpp b/src/common/iceberg.cpp index 6e63969..1ec5a76 100644 --- a/src/common/iceberg.cpp +++ b/src/common/iceberg.cpp @@ -188,19 +188,34 @@ string GenerateMetaDataUrl(FileSystem &fs, const string &meta_path, string &tabl } -string 
IcebergSnapshot::GetMetaDataPath(const string &path, FileSystem &fs, string metadata_compression_codec, string table_version = DEFAULT_VERSION_HINT_FILE, string version_format = DEFAULT_TABLE_VERSION_FORMAT) { +string IcebergSnapshot::GetMetaDataPath(ClientContext &context, const string &path, FileSystem &fs, string metadata_compression_codec, string table_version = DEFAULT_TABLE_VERSION, string version_format = DEFAULT_TABLE_VERSION_FORMAT) { + string version_hint; + string meta_path = fs.JoinPath(path, "metadata"); if (StringUtil::EndsWith(path, ".json")) { + // We've been given a real metadata path. Nothing else to do. return path; - } - - auto meta_path = fs.JoinPath(path, "metadata"); - string version_hint; - if(StringUtil::EndsWith(table_version, ".text")||StringUtil::EndsWith(table_version, ".txt")) { - version_hint = GetTableVersion(meta_path, fs, table_version); - } else { + } else if (!fs.DirectoryExists(meta_path)) { + // Make sure we have a metadata directory to look in + throw IOException("Cannot open \"%s\": Metadata directory does not exist", path); + } else if(StringUtil::EndsWith(table_version, ".text")||StringUtil::EndsWith(table_version, ".txt")) { + // We were given a hint filename + version_hint = GetTableVersionFromHint(meta_path, fs, table_version); + return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format); + } else if (table_version != UNKNOWN_TABLE_VERSION) { + // We were given an explicit version number version_hint = table_version; + return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format); + } else if (fs.FileExists(fs.JoinPath(meta_path, DEFAULT_VERSION_HINT_FILE))) { + // We're guessing, but a version-hint.text exists so we'll use that + version_hint = GetTableVersionFromHint(meta_path, fs, DEFAULT_VERSION_HINT_FILE); + return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format); + } else if 
(!UnsafeVersionGuessingEnabled(context)) { + // Make sure we're allowed to guess versions + throw InvalidInputException("No version was provided and no version-hint could be found, globbing the filesystem to locate the latest version is disabled by default as this is considered unsafe and could result in reading uncommitted data. To enable this use 'SET %s = true;'", VERSION_GUESSING_CONFIG_VARIABLE); + } else { + // We are allowed to guess from file paths + return GuessTableVersion(meta_path, fs, table_version, metadata_compression_codec, version_format); } - return GenerateMetaDataUrl(fs, meta_path, version_hint, metadata_compression_codec, version_format); } @@ -238,7 +253,7 @@ IcebergSnapshot IcebergSnapshot::ParseSnapShot(yyjson_val *snapshot, idx_t icebe return ret; } -string IcebergSnapshot::GetTableVersion(const string &meta_path, FileSystem &fs, string version_file = DEFAULT_VERSION_HINT_FILE) { +string IcebergSnapshot::GetTableVersionFromHint(const string &meta_path, FileSystem &fs, string version_file = DEFAULT_VERSION_HINT_FILE) { auto version_file_path = fs.JoinPath(meta_path, version_file); auto version_file_content = IcebergUtils::FileToString(version_file_path, fs); @@ -251,6 +266,52 @@ string IcebergSnapshot::GetTableVersion(const string &meta_path, FileSystem &fs, } } +bool IcebergSnapshot::UnsafeVersionGuessingEnabled(ClientContext &context) { + Value result; + (void)context.TryGetCurrentSetting(VERSION_GUESSING_CONFIG_VARIABLE, result); + return !result.IsNull() && result.GetValue(); +} + + +string IcebergSnapshot::GuessTableVersion(const string &meta_path, FileSystem &fs, string &table_version, string &metadata_compression_codec, string &version_format = DEFAULT_TABLE_VERSION_FORMAT) { + string selected_metadata; + string version_pattern = "*"; // TODO: Different "table_version" strings could customize this + string compression_suffix = ""; + + + if (metadata_compression_codec == "gzip") { + compression_suffix = ".gz"; + } + + for(auto
try_format : StringUtil::Split(version_format, ',')) { + auto glob_pattern = StringUtil::Format(try_format, version_pattern, compression_suffix); + + auto found_versions = fs.Glob(fs.JoinPath(meta_path, glob_pattern)); + if(found_versions.size() > 0) { + selected_metadata = PickTableVersion(found_versions, version_pattern, glob_pattern); + if(!selected_metadata.empty()) { // Found one + return selected_metadata; + } + } + } + + throw IOException( + "Could not guess Iceberg table version using '%s' compression and format(s): '%s'", + metadata_compression_codec, version_format); +} + +string IcebergSnapshot::PickTableVersion(vector &found_metadata, string &version_pattern, string &glob) { + // TODO: Different "table_version" strings could customize this + // For now: just sort the versions and take the largest + if(!found_metadata.empty()) { + std::sort(found_metadata.begin(), found_metadata.end()); + return found_metadata.back(); + } else { + return string(); + } +} + + yyjson_val *IcebergSnapshot::FindLatestSnapshotInternal(yyjson_val *snapshots) { size_t idx, max; yyjson_val *snapshot; diff --git a/src/iceberg_extension.cpp b/src/iceberg_extension.cpp index 895b79d..2ee9f3f 100644 --- a/src/iceberg_extension.cpp +++ b/src/iceberg_extension.cpp @@ -28,6 +28,14 @@ static void LoadInternal(DatabaseInstance &instance) { } void IcebergExtension::Load(DuckDB &db) { + auto &config = DBConfig::GetConfig(*db.instance); + + config.AddExtensionOption( + "unsafe_enable_version_guessing", + "Enable globbing the filesystem (if possible) to find the latest version metadata. 
This could result in reading an uncommitted version.", + LogicalType::BOOLEAN, + Value::BOOLEAN(false) + ); LoadInternal(*db.instance); } std::string IcebergExtension::Name() { diff --git a/src/iceberg_functions/iceberg_metadata.cpp b/src/iceberg_functions/iceberg_metadata.cpp index 1ffff2c..f8c9557 100644 --- a/src/iceberg_functions/iceberg_metadata.cpp +++ b/src/iceberg_functions/iceberg_metadata.cpp @@ -57,7 +57,7 @@ static unique_ptr IcebergMetaDataBind(ClientContext &context, Tabl bool allow_moved_paths = false; string metadata_compression_codec = "none"; bool skip_schema_inference = false; - string table_version = DEFAULT_VERSION_HINT_FILE; + string table_version = DEFAULT_TABLE_VERSION; string version_name_format = DEFAULT_TABLE_VERSION_FORMAT; for (auto &kv : input.named_parameters) { @@ -75,7 +75,7 @@ static unique_ptr IcebergMetaDataBind(ClientContext &context, Tabl } } - auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(iceberg_path, fs, metadata_compression_codec, table_version, version_name_format); + auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(context, iceberg_path, fs, metadata_compression_codec, table_version, version_name_format); IcebergSnapshot snapshot_to_scan; if (input.inputs.size() > 1) { if (input.inputs[1].type() == LogicalType::UBIGINT) { diff --git a/src/iceberg_functions/iceberg_scan.cpp b/src/iceberg_functions/iceberg_scan.cpp index 2b3ec86..38e8f89 100644 --- a/src/iceberg_functions/iceberg_scan.cpp +++ b/src/iceberg_functions/iceberg_scan.cpp @@ -225,7 +225,7 @@ static unique_ptr IcebergScanBindReplace(ClientContext &context, Table bool skip_schema_inference = false; string mode = "default"; string metadata_compression_codec = "none"; - string table_version = DEFAULT_VERSION_HINT_FILE; + string table_version = DEFAULT_TABLE_VERSION; string version_name_format = DEFAULT_TABLE_VERSION_FORMAT; for (auto &kv : input.named_parameters) { @@ -248,7 +248,7 @@ static unique_ptr IcebergScanBindReplace(ClientContext 
&context, Table version_name_format = StringValue::Get(kv.second); } } - auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(iceberg_path, fs, metadata_compression_codec, table_version, version_name_format); + auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath(context, iceberg_path, fs, metadata_compression_codec, table_version, version_name_format); IcebergSnapshot snapshot_to_scan; if (input.inputs.size() > 1) { if (input.inputs[1].type() == LogicalType::UBIGINT) { diff --git a/src/iceberg_functions/iceberg_snapshots.cpp b/src/iceberg_functions/iceberg_snapshots.cpp index e6e4003..e6d8e54 100644 --- a/src/iceberg_functions/iceberg_snapshots.cpp +++ b/src/iceberg_functions/iceberg_snapshots.cpp @@ -33,7 +33,7 @@ struct IcebergSnapshotGlobalTableFunctionState : public GlobalTableFunctionState FileSystem &fs = FileSystem::GetFileSystem(context); auto iceberg_meta_path = IcebergSnapshot::GetMetaDataPath( - bind_data.filename, fs, bind_data.metadata_compression_codec, bind_data.table_version, bind_data.version_name_format); + context, bind_data.filename, fs, bind_data.metadata_compression_codec, bind_data.table_version, bind_data.version_name_format); global_state->metadata_file = IcebergSnapshot::ReadMetaData(iceberg_meta_path, fs, bind_data.metadata_compression_codec); global_state->metadata_doc = yyjson_read(global_state->metadata_file.c_str(), global_state->metadata_file.size(), 0); @@ -55,7 +55,7 @@ static unique_ptr IcebergSnapshotsBind(ClientContext &context, Tab auto bind_data = make_uniq(); string metadata_compression_codec = "none"; - string table_version = DEFAULT_VERSION_HINT_FILE; + string table_version = DEFAULT_TABLE_VERSION; string version_name_format = DEFAULT_TABLE_VERSION_FORMAT; bool skip_schema_inference = false; diff --git a/src/include/iceberg_metadata.hpp b/src/include/iceberg_metadata.hpp index d7d4478..dd90f6c 100644 --- a/src/include/iceberg_metadata.hpp +++ b/src/include/iceberg_metadata.hpp @@ -16,12 +16,25 @@ using namespace 
duckdb_yyjson; namespace duckdb { +static string VERSION_GUESSING_CONFIG_VARIABLE = "unsafe_enable_version_guessing"; + +// When this is provided (and unsafe_enable_version_guessing is true) +// we first look for DEFAULT_VERSION_HINT_FILE, if it doesn't exist we +// then search for versions matching the DEFAULT_TABLE_VERSION_FORMAT +// We take the lexicographically "greatest" one as the latest version +// Note that this will violate ACID constraints in some situations. +static string UNKNOWN_TABLE_VERSION = "?"; + // First arg is version string, arg is either empty or ".gz" if gzip // Allows for both "v###.gz.metadata.json" and "###.metadata.json" styles static string DEFAULT_TABLE_VERSION_FORMAT = "v%s%s.metadata.json,%s%s.metadata.json"; +// This isn't explicitly in the standard, but is a commonly used technique static string DEFAULT_VERSION_HINT_FILE = "version-hint.text"; +// By default we will use the unknown version behavior mentioned above +static string DEFAULT_TABLE_VERSION = UNKNOWN_TABLE_VERSION; + struct IcebergColumnDefinition { public: static IcebergColumnDefinition ParseFromJson(yyjson_val *val); @@ -73,14 +86,18 @@ class IcebergSnapshot { static IcebergSnapshot ParseSnapShot(yyjson_val *snapshot, idx_t iceberg_format_version, idx_t schema_id, vector &schemas, string metadata_compression_codec, bool skip_schema_inference); - static string GetMetaDataPath(const string &path, FileSystem &fs, string metadata_compression_codec, string table_version, string version_format); + static string GetMetaDataPath(ClientContext &context, const string &path, FileSystem &fs, string metadata_compression_codec, string table_version, string version_format); static string ReadMetaData(const string &path, FileSystem &fs, string metadata_compression_codec); static yyjson_val *GetSnapshots(const string &path, FileSystem &fs, string GetSnapshotByTimestamp); static unique_ptr GetParseInfo(yyjson_doc &metadata_json); protected: + //!
Version extraction and identification + static bool UnsafeVersionGuessingEnabled(ClientContext &context); + static string GetTableVersionFromHint(const string &path, FileSystem &fs, string version_format); + static string GuessTableVersion(const string &meta_path, FileSystem &fs, string &table_version, string &metadata_compression_codec, string &version_format); + static string PickTableVersion(vector &found_metadata, string &version_pattern, string &glob); //! Internal JSON parsing functions - static string GetTableVersion(const string &path, FileSystem &fs, string version_format); static yyjson_val *FindLatestSnapshotInternal(yyjson_val *snapshots); static yyjson_val *FindSnapshotByIdInternal(yyjson_val *snapshots, idx_t target_id); static yyjson_val *FindSnapshotByIdTimestampInternal(yyjson_val *snapshots, timestamp_t timestamp); diff --git a/test/sql/iceberg_metadata.test b/test/sql/iceberg_metadata.test index 2ee1fbd..cae75df 100644 --- a/test/sql/iceberg_metadata.test +++ b/test/sql/iceberg_metadata.test @@ -51,4 +51,46 @@ lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA statement error SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_nonexistent'); ---- -IO Error: Cannot open file +IO Error: Cannot open "data/iceberg/lineitem_iceberg_nonexistent": Metadata directory does not exist + +statement error +SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); +---- +:.*SET unsafe_enable_version_guessing.* + +statement ok +SET unsafe_enable_version_guessing = true; + +query IIIIIIII +SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); +---- +lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 +lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING 
lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 + +query IIIIIIII +SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version='1'); +---- +lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro 1 DATA ADDED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 + +query IIIIIIII +SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version_name_format='v%s%s.metadata.json'); +---- +lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 +lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 + +query IIIIIIII +SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version='?', version_name_format='v%s%s.metadata.json'); +---- +lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 +lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 + +query IIIIIIII +SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC='gzip', version='?'); +---- +lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA ADDED EXISTING lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet PARQUET 111968 + +query IIIIIIII +SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_gz_no_hint', 
ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC='gzip'); +---- +lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA ADDED EXISTING lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet PARQUET 111968 + diff --git a/test/sql/iceberg_scan.test b/test/sql/iceberg_scan.test index 4f84bb7..f27e88c 100644 --- a/test/sql/iceberg_scan.test +++ b/test/sql/iceberg_scan.test @@ -12,6 +12,7 @@ require parquet require iceberg + ### Scanning latest snapshot query I SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE); @@ -79,3 +80,41 @@ query I SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='2', version_name_format='v%s%s.metadata.json'); ---- 111968 + +statement error +SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); +---- +:.*SET unsafe_enable_version_guessing.* + +statement ok +SET unsafe_enable_version_guessing=true; + +query I +SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.504'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +---- +60175 + +query I +SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.729'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +---- +60175 + +query I +SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:08:14.73'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +---- +51793 + +statement error +FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.503'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +---- +IO Error: Could not find latest snapshots for timestamp 2023-02-15 15:07:54.503 + +query I +SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); +---- +111968 + +query I +SELECT count(*) FROM 
ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='2', version_name_format='v%s%s.metadata.json'); +---- +111968 diff --git a/test/sql/iceberg_snapshots.test b/test/sql/iceberg_snapshots.test index f260c91..40125b9 100644 --- a/test/sql/iceberg_snapshots.test +++ b/test/sql/iceberg_snapshots.test @@ -41,7 +41,7 @@ SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg', version='1'); statement error SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_nonexistent'); ---- -IO Error: Cannot open file "data/iceberg/lineitem_iceberg_nonexistent/metadata/version-hint.text": No such file or directory +IO Error: Cannot open "data/iceberg/lineitem_iceberg_nonexistent": Metadata directory does not exist statement error SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz'); @@ -57,3 +57,29 @@ query IIII SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz', metadata_compression_codec="gzip", version='2'); ---- 0 4468019210336628573 2024-03-13 18:38:58.602 lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro + +statement ok +SET unsafe_enable_version_guessing=true; + +query IIII +SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_no_hint'); +---- +1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro +2 7635660646343998149 2023-02-15 15:08:14.73 lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro + +query IIII +SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_no_hint', version='1'); +---- +1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro + +query IIII +SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_no_hint', version="?"); +---- +1 3776207205136740581 
2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro +2 7635660646343998149 2023-02-15 15:08:14.73 lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro + +query IIII +SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz_no_hint', metadata_compression_codec="gzip"); +---- +0 4468019210336628573 2024-03-13 18:38:58.602 lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro +