Skip to content

Commit

Permalink
Merge pull request #79 from teaguesterling/issues/29
Browse files Browse the repository at this point in the history
Issues/29: Add version guessing functionality
  • Loading branch information
samansmink authored Nov 26, 2024
2 parents d62d91d + 0e1297f commit 27803ae
Show file tree
Hide file tree
Showing 44 changed files with 556 additions and 28 deletions.
11 changes: 6 additions & 5 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,21 @@ concurrency:
jobs:
duckdb-stable-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.2
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.3
with:
extension_name: iceberg
duckdb_version: v1.1.2
ci_tools_version: v1.1.2
duckdb_version: v1.1.3
ci_tools_version: v1.1.3
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools'

duckdb-stable-deploy:
name: Deploy extension binaries
needs: duckdb-stable-build
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.2
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.3
secrets: inherit
with:
extension_name: iceberg
duckdb_version: v1.1.2
duckdb_version: v1.1.3
ci_tools_version: v1.1.3
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools'
deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[submodule "duckdb"]
path = duckdb
url = https://github.com/duckdb/duckdb
branch = master
branch = main
[submodule "extension-ci-tools"]
path = extension-ci-tools
url = https://github.com/duckdb/extension-ci-tools.git
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
20 changes: 20 additions & 0 deletions data/iceberg/lineitem_iceberg_no_hint/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# README
This Iceberg table was generated by using DuckDB (v0.7.0) to generate TPC-H lineitem
at SF0.01 and then storing the result in a parquet file.

Then pyspark (3.3.1) was used with the iceberg extension from https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.0.0/iceberg-spark-runtime-3.3_2.12-1.0.0.jar
to write the iceberg table.

Finally, using pyspark, a delete query was performed on this iceberg table:

```
DELETE FROM iceberg_catalog.lineitem_iceberg where l_extendedprice < 10000
```

The result for Q06 of TPC-H on this table according to pyspark is now:
```
[Row(revenue=Decimal('1077536.9101'))]
```

Note: it appears that there are no deletes present in this iceberg table; the whole thing was rewritten.
This is likely due to the fact that the table is so small?
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
142 changes: 142 additions & 0 deletions data/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"format-version" : 2,
"table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b",
"location" : "./lineitem_iceberg",
"last-sequence-number" : 1,
"last-updated-ms" : 1676473674504,
"last-column-id" : 16,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey",
"required" : false,
"type" : "int"
}, {
"id" : 2,
"name" : "l_partkey",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey",
"required" : false,
"type" : "int"
}, {
"id" : 4,
"name" : "l_linenumber",
"required" : false,
"type" : "int"
}, {
"id" : 5,
"name" : "l_quantity",
"required" : false,
"type" : "int"
}, {
"id" : 6,
"name" : "l_extendedprice",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 7,
"name" : "l_discount",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 8,
"name" : "l_tax",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 9,
"name" : "l_returnflag",
"required" : false,
"type" : "string"
}, {
"id" : 10,
"name" : "l_linestatus",
"required" : false,
"type" : "string"
}, {
"id" : 11,
"name" : "l_shipdate",
"required" : false,
"type" : "date"
}, {
"id" : 12,
"name" : "l_commitdate",
"required" : false,
"type" : "date"
}, {
"id" : 13,
"name" : "l_receiptdate",
"required" : false,
"type" : "date"
}, {
"id" : 14,
"name" : "l_shipinstruct",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_shipmode",
"required" : false,
"type" : "string"
}, {
"id" : 16,
"name" : "l_comment",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "root",
"write.update.mode" : "merge-on-read"
},
"current-snapshot-id" : 3776207205136740581,
"refs" : {
"main" : {
"snapshot-id" : 3776207205136740581,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473674504,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"added-records" : "60175",
"added-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "60175",
"total-files-size" : "1390176",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1676473674504,
"snapshot-id" : 3776207205136740581
} ],
"metadata-log" : [ ]
}
172 changes: 172 additions & 0 deletions data/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
{
"format-version" : 2,
"table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b",
"location" : "./lineitem_iceberg",
"last-sequence-number" : 2,
"last-updated-ms" : 1676473694730,
"last-column-id" : 16,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey",
"required" : false,
"type" : "int"
}, {
"id" : 2,
"name" : "l_partkey",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey",
"required" : false,
"type" : "int"
}, {
"id" : 4,
"name" : "l_linenumber",
"required" : false,
"type" : "int"
}, {
"id" : 5,
"name" : "l_quantity",
"required" : false,
"type" : "int"
}, {
"id" : 6,
"name" : "l_extendedprice",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 7,
"name" : "l_discount",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 8,
"name" : "l_tax",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 9,
"name" : "l_returnflag",
"required" : false,
"type" : "string"
}, {
"id" : 10,
"name" : "l_linestatus",
"required" : false,
"type" : "string"
}, {
"id" : 11,
"name" : "l_shipdate",
"required" : false,
"type" : "date"
}, {
"id" : 12,
"name" : "l_commitdate",
"required" : false,
"type" : "date"
}, {
"id" : 13,
"name" : "l_receiptdate",
"required" : false,
"type" : "date"
}, {
"id" : 14,
"name" : "l_shipinstruct",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_shipmode",
"required" : false,
"type" : "string"
}, {
"id" : 16,
"name" : "l_comment",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "root",
"write.update.mode" : "merge-on-read"
},
"current-snapshot-id" : 7635660646343998149,
"refs" : {
"main" : {
"snapshot-id" : 7635660646343998149,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473674504,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"added-records" : "60175",
"added-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "60175",
"total-files-size" : "1390176",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro",
"schema-id" : 0
}, {
"sequence-number" : 2,
"snapshot-id" : 7635660646343998149,
"parent-snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473694730,
"summary" : {
"operation" : "overwrite",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"deleted-data-files" : "1",
"added-records" : "51793",
"deleted-records" : "60175",
"added-files-size" : "1208539",
"removed-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "51793",
"total-files-size" : "1208539",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1676473674504,
"snapshot-id" : 3776207205136740581
}, {
"timestamp-ms" : 1676473694730,
"snapshot-id" : 7635660646343998149
} ],
"metadata-log" : [ {
"timestamp-ms" : 1676473674504,
"metadata-file" : "lineitem_iceberg/metadata/v1.metadata.json"
} ]
}
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 301 files
Loading

0 comments on commit 27803ae

Please sign in to comment.