Skip to content

Commit

Permalink
Adding test cases
Browse files Browse the repository at this point in the history
Signed-off-by: Teague Sterling <[email protected]>
  • Loading branch information
teaguesterling committed Nov 11, 2024
1 parent 0a4f84f commit 05262fb
Show file tree
Hide file tree
Showing 34 changed files with 346 additions and 2 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
20 changes: 20 additions & 0 deletions data/iceberg/lineitem_iceberg_no_hint/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# README
This Iceberg table was generated by using DuckDB (v0.7.0) to generate the TPC-H lineitem table at
SF0.01 and then storing it in a parquet file.

Then pyspark (3.3.1) was used with the iceberg extension from https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.0.0/iceberg-spark-runtime-3.3_2.12-1.0.0.jar
to write the iceberg table.

Finally, using pyspark, a delete query was run against this Iceberg table:

```
DELETE FROM iceberg_catalog.lineitem_iceberg where l_extendedprice < 10000
```

The result for Q06 of TPC-H on this table according to pyspark is now:
```
[Row(revenue=Decimal('1077536.9101'))]
```

Note: there appear to be no delete files in this Iceberg table; the delete rewrote the whole data file instead.
This is probably because the table is so small.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
142 changes: 142 additions & 0 deletions data/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"format-version" : 2,
"table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b",
"location" : "./lineitem_iceberg",
"last-sequence-number" : 1,
"last-updated-ms" : 1676473674504,
"last-column-id" : 16,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey",
"required" : false,
"type" : "int"
}, {
"id" : 2,
"name" : "l_partkey",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey",
"required" : false,
"type" : "int"
}, {
"id" : 4,
"name" : "l_linenumber",
"required" : false,
"type" : "int"
}, {
"id" : 5,
"name" : "l_quantity",
"required" : false,
"type" : "int"
}, {
"id" : 6,
"name" : "l_extendedprice",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 7,
"name" : "l_discount",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 8,
"name" : "l_tax",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 9,
"name" : "l_returnflag",
"required" : false,
"type" : "string"
}, {
"id" : 10,
"name" : "l_linestatus",
"required" : false,
"type" : "string"
}, {
"id" : 11,
"name" : "l_shipdate",
"required" : false,
"type" : "date"
}, {
"id" : 12,
"name" : "l_commitdate",
"required" : false,
"type" : "date"
}, {
"id" : 13,
"name" : "l_receiptdate",
"required" : false,
"type" : "date"
}, {
"id" : 14,
"name" : "l_shipinstruct",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_shipmode",
"required" : false,
"type" : "string"
}, {
"id" : 16,
"name" : "l_comment",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "root",
"write.update.mode" : "merge-on-read"
},
"current-snapshot-id" : 3776207205136740581,
"refs" : {
"main" : {
"snapshot-id" : 3776207205136740581,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473674504,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"added-records" : "60175",
"added-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "60175",
"total-files-size" : "1390176",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1676473674504,
"snapshot-id" : 3776207205136740581
} ],
"metadata-log" : [ ]
}
172 changes: 172 additions & 0 deletions data/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
{
"format-version" : 2,
"table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b",
"location" : "./lineitem_iceberg",
"last-sequence-number" : 2,
"last-updated-ms" : 1676473694730,
"last-column-id" : 16,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "l_orderkey",
"required" : false,
"type" : "int"
}, {
"id" : 2,
"name" : "l_partkey",
"required" : false,
"type" : "int"
}, {
"id" : 3,
"name" : "l_suppkey",
"required" : false,
"type" : "int"
}, {
"id" : 4,
"name" : "l_linenumber",
"required" : false,
"type" : "int"
}, {
"id" : 5,
"name" : "l_quantity",
"required" : false,
"type" : "int"
}, {
"id" : 6,
"name" : "l_extendedprice",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 7,
"name" : "l_discount",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 8,
"name" : "l_tax",
"required" : false,
"type" : "decimal(15, 2)"
}, {
"id" : 9,
"name" : "l_returnflag",
"required" : false,
"type" : "string"
}, {
"id" : 10,
"name" : "l_linestatus",
"required" : false,
"type" : "string"
}, {
"id" : 11,
"name" : "l_shipdate",
"required" : false,
"type" : "date"
}, {
"id" : 12,
"name" : "l_commitdate",
"required" : false,
"type" : "date"
}, {
"id" : 13,
"name" : "l_receiptdate",
"required" : false,
"type" : "date"
}, {
"id" : 14,
"name" : "l_shipinstruct",
"required" : false,
"type" : "string"
}, {
"id" : 15,
"name" : "l_shipmode",
"required" : false,
"type" : "string"
}, {
"id" : 16,
"name" : "l_comment",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "root",
"write.update.mode" : "merge-on-read"
},
"current-snapshot-id" : 7635660646343998149,
"refs" : {
"main" : {
"snapshot-id" : 7635660646343998149,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473674504,
"summary" : {
"operation" : "append",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"added-records" : "60175",
"added-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "60175",
"total-files-size" : "1390176",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro",
"schema-id" : 0
}, {
"sequence-number" : 2,
"snapshot-id" : 7635660646343998149,
"parent-snapshot-id" : 3776207205136740581,
"timestamp-ms" : 1676473694730,
"summary" : {
"operation" : "overwrite",
"spark.app.id" : "local-1676472783435",
"added-data-files" : "1",
"deleted-data-files" : "1",
"added-records" : "51793",
"deleted-records" : "60175",
"added-files-size" : "1208539",
"removed-files-size" : "1390176",
"changed-partition-count" : "1",
"total-records" : "51793",
"total-files-size" : "1208539",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" : "lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro",
"schema-id" : 0
} ],
"snapshot-log" : [ {
"timestamp-ms" : 1676473674504,
"snapshot-id" : 3776207205136740581
}, {
"timestamp-ms" : 1676473694730,
"snapshot-id" : 7635660646343998149
} ],
"metadata-log" : [ {
"timestamp-ms" : 1676473674504,
"metadata-file" : "lineitem_iceberg/metadata/v1.metadata.json"
} ]
}
2 changes: 1 addition & 1 deletion test/sql/iceberg_metadata.test
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA
statement error
SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_nonexistent');
----
IO Error: Cannot open file
IO Error: Could not guess Iceberg table version using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json'
10 changes: 10 additions & 0 deletions test/sql/iceberg_scan.test
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,13 @@ SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVE
----
IO Error: No snapshots found

query I
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE);
----
111968

query I
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, metadata_compression_codec="gzip");
----
111968

2 changes: 1 addition & 1 deletion test/sql/iceberg_snapshots.test
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg', version='1');
statement error
SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_nonexistent');
----
IO Error: Cannot open file "data/iceberg/lineitem_iceberg_nonexistent/metadata/version-hint.text": No such file or directory
IO Error: Could not guess Iceberg table version using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json'

statement error
SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz');
Expand Down

0 comments on commit 05262fb

Please sign in to comment.