diff --git a/Makefile b/Makefile index 86f3aa54b0..f2bb6f6871 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ help: ## Display this help install-poetry: ## Install poetry if the user has not done that yet. @if ! command -v poetry &> /dev/null; then \ echo "Poetry could not be found. Installing..."; \ - pip install --user poetry==1.8.4; \ + pip install --user poetry==1.8.5; \ else \ echo "Poetry is already installed."; \ fi diff --git a/dev/provision.py b/dev/provision.py index 53360748b6..b358da6593 100644 --- a/dev/provision.py +++ b/dev/provision.py @@ -22,7 +22,17 @@ from pyiceberg.schema import Schema from pyiceberg.types import FixedType, NestedField, UUIDType -spark = SparkSession.builder.getOrCreate() +# The configuration is important, otherwise we get many small +# parquet files with a single row. When a positional delete +# hits the Parquet file with one row, the parquet file gets +# dropped instead of having a merge-on-read delete file. +spark = ( + SparkSession + .builder + .config("spark.sql.shuffle.partitions", "1") + .config("spark.default.parallelism", "1") + .getOrCreate() +) catalogs = { 'rest': load_catalog( @@ -120,10 +130,6 @@ """ ) - # Partitioning is not really needed, but there is a bug: - # https://github.com/apache/iceberg/pull/7685 - spark.sql(f"ALTER TABLE {catalog_name}.default.test_positional_mor_deletes ADD PARTITION FIELD years(dt) AS dt_years") - spark.sql( f""" INSERT INTO {catalog_name}.default.test_positional_mor_deletes @@ -168,10 +174,6 @@ """ ) - # Partitioning is not really needed, but there is a bug: - # https://github.com/apache/iceberg/pull/7685 - spark.sql(f"ALTER TABLE {catalog_name}.default.test_positional_mor_double_deletes ADD PARTITION FIELD years(dt) AS dt_years") - spark.sql( f""" INSERT INTO {catalog_name}.default.test_positional_mor_double_deletes diff --git a/poetry.lock b/poetry.lock index 2e87e98d92..5770f8203d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1020,23 +1020,27 @@ files = [ [[package]] name = "deptry" -version = "0.21.1" +version = "0.21.2" description = "A command line utility to check for unused, missing and transitive dependencies in a Python project." optional = false python-versions = ">=3.9" files = [ - {file = "deptry-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c31e1a66502e28870e1e0a679598462a6119f4bcb656786e63cb545328170a3f"}, - {file = "deptry-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:4b53089c22d18076935a3e9e6325566fa712cd9b89fe602978a8e85f0f4209bf"}, - {file = "deptry-0.21.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5eae7afbcb9b7f6baa855b323e0da016a23f2a98d4b181dcfd2c71766512387"}, - {file = "deptry-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4afef1c5eb0b48ebc31de2437b460df0363cb99722252b7faf7fa6f43e10cbcd"}, - {file = "deptry-0.21.1-cp39-abi3-win_amd64.whl", hash = "sha256:981a28e1feeaad82f07a6e3c8d7842c5f6eae3807dc13b24d453a20cd0a42a72"}, - {file = "deptry-0.21.1-cp39-abi3-win_arm64.whl", hash = "sha256:98075550540c6b45f57abdfc453900bd2a179dc495d986ccc0757a813ee55103"}, - {file = "deptry-0.21.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:79593d7631cdbbc39d76503e3af80e46d8b4873e915b85c1567a04c81e8a17d5"}, - {file = "deptry-0.21.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:145a172ea608bb86dd93a9d14f7d45ed8649a36d7f685ea725e0348cbf562f10"}, - {file = "deptry-0.21.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e487f520d4fbee513f4767ab98334a29d5d932f78eb413b64e27c977f2bf2756"}, - {file = "deptry-0.21.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:091288cad2bd6029995d2e700e965cd574079365807f202ee232e4be0a571f43"}, - {file = "deptry-0.21.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1adf29a5aa1d33d9e1140b9235b212d9753278604b4389b2186f638692e29876"}, - {file = "deptry-0.21.1.tar.gz", hash = "sha256:60332b8d58d6584b340511a4e1b694048499f273d69eaea413631b2e8bc186ff"}, + {file = "deptry-0.21.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e3b9e0c5ee437240b65e61107b5777a12064f78f604bf9f181a96c9b56eb896d"}, + {file = "deptry-0.21.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:d76bbf48bd62ecc44ca3d414769bd4b7956598d23d9ccb42fd359b831a31cab2"}, + {file = "deptry-0.21.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3080bb88c16ebd35f59cba7688416115b7aaf4630dc5a051dff2649cbf129a1b"}, + {file = "deptry-0.21.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adb12d6678fb5dbd320a0a2e37881059d0a45bec6329df4250c977d803fe7f96"}, + {file = "deptry-0.21.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:7479d3079be69c3bbf5913d8e21090749c1139ee91f81520ffce90b5322476b0"}, + {file = "deptry-0.21.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:019167b35301edd2bdd4719c8b8f44769be4507cb8a1cd46fff4393cdbe8d31b"}, + {file = "deptry-0.21.2-cp39-abi3-win_amd64.whl", hash = "sha256:d8add495f0dd19a38aa6d1e09b14b1441bca47c9d945bc7b322efb084313eea3"}, + {file = "deptry-0.21.2-cp39-abi3-win_arm64.whl", hash = "sha256:06d48e9fa460aad02f9e1b079d9f5a69d622d291b3a0525b722fc91c88032042"}, + {file = "deptry-0.21.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3ef8aed33a2eac357f9565063bc1257bcefa03a37038299c08a4222e28f3cd34"}, + {file = "deptry-0.21.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:917745db5f8295eb5048e43d9073a9a675ffdba865e9b294d2e7aa455730cb06"}, + {file = "deptry-0.21.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:186ddbc69c1f70e684e83e202795e1054d0c2dfc03b8acc077f65dc3b6a7f4ce"}, + {file = "deptry-0.21.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3686e86ad7063b5a6e5253454f9d9e4a7a6b1511a99bd4306fda5424480be48"}, + {file = "deptry-0.21.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1012a88500f242489066f811f6ec0c93328d9340bbf0f87f0c7d2146054d197e"}, + {file = "deptry-0.21.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:769bb658172586d1b03046bdc6b6c94f6a98ecfbac04ff7f77ec61768c75e1c2"}, + {file = "deptry-0.21.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fb2f43747b58abeec01dc277ef22859342f3bca2ac677818c94940a009b436c0"}, + {file = "deptry-0.21.2.tar.gz", hash = "sha256:4e870553c7a1fafcd99a83ba4137259525679eecabeff61bc669741efa201541"}, ] [package.dependencies] @@ -1167,42 +1171,42 @@ test = ["pytest (>=6)"] [[package]] name = "fastavro" -version = "1.9.7" +version = "1.10.0" description = "Fast read/write of AVRO files" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "fastavro-1.9.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc811fb4f7b5ae95f969cda910241ceacf82e53014c7c7224df6f6e0ca97f52f"}, - {file = "fastavro-1.9.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb8749e419a85f251bf1ac87d463311874972554d25d4a0b19f6bdc56036d7cf"}, - {file = "fastavro-1.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f9bafa167cb4d1c3dd17565cb5bf3d8c0759e42620280d1760f1e778e07fc"}, - {file = "fastavro-1.9.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e87d04b235b29f7774d226b120da2ca4e60b9e6fdf6747daef7f13f218b3517a"}, - {file = "fastavro-1.9.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b525c363e267ed11810aaad8fbdbd1c3bd8837d05f7360977d72a65ab8c6e1fa"}, - {file = "fastavro-1.9.7-cp310-cp310-win_amd64.whl", hash = "sha256:6312fa99deecc319820216b5e1b1bd2d7ebb7d6f221373c74acfddaee64e8e60"}, - {file = "fastavro-1.9.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ec8499dc276c2d2ef0a68c0f1ad11782b2b956a921790a36bf4c18df2b8d4020"}, - {file = "fastavro-1.9.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d9d96f98052615ab465c63ba8b76ed59baf2e3341b7b169058db104cbe2aa0"}, - {file = "fastavro-1.9.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:919f3549e07a8a8645a2146f23905955c35264ac809f6c2ac18142bc5b9b6022"}, - {file = "fastavro-1.9.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9de1fa832a4d9016724cd6facab8034dc90d820b71a5d57c7e9830ffe90f31e4"}, - {file = "fastavro-1.9.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1d09227d1f48f13281bd5ceac958650805aef9a4ef4f95810128c1f9be1df736"}, - {file = "fastavro-1.9.7-cp311-cp311-win_amd64.whl", hash = "sha256:2db993ae6cdc63e25eadf9f93c9e8036f9b097a3e61d19dca42536dcc5c4d8b3"}, - {file = "fastavro-1.9.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4e1289b731214a7315884c74b2ec058b6e84380ce9b18b8af5d387e64b18fc44"}, - {file = "fastavro-1.9.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eac69666270a76a3a1d0444f39752061195e79e146271a568777048ffbd91a27"}, - {file = "fastavro-1.9.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9be089be8c00f68e343bbc64ca6d9a13e5e5b0ba8aa52bcb231a762484fb270e"}, - {file = "fastavro-1.9.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d576eccfd60a18ffa028259500df67d338b93562c6700e10ef68bbd88e499731"}, - {file = "fastavro-1.9.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ee9bf23c157bd7dcc91ea2c700fa3bd924d9ec198bb428ff0b47fa37fe160659"}, - {file = "fastavro-1.9.7-cp312-cp312-win_amd64.whl", hash = "sha256:b6b2ccdc78f6afc18c52e403ee68c00478da12142815c1bd8a00973138a166d0"}, - {file = "fastavro-1.9.7-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:7313def3aea3dacface0a8b83f6d66e49a311149aa925c89184a06c1ef99785d"}, - {file = "fastavro-1.9.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:536f5644737ad21d18af97d909dba099b9e7118c237be7e4bd087c7abde7e4f0"}, - {file = "fastavro-1.9.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2af559f30383b79cf7d020a6b644c42ffaed3595f775fe8f3d7f80b1c43dfdc5"}, - {file = "fastavro-1.9.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:edc28ab305e3c424de5ac5eb87b48d1e07eddb6aa08ef5948fcda33cc4d995ce"}, - {file = "fastavro-1.9.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ec2e96bdabd58427fe683329b3d79f42c7b4f4ff6b3644664a345a655ac2c0a1"}, - {file = "fastavro-1.9.7-cp38-cp38-win_amd64.whl", hash = "sha256:3b683693c8a85ede496ebebe115be5d7870c150986e34a0442a20d88d7771224"}, - {file = "fastavro-1.9.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:58f76a5c9a312fbd37b84e49d08eb23094d36e10d43bc5df5187bc04af463feb"}, - {file = "fastavro-1.9.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56304401d2f4f69f5b498bdd1552c13ef9a644d522d5de0dc1d789cf82f47f73"}, - {file = "fastavro-1.9.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fcce036c6aa06269fc6a0428050fcb6255189997f5e1a728fc461e8b9d3e26b"}, - {file = "fastavro-1.9.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:17de68aae8c2525f5631d80f2b447a53395cdc49134f51b0329a5497277fc2d2"}, - {file = "fastavro-1.9.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7c911366c625d0a997eafe0aa83ffbc6fd00d8fd4543cb39a97c6f3b8120ea87"}, - {file = "fastavro-1.9.7-cp39-cp39-win_amd64.whl", hash = "sha256:912283ed48578a103f523817fdf0c19b1755cea9b4a6387b73c79ecb8f8f84fc"}, - {file = "fastavro-1.9.7.tar.gz", hash = "sha256:13e11c6cb28626da85290933027cd419ce3f9ab8e45410ef24ce6b89d20a1f6c"}, + {file = "fastavro-1.10.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1a9fe0672d2caf0fe54e3be659b13de3cad25a267f2073d6f4b9f8862acc31eb"}, + {file = "fastavro-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86dd0410770e0c99363788f0584523709d85e57bb457372ec5c285a482c17fe6"}, + {file = "fastavro-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:190e80dc7d77d03a6a8597a026146b32a0bbe45e3487ab4904dc8c1bebecb26d"}, + {file = "fastavro-1.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bf570d63be9155c3fdc415f60a49c171548334b70fff0679a184b69c29b6bc61"}, + {file = "fastavro-1.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e07abb6798e95dccecaec316265e35a018b523d1f3944ad396d0a93cb95e0a08"}, + {file = "fastavro-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:37203097ed11d0b8fd3c004904748777d730cafd26e278167ea602eebdef8eb2"}, + {file = "fastavro-1.10.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d183c075f527ab695a27ae75f210d4a86bce660cda2f85ae84d5606efc15ef50"}, + {file = "fastavro-1.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7a95a2c0639bffd7c079b59e9a796bfc3a9acd78acff7088f7c54ade24e4a77"}, + {file = "fastavro-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a678153b5da1b024a32ec3f611b2e7afd24deac588cb51dd1b0019935191a6d"}, + {file = "fastavro-1.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:67a597a5cfea4dddcf8b49eaf8c2b5ffee7fda15b578849185bc690ec0cd0d8f"}, + {file = "fastavro-1.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1fd689724760b17f69565d8a4e7785ed79becd451d1c99263c40cb2d6491f1d4"}, + {file = "fastavro-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:4f949d463f9ac4221128a51e4e34e2562f401e5925adcadfd28637a73df6c2d8"}, + {file = "fastavro-1.10.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cfe57cb0d72f304bd0dcc5a3208ca6a7363a9ae76f3073307d095c9d053b29d4"}, + {file = "fastavro-1.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74e517440c824cb65fb29d3e3903a9406f4d7c75490cef47e55c4c82cdc66270"}, + {file = "fastavro-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:203c17d44cadde76e8eecb30f2d1b4f33eb478877552d71f049265dc6f2ecd10"}, + {file = "fastavro-1.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6575be7f2b5f94023b5a4e766b0251924945ad55e9a96672dc523656d17fe251"}, + {file = "fastavro-1.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe471deb675ed2f01ee2aac958fbf8ebb13ea00fa4ce7f87e57710a0bc592208"}, + {file = "fastavro-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:567ff515f2a5d26d9674b31c95477f3e6022ec206124c62169bc2ffaf0889089"}, + {file = "fastavro-1.10.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:82263af0adfddb39c85f9517d736e1e940fe506dfcc35bc9ab9f85e0fa9236d8"}, + {file = "fastavro-1.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:566c193109ff0ff84f1072a165b7106c4f96050078a4e6ac7391f81ca1ef3efa"}, + {file = "fastavro-1.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e400d2e55d068404d9fea7c5021f8b999c6f9d9afa1d1f3652ec92c105ffcbdd"}, + {file = "fastavro-1.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b8227497f71565270f9249fc9af32a93644ca683a0167cfe66d203845c3a038"}, + {file = "fastavro-1.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e62d04c65461b30ac6d314e4197ad666371e97ae8cb2c16f971d802f6c7f514"}, + {file = "fastavro-1.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:86baf8c9740ab570d0d4d18517da71626fe9be4d1142bea684db52bd5adb078f"}, + {file = "fastavro-1.10.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5bccbb6f8e9e5b834cca964f0e6ebc27ebe65319d3940b0b397751a470f45612"}, + {file = "fastavro-1.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0132f6b0b53f61a0a508a577f64beb5de1a5e068a9b4c0e1df6e3b66568eec4"}, + {file = "fastavro-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca37a363b711202c6071a6d4787e68e15fa3ab108261058c4aae853c582339af"}, + {file = "fastavro-1.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:cf38cecdd67ca9bd92e6e9ba34a30db6343e7a3bedf171753ee78f8bd9f8a670"}, + {file = "fastavro-1.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f4dd10e0ed42982122d20cdf1a88aa50ee09e5a9cd9b39abdffb1aa4f5b76435"}, + {file = "fastavro-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:aaef147dc14dd2d7823246178fd06fc5e477460e070dc6d9e07dd8193a6bc93c"}, + {file = "fastavro-1.10.0.tar.gz", hash = "sha256:47bf41ac6d52cdfe4a3da88c75a802321321b37b663a900d12765101a5d6886f"}, ] [package.extras] @@ -1411,22 +1415,22 @@ gcsfuse = ["fusepy"] [[package]] name = "getdaft" -version = "0.3.15" +version = "0.4.0" description = "Distributed Dataframes for Multimodal Data" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "getdaft-0.3.15-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:7f85b0a4b5937419e8845b4718a473f097d900f1b43efa87140397fc7eff2e75"}, - {file = "getdaft-0.3.15-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:3ece3de1a32c83e1ab641e41a3c8d4656cf356848b9c7d1b00564c359c30d6be"}, - {file = "getdaft-0.3.15-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:add1ba84c4a45a57c909730f39c96b1e8c9716bf7646d78164680d62899c4f0e"}, - {file = "getdaft-0.3.15-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f5486f86056427665668a69efa7dbd8361eff262f20d3c73767906dee0f5d55"}, - {file = "getdaft-0.3.15-cp38-abi3-win_amd64.whl", hash = "sha256:2c03a3ea203582004b664742f6bad5975fae9f02281942edc46b2b17622040a4"}, - {file = "getdaft-0.3.15.tar.gz", hash = "sha256:101726149ff611c6976f59670bf4fae82c9b939ae4a8d812d88a1cb824c1bca1"}, + {file = "getdaft-0.4.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:82464e2c809a3c659f14ed4887c430ed3eea959121cb1702cb48e32c499c17b8"}, + {file = "getdaft-0.4.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:df5ded32e96167cbb30aa579b1f8b156e63d19221288eae9e5763c0a4c4425ba"}, + {file = "getdaft-0.4.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:638b2f0497ec41343400ba8914f908581db9d3087611166579e700838f48876a"}, + {file = "getdaft-0.4.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df79ea828e9bc94cd1aea362c4a06bcd2e1363562df2ff95f8a1173d2b5f3320"}, + {file = "getdaft-0.4.0-cp39-abi3-win_amd64.whl", hash = "sha256:56b8487e77caf6f4f973a9350f89893d8063736d2a38127bdd840e1555faf1b5"}, + {file = "getdaft-0.4.0.tar.gz", hash = "sha256:15503f1930d9309d9d9caca1b4245064a33e00a111facd845380101d4cebc720"}, ] [package.dependencies] fsspec = "*" -pyarrow = ">=7.0.0" +pyarrow = ">=8.0.0" tqdm = "*" typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.10\""} @@ -2243,13 +2247,13 @@ type = ["mypy (==1.11.2)"] [[package]] name = "moto" -version = "5.0.23" +version = "5.0.24" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "moto-5.0.23-py3-none-any.whl", hash = "sha256:a8069f9c945e7503c43eccec30693f5656e0f8efb0256dfd814d99dedc38429e"}, - {file = "moto-5.0.23.tar.gz", hash = "sha256:8a32636647e45a9b76c32de0ed15c4b083c62849993217f96aa60026a2ca1721"}, + {file = "moto-5.0.24-py3-none-any.whl", hash = "sha256:4d826f1574849f18ddd2fcbf614d97f82c8fddfb9d95fac1078da01a39b57c10"}, + {file = "moto-5.0.24.tar.gz", hash = "sha256:dba6426bd770fbb9d892633fbd35253cbc181eeaa0eba97d6f058720a8fe9b42"}, ] [package.dependencies] @@ -4703,4 +4707,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.9, !=3.9.7" -content-hash = "4dfa11b8595ae4175804442806133a1bace83e7c7e94321fc5bfedadbd2e4260" +content-hash = "2084f03c93f2d1085a5671a171c6cbeb96d9688079270ceca38b0854fe9e0520" diff --git a/pyiceberg/cli/console.py b/pyiceberg/cli/console.py index 82c27a256b..83e67a3cbb 100644 --- a/pyiceberg/cli/console.py +++ b/pyiceberg/cli/console.py @@ -34,34 +34,9 @@ from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchPropertyException, NoSuchTableError from pyiceberg.table import TableProperties from pyiceberg.table.refs import SnapshotRef -from pyiceberg.utils.deprecated import deprecated from pyiceberg.utils.properties import property_as_int -class DeprecatedConstants: - @property - @deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="DEFAULT_MAX_SNAPSHOT_AGE_MS is deprecated. Use TableProperties.MAX_SNAPSHOT_AGE_MS_DEFAULT instead.", - ) - def DEFAULT_MAX_SNAPSHOT_AGE_MS(self) -> int: - return 432000000 - - @property - @deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="DEFAULT_MIN_SNAPSHOTS_TO_KEEP is deprecated. Use TableProperties.MIN_SNAPSHOTS_TO_KEEP_DEFAULT instead.", - ) - def DEFAULT_MIN_SNAPSHOTS_TO_KEEP(self) -> int: - return 1 - - -DEFAULT_MIN_SNAPSHOTS_TO_KEEP = DeprecatedConstants().DEFAULT_MIN_SNAPSHOTS_TO_KEEP -DEFAULT_MAX_SNAPSHOT_AGE_MS = DeprecatedConstants().DEFAULT_MAX_SNAPSHOT_AGE_MS - - def catch_exception() -> Callable: # type: ignore def decorator(func: Callable) -> Callable: # type: ignore @wraps(func) diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 434ae67df0..23796d4e6a 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -94,13 +94,13 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> AWSRequest: - if TOKEN not in properties: - raise SignError("Signer set, but token is not available") - signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT) - signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} + signer_headers = {} + if token := properties.get(TOKEN): + signer_headers = {"Authorization": f"Bearer {token}"} + signer_body = { "method": request.method, "region": request.context["client_region"], diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index bc2c0f2051..e701dd8717 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -165,7 +165,7 @@ from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.config import Config from pyiceberg.utils.datetime import millis_to_datetime -from pyiceberg.utils.deprecated import deprecated, deprecation_message +from pyiceberg.utils.deprecated import deprecation_message from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int from pyiceberg.utils.singleton import Singleton from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string @@ -1535,187 +1535,6 @@ def _record_batches_from_scan_tasks_and_deletes( total_row_count += len(batch) -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="project_table is deprecated. Use ArrowScan.to_table instead.", -) -def project_table( - tasks: Iterable[FileScanTask], - table_metadata: TableMetadata, - io: FileIO, - row_filter: BooleanExpression, - projected_schema: Schema, - case_sensitive: bool = True, - limit: Optional[int] = None, -) -> pa.Table: - """Resolve the right columns based on the identifier. - - Args: - tasks (Iterable[FileScanTask]): A URI or a path to a local file. - table_metadata (TableMetadata): The table metadata of the table that's being queried - io (FileIO): A FileIO to open streams to the object store - row_filter (BooleanExpression): The expression for filtering rows. - projected_schema (Schema): The output schema. - case_sensitive (bool): Case sensitivity when looking up column names. - limit (Optional[int]): Limit the number of records. - - Raises: - ResolveError: When an incompatible query is done. - """ - scheme, netloc, _ = PyArrowFileIO.parse_location(table_metadata.location) - if isinstance(io, PyArrowFileIO): - fs = io.fs_by_scheme(scheme, netloc) - else: - try: - from pyiceberg.io.fsspec import FsspecFileIO - - if isinstance(io, FsspecFileIO): - from pyarrow.fs import PyFileSystem - - fs = PyFileSystem(FSSpecHandler(io.get_fs(scheme))) - else: - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") - except ModuleNotFoundError as e: - # When FsSpec is not installed - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") from e - - use_large_types = property_as_bool(io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, True) - - bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive) - - projected_field_ids = { - id for id in projected_schema.field_ids if not isinstance(projected_schema.find_type(id), (MapType, ListType)) - }.union(extract_field_ids(bound_row_filter)) - - deletes_per_file = _read_all_delete_files(fs, tasks) - executor = ExecutorFactory.get_or_create() - futures = [ - executor.submit( - _task_to_table, - fs, - task, - bound_row_filter, - projected_schema, - projected_field_ids, - deletes_per_file.get(task.file.file_path), - case_sensitive, - table_metadata.name_mapping(), - use_large_types, - ) - for task in tasks - ] - total_row_count = 0 - # for consistent ordering, we need to maintain future order - futures_index = {f: i for i, f in enumerate(futures)} - completed_futures: SortedList[Future[pa.Table]] = SortedList(iterable=[], key=lambda f: futures_index[f]) - for future in concurrent.futures.as_completed(futures): - completed_futures.add(future) - if table_result := future.result(): - total_row_count += len(table_result) - # stop early if limit is satisfied - if limit is not None and total_row_count >= limit: - break - - # by now, we've either completed all tasks or satisfied the limit - if limit is not None: - _ = [f.cancel() for f in futures if not f.done()] - - tables = [f.result() for f in completed_futures if f.result()] - - if len(tables) < 1: - return pa.Table.from_batches([], schema=schema_to_pyarrow(projected_schema, include_field_ids=False)) - - result = pa.concat_tables(tables, promote_options="permissive") - - if limit is not None: - return result.slice(0, limit) - - return result - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="project_table is deprecated. Use ArrowScan.to_record_batches instead.", -) -def project_batches( - tasks: Iterable[FileScanTask], - table_metadata: TableMetadata, - io: FileIO, - row_filter: BooleanExpression, - projected_schema: Schema, - case_sensitive: bool = True, - limit: Optional[int] = None, -) -> Iterator[pa.RecordBatch]: - """Resolve the right columns based on the identifier. - - Args: - tasks (Iterable[FileScanTask]): A URI or a path to a local file. - table_metadata (TableMetadata): The table metadata of the table that's being queried - io (FileIO): A FileIO to open streams to the object store - row_filter (BooleanExpression): The expression for filtering rows. - projected_schema (Schema): The output schema. - case_sensitive (bool): Case sensitivity when looking up column names. - limit (Optional[int]): Limit the number of records. - - Raises: - ResolveError: When an incompatible query is done. - """ - scheme, netloc, _ = PyArrowFileIO.parse_location(table_metadata.location) - if isinstance(io, PyArrowFileIO): - fs = io.fs_by_scheme(scheme, netloc) - else: - try: - from pyiceberg.io.fsspec import FsspecFileIO - - if isinstance(io, FsspecFileIO): - from pyarrow.fs import PyFileSystem - - fs = PyFileSystem(FSSpecHandler(io.get_fs(scheme))) - else: - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") - except ModuleNotFoundError as e: - # When FsSpec is not installed - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") from e - - use_large_types = property_as_bool(io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, True) - - bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive) - - projected_field_ids = { - id for id in projected_schema.field_ids if not isinstance(projected_schema.find_type(id), (MapType, ListType)) - }.union(extract_field_ids(bound_row_filter)) - - deletes_per_file = _read_all_delete_files(fs, tasks) - - total_row_count = 0 - - for task in tasks: - # stop early if limit is satisfied - if limit is not None and total_row_count >= limit: - break - batches = _task_to_record_batches( - fs, - task, - bound_row_filter, - projected_schema, - projected_field_ids, - deletes_per_file.get(task.file.file_path), - case_sensitive, - table_metadata.name_mapping(), - use_large_types, - ) - for batch in batches: - if limit is not None: - if total_row_count >= limit: - break - elif total_row_count + len(batch) >= limit: - batch = batch.slice(0, limit - total_row_count) - yield batch - total_row_count += len(batch) - - def _to_requested_schema( requested_schema: Schema, file_schema: Schema, diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 02e8e43ff3..4ec3403bb3 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -111,14 +111,11 @@ UpgradeFormatVersionUpdate, update_table_metadata, ) -from pyiceberg.table.update.schema import UpdateSchema, _Move, _MoveOperation +from pyiceberg.table.update.schema import UpdateSchema from pyiceberg.table.update.snapshot import ( ManageSnapshots, UpdateSnapshot, - _DeleteFiles, _FastAppendFiles, - _MergeAppendFiles, - _OverwriteFiles, ) from pyiceberg.table.update.spec import UpdateSpec from pyiceberg.transforms import IdentityTransform @@ -137,8 +134,6 @@ ) from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.config import Config -from pyiceberg.utils.deprecated import deprecated -from pyiceberg.utils.deprecated import deprecation_message as deprecation_message from pyiceberg.utils.properties import property_as_bool if TYPE_CHECKING: @@ -1641,57 +1636,3 @@ def _parquet_files_to_data_files(table_metadata: TableMetadata, file_paths: List from pyiceberg.io.pyarrow import parquet_files_to_data_files yield from parquet_files_to_data_files(io=io, table_metadata=table_metadata, file_paths=iter(file_paths)) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.Move has been changed to private class pyiceberg.table.update.schema._Move", -) -def Move(*args: Any, **kwargs: Any) -> _Move: - return _Move(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.MoveOperation has been changed to private class pyiceberg.table.update.schema._MoveOperation", -) -def MoveOperation(*args: Any, **kwargs: Any) -> _MoveOperation: - return _MoveOperation(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.DeleteFiles has been changed to private class pyiceberg.table.update.snapshot._DeleteFiles", -) -def DeleteFiles(*args: Any, **kwargs: Any) -> _DeleteFiles: - return _DeleteFiles(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.FastAppendFiles has been changed to private class pyiceberg.table.update.snapshot._FastAppendFiles", -) -def FastAppendFiles(*args: Any, **kwargs: Any) -> _FastAppendFiles: - return _FastAppendFiles(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.MergeAppendFiles has been changed to private class pyiceberg.table.update.snapshot._MergeAppendFiles", -) -def MergeAppendFiles(*args: Any, **kwargs: Any) -> _MergeAppendFiles: - return _MergeAppendFiles(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.OverwriteFiles has been changed to private class pyiceberg.table.update.snapshot._OverwriteFiles", -) -def OverwriteFiles(*args: Any, **kwargs: Any) -> _OverwriteFiles: - return _OverwriteFiles(*args, **kwargs) diff --git a/pyiceberg/table/name_mapping.py b/pyiceberg/table/name_mapping.py index ec10e33e8a..e27763fc6a 100644 --- a/pyiceberg/table/name_mapping.py +++ b/pyiceberg/table/name_mapping.py @@ -33,7 +33,6 @@ from pyiceberg.schema import P, PartnerAccessor, Schema, SchemaVisitor, SchemaWithPartnerVisitor, visit, visit_with_partner from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel from pyiceberg.types import IcebergType, ListType, MapType, NestedField, PrimitiveType, StructType -from pyiceberg.utils.deprecated import deprecated class MappedField(IcebergBaseModel): @@ -76,18 +75,6 @@ class NameMapping(IcebergRootModel[List[MappedField]]): def _field_by_name(self) -> Dict[str, MappedField]: return visit_name_mapping(self, _IndexByName()) - @deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="Please use `apply_name_mapping` instead", - ) - def find(self, *names: str) -> MappedField: - name = ".".join(names) - try: - return self._field_by_name[name] - except KeyError as e: - raise ValueError(f"Could not find field with name: {name}") from e - def __len__(self) -> int: """Return the number of mappings.""" return len(self.root) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index de9a774e06..d5e8c1aba1 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -98,14 +98,6 @@ class AddSchemaUpdate(IcebergBaseModel): ), ) - initial_change: bool = Field( - default=False, - exclude=True, - deprecated=deprecation_notice( - deprecated_in="0.8.0", removed_in="0.9.0", help_message="CreateTableTransaction can work without this field" - ), - ) - class SetCurrentSchemaUpdate(IcebergBaseModel): action: Literal["set-current-schema"] = Field(default="set-current-schema") @@ -118,14 +110,6 @@ class AddPartitionSpecUpdate(IcebergBaseModel): action: Literal["add-spec"] = Field(default="add-spec") spec: PartitionSpec - initial_change: bool = Field( - default=False, - exclude=True, - deprecated=deprecation_notice( - deprecated_in="0.8.0", removed_in="0.9.0", help_message="CreateTableTransaction can work without this field" - ), - ) - class SetDefaultSpecUpdate(IcebergBaseModel): action: Literal["set-default-spec"] = Field(default="set-default-spec") @@ -138,14 +122,6 @@ class AddSortOrderUpdate(IcebergBaseModel): action: Literal["add-sort-order"] = Field(default="add-sort-order") sort_order: SortOrder = Field(alias="sort-order") - initial_change: bool = Field( - default=False, - exclude=True, - deprecated=deprecation_notice( - deprecated_in="0.8.0", removed_in="0.9.0", help_message="CreateTableTransaction can work without this field" - ), - ) - class SetDefaultSortOrderUpdate(IcebergBaseModel): action: Literal["set-default-sort-order"] = Field(default="set-default-sort-order") diff --git a/pyproject.toml b/pyproject.toml index 40286ef5a1..a2737c3f92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ pytest = "7.4.4" pytest-checkdocs = "2.10.1" pytest-lazy-fixture = "0.6.3" pre-commit = "4.0.1" -fastavro = "1.9.7" +fastavro = "1.10.0" coverage = { version = "^7.4.2", extras = ["toml"] } requests-mock = "1.12.1" moto = { version = "^5.0.2", extras = ["server"] } diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 040c67034b..bdc3d030fd 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -1243,18 +1243,3 @@ def test_update_metadata_log_overflow(table_v2: Table) -> None: table_v2.metadata_location, ) assert len(new_metadata.metadata_log) == 1 - - -def test_table_module_refactoring_backward_compatibility() -> None: - # TODO: Remove this in 0.9.0 - try: - from pyiceberg.table import ( # noqa: F401 - DeleteFiles, - FastAppendFiles, - MergeAppendFiles, - Move, - MoveOperation, - OverwriteFiles, - ) - except Exception as exc: - raise pytest.fail("Importing moved modules should not raise an exception") from exc diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py index 99a247ee19..bd271f59f8 100644 --- a/tests/table/test_name_mapping.py +++ b/tests/table/test_name_mapping.py @@ -283,16 +283,6 @@ def test_mapping_by_name(table_name_mapping_nested: NameMapping) -> None: } -def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: - assert table_name_mapping_nested.find("foo") == MappedField(field_id=1, names=["foo"]) - assert table_name_mapping_nested.find("location.element.latitude") == MappedField(field_id=13, names=["latitude"]) - assert table_name_mapping_nested.find("location", "element", "latitude") == MappedField(field_id=13, names=["latitude"]) - assert table_name_mapping_nested.find(*["location", "element", "latitude"]) == MappedField(field_id=13, names=["latitude"]) - - with pytest.raises(ValueError, match="Could not find field with name: boom"): - table_name_mapping_nested.find("boom") - - def test_update_mapping_no_updates_or_adds(table_name_mapping_nested: NameMapping) -> None: assert update_mapping(table_name_mapping_nested, {}, {}) == table_name_mapping_nested