From f2f412b7e4c0cd33b28adb2b165f3e2ac1171245 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Dec 2024 08:16:03 +0100 Subject: [PATCH 1/6] Bump getdaft from 0.3.15 to 0.4.0 (#1450) Bumps [getdaft](https://github.com/Eventual-Inc/Daft) from 0.3.15 to 0.4.0. - [Release notes](https://github.com/Eventual-Inc/Daft/releases) - [Commits](https://github.com/Eventual-Inc/Daft/compare/v0.3.15...v0.4.0) --- updated-dependencies: - dependency-name: getdaft dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2e87e98d92..c5a4418fe2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1411,22 +1411,22 @@ gcsfuse = ["fusepy"] [[package]] name = "getdaft" -version = "0.3.15" +version = "0.4.0" description = "Distributed Dataframes for Multimodal Data" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "getdaft-0.3.15-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:7f85b0a4b5937419e8845b4718a473f097d900f1b43efa87140397fc7eff2e75"}, - {file = "getdaft-0.3.15-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:3ece3de1a32c83e1ab641e41a3c8d4656cf356848b9c7d1b00564c359c30d6be"}, - {file = "getdaft-0.3.15-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:add1ba84c4a45a57c909730f39c96b1e8c9716bf7646d78164680d62899c4f0e"}, - {file = "getdaft-0.3.15-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f5486f86056427665668a69efa7dbd8361eff262f20d3c73767906dee0f5d55"}, - {file = "getdaft-0.3.15-cp38-abi3-win_amd64.whl", hash = "sha256:2c03a3ea203582004b664742f6bad5975fae9f02281942edc46b2b17622040a4"}, - {file = "getdaft-0.3.15.tar.gz", hash = "sha256:101726149ff611c6976f59670bf4fae82c9b939ae4a8d812d88a1cb824c1bca1"}, + {file = "getdaft-0.4.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:82464e2c809a3c659f14ed4887c430ed3eea959121cb1702cb48e32c499c17b8"}, + {file = "getdaft-0.4.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:df5ded32e96167cbb30aa579b1f8b156e63d19221288eae9e5763c0a4c4425ba"}, + {file = "getdaft-0.4.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:638b2f0497ec41343400ba8914f908581db9d3087611166579e700838f48876a"}, + {file = "getdaft-0.4.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df79ea828e9bc94cd1aea362c4a06bcd2e1363562df2ff95f8a1173d2b5f3320"}, + {file = "getdaft-0.4.0-cp39-abi3-win_amd64.whl", hash = "sha256:56b8487e77caf6f4f973a9350f89893d8063736d2a38127bdd840e1555faf1b5"}, + {file = "getdaft-0.4.0.tar.gz", hash = "sha256:15503f1930d9309d9d9caca1b4245064a33e00a111facd845380101d4cebc720"}, ] [package.dependencies] fsspec = "*" -pyarrow = ">=7.0.0" +pyarrow = ">=8.0.0" tqdm = "*" typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.10\""} From 593ee34cbb455858d9b4663c30472c86126c80c5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Dec 2024 08:16:37 +0100 Subject: [PATCH 2/6] Bump moto from 5.0.23 to 5.0.24 (#1451) Bumps [moto](https://github.com/getmoto/moto) from 5.0.23 to 5.0.24. - [Release notes](https://github.com/getmoto/moto/releases) - [Changelog](https://github.com/getmoto/moto/blob/master/CHANGELOG.md) - [Commits](https://github.com/getmoto/moto/compare/5.0.23...5.0.24) --- updated-dependencies: - dependency-name: moto dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index c5a4418fe2..5fbbd8fd2a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2243,13 +2243,13 @@ type = ["mypy (==1.11.2)"] [[package]] name = "moto" -version = "5.0.23" +version = "5.0.24" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "moto-5.0.23-py3-none-any.whl", hash = "sha256:a8069f9c945e7503c43eccec30693f5656e0f8efb0256dfd814d99dedc38429e"}, - {file = "moto-5.0.23.tar.gz", hash = "sha256:8a32636647e45a9b76c32de0ed15c4b083c62849993217f96aa60026a2ca1721"}, + {file = "moto-5.0.24-py3-none-any.whl", hash = "sha256:4d826f1574849f18ddd2fcbf614d97f82c8fddfb9d95fac1078da01a39b57c10"}, + {file = "moto-5.0.24.tar.gz", hash = "sha256:dba6426bd770fbb9d892633fbd35253cbc181eeaa0eba97d6f058720a8fe9b42"}, ] [package.dependencies] From efdbcc5b3b9bb4eb617e09d060a23eb2f11a50e5 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 20 Dec 2024 08:17:13 +0100 Subject: [PATCH 3/6] Signer: Make `token` optional (#1447) Similar to Java: https://github.com/apache/iceberg/blob/91a1505d09cebcd1d088ac53cd42732c343883de/aws/src/main/java/org/apache/iceberg/aws/s3/signer/S3V4RestSignerClient.java#L205 Fixes #1442 --- pyiceberg/io/fsspec.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 434ae67df0..23796d4e6a 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -94,13 +94,13 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> AWSRequest: - if TOKEN not in properties: - raise SignError("Signer set, but token is not available") - signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT) - signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} + signer_headers = {} + if token := properties.get(TOKEN): + signer_headers = {"Authorization": f"Bearer {token}"} + signer_body = { "method": request.method, "region": request.context["client_region"], From 85b20531678f55e63703e3db570e607b17b4432d Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Dec 2024 09:09:44 -0500 Subject: [PATCH 4/6] Remove `0.9.0` deprecations (#1448) * remove deprecated * pyarrow * NameMapping.find --- pyiceberg/cli/console.py | 25 ---- pyiceberg/io/pyarrow.py | 183 +---------------------------- pyiceberg/table/__init__.py | 61 +--------- pyiceberg/table/name_mapping.py | 13 -- pyiceberg/table/update/__init__.py | 24 ---- tests/table/test_init.py | 15 --- tests/table/test_name_mapping.py | 10 -- 7 files changed, 2 insertions(+), 329 deletions(-) diff --git a/pyiceberg/cli/console.py b/pyiceberg/cli/console.py index 82c27a256b..83e67a3cbb 100644 --- a/pyiceberg/cli/console.py +++ b/pyiceberg/cli/console.py @@ -34,34 +34,9 @@ from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchPropertyException, NoSuchTableError from pyiceberg.table import TableProperties from pyiceberg.table.refs import SnapshotRef -from pyiceberg.utils.deprecated import deprecated from pyiceberg.utils.properties import property_as_int -class DeprecatedConstants: - @property - @deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="DEFAULT_MAX_SNAPSHOT_AGE_MS is deprecated. Use TableProperties.MAX_SNAPSHOT_AGE_MS_DEFAULT instead.", - ) - def DEFAULT_MAX_SNAPSHOT_AGE_MS(self) -> int: - return 432000000 - - @property - @deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="DEFAULT_MIN_SNAPSHOTS_TO_KEEP is deprecated. Use TableProperties.MIN_SNAPSHOTS_TO_KEEP_DEFAULT instead.", - ) - def DEFAULT_MIN_SNAPSHOTS_TO_KEEP(self) -> int: - return 1 - - -DEFAULT_MIN_SNAPSHOTS_TO_KEEP = DeprecatedConstants().DEFAULT_MIN_SNAPSHOTS_TO_KEEP -DEFAULT_MAX_SNAPSHOT_AGE_MS = DeprecatedConstants().DEFAULT_MAX_SNAPSHOT_AGE_MS - - def catch_exception() -> Callable: # type: ignore def decorator(func: Callable) -> Callable: # type: ignore @wraps(func) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 9847ec5a1c..ef6937f1bb 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -165,7 +165,7 @@ from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.config import Config from pyiceberg.utils.datetime import millis_to_datetime -from pyiceberg.utils.deprecated import deprecated, deprecation_message +from pyiceberg.utils.deprecated import deprecation_message from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int from pyiceberg.utils.singleton import Singleton from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string @@ -1528,187 +1528,6 @@ def _record_batches_from_scan_tasks_and_deletes( total_row_count += len(batch) -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="project_table is deprecated. Use ArrowScan.to_table instead.", -) -def project_table( - tasks: Iterable[FileScanTask], - table_metadata: TableMetadata, - io: FileIO, - row_filter: BooleanExpression, - projected_schema: Schema, - case_sensitive: bool = True, - limit: Optional[int] = None, -) -> pa.Table: - """Resolve the right columns based on the identifier. - - Args: - tasks (Iterable[FileScanTask]): A URI or a path to a local file. - table_metadata (TableMetadata): The table metadata of the table that's being queried - io (FileIO): A FileIO to open streams to the object store - row_filter (BooleanExpression): The expression for filtering rows. - projected_schema (Schema): The output schema. - case_sensitive (bool): Case sensitivity when looking up column names. - limit (Optional[int]): Limit the number of records. - - Raises: - ResolveError: When an incompatible query is done. - """ - scheme, netloc, _ = PyArrowFileIO.parse_location(table_metadata.location) - if isinstance(io, PyArrowFileIO): - fs = io.fs_by_scheme(scheme, netloc) - else: - try: - from pyiceberg.io.fsspec import FsspecFileIO - - if isinstance(io, FsspecFileIO): - from pyarrow.fs import PyFileSystem - - fs = PyFileSystem(FSSpecHandler(io.get_fs(scheme))) - else: - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") - except ModuleNotFoundError as e: - # When FsSpec is not installed - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") from e - - use_large_types = property_as_bool(io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, True) - - bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive) - - projected_field_ids = { - id for id in projected_schema.field_ids if not isinstance(projected_schema.find_type(id), (MapType, ListType)) - }.union(extract_field_ids(bound_row_filter)) - - deletes_per_file = _read_all_delete_files(fs, tasks) - executor = ExecutorFactory.get_or_create() - futures = [ - executor.submit( - _task_to_table, - fs, - task, - bound_row_filter, - projected_schema, - projected_field_ids, - deletes_per_file.get(task.file.file_path), - case_sensitive, - table_metadata.name_mapping(), - use_large_types, - ) - for task in tasks - ] - total_row_count = 0 - # for consistent ordering, we need to maintain future order - futures_index = {f: i for i, f in enumerate(futures)} - completed_futures: SortedList[Future[pa.Table]] = SortedList(iterable=[], key=lambda f: futures_index[f]) - for future in concurrent.futures.as_completed(futures): - completed_futures.add(future) - if table_result := future.result(): - total_row_count += len(table_result) - # stop early if limit is satisfied - if limit is not None and total_row_count >= limit: - break - - # by now, we've either completed all tasks or satisfied the limit - if limit is not None: - _ = [f.cancel() for f in futures if not f.done()] - - tables = [f.result() for f in completed_futures if f.result()] - - if len(tables) < 1: - return pa.Table.from_batches([], schema=schema_to_pyarrow(projected_schema, include_field_ids=False)) - - result = pa.concat_tables(tables, promote_options="permissive") - - if limit is not None: - return result.slice(0, limit) - - return result - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="project_table is deprecated. Use ArrowScan.to_record_batches instead.", -) -def project_batches( - tasks: Iterable[FileScanTask], - table_metadata: TableMetadata, - io: FileIO, - row_filter: BooleanExpression, - projected_schema: Schema, - case_sensitive: bool = True, - limit: Optional[int] = None, -) -> Iterator[pa.RecordBatch]: - """Resolve the right columns based on the identifier. - - Args: - tasks (Iterable[FileScanTask]): A URI or a path to a local file. - table_metadata (TableMetadata): The table metadata of the table that's being queried - io (FileIO): A FileIO to open streams to the object store - row_filter (BooleanExpression): The expression for filtering rows. - projected_schema (Schema): The output schema. - case_sensitive (bool): Case sensitivity when looking up column names. - limit (Optional[int]): Limit the number of records. - - Raises: - ResolveError: When an incompatible query is done. - """ - scheme, netloc, _ = PyArrowFileIO.parse_location(table_metadata.location) - if isinstance(io, PyArrowFileIO): - fs = io.fs_by_scheme(scheme, netloc) - else: - try: - from pyiceberg.io.fsspec import FsspecFileIO - - if isinstance(io, FsspecFileIO): - from pyarrow.fs import PyFileSystem - - fs = PyFileSystem(FSSpecHandler(io.get_fs(scheme))) - else: - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") - except ModuleNotFoundError as e: - # When FsSpec is not installed - raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") from e - - use_large_types = property_as_bool(io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, True) - - bound_row_filter = bind(table_metadata.schema(), row_filter, case_sensitive=case_sensitive) - - projected_field_ids = { - id for id in projected_schema.field_ids if not isinstance(projected_schema.find_type(id), (MapType, ListType)) - }.union(extract_field_ids(bound_row_filter)) - - deletes_per_file = _read_all_delete_files(fs, tasks) - - total_row_count = 0 - - for task in tasks: - # stop early if limit is satisfied - if limit is not None and total_row_count >= limit: - break - batches = _task_to_record_batches( - fs, - task, - bound_row_filter, - projected_schema, - projected_field_ids, - deletes_per_file.get(task.file.file_path), - case_sensitive, - table_metadata.name_mapping(), - use_large_types, - ) - for batch in batches: - if limit is not None: - if total_row_count >= limit: - break - elif total_row_count + len(batch) >= limit: - batch = batch.slice(0, limit - total_row_count) - yield batch - total_row_count += len(batch) - - def _to_requested_schema( requested_schema: Schema, file_schema: Schema, diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 02e8e43ff3..4ec3403bb3 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -111,14 +111,11 @@ UpgradeFormatVersionUpdate, update_table_metadata, ) -from pyiceberg.table.update.schema import UpdateSchema, _Move, _MoveOperation +from pyiceberg.table.update.schema import UpdateSchema from pyiceberg.table.update.snapshot import ( ManageSnapshots, UpdateSnapshot, - _DeleteFiles, _FastAppendFiles, - _MergeAppendFiles, - _OverwriteFiles, ) from pyiceberg.table.update.spec import UpdateSpec from pyiceberg.transforms import IdentityTransform @@ -137,8 +134,6 @@ ) from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.config import Config -from pyiceberg.utils.deprecated import deprecated -from pyiceberg.utils.deprecated import deprecation_message as deprecation_message from pyiceberg.utils.properties import property_as_bool if TYPE_CHECKING: @@ -1641,57 +1636,3 @@ def _parquet_files_to_data_files(table_metadata: TableMetadata, file_paths: List from pyiceberg.io.pyarrow import parquet_files_to_data_files yield from parquet_files_to_data_files(io=io, table_metadata=table_metadata, file_paths=iter(file_paths)) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.Move has been changed to private class pyiceberg.table.update.schema._Move", -) -def Move(*args: Any, **kwargs: Any) -> _Move: - return _Move(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.MoveOperation has been changed to private class pyiceberg.table.update.schema._MoveOperation", -) -def MoveOperation(*args: Any, **kwargs: Any) -> _MoveOperation: - return _MoveOperation(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.DeleteFiles has been changed to private class pyiceberg.table.update.snapshot._DeleteFiles", -) -def DeleteFiles(*args: Any, **kwargs: Any) -> _DeleteFiles: - return _DeleteFiles(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.FastAppendFiles has been changed to private class pyiceberg.table.update.snapshot._FastAppendFiles", -) -def FastAppendFiles(*args: Any, **kwargs: Any) -> _FastAppendFiles: - return _FastAppendFiles(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.MergeAppendFiles has been changed to private class pyiceberg.table.update.snapshot._MergeAppendFiles", -) -def MergeAppendFiles(*args: Any, **kwargs: Any) -> _MergeAppendFiles: - return _MergeAppendFiles(*args, **kwargs) - - -@deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="pyiceberg.table.OverwriteFiles has been changed to private class pyiceberg.table.update.snapshot._OverwriteFiles", -) -def OverwriteFiles(*args: Any, **kwargs: Any) -> _OverwriteFiles: - return _OverwriteFiles(*args, **kwargs) diff --git a/pyiceberg/table/name_mapping.py b/pyiceberg/table/name_mapping.py index ec10e33e8a..e27763fc6a 100644 --- a/pyiceberg/table/name_mapping.py +++ b/pyiceberg/table/name_mapping.py @@ -33,7 +33,6 @@ from pyiceberg.schema import P, PartnerAccessor, Schema, SchemaVisitor, SchemaWithPartnerVisitor, visit, visit_with_partner from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel from pyiceberg.types import IcebergType, ListType, MapType, NestedField, PrimitiveType, StructType -from pyiceberg.utils.deprecated import deprecated class MappedField(IcebergBaseModel): @@ -76,18 +75,6 @@ class NameMapping(IcebergRootModel[List[MappedField]]): def _field_by_name(self) -> Dict[str, MappedField]: return visit_name_mapping(self, _IndexByName()) - @deprecated( - deprecated_in="0.8.0", - removed_in="0.9.0", - help_message="Please use `apply_name_mapping` instead", - ) - def find(self, *names: str) -> MappedField: - name = ".".join(names) - try: - return self._field_by_name[name] - except KeyError as e: - raise ValueError(f"Could not find field with name: {name}") from e - def __len__(self) -> int: """Return the number of mappings.""" return len(self.root) diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py index de9a774e06..d5e8c1aba1 100644 --- a/pyiceberg/table/update/__init__.py +++ b/pyiceberg/table/update/__init__.py @@ -98,14 +98,6 @@ class AddSchemaUpdate(IcebergBaseModel): ), ) - initial_change: bool = Field( - default=False, - exclude=True, - deprecated=deprecation_notice( - deprecated_in="0.8.0", removed_in="0.9.0", help_message="CreateTableTransaction can work without this field" - ), - ) - class SetCurrentSchemaUpdate(IcebergBaseModel): action: Literal["set-current-schema"] = Field(default="set-current-schema") @@ -118,14 +110,6 @@ class AddPartitionSpecUpdate(IcebergBaseModel): action: Literal["add-spec"] = Field(default="add-spec") spec: PartitionSpec - initial_change: bool = Field( - default=False, - exclude=True, - deprecated=deprecation_notice( - deprecated_in="0.8.0", removed_in="0.9.0", help_message="CreateTableTransaction can work without this field" - ), - ) - class SetDefaultSpecUpdate(IcebergBaseModel): action: Literal["set-default-spec"] = Field(default="set-default-spec") @@ -138,14 +122,6 @@ class AddSortOrderUpdate(IcebergBaseModel): action: Literal["add-sort-order"] = Field(default="add-sort-order") sort_order: SortOrder = Field(alias="sort-order") - initial_change: bool = Field( - default=False, - exclude=True, - deprecated=deprecation_notice( - deprecated_in="0.8.0", removed_in="0.9.0", help_message="CreateTableTransaction can work without this field" - ), - ) - class SetDefaultSortOrderUpdate(IcebergBaseModel): action: Literal["set-default-sort-order"] = Field(default="set-default-sort-order") diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 040c67034b..bdc3d030fd 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -1243,18 +1243,3 @@ def test_update_metadata_log_overflow(table_v2: Table) -> None: table_v2.metadata_location, ) assert len(new_metadata.metadata_log) == 1 - - -def test_table_module_refactoring_backward_compatibility() -> None: - # TODO: Remove this in 0.9.0 - try: - from pyiceberg.table import ( # noqa: F401 - DeleteFiles, - FastAppendFiles, - MergeAppendFiles, - Move, - MoveOperation, - OverwriteFiles, - ) - except Exception as exc: - raise pytest.fail("Importing moved modules should not raise an exception") from exc diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py index 99a247ee19..bd271f59f8 100644 --- a/tests/table/test_name_mapping.py +++ b/tests/table/test_name_mapping.py @@ -283,16 +283,6 @@ def test_mapping_by_name(table_name_mapping_nested: NameMapping) -> None: } -def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: - assert table_name_mapping_nested.find("foo") == MappedField(field_id=1, names=["foo"]) - assert table_name_mapping_nested.find("location.element.latitude") == MappedField(field_id=13, names=["latitude"]) - assert table_name_mapping_nested.find("location", "element", "latitude") == MappedField(field_id=13, names=["latitude"]) - assert table_name_mapping_nested.find(*["location", "element", "latitude"]) == MappedField(field_id=13, names=["latitude"]) - - with pytest.raises(ValueError, match="Could not find field with name: boom"): - table_name_mapping_nested.find("boom") - - def test_update_mapping_no_updates_or_adds(table_name_mapping_nested: NameMapping) -> None: assert update_mapping(table_name_mapping_nested, {}, {}) == table_name_mapping_nested From ab6b1906b70541ca3dec1d246ce1f114dd015777 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 20 Dec 2024 16:17:19 +0100 Subject: [PATCH 5/6] Remove unneeded partitioning (#1417) --- dev/provision.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/dev/provision.py b/dev/provision.py index 53360748b6..b358da6593 100644 --- a/dev/provision.py +++ b/dev/provision.py @@ -22,7 +22,17 @@ from pyiceberg.schema import Schema from pyiceberg.types import FixedType, NestedField, UUIDType -spark = SparkSession.builder.getOrCreate() +# The configuration is important, otherwise we get many small +# parquet files with a single row. When a positional delete +# hits the Parquet file with one row, the parquet file gets +# dropped instead of having a merge-on-read delete file. +spark = ( + SparkSession + .builder + .config("spark.sql.shuffle.partitions", "1") + .config("spark.default.parallelism", "1") + .getOrCreate() +) catalogs = { 'rest': load_catalog( @@ -120,10 +130,6 @@ """ ) - # Partitioning is not really needed, but there is a bug: - # https://github.com/apache/iceberg/pull/7685 - spark.sql(f"ALTER TABLE {catalog_name}.default.test_positional_mor_deletes ADD PARTITION FIELD years(dt) AS dt_years") - spark.sql( f""" INSERT INTO {catalog_name}.default.test_positional_mor_deletes @@ -168,10 +174,6 @@ """ ) - # Partitioning is not really needed, but there is a bug: - # https://github.com/apache/iceberg/pull/7685 - spark.sql(f"ALTER TABLE {catalog_name}.default.test_positional_mor_double_deletes ADD PARTITION FIELD years(dt) AS dt_years") - spark.sql( f""" INSERT INTO {catalog_name}.default.test_positional_mor_double_deletes From dbcf65b4892779efca7362e069edecff7f2bf69f Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 20 Dec 2024 16:26:14 +0100 Subject: [PATCH 6/6] Bump to Poetry 1.8.5 (#1455) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 86f3aa54b0..f2bb6f6871 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ help: ## Display this help install-poetry: ## Install poetry if the user has not done that yet. @if ! command -v poetry &> /dev/null; then \ echo "Poetry could not be found. Installing..."; \ - pip install --user poetry==1.8.4; \ + pip install --user poetry==1.8.5; \ else \ echo "Poetry is already installed."; \ fi