Skip to content

Commit

Permalink
Fix overwrite when filtering all the data
Browse files Browse the repository at this point in the history
  • Loading branch information
ndrluis committed Aug 8, 2024
1 parent eca9870 commit ac7b4db
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
4 changes: 3 additions & 1 deletion pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,9 @@ def delete(self, delete_filter: Union[str, BooleanExpression], snapshot_properti
filtered_df = df.filter(preserve_row_filter)

# Only rewrite if there are records being deleted
if len(df) != len(filtered_df):
if len(filtered_df) == 0:
replaced_files.append((original_file.file, []))
elif len(df) != len(filtered_df):
replaced_files.append((
original_file.file,
list(
Expand Down
28 changes: 26 additions & 2 deletions tests/integration/test_writes/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@
from pyiceberg.catalog.rest import RestCatalog
from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.exceptions import NoSuchTableError
from pyiceberg.expressions import In
from pyiceberg.io.pyarrow import _dataframe_to_data_files
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.table import TableProperties
from pyiceberg.transforms import IdentityTransform
from pyiceberg.types import IntegerType, LongType, NestedField
from pyiceberg.types import IntegerType, LongType, NestedField, StringType
from utils import _create_table


Expand Down Expand Up @@ -1297,7 +1298,6 @@ def test_rest_catalog_with_empty_catalog_name_append_data(session_catalog: Catal
tbl.append(arrow_table_with_null)


@pytest.mark.integration
def test_table_v1_with_null_nested_namespace(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
identifier = "default.lower.table_v1_with_null_nested_namespace"
tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_null])
Expand All @@ -1309,3 +1309,27 @@ def test_table_v1_with_null_nested_namespace(session_catalog: Catalog, arrow_tab

# We expect no error here
session_catalog.drop_table(identifier)


@pytest.mark.integration
def test_overwrite_all_data_with_filter(session_catalog: Catalog) -> None:
schema = Schema(
NestedField(1, "id", StringType(), required=True),
NestedField(2, "name", StringType(), required=False),
identifier_field_ids=[1],
)

data = pa.Table.from_pylist(
[
{"id": "1", "name": "Amsterdam"},
{"id": "2", "name": "San Francisco"},
{"id": "3", "name": "Drachten"},
],
schema=schema.as_arrow(),
)

identifier = "default.test_overwrite_all_data_with_filter"
tbl = _create_table(session_catalog, identifier, data=[data], schema=schema)
tbl.overwrite(data, In("id", ["1", "2", "3"]))

assert len(tbl.scan().to_arrow()) == 3

0 comments on commit ac7b4db

Please sign in to comment.