Skip to content

Commit

Permalink
Replace reference of Table.identifier with Table.name (#1346)
Browse files Browse the repository at this point in the history
* fix Table.name

* replace Table.identifier with Table.name

* add warning filter
  • Loading branch information
kevinjqliu authored Nov 20, 2024
1 parent 8e0e6a1 commit 7a4734e
Show file tree
Hide file tree
Showing 15 changed files with 105 additions and 108 deletions.
2 changes: 1 addition & 1 deletion pyiceberg/catalog/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ def commit_table(
NoSuchTableError: If a table with the given identifier does not exist.
CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
"""
table_identifier = self._identifier_to_tuple_without_catalog(table.identifier)
table_identifier = table.name()
database_name, table_name = self.identifier_to_database_and_table(table_identifier, NoSuchTableError)

current_glue_table: Optional[TableTypeDef]
Expand Down
4 changes: 2 additions & 2 deletions pyiceberg/catalog/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def _convert_hive_into_iceberg(self, table: HiveTable) -> Table:
)

def _convert_iceberg_into_hive(self, table: Table) -> HiveTable:
identifier_tuple = self._identifier_to_tuple_without_catalog(table.identifier)
identifier_tuple = table.name()
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
current_time_millis = int(time.time() * 1000)

Expand Down Expand Up @@ -455,7 +455,7 @@ def commit_table(
NoSuchTableError: If a table with the given identifier does not exist.
CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
"""
table_identifier = self._identifier_to_tuple_without_catalog(table.identifier)
table_identifier = table.name()
database_name, table_name = self.identifier_to_database_and_table(table_identifier, NoSuchTableError)
# commit to hive
# https://github.com/apache/hive/blob/master/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift#L1232
Expand Down
2 changes: 1 addition & 1 deletion pyiceberg/catalog/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ def commit_table(
CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
CommitStateUnknownException: Failed due to an internal exception on the side of the catalog.
"""
identifier = self._identifier_to_tuple_without_catalog(table.identifier)
identifier = table.name()
table_identifier = TableIdentifier(namespace=identifier[:-1], name=identifier[-1])
table_request = CommitTableRequest(identifier=table_identifier, requirements=requirements, updates=updates)

Expand Down
4 changes: 2 additions & 2 deletions pyiceberg/catalog/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def commit_table(
NoSuchTableError: If a table with the given identifier does not exist.
CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
"""
table_identifier = self._identifier_to_tuple_without_catalog(table.identifier)
table_identifier = table.name()
namespace_tuple = Catalog.namespace_from(table_identifier)
namespace = Catalog.namespace_to_string(namespace_tuple)
table_name = Catalog.table_name_from(table_identifier)
Expand All @@ -430,7 +430,7 @@ def commit_table(
except NoSuchTableError:
current_table = None

updated_staged_table = self._update_and_stage_table(current_table, table.identifier, requirements, updates)
updated_staged_table = self._update_and_stage_table(current_table, table.name(), requirements, updates)
if current_table and updated_staged_table.metadata == current_table.metadata:
# no changes, do nothing
return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location)
Expand Down
4 changes: 2 additions & 2 deletions pyiceberg/cli/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def files(self, table: Table, history: bool) -> None:
else:
snapshots = []

snapshot_tree = Tree(f"Snapshots: {'.'.join(table.identifier)}")
snapshot_tree = Tree(f"Snapshots: {'.'.join(table.name())}")
io = table.io

for snapshot in snapshots:
Expand Down Expand Up @@ -216,7 +216,7 @@ class FauxTable(IcebergBaseModel):

print(
FauxTable(
identifier=table.identifier, metadata=table.metadata, metadata_location=table.metadata_location
identifier=table.name(), metadata=table.metadata, metadata_location=table.metadata_location
).model_dump_json()
)

Expand Down
2 changes: 1 addition & 1 deletion pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@ def name(self) -> Identifier:
Returns:
An Identifier tuple of the table name
"""
return self.identifier
return self._identifier

def scan(
self,
Expand Down
16 changes: 8 additions & 8 deletions tests/catalog/integration_test_dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_create_table(
test_catalog.create_namespace(database_name)
test_catalog.create_table(identifier, table_schema_nested, get_s3_path(get_bucket_name(), database_name, table_name))
table = test_catalog.load_table(identifier)
assert table.identifier == (test_catalog.name,) + identifier
assert table.name() == identifier
metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)

Expand All @@ -78,7 +78,7 @@ def test_create_table_with_default_location(
test_catalog.create_namespace(database_name)
test_catalog.create_table(identifier, table_schema_nested)
table = test_catalog.load_table(identifier)
assert table.identifier == (test_catalog.name,) + identifier
assert table.name() == identifier
metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)

Expand All @@ -102,15 +102,15 @@ def test_create_table_if_not_exists_duplicated_table(
test_catalog.create_namespace(database_name)
table1 = test_catalog.create_table((database_name, table_name), table_schema_nested)
table2 = test_catalog.create_table_if_not_exists((database_name, table_name), table_schema_nested)
assert table1.identifier == table2.identifier
assert table1.name() == table2.name()


def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
identifier = (database_name, table_name)
test_catalog.create_namespace(database_name)
table = test_catalog.create_table(identifier, table_schema_nested)
loaded_table = test_catalog.load_table(identifier)
assert table.identifier == loaded_table.identifier
assert table.name() == loaded_table.name()
assert table.metadata_location == loaded_table.metadata_location
assert table.metadata == loaded_table.metadata

Expand All @@ -134,11 +134,11 @@ def test_rename_table(
new_table_name = f"rename-{table_name}"
identifier = (database_name, table_name)
table = test_catalog.create_table(identifier, table_schema_nested)
assert table.identifier == (test_catalog.name,) + identifier
assert table.name() == identifier
new_identifier = (new_database_name, new_table_name)
test_catalog.rename_table(identifier, new_identifier)
new_table = test_catalog.load_table(new_identifier)
assert new_table.identifier == (test_catalog.name,) + new_identifier
assert new_table.name() == new_identifier
assert new_table.metadata_location == table.metadata_location
metadata_location = new_table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
Expand All @@ -150,7 +150,7 @@ def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_na
identifier = (database_name, table_name)
test_catalog.create_namespace(database_name)
table = test_catalog.create_table(identifier, table_schema_nested)
assert table.identifier == (test_catalog.name,) + identifier
assert table.name() == identifier
test_catalog.drop_table(identifier)
with pytest.raises(NoSuchTableError):
test_catalog.load_table(identifier)
Expand All @@ -163,7 +163,7 @@ def test_purge_table(
test_catalog.create_namespace(database_name)
test_catalog.create_table(identifier, table_schema_nested)
table = test_catalog.load_table(identifier)
assert table.identifier == (test_catalog.name,) + identifier
assert table.name() == identifier
metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
test_catalog.purge_table(identifier)
Expand Down
20 changes: 10 additions & 10 deletions tests/catalog/integration_test_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def test_create_table(
test_catalog.create_namespace(database_name)
test_catalog.create_table(identifier, table_schema_nested, get_s3_path(get_bucket_name(), database_name, table_name))
table = test_catalog.load_table(identifier)
assert table.identifier == (CATALOG_NAME,) + identifier
assert table.name() == identifier
metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0
Expand Down Expand Up @@ -183,7 +183,7 @@ def test_create_table_with_default_location(
test_catalog.create_namespace(database_name)
test_catalog.create_table(identifier, table_schema_nested)
table = test_catalog.load_table(identifier)
assert table.identifier == (CATALOG_NAME,) + identifier
assert table.name() == identifier
metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0
Expand All @@ -208,15 +208,15 @@ def test_create_table_if_not_exists_duplicated_table(
test_catalog.create_namespace(database_name)
table1 = test_catalog.create_table((database_name, table_name), table_schema_nested)
table2 = test_catalog.create_table_if_not_exists((database_name, table_name), table_schema_nested)
assert table1.identifier == table2.identifier
assert table1.name() == table2.name()


def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
identifier = (database_name, table_name)
test_catalog.create_namespace(database_name)
table = test_catalog.create_table(identifier, table_schema_nested)
loaded_table = test_catalog.load_table(identifier)
assert table.identifier == loaded_table.identifier
assert table.name() == loaded_table.name()
assert table.metadata_location == loaded_table.metadata_location
assert table.metadata == loaded_table.metadata
assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0
Expand All @@ -242,11 +242,11 @@ def test_rename_table(
identifier = (database_name, table_name)
table = test_catalog.create_table(identifier, table_schema_nested)
assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0
assert table.identifier == (CATALOG_NAME,) + identifier
assert table.name() == identifier
new_identifier = (new_database_name, new_table_name)
test_catalog.rename_table(identifier, new_identifier)
new_table = test_catalog.load_table(new_identifier)
assert new_table.identifier == (CATALOG_NAME,) + new_identifier
assert new_table.name() == new_identifier
assert new_table.metadata_location == table.metadata_location
metadata_location = new_table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
Expand All @@ -258,7 +258,7 @@ def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_na
identifier = (database_name, table_name)
test_catalog.create_namespace(database_name)
table = test_catalog.create_table(identifier, table_schema_nested)
assert table.identifier == (CATALOG_NAME,) + identifier
assert table.name() == identifier
test_catalog.drop_table(identifier)
with pytest.raises(NoSuchTableError):
test_catalog.load_table(identifier)
Expand All @@ -271,7 +271,7 @@ def test_purge_table(
test_catalog.create_namespace(database_name)
test_catalog.create_table(identifier, table_schema_nested)
table = test_catalog.load_table(identifier)
assert table.identifier == (CATALOG_NAME,) + identifier
assert table.name() == identifier
metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
test_catalog.purge_table(identifier)
Expand Down Expand Up @@ -536,7 +536,7 @@ def test_create_table_transaction(
update_snapshot.append_data_file(data_file)

table = test_catalog.load_table(identifier)
assert table.identifier == (CATALOG_NAME,) + identifier
assert table.name() == identifier
metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0
Expand Down Expand Up @@ -584,6 +584,6 @@ def test_register_table_with_given_location(
test_catalog.drop_table(identifier) # drops the table but keeps the metadata file
assert not test_catalog.table_exists(identifier)
table = test_catalog.register_table(new_identifier, location)
assert table.identifier == (CATALOG_NAME,) + new_identifier
assert table.name() == new_identifier
assert table.metadata_location == location
assert test_catalog.table_exists(new_identifier)
2 changes: 1 addition & 1 deletion tests/catalog/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location:
def commit_table(
self, table: Table, requirements: Tuple[TableRequirement, ...], updates: Tuple[TableUpdate, ...]
) -> CommitTableResponse:
identifier_tuple = self._identifier_to_tuple_without_catalog(table.identifier)
identifier_tuple = table.name()
current_table = self.load_table(identifier_tuple)
base_metadata = current_table.metadata

Expand Down
Loading

0 comments on commit 7a4734e

Please sign in to comment.