From 11ce7a6998583280311d3b911dc7a637c387b1ee Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 14 Dec 2024 14:54:27 +0100 Subject: [PATCH 01/57] feat: initial setup for S3TablesCatalog --- pyiceberg/catalog/__init__.py | 4 +- pyiceberg/catalog/s3tables.py | 228 +++++++++++++++++++++++++++++++++ pyiceberg/exceptions.py | 5 + pyproject.toml | 1 + tests/catalog/test_s3tables.py | 71 ++++++++++ 5 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 pyiceberg/catalog/s3tables.py create mode 100644 tests/catalog/test_s3tables.py diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index aad225eae6..a5eb8feea9 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -903,8 +903,8 @@ def _get_default_warehouse_location(self, database_name: str, table_name: str) - raise ValueError("No default path is set, please specify a location when creating a table") @staticmethod - def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str) -> None: - ToOutputFile.table_metadata(metadata, io.new_output(metadata_path)) + def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str, overwrite: bool = False) -> None: + ToOutputFile.table_metadata(metadata, io.new_output(metadata_path), overwrite=overwrite) @staticmethod def _get_metadata_location(location: str, new_version: int = 0) -> str: diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py new file mode 100644 index 0000000000..82cd862571 --- /dev/null +++ b/pyiceberg/catalog/s3tables.py @@ -0,0 +1,228 @@ +import re +from typing import TYPE_CHECKING +from typing import List +from typing import Optional +from typing import Set +from typing import Tuple +from typing import Union + +import boto3 + +from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION, MetastoreCatalog +from pyiceberg.catalog import WAREHOUSE_LOCATION +from pyiceberg.catalog import Catalog +from pyiceberg.catalog import PropertiesUpdateSummary 
+from pyiceberg.exceptions import S3TablesError +from pyiceberg.exceptions import TableBucketNotFound +from pyiceberg.io import AWS_ACCESS_KEY_ID +from pyiceberg.io import AWS_REGION +from pyiceberg.io import AWS_SECRET_ACCESS_KEY +from pyiceberg.io import AWS_SESSION_TOKEN +from pyiceberg.io import load_file_io +from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec +from pyiceberg.schema import Schema +from pyiceberg.serializers import FromInputFile +from pyiceberg.table import CommitTableResponse +from pyiceberg.table import CreateTableTransaction +from pyiceberg.table import Table +from pyiceberg.table import TableRequirement +from pyiceberg.table.metadata import new_table_metadata +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder +from pyiceberg.table.update import TableUpdate +from pyiceberg.typedef import EMPTY_DICT, Identifier +from pyiceberg.typedef import Properties +from pyiceberg.utils.properties import get_first_property_value + +if TYPE_CHECKING: + import pyarrow as pa + +S3TABLES_PROFILE_NAME = "s3tables.profile-name" +S3TABLES_REGION = "s3tables.region" +S3TABLES_ACCESS_KEY_ID = "s3tables.access-key-id" +S3TABLES_SECRET_ACCESS_KEY = "s3tables.secret-access-key" +S3TABLES_SESSION_TOKEN = "s3tables.session-token" + +S3TABLES_ENDPOINT = "s3tables.endpoint" + + +class S3TableCatalog(MetastoreCatalog): + def __init__(self, name: str, **properties: str): + super().__init__(name, **properties) + + session = boto3.Session( + profile_name=properties.get(S3TABLES_PROFILE_NAME), + region_name=get_first_property_value(properties, S3TABLES_REGION, AWS_REGION), + botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION), + aws_access_key_id=get_first_property_value(properties, S3TABLES_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), + aws_secret_access_key=get_first_property_value( + properties, S3TABLES_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY + ), + aws_session_token=get_first_property_value(properties, S3TABLES_SESSION_TOKEN, 
AWS_SESSION_TOKEN), + ) + # TODO: s3tables client only supported from boto3>=1.35.74 so this can crash + # TODO: set a custom user-agent for api calls like the Java implementation + self.s3tables = session.client("s3tables") + # TODO: handle malformed properties instead of just raising a key error here + self.table_bucket_arn = self.properties[WAREHOUSE_LOCATION] + try: + self.s3tables.get_table_bucket(tableBucketARN=self.table_bucket_arn) + except self.s3tables.exceptions.NotFoundException as e: + raise TableBucketNotFound(e) from e + + def commit_table( + self, table: Table, requirements: Tuple[TableRequirement, ...], updates: Tuple[TableUpdate, ...] + ) -> CommitTableResponse: + return super().commit_table(table, requirements, updates) + + def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = ...) -> None: + valid_namespace: str = self._validate_namespace_identifier(namespace) + self.s3tables.create_namespace(tableBucketARN=self.table_bucket_arn, namespace=[valid_namespace]) + + def _validate_namespace_identifier(self, namespace: Union[str, Identifier]) -> str: + # for naming rules see: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html + # TODO: extract into constant variables + pattern = re.compile("[a-z0-9][a-z0-9_]{2,62}") + reserved = "aws_s3_metadata" + + namespace = self.identifier_to_database(namespace) + + if not pattern.fullmatch(namespace): + ... + + if namespace == reserved: + ... 
+ + return namespace + + def _validate_database_and_table_identifier(self, identifier: Union[str, Identifier]) -> Tuple[str, str]: + # for naming rules see: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html + # TODO: extract into constant variables + pattern = re.compile("[a-z0-9][a-z0-9_]{2,62}") + + namespace, table_name = self.identifier_to_database_and_table(identifier) + + namespace = self._validate_namespace_identifier(namespace) + + if not pattern.fullmatch(table_name): + ... + + return namespace, table_name + + def create_table( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + location: Optional[str] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, + properties: Properties = EMPTY_DICT, + ) -> Table: + namespace, table_name = self._validate_database_and_table_identifier(identifier) + + schema: Schema = self._convert_schema_if_needed(schema) # type: ignore + + # TODO: check whether namespace exists and if it does, whether table_name already exists + self.s3tables.create_table( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG" + ) + + # location is given by s3 table bucket + response = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) + version_token = response["versionToken"] + + location = response["warehouseLocation"] + metadata_location = self._get_metadata_location(location=location) + metadata = new_table_metadata( + location=location, + schema=schema, + partition_spec=partition_spec, + sort_order=sort_order, + properties=properties, + ) + + io = load_file_io(properties=self.properties, location=metadata_location) + # TODO: this triggers unsupported list operation error, setting overwrite=True is a workaround for now + # TODO: we can perform this check manually maybe? 
+ self._write_metadata(metadata, io, metadata_location, overwrite=True) + # TODO: after writing need to update table metadata location + # can this operation fail if the version token does not match? + self.s3tables.update_table_metadata_location( + tableBucketARN=self.table_bucket_arn, + namespace=namespace, + name=table_name, + versionToken=version_token, + metadataLocation=metadata_location, + ) + + return self.load_table(identifier=identifier) + + def create_table_transaction( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + location: Optional[str] = None, + partition_spec: PartitionSpec = ..., + sort_order: SortOrder = ..., + properties: Properties = ..., + ) -> CreateTableTransaction: + return super().create_table_transaction(identifier, schema, location, partition_spec, sort_order, properties) + + def drop_namespace(self, namespace: Union[str, Identifier]) -> None: + return super().drop_namespace(namespace) + + def drop_table(self, identifier: Union[str, Identifier]) -> None: + return super().drop_table(identifier) + + def drop_view(self, identifier: Union[str, Identifier]) -> None: + return super().drop_view(identifier) + + def list_namespaces(self, namespace: Union[str, Identifier] = ...) 
-> List[Identifier]: + # TODO: handle pagination + response = self.s3tables.list_namespaces(tableBucketARN=self.table_bucket_arn) + return [tuple(namespace["namespace"]) for namespace in response["namespaces"]] + + def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: + return super().list_tables(namespace) + + def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: + return super().list_views(namespace) + + def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: + return super().load_namespace_properties(namespace) + + def load_table(self, identifier: Union[str, Identifier]) -> Table: + namespace, table_name = self._validate_database_and_table_identifier(identifier) + # TODO: raise a NoSuchTableError if it does not exist + response = self.s3tables.get_table_metadata_location( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name + ) + # TODO: we might need to catch if table is not initialized i.e. 
does not have metadata setup yet + metadata_location = response["metadataLocation"] + + io = load_file_io(properties=self.properties, location=metadata_location) + file = io.new_input(metadata_location) + metadata = FromInputFile.table_metadata(file) + return Table( + identifier=(namespace, table_name), + metadata=metadata, + metadata_location=metadata_location, + io=self._load_file_io(metadata.properties, metadata_location), + catalog=self, + ) + + def purge_table(self, identifier: Union[str, Identifier]) -> None: + return super().purge_table(identifier) + + def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: + return super().register_table(identifier, metadata_location) + + def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: + return super().rename_table(from_identifier, to_identifier) + + def table_exists(self, identifier: Union[str, Identifier]) -> bool: + return super().table_exists(identifier) + + def update_namespace_properties( + self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = ... 
+ ) -> PropertiesUpdateSummary: + return super().update_namespace_properties(namespace, removals, updates) diff --git a/pyiceberg/exceptions.py b/pyiceberg/exceptions.py index 56574ff471..6096096f45 100644 --- a/pyiceberg/exceptions.py +++ b/pyiceberg/exceptions.py @@ -111,6 +111,11 @@ class ConditionalCheckFailedException(DynamoDbError): class GenericDynamoDbError(DynamoDbError): pass +class S3TablesError(Exception): + pass + +class TableBucketNotFound(S3TablesError): + pass class CommitFailedException(Exception): """Commit failed, refresh and try again.""" diff --git a/pyproject.toml b/pyproject.toml index 58dac055ca..f734cdd560 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1206,6 +1206,7 @@ sql-postgres = ["sqlalchemy", "psycopg2-binary"] sql-sqlite = ["sqlalchemy"] gcsfs = ["gcsfs"] rest-sigv4 = ["boto3"] +s3tables = ["boto3"] [tool.pytest.ini_options] markers = [ diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py new file mode 100644 index 0000000000..220b0ad507 --- /dev/null +++ b/tests/catalog/test_s3tables.py @@ -0,0 +1,71 @@ +import boto3 +from pyiceberg.schema import Schema +import pytest + +from pyiceberg.catalog.s3tables import S3TableCatalog +from pyiceberg.exceptions import TableBucketNotFound + + +@pytest.fixture +def database_name(database_name): + # naming rules prevent "-" in namespaces for s3 table buckets + return database_name.replace("-", "_") + + +@pytest.fixture +def table_name(table_name): + # naming rules prevent "-" in table namees for s3 table buckets + return table_name.replace("-", "_") + +@pytest.fixture +def table_bucket_arn(): + import os + # since the moto library does not support s3tables as of 2024-12-14 we have to test against a real AWS endpoint + # in one of the supported regions. 
+ + return os.environ["ARN"] + + +def test_s3tables_boto_api(table_bucket_arn): + client = boto3.client("s3tables") + response = client.list_namespaces(tableBucketARN=table_bucket_arn) + print(response["namespaces"]) + + response = client.get_table_bucket(tableBucketARN=table_bucket_arn + "abc") + print(response) + + + +def test_s3tables_namespaces_api(table_bucket_arn): + client = boto3.client("s3tables") + response = client.create_namespace(tableBucketARN=table_bucket_arn, namespace=["one", "two"]) + print(response) + response = client.list_namespaces(tableBucketARN=table_bucket_arn) + print(response) + +def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn): + properties = {"warehouse": f"{table_bucket_arn}-modified"} + with pytest.raises(TableBucketNotFound): + S3TableCatalog(name="test_s3tables_catalog", **properties) + + +def test_create_namespace(table_bucket_arn, database_name: str): + properties = {"warehouse": table_bucket_arn} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + catalog.create_namespace(namespace=database_name) + namespaces = catalog.list_namespaces() + assert (database_name,) in namespaces + + +def test_create_table(table_bucket_arn, database_name: str, table_name:str, table_schema_nested: Schema): + properties = {"warehouse": table_bucket_arn} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + identifier = (database_name, table_name) + + catalog.create_namespace(namespace=database_name) + print(database_name, table_name) + # this fails with + # OSError: When completing multiple part upload for key 'metadata/00000-55a9c37c-b822-4a81-ac0e-1efbcd145dba.metadata.json' in bucket '14e4e036-d4ae-44f8-koana45eruw + # Uunable to parse ExceptionName: S3TablesUnsupportedHeader Message: S3 Tables does not support the following header: x-amz-api-version value: 2006-03-01 + table = catalog.create_table(identifier=identifier, schema=table_schema_nested) + print(table) From 
6932281b89e522971771d46cb666480fd1ff48d0 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 14 Dec 2024 17:41:40 +0100 Subject: [PATCH 02/57] feat: support create_table using FsspecFileIO --- pyiceberg/catalog/s3tables.py | 7 ++++++- tests/catalog/test_s3tables.py | 27 ++++----------------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 82cd862571..740e72a124 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -14,7 +14,7 @@ from pyiceberg.catalog import PropertiesUpdateSummary from pyiceberg.exceptions import S3TablesError from pyiceberg.exceptions import TableBucketNotFound -from pyiceberg.io import AWS_ACCESS_KEY_ID +from pyiceberg.io import AWS_ACCESS_KEY_ID, PY_IO_IMPL from pyiceberg.io import AWS_REGION from pyiceberg.io import AWS_SECRET_ACCESS_KEY from pyiceberg.io import AWS_SESSION_TOKEN @@ -44,10 +44,15 @@ S3TABLES_ENDPOINT = "s3tables.endpoint" +# pyarrow does not support writing to S3 Table buckets as of 2024-12-14 https://github.com/apache/iceberg-python/issues/1404#issuecomment-2543174146 +S3TABLES_FILE_IO_DEFAULT = "pyiceberg.io.fsspec.FsspecFileIO" + class S3TableCatalog(MetastoreCatalog): def __init__(self, name: str, **properties: str): super().__init__(name, **properties) + # TODO: implement a proper check for FileIO + self.properties[PY_IO_IMPL] = S3TABLES_FILE_IO_DEFAULT session = boto3.Session( profile_name=properties.get(S3TABLES_PROFILE_NAME), diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 220b0ad507..855b144a14 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -26,23 +26,6 @@ def table_bucket_arn(): return os.environ["ARN"] -def test_s3tables_boto_api(table_bucket_arn): - client = boto3.client("s3tables") - response = client.list_namespaces(tableBucketARN=table_bucket_arn) - print(response["namespaces"]) - - response = 
client.get_table_bucket(tableBucketARN=table_bucket_arn + "abc") - print(response) - - - -def test_s3tables_namespaces_api(table_bucket_arn): - client = boto3.client("s3tables") - response = client.create_namespace(tableBucketARN=table_bucket_arn, namespace=["one", "two"]) - print(response) - response = client.list_namespaces(tableBucketARN=table_bucket_arn) - print(response) - def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn): properties = {"warehouse": f"{table_bucket_arn}-modified"} with pytest.raises(TableBucketNotFound): @@ -58,14 +41,12 @@ def test_create_namespace(table_bucket_arn, database_name: str): def test_create_table(table_bucket_arn, database_name: str, table_name:str, table_schema_nested: Schema): - properties = {"warehouse": table_bucket_arn} + # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet + properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) - print(database_name, table_name) - # this fails with - # OSError: When completing multiple part upload for key 'metadata/00000-55a9c37c-b822-4a81-ac0e-1efbcd145dba.metadata.json' in bucket '14e4e036-d4ae-44f8-koana45eruw - # Uunable to parse ExceptionName: S3TablesUnsupportedHeader Message: S3 Tables does not support the following header: x-amz-api-version value: 2006-03-01 table = catalog.create_table(identifier=identifier, schema=table_schema_nested) - print(table) + + assert table == catalog.load_table(identifier) From f8bec25da1d6c9e1802735c2569c11057fe87035 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 14 Dec 2024 18:13:38 +0100 Subject: [PATCH 03/57] feat: implement drop_table --- pyiceberg/catalog/s3tables.py | 38 ++++++++++++++++++++++++++-------- tests/catalog/test_s3tables.py | 18 +++++++++++++++- 2 
files changed, 46 insertions(+), 10 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 740e72a124..7234e7223a 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -8,18 +8,22 @@ import boto3 -from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION, MetastoreCatalog +from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION from pyiceberg.catalog import WAREHOUSE_LOCATION from pyiceberg.catalog import Catalog +from pyiceberg.catalog import MetastoreCatalog from pyiceberg.catalog import PropertiesUpdateSummary +from pyiceberg.exceptions import NoSuchTableError from pyiceberg.exceptions import S3TablesError from pyiceberg.exceptions import TableBucketNotFound -from pyiceberg.io import AWS_ACCESS_KEY_ID, PY_IO_IMPL +from pyiceberg.io import AWS_ACCESS_KEY_ID from pyiceberg.io import AWS_REGION from pyiceberg.io import AWS_SECRET_ACCESS_KEY from pyiceberg.io import AWS_SESSION_TOKEN +from pyiceberg.io import PY_IO_IMPL from pyiceberg.io import load_file_io -from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec +from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC +from pyiceberg.partitioning import PartitionSpec from pyiceberg.schema import Schema from pyiceberg.serializers import FromInputFile from pyiceberg.table import CommitTableResponse @@ -27,9 +31,11 @@ from pyiceberg.table import Table from pyiceberg.table import TableRequirement from pyiceberg.table.metadata import new_table_metadata -from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER +from pyiceberg.table.sorting import SortOrder from pyiceberg.table.update import TableUpdate -from pyiceberg.typedef import EMPTY_DICT, Identifier +from pyiceberg.typedef import EMPTY_DICT +from pyiceberg.typedef import Identifier from pyiceberg.typedef import Properties from pyiceberg.utils.properties import get_first_property_value @@ -176,7 
+182,18 @@ def drop_namespace(self, namespace: Union[str, Identifier]) -> None: return super().drop_namespace(namespace) def drop_table(self, identifier: Union[str, Identifier]) -> None: - return super().drop_table(identifier) + namespace, table_name = self._validate_database_and_table_identifier(identifier) + try: + response = self.s3tables.get_table( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name + ) + except self.s3tables.exceptions.NotFoundException as e: + raise NoSuchTableError(f"No table with identifier {identifier} exists.") from e + + # TODO: handle conflicts due to versionToken mismatch that might occur + self.s3tables.delete_table( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, versionToken=response["versionToken"] + ) def drop_view(self, identifier: Union[str, Identifier]) -> None: return super().drop_view(identifier) @@ -198,9 +215,12 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper def load_table(self, identifier: Union[str, Identifier]) -> Table: namespace, table_name = self._validate_database_and_table_identifier(identifier) # TODO: raise a NoSuchTableError if it does not exist - response = self.s3tables.get_table_metadata_location( - tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name - ) + try: + response = self.s3tables.get_table_metadata_location( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name + ) + except self.s3tables.exceptions.NotFoundException as e: + raise NoSuchTableError(f"No table with identifier {identifier} exists.") from e # TODO: we might need to catch if table is not initialized i.e. 
does not have metadata setup yet metadata_location = response["metadataLocation"] diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 855b144a14..9a68cdb794 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -3,7 +3,7 @@ import pytest from pyiceberg.catalog.s3tables import S3TableCatalog -from pyiceberg.exceptions import TableBucketNotFound +from pyiceberg.exceptions import NoSuchTableError, TableBucketNotFound @pytest.fixture @@ -50,3 +50,19 @@ def test_create_table(table_bucket_arn, database_name: str, table_name:str, tabl table = catalog.create_table(identifier=identifier, schema=table_schema_nested) assert table == catalog.load_table(identifier) + + + +def test_drop_table(table_bucket_arn, database_name: str, table_name:str, table_schema_nested: Schema): + # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet + properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + identifier = (database_name, table_name) + + catalog.create_namespace(namespace=database_name) + catalog.create_table(identifier=identifier, schema=table_schema_nested) + + catalog.drop_table(identifier=identifier) + + with pytest.raises(NoSuchTableError): + catalog.load_table(identifier=identifier) From 9018e704c6f84ad0d92a907775613b72c5bba994 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 14 Dec 2024 18:20:02 +0100 Subject: [PATCH 04/57] feat: implement drop_namespace --- pyiceberg/catalog/s3tables.py | 9 +++++++-- tests/catalog/test_s3tables.py | 9 +++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 7234e7223a..fd7d7addc0 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -13,7 +13,7 @@ from pyiceberg.catalog import Catalog from pyiceberg.catalog 
import MetastoreCatalog from pyiceberg.catalog import PropertiesUpdateSummary -from pyiceberg.exceptions import NoSuchTableError +from pyiceberg.exceptions import NamespaceNotEmptyError, NoSuchTableError from pyiceberg.exceptions import S3TablesError from pyiceberg.exceptions import TableBucketNotFound from pyiceberg.io import AWS_ACCESS_KEY_ID @@ -179,7 +179,12 @@ def create_table_transaction( return super().create_table_transaction(identifier, schema, location, partition_spec, sort_order, properties) def drop_namespace(self, namespace: Union[str, Identifier]) -> None: - return super().drop_namespace(namespace) + namespace = self._validate_namespace_identifier(namespace) + try: + self.s3tables.delete_namespace(tableBucketARN=self.table_bucket_arn, namespace=namespace) + except self.s3tables.exceptions.ConflictException as e: + raise NamespaceNotEmptyError(f"Namespace {namespace} is not empty.") from e + def drop_table(self, identifier: Union[str, Identifier]) -> None: namespace, table_name = self._validate_database_and_table_identifier(identifier) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 9a68cdb794..0503fff9c1 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -39,6 +39,15 @@ def test_create_namespace(table_bucket_arn, database_name: str): namespaces = catalog.list_namespaces() assert (database_name,) in namespaces +def test_drop_namespace(table_bucket_arn, database_name: str): + properties = {"warehouse": table_bucket_arn} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + catalog.create_namespace(namespace=database_name) + assert (database_name,) in catalog.list_namespaces() + catalog.drop_namespace(namespace=database_name) + assert (database_name,) not in catalog.list_namespaces() + + def test_create_table(table_bucket_arn, database_name: str, table_name:str, table_schema_nested: Schema): # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not 
work with S3 Table Buckets yet From b053fbb09b1c13d35ada0366bb68f16d0c8b7bc1 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 15 Dec 2024 12:01:36 +0100 Subject: [PATCH 05/57] test: validate how version conflict is handled with s3tables API --- tests/catalog/test_s3tables.py | 40 +++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 0503fff9c1..c7b9d13df8 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -1,9 +1,12 @@ +import uuid + import boto3 -from pyiceberg.schema import Schema import pytest from pyiceberg.catalog.s3tables import S3TableCatalog -from pyiceberg.exceptions import NoSuchTableError, TableBucketNotFound +from pyiceberg.exceptions import NoSuchTableError +from pyiceberg.exceptions import TableBucketNotFound +from pyiceberg.schema import Schema @pytest.fixture @@ -17,15 +20,41 @@ def table_name(table_name): # naming rules prevent "-" in table namees for s3 table buckets return table_name.replace("-", "_") + @pytest.fixture def table_bucket_arn(): import os + # since the moto library does not support s3tables as of 2024-12-14 we have to test against a real AWS endpoint # in one of the supported regions. 
return os.environ["ARN"] +def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn, database_name, table_name): + client = boto3.client("s3tables") + client.create_namespace(tableBucketARN=table_bucket_arn, namespace=[database_name]) + response = client.create_table( + tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG" + ) + version_token = response["versionToken"] + scrambled_version_token = version_token[::-1] + + warehouse_location = client.get_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name)[ + "warehouseLocation" + ] + metadata_location = f"{warehouse_location}/metadata/00001-{uuid.uuid4()}.metadata.json" + + with pytest.raises(client.exceptions.ConflictException): + client.update_table_metadata_location( + tableBucketARN=table_bucket_arn, + namespace=database_name, + name=table_name, + versionToken=scrambled_version_token, + metadataLocation=metadata_location, + ) + + def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn): properties = {"warehouse": f"{table_bucket_arn}-modified"} with pytest.raises(TableBucketNotFound): @@ -39,6 +68,7 @@ def test_create_namespace(table_bucket_arn, database_name: str): namespaces = catalog.list_namespaces() assert (database_name,) in namespaces + def test_drop_namespace(table_bucket_arn, database_name: str): properties = {"warehouse": table_bucket_arn} catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) @@ -48,8 +78,7 @@ def test_drop_namespace(table_bucket_arn, database_name: str): assert (database_name,) not in catalog.list_namespaces() - -def test_create_table(table_bucket_arn, database_name: str, table_name:str, table_schema_nested: Schema): +def test_create_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet properties = {"warehouse": 
table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) @@ -61,8 +90,7 @@ def test_create_table(table_bucket_arn, database_name: str, table_name:str, tabl assert table == catalog.load_table(identifier) - -def test_drop_table(table_bucket_arn, database_name: str, table_name:str, table_schema_nested: Schema): +def test_drop_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) From f8fff8ac0201b028e834efedc566770c12fa07cd Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 15 Dec 2024 13:40:23 +0100 Subject: [PATCH 06/57] feat: implement commit_table --- pyiceberg/catalog/s3tables.py | 52 +++++++++++++++++++++++++++++++--- tests/catalog/test_s3tables.py | 30 ++++++++++++++++++++ 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index fd7d7addc0..1fb384b21d 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -13,7 +13,9 @@ from pyiceberg.catalog import Catalog from pyiceberg.catalog import MetastoreCatalog from pyiceberg.catalog import PropertiesUpdateSummary -from pyiceberg.exceptions import NamespaceNotEmptyError, NoSuchTableError +from pyiceberg.exceptions import CommitFailedException +from pyiceberg.exceptions import NamespaceNotEmptyError +from pyiceberg.exceptions import NoSuchTableError from pyiceberg.exceptions import S3TablesError from pyiceberg.exceptions import TableBucketNotFound from pyiceberg.io import AWS_ACCESS_KEY_ID @@ -83,7 +85,47 @@ def __init__(self, name: str, **properties: str): def commit_table( self, table: Table, requirements: Tuple[TableRequirement, 
...], updates: Tuple[TableUpdate, ...] ) -> CommitTableResponse: - return super().commit_table(table, requirements, updates) + table_identifier = table.name() + database_name, table_name = self.identifier_to_database_and_table(table_identifier, NoSuchTableError) + + # TODO: loading table and getting versionToken should be an atomic operation, otherwise + # the table can change between those two API calls without noticing + # -> change this into an internal API that returns table information along with versionToken + current_table = self.load_table(identifier=table_identifier) + version_token = self.s3tables.get_table( + tableBucketARN=self.table_bucket_arn, namespace=database_name, name=table_name + )["versionToken"] + + updated_staged_table = self._update_and_stage_table(current_table, table_identifier, requirements, updates) + if current_table and updated_staged_table.metadata == current_table.metadata: + # no changes, do nothing + return CommitTableResponse( + metadata=current_table.metadata, metadata_location=current_table.metadata_location + ) + + self._write_metadata( + metadata=updated_staged_table.metadata, + io=updated_staged_table.io, + metadata_path=updated_staged_table.metadata_location, + overwrite=True, + ) + + # try to update metadata location which will fail if the versionToken changed meanwhile + try: + self.s3tables.update_table_metadata_location( + tableBucketARN=self.table_bucket_arn, + namespace=database_name, + name=table_name, + versionToken=version_token, + metadataLocation=updated_staged_table.metadata_location, + ) + except self.s3tables.exceptions.ConflictException as e: + raise CommitFailedException( + f"Cannot commit {database_name}.{table_name} because of a concurrent update to the table version {version_token}." 
+ ) from e + return CommitTableResponse( + metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location + ) def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = ...) -> None: valid_namespace: str = self._validate_namespace_identifier(namespace) @@ -185,7 +227,6 @@ def drop_namespace(self, namespace: Union[str, Identifier]) -> None: except self.s3tables.exceptions.ConflictException as e: raise NamespaceNotEmptyError(f"Namespace {namespace} is not empty.") from e - def drop_table(self, identifier: Union[str, Identifier]) -> None: namespace, table_name = self._validate_database_and_table_identifier(identifier) try: @@ -197,7 +238,10 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: # TODO: handle conflicts due to versionToken mismatch that might occur self.s3tables.delete_table( - tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, versionToken=response["versionToken"] + tableBucketARN=self.table_bucket_arn, + namespace=namespace, + name=table_name, + versionToken=response["versionToken"], ) def drop_view(self, identifier: Union[str, Identifier]) -> None: diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index c7b9d13df8..d185bd795d 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -7,6 +7,7 @@ from pyiceberg.exceptions import NoSuchTableError from pyiceberg.exceptions import TableBucketNotFound from pyiceberg.schema import Schema +from pyiceberg.types import IntegerType @pytest.fixture @@ -103,3 +104,32 @@ def test_drop_table(table_bucket_arn, database_name: str, table_name: str, table with pytest.raises(NoSuchTableError): catalog.load_table(identifier=identifier) + + +def test_commit_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): + # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet + properties = 
{"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + identifier = (database_name, table_name) + + catalog.create_namespace(namespace=database_name) + table = catalog.create_table(identifier=identifier, schema=table_schema_nested) + + last_updated_ms = table.metadata.last_updated_ms + original_table_metadata_location = table.metadata_location + original_table_last_updated_ms = table.metadata.last_updated_ms + + + + transaction = table.transaction() + update = transaction.update_schema() + update.add_column(path="b", field_type=IntegerType()) + update.commit() + transaction.commit_transaction() + + updated_table_metadata = table.metadata + assert updated_table_metadata.current_schema_id == 1 + assert len(updated_table_metadata.schemas) == 2 + assert updated_table_metadata.last_updated_ms > last_updated_ms + assert updated_table_metadata.metadata_log[0].metadata_file == original_table_metadata_location + assert updated_table_metadata.metadata_log[0].timestamp_ms == original_table_last_updated_ms From 579bce674c5c063a2149936b460f8ffbb0599f73 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Wed, 18 Dec 2024 21:37:10 +0100 Subject: [PATCH 07/57] feat: implement table_exists --- pyiceberg/catalog/s3tables.py | 9 ++++++++- tests/catalog/test_s3tables.py | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 1fb384b21d..947157320d 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -294,7 +294,14 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U return super().rename_table(from_identifier, to_identifier) def table_exists(self, identifier: Union[str, Identifier]) -> bool: - return super().table_exists(identifier) + namespace, table_name = self._validate_database_and_table_identifier(identifier) + try: + 
self.s3tables.get_table( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name + ) + except self.s3tables.exceptions.NotFoundException: + return False + return True def update_namespace_properties( self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = ... diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index d185bd795d..f33d04d953 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -91,6 +91,18 @@ def test_create_table(table_bucket_arn, database_name: str, table_name: str, tab assert table == catalog.load_table(identifier) +def test_table_exists(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): + # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet + properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + identifier = (database_name, table_name) + + catalog.create_namespace(namespace=database_name) + assert not catalog.table_exists(identifier=identifier) + catalog.create_table(identifier=identifier, schema=table_schema_nested) + assert catalog.table_exists(identifier=identifier) + + def test_drop_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} From 5ef779018c7a434870d9d55c656af7c5d271645e Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Wed, 18 Dec 2024 21:53:07 +0100 Subject: [PATCH 08/57] feat: implement list_tables --- pyiceberg/catalog/s3tables.py | 7 ++++++- tests/catalog/test_s3tables.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git 
a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 947157320d..047c689afd 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -253,7 +253,12 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ...) -> List[Ident return [tuple(namespace["namespace"]) for namespace in response["namespaces"]] def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: - return super().list_tables(namespace) + namespace = self._validate_namespace_identifier(namespace) + paginator = self.s3tables.get_paginator("list_tables") + tables: List[str] = [] + for page in paginator.paginate(tableBucketARN=self.table_bucket_arn, namespace=namespace): + tables.extend(table["name"] for table in page["tables"]) + return tables def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: return super().list_views(namespace) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index f33d04d953..e74bddc973 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -103,6 +103,18 @@ def test_table_exists(table_bucket_arn, database_name: str, table_name: str, tab assert catalog.table_exists(identifier=identifier) +def test_list_tables(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): + # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet + properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + identifier = (database_name, table_name) + + catalog.create_namespace(namespace=database_name) + assert not catalog.list_tables(namespace=database_name) + catalog.create_table(identifier=identifier, schema=table_schema_nested) + assert catalog.list_tables(namespace=database_name) + + def test_drop_table(table_bucket_arn, database_name: str, table_name: str, 
table_schema_nested: Schema): # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} From 6d496dfa06a5632dcb43b1d1ccc694d468c04cab Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Wed, 18 Dec 2024 21:59:14 +0100 Subject: [PATCH 09/57] refactor: improve list_namespace --- pyiceberg/catalog/s3tables.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 047c689afd..9e223dcc7a 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -247,10 +247,17 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: def drop_view(self, identifier: Union[str, Identifier]) -> None: return super().drop_view(identifier) - def list_namespaces(self, namespace: Union[str, Identifier] = ...) -> List[Identifier]: - # TODO: handle pagination - response = self.s3tables.list_namespaces(tableBucketARN=self.table_bucket_arn) - return [tuple(namespace["namespace"]) for namespace in response["namespaces"]] + def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: + # TODO: s3tables only support single level namespaces + if namespace: + namespace = self._validate_namespace_identifier(namespace) + paginator = self.s3tables.get_paginator("list_namespaces") + + namespaces: List[Identifier] = [] + for page in paginator.paginate(tableBucketARN=self.table_bucket_arn): + namespaces.extend(tuple(entry["namespace"]) for entry in page["namespaces"]) + + return namespaces def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: namespace = self._validate_namespace_identifier(namespace) From 33b08f0eeb7bcd4811db23144698576db48b9d12 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Wed, 18 Dec 2024 21:59:50 +0100 Subject: [PATCH 10/57] fix: return Identifier from list_tables --- 
pyiceberg/catalog/s3tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 9e223dcc7a..f974c354bd 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -262,9 +262,9 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: namespace = self._validate_namespace_identifier(namespace) paginator = self.s3tables.get_paginator("list_tables") - tables: List[str] = [] + tables: List[Identifier] = [] for page in paginator.paginate(tableBucketARN=self.table_bucket_arn, namespace=namespace): - tables.extend(table["name"] for table in page["tables"]) + tables.extend((namespace, table["name"]) for table in page["tables"]) return tables def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: From 798e65d23e775d9a1c98d3d24c8945473b5906b1 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 14:22:25 +0100 Subject: [PATCH 11/57] feat: implement rename table --- pyiceberg/catalog/s3tables.py | 22 ++++++++++++++++++---- tests/catalog/test_s3tables.py | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index f974c354bd..54d57e347d 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -303,14 +303,28 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: return super().register_table(identifier, metadata_location) def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: - return super().rename_table(from_identifier, to_identifier) + from_namespace, from_table_name = self._validate_database_and_table_identifier(from_identifier) + to_namespace, to_table_name = self._validate_database_and_table_identifier(to_identifier) + + 
version_token = self.s3tables.get_table( + tableBucketARN=self.table_bucket_arn, namespace=from_namespace, name=from_table_name + )["versionToken"] + + self.s3tables.rename_table( + tableBucketARN=self.table_bucket_arn, + namespace=from_namespace, + name=from_table_name, + newNamespaceName=to_namespace, + newName=to_table_name, + versionToken=version_token + ) + + return self.load_table(to_identifier) def table_exists(self, identifier: Union[str, Identifier]) -> bool: namespace, table_name = self._validate_database_and_table_identifier(identifier) try: - self.s3tables.get_table( - tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name - ) + self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) except self.s3tables.exceptions.NotFoundException: return False return True diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index e74bddc973..3750b8a357 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -103,6 +103,25 @@ def test_table_exists(table_bucket_arn, database_name: str, table_name: str, tab assert catalog.table_exists(identifier=identifier) +def test_rename_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): + # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet + properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + identifier = (database_name, table_name) + + catalog.create_namespace(namespace=database_name) + catalog.create_table(identifier=identifier, schema=table_schema_nested) + + to_database_name = f"{database_name}new" + to_table_name = f"{table_name}new" + to_identifier = (to_database_name, to_table_name) + catalog.create_namespace(namespace=to_database_name) + catalog.rename_table(from_identifier=identifier, 
to_identifier=to_identifier) + + assert not catalog.table_exists(identifier=identifier) + assert catalog.table_exists(identifier=to_identifier) + + def test_list_tables(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} From 5297ca72965592328cbeecca2eee78fec92c8372 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 14:32:03 +0100 Subject: [PATCH 12/57] feat: implement load_namespace_properties --- pyiceberg/catalog/s3tables.py | 11 +++++++++-- tests/catalog/test_s3tables.py | 9 +++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 54d57e347d..c77651444b 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -271,7 +271,14 @@ def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: return super().list_views(namespace) def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: - return super().load_namespace_properties(namespace) + namespace = self._validate_namespace_identifier(namespace) + response = self.s3tables.get_namespace(tableBucketARN=self.table_bucket_arn, namespace=namespace) + return { + "namespace": response["namespace"], + "createdAt": response["createdAt"], + "createdBy": response["createdBy"], + "ownerAccountId": response["ownerAccountId"], + } def load_table(self, identifier: Union[str, Identifier]) -> Table: namespace, table_name = self._validate_database_and_table_identifier(identifier) @@ -316,7 +323,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U name=from_table_name, newNamespaceName=to_namespace, newName=to_table_name, - versionToken=version_token + versionToken=version_token, ) return 
self.load_table(to_identifier) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 3750b8a357..5b7393b3a0 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -70,6 +70,13 @@ def test_create_namespace(table_bucket_arn, database_name: str): assert (database_name,) in namespaces +def test_load_namespace_properties(table_bucket_arn, database_name: str): + properties = {"warehouse": table_bucket_arn} + catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) + catalog.create_namespace(namespace=database_name) + assert database_name in catalog.load_namespace_properties(database_name)["namespace"] + + def test_drop_namespace(table_bucket_arn, database_name: str): properties = {"warehouse": table_bucket_arn} catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) @@ -162,8 +169,6 @@ def test_commit_table(table_bucket_arn, database_name: str, table_name: str, tab original_table_metadata_location = table.metadata_location original_table_last_updated_ms = table.metadata.last_updated_ms - - transaction = table.transaction() update = transaction.update_schema() update.add_column(path="b", field_type=IntegerType()) From 3817431bc03bf559f86d7300f9261d0aea2639f1 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 14:38:07 +0100 Subject: [PATCH 13/57] refactor: move some methods around --- pyiceberg/catalog/s3tables.py | 47 ++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index c77651444b..2370a5099c 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -10,7 +10,6 @@ from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION from pyiceberg.catalog import WAREHOUSE_LOCATION -from pyiceberg.catalog import Catalog from pyiceberg.catalog import MetastoreCatalog from pyiceberg.catalog import PropertiesUpdateSummary from pyiceberg.exceptions import 
CommitFailedException @@ -157,6 +156,7 @@ def _validate_database_and_table_identifier(self, identifier: Union[str, Identif namespace = self._validate_namespace_identifier(namespace) if not pattern.fullmatch(table_name): + # TODO: raise proper errors for invalid table_name ... return namespace, table_name @@ -209,16 +209,6 @@ def create_table( return self.load_table(identifier=identifier) - def create_table_transaction( - self, - identifier: Union[str, Identifier], - schema: Union[Schema, "pa.Schema"], - location: Optional[str] = None, - partition_spec: PartitionSpec = ..., - sort_order: SortOrder = ..., - properties: Properties = ..., - ) -> CreateTableTransaction: - return super().create_table_transaction(identifier, schema, location, partition_spec, sort_order, properties) def drop_namespace(self, namespace: Union[str, Identifier]) -> None: namespace = self._validate_namespace_identifier(namespace) @@ -244,9 +234,6 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: versionToken=response["versionToken"], ) - def drop_view(self, identifier: Union[str, Identifier]) -> None: - return super().drop_view(identifier) - def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: # TODO: s3tables only support single level namespaces if namespace: @@ -267,9 +254,6 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: tables.extend((namespace, table["name"]) for table in page["tables"]) return tables - def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: - return super().list_views(namespace) - def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: namespace = self._validate_namespace_identifier(namespace) response = self.s3tables.get_namespace(tableBucketARN=self.table_bucket_arn, namespace=namespace) @@ -303,12 +287,6 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: catalog=self, ) - def purge_table(self, identifier: Union[str, 
Identifier]) -> None: - return super().purge_table(identifier) - - def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: - return super().register_table(identifier, metadata_location) - def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: from_namespace, from_table_name = self._validate_database_and_table_identifier(from_identifier) to_namespace, to_table_name = self._validate_database_and_table_identifier(to_identifier) @@ -340,3 +318,26 @@ def update_namespace_properties( self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = ... ) -> PropertiesUpdateSummary: return super().update_namespace_properties(namespace, removals, updates) + + def create_table_transaction( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + location: Optional[str] = None, + partition_spec: PartitionSpec = ..., + sort_order: SortOrder = ..., + properties: Properties = ..., + ) -> CreateTableTransaction: + return super().create_table_transaction(identifier, schema, location, partition_spec, sort_order, properties) + + def purge_table(self, identifier: Union[str, Identifier]) -> None: + return super().purge_table(identifier) + + def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: + return super().register_table(identifier, metadata_location) + + def drop_view(self, identifier: Union[str, Identifier]) -> None: + return super().drop_view(identifier) + + def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: + return super().list_views(namespace) From c4a6485a5ea80b3a7a7fdf6b07ea2b95a9fb2010 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 14:39:04 +0100 Subject: [PATCH 14/57] feat: raise NotImplementedError for views functionality --- pyiceberg/catalog/s3tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 2370a5099c..bb0007b796 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -337,7 +337,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: return super().register_table(identifier, metadata_location) def drop_view(self, identifier: Union[str, Identifier]) -> None: - return super().drop_view(identifier) + raise NotImplementedError def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: - return super().list_views(namespace) + raise NotImplementedError From 7bc8c3e160adfb4f50eb26f30a096d2e281102c2 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 14:43:11 +0100 Subject: [PATCH 15/57] feat: raise NotImplementedError for purge_table --- pyiceberg/catalog/s3tables.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index bb0007b796..c91f76eb08 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -331,7 +331,9 @@ def create_table_transaction( return super().create_table_transaction(identifier, schema, location, partition_spec, sort_order, properties) def purge_table(self, identifier: Union[str, Identifier]) -> None: - return super().purge_table(identifier) + # purge is not supported as s3tables doesn't support delete operations + # table maintenance is automated + raise NotImplementedError def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: return super().register_table(identifier, metadata_location) From 3de30877d08ef6a2d4ac1068a2f55380e93c85c8 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 14:52:50 +0100 Subject: [PATCH 16/57] feat: raise NotImplementedError for update_namespace_properties --- pyiceberg/catalog/s3tables.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py 
b/pyiceberg/catalog/s3tables.py index c91f76eb08..058d2acc59 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -315,9 +315,10 @@ def table_exists(self, identifier: Union[str, Identifier]) -> bool: return True def update_namespace_properties( - self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = ... + self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT ) -> PropertiesUpdateSummary: - return super().update_namespace_properties(namespace, removals, updates) + # namespace properties are read only + raise NotImplementedError def create_table_transaction( self, From 988485bf1b37fa51b8737fd4cd00108c8767343d Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 15:02:57 +0100 Subject: [PATCH 17/57] feat: raise NotImplementedError for register_table --- pyiceberg/catalog/s3tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 058d2acc59..3c4a1efda7 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -337,7 +337,7 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None: raise NotImplementedError def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: - return super().register_table(identifier, metadata_location) + raise NotImplementedError def drop_view(self, identifier: Union[str, Identifier]) -> None: raise NotImplementedError From 6a28cb94625d061f93753cbd6fadb55e40ab3b71 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 15:10:51 +0100 Subject: [PATCH 18/57] fix: don't override create_table_transaction --- pyiceberg/catalog/s3tables.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 3c4a1efda7..555e9783e0 100644 --- a/pyiceberg/catalog/s3tables.py +++ 
b/pyiceberg/catalog/s3tables.py @@ -320,17 +320,6 @@ def update_namespace_properties( # namespace properties are read only raise NotImplementedError - def create_table_transaction( - self, - identifier: Union[str, Identifier], - schema: Union[Schema, "pa.Schema"], - location: Optional[str] = None, - partition_spec: PartitionSpec = ..., - sort_order: SortOrder = ..., - properties: Properties = ..., - ) -> CreateTableTransaction: - return super().create_table_transaction(identifier, schema, location, partition_spec, sort_order, properties) - def purge_table(self, identifier: Union[str, Identifier]) -> None: # purge is not supported as s3tables doesn't support delete operations # table maintenance is automated From a545c8247af8221f8f93df600466dc5191d6ab4e Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 21 Dec 2024 15:15:57 +0100 Subject: [PATCH 19/57] chore: run formatter --- pyiceberg/catalog/s3tables.py | 68 ++++++++++------------------------ pyiceberg/exceptions.py | 3 ++ tests/catalog/test_s3tables.py | 7 +--- 3 files changed, 25 insertions(+), 53 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 555e9783e0..b4aa7283b5 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -1,43 +1,24 @@ import re -from typing import TYPE_CHECKING -from typing import List -from typing import Optional -from typing import Set -from typing import Tuple -from typing import Union +from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union import boto3 -from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION -from pyiceberg.catalog import WAREHOUSE_LOCATION -from pyiceberg.catalog import MetastoreCatalog -from pyiceberg.catalog import PropertiesUpdateSummary -from pyiceberg.exceptions import CommitFailedException -from pyiceberg.exceptions import NamespaceNotEmptyError -from pyiceberg.exceptions import NoSuchTableError -from pyiceberg.exceptions import S3TablesError -from pyiceberg.exceptions import 
TableBucketNotFound -from pyiceberg.io import AWS_ACCESS_KEY_ID -from pyiceberg.io import AWS_REGION -from pyiceberg.io import AWS_SECRET_ACCESS_KEY -from pyiceberg.io import AWS_SESSION_TOKEN -from pyiceberg.io import PY_IO_IMPL -from pyiceberg.io import load_file_io -from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC -from pyiceberg.partitioning import PartitionSpec +from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION, WAREHOUSE_LOCATION, MetastoreCatalog, PropertiesUpdateSummary +from pyiceberg.exceptions import ( + CommitFailedException, + NamespaceNotEmptyError, + NoSuchTableError, + TableBucketNotFound, +) +from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, PY_IO_IMPL, load_file_io +from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.serializers import FromInputFile -from pyiceberg.table import CommitTableResponse -from pyiceberg.table import CreateTableTransaction -from pyiceberg.table import Table -from pyiceberg.table import TableRequirement +from pyiceberg.table import CommitTableResponse, Table, TableRequirement from pyiceberg.table.metadata import new_table_metadata -from pyiceberg.table.sorting import UNSORTED_SORT_ORDER -from pyiceberg.table.sorting import SortOrder +from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.table.update import TableUpdate -from pyiceberg.typedef import EMPTY_DICT -from pyiceberg.typedef import Identifier -from pyiceberg.typedef import Properties +from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties from pyiceberg.utils.properties import get_first_property_value if TYPE_CHECKING: @@ -66,9 +47,7 @@ def __init__(self, name: str, **properties: str): region_name=get_first_property_value(properties, S3TABLES_REGION, AWS_REGION), botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION), aws_access_key_id=get_first_property_value(properties, 
S3TABLES_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), - aws_secret_access_key=get_first_property_value( - properties, S3TABLES_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY - ), + aws_secret_access_key=get_first_property_value(properties, S3TABLES_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), aws_session_token=get_first_property_value(properties, S3TABLES_SESSION_TOKEN, AWS_SESSION_TOKEN), ) # TODO: s3tables client only supported from boto3>=1.35.74 so this can crash @@ -91,16 +70,14 @@ def commit_table( # the table can change between those two API calls without noticing # -> change this into an internal API that returns table information along with versionToken current_table = self.load_table(identifier=table_identifier) - version_token = self.s3tables.get_table( - tableBucketARN=self.table_bucket_arn, namespace=database_name, name=table_name - )["versionToken"] + version_token = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=database_name, name=table_name)[ + "versionToken" + ] updated_staged_table = self._update_and_stage_table(current_table, table_identifier, requirements, updates) if current_table and updated_staged_table.metadata == current_table.metadata: # no changes, do nothing - return CommitTableResponse( - metadata=current_table.metadata, metadata_location=current_table.metadata_location - ) + return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) self._write_metadata( metadata=updated_staged_table.metadata, @@ -175,9 +152,7 @@ def create_table( schema: Schema = self._convert_schema_if_needed(schema) # type: ignore # TODO: check whether namespace exists and if it does, whether table_name already exists - self.s3tables.create_table( - tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG" - ) + self.s3tables.create_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG") # location is given by s3 table bucket 
response = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) @@ -209,7 +184,6 @@ def create_table( return self.load_table(identifier=identifier) - def drop_namespace(self, namespace: Union[str, Identifier]) -> None: namespace = self._validate_namespace_identifier(namespace) try: @@ -220,9 +194,7 @@ def drop_namespace(self, namespace: Union[str, Identifier]) -> None: def drop_table(self, identifier: Union[str, Identifier]) -> None: namespace, table_name = self._validate_database_and_table_identifier(identifier) try: - response = self.s3tables.get_table( - tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name - ) + response = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) except self.s3tables.exceptions.NotFoundException as e: raise NoSuchTableError(f"No table with identifier {identifier} exists.") from e diff --git a/pyiceberg/exceptions.py b/pyiceberg/exceptions.py index 6096096f45..f633e4c143 100644 --- a/pyiceberg/exceptions.py +++ b/pyiceberg/exceptions.py @@ -111,12 +111,15 @@ class ConditionalCheckFailedException(DynamoDbError): class GenericDynamoDbError(DynamoDbError): pass + class S3TablesError(Exception): pass + class TableBucketNotFound(S3TablesError): pass + class CommitFailedException(Exception): """Commit failed, refresh and try again.""" diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 5b7393b3a0..2cbdca15c3 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -4,8 +4,7 @@ import pytest from pyiceberg.catalog.s3tables import S3TableCatalog -from pyiceberg.exceptions import NoSuchTableError -from pyiceberg.exceptions import TableBucketNotFound +from pyiceberg.exceptions import NoSuchTableError, TableBucketNotFound from pyiceberg.schema import Schema from pyiceberg.types import IntegerType @@ -35,9 +34,7 @@ def table_bucket_arn(): def 
test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn, database_name, table_name): client = boto3.client("s3tables") client.create_namespace(tableBucketARN=table_bucket_arn, namespace=[database_name]) - response = client.create_table( - tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG" - ) + response = client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") version_token = response["versionToken"] scrambled_version_token = version_token[::-1] From 7164e096bd1ed2e8a519cafb43fcd129bfca9a35 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 23 Dec 2024 14:19:27 +0100 Subject: [PATCH 20/57] feat: raise exceptions if boto3 doesn't support s3tables --- pyiceberg/catalog/s3tables.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index b4aa7283b5..b8009191b2 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union import boto3 +from boto3.session import UnknownServiceError from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION, WAREHOUSE_LOCATION, MetastoreCatalog, PropertiesUpdateSummary from pyiceberg.exceptions import ( @@ -9,6 +10,7 @@ NamespaceNotEmptyError, NoSuchTableError, TableBucketNotFound, + S3TablesError ) from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, PY_IO_IMPL, load_file_io from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec @@ -50,9 +52,11 @@ def __init__(self, name: str, **properties: str): aws_secret_access_key=get_first_property_value(properties, S3TABLES_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), aws_session_token=get_first_property_value(properties, S3TABLES_SESSION_TOKEN, AWS_SESSION_TOKEN), ) - # TODO: s3tables client only supported from boto3>=1.35.74 so 
this can crash - # TODO: set a custom user-agent for api calls like the Java implementation - self.s3tables = session.client("s3tables") + try: + self.s3tables = session.client("s3tables") + except UnknownServiceError as e: + raise S3TablesError("'s3tables' requires boto3>=1.35.74. Current version: {boto3.__version__}.") from e + # TODO: handle malformed properties instead of just raising a key error here self.table_bucket_arn = self.properties[WAREHOUSE_LOCATION] try: From efa04e3d2e4aec65b17fe94a0d3474ed91630261 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 23 Dec 2024 14:24:55 +0100 Subject: [PATCH 21/57] feat: make endpoint configurable --- pyiceberg/catalog/s3tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index b8009191b2..ab3e77a016 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -53,7 +53,7 @@ def __init__(self, name: str, **properties: str): aws_session_token=get_first_property_value(properties, S3TABLES_SESSION_TOKEN, AWS_SESSION_TOKEN), ) try: - self.s3tables = session.client("s3tables") + self.s3tables = session.client("s3tables", endpoint_url=properties.get(S3TABLES_ENDPOINT)) except UnknownServiceError as e: raise S3TablesError("'s3tables' requires boto3>=1.35.74. 
Current version: {boto3.__version__}.") from e From f1454937d7d1220e021e250e456bbc725a54919c Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 23 Dec 2024 14:34:53 +0100 Subject: [PATCH 22/57] feat: explicitly configure tableBucketARN --- pyiceberg/catalog/s3tables.py | 6 ++-- tests/catalog/test_s3tables.py | 50 +++++++++++----------------------- 2 files changed, 20 insertions(+), 36 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index ab3e77a016..4675a9cf76 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -32,6 +32,8 @@ S3TABLES_SECRET_ACCESS_KEY = "s3tables.secret-access-key" S3TABLES_SESSION_TOKEN = "s3tables.session-token" +S3TABLES_TABLE_BUCKET_ARN = "s3tables.table-bucket-arn" + S3TABLES_ENDPOINT = "s3tables.endpoint" # pyarrow does not support writing to S3 Table buckets as of 2024-12-14 https://github.com/apache/iceberg-python/issues/1404#issuecomment-2543174146 @@ -44,6 +46,8 @@ def __init__(self, name: str, **properties: str): # TODO: implement a proper check for FileIO self.properties[PY_IO_IMPL] = S3TABLES_FILE_IO_DEFAULT + self.table_bucket_arn = self.properties[S3TABLES_TABLE_BUCKET_ARN] + session = boto3.Session( profile_name=properties.get(S3TABLES_PROFILE_NAME), region_name=get_first_property_value(properties, S3TABLES_REGION, AWS_REGION), @@ -57,8 +61,6 @@ def __init__(self, name: str, **properties: str): except UnknownServiceError as e: raise S3TablesError("'s3tables' requires boto3>=1.35.74. 
Current version: {boto3.__version__}.") from e - # TODO: handle malformed properties instead of just raising a key error here - self.table_bucket_arn = self.properties[WAREHOUSE_LOCATION] try: self.s3tables.get_table_bucket(tableBucketARN=self.table_bucket_arn) except self.s3tables.exceptions.NotFoundException as e: diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 2cbdca15c3..67cdeb21c8 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -30,6 +30,12 @@ def table_bucket_arn(): return os.environ["ARN"] +@pytest.fixture +def catalog(table_bucket_arn): + # setting FileIO to FsspecFileIO explicitly is required as pyarrow does not work with S3 Table Buckets yet + properties = {"s3tables.table-bucket-arn": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + return S3TableCatalog(name="test_s3tables_catalog", **properties) + def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn, database_name, table_name): client = boto3.client("s3tables") @@ -54,39 +60,30 @@ def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn, dat def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn): - properties = {"warehouse": f"{table_bucket_arn}-modified"} + properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified"} with pytest.raises(TableBucketNotFound): S3TableCatalog(name="test_s3tables_catalog", **properties) -def test_create_namespace(table_bucket_arn, database_name: str): - properties = {"warehouse": table_bucket_arn} - catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_create_namespace(catalog, database_name: str): catalog.create_namespace(namespace=database_name) namespaces = catalog.list_namespaces() assert (database_name,) in namespaces -def test_load_namespace_properties(table_bucket_arn, database_name: str): - properties = {"warehouse": table_bucket_arn} - catalog = 
S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_load_namespace_properties(catalog, database_name: str): catalog.create_namespace(namespace=database_name) assert database_name in catalog.load_namespace_properties(database_name)["namespace"] -def test_drop_namespace(table_bucket_arn, database_name: str): - properties = {"warehouse": table_bucket_arn} - catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_drop_namespace(catalog, database_name: str): catalog.create_namespace(namespace=database_name) assert (database_name,) in catalog.list_namespaces() catalog.drop_namespace(namespace=database_name) assert (database_name,) not in catalog.list_namespaces() -def test_create_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): - # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet - properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} - catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_create_table(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -95,10 +92,7 @@ def test_create_table(table_bucket_arn, database_name: str, table_name: str, tab assert table == catalog.load_table(identifier) -def test_table_exists(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): - # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet - properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} - catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_table_exists(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) 
catalog.create_namespace(namespace=database_name) @@ -107,10 +101,7 @@ def test_table_exists(table_bucket_arn, database_name: str, table_name: str, tab assert catalog.table_exists(identifier=identifier) -def test_rename_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): - # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet - properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} - catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_rename_table(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -126,10 +117,7 @@ def test_rename_table(table_bucket_arn, database_name: str, table_name: str, tab assert catalog.table_exists(identifier=to_identifier) -def test_list_tables(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): - # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet - properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} - catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_list_tables(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -138,10 +126,7 @@ def test_list_tables(table_bucket_arn, database_name: str, table_name: str, tabl assert catalog.list_tables(namespace=database_name) -def test_drop_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): - # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet - properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} - catalog = 
S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_drop_table(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -153,10 +138,7 @@ def test_drop_table(table_bucket_arn, database_name: str, table_name: str, table catalog.load_table(identifier=identifier) -def test_commit_table(table_bucket_arn, database_name: str, table_name: str, table_schema_nested: Schema): - # setting FileIO to FsspecFileIO explicitly is required as pyarrwo does not work with S3 Table Buckets yet - properties = {"warehouse": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} - catalog = S3TableCatalog(name="test_s3tables_catalog", **properties) +def test_commit_table(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) From 2592610fa7eb79ae7fba14a19f8d337004c32079 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 23 Dec 2024 14:37:04 +0100 Subject: [PATCH 23/57] fix: remove defaulting to FsspecIO --- pyiceberg/catalog/s3tables.py | 5 ----- tests/catalog/test_s3tables.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 4675a9cf76..b8d4ebf672 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -36,15 +36,10 @@ S3TABLES_ENDPOINT = "s3tables.endpoint" -# pyarrow does not support writing to S3 Table buckets as of 2024-12-14 https://github.com/apache/iceberg-python/issues/1404#issuecomment-2543174146 -S3TABLES_FILE_IO_DEFAULT = "pyiceberg.io.fsspec.FsspecFileIO" - class S3TableCatalog(MetastoreCatalog): def __init__(self, name: str, **properties: str): super().__init__(name, **properties) - # TODO: implement a proper check for FileIO - self.properties[PY_IO_IMPL] = S3TABLES_FILE_IO_DEFAULT self.table_bucket_arn = 
self.properties[S3TABLES_TABLE_BUCKET_ARN] diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 67cdeb21c8..c761ab79ad 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -32,7 +32,7 @@ def table_bucket_arn(): @pytest.fixture def catalog(table_bucket_arn): - # setting FileIO to FsspecFileIO explicitly is required as pyarrow does not work with S3 Table Buckets yet + # pyarrow does not support writing to S3 Table buckets as of 2024-12-14 https://github.com/apache/iceberg-python/issues/1404#issuecomment-2543174146 properties = {"s3tables.table-bucket-arn": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} return S3TableCatalog(name="test_s3tables_catalog", **properties) From 798cfb3837645de134c30d27111cd51f1afa6778 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 23 Dec 2024 14:55:12 +0100 Subject: [PATCH 24/57] feat: raise exceptions for invalid namespace/table name --- pyiceberg/catalog/s3tables.py | 31 ++++++++++++------------------- pyiceberg/exceptions.py | 7 +++++++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index b8d4ebf672..7a3c56ae01 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -4,13 +4,15 @@ import boto3 from boto3.session import UnknownServiceError -from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION, WAREHOUSE_LOCATION, MetastoreCatalog, PropertiesUpdateSummary +from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION, MetastoreCatalog, PropertiesUpdateSummary from pyiceberg.exceptions import ( CommitFailedException, NamespaceNotEmptyError, NoSuchTableError, TableBucketNotFound, - S3TablesError + S3TablesError, + InvalidNamespaceName, + InvalidTableName, ) from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, PY_IO_IMPL, load_file_io from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, 
PartitionSpec @@ -36,6 +38,10 @@ S3TABLES_ENDPOINT = "s3tables.endpoint" +# for naming rules see: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html +S3TABLES_VALID_NAME_REGEX = pattern = re.compile("[a-z0-9][a-z0-9_]{2,62}") +S3TABLES_RESERVED_NAMESPACE = "aws_s3_metadata" + class S3TableCatalog(MetastoreCatalog): def __init__(self, name: str, **properties: str): @@ -109,33 +115,20 @@ def create_namespace(self, namespace: Union[str, Identifier], properties: Proper self.s3tables.create_namespace(tableBucketARN=self.table_bucket_arn, namespace=[valid_namespace]) def _validate_namespace_identifier(self, namespace: Union[str, Identifier]) -> str: - # for naming rules see: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html - # TODO: extract into constant variables - pattern = re.compile("[a-z0-9][a-z0-9_]{2,62}") - reserved = "aws_s3_metadata" - namespace = self.identifier_to_database(namespace) - if not pattern.fullmatch(namespace): - ... - - if namespace == reserved: - ... + if not S3TABLES_VALID_NAME_REGEX.fullmatch(namespace) or namespace == S3TABLES_RESERVED_NAMESPACE: + raise InvalidNamespaceName("The specified namespace name is not valid.") return namespace def _validate_database_and_table_identifier(self, identifier: Union[str, Identifier]) -> Tuple[str, str]: - # for naming rules see: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html - # TODO: extract into constant variables - pattern = re.compile("[a-z0-9][a-z0-9_]{2,62}") - namespace, table_name = self.identifier_to_database_and_table(identifier) namespace = self._validate_namespace_identifier(namespace) - if not pattern.fullmatch(table_name): - # TODO: raise proper errors for invalid table_name - ... 
+ if not S3TABLES_VALID_NAME_REGEX.fullmatch(table_name): + raise InvalidTableName("The specified table name is not valid.") return namespace, table_name diff --git a/pyiceberg/exceptions.py b/pyiceberg/exceptions.py index f633e4c143..245fffd3c1 100644 --- a/pyiceberg/exceptions.py +++ b/pyiceberg/exceptions.py @@ -115,6 +115,13 @@ class GenericDynamoDbError(DynamoDbError): class S3TablesError(Exception): pass +class InvalidNamespaceName(S3TablesError): + pass + + +class InvalidTableName(S3TablesError): + pass + class TableBucketNotFound(S3TablesError): pass From 3b402e4c17a5231826833189bef13305c42d750b Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 14:48:10 +0100 Subject: [PATCH 25/57] feat: improve error handling for create_table --- pyiceberg/catalog/s3tables.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 7a3c56ae01..d9ddeff5dc 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -164,17 +164,20 @@ def create_table( io = load_file_io(properties=self.properties, location=metadata_location) # TODO: this triggers unsupported list operation error, setting overwrite=True is a workaround for now - # TODO: we can perform this check manually maybe? self._write_metadata(metadata, io, metadata_location, overwrite=True) - # TODO: after writing need to update table metadata location - # can this operation fail if the version token does not match? 
- self.s3tables.update_table_metadata_location( - tableBucketARN=self.table_bucket_arn, - namespace=namespace, - name=table_name, - versionToken=version_token, - metadataLocation=metadata_location, - ) + + try: + self.s3tables.update_table_metadata_location( + tableBucketARN=self.table_bucket_arn, + namespace=namespace, + name=table_name, + versionToken=version_token, + metadataLocation=metadata_location, + ) + except self.s3tables.exceptions.ConflictException as e: + raise CommitFailedException( + f"Cannot create {namespace}.{table_name} because of a concurrent update to the table version {version_token}." + ) from e return self.load_table(identifier=identifier) From 138f62e5818dfe780adf74389fde046f5cadf8fe Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 14:49:24 +0100 Subject: [PATCH 26/57] feat: improve error handling for delete_table --- pyiceberg/catalog/s3tables.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index d9ddeff5dc..92c8f78077 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -195,13 +195,18 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: except self.s3tables.exceptions.NotFoundException as e: raise NoSuchTableError(f"No table with identifier {identifier} exists.") from e - # TODO: handle conflicts due to versionToken mismatch that might occur - self.s3tables.delete_table( - tableBucketARN=self.table_bucket_arn, - namespace=namespace, - name=table_name, - versionToken=response["versionToken"], - ) + version_token = response["versionToken"] + try: + self.s3tables.delete_table( + tableBucketARN=self.table_bucket_arn, + namespace=namespace, + name=table_name, + versionToken=version_token, + ) + except self.s3tables.exceptions.ConflictException as e: + raise CommitFailedException( + f"Cannot delete {namespace}.{table_name} because of a concurrent update to the table version 
{version_token}." + ) from e def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: # TODO: s3tables only support single level namespaces From 8d64396684abf81040f9ac9331559b58ff473c9b Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 14:50:49 +0100 Subject: [PATCH 27/57] chore: cleanup comments --- pyiceberg/catalog/s3tables.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 92c8f78077..b95f2ba8d2 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -209,7 +209,6 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: ) from e def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: - # TODO: s3tables only support single level namespaces if namespace: namespace = self._validate_namespace_identifier(namespace) paginator = self.s3tables.get_paginator("list_namespaces") @@ -240,7 +239,7 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper def load_table(self, identifier: Union[str, Identifier]) -> Table: namespace, table_name = self._validate_database_and_table_identifier(identifier) - # TODO: raise a NoSuchTableError if it does not exist + try: response = self.s3tables.get_table_metadata_location( tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name From 642ccd83c6ce841500eb6e70b340b2b70d5ce4e4 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 14:55:16 +0100 Subject: [PATCH 28/57] feat: catch missing metadata for load_table --- pyiceberg/catalog/s3tables.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index b95f2ba8d2..077d93d7d0 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -246,8 +246,10 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: ) except 
self.s3tables.exceptions.NotFoundException as e: raise NoSuchTableError(f"No table with identifier {identifier} exists.") from e - # TODO: we might need to catch if table is not initialized i.e. does not have metadata setup yet - metadata_location = response["metadataLocation"] + + metadata_location = response.get("metadataLocation") + if not metadata_location: + raise S3TablesError("No table metadata found.") io = load_file_io(properties=self.properties, location=metadata_location) file = io.new_input(metadata_location) From 3c17fc0448c1e461592ac5a056c6dad562c9c4e3 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 16:02:17 +0100 Subject: [PATCH 29/57] feat: handle missing namespace and preexisting table --- pyiceberg/catalog/s3tables.py | 15 ++++++++++++--- tests/catalog/test_s3tables.py | 19 ++++++++++++++++++- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 077d93d7d0..6ae2db3f32 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -13,6 +13,8 @@ S3TablesError, InvalidNamespaceName, InvalidTableName, + TableAlreadyExistsError, + NoSuchNamespaceError ) from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, PY_IO_IMPL, load_file_io from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec @@ -145,10 +147,17 @@ def create_table( schema: Schema = self._convert_schema_if_needed(schema) # type: ignore - # TODO: check whether namespace exists and if it does, whether table_name already exists - self.s3tables.create_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG") + try: + self.s3tables.create_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG") + except self.s3tables.exceptions.NotFoundException as e: + raise NoSuchNamespaceError( + f"Cannot create {namespace}.{table_name} because no 
such namespace exists." + ) from e + except self.s3tables.exceptions.ConflictException as e: + raise TableAlreadyExistsError( + f"Cannot create {namespace}.{table_name} because a table of the same name already exists in the namespace." + ) from e - # location is given by s3 table bucket response = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) version_token = response["versionToken"] diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index c761ab79ad..cd764737f2 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -4,7 +4,7 @@ import pytest from pyiceberg.catalog.s3tables import S3TableCatalog -from pyiceberg.exceptions import NoSuchTableError, TableBucketNotFound +from pyiceberg.exceptions import NoSuchTableError, TableBucketNotFound, NoSuchNamespaceError from pyiceberg.schema import Schema from pyiceberg.types import IntegerType @@ -59,6 +59,14 @@ def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn, dat ) +def test_s3tables_api_raises_on_preexisting_table(table_bucket_arn, database_name, table_name): + client = boto3.client("s3tables") + client.create_namespace(tableBucketARN=table_bucket_arn, namespace=[database_name]) + client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") + with pytest.raises(client.exceptions.ConflictException): + client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") + + def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn): properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified"} with pytest.raises(TableBucketNotFound): @@ -92,6 +100,15 @@ def test_create_table(catalog, database_name: str, table_name: str, table_schema assert table == catalog.load_table(identifier) +def test_create_table_in_invalid_namespace_raises_exception(catalog, database_name: str, 
table_name: str, table_schema_nested: Schema): + identifier = (database_name, table_name) + + with pytest.raises(NoSuchNamespaceError): + catalog.create_table(identifier=identifier, schema=table_schema_nested) + + + + def test_table_exists(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) From ec647fb068dac8d0977bad06e61ccb715bd7dc49 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 16:12:21 +0100 Subject: [PATCH 30/57] feat: handle versionToken and table in an atomic operation --- pyiceberg/catalog/s3tables.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 6ae2db3f32..e8bbebe960 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -75,13 +75,7 @@ def commit_table( table_identifier = table.name() database_name, table_name = self.identifier_to_database_and_table(table_identifier, NoSuchTableError) - # TODO: loading table and getting versionToken should be an atomic operation, otherwise - # the table can change between those two API calls without noticing - # -> change this into an internal API that returns table information along with versionToken - current_table = self.load_table(identifier=table_identifier) - version_token = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=database_name, name=table_name)[ - "versionToken" - ] + current_table, version_token = self._load_table_and_version(identifier=table_identifier) updated_staged_table = self._update_and_stage_table(current_table, table_identifier, requirements, updates) if current_table and updated_staged_table.metadata == current_table.metadata: @@ -172,7 +166,7 @@ def create_table( ) io = load_file_io(properties=self.properties, location=metadata_location) - # TODO: this triggers unsupported list operation error, setting overwrite=True is a workaround for now + # this 
triggers unsupported list operation error, setting overwrite=True is a workaround for now self._write_metadata(metadata, io, metadata_location, overwrite=True) try: @@ -247,6 +241,10 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper } def load_table(self, identifier: Union[str, Identifier]) -> Table: + table, _ = self._load_table_and_version(identifier) + return table + + def _load_table_and_version(self, identifier: Union[str, Identifier]) -> Tuple[Table, str]: namespace, table_name = self._validate_database_and_table_identifier(identifier) try: @@ -260,6 +258,8 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: if not metadata_location: raise S3TablesError("No table metadata found.") + version_token = response["versionToken"] + io = load_file_io(properties=self.properties, location=metadata_location) file = io.new_input(metadata_location) metadata = FromInputFile.table_metadata(file) @@ -269,7 +269,7 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: metadata_location=metadata_location, io=self._load_file_io(metadata.properties, metadata_location), catalog=self, - ) + ), version_token def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: from_namespace, from_table_name = self._validate_database_and_table_identifier(from_identifier) From 9396e7373a1a866fbc2bd811e12befa20f0705bc Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 16:13:00 +0100 Subject: [PATCH 31/57] chore: run formatter --- pyiceberg/catalog/s3tables.py | 18 +++++++++--------- pyiceberg/exceptions.py | 1 + tests/catalog/test_s3tables.py | 9 +++++---- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index e8bbebe960..85971d9c4b 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -7,16 +7,16 @@ from pyiceberg.catalog import 
DEPRECATED_BOTOCORE_SESSION, MetastoreCatalog, PropertiesUpdateSummary from pyiceberg.exceptions import ( CommitFailedException, + InvalidNamespaceName, + InvalidTableName, NamespaceNotEmptyError, + NoSuchNamespaceError, NoSuchTableError, - TableBucketNotFound, S3TablesError, - InvalidNamespaceName, - InvalidTableName, TableAlreadyExistsError, - NoSuchNamespaceError + TableBucketNotFound, ) -from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, PY_IO_IMPL, load_file_io +from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, load_file_io from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.serializers import FromInputFile @@ -142,11 +142,11 @@ def create_table( schema: Schema = self._convert_schema_if_needed(schema) # type: ignore try: - self.s3tables.create_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG") + self.s3tables.create_table( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG" + ) except self.s3tables.exceptions.NotFoundException as e: - raise NoSuchNamespaceError( - f"Cannot create {namespace}.{table_name} because no such namespace exists." - ) from e + raise NoSuchNamespaceError(f"Cannot create {namespace}.{table_name} because no such namespace exists.") from e except self.s3tables.exceptions.ConflictException as e: raise TableAlreadyExistsError( f"Cannot create {namespace}.{table_name} because a table of the same name already exists in the namespace." 
diff --git a/pyiceberg/exceptions.py b/pyiceberg/exceptions.py index 245fffd3c1..8367d91b79 100644 --- a/pyiceberg/exceptions.py +++ b/pyiceberg/exceptions.py @@ -115,6 +115,7 @@ class GenericDynamoDbError(DynamoDbError): class S3TablesError(Exception): pass + class InvalidNamespaceName(S3TablesError): pass diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index cd764737f2..d379e73b82 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -4,7 +4,7 @@ import pytest from pyiceberg.catalog.s3tables import S3TableCatalog -from pyiceberg.exceptions import NoSuchTableError, TableBucketNotFound, NoSuchNamespaceError +from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchTableError, TableBucketNotFound from pyiceberg.schema import Schema from pyiceberg.types import IntegerType @@ -30,6 +30,7 @@ def table_bucket_arn(): return os.environ["ARN"] + @pytest.fixture def catalog(table_bucket_arn): # pyarrow does not support writing to S3 Table buckets as of 2024-12-14 https://github.com/apache/iceberg-python/issues/1404#issuecomment-2543174146 @@ -100,15 +101,15 @@ def test_create_table(catalog, database_name: str, table_name: str, table_schema assert table == catalog.load_table(identifier) -def test_create_table_in_invalid_namespace_raises_exception(catalog, database_name: str, table_name: str, table_schema_nested: Schema): +def test_create_table_in_invalid_namespace_raises_exception( + catalog, database_name: str, table_name: str, table_schema_nested: Schema +): identifier = (database_name, table_name) with pytest.raises(NoSuchNamespaceError): catalog.create_table(identifier=identifier, schema=table_schema_nested) - - def test_table_exists(catalog, database_name: str, table_name: str, table_schema_nested: Schema): identifier = (database_name, table_name) From 11f6f66c1f7fa528451b374d06e20323e4e779e5 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 29 Dec 2024 16:17:09 +0100 Subject: [PATCH 32/57] chore: add 
type hints for tests --- pyiceberg/catalog/s3tables.py | 15 +++++++------- tests/catalog/test_s3tables.py | 36 +++++++++++++++++----------------- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 85971d9c4b..fae82b0ee8 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union import boto3 -from boto3.session import UnknownServiceError from pyiceberg.catalog import DEPRECATED_BOTOCORE_SESSION, MetastoreCatalog, PropertiesUpdateSummary from pyiceberg.exceptions import ( @@ -20,10 +19,10 @@ from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.serializers import FromInputFile -from pyiceberg.table import CommitTableResponse, Table, TableRequirement +from pyiceberg.table import CommitTableResponse, Table from pyiceberg.table.metadata import new_table_metadata from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder -from pyiceberg.table.update import TableUpdate +from pyiceberg.table.update import TableRequirement, TableUpdate from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties from pyiceberg.utils.properties import get_first_property_value @@ -61,7 +60,7 @@ def __init__(self, name: str, **properties: str): ) try: self.s3tables = session.client("s3tables", endpoint_url=properties.get(S3TABLES_ENDPOINT)) - except UnknownServiceError as e: + except boto3.session.UnknownServiceError as e: raise S3TablesError("'s3tables' requires boto3>=1.35.74. Current version: {boto3.__version__}.") from e try: @@ -106,7 +105,7 @@ def commit_table( metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location ) - def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = ...) 
-> None: + def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None: valid_namespace: str = self._validate_namespace_identifier(namespace) self.s3tables.create_namespace(tableBucketARN=self.table_bucket_arn, namespace=[valid_namespace]) @@ -155,10 +154,10 @@ def create_table( response = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) version_token = response["versionToken"] - location = response["warehouseLocation"] - metadata_location = self._get_metadata_location(location=location) + warehouse_location = response["warehouseLocation"] + metadata_location = self._get_metadata_location(location=warehouse_location) metadata = new_table_metadata( - location=location, + location=warehouse_location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index d379e73b82..101eae485e 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -10,19 +10,19 @@ @pytest.fixture -def database_name(database_name): +def database_name(database_name: str) -> str: # naming rules prevent "-" in namespaces for s3 table buckets return database_name.replace("-", "_") @pytest.fixture -def table_name(table_name): +def table_name(table_name: str) -> str: # naming rules prevent "-" in table namees for s3 table buckets return table_name.replace("-", "_") @pytest.fixture -def table_bucket_arn(): +def table_bucket_arn() -> str: import os # since the moto library does not support s3tables as of 2024-12-14 we have to test against a real AWS endpoint @@ -32,13 +32,13 @@ def table_bucket_arn(): @pytest.fixture -def catalog(table_bucket_arn): +def catalog(table_bucket_arn: str) -> S3TableCatalog: # pyarrow does not support writing to S3 Table buckets as of 2024-12-14 https://github.com/apache/iceberg-python/issues/1404#issuecomment-2543174146 properties = 
{"s3tables.table-bucket-arn": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} return S3TableCatalog(name="test_s3tables_catalog", **properties) -def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn, database_name, table_name): +def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn: str, database_name: str, table_name: str) -> None: client = boto3.client("s3tables") client.create_namespace(tableBucketARN=table_bucket_arn, namespace=[database_name]) response = client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") @@ -60,7 +60,7 @@ def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn, dat ) -def test_s3tables_api_raises_on_preexisting_table(table_bucket_arn, database_name, table_name): +def test_s3tables_api_raises_on_preexisting_table(table_bucket_arn: str, database_name: str, table_name: str) -> None: client = boto3.client("s3tables") client.create_namespace(tableBucketARN=table_bucket_arn, namespace=[database_name]) client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") @@ -68,31 +68,31 @@ def test_s3tables_api_raises_on_preexisting_table(table_bucket_arn, database_nam client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") -def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn): +def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn: str) -> None: properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified"} with pytest.raises(TableBucketNotFound): S3TableCatalog(name="test_s3tables_catalog", **properties) -def test_create_namespace(catalog, database_name: str): +def test_create_namespace(catalog: S3TableCatalog, database_name: str) -> None: catalog.create_namespace(namespace=database_name) namespaces = catalog.list_namespaces() assert 
(database_name,) in namespaces -def test_load_namespace_properties(catalog, database_name: str): +def test_load_namespace_properties(catalog: S3TableCatalog, database_name: str) -> None: catalog.create_namespace(namespace=database_name) assert database_name in catalog.load_namespace_properties(database_name)["namespace"] -def test_drop_namespace(catalog, database_name: str): +def test_drop_namespace(catalog: S3TableCatalog, database_name: str) -> None: catalog.create_namespace(namespace=database_name) assert (database_name,) in catalog.list_namespaces() catalog.drop_namespace(namespace=database_name) assert (database_name,) not in catalog.list_namespaces() -def test_create_table(catalog, database_name: str, table_name: str, table_schema_nested: Schema): +def test_create_table(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -102,15 +102,15 @@ def test_create_table(catalog, database_name: str, table_name: str, table_schema def test_create_table_in_invalid_namespace_raises_exception( - catalog, database_name: str, table_name: str, table_schema_nested: Schema -): + catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema +) -> None: identifier = (database_name, table_name) with pytest.raises(NoSuchNamespaceError): catalog.create_table(identifier=identifier, schema=table_schema_nested) -def test_table_exists(catalog, database_name: str, table_name: str, table_schema_nested: Schema): +def test_table_exists(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -119,7 +119,7 @@ def test_table_exists(catalog, database_name: str, table_name: str, table_schema assert catalog.table_exists(identifier=identifier) -def test_rename_table(catalog, database_name: str, 
table_name: str, table_schema_nested: Schema): +def test_rename_table(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -135,7 +135,7 @@ def test_rename_table(catalog, database_name: str, table_name: str, table_schema assert catalog.table_exists(identifier=to_identifier) -def test_list_tables(catalog, database_name: str, table_name: str, table_schema_nested: Schema): +def test_list_tables(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -144,7 +144,7 @@ def test_list_tables(catalog, database_name: str, table_name: str, table_schema_ assert catalog.list_tables(namespace=database_name) -def test_drop_table(catalog, database_name: str, table_name: str, table_schema_nested: Schema): +def test_drop_table(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -156,7 +156,7 @@ def test_drop_table(catalog, database_name: str, table_name: str, table_schema_n catalog.load_table(identifier=identifier) -def test_commit_table(catalog, database_name: str, table_name: str, table_schema_nested: Schema): +def test_commit_table(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) From 92fad8e70975d1cb98785f35a522ed0b374d9c9c Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 4 Jan 2025 13:29:37 +0100 Subject: [PATCH 33/57] fix: no longer enforce FsspecFileIO --- tests/catalog/test_s3tables.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py 
index 101eae485e..76663de25a 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -33,8 +33,7 @@ def table_bucket_arn() -> str: @pytest.fixture def catalog(table_bucket_arn: str) -> S3TableCatalog: - # pyarrow does not support writing to S3 Table buckets as of 2024-12-14 https://github.com/apache/iceberg-python/issues/1404#issuecomment-2543174146 - properties = {"s3tables.table-bucket-arn": table_bucket_arn, "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + properties = {"s3tables.table-bucket-arn": table_bucket_arn} return S3TableCatalog(name="test_s3tables_catalog", **properties) From cb23a72ed21947f4f6666ea2d6ebf859c9ed47eb Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 4 Jan 2025 13:37:01 +0100 Subject: [PATCH 34/57] test: remove tests for boto3 behavior --- tests/catalog/test_s3tables.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 76663de25a..d9f05df430 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -37,36 +37,6 @@ def catalog(table_bucket_arn: str) -> S3TableCatalog: return S3TableCatalog(name="test_s3tables_catalog", **properties) -def test_s3tables_api_raises_on_conflicting_version_tokens(table_bucket_arn: str, database_name: str, table_name: str) -> None: - client = boto3.client("s3tables") - client.create_namespace(tableBucketARN=table_bucket_arn, namespace=[database_name]) - response = client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") - version_token = response["versionToken"] - scrambled_version_token = version_token[::-1] - - warehouse_location = client.get_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name)[ - "warehouseLocation" - ] - metadata_location = f"{warehouse_location}/metadata/00001-{uuid.uuid4()}.metadata.json" - - with 
pytest.raises(client.exceptions.ConflictException): - client.update_table_metadata_location( - tableBucketARN=table_bucket_arn, - namespace=database_name, - name=table_name, - versionToken=scrambled_version_token, - metadataLocation=metadata_location, - ) - - -def test_s3tables_api_raises_on_preexisting_table(table_bucket_arn: str, database_name: str, table_name: str) -> None: - client = boto3.client("s3tables") - client.create_namespace(tableBucketARN=table_bucket_arn, namespace=[database_name]) - client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") - with pytest.raises(client.exceptions.ConflictException): - client.create_table(tableBucketARN=table_bucket_arn, namespace=database_name, name=table_name, format="ICEBERG") - - def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn: str) -> None: properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified"} with pytest.raises(TableBucketNotFound): From 5251ffa57493a0c243ee3ff4ccdc560fac165d0f Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 4 Jan 2025 13:44:14 +0100 Subject: [PATCH 35/57] test: verify column was created on commit --- tests/catalog/test_s3tables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index d9f05df430..15d9ba57ed 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -147,3 +147,4 @@ def test_commit_table(catalog: S3TableCatalog, database_name: str, table_name: s assert updated_table_metadata.last_updated_ms > last_updated_ms assert updated_table_metadata.metadata_log[0].metadata_file == original_table_metadata_location assert updated_table_metadata.metadata_log[0].timestamp_ms == original_table_last_updated_ms + assert table.schema().columns[-1].name == "b" From b544b5073ca9d0a9ebc790f7158d70cec6457c2f Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sat, 4 Jan 2025 13:57:59 +0100 Subject: [PATCH 
36/57] test: verify new data can be committed to table --- tests/catalog/test_s3tables.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 15d9ba57ed..f97371540b 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -1,6 +1,3 @@ -import uuid - -import boto3 import pytest from pyiceberg.catalog.s3tables import S3TableCatalog @@ -125,7 +122,9 @@ def test_drop_table(catalog: S3TableCatalog, database_name: str, table_name: str catalog.load_table(identifier=identifier) -def test_commit_table(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: +def test_commit_new_column_to_table( + catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema +) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -148,3 +147,27 @@ def test_commit_table(catalog: S3TableCatalog, database_name: str, table_name: s assert updated_table_metadata.metadata_log[0].metadata_file == original_table_metadata_location assert updated_table_metadata.metadata_log[0].timestamp_ms == original_table_last_updated_ms assert table.schema().columns[-1].name == "b" + + +def test_commit_new_data_to_table( + catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema +) -> None: + identifier = (database_name, table_name) + + catalog.create_namespace(namespace=database_name) + table = catalog.create_table(identifier=identifier, schema=table_schema_nested) + + row_count = table.scan().to_arrow().num_rows + last_updated_ms = table.metadata.last_updated_ms + original_table_metadata_location = table.metadata_location + original_table_last_updated_ms = table.metadata.last_updated_ms + + transaction = table.transaction() + transaction.append(table.scan().to_arrow()) + transaction.commit_transaction() + + 
updated_table_metadata = table.metadata + assert updated_table_metadata.last_updated_ms > last_updated_ms + assert updated_table_metadata.metadata_log[0].metadata_file == original_table_metadata_location + assert updated_table_metadata.metadata_log[0].timestamp_ms == original_table_last_updated_ms + assert table.scan().to_arrow().num_rows == 2 * row_count From a02678a37a738d2bf52efd76be1a2de9155abcd9 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 5 Jan 2025 17:11:33 +0100 Subject: [PATCH 37/57] docs: update documentation for create_table --- pyiceberg/catalog/s3tables.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index fae82b0ee8..0485bb3072 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -140,6 +140,9 @@ def create_table( schema: Schema = self._convert_schema_if_needed(schema) # type: ignore + # creating a new table with S3 Tables is a two step process. We first have to create an S3 Table with the + # S3 Tables API and then write the new metadata.json to the warehouseLocaiton associated with the newly + # created S3 Table. 
try: self.s3tables.create_table( tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG" @@ -165,7 +168,8 @@ def create_table( ) io = load_file_io(properties=self.properties, location=metadata_location) - # this triggers unsupported list operation error, setting overwrite=True is a workaround for now + # this triggers unsupported list operation error as S3 Table Buckets only support a subset of the S3 Bucket API, + # setting overwrite=True is a workaround for now since it prevents a call to list_objects self._write_metadata(metadata, io, metadata_location, overwrite=True) try: From 1facfe18962cd5d43f2a2a07c6a247fb6e529762 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Sun, 5 Jan 2025 17:25:29 +0100 Subject: [PATCH 38/57] test: set AWS regions explicitly --- tests/catalog/test_s3tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index f97371540b..930b11ec9a 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -30,12 +30,12 @@ def table_bucket_arn() -> str: @pytest.fixture def catalog(table_bucket_arn: str) -> S3TableCatalog: - properties = {"s3tables.table-bucket-arn": table_bucket_arn} + properties = {"s3tables.table-bucket-arn": table_bucket_arn, "s3tables.region": "us-east-1", "s3.region": "us-east-1"} return S3TableCatalog(name="test_s3tables_catalog", **properties) def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn: str) -> None: - properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified"} + properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified", "s3tables.region": "us-east-1"} with pytest.raises(TableBucketNotFound): S3TableCatalog(name="test_s3tables_catalog", **properties) From 096ab6f8ad3875e06badf610f9f7be7471a116f7 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 6 Jan 2025 17:19:43 +0100 Subject: [PATCH 39/57] Apply suggestions 
from code review Co-authored-by: Kevin Liu --- pyiceberg/catalog/s3tables.py | 2 +- tests/catalog/test_s3tables.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 0485bb3072..1408322ea7 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -141,7 +141,7 @@ def create_table( schema: Schema = self._convert_schema_if_needed(schema) # type: ignore # creating a new table with S3 Tables is a two step process. We first have to create an S3 Table with the - # S3 Tables API and then write the new metadata.json to the warehouseLocaiton associated with the newly + # S3 Tables API and then write the new metadata.json to the warehouseLocation associated with the newly # created S3 Table. try: self.s3tables.create_table( diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index 930b11ec9a..d9f48e363a 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -26,11 +26,15 @@ def table_bucket_arn() -> str: # in one of the supported regions. 
return os.environ["ARN"] +@pytest.fixture +def aws_region() -> str: + import os + return os.environ["AWS_REGION"] @pytest.fixture -def catalog(table_bucket_arn: str) -> S3TableCatalog: - properties = {"s3tables.table-bucket-arn": table_bucket_arn, "s3tables.region": "us-east-1", "s3.region": "us-east-1"} +def catalog(table_bucket_arn: str, aws_region: str) -> S3TableCatalog: + properties = {"s3tables.table-bucket-arn": table_bucket_arn, "s3tables.region": aws_region} return S3TableCatalog(name="test_s3tables_catalog", **properties) From 4fd1d61e76e1f9c156c83f3c749fe217589c6153 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 6 Jan 2025 17:52:34 +0100 Subject: [PATCH 40/57] test: commit new data to table --- tests/catalog/test_s3tables.py | 62 ++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index d9f48e363a..d1f6e6ffd1 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -26,12 +26,15 @@ def table_bucket_arn() -> str: # in one of the supported regions. 
return os.environ["ARN"] + + @pytest.fixture def aws_region() -> str: import os return os.environ["AWS_REGION"] + @pytest.fixture def catalog(table_bucket_arn: str, aws_region: str) -> S3TableCatalog: properties = {"s3tables.table-bucket-arn": table_bucket_arn, "s3tables.region": aws_region} @@ -153,15 +156,62 @@ def test_commit_new_column_to_table( assert table.schema().columns[-1].name == "b" -def test_commit_new_data_to_table( - catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema -) -> None: +def test_write_pyarrow_table(catalog: S3TableCatalog, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) + catalog.create_namespace(namespace=database_name) + import pyarrow as pa + + pyarrow_table = pa.Table.from_arrays( + [ + pa.array([None, "A", "B", "C"]), # 'foo' column + pa.array([1, 2, 3, 4]), # 'bar' column + pa.array([True, None, False, True]), # 'baz' column + pa.array([None, "A", "B", "C"]), # 'large' column + ], + schema=pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("large", pa.large_string(), nullable=True), + ] + ), + ) + table = catalog.create_table(identifier=identifier, schema=pyarrow_table.schema) + table.append(pyarrow_table) + + assert table.scan().to_arrow().num_rows == pyarrow_table.num_rows + + +def test_commit_new_data_to_table(catalog: S3TableCatalog, database_name: str, table_name: str) -> None: + identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) - table = catalog.create_table(identifier=identifier, schema=table_schema_nested) + + import pyarrow as pa + + pyarrow_table = pa.Table.from_arrays( + [ + pa.array([None, "A", "B", "C"]), # 'foo' column + pa.array([1, 2, 3, 4]), # 'bar' column + pa.array([True, None, False, True]), # 'baz' column + pa.array([None, "A", "B", "C"]), # 'large' column + ], + 
schema=pa.schema( + [ + pa.field("foo", pa.large_string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("large", pa.large_string(), nullable=True), + ] + ), + ) + + table = catalog.create_table(identifier=identifier, schema=pyarrow_table.schema) + table.append(pyarrow_table) row_count = table.scan().to_arrow().num_rows + assert row_count last_updated_ms = table.metadata.last_updated_ms original_table_metadata_location = table.metadata_location original_table_last_updated_ms = table.metadata.last_updated_ms @@ -172,6 +222,6 @@ def test_commit_new_data_to_table( updated_table_metadata = table.metadata assert updated_table_metadata.last_updated_ms > last_updated_ms - assert updated_table_metadata.metadata_log[0].metadata_file == original_table_metadata_location - assert updated_table_metadata.metadata_log[0].timestamp_ms == original_table_last_updated_ms + assert updated_table_metadata.metadata_log[-1].metadata_file == original_table_metadata_location + assert updated_table_metadata.metadata_log[-1].timestamp_ms == original_table_last_updated_ms assert table.scan().to_arrow().num_rows == 2 * row_count From f7b960aa5a30bc3e6d86260b576ae85d37737681 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 6 Jan 2025 17:56:05 +0100 Subject: [PATCH 41/57] feat: clarify update_namespace_properties error --- pyiceberg/catalog/s3tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 1408322ea7..01c42f9e3b 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -305,7 +305,7 @@ def update_namespace_properties( self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT ) -> PropertiesUpdateSummary: # namespace properties are read only - raise NotImplementedError + raise NotImplementedError("Namespace properties are read only") def 
purge_table(self, identifier: Union[str, Identifier]) -> None: # purge is not supported as s3tables doesn't support delete operations From c72164bcd523ca3f83b2dd6fb9763d07fc8586ff Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Mon, 6 Jan 2025 17:59:55 +0100 Subject: [PATCH 42/57] feat: raise error when setting custom namespace properties --- pyiceberg/catalog/s3tables.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 01c42f9e3b..d5f5bbc305 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -106,6 +106,8 @@ def commit_table( ) def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None: + if properties: + raise NotImplementedError("Setting namespace properties is not supported.") valid_namespace: str = self._validate_namespace_identifier(namespace) self.s3tables.create_namespace(tableBucketARN=self.table_bucket_arn, namespace=[valid_namespace]) @@ -305,7 +307,7 @@ def update_namespace_properties( self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT ) -> PropertiesUpdateSummary: # namespace properties are read only - raise NotImplementedError("Namespace properties are read only") + raise NotImplementedError("Namespace properties are read only.") def purge_table(self, identifier: Union[str, Identifier]) -> None: # purge is not supported as s3tables doesn't support delete operations From c90bf3165a343beb604da3405f1281e16b061caf Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 08:57:31 +0100 Subject: [PATCH 43/57] refactor: change S3TableCatalog -> S3TablesCatalog --- pyiceberg/catalog/s3tables.py | 2 +- tests/catalog/test_s3tables.py | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index d5f5bbc305..ed6a8027f2 
100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -44,7 +44,7 @@ S3TABLES_RESERVED_NAMESPACE = "aws_s3_metadata" -class S3TableCatalog(MetastoreCatalog): +class S3TablesCatalog(MetastoreCatalog): def __init__(self, name: str, **properties: str): super().__init__(name, **properties) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index d1f6e6ffd1..c5cf959d4a 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -1,6 +1,6 @@ import pytest -from pyiceberg.catalog.s3tables import S3TableCatalog +from pyiceberg.catalog.s3tables import S3TablesCatalog from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchTableError, TableBucketNotFound from pyiceberg.schema import Schema from pyiceberg.types import IntegerType @@ -36,36 +36,36 @@ def aws_region() -> str: @pytest.fixture -def catalog(table_bucket_arn: str, aws_region: str) -> S3TableCatalog: +def catalog(table_bucket_arn: str, aws_region: str) -> S3TablesCatalog: properties = {"s3tables.table-bucket-arn": table_bucket_arn, "s3tables.region": aws_region} - return S3TableCatalog(name="test_s3tables_catalog", **properties) + return S3TablesCatalog(name="test_s3tables_catalog", **properties) def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn: str) -> None: properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified", "s3tables.region": "us-east-1"} with pytest.raises(TableBucketNotFound): - S3TableCatalog(name="test_s3tables_catalog", **properties) + S3TablesCatalog(name="test_s3tables_catalog", **properties) -def test_create_namespace(catalog: S3TableCatalog, database_name: str) -> None: +def test_create_namespace(catalog: S3TablesCatalog, database_name: str) -> None: catalog.create_namespace(namespace=database_name) namespaces = catalog.list_namespaces() assert (database_name,) in namespaces -def test_load_namespace_properties(catalog: S3TableCatalog, database_name: str) -> None: +def 
test_load_namespace_properties(catalog: S3TablesCatalog, database_name: str) -> None: catalog.create_namespace(namespace=database_name) assert database_name in catalog.load_namespace_properties(database_name)["namespace"] -def test_drop_namespace(catalog: S3TableCatalog, database_name: str) -> None: +def test_drop_namespace(catalog: S3TablesCatalog, database_name: str) -> None: catalog.create_namespace(namespace=database_name) assert (database_name,) in catalog.list_namespaces() catalog.drop_namespace(namespace=database_name) assert (database_name,) not in catalog.list_namespaces() -def test_create_table(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: +def test_create_table(catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -75,7 +75,7 @@ def test_create_table(catalog: S3TableCatalog, database_name: str, table_name: s def test_create_table_in_invalid_namespace_raises_exception( - catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema + catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: Schema ) -> None: identifier = (database_name, table_name) @@ -83,7 +83,7 @@ def test_create_table_in_invalid_namespace_raises_exception( catalog.create_table(identifier=identifier, schema=table_schema_nested) -def test_table_exists(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: +def test_table_exists(catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -92,7 +92,7 @@ def test_table_exists(catalog: S3TableCatalog, database_name: str, table_name: s assert catalog.table_exists(identifier=identifier) -def test_rename_table(catalog: 
S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: +def test_rename_table(catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -108,7 +108,7 @@ def test_rename_table(catalog: S3TableCatalog, database_name: str, table_name: s assert catalog.table_exists(identifier=to_identifier) -def test_list_tables(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: +def test_list_tables(catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -117,7 +117,7 @@ def test_list_tables(catalog: S3TableCatalog, database_name: str, table_name: st assert catalog.list_tables(namespace=database_name) -def test_drop_table(catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: +def test_drop_table(catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: Schema) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) @@ -130,7 +130,7 @@ def test_drop_table(catalog: S3TableCatalog, database_name: str, table_name: str def test_commit_new_column_to_table( - catalog: S3TableCatalog, database_name: str, table_name: str, table_schema_nested: Schema + catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: Schema ) -> None: identifier = (database_name, table_name) @@ -156,7 +156,7 @@ def test_commit_new_column_to_table( assert table.schema().columns[-1].name == "b" -def test_write_pyarrow_table(catalog: S3TableCatalog, database_name: str, table_name: str) -> None: +def test_write_pyarrow_table(catalog: S3TablesCatalog, database_name: str, table_name: str) -> None: identifier = (database_name, 
table_name) catalog.create_namespace(namespace=database_name) @@ -184,7 +184,7 @@ def test_write_pyarrow_table(catalog: S3TableCatalog, database_name: str, table_ assert table.scan().to_arrow().num_rows == pyarrow_table.num_rows -def test_commit_new_data_to_table(catalog: S3TableCatalog, database_name: str, table_name: str) -> None: +def test_commit_new_data_to_table(catalog: S3TablesCatalog, database_name: str, table_name: str) -> None: identifier = (database_name, table_name) catalog.create_namespace(namespace=database_name) From 78bbe54c1bd1eebb554eebd6e358280e5cfda48f Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 08:59:42 +0100 Subject: [PATCH 44/57] feat: raise error on specified table location --- pyiceberg/catalog/s3tables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index ed6a8027f2..a732afcdd9 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -138,6 +138,8 @@ def create_table( sort_order: SortOrder = UNSORTED_SORT_ORDER, properties: Properties = EMPTY_DICT, ) -> Table: + if location: + raise NotImplementedError("S3 Tables does not support user specified table locations.") namespace, table_name = self._validate_database_and_table_identifier(identifier) schema: Schema = self._convert_schema_if_needed(schema) # type: ignore From bd38d15cf08ef932b8fcdd2b056505358d751463 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 17:57:00 +0100 Subject: [PATCH 45/57] feat: return empty list when querying a hierarchical namespace --- pyiceberg/catalog/s3tables.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index a732afcdd9..0237dcd0ae 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -220,7 +220,8 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: def list_namespaces(self, namespace: Union[str, 
Identifier] = ()) -> List[Identifier]: if namespace: - namespace = self._validate_namespace_identifier(namespace) + # hierarchical namespaces are not supported + return [] paginator = self.s3tables.get_paginator("list_namespaces") namespaces: List[Identifier] = [] From 85d50fe545e85955b566f5d596ff24b4dff750dc Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 18:00:22 +0100 Subject: [PATCH 46/57] refactor: use get_table_metadata_location instead of get_table --- pyiceberg/catalog/s3tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 0237dcd0ae..6384823705 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -158,7 +158,7 @@ def create_table( f"Cannot create {namespace}.{table_name} because a table of the same name already exists in the namespace." ) from e - response = self.s3tables.get_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) + response = self.s3tables.get_table_metadata_location(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) version_token = response["versionToken"] warehouse_location = response["warehouseLocation"] From c8434d4b28eabe657eec7d4deace2090b7352da2 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 18:02:57 +0100 Subject: [PATCH 47/57] refactor: extract 'ICEBERG' table format into constant --- pyiceberg/catalog/s3tables.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 6384823705..efc22a42c3 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -43,6 +43,8 @@ S3TABLES_VALID_NAME_REGEX = pattern = re.compile("[a-z0-9][a-z0-9_]{2,62}") S3TABLES_RESERVED_NAMESPACE = "aws_s3_metadata" +S3TABLES_FORMAT = "ICEBERG" + class S3TablesCatalog(MetastoreCatalog): def __init__(self, name: str, **properties: str): @@ -149,7 +151,7 
@@ def create_table( # created S3 Table. try: self.s3tables.create_table( - tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format="ICEBERG" + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format=S3TABLES_FORMAT ) except self.s3tables.exceptions.NotFoundException as e: raise NoSuchNamespaceError(f"Cannot create {namespace}.{table_name} because no such namespace exists.") from e From e479c0447f6ec26c5ec37858e7267413bfa60e50 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 18:05:44 +0100 Subject: [PATCH 48/57] feat: change s3tables.table-bucket-arn -> s3tables.warehouse --- pyiceberg/catalog/s3tables.py | 2 +- tests/catalog/test_s3tables.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index efc22a42c3..4d22807687 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -35,7 +35,7 @@ S3TABLES_SECRET_ACCESS_KEY = "s3tables.secret-access-key" S3TABLES_SESSION_TOKEN = "s3tables.session-token" -S3TABLES_TABLE_BUCKET_ARN = "s3tables.table-bucket-arn" +S3TABLES_TABLE_BUCKET_ARN = "s3tables.warehouse" S3TABLES_ENDPOINT = "s3tables.endpoint" diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/test_s3tables.py index c5cf959d4a..dc3bb88238 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/test_s3tables.py @@ -37,12 +37,12 @@ def aws_region() -> str: @pytest.fixture def catalog(table_bucket_arn: str, aws_region: str) -> S3TablesCatalog: - properties = {"s3tables.table-bucket-arn": table_bucket_arn, "s3tables.region": aws_region} + properties = {"s3tables.warehouse": table_bucket_arn, "s3tables.region": aws_region} return S3TablesCatalog(name="test_s3tables_catalog", **properties) def test_creating_catalog_validates_s3_table_bucket_exists(table_bucket_arn: str) -> None: - properties = {"s3tables.table-bucket-arn": f"{table_bucket_arn}-modified", "s3tables.region": 
"us-east-1"} + properties = {"s3tables.warehouse": f"{table_bucket_arn}-modified", "s3tables.region": "us-east-1"} with pytest.raises(TableBucketNotFound): S3TablesCatalog(name="test_s3tables_catalog", **properties) From acfd06c072dc17a971d226fbde09e0bebd226ee4 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 18:09:03 +0100 Subject: [PATCH 49/57] Apply suggestions from code review Co-authored-by: Honah J. --- pyiceberg/catalog/s3tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 4d22807687..1854dbfc9b 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -40,7 +40,7 @@ S3TABLES_ENDPOINT = "s3tables.endpoint" # for naming rules see: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html -S3TABLES_VALID_NAME_REGEX = pattern = re.compile("[a-z0-9][a-z0-9_]{2,62}") +S3TABLES_VALID_NAME_REGEX = pattern = re.compile("[a-z0-9][a-z0-9_]{1,61}[a-z0-9]") S3TABLES_RESERVED_NAMESPACE = "aws_s3_metadata" S3TABLES_FORMAT = "ICEBERG" From 01c9003f3f65c6defd4089afae206cebc977ca2e Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 18:13:19 +0100 Subject: [PATCH 50/57] feat: add link to naming-rules for invalid name errors --- pyiceberg/catalog/s3tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 1854dbfc9b..4d89108d8d 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -117,7 +117,7 @@ def _validate_namespace_identifier(self, namespace: Union[str, Identifier]) -> s namespace = self.identifier_to_database(namespace) if not S3TABLES_VALID_NAME_REGEX.fullmatch(namespace) or namespace == S3TABLES_RESERVED_NAMESPACE: - raise InvalidNamespaceName("The specified namespace name is not valid.") + raise InvalidNamespaceName("The specified namespace name is not valid. 
See https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html for naming rules.") return namespace @@ -127,7 +127,7 @@ def _validate_database_and_table_identifier(self, identifier: Union[str, Identif namespace = self._validate_namespace_identifier(namespace) if not S3TABLES_VALID_NAME_REGEX.fullmatch(table_name): - raise InvalidTableName("The specified table name is not valid.") + raise InvalidTableName("The specified table name is not valid. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html for naming rules.") return namespace, table_name From 1d25b69741df084b9e3857c7764c30146bf8d4a9 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 18:22:14 +0100 Subject: [PATCH 51/57] feat: delete s3 table if writing new_table_metadata is unsuccessful --- pyiceberg/catalog/s3tables.py | 63 ++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 4d89108d8d..2cfdf9595b 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -150,9 +150,9 @@ def create_table( # S3 Tables API and then write the new metadata.json to the warehouseLocation associated with the newly # created S3 Table. try: - self.s3tables.create_table( + version_token = self.s3tables.create_table( tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name, format=S3TABLES_FORMAT - ) + )["versionToken"] except self.s3tables.exceptions.NotFoundException as e: raise NoSuchNamespaceError(f"Cannot create {namespace}.{table_name} because no such namespace exists.") from e except self.s3tables.exceptions.ConflictException as e: @@ -160,36 +160,39 @@ def create_table( f"Cannot create {namespace}.{table_name} because a table of the same name already exists in the namespace." 
) from e - response = self.s3tables.get_table_metadata_location(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) - version_token = response["versionToken"] - - warehouse_location = response["warehouseLocation"] - metadata_location = self._get_metadata_location(location=warehouse_location) - metadata = new_table_metadata( - location=warehouse_location, - schema=schema, - partition_spec=partition_spec, - sort_order=sort_order, - properties=properties, - ) - - io = load_file_io(properties=self.properties, location=metadata_location) - # this triggers unsupported list operation error as S3 Table Buckets only support a subset of the S3 Bucket API, - # setting overwrite=True is a workaround for now since it prevents a call to list_objects - self._write_metadata(metadata, io, metadata_location, overwrite=True) - try: - self.s3tables.update_table_metadata_location( - tableBucketARN=self.table_bucket_arn, - namespace=namespace, - name=table_name, - versionToken=version_token, - metadataLocation=metadata_location, + response = self.s3tables.get_table_metadata_location(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) + warehouse_location = response["warehouseLocation"] + + metadata_location = self._get_metadata_location(location=warehouse_location) + metadata = new_table_metadata( + location=warehouse_location, + schema=schema, + partition_spec=partition_spec, + sort_order=sort_order, + properties=properties, ) - except self.s3tables.exceptions.ConflictException as e: - raise CommitFailedException( - f"Cannot create {namespace}.{table_name} because of a concurrent update to the table version {version_token}." 
- ) from e + + io = load_file_io(properties=self.properties, location=metadata_location) + # this triggers unsupported list operation error as S3 Table Buckets only support a subset of the S3 Bucket API, + # setting overwrite=True is a workaround for now since it prevents a call to list_objects + self._write_metadata(metadata, io, metadata_location, overwrite=True) + + try: + self.s3tables.update_table_metadata_location( + tableBucketARN=self.table_bucket_arn, + namespace=namespace, + name=table_name, + versionToken=version_token, + metadataLocation=metadata_location, + ) + except self.s3tables.exceptions.ConflictException as e: + raise CommitFailedException( + f"Cannot create {namespace}.{table_name} because of a concurrent update to the table version {version_token}." + ) from e + except: + self.s3tables.delete_table(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) + raise return self.load_table(identifier=identifier) From 188c45de8b3cb04c29386f67c7f7e422c27f0cc4 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 18:22:38 +0100 Subject: [PATCH 52/57] chore: run linter --- pyiceberg/catalog/s3tables.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 2cfdf9595b..1e6b8585d6 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -117,7 +117,9 @@ def _validate_namespace_identifier(self, namespace: Union[str, Identifier]) -> s namespace = self.identifier_to_database(namespace) if not S3TABLES_VALID_NAME_REGEX.fullmatch(namespace) or namespace == S3TABLES_RESERVED_NAMESPACE: - raise InvalidNamespaceName("The specified namespace name is not valid. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html for naming rules.") + raise InvalidNamespaceName( + "The specified namespace name is not valid. 
See https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html for naming rules." + ) return namespace @@ -127,7 +129,9 @@ def _validate_database_and_table_identifier(self, identifier: Union[str, Identif namespace = self._validate_namespace_identifier(namespace) if not S3TABLES_VALID_NAME_REGEX.fullmatch(table_name): - raise InvalidTableName("The specified table name is not valid. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html for naming rules.") + raise InvalidTableName( + "The specified table name is not valid. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-tables-buckets-naming.html for naming rules." + ) return namespace, table_name @@ -161,7 +165,9 @@ def create_table( ) from e try: - response = self.s3tables.get_table_metadata_location(tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name) + response = self.s3tables.get_table_metadata_location( + tableBucketARN=self.table_bucket_arn, namespace=namespace, name=table_name + ) warehouse_location = response["warehouseLocation"] metadata_location = self._get_metadata_location(location=warehouse_location) From de310507c4fcf60fb983fb06f00234fa19572039 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Tue, 7 Jan 2025 19:15:06 +0100 Subject: [PATCH 53/57] test: rename test_s3tables.py -> integration_test_s3tables.py --- ..._s3tables.py => integration_test_s3tables.py} | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) rename tests/catalog/{test_s3tables.py => integration_test_s3tables.py} (93%) diff --git a/tests/catalog/test_s3tables.py b/tests/catalog/integration_test_s3tables.py similarity index 93% rename from tests/catalog/test_s3tables.py rename to tests/catalog/integration_test_s3tables.py index dc3bb88238..7b880bee31 100644 --- a/tests/catalog/test_s3tables.py +++ b/tests/catalog/integration_test_s3tables.py @@ -20,19 +20,25 @@ def table_name(table_name: str) -> str: @pytest.fixture def 
table_bucket_arn() -> str: + """Set the environment variable AWS_TEST_S3_TABLE_BUCKET_ARN for an S3 table bucket to test.""" import os - # since the moto library does not support s3tables as of 2024-12-14 we have to test against a real AWS endpoint - # in one of the supported regions. - - return os.environ["ARN"] + table_bucket_arn = os.getenv("AWS_TEST_S3_TABLE_BUCKET_ARN") + if not table_bucket_arn: + raise ValueError( + "Please specify a table bucket arn to run the test by setting environment variable AWS_TEST_S3_TABLE_BUCKET_ARN" + ) + return table_bucket_arn @pytest.fixture def aws_region() -> str: import os - return os.environ["AWS_REGION"] + aws_region = os.getenv("AWS_REGION") + if not aws_region: + raise ValueError("Please specify an AWS region to run the test by setting environment variable AWS_REGION") + return aws_region @pytest.fixture From 2e1c3833f5f374cbc9ee485f79c75c50f63282fc Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Wed, 8 Jan 2025 09:08:15 +0100 Subject: [PATCH 54/57] chore: update poetry.lock --- poetry.lock | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 7bc22bec33..934280dcbd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5342,6 +5342,7 @@ pyarrow = ["pyarrow"] ray = ["pandas", "pyarrow", "ray", "ray"] rest-sigv4 = ["boto3"] s3fs = ["s3fs"] +s3tables = ["boto3"] snappy = ["python-snappy"] sql-postgres = ["psycopg2-binary", "sqlalchemy"] sql-sqlite = ["sqlalchemy"] @@ -5350,4 +5351,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.9, !=3.9.7" -content-hash = "59e5678cd718f658c5bd099c03051564ee60f991e5f222bf92da13d1dd025a42" +content-hash = "10f9fb23f30dddeb93ad5c7ba7757816af5813293332e88929301c418ad7b256" From fdd1d7a4e23e4f0a05ce53f8a1472363fdc11e08 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Wed, 8 Jan 2025 20:19:53 +0100 Subject: [PATCH 55/57] fix: add license to files --- pyiceberg/catalog/s3tables.py | 16 ++++++++++++++++ 
tests/catalog/integration_test_s3tables.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index 1e6b8585d6..a577613c92 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import re from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union diff --git a/tests/catalog/integration_test_s3tables.py b/tests/catalog/integration_test_s3tables.py index 7b880bee31..351f530122 100644 --- a/tests/catalog/integration_test_s3tables.py +++ b/tests/catalog/integration_test_s3tables.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import pytest from pyiceberg.catalog.s3tables import S3TablesCatalog From 8b1cec0af084c9dc1fc42f5586ca8ce284bbcc31 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Thu, 9 Jan 2025 08:56:43 +0100 Subject: [PATCH 56/57] fix: raise error when creating a table during a transaction --- pyiceberg/catalog/s3tables.py | 79 ++++++++++++++-------- tests/catalog/integration_test_s3tables.py | 36 ++++++++++ 2 files changed, 86 insertions(+), 29 deletions(-) diff --git a/pyiceberg/catalog/s3tables.py b/pyiceberg/catalog/s3tables.py index a577613c92..14c249448e 100644 --- a/pyiceberg/catalog/s3tables.py +++ b/pyiceberg/catalog/s3tables.py @@ -35,7 +35,7 @@ from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.serializers import FromInputFile -from pyiceberg.table import CommitTableResponse, Table +from pyiceberg.table import CommitTableResponse, CreateTableTransaction, Table from pyiceberg.table.metadata import new_table_metadata from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.table.update import TableRequirement, TableUpdate @@ -92,36 +92,57 @@ def commit_table( table_identifier = table.name() database_name, table_name = self.identifier_to_database_and_table(table_identifier, NoSuchTableError) - current_table, version_token = self._load_table_and_version(identifier=table_identifier) - - updated_staged_table = self._update_and_stage_table(current_table, table_identifier, requirements, updates) - if current_table and updated_staged_table.metadata == 
current_table.metadata: - # no changes, do nothing - return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) - - self._write_metadata( - metadata=updated_staged_table.metadata, - io=updated_staged_table.io, - metadata_path=updated_staged_table.metadata_location, - overwrite=True, - ) - - # try to update metadata location which will fail if the versionToken changed meanwhile + current_table: Optional[Table] + version_token: Optional[str] try: - self.s3tables.update_table_metadata_location( - tableBucketARN=self.table_bucket_arn, - namespace=database_name, - name=table_name, - versionToken=version_token, - metadataLocation=updated_staged_table.metadata_location, + current_table, version_token = self._load_table_and_version(identifier=table_identifier) + except NoSuchTableError: + current_table = None + version_token = None + + if current_table: + updated_staged_table = self._update_and_stage_table(current_table, table_identifier, requirements, updates) + if updated_staged_table.metadata == current_table.metadata: + # no changes, do nothing + return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) + + self._write_metadata( + metadata=updated_staged_table.metadata, + io=updated_staged_table.io, + metadata_path=updated_staged_table.metadata_location, + overwrite=True, ) - except self.s3tables.exceptions.ConflictException as e: - raise CommitFailedException( - f"Cannot commit {database_name}.{table_name} because of a concurrent update to the table version {version_token}." 
- ) from e - return CommitTableResponse( - metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location - ) + + # try to update metadata location which will fail if the versionToken changed meanwhile + try: + self.s3tables.update_table_metadata_location( + tableBucketARN=self.table_bucket_arn, + namespace=database_name, + name=table_name, + versionToken=version_token, + metadataLocation=updated_staged_table.metadata_location, + ) + except self.s3tables.exceptions.ConflictException as e: + raise CommitFailedException( + f"Cannot commit {database_name}.{table_name} because of a concurrent update to the table version {version_token}." + ) from e + return CommitTableResponse( + metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location + ) + else: + # table does not exist, create it + raise NotImplementedError("Creating a table on commit is currently not supported.") + + def create_table_transaction( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + location: Optional[str] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, + properties: Properties = EMPTY_DICT, + ) -> CreateTableTransaction: + raise NotImplementedError("create_table_transaction currently not supported.") def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None: if properties: diff --git a/tests/catalog/integration_test_s3tables.py b/tests/catalog/integration_test_s3tables.py index 351f530122..e803b5a2b9 100644 --- a/tests/catalog/integration_test_s3tables.py +++ b/tests/catalog/integration_test_s3tables.py @@ -18,7 +18,9 @@ from pyiceberg.catalog.s3tables import S3TablesCatalog from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchTableError, TableBucketNotFound +from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema +from 
pyiceberg.transforms import IdentityTransform from pyiceberg.types import IntegerType @@ -247,3 +249,37 @@ def test_commit_new_data_to_table(catalog: S3TablesCatalog, database_name: str, assert updated_table_metadata.metadata_log[-1].metadata_file == original_table_metadata_location assert updated_table_metadata.metadata_log[-1].timestamp_ms == original_table_last_updated_ms assert table.scan().to_arrow().num_rows == 2 * row_count + + +def test_create_table_transaction( + catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: str +) -> None: + identifier = (database_name, table_name) + catalog.create_namespace(namespace=database_name) + + with catalog.create_table_transaction( + identifier, + table_schema_nested, + partition_spec=PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="foo")), + ) as txn: + last_updated_metadata = txn.table_metadata.last_updated_ms + with txn.update_schema() as update_schema: + update_schema.add_column(path="b", field_type=IntegerType()) + + with txn.update_spec() as update_spec: + update_spec.add_identity("bar") + + txn.set_properties(test_a="test_aa", test_b="test_b", test_c="test_c") + + table = catalog.load_table(identifier) + + assert table.schema().find_field("b").field_type == IntegerType() + assert table.properties == {"test_a": "test_aa", "test_b": "test_b", "test_c": "test_c"} + assert table.spec().last_assigned_field_id == 1001 + assert table.spec().fields_by_source_id(1)[0].name == "foo" + assert table.spec().fields_by_source_id(1)[0].field_id == 1000 + assert table.spec().fields_by_source_id(1)[0].transform == IdentityTransform() + assert table.spec().fields_by_source_id(2)[0].name == "bar" + assert table.spec().fields_by_source_id(2)[0].field_id == 1001 + assert table.spec().fields_by_source_id(2)[0].transform == IdentityTransform() + assert table.metadata.last_updated_ms > last_updated_metadata From 99e569be012eeb014f73f58dda11bed3ce5bd99d Mon Sep 17 
00:00:00 2001 From: Felix Scherz Date: Thu, 9 Jan 2025 09:01:26 +0100 Subject: [PATCH 57/57] test: mark create_table_transaction test with xfail --- tests/catalog/integration_test_s3tables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/catalog/integration_test_s3tables.py b/tests/catalog/integration_test_s3tables.py index e803b5a2b9..8cfcbe7b44 100644 --- a/tests/catalog/integration_test_s3tables.py +++ b/tests/catalog/integration_test_s3tables.py @@ -251,6 +251,7 @@ def test_commit_new_data_to_table(catalog: S3TablesCatalog, database_name: str, assert table.scan().to_arrow().num_rows == 2 * row_count +@pytest.mark.xfail(raises=NotImplementedError, reason="create_table_transaction not implemented yet") def test_create_table_transaction( catalog: S3TablesCatalog, database_name: str, table_name: str, table_schema_nested: str ) -> None: