Skip to content

Commit

Permalink
Use schema_to_pyarrow directly for backporting
Browse files Browse the repository at this point in the history
  • Loading branch information
HonahX committed Mar 29, 2024
1 parent 56899e6 commit 93b61dd
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1731,7 +1731,7 @@ def write_file(table: Table, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
parquet_writer_kwargs = _get_parquet_writer_kwargs(table.properties)

file_path = f'{table.location()}/data/{task.generate_data_file_filename("parquet")}'
file_schema = table.schema().as_arrow()
file_schema = schema_to_pyarrow(table.schema())

fo = table.io.new_output(file_path)
row_group_size = PropertyUtil.property_as_int(
Expand Down
12 changes: 8 additions & 4 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1053,10 +1053,12 @@ def append(self, df: pa.Table) -> None:
if len(self.spec().fields) > 0:
raise ValueError("Cannot write to partitioned tables")

from pyiceberg.io.pyarrow import schema_to_pyarrow

_check_schema_compatible(self.schema(), other_schema=df.schema)
# cast if the two schemas are compatible but not equal
if self.schema().as_arrow() != df.schema:
df = df.cast(self.schema().as_arrow())
if schema_to_pyarrow(self.schema()) != df.schema:
df = df.cast(schema_to_pyarrow(self.schema()))

merge = _MergingSnapshotProducer(operation=Operation.APPEND, table=self)

Expand Down Expand Up @@ -1091,10 +1093,12 @@ def overwrite(self, df: pa.Table, overwrite_filter: BooleanExpression = ALWAYS_T
if len(self.spec().fields) > 0:
raise ValueError("Cannot write to partitioned tables")

from pyiceberg.io.pyarrow import schema_to_pyarrow

_check_schema_compatible(self.schema(), other_schema=df.schema)
# cast if the two schemas are compatible but not equal
if self.schema().as_arrow() != df.schema:
df = df.cast(self.schema().as_arrow())
if schema_to_pyarrow(self.schema()) != df.schema:
df = df.cast(schema_to_pyarrow(self.schema()))

merge = _MergingSnapshotProducer(
operation=Operation.OVERWRITE if self.current_snapshot() is not None else Operation.APPEND,
Expand Down

0 comments on commit 93b61dd

Please sign in to comment.