Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
sungwy committed Jun 14, 2024
1 parent 79cf181 commit 905cc7a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 9 deletions.
6 changes: 3 additions & 3 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1918,9 +1918,9 @@ def write_parquet(task: WriteTask) -> DataFile:
file_schema = table_schema

batches = [
to_requested_schema(requested_schema=file_schema, file_schema=table_schema, batch=batch)
for batch in task.record_batches
]
to_requested_schema(requested_schema=file_schema, file_schema=table_schema, batch=batch)
for batch in task.record_batches
]
arrow_table = pa.Table.from_batches(batches)
file_path = f'{table_metadata.location}/data/{task.generate_data_file_path("parquet")}'
fo = io.new_output(file_path)
Expand Down
10 changes: 4 additions & 6 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1781,10 +1781,11 @@ def to_arrow(self) -> pa.Table:
)

def to_arrow_batch_reader(self) -> pa.RecordBatchReader:
from pyiceberg.io.pyarrow import project_batches, schema_to_pyarrow
import pyarrow as pa

reader = pa.RecordBatchReader.from_batches(

from pyiceberg.io.pyarrow import project_batches, schema_to_pyarrow

return pa.RecordBatchReader.from_batches(
schema_to_pyarrow(self.projection()),
project_batches(
self.plan_files(),
Expand All @@ -1796,9 +1797,6 @@ def to_arrow_batch_reader(self) -> pa.RecordBatchReader:
limit=self.limit,
),
)
# Cast the reader to its projected schema for consistency
# https://github.com/apache/iceberg-python/issues/791
return reader.cast(reader.schema)

def to_pandas(self, **kwargs: Any) -> pd.DataFrame:
return self.to_arrow().to_pandas(**kwargs)
Expand Down

0 comments on commit 905cc7a

Please sign in to comment.