Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
sungwy committed Jun 14, 2024
1 parent 79cf181 commit 905cc7a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 9 deletions.
6 changes: 3 additions & 3 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1918,9 +1918,9 @@ def write_parquet(task: WriteTask) -> DataFile:
file_schema = table_schema

batches = [
to_requested_schema(requested_schema=file_schema, file_schema=table_schema, batch=batch)
for batch in task.record_batches
]
to_requested_schema(requested_schema=file_schema, file_schema=table_schema, batch=batch)
for batch in task.record_batches
]
arrow_table = pa.Table.from_batches(batches)
file_path = f'{table_metadata.location}/data/{task.generate_data_file_path("parquet")}'
fo = io.new_output(file_path)
Expand Down
10 changes: 4 additions & 6 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1781,10 +1781,11 @@ def to_arrow(self) -> pa.Table:
)

def to_arrow_batch_reader(self) -> pa.RecordBatchReader:
from pyiceberg.io.pyarrow import project_batches, schema_to_pyarrow
import pyarrow as pa

reader = pa.RecordBatchReader.from_batches(

from pyiceberg.io.pyarrow import project_batches, schema_to_pyarrow

return pa.RecordBatchReader.from_batches(
schema_to_pyarrow(self.projection()),
project_batches(
self.plan_files(),
Expand All @@ -1796,9 +1797,6 @@ def to_arrow_batch_reader(self) -> pa.RecordBatchReader:
limit=self.limit,
),
)
# Cast the reader to its projected schema for consistency
# https://github.com/apache/iceberg-python/issues/791
return reader.cast(reader.schema)

def to_pandas(self, **kwargs: Any) -> pd.DataFrame:
return self.to_arrow().to_pandas(**kwargs)
Expand Down

0 comments on commit 905cc7a

Please sign in to comment.