From 736829dd55ca9d1f0c96705f0873a12476ebcfa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 7 Jan 2025 13:29:31 +0100 Subject: [PATCH 1/4] GH-45129: [Python][C++] Fix usage of deprecated C++ functionality on pyarrow --- python/pyarrow/_parquet.pxd | 8 +++----- python/pyarrow/_parquet.pyx | 8 ++++---- python/pyarrow/src/arrow/python/python_test.cc | 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 71e93ce0a4c51..c17c3b70d7f41 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -484,11 +484,9 @@ cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil: const vector[int]& column_indices, shared_ptr[CTable]* out) - CStatus GetRecordBatchReader(const vector[int]& row_group_indices, - const vector[int]& column_indices, - unique_ptr[CRecordBatchReader]* out) - CStatus GetRecordBatchReader(const vector[int]& row_group_indices, - unique_ptr[CRecordBatchReader]* out) + CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices, + const vector[int]& column_indices) + CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices) CStatus ReadTable(shared_ptr[CTable]* out) CStatus ReadTable(const vector[int]& column_indices, diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index a3abf1865b7b5..2fb1e41641f8e 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1616,16 +1616,16 @@ cdef class ParquetReader(_Weakrefable): for index in column_indices: c_column_indices.push_back(index) with nogil: - check_status( + recordbatchreader = GetResultValue( self.reader.get().GetRecordBatchReader( - c_row_groups, c_column_indices, &recordbatchreader + c_row_groups, c_column_indices ) ) else: with nogil: - check_status( + recordbatchreader = GetResultValue( self.reader.get().GetRecordBatchReader( - c_row_groups, &recordbatchreader + c_row_groups ) ) diff --git a/python/pyarrow/src/arrow/python/python_test.cc b/python/pyarrow/src/arrow/python/python_test.cc index eea6bf9459d1f..f988f8da31cb1 100644 --- a/python/pyarrow/src/arrow/python/python_test.cc +++ b/python/pyarrow/src/arrow/python/python_test.cc @@ -663,7 +663,7 @@ Status TestDecimal128OverflowFails() { ASSERT_EQ(38, metadata.precision()); ASSERT_EQ(1, metadata.scale()); - auto type = ::arrow::decimal(38, 38); + auto type = ::arrow::smallest_decimal(38, 38); const auto& decimal_type = checked_cast(*type); ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); @@ -689,7 +689,7 @@ Status TestDecimal256OverflowFails() { ASSERT_EQ(76, metadata.precision()); ASSERT_EQ(1, metadata.scale()); - auto type = ::arrow::decimal(76, 76); + auto type = ::arrow::smallest_decimal(76, 76); const auto& decimal_type = checked_cast(*type); ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); From a38d756b8abe46bcf4090ce9bb47fb312311839f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 7 Jan 2025 15:41:43 +0100 Subject: [PATCH 2/4] We seem to require a shared_ptr insted of unique_ptr due to our implementation of SmartPtrNoGIL operator --- python/pyarrow/_parquet.pxd | 4 ++-- python/pyarrow/_parquet.pyx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index c17c3b70d7f41..416d73361f585 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -484,9 +484,9 @@ cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil: const vector[int]& column_indices, shared_ptr[CTable]* out) - CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices, + CResult[shared_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices, const vector[int]& column_indices) - CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices) + CResult[shared_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices) CStatus ReadTable(shared_ptr[CTable]* out) CStatus ReadTable(const vector[int]& column_indices, diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 2fb1e41641f8e..84be109c48a26 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1602,7 +1602,7 @@ cdef class ParquetReader(_Weakrefable): vector[int] c_row_groups vector[int] c_column_indices shared_ptr[CRecordBatch] record_batch - UniquePtrNoGIL[CRecordBatchReader] recordbatchreader + shared_ptr[CRecordBatchReader] recordbatchreader self.set_batch_size(batch_size) From 4b2e3d4e382d0fd42a4b8d708fd6ea731f8352dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 7 Jan 2025 16:28:39 +0100 Subject: [PATCH 3/4] This sould be SharedPtrNoGIL as we are usng no_gil --- python/pyarrow/_parquet.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 84be109c48a26..2770fb57aefd1 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1602,7 +1602,7 @@ cdef class ParquetReader(_Weakrefable): vector[int] c_row_groups vector[int] c_column_indices shared_ptr[CRecordBatch] record_batch - shared_ptr[CRecordBatchReader] recordbatchreader + SharedPtrNoGIL[CRecordBatchReader] recordbatchreader self.set_batch_size(batch_size) From 0a1d214724874f475c47d8e7e4f640c044e9ed47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 8 Jan 2025 10:44:47 +0100 Subject: [PATCH 4/4] Revert to unique_ptr as returned from C++ --- python/pyarrow/_parquet.pxd | 4 ++-- python/pyarrow/_parquet.pyx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 416d73361f585..c17c3b70d7f41 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -484,9 +484,9 @@ cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil: const vector[int]& column_indices, shared_ptr[CTable]* out) - CResult[shared_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices, + CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices, const vector[int]& column_indices) - CResult[shared_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices) + CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices) CStatus ReadTable(shared_ptr[CTable]* out) CStatus ReadTable(const vector[int]& column_indices, diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 2770fb57aefd1..2fb1e41641f8e 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1602,7 +1602,7 @@ cdef class ParquetReader(_Weakrefable): vector[int] c_row_groups vector[int] c_column_indices shared_ptr[CRecordBatch] record_batch - SharedPtrNoGIL[CRecordBatchReader] recordbatchreader + UniquePtrNoGIL[CRecordBatchReader] recordbatchreader self.set_batch_size(batch_size)