// Patch notes (free text ahead of the first "diff --git" header; not part of any hunk):
// - adapters/orc/adapter.{cc,h}: expose the liborc::Reader held by ORCFileReader::Impl through
//   GetRawORCReader(), and add a deprecated Status-returning ORCFileReader::Open overload that
//   forwards to the Result-returning Open.
// - ipc/reader.{cc,h}: add RecordBatchCountRows(i), which returns batch->length() taken from the
//   message read for record batch block i.
// NOTE(review): template arguments (after static_cast, std::unique_ptr, std::shared_ptr, Result)
// appear to be missing from this copy of the patch -- confirm against the original before applying.
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 98784450b3cce..576bd0dada596 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -222,6 +222,11 @@ class ORCFileReader::Impl { return Init(); } + virtual liborc::Reader* GetRawORCReader() { + return reader_.get(); + } + + Status Init() { int64_t nstripes = reader_->getNumberOfStripes(); stripes_.resize(static_cast(nstripes)); @@ -559,6 +564,15 @@ ORCFileReader::ORCFileReader() { impl_.reset(new ORCFileReader::Impl()); } ORCFileReader::~ORCFileReader() {} +liborc::Reader* ORCFileReader::GetRawORCReader() { + return impl_->GetRawORCReader(); +} + +Status ORCFileReader::Open(const std::shared_ptr& file, + MemoryPool* pool, std::unique_ptr* reader) { + return Open(file, pool).Value(reader); +} + Result> ORCFileReader::Open( const std::shared_ptr& file, MemoryPool* pool) { #ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h index 4ffff81f355f1..6adb3be5ec5b3 100644 --- a/cpp/src/arrow/adapters/orc/adapter.h +++ b/cpp/src/arrow/adapters/orc/adapter.h @@ -30,6 +30,7 @@ #include "arrow/type_fwd.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" +#include "arrow/adapters/orc/adapter_util.h" namespace arrow { namespace adapters { @@ -53,6 +54,19 @@ class ARROW_EXPORT ORCFileReader { public: ~ORCFileReader(); + /// \brief Creates a new ORC reader. + /// + /// \param[in] file the data source + /// \param[in] pool a MemoryPool to use for buffer allocations + /// \param[out] reader the returned reader object + /// \return Status + ARROW_DEPRECATED("Deprecated in 6.0.0. 
Use Result-returning overload instead.") + static Status Open(const std::shared_ptr& file, MemoryPool* pool, + std::unique_ptr* reader); + + /// \brief Get a non-owning pointer to the underlying liborc::Reader. + liborc::Reader* GetRawORCReader(); + /// \brief Creates a new ORC reader /// /// \param[in] file the data source diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index d272c78560f82..a34517c660108 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1369,6 +1369,23 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { return total; } + Result RecordBatchCountRows(int i) override { + DCHECK_GE(i, 0); + DCHECK_LT(i, num_record_batches()); + ARROW_ASSIGN_OR_RAISE(auto outer_message, + ReadMessageFromBlock(GetRecordBatchBlock(i))); + auto metadata = outer_message->metadata(); + const flatbuf::Message* message = nullptr; + RETURN_NOT_OK( + internal::VerifyMessage(metadata->data(), metadata->size(), &message)); + auto batch = message->header_as_RecordBatch(); + if (batch == nullptr) { + return Status::IOError( + "Header-type of flatbuffer-encoded Message is not RecordBatch."); + } + return batch->length(); + } + Status Open(const std::shared_ptr& file, int64_t footer_offset, const IpcReadOptions& options) { owned_file_ = file; diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h index 888f59a627771..2e876d65f567f 100644 --- a/cpp/src/arrow/ipc/reader.h +++ b/cpp/src/arrow/ipc/reader.h @@ -203,6 +203,9 @@ class ARROW_EXPORT RecordBatchFileReader /// \brief Computes the total number of rows in the file. virtual Result CountRows() = 0; + /// \brief Computes the number of rows in the i-th record batch of the file. + virtual Result RecordBatchCountRows(int i) = 0; + /// \brief Begin loading metadata for the desired batches into memory. /// /// This method will also begin loading all dictionaries messages into memory. 
// Patch notes (free text ahead of the "diff --git" header; not part of any hunk):
// - util/bit_stream_utils.h: GetValue_ throws std::runtime_error when *bit_offset >= 64; the check
//   runs before *buffered_values is shifted right by *bit_offset.
// - parquet/encoding.cc: DeltaBitPackDecoder::delta_bit_width_ gets a default member initializer of 0.
diff --git a/cpp/src/arrow/util/bit_stream_utils.h b/cpp/src/arrow/util/bit_stream_utils.h index 811694e43b76c..e5a10e48ecc3e 100644 --- a/cpp/src/arrow/util/bit_stream_utils.h +++ b/cpp/src/arrow/util/bit_stream_utils.h @@ -272,6 +272,11 @@ inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer, #pragma warning(push) #pragma warning(disable : 4800) #endif + if (ARROW_PREDICT_FALSE(*bit_offset >= 64)) { + std::string what = "invalid bit offset: " + std::to_string(*bit_offset); + what.append(", may be malformed num_bits: ").append(std::to_string(num_bits)); + throw std::runtime_error(what); + } *v = static_cast(bit_util::TrailingBits(*buffered_values, *bit_offset + num_bits) >> *bit_offset); #ifdef _MSC_VER diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 05221568c8fa0..5b567989adf6b 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -2721,7 +2721,7 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder delta_bit_widths_; - int delta_bit_width_; + int delta_bit_width_{0}; T last_value_; };