Review notes (text before the first "diff --git" header is ignored by git apply / patch):

Purpose: byte-swap PLAIN-encoded values read by the Parquet decoders on big-endian hosts, and
clear any still-buffered dictionary indices in ~DictEncoderImpl instead of asserting on them.

Changes relative to the patch as submitted:
  * DecodePlain: the guard now compares __BYTE_ORDER__ with __ORDER_BIG_ENDIAN__. The submitted
    guard only tested that both macros were defined, so it also selected the byte-reversing path
    on little-endian builds with compilers that predefine both macros.
  * DecodePlain: the loop index is int to match num_values (no signed/unsigned comparison).
  * Decode paths call FromLittleEndian (same operation as ToLittleEndian, correct direction).
  * The DeltaBitPackDecoder hunk that removed the "= 0" initializer of delta_bit_width_ is dropped.

NOTE(review): template arguments (<value_type>, <int32_t>, <T>, <uint8_t*>, <int>, <DType>) and
the indentation / blank context lines were reconstructed from a copy of this patch in which they
had been stripped - confirm against the working tree before applying. The post-image blob hash
on the "index" line predates these edits.

diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index e7e824896b191..b6549a0e6ea41 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -499,7 +499,12 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
         dict_encoded_size_(0),
         memo_table_(pool, kInitialHashTableSize) {}
 
-  ~DictEncoderImpl() override { DCHECK(buffered_indices_.empty()); }
+  ~DictEncoderImpl() override {
+    // Indices may still be buffered if an exception interrupted writing.
+    if (!buffered_indices_.empty()) {
+      ClearIndices();
+    }
+  }
 
   int dict_encoded_size() const override { return dict_encoded_size_; }
 
@@ -1028,7 +1033,7 @@ int PlainDecoder<DType>::DecodeArrow(
   VisitNullBitmapInline(
       valid_bits, valid_bits_offset, num_values, null_count,
       [&]() {
-        builder->UnsafeAppend(SafeLoadAs<value_type>(data_));
+        builder->UnsafeAppend(::arrow::bit_util::FromLittleEndian(SafeLoadAs<value_type>(data_)));
         data_ += sizeof(value_type);
       },
       [&]() { builder->UnsafeAppendNull(); });
@@ -1055,7 +1060,8 @@ int PlainDecoder<DType>::DecodeArrow(
   VisitNullBitmapInline(
       valid_bits, valid_bits_offset, num_values, null_count,
       [&]() {
-        PARQUET_THROW_NOT_OK(builder->Append(SafeLoadAs<value_type>(data_)));
+        PARQUET_THROW_NOT_OK(
+            builder->Append(::arrow::bit_util::FromLittleEndian(SafeLoadAs<value_type>(data_))));
         data_ += sizeof(value_type);
       },
       [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
@@ -1075,7 +1081,17 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
   }
   // If bytes_to_decode == 0, data could be null
   if (bytes_to_decode > 0) {
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    // Big-endian host: copy each value, then reverse its bytes in place.
+    for (int i = 0; i < num_values; ++i) {
+      memcpy(out + i, data + sizeof(T) * i, sizeof(T));
+      auto begin = reinterpret_cast<uint8_t*>(out + i);
+      auto end = begin + sizeof(T);
+      std::reverse(begin, end);
+    }
+#else
     memcpy(out, data, bytes_to_decode);
+#endif
   }
   return static_cast<int>(bytes_to_decode);
 }
@@ -1098,7 +1114,7 @@ static inline int64_t ReadByteArray(const uint8_t* data, int64_t data_size,
   if (ARROW_PREDICT_FALSE(data_size < 4)) {
     ParquetException::EofException();
   }
-  const int32_t len = SafeLoadAs<int32_t>(data);
+  const int32_t len = ::arrow::bit_util::FromLittleEndian(SafeLoadAs<int32_t>(data));
   if (len < 0) {
     throw ParquetException("Invalid BYTE_ARRAY value");
   }
@@ -1387,7 +1403,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
           if (ARROW_PREDICT_FALSE(len_ < 4)) {
             ParquetException::EofException();
           }
-          auto value_len = SafeLoadAs<int32_t>(data_);
+          auto value_len = ::arrow::bit_util::FromLittleEndian(SafeLoadAs<int32_t>(data_));
           if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
             return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
           }
@@ -1433,7 +1449,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
           if (ARROW_PREDICT_FALSE(len_ < 4)) {
             ParquetException::EofException();
           }
-          auto value_len = SafeLoadAs<int32_t>(data_);
+          auto value_len = ::arrow::bit_util::FromLittleEndian(SafeLoadAs<int32_t>(data_));
           if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
             return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
           }
@@ -3312,7 +3328,7 @@ int ByteStreamSplitDecoder<DType>::DecodeArrow(
           const size_t byte_index = b * num_values_in_buffer_ + offset;
           gathered_byte_data[b] = data[byte_index];
         }
-        builder->UnsafeAppend(SafeLoadAs<T>(&gathered_byte_data[0]));
+        builder->UnsafeAppend(::arrow::bit_util::FromLittleEndian(SafeLoadAs<T>(&gathered_byte_data[0])));
         ++offset;
       },
       [&]() { builder->UnsafeAppendNull(); });