Skip to content

Commit

Permalink
Don't throw in ~DictEncoderImpl()
Browse files Browse the repository at this point in the history
  • Loading branch information
Avogar committed Mar 21, 2024
1 parent ba5c679 commit 12232bb
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,12 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
dict_encoded_size_(0),
memo_table_(pool, kInitialHashTableSize) {}

~DictEncoderImpl() override { DCHECK(buffered_indices_.empty()); }
~DictEncoderImpl() override {
  // A failed write can leave indices buffered; drop them here instead of
  // asserting that the buffer is empty.
  const bool has_pending_indices = !buffered_indices_.empty();
  if (has_pending_indices) {
    ClearIndices();
  }
}

int dict_encoded_size() const override { return dict_encoded_size_; }

Expand Down Expand Up @@ -1028,7 +1033,7 @@ int PlainDecoder<DType>::DecodeArrow(
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
[&]() {
builder->UnsafeAppend(SafeLoadAs<value_type>(data_));
builder->UnsafeAppend(::arrow::bit_util::ToLittleEndian(SafeLoadAs<value_type>(data_)));
data_ += sizeof(value_type);
},
[&]() { builder->UnsafeAppendNull(); });
Expand All @@ -1055,7 +1060,8 @@ int PlainDecoder<DType>::DecodeArrow(
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
[&]() {
PARQUET_THROW_NOT_OK(builder->Append(SafeLoadAs<value_type>(data_)));
PARQUET_THROW_NOT_OK(
builder->Append(::arrow::bit_util::ToLittleEndian(SafeLoadAs<value_type>(data_))));
data_ += sizeof(value_type);
},
[&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
Expand All @@ -1075,7 +1081,17 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
}
// If bytes_to_decode == 0, data could be null
if (bytes_to_decode > 0) {
  // The byte-swapping path must be selected by comparing the byte order, not
  // by checking that the macros exist: GCC and Clang define __BYTE_ORDER__ and
  // __ORDER_BIG_ENDIAN__ on every target, so a defined()-only test would also
  // reverse the bytes on little-endian hosts and corrupt every value.
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  // Big-endian host: copy each value, then reverse its bytes in place to
  // convert from the little-endian stored layout to native order.
  for (int i = 0; i < num_values; ++i) {
    auto* value_bytes = reinterpret_cast<uint8_t*>(out + i);
    memcpy(value_bytes, data + sizeof(T) * static_cast<size_t>(i), sizeof(T));
    std::reverse(value_bytes, value_bytes + sizeof(T));
  }
#else
  // Stored layout already matches the host layout: one bulk copy suffices.
  memcpy(out, data, bytes_to_decode);
#endif
}
return static_cast<int>(bytes_to_decode);
}
Expand All @@ -1098,7 +1114,7 @@ static inline int64_t ReadByteArray(const uint8_t* data, int64_t data_size,
if (ARROW_PREDICT_FALSE(data_size < 4)) {
ParquetException::EofException();
}
const int32_t len = SafeLoadAs<int32_t>(data);
const int32_t len = ::arrow::bit_util::ToLittleEndian(SafeLoadAs<int32_t>(data));
if (len < 0) {
throw ParquetException("Invalid BYTE_ARRAY value");
}
Expand Down Expand Up @@ -1387,7 +1403,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
if (ARROW_PREDICT_FALSE(len_ < 4)) {
ParquetException::EofException();
}
auto value_len = SafeLoadAs<int32_t>(data_);
auto value_len = ::arrow::bit_util::ToLittleEndian(SafeLoadAs<int32_t>(data_));
if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
}
Expand Down Expand Up @@ -1433,7 +1449,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
if (ARROW_PREDICT_FALSE(len_ < 4)) {
ParquetException::EofException();
}
auto value_len = SafeLoadAs<int32_t>(data_);
auto value_len = ::arrow::bit_util::ToLittleEndian(SafeLoadAs<int32_t>(data_));
if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
}
Expand Down Expand Up @@ -2580,7 +2596,7 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp
T min_delta_;
uint32_t mini_block_idx_;
std::shared_ptr<ResizableBuffer> delta_bit_widths_;
int delta_bit_width_ = 0;
int delta_bit_width_;

T last_value_;
};
Expand Down Expand Up @@ -3312,7 +3328,7 @@ int ByteStreamSplitDecoder<DType>::DecodeArrow(
const size_t byte_index = b * num_values_in_buffer_ + offset;
gathered_byte_data[b] = data[byte_index];
}
builder->UnsafeAppend(SafeLoadAs<T>(&gathered_byte_data[0]));
builder->UnsafeAppend(::arrow::bit_util::ToLittleEndian(SafeLoadAs<T>(&gathered_byte_data[0])));
++offset;
},
[&]() { builder->UnsafeAppendNull(); });
Expand Down

0 comments on commit 12232bb

Please sign in to comment.