Skip to content

Commit

Permalink
Fix parquet endian issue for s390x
Browse files — browse the repository at this point in the history
  • Loading branch information
HarryLeeIBM authored and Avogar committed Sep 19, 2023
1 parent 9d9c464 commit e40416e
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
if (data_size < 4) {
throw ParquetException("Received invalid levels (corrupt data page?)");
}
num_bytes = ::arrow::util::SafeLoadAs<int32_t>(data);
num_bytes = ::arrow::bit_util::ToLittleEndian(::arrow::util::SafeLoadAs<int32_t>(data));
if (num_bytes < 0 || num_bytes > data_size - 4) {
throw ParquetException("Received invalid number of bytes (corrupt data page?)");
}
Expand Down
23 changes: 17 additions & 6 deletions cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1028,7 +1028,7 @@ int PlainDecoder<DType>::DecodeArrow(
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
[&]() {
builder->UnsafeAppend(SafeLoadAs<value_type>(data_));
builder->UnsafeAppend(::arrow::bit_util::ToLittleEndian(SafeLoadAs<value_type>(data_)));
data_ += sizeof(value_type);
},
[&]() { builder->UnsafeAppendNull(); });
Expand All @@ -1055,7 +1055,8 @@ int PlainDecoder<DType>::DecodeArrow(
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
[&]() {
PARQUET_THROW_NOT_OK(builder->Append(SafeLoadAs<value_type>(data_)));
PARQUET_THROW_NOT_OK(
builder->Append(::arrow::bit_util::ToLittleEndian(SafeLoadAs<value_type>(data_))));
data_ += sizeof(value_type);
},
[&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
Expand All @@ -1075,7 +1076,17 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
}
// If bytes_to_decode == 0, data could be null
if (bytes_to_decode > 0) {
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
for (size_t i = 0; i < num_values; ++i)
{
memcpy(out + i, data + sizeof(T) * i, sizeof(T));
auto begin = reinterpret_cast<uint8_t*>(out + i);
auto end = begin + sizeof(T);
std::reverse(begin, end);
}
#else
memcpy(out, data, bytes_to_decode);
#endif
}
return static_cast<int>(bytes_to_decode);
}
Expand All @@ -1098,7 +1109,7 @@ static inline int64_t ReadByteArray(const uint8_t* data, int64_t data_size,
if (ARROW_PREDICT_FALSE(data_size < 4)) {
ParquetException::EofException();
}
const int32_t len = SafeLoadAs<int32_t>(data);
const int32_t len = ::arrow::bit_util::ToLittleEndian(SafeLoadAs<int32_t>(data));
if (len < 0) {
throw ParquetException("Invalid BYTE_ARRAY value");
}
Expand Down Expand Up @@ -1387,7 +1398,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
if (ARROW_PREDICT_FALSE(len_ < 4)) {
ParquetException::EofException();
}
auto value_len = SafeLoadAs<int32_t>(data_);
auto value_len = ::arrow::bit_util::ToLittleEndian(SafeLoadAs<int32_t>(data_));
if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
}
Expand Down Expand Up @@ -1433,7 +1444,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
if (ARROW_PREDICT_FALSE(len_ < 4)) {
ParquetException::EofException();
}
auto value_len = SafeLoadAs<int32_t>(data_);
auto value_len = ::arrow::bit_util::ToLittleEndian(SafeLoadAs<int32_t>(data_));
if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
}
Expand Down Expand Up @@ -3312,7 +3323,7 @@ int ByteStreamSplitDecoder<DType>::DecodeArrow(
const size_t byte_index = b * num_values_in_buffer_ + offset;
gathered_byte_data[b] = data[byte_index];
}
builder->UnsafeAppend(SafeLoadAs<T>(&gathered_byte_data[0]));
builder->UnsafeAppend(::arrow::bit_util::ToLittleEndian(SafeLoadAs<T>(&gathered_byte_data[0])));
++offset;
},
[&]() { builder->UnsafeAppendNull(); });
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/file_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -473,9 +473,9 @@ class SerializedFile : public ParquetFileReader::Contents {
"is not a parquet file.");
}
// Both encrypted/unencrypted footers have the same footer length check.
uint32_t metadata_len = ::arrow::util::SafeLoadAs<uint32_t>(
uint32_t metadata_len = ::arrow::bit_util::FromLittleEndian(::arrow::util::SafeLoadAs<uint32_t>(
reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
kFooterSize);
kFooterSize));
if (metadata_len > source_size_ - kFooterSize) {
throw ParquetInvalidOrCorruptedFileException(
"Parquet file size is ", source_size_,
Expand Down

0 comments on commit e40416e

Please sign in to comment.