Skip to content

Commit

Permalink
[fix](arrow-flight-sql) Fix Doris NULL column conversion to arrow bat…
Browse files Browse the repository at this point in the history
…ch (apache#43929)

### What problem does this PR solve?

Problem Summary:

The representation of NULL columns in Doris is special, which is
`DataTypeNull<DataTypeNumber::Uint8>`. `Uint8` uses
`arrow::BooleanBuilder` when serializing into arrow batch, which does
not match the expected `arrow::NullBuilder`.

Fix:

```
*** Query id: fd32741526804c1e-bc016473fd8f3aa3 ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1731327262 (unix time) try "date -d @1731327262" if you are using GNU date ***
*** Current BE git commitID: 653e315 ***
*** SIGSEGV address not mapped to object (@0x100000024) received by PID 1442863 (TID 1443456 OR 0x7f8b8cdea700) from PID 36; stack trace: ***
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/common/signal_handler.h:421
 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so
 2# JVM_handle_linux_signal in /mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so
 3# 0x00007F8CA1F38B50 in /lib64/libc.so.6
 4# 0x000055FC45E5B2D3 in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
 5# arrow::BooleanBuilder::AppendValues(unsigned char const*, long, unsigned char const*) in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
 6# doris::vectorized::DataTypeNumberSerDe<unsigned char>::write_column_to_arrow(doris::vectorized::IColumn const&, doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 15ul, 16ul> const*, arrow::ArrayBuilder*, int, int, cctz::time_zone const&) const at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/data_types/serde/data_type_number_serde.cpp:86
 7# doris::FromBlockConverter::convert(std::shared_ptr<arrow::RecordBatch>*) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/util/arrow/block_convertor.cpp:390
 8# doris::convert_to_arrow_batch(doris::vectorized::Block const&, std::shared_ptr<arrow::Schema> const&, arrow::MemoryPool*, std::shared_ptr<arrow::RecordBatch>*, cctz::time_zone const&) in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
 9# doris::vectorized::VArrowFlightResultWriter::write(doris::vectorized::Block&) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/varrow_flight_result_writer.cpp:76
10# doris::vectorized::VResultSink::send(doris::RuntimeState*, doris::vectorized::Block*, bool) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/vresult_sink.cpp:149
11# doris::PlanFragmentExecutor::open_vectorized_internal() at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:341
12# doris::PlanFragmentExecutor::open() at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:273
```
  • Loading branch information
xinyiZzz committed Nov 19, 2024
1 parent 8cdb3f2 commit 49ce106
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions be/src/vec/data_types/serde/data_type_number_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,21 @@ void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, const
auto arrow_null_map = revert_null_map(null_map, start, end);
auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : arrow_null_map.data();
if constexpr (std::is_same_v<T, UInt8>) {
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
checkArrowStatus(
builder.AppendValues(reinterpret_cast<const uint8_t*>(col_data.data() + start),
end - start,
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
column.get_name(), array_builder->type()->name());
auto* null_builder = dynamic_cast<arrow::NullBuilder*>(array_builder);
if (null_builder) {
for (size_t i = start; i < end; ++i) {
checkArrowStatus(null_builder->AppendNull(), column.get_name(),
null_builder->type()->name());
}
} else {
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
checkArrowStatus(
builder.AppendValues(reinterpret_cast<const uint8_t*>(col_data.data() + start),
end - start,
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
column.get_name(), array_builder->type()->name());
}

} else if constexpr (std::is_same_v<T, Int128>) {
auto& string_builder = assert_cast<arrow::StringBuilder&>(*array_builder);
for (size_t i = start; i < end; ++i) {
Expand Down

0 comments on commit 49ce106

Please sign in to comment.