From 49ce106d80cb649f1a8f7d49721735a34c4ca371 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Tue, 19 Nov 2024 10:54:42 +0800 Subject: [PATCH] [fix](arrow-flight-sql) Fix Doris NULL column conversion to arrow batch (#43929) ### What problem does this PR solve? Problem Summary: The representation of NULL columns in Doris is special, which is `DataTypeNull`. `Uint8` uses `arrow::BooleanBuilder` when serializing into arrow batch, which does not match the expected `arrow::NullBuilder`. Fix: ``` *** Query id: fd32741526804c1e-bc016473fd8f3aa3 *** *** is nereids: 1 *** *** tablet id: 0 *** *** Aborted at 1731327262 (unix time) try "date -d @1731327262" if you are using GNU date *** *** Current BE git commitID: 653e315ba5 *** *** SIGSEGV address not mapped to object (@0x100000024) received by PID 1442863 (TID 1443456 OR 0x7f8b8cdea700) from PID 36; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so 2# JVM_handle_linux_signal in /mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so 3# 0x00007F8CA1F38B50 in /lib64/libc.so.6 4# 0x000055FC45E5B2D3 in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be 5# arrow::BooleanBuilder::AppendValues(unsigned char const*, long, unsigned char const*) in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be 6# doris::vectorized::DataTypeNumberSerDe::write_column_to_arrow(doris::vectorized::IColumn const&, doris::vectorized::PODArray, 15ul, 16ul> const*, arrow::ArrayBuilder*, int, int, cctz::time_zone const&) const at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/data_types/serde/data_type_number_serde.cpp:86 7# doris::FromBlockConverter::convert(std::shared_ptr*) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/util/arrow/block_convertor.cpp:390 8# doris::convert_to_arrow_batch(doris::vectorized::Block const&, std::shared_ptr const&, arrow::MemoryPool*, std::shared_ptr*, cctz::time_zone const&) in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be 9# doris::vectorized::VArrowFlightResultWriter::write(doris::vectorized::Block&) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/varrow_flight_result_writer.cpp:76 10# doris::vectorized::VResultSink::send(doris::RuntimeState*, doris::vectorized::Block*, bool) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/vresult_sink.cpp:149 11# doris::PlanFragmentExecutor::open_vectorized_internal() at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:341 12# doris::PlanFragmentExecutor::open() at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:273 ``` --- .../serde/data_type_number_serde.cpp | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index efa41e346bfa6e..f4fb6bbbb1f9cf 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -78,12 +78,21 @@ void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const auto arrow_null_map = revert_null_map(null_map, start, end); auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : arrow_null_map.data(); if constexpr (std::is_same_v) { - ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); - checkArrowStatus( - builder.AppendValues(reinterpret_cast(col_data.data() + start), - end - start, - reinterpret_cast(arrow_null_map_data)), - column.get_name(), array_builder->type()->name()); + auto* null_builder = dynamic_cast(array_builder); + if (null_builder) { + for (size_t i = start; i < end; ++i) { + checkArrowStatus(null_builder->AppendNull(), column.get_name(), + null_builder->type()->name()); + } + } else { + ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); + checkArrowStatus( + builder.AppendValues(reinterpret_cast(col_data.data() + start), + end - start, + reinterpret_cast(arrow_null_map_data)), + column.get_name(), array_builder->type()->name()); + } + } else if constexpr (std::is_same_v) { auto& string_builder = assert_cast(*array_builder); for (size_t i = start; i < end; ++i) {