diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index ca1f49a67f067f6..e66e016381e83b4 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -389,7 +389,8 @@ void ColumnArray::insert_from(const IColumn& src_, size_t n) { if (!get_data().is_nullable() && src.get_data().is_nullable()) { // Note: we can't process the case of 'Array(Nullable(nest))' - DCHECK(false); + throw Exception(ErrorCode::INTERNAL_ERROR, "insert '{}' into '{}'", src.get_name(), + get_name()); } else if (get_data().is_nullable() && !src.get_data().is_nullable()) { // Note: here we should process the case of 'Array(NotNullable(nest))' reinterpret_cast(&get_data()) diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 40d74b18ed45093..57d16f0ed6808be 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -22,7 +22,6 @@ #include #include -#include #include "util/memcpy_inlined.h" #include "util/simd/bits.h" @@ -134,34 +133,38 @@ void ColumnStr::insert_range_from(const IColumn& src, size_t start, size_t le if (length == 0) { return; } + auto do_insert = [&](const auto& src_concrete) { + const auto& src_offset = src_concrete.get_offsets(); + const auto& src_chars = src_concrete.get_chars(); + if (start + length > src_offset.size()) { + throw doris::Exception( + doris::ErrorCode::INTERNAL_ERROR, + "Parameter out of bound in IColumnStr::insert_range_from method."); + } + size_t nested_offset = src_offset[start - 1]; + size_t nested_length = src_offset[start + length - 1] - nested_offset; - const auto& src_concrete = assert_cast&>(src); - - if (start + length > src_concrete.offsets.size()) { - throw doris::Exception( - doris::ErrorCode::INTERNAL_ERROR, - "Parameter out of bound in IColumnStr::insert_range_from method."); - } - - size_t nested_offset = src_concrete.offset_at(start); - size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset; - - size_t old_chars_size = chars.size(); - check_chars_length(old_chars_size + nested_length, offsets.size() + length); - chars.resize(old_chars_size + nested_length); - memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], nested_length); + size_t old_chars_size = chars.size(); + check_chars_length(old_chars_size + nested_length, offsets.size() + length); + chars.resize(old_chars_size + nested_length); + memcpy(&chars[old_chars_size], &src_chars[nested_offset], nested_length); - if (start == 0 && offsets.empty()) { - offsets.assign(src_concrete.offsets.begin(), src_concrete.offsets.begin() + length); - } else { - size_t old_size = offsets.size(); - size_t prev_max_offset = offsets.back(); /// -1th index is Ok, see PaddedPODArray - offsets.resize(old_size + length); + if (start == 0 && offsets.empty()) { + offsets.assign(src_offset.begin(), src_offset.begin() + length); + } else { + size_t old_size = offsets.size(); + size_t prev_max_offset = offsets.back(); /// -1th index is Ok, see PaddedPODArray + offsets.resize(old_size + length); - for (size_t i = 0; i < length; ++i) { - offsets[old_size + i] = - src_concrete.offsets[start + i] - nested_offset + prev_max_offset; + for (size_t i = 0; i < length; ++i) { + offsets[old_size + i] = src_offset[start + i] - nested_offset + prev_max_offset; + } } + }; + if (src.is_column_string64()) { + do_insert(assert_cast&>(src)); + } else { + do_insert(assert_cast&>(src)); } } @@ -602,6 +605,7 @@ void ColumnStr::compare_internal(size_t rhs_row_id, const IColumn& rhs, int n template ColumnPtr ColumnStr::convert_column_if_overflow() { if (std::is_same_v && chars.size() > config::string_overflow_size) { + auto total_chars_size = chars.size(); auto new_col = ColumnStr::create(); const auto length = offsets.size(); @@ -614,10 +618,12 @@ ColumnPtr ColumnStr::convert_column_if_overflow() { // if offset overflow. will be lower than offsets[loc - 1] while (offsets[loc] >= offsets[loc - 1] && loc < length) { large_offsets[loc] = offsets[loc]; + DCHECK(large_offsets[loc] <= total_chars_size); loc++; } while (loc < length) { large_offsets[loc] = (offsets[loc] - offsets[loc - 1]) + large_offsets[loc - 1]; + DCHECK(large_offsets[loc] <= total_chars_size); loc++; }