Skip to content

Commit

Permalink
[fix](string64) fix coredump caused by ColumnArray<ColumnStr<uint64_t>>::insert_indices_from
Browse files Browse the repository at this point in the history
  • Loading branch information
jacktengg committed Nov 11, 2024
1 parent af565ba commit 2a7efa5
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 26 deletions.
3 changes: 2 additions & 1 deletion be/src/vec/columns/column_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,8 @@ void ColumnArray::insert_from(const IColumn& src_, size_t n) {

if (!get_data().is_nullable() && src.get_data().is_nullable()) {
// Note: we can't process the case of 'Array(Nullable(nest))'
DCHECK(false);
throw Exception(ErrorCode::INTERNAL_ERROR, "insert '{}' into '{}'", src.get_name(),
get_name());
} else if (get_data().is_nullable() && !src.get_data().is_nullable()) {
// Note: here we should process the case of 'Array(NotNullable(nest))'
reinterpret_cast<ColumnNullable*>(&get_data())
Expand Down
56 changes: 31 additions & 25 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <ostream>

#include "util/memcpy_inlined.h"
#include "util/simd/bits.h"
Expand Down Expand Up @@ -134,34 +133,38 @@ void ColumnStr<T>::insert_range_from(const IColumn& src, size_t start, size_t le
if (length == 0) {
return;
}
auto do_insert = [&](const auto& src_concrete) {
const auto& src_offset = src_concrete.get_offsets();
const auto& src_chars = src_concrete.get_chars();
if (start + length > src_offset.size()) {
throw doris::Exception(
doris::ErrorCode::INTERNAL_ERROR,
"Parameter out of bound in IColumnStr<T>::insert_range_from method.");
}
size_t nested_offset = src_offset[start - 1];
size_t nested_length = src_offset[start + length - 1] - nested_offset;

const auto& src_concrete = assert_cast<const ColumnStr<T>&>(src);

if (start + length > src_concrete.offsets.size()) {
throw doris::Exception(
doris::ErrorCode::INTERNAL_ERROR,
"Parameter out of bound in IColumnStr<T>::insert_range_from method.");
}

size_t nested_offset = src_concrete.offset_at(start);
size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset;

size_t old_chars_size = chars.size();
check_chars_length(old_chars_size + nested_length, offsets.size() + length);
chars.resize(old_chars_size + nested_length);
memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], nested_length);
size_t old_chars_size = chars.size();
check_chars_length(old_chars_size + nested_length, offsets.size() + length);
chars.resize(old_chars_size + nested_length);
memcpy(&chars[old_chars_size], &src_chars[nested_offset], nested_length);

if (start == 0 && offsets.empty()) {
offsets.assign(src_concrete.offsets.begin(), src_concrete.offsets.begin() + length);
} else {
size_t old_size = offsets.size();
size_t prev_max_offset = offsets.back(); /// -1th index is Ok, see PaddedPODArray
offsets.resize(old_size + length);
if (start == 0 && offsets.empty()) {
offsets.assign(src_offset.begin(), src_offset.begin() + length);
} else {
size_t old_size = offsets.size();
size_t prev_max_offset = offsets.back(); /// -1th index is Ok, see PaddedPODArray
offsets.resize(old_size + length);

for (size_t i = 0; i < length; ++i) {
offsets[old_size + i] =
src_concrete.offsets[start + i] - nested_offset + prev_max_offset;
for (size_t i = 0; i < length; ++i) {
offsets[old_size + i] = src_offset[start + i] - nested_offset + prev_max_offset;
}
}
};
if (src.is_column_string64()) {
do_insert(assert_cast<const ColumnStr<uint64_t>&>(src));
} else {
do_insert(assert_cast<const ColumnStr<uint32_t>&>(src));
}
}

Expand Down Expand Up @@ -602,6 +605,7 @@ void ColumnStr<T>::compare_internal(size_t rhs_row_id, const IColumn& rhs, int n
template <typename T>
ColumnPtr ColumnStr<T>::convert_column_if_overflow() {
if (std::is_same_v<T, UInt32> && chars.size() > config::string_overflow_size) {
auto total_chars_size = chars.size();
auto new_col = ColumnStr<uint64_t>::create();

const auto length = offsets.size();
Expand All @@ -614,10 +618,12 @@ ColumnPtr ColumnStr<T>::convert_column_if_overflow() {
// If an offset has overflowed, it will be lower than offsets[loc - 1].
while (offsets[loc] >= offsets[loc - 1] && loc < length) {
large_offsets[loc] = offsets[loc];
DCHECK(large_offsets[loc] <= total_chars_size);
loc++;
}
while (loc < length) {
large_offsets[loc] = (offsets[loc] - offsets[loc - 1]) + large_offsets[loc - 1];
DCHECK(large_offsets[loc] <= total_chars_size);
loc++;
}

Expand Down

0 comments on commit 2a7efa5

Please sign in to comment.