Skip to content

Commit

Permalink
[Refactor](Variant) should not call finalize in const functions
Browse files Browse the repository at this point in the history
`finalize` may change the inner data structure in ColumnObject, and lead to heap use after free in some scenario
  • Loading branch information
eldenmoon committed Jul 15, 2024
1 parent 43895d7 commit 9cdc963
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 43 deletions.
105 changes: 64 additions & 41 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,26 +791,58 @@ void ColumnObject::insert_default() {
++num_rows;
}

Field ColumnObject::operator[](size_t n) const {
if (!is_finalized()) {
const_cast<ColumnObject*>(this)->finalize();
}
VariantMap map;
for (const auto& entry : subcolumns) {
if (WhichDataType(remove_nullable(entry->data.data_types.back())).is_json()) {
void ColumnObject::Subcolumn::get(size_t n, Field& res) const {
if (is_finalized()) {
if (least_common_type.get_base_type_id() == TypeIndex::JSONB) {
// JsonbFiled is special case
Field f = JsonbField();
(*entry->data.data.back()).get(n, f);
map[entry->path.get_path()] = std::move(f);
continue;
res = JsonbField();
}
get_finalized_column().get(n, res);
return;
}

size_t ind = n;
if (ind < num_of_defaults_in_prefix) {
if (least_common_type.get_base_type_id() == TypeIndex::Nothing) {
res = Null();
return;
}
res = least_common_type.get()->get_default();
return;
}

ind -= num_of_defaults_in_prefix;
for (const auto& part : data) {
if (ind < part->size()) {
part->get(ind, res);
Field new_field;
convert_field_to_type(res, *least_common_type.get(), &new_field);
res = new_field;
return;
}
map[entry->path.get_path()] = (*entry->data.data.back())[n];

ind -= part->size();
}
return map;

throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range",
n);
}

Field ColumnObject::operator[](size_t n) const {
Field object;
get(n, object);
return object;
}

void ColumnObject::get(size_t n, Field& res) const {
res = (*this)[n];
assert(n < size());
res = VariantMap();
auto& object = res.get<VariantMap&>();

for (const auto& entry : subcolumns) {
auto it = object.try_emplace(entry->path.get_path()).first;
entry->data.get(n, it->second);
}
}

Status ColumnObject::try_insert_indices_from(const IColumn& src, const int* indices_begin,
Expand Down Expand Up @@ -1380,7 +1412,10 @@ void ColumnObject::strip_outer_array() {

ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
if (!is_finalized()) {
const_cast<ColumnObject*>(this)->finalize();
auto finalized = clone_finalized();
auto& finalized_object = assert_cast<ColumnObject&>(*finalized);
return finalized_object.apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.filter(filter, count); });
}
auto new_column = ColumnObject::create(true, false);
for (auto& entry : subcolumns) {
Expand Down Expand Up @@ -1545,36 +1580,34 @@ void ColumnObject::insert_indices_from(const IColumn& src, const uint32_t* indic
}
}

// finalize has no side effect and can be safely used in const functions
#define ENSURE_FINALIZED() \
if (!is_finalized()) { \
const_cast<ColumnObject*>(this)->finalize(); \
void ColumnObject::for_each_imutable_subcolumn(ImutableColumnCallback callback) const {
if (!is_finalized()) {
auto finalized = clone_finalized();
auto& finalized_object = assert_cast<ColumnObject&>(*finalized);
finalized_object.for_each_imutable_subcolumn(callback);
return;
}
for (const auto& entry : subcolumns) {
for (auto& part : entry->data.data) {
callback(*part);
}
}
}

void ColumnObject::update_hash_with_value(size_t n, SipHash& hash) const {
ENSURE_FINALIZED();
for_each_imutable_subcolumn([&](const auto& subcolumn) {
if (n >= subcolumn.size()) {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"greater than column size {}, sub_column_info:{}, total lines "
"of this column:{}",
subcolumn.size(), subcolumn.dump_structure(), num_rows);
}
return subcolumn.update_hash_with_value(n, hash);
});
for_each_imutable_subcolumn(
[&](const auto& subcolumn) { return subcolumn.update_hash_with_value(n, hash); });
}

void ColumnObject::update_hashes_with_value(uint64_t* __restrict hashes,
const uint8_t* __restrict null_data) const {
ENSURE_FINALIZED();
for_each_imutable_subcolumn([&](const auto& subcolumn) {
return subcolumn.update_hashes_with_value(hashes, nullptr);
});
}

void ColumnObject::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash,
const uint8_t* __restrict null_data) const {
ENSURE_FINALIZED();
for_each_imutable_subcolumn([&](const auto& subcolumn) {
return subcolumn.update_xxHash_with_value(start, end, hash, nullptr);
});
Expand All @@ -1583,28 +1616,18 @@ void ColumnObject::update_xxHash_with_value(size_t start, size_t end, uint64_t&
void ColumnObject::update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type,
uint32_t rows, uint32_t offset,
const uint8_t* __restrict null_data) const {
ENSURE_FINALIZED();
for_each_imutable_subcolumn([&](const auto& subcolumn) {
return subcolumn.update_crcs_with_value(hash, type, rows, offset, nullptr);
});
}

void ColumnObject::update_crc_with_value(size_t start, size_t end, uint32_t& hash,
const uint8_t* __restrict null_data) const {
ENSURE_FINALIZED();
for_each_imutable_subcolumn([&](const auto& subcolumn) {
return subcolumn.update_crc_with_value(start, end, hash, nullptr);
});
}

void ColumnObject::for_each_imutable_subcolumn(ImutableColumnCallback callback) const {
for (const auto& entry : subcolumns) {
for (auto& part : entry->data.data) {
callback(*part);
}
}
}

std::string ColumnObject::debug_string() const {
std::stringstream res;
res << get_family_name() << "(num_row = " << num_rows;
Expand Down
6 changes: 4 additions & 2 deletions be/src/vec/columns/column_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {

size_t get_dimensions() const { return least_common_type.get_dimensions(); }

void get(size_t n, Field& res) const;

/// Checks the consistency of column's parts stored in @data.
void checkTypes() const;

Expand Down Expand Up @@ -422,8 +424,6 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
Status try_insert_indices_from(const IColumn& src, const int* indices_begin,
const int* indices_end);

void for_each_imutable_subcolumn(ImutableColumnCallback callback) const;

void update_hash_with_value(size_t n, SipHash& hash) const override;

ColumnPtr filter(const Filter&, ssize_t) const override;
Expand All @@ -439,6 +439,8 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
template <typename Func>
MutableColumnPtr apply_for_subcolumns(Func&& func) const;

void for_each_imutable_subcolumn(ImutableColumnCallback callback) const;

// Extract path from root column and replace root with new extracted column,
// root must be jsonb type
Status extract_root(const PathInData& path);
Expand Down

0 comments on commit 9cdc963

Please sign in to comment.