Skip to content

Commit

Permalink
[opt] optimize insert_range_from
Browse files (browse the repository at this point in the history)
  • Loading branch information
eldenmoon committed Jan 9, 2025
1 parent f9d2e25 commit 302c048
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 11 deletions.
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ void VariantStatistics::to_pb(VariantStatisticsPB* stats) const {
for (const auto& [path, value] : sparse_column_non_null_size) {
stats->mutable_sparse_column_non_null_size()->emplace(path, value);
}
LOG(INFO) << "num subcolumns " << subcolumns_non_null_size.size() << ", num sparse columns "
<< sparse_column_non_null_size.size();
}

void VariantStatistics::from_pb(const VariantStatisticsPB& stats) {
Expand Down
12 changes: 10 additions & 2 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1318,9 +1318,10 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
// We can reach the limit of subcolumns, and in this case
// the rest of subcolumns from src will be inserted into sparse column.
std::map<std::string_view, Subcolumn> src_path_and_subcoumn_for_sparse_column;
int idx_hint = 0;
for (const auto& entry : src_object.subcolumns) {
// Check if we already have such dense column path.
if (auto* subcolumn = get_subcolumn(entry->path); subcolumn != nullptr) {
if (auto* subcolumn = get_subcolumn(entry->path, idx_hint); subcolumn != nullptr) {
subcolumn->insert_range_from(entry->data, start, length);
} else if (try_add_new_subcolumn(entry->path)) {
subcolumn = get_subcolumn(entry->path);
Expand All @@ -1329,6 +1330,7 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
} else {
src_path_and_subcoumn_for_sparse_column.emplace(entry->path.get_path(), entry->data);
}
++idx_hint;
}

// Paths in sparse column are sorted, so paths from src_dense_column_path_for_sparse_column should be inserted properly
Expand All @@ -1345,7 +1347,7 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
src_object, std::move(sorted_src_subcolumn_for_sparse_column), start, length);

num_rows += length;
finalize();
// finalize();
ENABLE_CHECK_CONSISTENCY(this);
}

Expand Down Expand Up @@ -1946,6 +1948,12 @@ void ColumnObject::clear_sparse_column() {
}

Status ColumnObject::finalize(FinalizeMode mode) {
if (is_finalized() && mode == FinalizeMode::READ_MODE) {
doc_structure = nullptr;
_prev_positions.clear();
ENABLE_CHECK_CONSISTENCY(this);
return Status::OK();
}
Subcolumns new_subcolumns;

if (auto root = subcolumns.get_mutable_root(); root == nullptr) {
Expand Down
7 changes: 1 addition & 6 deletions be/src/vec/data_types/data_type_nothing.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,7 @@ class DataTypeNothing final : public IDataType {
const char* deserialize(const char* buf, MutableColumnPtr* column,
int be_exec_version) const override;

[[noreturn]] Field get_default() const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"Method get_default() is not implemented for data type {}.",
get_name());
__builtin_unreachable();
}
Field get_default() const override { return Null(); }

[[noreturn]] Field get_field(const TExprNode& node) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
Expand Down
4 changes: 2 additions & 2 deletions regression-test/data/variant_p0/test_sub_path_pruning.out
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@
1 {"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14}

-- !sql --
"1"
1
{"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14}

-- !sql --
Expand All @@ -241,7 +241,7 @@
1 {"d":{"e":11}}

-- !sql --
"1"
1
{"d":{"e":11}}

-- !sql --
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ suite("test_compaction_sparse_column", "p1,nonConcurrent") {

try {
set_be_config.call("write_buffer_size", "10240")
set_be_config.call("variant_max_subcolumns_count", "3")
set_be_config.call("variant_max_subcolumns_count", "2")

sql """ DROP TABLE IF EXISTS ${tableName} """
sql """
Expand Down

0 comments on commit 302c048

Please sign in to comment.