From ebb89c0801bfb4512bff2f7cd293e6d5f2633014 Mon Sep 17 00:00:00 2001 From: lihangyu Date: Fri, 20 Dec 2024 16:33:15 +0800 Subject: [PATCH 1/2] [Optimize](Variant) optimize schema update performance (#45480) When updating the schema under high concurrency, the cost of updating schemas is high. 1. update the schema only when the number of rows is not 0 2. copy_from is expensive, use the copy constructor instead --- .../olap/rowset/segment_v2/segment_writer.cpp | 4 +++- be/src/olap/rowset_builder.cpp | 24 ++++++++++--------- be/src/olap/tablet_schema.cpp | 15 ++++++++++++ be/src/olap/tablet_schema.h | 3 +++ be/src/vec/common/schema_util.cpp | 5 ++-- 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 2a61eb3f0fe8f7..b57bfcc556fad4 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -336,7 +336,9 @@ Status SegmentWriter::append_block_with_variant_subcolumns(vectorized::Block& da continue; } if (_flush_schema == nullptr) { - _flush_schema = std::make_shared(*_tablet_schema); + _flush_schema = std::make_shared(); + // deep copy + _flush_schema->copy_from(*_tablet_schema); } auto column_ref = data.get_by_position(i).column; const vectorized::ColumnObject& object_column = assert_cast( diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp index 4a95febbbb86fa..057cc35fb13098 100644 --- a/be/src/olap/rowset_builder.cpp +++ b/be/src/olap/rowset_builder.cpp @@ -335,21 +335,22 @@ Status RowsetBuilder::commit_txn() { SCOPED_TIMER(_commit_txn_timer); const RowsetWriterContext& rw_ctx = _rowset_writer->context(); - if (rw_ctx.tablet_schema->num_variant_columns() > 0) { + if (rw_ctx.tablet_schema->num_variant_columns() > 0 && _rowset->num_rows() > 0) { // Need to merge schema with `rw_ctx.merged_tablet_schema` in prior, // merged schema keeps the newest merged schema for the rowset, which is updated and merged // 
during flushing segments. if (rw_ctx.merged_tablet_schema != nullptr) { RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.merged_tablet_schema)); + } else { + // We should merge rowset schema further, in case that the merged_tablet_schema maybe null + // when enable_memtable_on_sink_node is true, the merged_tablet_schema will not be passed to + // the destination backend. + // update tablet schema when meet variant columns, before commit_txn + // Eg. rowset schema: A(int), B(float), C(int), D(int) + // _tabelt->tablet_schema: A(bigint), B(double) + // => update_schema: A(bigint), B(double), C(int), D(int) + RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema)); } - // We should merge rowset schema further, in case that the merged_tablet_schema maybe null - // when enable_memtable_on_sink_node is true, the merged_tablet_schema will not be passed to - // the destination backend. - // update tablet schema when meet variant columns, before commit_txn - // Eg. 
rowset schema: A(int), B(float), C(int), D(int) - // _tabelt->tablet_schema: A(bigint), B(double) - // => update_schema: A(bigint), B(double), C(int), D(int) - RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema)); } // Transfer ownership of `PendingRowsetGuard` to `TxnManager` @@ -387,7 +388,6 @@ Status BaseRowsetBuilder::cancel() { void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id, const OlapTableSchemaParam* table_schema_param, const TabletSchema& ori_tablet_schema) { - _tablet_schema->copy_from(ori_tablet_schema); // find the right index id int i = 0; auto indexes = table_schema_param->indexes(); @@ -396,11 +396,13 @@ void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id, break; } } - if (!indexes.empty() && !indexes[i]->columns.empty() && indexes[i]->columns[0]->unique_id() >= 0) { + _tablet_schema->shawdow_copy_without_columns(ori_tablet_schema); _tablet_schema->build_current_tablet_schema(index_id, table_schema_param->version(), indexes[i], ori_tablet_schema); + } else { + _tablet_schema->copy_from(ori_tablet_schema); } if (_tablet_schema->schema_version() > ori_tablet_schema.schema_version()) { // After schema change, should include extracted column diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 011a701b865aab..d7732c87063f2a 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1058,6 +1058,21 @@ void TabletSchema::copy_from(const TabletSchema& tablet_schema) { _table_id = tablet_schema.table_id(); } +void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) { + *this = tablet_schema; + _field_path_to_index.clear(); + _field_name_to_index.clear(); + _field_id_to_index.clear(); + _num_columns = 0; + _num_variant_columns = 0; + _num_null_columns = 0; + _num_key_columns = 0; + _cols.clear(); + _vl_field_mem_size = 0; + // notice : do not ref columns + _column_cache_handlers.clear(); +} + void 
TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) { for (auto& col : _cols) { if (col->unique_id() < 0) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 8e8edf16953bf0..24b0f7b4fe3e9f 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -328,6 +328,8 @@ class TabletSchema { // Must make sure the row column is always the last column void add_row_column(); void copy_from(const TabletSchema& tablet_schema); + // lightweight copy, take care of lifecycle of TabletColumn + void shawdow_copy_without_columns(const TabletSchema& tablet_schema); void update_index_info_from(const TabletSchema& tablet_schema); std::string to_key() const; // Don't use. @@ -525,6 +527,7 @@ class TabletSchema { private: friend bool operator==(const TabletSchema& a, const TabletSchema& b); friend bool operator!=(const TabletSchema& a, const TabletSchema& b); + TabletSchema(const TabletSchema&) = default; void clear_column_cache_handlers(); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index fd50af3e1fcd88..47192f38079e85 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -416,9 +416,8 @@ Status get_least_common_schema(const std::vector& schemas, // duplicated paths following the update_least_common_schema process. 
auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& base_schema) { output_schema = std::make_shared(); - output_schema->copy_from(*base_schema); - // Merge columns from other schemas - output_schema->clear_columns(); + // not copy columns but only shadow copy other attributes + output_schema->shawdow_copy_without_columns(*base_schema); // Get all columns without extracted columns and collect variant col unique id for (const TabletColumnPtr& col : base_schema->columns()) { if (col->is_variant_type()) { From 28aa84658149fef5266a5f03c3f02a6c32267942 Mon Sep 17 00:00:00 2001 From: eldenmoon Date: Sat, 21 Dec 2024 23:43:35 +0800 Subject: [PATCH 2/2] fix compile --- be/src/olap/tablet_schema.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index d7732c87063f2a..e668d74b53d872 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1068,7 +1068,6 @@ void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schem _num_null_columns = 0; _num_key_columns = 0; _cols.clear(); - _vl_field_mem_size = 0; // notice : do not ref columns _column_cache_handlers.clear(); }