diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 84830502366408..cea825e15e1514 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -47,6 +47,7 @@ #include "olap/cumulative_compaction_policy.h" #include "olap/cumulative_compaction_time_series_policy.h" #include "olap/data_dir.h" +#include "olap/olap_common.h" #include "olap/olap_define.h" #include "olap/rowset/beta_rowset.h" #include "olap/rowset/beta_rowset_writer.h" @@ -174,10 +175,11 @@ Status Compaction::merge_input_rowsets() { // write merged rows to output rowset // The test results show that merger is low-memory-footprint, there is no need to tracker its mem pool - // if ctx.skip_inverted_index.size() > 0, it means we need to do inverted index compaction. + // if ctx.columns_to_do_index_compaction.size() > 0, it means we need to do inverted index compaction. // the row ID conversion matrix needs to be used for inverted index compaction. - if (!ctx.skip_inverted_index.empty() || (_tablet->keys_type() == KeysType::UNIQUE_KEYS && - _tablet->enable_unique_key_merge_on_write())) { + if (!ctx.columns_to_do_index_compaction.empty() || + (_tablet->keys_type() == KeysType::UNIQUE_KEYS && + _tablet->enable_unique_key_merge_on_write())) { _stats.rowid_conversion = _rowid_conversion.get(); } @@ -482,46 +484,12 @@ Status CompactionMixin::execute_compact_impl(int64_t permits) { Status Compaction::do_inverted_index_compaction() { const auto& ctx = _output_rs_writer->context(); if (!config::inverted_index_compaction_enable || _input_row_num <= 0 || - !_stats.rowid_conversion || ctx.skip_inverted_index.empty()) { + !_stats.rowid_conversion || ctx.columns_to_do_index_compaction.empty()) { return Status::OK(); } OlapStopWatch inverted_watch; - int64_t cur_max_version = 0; - { - std::shared_lock rlock(_tablet->get_header_lock()); - cur_max_version = _tablet->max_version_unlocked(); - } - - DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id()); - std::set missed_rows; - std::map>> location_map; - // Convert the delete bitmap of the input rowsets to output rowset. - _tablet->calc_compaction_output_rowset_delete_bitmap( - _input_rowsets, *_rowid_conversion, 0, cur_max_version + 1, &missed_rows, &location_map, - _tablet->tablet_meta()->delete_bitmap(), &output_rowset_delete_bitmap); - - if (!_allow_delete_in_cumu_compaction) { - if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION && - _stats.merged_rows != missed_rows.size() && _tablet->tablet_state() == TABLET_RUNNING) { - std::string err_msg = fmt::format( - "cumulative compaction: the merged rows({}) is not equal to missed " - "rows({}) in rowid conversion, tablet_id: {}, table_id:{}", - _stats.merged_rows, missed_rows.size(), _tablet->tablet_id(), - _tablet->table_id()); - if (config::enable_mow_compaction_correctness_check_core) { - CHECK(false) << err_msg; - } else { - DCHECK(false) << err_msg; - } - // log here just for debugging, do not return error - LOG(WARNING) << err_msg; - } - } - - RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map)); - // translation vec // <> // the first level vector: index indicates src segment. @@ -718,7 +686,7 @@ Status Compaction::do_inverted_index_compaction() { }; Status status = Status::OK(); - for (auto&& column_uniq_id : ctx.skip_inverted_index) { + for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) { auto col = _cur_tablet_schema->column_by_uid(column_uniq_id); const auto* index_meta = _cur_tablet_schema->get_inverted_index(col); @@ -809,13 +777,25 @@ Status Compaction::do_inverted_index_compaction() { return Status::OK(); } -void Compaction::construct_skip_inverted_index(RowsetWriterContext& ctx) { +void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { for (const auto& index : _cur_tablet_schema->indexes()) { if (index.index_type() != IndexType::INVERTED) { continue; } - auto col_unique_id = index.col_unique_ids()[0]; + auto col_unique_ids = index.col_unique_ids(); + // check if column unique ids is empty to avoid crash + if (col_unique_ids.empty()) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" << index.index_id() + << "] has no column unique id, will skip index compaction." + << " tablet_schema=" << _cur_tablet_schema->dump_full_schema(); + continue; + } + auto col_unique_id = col_unique_ids[0]; + // Avoid doing inverted index compaction on non-slice type columns + if (!field_is_slice_type(_cur_tablet_schema->column_by_uid(col_unique_id).type())) { + continue; + } auto has_inverted_index = [&](const RowsetSharedPtr& src_rs) { auto* rowset = static_cast(src_rs.get()); if (rowset->is_skip_index_compaction(col_unique_id)) { @@ -877,7 +857,7 @@ void Compaction::construct_skip_inverted_index(RowsetWriterContext& ctx) { reader->close(); // why is 3? - // bkd index will write at least 3 files + // slice type index file at least has 3 files: null_bitmap, segments_N, segments.gen if (files.size() < 3) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" << col_unique_id << "]," << index_file_path @@ -897,9 +877,8 @@ void Compaction::construct_skip_inverted_index(RowsetWriterContext& ctx) { bool all_have_inverted_index = std::all_of(_input_rowsets.begin(), _input_rowsets.end(), std::move(has_inverted_index)); - if (all_have_inverted_index && - field_is_slice_type(_cur_tablet_schema->column_by_uid(col_unique_id).type())) { - ctx.skip_inverted_index.insert(col_unique_id); + if (all_have_inverted_index) { + ctx.columns_to_do_index_compaction.insert(col_unique_id); } } } @@ -912,7 +891,7 @@ Status CompactionMixin::construct_output_rowset_writer(RowsetWriterContext& ctx) _tablet->keys_type() == KeysType::DUP_KEYS)) && _cur_tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - construct_skip_inverted_index(ctx); + construct_index_compaction_columns(ctx); } ctx.version = _output_version; ctx.rowset_state = VISIBLE; @@ -1209,7 +1188,7 @@ Status CloudCompactionMixin::construct_output_rowset_writer(RowsetWriterContext& _tablet->keys_type() == KeysType::DUP_KEYS)) && _cur_tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - construct_skip_inverted_index(ctx); + construct_index_compaction_columns(ctx); } // Use the storage resource of the previous rowset diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h index 38d50595ca8f6e..08afe840280ecf 100644 --- a/be/src/olap/compaction.h +++ b/be/src/olap/compaction.h @@ -69,7 +69,7 @@ class Compaction { Status do_inverted_index_compaction(); - void construct_skip_inverted_index(RowsetWriterContext& ctx); + void construct_index_compaction_columns(RowsetWriterContext& ctx); virtual Status construct_output_rowset_writer(RowsetWriterContext& ctx) = 0; diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h index e13f7efe6e94fa..cb0fda83e60777 100644 --- a/be/src/olap/rowset/rowset_writer_context.h +++ b/be/src/olap/rowset/rowset_writer_context.h @@ -79,8 +79,8 @@ struct RowsetWriterContext { int64_t newest_write_timestamp = -1; bool enable_unique_key_merge_on_write = false; - // store column_unique_id to skip write inverted index - std::set skip_inverted_index; + // store column_unique_id to do index compaction + std::set columns_to_do_index_compaction; DataWriteType write_type = DataWriteType::TYPE_DEFAULT; BaseTabletSPtr tablet = nullptr; diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index c21021eb9d26cd..ed739fefcc1507 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -209,8 +209,9 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co opts.need_bitmap_index = column.has_bitmap_index(); bool skip_inverted_index = false; if (_opts.rowset_ctx != nullptr) { - // skip write inverted index for index compaction - skip_inverted_index = _opts.rowset_ctx->skip_inverted_index.count(column.unique_id()) > 0; + // skip write inverted index for index compaction column + skip_inverted_index = + _opts.rowset_ctx->columns_to_do_index_compaction.count(column.unique_id()) > 0; } // skip write inverted index on load if skip_write_index_on_load is true if (_opts.write_type == DataWriteType::TYPE_DIRECT && schema->skip_write_index_on_load()) { diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 8f7f614d7b332b..ce033cdd0022d0 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -202,8 +202,9 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo opts.need_bitmap_index = column.has_bitmap_index(); bool skip_inverted_index = false; if (_opts.rowset_ctx != nullptr) { - // skip write inverted index for index compaction - skip_inverted_index = _opts.rowset_ctx->skip_inverted_index.contains(column.unique_id()); + // skip write inverted index for index compaction column + skip_inverted_index = + _opts.rowset_ctx->columns_to_do_index_compaction.contains(column.unique_id()); } // skip write inverted index on load if skip_write_index_on_load is true if (_opts.write_type == DataWriteType::TYPE_DIRECT &&