diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index c0f16d304a2b72..a8ab93de455c3b 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1630,11 +1630,13 @@ void drop_tablet_callback(StorageEngine& engine, const TAgentTaskRequest& req) { dropped_tablet->tablet_uid()); LOG_INFO("successfully drop tablet") .tag("signature", req.signature) - .tag("tablet_id", drop_tablet_req.tablet_id); + .tag("tablet_id", drop_tablet_req.tablet_id) + .tag("replica_id", drop_tablet_req.replica_id); } else { LOG_WARNING("failed to drop tablet") .tag("signature", req.signature) .tag("tablet_id", drop_tablet_req.tablet_id) + .tag("replica_id", drop_tablet_req.replica_id) .error(status); } diff --git a/be/src/cloud/cloud_base_compaction.cpp b/be/src/cloud/cloud_base_compaction.cpp index 88d83000e95dfa..9742e57dcf9d34 100644 --- a/be/src/cloud/cloud_base_compaction.cpp +++ b/be/src/cloud/cloud_base_compaction.cpp @@ -125,6 +125,7 @@ Status CloudBaseCompaction::prepare_compact() { _input_row_num += rs->num_rows(); _input_segments += rs->num_segments(); _input_rowsets_data_size += rs->data_disk_size(); + _input_rowsets_index_size += rs->index_disk_size(); _input_rowsets_total_size += rs->total_disk_size(); } LOG_INFO("start CloudBaseCompaction, tablet_id={}, range=[{}-{}]", _tablet->tablet_id(), @@ -320,6 +321,10 @@ Status CloudBaseCompaction::modify_rowsets() { compaction_job->add_output_versions(_output_rowset->end_version()); compaction_job->add_txn_id(_output_rowset->txn_id()); compaction_job->add_output_rowset_ids(_output_rowset->rowset_id().to_string()); + compaction_job->set_index_size_input_rowsets(_input_rowsets_index_size); + compaction_job->set_segment_size_input_rowsets(_input_rowsets_data_size); + compaction_job->set_index_size_output_rowsets(_output_rowset->index_disk_size()); + compaction_job->set_segment_size_output_rowsets(_output_rowset->data_disk_size()); DeleteBitmapPtr output_rowset_delete_bitmap = 
nullptr; if (_tablet->keys_type() == KeysType::UNIQUE_KEYS && diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 2f08082f51b5f3..f63054563aa18a 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -33,6 +33,7 @@ #include "util/uuid_generator.h" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; bvar::Adder cumu_output_size("cumu_compaction", "output_size"); @@ -263,6 +264,10 @@ Status CloudCumulativeCompaction::modify_rowsets() { compaction_job->add_output_versions(_output_rowset->end_version()); compaction_job->add_txn_id(_output_rowset->txn_id()); compaction_job->add_output_rowset_ids(_output_rowset->rowset_id().to_string()); + compaction_job->set_index_size_input_rowsets(_input_rowsets_index_size); + compaction_job->set_segment_size_input_rowsets(_input_rowsets_data_size); + compaction_job->set_index_size_output_rowsets(_output_rowset->index_disk_size()); + compaction_job->set_segment_size_output_rowsets(_output_rowset->data_disk_size()); DBUG_EXECUTE_IF("CloudCumulativeCompaction::modify_rowsets.enable_spin_wait", { LOG(INFO) << "CloudCumulativeCompaction::modify_rowsets.enable_spin_wait, start"; @@ -371,11 +376,9 @@ Status CloudCumulativeCompaction::modify_rowsets() { Status CloudCumulativeCompaction::process_old_version_delete_bitmap() { // agg previously rowset old version delete bitmap std::vector pre_rowsets {}; - std::vector pre_rowset_ids {}; for (const auto& it : cloud_tablet()->rowset_map()) { if (it.first.second < _input_rowsets.front()->start_version()) { pre_rowsets.emplace_back(it.second); - pre_rowset_ids.emplace_back(it.second->rowset_id().to_string()); } } std::sort(pre_rowsets.begin(), pre_rowsets.end(), Rowset::comparator); @@ -486,8 +489,10 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() { } int64_t max_score = config::cumulative_compaction_max_deltas; - auto 
process_memory_usage = doris::GlobalMemoryArbitrator::process_memory_usage(); - bool memory_usage_high = process_memory_usage > MemInfo::soft_mem_limit() * 0.8; + double process_memory_usage = + cast_set(doris::GlobalMemoryArbitrator::process_memory_usage()); + bool memory_usage_high = + process_memory_usage > cast_set(MemInfo::soft_mem_limit()) * 0.8; if (cloud_tablet()->last_compaction_status.is() || memory_usage_high) { max_score = std::max(config::cumulative_compaction_max_deltas / @@ -617,4 +622,5 @@ void CloudCumulativeCompaction::do_lease() { } } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_cumulative_compaction.h b/be/src/cloud/cloud_cumulative_compaction.h index 1159dcb59ceef1..87fc0b62c9c389 100644 --- a/be/src/cloud/cloud_cumulative_compaction.h +++ b/be/src/cloud/cloud_cumulative_compaction.h @@ -24,6 +24,7 @@ #include "olap/compaction.h" namespace doris { +#include "common/compile_check_begin.h" class CloudCumulativeCompaction : public CloudCompactionMixin { public: @@ -60,4 +61,5 @@ class CloudCumulativeCompaction : public CloudCompactionMixin { Version _last_delete_version {-1, -1}; }; +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.cpp b/be/src/cloud/cloud_cumulative_compaction_policy.cpp index 5a9879387b2327..92a47fcc69f8d7 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.cpp +++ b/be/src/cloud/cloud_cumulative_compaction_policy.cpp @@ -31,6 +31,7 @@ #include "olap/tablet_meta.h" namespace doris { +#include "common/compile_check_begin.h" CloudSizeBasedCumulativeCompactionPolicy::CloudSizeBasedCumulativeCompactionPolicy( int64_t promotion_size, double promotion_ratio, int64_t promotion_min_size, @@ -48,7 +49,7 @@ int64_t CloudSizeBasedCumulativeCompactionPolicy::_level_size(const int64_t size return (int64_t)1 << (sizeof(size) * 8 - 1 - __builtin_clzl(size)); } -int32_t 
CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( +int64_t CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, std::vector* input_rowsets, Version* last_delete_version, @@ -114,8 +115,8 @@ int32_t CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( size_t new_compaction_score = *compaction_score; while (rs_begin != input_rowsets->end()) { auto& rs_meta = (*rs_begin)->rowset_meta(); - int current_level = _level_size(rs_meta->total_disk_size()); - int remain_level = _level_size(total_size - rs_meta->total_disk_size()); + int64_t current_level = _level_size(rs_meta->total_disk_size()); + int64_t remain_level = _level_size(total_size - rs_meta->total_disk_size()); // if current level less then remain level, input rowsets contain current rowset // and process return; otherwise, input rowsets do not contain current rowset. if (current_level <= remain_level) { @@ -185,7 +186,7 @@ int32_t CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( } int64_t CloudSizeBasedCumulativeCompactionPolicy::cloud_promotion_size(CloudTablet* t) const { - int64_t promotion_size = int64_t(t->base_size() * _promotion_ratio); + int64_t promotion_size = int64_t(cast_set(t->base_size()) * _promotion_ratio); // promotion_size is between _size_based_promotion_size and _size_based_promotion_min_size return promotion_size > _promotion_size ? _promotion_size : promotion_size < _promotion_min_size ? 
_promotion_min_size @@ -215,7 +216,7 @@ int64_t CloudSizeBasedCumulativeCompactionPolicy::new_cumulative_point( : last_cumulative_point; } -int32_t CloudTimeSeriesCumulativeCompactionPolicy::pick_input_rowsets( +int64_t CloudTimeSeriesCumulativeCompactionPolicy::pick_input_rowsets( CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, std::vector* input_rowsets, Version* last_delete_version, @@ -377,4 +378,5 @@ int64_t CloudTimeSeriesCumulativeCompactionPolicy::new_cumulative_point( return output_rowset->end_version() + 1; } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.h b/be/src/cloud/cloud_cumulative_compaction_policy.h index c142a8a6d3dffe..9373728547241b 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.h +++ b/be/src/cloud/cloud_cumulative_compaction_policy.h @@ -30,6 +30,7 @@ #include "olap/rowset/rowset_meta.h" namespace doris { +#include "common/compile_check_begin.h" class Tablet; struct Version; @@ -44,7 +45,7 @@ class CloudCumulativeCompactionPolicy { virtual int64_t new_compaction_level(const std::vector& input_rowsets) = 0; - virtual int32_t pick_input_rowsets(CloudTablet* tablet, + virtual int64_t pick_input_rowsets(CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, @@ -71,7 +72,7 @@ class CloudSizeBasedCumulativeCompactionPolicy : public CloudCumulativeCompactio return 0; } - int32_t pick_input_rowsets(CloudTablet* tablet, + int64_t pick_input_rowsets(CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, @@ -106,7 +107,7 @@ class CloudTimeSeriesCumulativeCompactionPolicy : public CloudCumulativeCompacti int64_t new_compaction_level(const std::vector& input_rowsets) override; - int32_t pick_input_rowsets(CloudTablet* tablet, + int64_t 
pick_input_rowsets(CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, @@ -115,4 +116,5 @@ class CloudTimeSeriesCumulativeCompactionPolicy : public CloudCumulativeCompacti bool allow_delete = false) override; }; +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_delete_bitmap_action.cpp b/be/src/cloud/cloud_delete_bitmap_action.cpp index 86cc535e1bc88e..3d834bfe7b373c 100644 --- a/be/src/cloud/cloud_delete_bitmap_action.cpp +++ b/be/src/cloud/cloud_delete_bitmap_action.cpp @@ -50,6 +50,7 @@ #include "util/stopwatch.hpp" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; namespace { @@ -177,4 +178,5 @@ void CloudDeleteBitmapAction::handle(HttpRequest* req) { } } +#include "common/compile_check_end.h" } // namespace doris \ No newline at end of file diff --git a/be/src/cloud/cloud_delete_bitmap_action.h b/be/src/cloud/cloud_delete_bitmap_action.h index 35739a7373efc8..ce507ee9991757 100644 --- a/be/src/cloud/cloud_delete_bitmap_action.h +++ b/be/src/cloud/cloud_delete_bitmap_action.h @@ -27,6 +27,7 @@ #include "olap/tablet.h" namespace doris { +#include "common/compile_check_begin.h" class HttpRequest; class ExecEnv; @@ -52,4 +53,5 @@ class CloudDeleteBitmapAction : public HttpHandlerWithAuth { CloudStorageEngine& _engine; DeleteBitmapActionType _delete_bitmap_action_type; }; +#include "common/compile_check_end.h" } // namespace doris \ No newline at end of file diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp index 91611d20c6270b..fbf4b9cf303570 100644 --- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp +++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp @@ -34,6 +34,7 @@ #include "runtime/memory/mem_tracker_limiter.h" namespace doris { +#include "common/compile_check_begin.h" 
CloudEngineCalcDeleteBitmapTask::CloudEngineCalcDeleteBitmapTask( CloudStorageEngine& engine, const TCalcDeleteBitmapRequest& cal_delete_bitmap_req, @@ -227,7 +228,7 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const { } } auto total_update_delete_bitmap_time_us = MonotonicMicros() - t3; - LOG(INFO) << "calculate delete bitmap successfully on tablet" + LOG(INFO) << "finish calculate delete bitmap on tablet" << ", table_id=" << tablet->table_id() << ", transaction_id=" << _transaction_id << ", tablet_id=" << tablet->tablet_id() << ", get_tablet_time_us=" << get_tablet_time_us @@ -325,4 +326,5 @@ Status CloudTabletCalcDeleteBitmapTask::_handle_rowset( return status; } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_full_compaction.cpp b/be/src/cloud/cloud_full_compaction.cpp index c27b728c93d29b..bce00c9a2e74f6 100644 --- a/be/src/cloud/cloud_full_compaction.cpp +++ b/be/src/cloud/cloud_full_compaction.cpp @@ -216,6 +216,10 @@ Status CloudFullCompaction::modify_rowsets() { compaction_job->add_output_versions(_output_rowset->end_version()); compaction_job->add_txn_id(_output_rowset->txn_id()); compaction_job->add_output_rowset_ids(_output_rowset->rowset_id().to_string()); + compaction_job->set_index_size_input_rowsets(_input_rowsets_index_size); + compaction_job->set_segment_size_input_rowsets(_input_rowsets_data_size); + compaction_job->set_index_size_output_rowsets(_output_rowset->index_disk_size()); + compaction_job->set_segment_size_output_rowsets(_output_rowset->data_disk_size()); DeleteBitmapPtr output_rowset_delete_bitmap = nullptr; if (_tablet->keys_type() == KeysType::UNIQUE_KEYS && diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index 8e21498b0d873d..835e74ca7d5687 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -64,6 +64,7 @@ #include "util/thrift_rpc_helper.h" namespace doris::cloud { +#include "common/compile_check_begin.h" using 
namespace ErrorCode; Status bthread_fork_join(const std::vector>& tasks, int concurrency) { @@ -717,7 +718,7 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_ "rowset_ids.size={},segment_ids.size={},vers.size={},delete_bitmaps.size={}", rowset_ids.size(), segment_ids.size(), vers.size(), delete_bitmaps.size()); } - for (size_t i = 0; i < rowset_ids.size(); i++) { + for (int i = 0; i < rowset_ids.size(); i++) { RowsetId rst_id; rst_id.init(rowset_ids[i]); delete_bitmap->merge( @@ -757,10 +758,10 @@ Status CloudMetaMgr::prepare_rowset(const RowsetMeta& rs_meta, Status st = retry_rpc("prepare rowset", req, &resp, &MetaService_Stub::prepare_rowset); if (!st.ok() && resp.status().code() == MetaServiceCode::ALREADY_EXISTED) { if (existed_rs_meta != nullptr && resp.has_existed_rowset_meta()) { - RowsetMetaPB doris_rs_meta = + RowsetMetaPB doris_rs_meta_tmp = cloud_rowset_meta_to_doris(std::move(*resp.mutable_existed_rowset_meta())); *existed_rs_meta = std::make_shared(); - (*existed_rs_meta)->init_from_pb(doris_rs_meta); + (*existed_rs_meta)->init_from_pb(doris_rs_meta_tmp); } return Status::AlreadyExist("failed to prepare rowset: {}", resp.status().msg()); } @@ -1286,4 +1287,5 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) { return total_inverted_index_size; } +#include "common/compile_check_end.h" } // namespace doris::cloud diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h index c49b036ad90c15..913ef59489a1b3 100644 --- a/be/src/cloud/cloud_meta_mgr.h +++ b/be/src/cloud/cloud_meta_mgr.h @@ -27,6 +27,7 @@ #include "util/s3_util.h" namespace doris { +#include "common/compile_check_begin.h" class DeleteBitmap; class StreamLoadContext; @@ -124,4 +125,5 @@ class CloudMetaMgr { }; } // namespace cloud +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 
0bab742c3ad6e5..1cc4d052a81d69 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -340,17 +340,23 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam int64_t num_output_rows = 0; int64_t size_output_rowsets = 0; int64_t num_output_segments = 0; + int64_t index_size_output_rowsets = 0; + int64_t segment_size_output_rowsets = 0; for (auto& rs : _output_rowsets) { sc_job->add_txn_ids(rs->txn_id()); sc_job->add_output_versions(rs->end_version()); num_output_rows += rs->num_rows(); size_output_rowsets += rs->total_disk_size(); num_output_segments += rs->num_segments(); + index_size_output_rowsets += rs->index_disk_size(); + segment_size_output_rowsets += rs->data_disk_size(); } sc_job->set_num_output_rows(num_output_rows); sc_job->set_size_output_rowsets(size_output_rowsets); sc_job->set_num_output_segments(num_output_segments); sc_job->set_num_output_rowsets(_output_rowsets.size()); + sc_job->set_index_size_output_rowsets(index_size_output_rowsets); + sc_job->set_segment_size_output_rowsets(segment_size_output_rowsets); } _output_cumulative_point = std::min(_output_cumulative_point, sc_job->alter_version() + 1); sc_job->set_output_cumulative_point(_output_cumulative_point); diff --git a/be/src/cloud/cloud_storage_engine.cpp b/be/src/cloud/cloud_storage_engine.cpp index dc6abbac31ba1b..b66a9cfbdb2245 100644 --- a/be/src/cloud/cloud_storage_engine.cpp +++ b/be/src/cloud/cloud_storage_engine.cpp @@ -52,6 +52,7 @@ #include "util/parse_util.h" namespace doris { +#include "common/compile_check_begin.h" using namespace std::literals; @@ -166,7 +167,8 @@ Status CloudStorageEngine::open() { _memtable_flush_executor = std::make_unique(); // Use file cache disks number - _memtable_flush_executor->init(io::FileCacheFactory::instance()->get_cache_instance_size()); + _memtable_flush_executor->init( + cast_set(io::FileCacheFactory::instance()->get_cache_instance_size())); _calc_delete_bitmap_executor 
= std::make_unique(); _calc_delete_bitmap_executor->init(); @@ -321,7 +323,7 @@ void CloudStorageEngine::_check_file_cache_ttl_block_valid() { for (const auto& rowset : rowsets) { int64_t ttl_seconds = tablet->tablet_meta()->ttl_seconds(); if (rowset->newest_write_timestamp() + ttl_seconds <= UnixSeconds()) continue; - for (int64_t seg_id = 0; seg_id < rowset->num_segments(); seg_id++) { + for (uint32_t seg_id = 0; seg_id < rowset->num_segments(); seg_id++) { auto hash = Segment::file_cache_key(rowset->rowset_id().to_string(), seg_id); auto* file_cache = io::FileCacheFactory::instance()->get_by_path(hash); file_cache->update_ttl_atime(hash); @@ -350,11 +352,11 @@ void CloudStorageEngine::sync_storage_vault() { for (auto& [id, vault_info, path_format] : vault_infos) { auto fs = get_filesystem(id); - auto st = (fs == nullptr) - ? std::visit(VaultCreateFSVisitor {id, path_format}, vault_info) - : std::visit(RefreshFSVaultVisitor {id, std::move(fs), path_format}, - vault_info); - if (!st.ok()) [[unlikely]] { + auto status = (fs == nullptr) + ? std::visit(VaultCreateFSVisitor {id, path_format}, vault_info) + : std::visit(RefreshFSVaultVisitor {id, std::move(fs), path_format}, + vault_info); + if (!status.ok()) [[unlikely]] { - LOG(WARNING) << vault_process_error(id, vault_info, std::move(st)); + LOG(WARNING) << vault_process_error(id, vault_info, std::move(status)); } } @@ -504,13 +506,13 @@ void CloudStorageEngine::_compaction_tasks_producer_callback() { /// If it is not cleaned up, the reference count of the tablet will always be greater than 1, /// thus cannot be collected by the garbage collector. 
(TabletManager::start_trash_sweep) for (const auto& tablet : tablets_compaction) { - Status st = submit_compaction_task(tablet, compaction_type); - if (st.ok()) continue; - if ((!st.is() && - !st.is()) || + Status status = submit_compaction_task(tablet, compaction_type); + if (status.ok()) continue; + if ((!status.is() && + !status.is()) || VLOG_DEBUG_IS_ON) { LOG(WARNING) << "failed to submit compaction task for tablet: " - << tablet->tablet_id() << ", err: " << st; + << tablet->tablet_id() << ", err: " << status; } } interval = config::generate_compaction_tasks_interval_ms; @@ -544,7 +546,8 @@ std::vector CloudStorageEngine::_generate_cloud_compaction_task int num_cumu = std::accumulate(submitted_cumu_compactions.begin(), submitted_cumu_compactions.end(), 0, [](int a, auto& b) { return a + b.second.size(); }); - int num_base = submitted_base_compactions.size() + submitted_full_compactions.size(); + int num_base = + cast_set(submitted_base_compactions.size() + submitted_full_compactions.size()); int n = thread_per_disk - num_cumu - num_base; if (compaction_type == CompactionType::BASE_COMPACTION) { // We need to reserve at least one thread for cumulative compaction, @@ -822,7 +825,7 @@ Status CloudStorageEngine::get_compaction_status_json(std::string* result) { // cumu std::string_view cumu = "CumulativeCompaction"; rapidjson::Value cumu_key; - cumu_key.SetString(cumu.data(), cumu.length(), root.GetAllocator()); + cumu_key.SetString(cumu.data(), cast_set(cumu.length()), root.GetAllocator()); rapidjson::Document cumu_arr; cumu_arr.SetArray(); for (auto& [tablet_id, v] : _submitted_cumu_compactions) { @@ -834,7 +837,7 @@ Status CloudStorageEngine::get_compaction_status_json(std::string* result) { // base std::string_view base = "BaseCompaction"; rapidjson::Value base_key; - base_key.SetString(base.data(), base.length(), root.GetAllocator()); + base_key.SetString(base.data(), cast_set(base.length()), root.GetAllocator()); rapidjson::Document base_arr; 
base_arr.SetArray(); for (auto& [tablet_id, _] : _submitted_base_compactions) { @@ -857,4 +860,5 @@ std::shared_ptr CloudStorageEngine::cumu_compac return _cumulative_compaction_policies.at(compaction_policy); } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index ea86f3b40ff1dc..93c7128756738c 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -50,6 +50,7 @@ #include "vec/common/schema_util.h" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1; @@ -219,6 +220,7 @@ Status CloudTablet::sync_if_not_running() { } TabletSchemaSPtr CloudTablet::merged_tablet_schema() const { + std::shared_lock rlock(_meta_lock); return _merged_tablet_schema; } @@ -380,7 +382,7 @@ void CloudTablet::delete_rowsets(const std::vector& to_delete, _tablet_meta->modify_rs_metas({}, rs_metas, false); } -int CloudTablet::delete_expired_stale_rowsets() { +uint64_t CloudTablet::delete_expired_stale_rowsets() { std::vector expired_rowsets; int64_t expired_stale_sweep_endtime = ::time(nullptr) - config::tablet_rowset_stale_sweep_time_sec; @@ -539,7 +541,7 @@ Result> CloudTablet::create_transient_rowset_write return RowsetFactory::create_rowset_writer(_engine, context, false) .transform([&](auto&& writer) { - writer->set_segment_start_id(rowset.num_segments()); + writer->set_segment_start_id(cast_set(rowset.num_segments())); return writer; }); } @@ -617,7 +619,8 @@ void CloudTablet::get_compaction_status(std::string* json_result) { } rapidjson::Value value; std::string version_str = rowset->get_rowset_info_str(); - value.SetString(version_str.c_str(), version_str.length(), versions_arr.GetAllocator()); + value.SetString(version_str.c_str(), cast_set(version_str.length()), + versions_arr.GetAllocator()); versions_arr.PushBack(value, versions_arr.GetAllocator()); last_version = ver.second; } 
@@ -630,7 +633,7 @@ void CloudTablet::get_compaction_status(std::string* json_result) { for (auto& rowset : stale_rowsets) { rapidjson::Value value; std::string version_str = rowset->get_rowset_info_str(); - value.SetString(version_str.c_str(), version_str.length(), + value.SetString(version_str.c_str(), cast_set(version_str.length()), stale_versions_arr.GetAllocator()); stale_versions_arr.PushBack(value, stale_versions_arr.GetAllocator()); } @@ -775,7 +778,8 @@ Status CloudTablet::calc_delete_bitmap_for_compaction( } std::unique_ptr> location_map; - if (config::enable_rowid_conversion_correctness_check) { + if (config::enable_rowid_conversion_correctness_check && + tablet_schema()->cluster_key_uids().empty()) { location_map = std::make_unique>(); LOG(INFO) << "Location Map inited succ for tablet:" << tablet_id(); } @@ -924,4 +928,5 @@ void CloudTablet::build_tablet_report_info(TTabletInfo* tablet_info) { // but it may be used in the future. } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h index 80038e569ba2fc..fc0d64a493d316 100644 --- a/be/src/cloud/cloud_tablet.h +++ b/be/src/cloud/cloud_tablet.h @@ -92,7 +92,7 @@ class CloudTablet final : public BaseTablet { void clear_cache() override; // Return number of deleted stale rowsets - int delete_expired_stale_rowsets(); + uint64_t delete_expired_stale_rowsets(); bool has_stale_rowsets() const { return !_stale_rs_version_map.empty(); } diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp index e95c295ae1daa5..f90bf536f63018 100644 --- a/be/src/cloud/config.cpp +++ b/be/src/cloud/config.cpp @@ -20,6 +20,7 @@ #include "common/status.h" namespace doris::config { +#include "common/compile_check_begin.h" DEFINE_String(deploy_mode, ""); DEFINE_mString(cloud_unique_id, ""); @@ -76,4 +77,5 @@ DEFINE_mInt32(tablet_txn_info_min_expired_seconds, "120"); DEFINE_mBool(enable_use_cloud_unique_id_from_fe, "true"); 
DEFINE_mBool(enable_cloud_tablet_report, "true"); +#include "common/compile_check_end.h" } // namespace doris::config diff --git a/be/src/cloud/config.h b/be/src/cloud/config.h index b345e6355921a4..a8a7c0c48ec91f 100644 --- a/be/src/cloud/config.h +++ b/be/src/cloud/config.h @@ -20,6 +20,7 @@ #include "common/config.h" namespace doris::config { +#include "common/compile_check_begin.h" DECLARE_String(deploy_mode); // deprecated do not configure directly @@ -110,4 +111,5 @@ DECLARE_mBool(enable_use_cloud_unique_id_from_fe); DECLARE_Bool(enable_cloud_tablet_report); +#include "common/compile_check_end.h" } // namespace doris::config diff --git a/be/src/cloud/pb_convert.cpp b/be/src/cloud/pb_convert.cpp index bff7d8388d30d8..e655ceacf2f08d 100644 --- a/be/src/cloud/pb_convert.cpp +++ b/be/src/cloud/pb_convert.cpp @@ -324,7 +324,7 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, const TabletSchemaPB out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->CopyFrom(in.cluster_key_idxes()); + out->mutable_cluster_key_uids()->CopyFrom(in.cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); @@ -353,7 +353,7 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, TabletSchemaPB&& in) out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->Swap(in.mutable_cluster_key_idxes()); + out->mutable_cluster_key_uids()->Swap(in.mutable_cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); 
out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); @@ -395,7 +395,7 @@ void cloud_tablet_schema_to_doris(TabletSchemaPB* out, const TabletSchemaCloudPB out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->CopyFrom(in.cluster_key_idxes()); + out->mutable_cluster_key_uids()->CopyFrom(in.cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); @@ -425,7 +425,7 @@ void cloud_tablet_schema_to_doris(TabletSchemaPB* out, TabletSchemaCloudPB&& in) out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->Swap(in.mutable_cluster_key_idxes()); + out->mutable_cluster_key_uids()->Swap(in.mutable_cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); diff --git a/be/src/clucene b/be/src/clucene index 48fa9cc4ec32b4..a506dbb6c523aa 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit 48fa9cc4ec32b40bf3b02338d0a1b2cdbc6408cf +Subproject commit a506dbb6c523aa65044eb1c527a066d236172543 diff --git a/be/src/common/compile_check_begin.h b/be/src/common/compile_check_begin.h index 6da403f2894885..4d860d39d1cf72 100644 --- a/be/src/common/compile_check_begin.h +++ b/be/src/common/compile_check_begin.h @@ -23,8 +23,9 @@ #ifdef __clang__ 
#pragma clang diagnostic push #pragma clang diagnostic error "-Wconversion" +#pragma clang diagnostic error "-Wshadow" #pragma clang diagnostic ignored "-Wsign-conversion" #pragma clang diagnostic ignored "-Wfloat-conversion" #endif -//#include "common/compile_check_begin.h" \ No newline at end of file +//#include "common/compile_check_begin.h" diff --git a/be/src/common/compile_check_end.h b/be/src/common/compile_check_end.h index 0897965dc74a3d..40df41b6bdfc6c 100644 --- a/be/src/common/compile_check_end.h +++ b/be/src/common/compile_check_end.h @@ -20,4 +20,4 @@ #endif #undef COMPILE_CHECK -// #include "common/compile_check_end.h" \ No newline at end of file +// #include "common/compile_check_end.h" diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 63989a76261bb6..b3e7d0bce5ee4d 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1166,6 +1166,9 @@ DEFINE_mBool(enable_missing_rows_correctness_check, "false"); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. DEFINE_mInt32(mow_publish_max_discontinuous_version_num, "20"); +// When the size of primary keys in memory exceeds this value, finish current segment +// and create a new segment, used in compaction. Default 50MB. +DEFINE_mInt64(mow_primary_key_index_max_size_in_memory, "52428800"); // When the version is not continuous for MOW table in publish phase and the gap between // current txn's publishing version and the max version of the tablet exceeds this value, // don't print warning log diff --git a/be/src/common/config.h b/be/src/common/config.h index 29e55e6406390e..59fc61e8cb3e5a 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1236,6 +1236,9 @@ DECLARE_mBool(enable_missing_rows_correctness_check); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. 
DECLARE_mInt32(mow_publish_max_discontinuous_version_num); +// When the size of primary keys in memory exceeds this value, finish current segment +// and create a new segment, used in compaction. +DECLARE_mInt64(mow_primary_key_index_max_size_in_memory); // When the version is not continuous for MOW table in publish phase and the gap between // current txn's publishing version and the max version of the tablet exceeds this value, // don't print warning log diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index ce2a6878dba034..d3d55f10dde5fb 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -500,15 +500,18 @@ void Daemon::cache_adjust_capacity_thread() { void Daemon::cache_prune_stale_thread() { int32_t interval = config::cache_periodic_prune_stale_sweep_sec; while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) { - if (interval <= 0) { - LOG(WARNING) << "config of cache clean interval is illegal: [" << interval - << "], force set to 3600 "; - interval = 3600; + if (config::cache_periodic_prune_stale_sweep_sec <= 0) { + LOG(WARNING) << "config of cache clean interval is: [" << interval + << "], it means the cache prune stale thread is disabled, will wait 3s " + "and check again."; + interval = 3; + continue; } if (config::disable_memory_gc) { continue; } CacheManager::instance()->for_each_cache_prune_stale(); + interval = config::cache_periodic_prune_stale_sweep_sec; } } diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp index 39dd45163322ac..4b430f04289d04 100644 --- a/be/src/exec/schema_scanner.cpp +++ b/be/src/exec/schema_scanner.cpp @@ -124,7 +124,6 @@ Status SchemaScanner::get_next_block_async(RuntimeState* state) { } SCOPED_ATTACH_TASK(state); _async_thread_running = true; - _finish_dependency->block(); if (!_opened) { _data_block = vectorized::Block::create_unique(); _init_block(_data_block.get()); @@ -140,9 +139,6 @@ Status 
SchemaScanner::get_next_block_async(RuntimeState* state) { _eos = eos; _async_thread_running = false; _dependency->set_ready(); - if (eos) { - _finish_dependency->set_ready(); - } })); return Status::OK(); } diff --git a/be/src/exec/schema_scanner.h b/be/src/exec/schema_scanner.h index 440912bff1d729..6e7a229b7fd7b9 100644 --- a/be/src/exec/schema_scanner.h +++ b/be/src/exec/schema_scanner.h @@ -106,11 +106,7 @@ class SchemaScanner { // factory function static std::unique_ptr create(TSchemaTableType::type type); TSchemaTableType::type type() const { return _schema_table_type; } - void set_dependency(std::shared_ptr dep, - std::shared_ptr fin_dep) { - _dependency = dep; - _finish_dependency = fin_dep; - } + void set_dependency(std::shared_ptr dep) { _dependency = dep; } Status get_next_block_async(RuntimeState* state); protected: @@ -139,7 +135,6 @@ class SchemaScanner { RuntimeProfile::Counter* _fill_block_timer = nullptr; std::shared_ptr _dependency = nullptr; - std::shared_ptr _finish_dependency = nullptr; std::unique_ptr _data_block; AtomicStatus _scanner_status; diff --git a/be/src/exec/schema_scanner/schema_columns_scanner.cpp b/be/src/exec/schema_scanner/schema_columns_scanner.cpp index 8325a7f5dc4f2d..b60dfc3d203f89 100644 --- a/be/src/exec/schema_scanner/schema_columns_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_columns_scanner.cpp @@ -450,7 +450,19 @@ Status SchemaColumnsScanner::_fill_block_impl(vectorized::Block* block) { RETURN_IF_ERROR(fill_dest_column_for_range(block, 4, datas)); } // COLUMN_DEFAULT - { RETURN_IF_ERROR(fill_dest_column_for_range(block, 5, null_datas)); } + { + std::vector strs(columns_num); + for (int i = 0; i < columns_num; ++i) { + if (_desc_result.columns[i].columnDesc.__isset.defaultValue) { + strs[i] = StringRef(_desc_result.columns[i].columnDesc.defaultValue.c_str(), + _desc_result.columns[i].columnDesc.defaultValue.length()); + datas[i] = strs.data() + i; + } else { + datas[i] = nullptr; + } + } + 
RETURN_IF_ERROR(fill_dest_column_for_range(block, 5, datas)); + } // IS_NULLABLE { StringRef str_yes = StringRef("YES", 3); diff --git a/be/src/exec/schema_scanner/schema_tables_scanner.cpp b/be/src/exec/schema_scanner/schema_tables_scanner.cpp index 23710b81971c15..3aba0dfcc4f83c 100644 --- a/be/src/exec/schema_scanner/schema_tables_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_tables_scanner.cpp @@ -236,7 +236,7 @@ Status SchemaTablesScanner::_fill_block_impl(vectorized::Block* block) { std::vector srcs(table_num); for (int i = 0; i < table_num; ++i) { const TTableStatus& tbl_status = _table_result.tables[i]; - if (tbl_status.__isset.avg_row_length) { + if (tbl_status.__isset.data_length) { srcs[i] = tbl_status.data_length; datas[i] = srcs.data() + i; } else { @@ -248,7 +248,19 @@ Status SchemaTablesScanner::_fill_block_impl(vectorized::Block* block) { // max_data_length { RETURN_IF_ERROR(fill_dest_column_for_range(block, 10, null_datas)); } // index_length - { RETURN_IF_ERROR(fill_dest_column_for_range(block, 11, null_datas)); } + { + std::vector srcs(table_num); + for (int i = 0; i < table_num; ++i) { + const TTableStatus& tbl_status = _table_result.tables[i]; + if (tbl_status.__isset.index_length) { + srcs[i] = tbl_status.index_length; + datas[i] = srcs.data() + i; + } else { + datas[i] = nullptr; + } + } + RETURN_IF_ERROR(fill_dest_column_for_range(block, 11, datas)); + } // data_free { RETURN_IF_ERROR(fill_dest_column_for_range(block, 12, null_datas)); } // auto_increment diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h index ff749420ad286e..4d221f7bfe8421 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -26,17 +26,41 @@ #include "vec/common/string_ref.h" namespace doris { +// There are problems with the implementation of the old datetimev2. For compatibility reasons, we will keep this code temporarily.
+struct fixed_len_to_uint32 { + template + uint32_t operator()(T value) { + if constexpr (sizeof(T) <= sizeof(uint32_t)) { + if constexpr (std::is_same_v>) { + return (uint32_t)value.to_int64(); + } else { + return (uint32_t)value; + } + } + return std::hash()(value); + } +}; + +struct fixed_len_to_uint32_v2 { + template + uint32_t operator()(T value) { + if constexpr (sizeof(T) <= sizeof(uint32_t)) { + if constexpr (std::is_same_v>) { + return (uint32_t)value.to_date_int_val(); + } else { + return (uint32_t)value; + } + } + return std::hash()(value); + } +}; class BloomFilterAdaptor { public: - BloomFilterAdaptor(bool null_aware = false) : _null_aware(null_aware) { + BloomFilterAdaptor(bool null_aware) : _null_aware(null_aware) { _bloom_filter = std::make_shared(); } - static int64_t optimal_bit_num(int64_t expect_num, double fpp) { - return doris::segment_v2::BloomFilter::optimal_bit_num(expect_num, fpp) / 8; - } - static BloomFilterAdaptor* create(bool null_aware) { return new BloomFilterAdaptor(null_aware); } @@ -57,27 +81,23 @@ class BloomFilterAdaptor { size_t size() { return _bloom_filter->directory().size; } - template - bool test(T data) const { - return _bloom_filter->find(data); - } + bool test(uint32_t data) const { return _bloom_filter->find(data); } - // test_element/find_element only used on vectorized engine - template + template bool test_element(T element) const { if constexpr (std::is_same_v) { return _bloom_filter->find(element); } else { - return _bloom_filter->find(HashUtil::fixed_len_to_uint32(element)); + return _bloom_filter->find(fixed_len_to_uint32_method()(element)); } } - template + template void add_element(T element) { if constexpr (std::is_same_v) { _bloom_filter->insert(element); } else { - _bloom_filter->insert(HashUtil::fixed_len_to_uint32(element)); + _bloom_filter->insert(fixed_len_to_uint32_method()(element)); } } @@ -214,6 +234,8 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase { void 
set_contain_null_and_null_aware() { _bloom_filter->set_contain_null_and_null_aware(); } + void set_enable_fixed_len_to_uint32_v2() { _enable_fixed_len_to_uint32_v2 = true; } + size_t get_size() const { return _bloom_filter ? _bloom_filter->size() : 0; } void light_copy(BloomFilterFuncBase* bloomfilter_func) { @@ -221,6 +243,7 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase { _bloom_filter_alloced = other_func->_bloom_filter_alloced; _bloom_filter = other_func->_bloom_filter; _inited = other_func->_inited; + _enable_fixed_len_to_uint32_v2 |= other_func->_enable_fixed_len_to_uint32_v2; } virtual void insert(const void* data) = 0; @@ -255,9 +278,10 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase { int64_t _runtime_bloom_filter_max_size; bool _build_bf_exactly = false; bool _bloom_filter_size_calculated_by_ndv = false; + bool _enable_fixed_len_to_uint32_v2 = false; }; -template +template uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char* data, const uint8* nullmap, uint16_t* offsets, int number, const bool is_parse_column) { @@ -281,7 +305,8 @@ uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char* dat if (nullmap == nullptr) { for (int i = 0; i < number; i++) { uint16_t idx = offsets[i]; - if (!bloom_filter.test_element(get_element(data, idx))) { + if (!bloom_filter.test_element( + get_element(data, idx))) { continue; } offsets[new_size++] = idx; @@ -294,7 +319,8 @@ uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char* dat continue; } } else { - if (!bloom_filter.test_element(get_element(data, idx))) { + if (!bloom_filter.test_element( + get_element(data, idx))) { continue; } } @@ -304,7 +330,7 @@ uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char* dat } else { if (nullmap == nullptr) { for (int i = 0; i < number; i++) { - if (!bloom_filter.test_element(get_element(data, i))) { + if (!bloom_filter.test_element(get_element(data, i))) { continue; } 
offsets[new_size++] = i; @@ -316,7 +342,8 @@ uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char* dat continue; } } else { - if (!bloom_filter.test_element(get_element(data, i))) { + if (!bloom_filter.test_element( + get_element(data, i))) { continue; } } @@ -327,16 +354,17 @@ uint16_t find_batch_olap(const BloomFilterAdaptor& bloom_filter, const char* dat return new_size; } -template +template struct CommonFindOp { - uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, - const uint8* nullmap, uint16_t* offsets, int number, - const bool is_parse_column) { - return find_batch_olap(bloom_filter, data, nullmap, offsets, number, is_parse_column); + static uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, + const uint8* nullmap, uint16_t* offsets, int number, + const bool is_parse_column) { + return find_batch_olap(bloom_filter, data, nullmap, offsets, + number, is_parse_column); } - void insert_batch(BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, - size_t start) const { + static void insert_batch(BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, + size_t start) { const auto size = column->size(); if (column->is_nullable()) { const auto* nullable = assert_cast(column.get()); @@ -348,7 +376,7 @@ struct CommonFindOp { const T* data = (T*)col.get_raw_data().data; for (size_t i = start; i < size; i++) { if (!nullmap[i]) { - bloom_filter.add_element(*(data + i)); + bloom_filter.add_element(*(data + i)); } else { bloom_filter.set_contain_null(); } @@ -356,13 +384,13 @@ struct CommonFindOp { } else { const T* data = (T*)column->get_raw_data().data; for (size_t i = start; i < size; i++) { - bloom_filter.add_element(*(data + i)); + bloom_filter.add_element(*(data + i)); } } } - void find_batch(const BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, - uint8_t* results) const { + static void find_batch(const 
BloomFilterAdaptor& bloom_filter, + const vectorized::ColumnPtr& column, uint8_t* results) { const T* __restrict data = nullptr; const uint8_t* __restrict nullmap = nullptr; if (column->is_nullable()) { @@ -382,31 +410,32 @@ struct CommonFindOp { if (nullmap) { for (size_t i = 0; i < size; i++) { if (!nullmap[i]) { - results[i] = bloom_filter.test_element(data[i]); + results[i] = bloom_filter.test_element(data[i]); } else { results[i] = bloom_filter.contain_null(); } } } else { for (size_t i = 0; i < size; i++) { - results[i] = bloom_filter.test_element(data[i]); + results[i] = bloom_filter.test_element(data[i]); } } } - void insert(BloomFilterAdaptor& bloom_filter, const void* data) const { - bloom_filter.add_element(*(T*)data); + static void insert(BloomFilterAdaptor& bloom_filter, const void* data) { + bloom_filter.add_element(*(T*)data); } }; -struct StringFindOp : CommonFindOp { +template +struct StringFindOp : CommonFindOp { static void insert_batch(BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, size_t start) { auto _insert_batch_col_str = [&](const auto& col, const uint8_t* __restrict nullmap, size_t start, size_t size) { for (size_t i = start; i < size; i++) { if (nullmap == nullptr || !nullmap[i]) { - bloom_filter.add_element(col.get_data_at(i)); + bloom_filter.add_element(col.get_data_at(i)); } else { bloom_filter.set_contain_null(); } @@ -451,20 +480,23 @@ struct StringFindOp : CommonFindOp { if (nullable->has_null()) { for (size_t i = 0; i < col.size(); i++) { if (!nullmap[i]) { - results[i] = bloom_filter.test_element(col.get_data_at(i)); + results[i] = bloom_filter.test_element( + col.get_data_at(i)); } else { results[i] = bloom_filter.contain_null(); } } } else { for (size_t i = 0; i < col.size(); i++) { - results[i] = bloom_filter.test_element(col.get_data_at(i)); + results[i] = bloom_filter.test_element( + col.get_data_at(i)); } } } else { const auto& col = assert_cast(column.get()); for (size_t i = 0; i < col->size(); i++) 
{ - results[i] = bloom_filter.test_element(col->get_data_at(i)); + results[i] = + bloom_filter.test_element(col->get_data_at(i)); } } } @@ -472,34 +504,35 @@ struct StringFindOp : CommonFindOp { // We do not need to judge whether data is empty, because null will not appear // when filer used by the storage engine -struct FixedStringFindOp : public StringFindOp { +template +struct FixedStringFindOp : public StringFindOp { static uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, const uint8* nullmap, uint16_t* offsets, int number, const bool is_parse_column) { - return find_batch_olap(bloom_filter, data, nullmap, offsets, number, - is_parse_column); + return find_batch_olap( + bloom_filter, data, nullmap, offsets, number, is_parse_column); } }; -template +template struct BloomFilterTypeTraits { using T = typename PrimitiveTypeTraits::CppType; - using FindOp = CommonFindOp; + using FindOp = CommonFindOp; }; -template <> -struct BloomFilterTypeTraits { - using FindOp = FixedStringFindOp; +template +struct BloomFilterTypeTraits { + using FindOp = FixedStringFindOp; }; -template <> -struct BloomFilterTypeTraits { - using FindOp = StringFindOp; +template +struct BloomFilterTypeTraits { + using FindOp = StringFindOp; }; -template <> -struct BloomFilterTypeTraits { - using FindOp = StringFindOp; +template +struct BloomFilterTypeTraits { + using FindOp = StringFindOp; }; template @@ -511,16 +544,28 @@ class BloomFilterFunc final : public BloomFilterFuncBase { void insert(const void* data) override { DCHECK(_bloom_filter != nullptr); - dummy.insert(*_bloom_filter, data); + if (_enable_fixed_len_to_uint32_v2) { + OpV2::insert(*_bloom_filter, data); + } else { + Op::insert(*_bloom_filter, data); + } } void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start) override { DCHECK(_bloom_filter != nullptr); - dummy.insert_batch(*_bloom_filter, column, start); + if (_enable_fixed_len_to_uint32_v2) { + 
OpV2::insert_batch(*_bloom_filter, column, start); + } else { + Op::insert_batch(*_bloom_filter, column, start); + } } void find_fixed_len(const vectorized::ColumnPtr& column, uint8_t* results) override { - dummy.find_batch(*_bloom_filter, column, results); + if (_enable_fixed_len_to_uint32_v2) { + OpV2::find_batch(*_bloom_filter, column, results); + } else { + Op::find_batch(*_bloom_filter, column, results); + } } template @@ -542,12 +587,18 @@ class BloomFilterFunc final : public BloomFilterFuncBase { uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap, uint16_t* offsets, int number, bool is_parse_column) override { - return dummy.find_batch_olap_engine(*_bloom_filter, data, nullmap, offsets, number, - is_parse_column); + if (_enable_fixed_len_to_uint32_v2) { + return OpV2::find_batch_olap_engine(*_bloom_filter, data, nullmap, offsets, number, + is_parse_column); + } else { + return Op::find_batch_olap_engine(*_bloom_filter, data, nullmap, offsets, number, + is_parse_column); + } } private: - typename BloomFilterTypeTraits::FindOp dummy; + using Op = typename BloomFilterTypeTraits::FindOp; + using OpV2 = typename BloomFilterTypeTraits::FindOp; }; } // namespace doris diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index 44f39fb77f6d85..57a8b6376a9fed 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -232,7 +232,7 @@ inline auto create_bitmap_filter(PrimitiveType type) { template ColumnPredicate* create_olap_column_predicate(uint32_t column_id, const std::shared_ptr& filter, - int be_exec_version, const TabletColumn*) { + const TabletColumn*) { std::shared_ptr filter_olap; filter_olap.reset(create_bloom_filter(PT)); filter_olap->light_copy(filter.get()); @@ -243,10 +243,10 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template ColumnPredicate* create_olap_column_predicate(uint32_t column_id, const std::shared_ptr& 
filter, - int be_exec_version, const TabletColumn*) { + const TabletColumn*) { if constexpr (PT == TYPE_TINYINT || PT == TYPE_SMALLINT || PT == TYPE_INT || PT == TYPE_BIGINT) { - return new BitmapFilterColumnPredicate(column_id, filter, be_exec_version); + return new BitmapFilterColumnPredicate(column_id, filter); } else { throw Exception(ErrorCode::INTERNAL_ERROR, "bitmap filter do not support type {}", PT); } @@ -254,7 +254,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template ColumnPredicate* create_olap_column_predicate(uint32_t column_id, - const std::shared_ptr& filter, int, + const std::shared_ptr& filter, const TabletColumn* column = nullptr) { return create_in_list_predicate(column_id, filter, column->length()); @@ -262,7 +262,7 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template ColumnPredicate* create_olap_column_predicate(uint32_t column_id, - const std::shared_ptr& filter, int, + const std::shared_ptr& filter, const TabletColumn* column = nullptr) { // currently only support like predicate if constexpr (PT == TYPE_CHAR) { @@ -277,22 +277,19 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, template ColumnPredicate* create_column_predicate(uint32_t column_id, const std::shared_ptr& filter, - FieldType type, int be_exec_version, - const TabletColumn* column = nullptr) { + FieldType type, const TabletColumn* column = nullptr) { switch (type) { -#define M(NAME) \ - case FieldType::OLAP_FIELD_##NAME: { \ - return create_olap_column_predicate(column_id, filter, be_exec_version, column); \ +#define M(NAME) \ + case FieldType::OLAP_FIELD_##NAME: { \ + return create_olap_column_predicate(column_id, filter, column); \ } APPLY_FOR_PRIMTYPE(M) #undef M case FieldType::OLAP_FIELD_TYPE_DECIMAL: { - return create_olap_column_predicate(column_id, filter, be_exec_version, - column); + return create_olap_column_predicate(column_id, filter, column); } case FieldType::OLAP_FIELD_TYPE_BOOL: { - return 
create_olap_column_predicate(column_id, filter, be_exec_version, - column); + return create_olap_column_predicate(column_id, filter, column); } default: return nullptr; diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index c983af0fb3ea71..8f297d7074ff12 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -740,6 +740,12 @@ class RuntimePredicateWrapper { return Status::OK(); } + void set_enable_fixed_len_to_uint32_v2() { + if (is_bloomfilter()) { + _context->bloom_filter_func->set_enable_fixed_len_to_uint32_v2(); + } + } + // used by shuffle runtime filter // assign this filter by protobuf Status assign(const PBloomFilter* bloom_filter, butil::IOBufAsZeroCopyInputStream* data, @@ -975,11 +981,10 @@ class RuntimePredicateWrapper { Status IRuntimeFilter::create(RuntimeFilterParamsContext* state, const TRuntimeFilterDesc* desc, const TQueryOptions* query_options, const RuntimeFilterRole role, - int node_id, std::shared_ptr* res, - bool build_bf_exactly) { + int node_id, std::shared_ptr* res) { *res = std::make_shared(state, desc); (*res)->set_role(role); - return (*res)->init_with_desc(desc, query_options, node_id, build_bf_exactly); + return (*res)->init_with_desc(desc, query_options, node_id); } RuntimeFilterContextSPtr& IRuntimeFilter::get_shared_context_ref() { @@ -1348,7 +1353,7 @@ std::string IRuntimeFilter::formatted_state() const { } Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQueryOptions* options, - int node_id, bool build_bf_exactly) { + int node_id) { // if node_id == -1 , it shouldn't be a consumer DCHECK(node_id >= 0 || (node_id == -1 && !is_consumer())); @@ -1358,6 +1363,8 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue _expr_order = desc->expr_order; vectorized::VExprContextSPtr build_ctx; RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(desc->src_expr, build_ctx)); + _enable_fixed_len_to_uint32_v2 = 
options->__isset.enable_fixed_len_to_uint32_v2 && + options->enable_fixed_len_to_uint32_v2; RuntimeFilterParams params; params.filter_id = _filter_id; @@ -1370,21 +1377,10 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue params.runtime_bloom_filter_max_size = options->__isset.runtime_bloom_filter_max_size ? options->runtime_bloom_filter_max_size : 0; - auto sync_filter_size = desc->__isset.sync_filter_size && desc->sync_filter_size; - // We build runtime filter by exact distinct count if all of 3 conditions are met: - // 1. Only 1 join key - // 2. Bloom filter - // 3. Size of all bloom filters will be same (size will be sync or this is a broadcast join). - params.build_bf_exactly = - build_bf_exactly && (_runtime_filter_type == RuntimeFilterType::BLOOM_FILTER || - _runtime_filter_type == RuntimeFilterType::IN_OR_BLOOM_FILTER); + params.build_bf_exactly = desc->__isset.build_bf_exactly && desc->build_bf_exactly; params.bloom_filter_size_calculated_by_ndv = desc->bloom_filter_size_calculated_by_ndv; - if (!sync_filter_size) { - params.build_bf_exactly &= !_is_broadcast_join; - } - if (desc->__isset.bloom_filter_size_bytes) { params.bloom_filter_size = desc->bloom_filter_size_bytes; } @@ -1419,7 +1415,11 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue } _wrapper = std::make_shared(¶ms); - return _wrapper->init(¶ms); + RETURN_IF_ERROR(_wrapper->init(¶ms)); + if (_enable_fixed_len_to_uint32_v2) { + _wrapper->set_enable_fixed_len_to_uint32_v2(); + } + return Status::OK(); } Status IRuntimeFilter::serialize(PMergeFilterRequest* request, void** data, int* len) { @@ -1544,7 +1544,7 @@ std::string IRuntimeFilter::debug_string() const { return fmt::format( "RuntimeFilter: (id = {}, type = {}, is_broadcast: {}, ignored: {}, " "build_bf_cardinality: {}, dependency: {}, synced_size: {}, has_local_target: {}, " - "has_remote_target: {},error_msg: [{}]", + "has_remote_target: {}, error_msg: [{}]", _filter_id, 
to_string(_runtime_filter_type), _is_broadcast_join, _wrapper->_context->ignored, _wrapper->get_build_bf_cardinality(), _dependency ? _dependency->debug_string() : "none", _synced_size, _has_local_target, @@ -1616,6 +1616,9 @@ void IRuntimeFilter::update_filter(std::shared_ptr wrap wrapper->_column_return_type = _wrapper->_column_return_type; } _wrapper = wrapper; + if (_enable_fixed_len_to_uint32_v2) { + _wrapper->set_enable_fixed_len_to_uint32_v2(); + } update_runtime_filter_type_to_profile(local_merge_time); signal(); } diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index a1fdfbf07d239d..b0e82a75335cc5 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -213,8 +213,7 @@ class IRuntimeFilter { static Status create(RuntimeFilterParamsContext* state, const TRuntimeFilterDesc* desc, const TQueryOptions* query_options, const RuntimeFilterRole role, - int node_id, std::shared_ptr* res, - bool build_bf_exactly = false); + int node_id, std::shared_ptr* res); RuntimeFilterContextSPtr& get_shared_context_ref(); @@ -260,7 +259,7 @@ class IRuntimeFilter { // init filter with desc Status init_with_desc(const TRuntimeFilterDesc* desc, const TQueryOptions* options, - int node_id = -1, bool build_bf_exactly = false); + int node_id = -1); // serialize _wrapper to protobuf Status serialize(PMergeFilterRequest* request, void** data, int* len); @@ -355,8 +354,9 @@ class IRuntimeFilter { const std::shared_ptr& dependency); int64_t get_synced_size() const { - if (_synced_size == -1) { - throw Status::InternalError("sync filter size meet error, filter: {}", debug_string()); + if (_synced_size == -1 || !_dependency) { + throw Exception(doris::ErrorCode::INTERNAL_ERROR, + "sync filter size meet error, filter: {}", debug_string()); } return _synced_size; } @@ -424,6 +424,8 @@ class IRuntimeFilter { int64_t _synced_size = -1; std::shared_ptr _dependency; + + bool _enable_fixed_len_to_uint32_v2 = false; }; // avoid expose 
RuntimePredicateWrapper diff --git a/be/src/http/http_handler_with_auth.cpp b/be/src/http/http_handler_with_auth.cpp index 518b9868de191e..ae5c024e76d093 100644 --- a/be/src/http/http_handler_with_auth.cpp +++ b/be/src/http/http_handler_with_auth.cpp @@ -35,6 +35,7 @@ HttpHandlerWithAuth::HttpHandlerWithAuth(ExecEnv* exec_env, TPrivilegeHier::type : _exec_env(exec_env), _hier(hier), _type(type) {} int HttpHandlerWithAuth::on_header(HttpRequest* req) { + // If the return value isn't 0, you must call `send_reply` first, to avoid leaving the request link without a response. TCheckAuthRequest auth_request; TCheckAuthResult auth_result; AuthInfo auth_info; @@ -83,6 +84,11 @@ int HttpHandlerWithAuth::on_header(HttpRequest* req) { #ifndef BE_TEST TNetworkAddress master_addr = _exec_env->cluster_info()->master_fe_addr; + if (master_addr.hostname.empty() || master_addr.port == 0) { + LOG(WARNING) << "Not found master fe, Can't auth API request: " << req->debug_string(); + HttpChannel::send_error(req, HttpStatus::SERVICE_UNAVAILABLE); + return -1; + } { auto status = ThriftRpcHelper::rpc( master_addr.hostname, master_addr.port, @@ -90,6 +96,10 @@ int HttpHandlerWithAuth::on_header(HttpRequest* req) { client->checkAuth(auth_result, auth_request); }); if (!status) { + LOG(WARNING) << "CheckAuth Rpc Fail.Fe Ip:" << master_addr.hostname + << ", Fe port:" << master_addr.port << ".Status:" << status.to_string() + << ".Request: " << req->debug_string(); + HttpChannel::send_error(req, HttpStatus::SERVICE_UNAVAILABLE); + return -1; } } @@ -98,6 +108,7 @@ int HttpHandlerWithAuth::on_header(HttpRequest* req) { auth_result.status.status_code = TStatusCode::type::OK; auth_result.status.error_msgs.clear(); } else { + HttpChannel::send_reply(req, HttpStatus::FORBIDDEN); return -1; } #endif diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index a499a27b07f6e2..82dc122e19f5ef 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -376,7 +376,7 @@ Status
BaseTablet::calc_delete_bitmap_between_segments( seq_col_length = _tablet_meta->tablet_schema()->column(seq_col_idx).length() + 1; } size_t rowid_length = 0; - if (!_tablet_meta->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet_meta->tablet_schema()->cluster_key_uids().empty()) { rowid_length = PrimaryKeyIndexReader::ROW_ID_LENGTH; } @@ -438,7 +438,6 @@ Status BaseTablet::lookup_row_data(const Slice& encoded_key, const RowLocation& StringRef value = string_column->get_data_at(0); values = value.to_string(); if (write_to_cache) { - StringRef value = string_column->get_data_at(0); RowCache::instance()->insert({tablet_id(), encoded_key}, Slice {value.data, value.size}); } return Status::OK(); @@ -461,7 +460,7 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest seq_col_length = schema->column(schema->sequence_col_idx()).length() + 1; } size_t rowid_length = 0; - if (with_rowid && !schema->cluster_key_idxes().empty()) { + if (with_rowid && !schema->cluster_key_uids().empty()) { rowid_length = PrimaryKeyIndexReader::ROW_ID_LENGTH; } Slice key_without_seq = @@ -476,12 +475,12 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest int num_segments = cast_set(rs->num_segments()); DCHECK_EQ(segments_key_bounds.size(), num_segments); std::vector picked_segments; - for (int i = num_segments - 1; i >= 0; i--) { - if (key_without_seq.compare(segments_key_bounds[i].max_key()) > 0 || - key_without_seq.compare(segments_key_bounds[i].min_key()) < 0) { + for (int j = num_segments - 1; j >= 0; j--) { + if (key_without_seq.compare(segments_key_bounds[j].max_key()) > 0 || + key_without_seq.compare(segments_key_bounds[j].min_key()) < 0) { continue; } - picked_segments.emplace_back(i); + picked_segments.emplace_back(j); } if (picked_segments.empty()) { continue; @@ -654,7 +653,7 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, Slice key = Slice(index_column->get_data_at(i).data, 
index_column->get_data_at(i).size); RowLocation loc; // calculate row id - if (!_tablet_meta->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet_meta->tablet_schema()->cluster_key_uids().empty()) { size_t seq_col_length = 0; if (_tablet_meta->tablet_schema()->has_sequence_col()) { seq_col_length = @@ -778,11 +777,11 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, if (config::enable_merge_on_write_correctness_check) { RowsetIdUnorderedSet rowsetids; - for (const auto& rowset : specified_rowsets) { - rowsetids.emplace(rowset->rowset_id()); + for (const auto& specified_rowset : specified_rowsets) { + rowsetids.emplace(specified_rowset->rowset_id()); VLOG_NOTICE << "[tabletID:" << tablet_id() << "]" << "[add_sentinel_mark_to_delete_bitmap][end_version:" << end_version << "]" - << "add:" << rowset->rowset_id(); + << "add:" << specified_rowset->rowset_id(); } add_sentinel_mark_to_delete_bitmap(delete_bitmap.get(), rowsetids); } @@ -892,11 +891,11 @@ Status BaseTablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, std::vector default_values; default_values.resize(cids.size()); for (int i = 0; i < cids.size(); ++i) { - const TabletColumn& column = tablet_schema.column(cids[i]); + const TabletColumn& tablet_column = tablet_schema.column(cids[i]); vectorized::DataTypePtr type = - vectorized::DataTypeFactory::instance().create_data_type(column); - col_uid_to_idx[column.unique_id()] = i; - default_values[i] = column.default_value(); + vectorized::DataTypeFactory::instance().create_data_type(tablet_column); + col_uid_to_idx[tablet_column.unique_id()] = i; + default_values[i] = tablet_column.default_value(); serdes[i] = type->get_serde(); } vectorized::JsonbSerializeUtil::jsonb_to_block(serdes, *string_column, col_uid_to_idx, block, @@ -1326,12 +1325,12 @@ Status BaseTablet::check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap required_rowsets_arr.PushBack(value, required_rowsets_arr.GetAllocator()); } } else { - 
std::vector rowsets; + std::vector tablet_rowsets; { std::shared_lock meta_rlock(_meta_lock); - rowsets = get_rowset_by_ids(&rowset_ids); + tablet_rowsets = get_rowset_by_ids(&rowset_ids); } - for (const auto& rowset : rowsets) { + for (const auto& rowset : tablet_rowsets) { rapidjson::Value value; std::string version_str = rowset->get_rowset_info_str(); value.SetString(version_str.c_str(), @@ -1439,12 +1438,12 @@ Status BaseTablet::update_delete_bitmap(const BaseTabletSPtr& self, TabletTxnInf txn_info->partial_update_info->max_version_in_flush_phase; DCHECK(max_version_in_flush_phase != -1); std::vector remained_rowsets; - for (const auto& rowset : specified_rowsets) { - if (rowset->end_version() <= max_version_in_flush_phase && - rowset->produced_by_compaction()) { - rowsets_skip_alignment.emplace_back(rowset); + for (const auto& specified_rowset : specified_rowsets) { + if (specified_rowset->end_version() <= max_version_in_flush_phase && + specified_rowset->produced_by_compaction()) { + rowsets_skip_alignment.emplace_back(specified_rowset); } else { - remained_rowsets.emplace_back(rowset); + remained_rowsets.emplace_back(specified_rowset); } } if (!rowsets_skip_alignment.empty()) { @@ -1604,10 +1603,6 @@ Status BaseTablet::check_rowid_conversion( VLOG_DEBUG << "check_rowid_conversion, location_map is empty"; return Status::OK(); } - if (!tablet_schema()->cluster_key_idxes().empty()) { - VLOG_DEBUG << "skip check_rowid_conversion for mow tables with cluster keys"; - return Status::OK(); - } std::vector dst_segments; RETURN_IF_ERROR( @@ -1762,7 +1757,7 @@ std::vector BaseTablet::get_snapshot_rowset(bool include_stale_ void BaseTablet::calc_consecutive_empty_rowsets( std::vector* empty_rowsets, - const std::vector& candidate_rowsets, int limit) { + const std::vector& candidate_rowsets, int64_t limit) { int len = cast_set(candidate_rowsets.size()); for (int i = 0; i < len - 1; ++i) { auto rowset = candidate_rowsets[i]; diff --git a/be/src/olap/base_tablet.h 
b/be/src/olap/base_tablet.h index f961f4c49eedd6..c6de447200f87c 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -276,10 +276,13 @@ class BaseTablet { // Find the first consecutive empty rowsets. output->size() >= limit void calc_consecutive_empty_rowsets(std::vector* empty_rowsets, const std::vector& candidate_rowsets, - int limit); + int64_t limit); // Return the merged schema of all rowsets - virtual TabletSchemaSPtr merged_tablet_schema() const { return _max_version_schema; } + virtual TabletSchemaSPtr merged_tablet_schema() const { + std::shared_lock rlock(_meta_lock); + return _max_version_schema; + } void traverse_rowsets(std::function visitor, bool include_stale = false) { diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index 716c99927bf2d6..8d89c7a31fb271 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -37,7 +37,7 @@ class BitmapFilterColumnPredicate : public ColumnPredicate { using SpecificFilter = BitmapFilterFunc; BitmapFilterColumnPredicate(uint32_t column_id, - const std::shared_ptr& filter, int) + const std::shared_ptr& filter) : ColumnPredicate(column_id), _filter(filter), _specific_filter(assert_cast(_filter.get())) {} diff --git a/be/src/olap/bloom_filter.hpp b/be/src/olap/bloom_filter.hpp deleted file mode 100644 index 5c7cb5f9e6419f..00000000000000 --- a/be/src/olap/bloom_filter.hpp +++ /dev/null @@ -1,272 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_BLOOM_FILTER_HPP -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_BLOOM_FILTER_HPP - -#include - -#include -#include - -#include "olap/olap_define.h" -#include "olap/utils.h" -#include "util/hash_util.hpp" - -namespace doris { - -static const uint64_t DEFAULT_SEED = 104729; -static const uint64_t BLOOM_FILTER_NULL_HASHCODE = 2862933555777941757ULL; - -struct BloomFilterIndexHeader { - uint64_t block_count; - BloomFilterIndexHeader() : block_count(0) {} -} __attribute__((packed)); - -// Bare metal bit set implementation. For performance reasons, this implementation does not -// check for index bounds nor expand the bit set if the specified index is greater than the size. 
-class BitSet { -public: - BitSet() : _data(nullptr), _data_len(0) {} - - ~BitSet() { SAFE_DELETE_ARRAY(_data); } - - // Init BitSet with given bit_num, which will align up to uint64_t - bool init(uint32_t bit_num) { - if (bit_num <= 0) { - return false; - } - - _data_len = (bit_num + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); - _data = new (std::nothrow) uint64_t[_data_len]; - if (_data == nullptr) { - return false; - } - - memset(_data, 0, _data_len * sizeof(uint64_t)); - return true; - } - - // Init BitSet with given buffer - bool init(uint64_t* data, uint32_t data_len) { - _data = data; - _data_len = data_len; - return true; - } - - // Set the bit specified by param, note that uint64_t type contains 2^6 bits - void set(uint32_t index) { _data[index >> 6] |= 1L << (index % 64); } - - // Return true if the bit specified by param is set - bool get(uint32_t index) const { return (_data[index >> 6] & (1L << (index % 64))) != 0; } - - // Merge with another BitSet by byte, return false when the length is not equal - bool merge(const BitSet& set) { - if (_data_len != set.data_len()) { - return false; - } - - for (uint32_t i = 0; i < _data_len; ++i) { - _data[i] |= set.data()[i]; - } - - return true; - } - - // Convert BitSet to string to convenient debug and test - std::string to_string() const { - uint32_t bit_num = _data_len * sizeof(uint64_t) * 8; - std::string str(bit_num, '0'); - for (uint32_t i = 0; i < bit_num; ++i) { - if ((_data[i >> 6] & (1L << i)) != 0) { - str[i] = '1'; - } - } - - return str; - } - - uint64_t* data() const { return _data; } - - uint32_t data_len() const { return _data_len; } - - uint32_t bit_num() const { return _data_len * sizeof(uint64_t) * 8; } - - void clear() { memset(_data, 0, _data_len * sizeof(uint64_t)); } - - void reset() { - _data = NULL; - _data_len = 0; - } - -private: - uint64_t* _data; - uint32_t _data_len; -}; - -class BloomFilter { -public: - BloomFilter() : _bit_num(0), _hash_function_num(0) {} - ~BloomFilter() 
{} - - // Create BloomFilter with given entry num and fpp, which is used for loading data - bool init(int64_t expected_entries, double fpp) { - uint32_t bit_num = _optimal_bit_num(expected_entries, fpp); - if (!_bit_set.init(bit_num)) { - return false; - } - - _bit_num = _bit_set.bit_num(); - _hash_function_num = _optimal_hash_function_num(expected_entries, _bit_num); - return true; - } - - // Create BloomFilter with given entry num and default fpp - bool init(int64_t expected_entries) { - return this->init(expected_entries, BLOOM_FILTER_DEFAULT_FPP); - } - - // Init BloomFilter with given buffer, which is used for query - bool init(uint64_t* data, uint32_t len, uint32_t hash_function_num) { - _bit_num = sizeof(uint64_t) * 8 * len; - _hash_function_num = hash_function_num; - return _bit_set.init(data, len); - } - - // Compute hash value of given buffer and add to BloomFilter - void add_bytes(const char* buf, uint32_t len) { - uint64_t hash = buf == nullptr ? BLOOM_FILTER_NULL_HASHCODE - : HashUtil::hash64(buf, len, DEFAULT_SEED); - add_hash(hash); - } - - // Generate multiple hash value according to following rule: - // new_hash_value = hash_high_part + (i * hash_low_part) - void add_hash(uint64_t hash) { - uint32_t hash1 = (uint32_t)hash; - uint32_t hash2 = (uint32_t)(hash >> 32); - - for (uint32_t i = 0; i < _hash_function_num; ++i) { - uint64_t combine_hash = hash1 + hash2 * i; - uint32_t index = combine_hash % _bit_num; - _bit_set.set(index); - } - } - - // Compute hash value of given buffer and verify whether exist in BloomFilter - bool test_bytes(const char* buf, uint32_t len) const { - uint64_t hash = buf == nullptr ? 
BLOOM_FILTER_NULL_HASHCODE - : HashUtil::hash64(buf, len, DEFAULT_SEED); - return test_hash(hash); - } - - // Verify whether hash value in BloomFilter - bool test_hash(uint64_t hash) const { - uint32_t hash1 = (uint32_t)hash; - uint32_t hash2 = (uint32_t)(hash >> 32); - - for (uint32_t i = 0; i < _hash_function_num; ++i) { - uint64_t combine_hash = hash1 + hash2 * i; - uint32_t index = combine_hash % _bit_num; - if (!_bit_set.get(index)) { - return false; - } - } - - return true; - } - - // Merge with another BloomFilter, return false when the length - // and hash function number is not equal - bool merge(const BloomFilter& that) { - if (_bit_num == that.bit_num() && _hash_function_num == that.hash_function_num()) { - _bit_set.merge(that.bit_set()); - return true; - } - - return false; - } - - void clear() { _bit_set.clear(); } - - void reset() { - _bit_num = 0; - _hash_function_num = 0; - _bit_set.reset(); - } - - uint32_t bit_num() const { return _bit_num; } - - uint32_t hash_function_num() const { return _hash_function_num; } - - const BitSet& bit_set() const { return _bit_set; } - - uint64_t* bit_set_data() const { return _bit_set.data(); } - - uint32_t bit_set_data_len() const { return _bit_set.data_len(); } - - // Convert BloomFilter to string to convenient debug and test - std::string to_string() const { - std::stringstream bf_stream; - bf_stream << "bit_num:" << _bit_num << " hash_function_num:" << _hash_function_num - << " bit_set:" << _bit_set.to_string(); - return bf_stream.str(); - } - - // Get points which set by given buffer in the BitSet - std::string get_bytes_points_string(const char* buf, uint32_t len) const { - uint64_t hash = buf == nullptr ? 
BLOOM_FILTER_NULL_HASHCODE - : HashUtil::hash64(buf, len, DEFAULT_SEED); - uint32_t hash1 = (uint32_t)hash; - uint32_t hash2 = (uint32_t)(hash >> 32); - - std::stringstream stream; - for (uint32_t i = 0; i < _hash_function_num; ++i) { - if (i != 0) { - stream << "-"; - } - - uint32_t combine_hash = hash1 + hash2 * i; - uint32_t index = combine_hash % _bit_num; - stream << index; - } - - return stream.str(); - } - -private: - // Compute the optimal bit number according to the following rule: - // m = -n * ln(fpp) / (ln(2) ^ 2) - uint32_t _optimal_bit_num(int64_t n, double fpp) { - return (uint32_t)(-n * log(fpp) / (log(2) * log(2))); - } - - // Compute the optimal hash function number according to the following rule: - // k = round(m * ln(2) / n) - uint32_t _optimal_hash_function_num(int64_t n, uint32_t m) { - uint32_t k = (uint32_t)round(m * log(2) / n); - return k > 1 ? k : 1; - } - - BitSet _bit_set; - uint32_t _bit_num; - uint32_t _hash_function_num; -}; - -} // namespace doris - -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_BLOOM_FILTER_HPP diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index e71e1862dc8dbb..8c45c20f799427 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -191,11 +191,14 @@ Status Compaction::merge_input_rowsets() { SCOPED_TIMER(_merge_rowsets_latency_timer); // 1. 
Merge segment files and write bkd inverted index if (_is_vertical) { + if (!_tablet->tablet_schema()->cluster_key_uids().empty()) { + RETURN_IF_ERROR(update_delete_bitmap()); + } res = Merger::vertical_merge_rowsets(_tablet, compaction_type(), *_cur_tablet_schema, input_rs_readers, _output_rs_writer.get(), get_avg_segment_rows(), way_num, &_stats); } else { - if (!_tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet->tablet_schema()->cluster_key_uids().empty()) { return Status::InternalError( "mow table with cluster keys does not support non vertical compaction"); } @@ -872,6 +875,60 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { } } +Status CompactionMixin::update_delete_bitmap() { + // for mow with cluster keys, compaction read data with delete bitmap + // if tablet is not ready(such as schema change), we need to update delete bitmap + { + std::shared_lock meta_rlock(_tablet->get_header_lock()); + if (_tablet->tablet_state() != TABLET_NOTREADY) { + return Status::OK(); + } + } + OlapStopWatch watch; + std::vector rowsets; + for (const auto& rowset : _input_rowsets) { + std::lock_guard rwlock(tablet()->get_rowset_update_lock()); + std::shared_lock rlock(_tablet->get_header_lock()); + Status st = _tablet->update_delete_bitmap_without_lock(_tablet, rowset, &rowsets); + if (!st.ok()) { + LOG(INFO) << "failed update_delete_bitmap_without_lock for tablet_id=" + << _tablet->tablet_id() << ", st=" << st.to_string(); + return st; + } + rowsets.push_back(rowset); + } + LOG(INFO) << "finish update delete bitmap for tablet: " << _tablet->tablet_id() + << ", rowsets: " << _input_rowsets.size() << ", cost: " << watch.get_elapse_time_us() + << "(us)"; + return Status::OK(); +} + +Status CloudCompactionMixin::update_delete_bitmap() { + // for mow with cluster keys, compaction read data with delete bitmap + // if tablet is not ready(such as schema change), we need to update delete bitmap + { + std::shared_lock 
meta_rlock(_tablet->get_header_lock()); + if (_tablet->tablet_state() != TABLET_NOTREADY) { + return Status::OK(); + } + } + OlapStopWatch watch; + std::vector rowsets; + for (const auto& rowset : _input_rowsets) { + Status st = _tablet->update_delete_bitmap_without_lock(_tablet, rowset, &rowsets); + if (!st.ok()) { + LOG(INFO) << "failed update_delete_bitmap_without_lock for tablet_id=" + << _tablet->tablet_id() << ", st=" << st.to_string(); + return st; + } + rowsets.push_back(rowset); + } + LOG(INFO) << "finish update delete bitmap for tablet: " << _tablet->tablet_id() + << ", rowsets: " << _input_rowsets.size() << ", cost: " << watch.get_elapse_time_us() + << "(us)"; + return Status::OK(); +} + Status CompactionMixin::construct_output_rowset_writer(RowsetWriterContext& ctx) { // only do index compaction for dup_keys and unique_keys with mow enabled if (config::inverted_index_compaction_enable && @@ -908,7 +965,8 @@ Status CompactionMixin::modify_rowsets() { LOG(INFO) << "RowLocation Set inited succ for tablet:" << _tablet->tablet_id(); } std::unique_ptr> location_map; - if (config::enable_rowid_conversion_correctness_check) { + if (config::enable_rowid_conversion_correctness_check && + tablet()->tablet_schema()->cluster_key_uids().empty()) { location_map = std::make_unique>(); LOG(INFO) << "Location Map inited succ for tablet:" << _tablet->tablet_id(); } @@ -925,7 +983,7 @@ Status CompactionMixin::modify_rowsets() { if (missed_rows) { missed_rows_size = missed_rows->size(); std::size_t merged_missed_rows_size = _stats.merged_rows; - if (!_tablet->tablet_meta()->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet->tablet_meta()->tablet_schema()->cluster_key_uids().empty()) { merged_missed_rows_size += _stats.filtered_rows; } if (_tablet->tablet_state() == TABLET_RUNNING && diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h index 06ef4268529247..7f92a6c5f4d7dc 100644 --- a/be/src/olap/compaction.h +++ b/be/src/olap/compaction.h @@ -84,6 
+84,8 @@ class Compaction { int64_t merge_way_num(); + virtual Status update_delete_bitmap() = 0; + // the root tracker for this compaction std::shared_ptr _mem_tracker; @@ -146,6 +148,8 @@ class CompactionMixin : public Compaction { virtual Status modify_rowsets(); + Status update_delete_bitmap() override; + StorageEngine& _engine; private: @@ -175,6 +179,8 @@ class CloudCompactionMixin : public Compaction { protected: CloudTablet* cloud_tablet() { return static_cast(_tablet.get()); } + Status update_delete_bitmap() override; + virtual void garbage_collection(); CloudStorageEngine& _engine; diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index b961c694ede4d0..2dfd30fb86ed9a 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -145,7 +145,7 @@ Status CumulativeCompaction::pick_rowsets_to_compact() { DCHECK(missing_versions.size() % 2 == 0); LOG(WARNING) << "There are missed versions among rowsets. " << "total missed version size: " << missing_versions.size() / 2 - << " first missed version prev rowset verison=" << missing_versions[0] + << ", first missed version prev rowset verison=" << missing_versions[0] << ", first missed version next rowset version=" << missing_versions[1] << ", tablet=" << _tablet->tablet_id(); } diff --git a/be/src/olap/delta_writer_v2.h b/be/src/olap/delta_writer_v2.h index beeb3d3ecd3ec5..f9c2800a68f499 100644 --- a/be/src/olap/delta_writer_v2.h +++ b/be/src/olap/delta_writer_v2.h @@ -46,7 +46,6 @@ namespace doris { class FlushToken; class MemTable; -class MemTracker; class Schema; class StorageEngine; class TupleDescriptor; diff --git a/be/src/olap/inverted_index_parser.cpp b/be/src/olap/inverted_index_parser.cpp index f7e511970d91f2..f1de5a5e0c10fc 100644 --- a/be/src/olap/inverted_index_parser.cpp +++ b/be/src/olap/inverted_index_parser.cpp @@ -136,4 +136,13 @@ std::string get_parser_stopwords_from_properties( } } +std::string 
get_parser_dict_compression_from_properties( + const std::map& properties) { + if (properties.find(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY) != properties.end()) { + return properties.at(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY); + } else { + return ""; + } +} + } // namespace doris diff --git a/be/src/olap/inverted_index_parser.h b/be/src/olap/inverted_index_parser.h index 0b8426d74c7ab3..f1f85995a206a8 100644 --- a/be/src/olap/inverted_index_parser.h +++ b/be/src/olap/inverted_index_parser.h @@ -83,6 +83,8 @@ const std::string INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case"; const std::string INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords"; +const std::string INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY = "dict_compression"; + std::string inverted_index_parser_type_to_string(InvertedIndexParserType parser_type); InvertedIndexParserType get_inverted_index_parser_type_from_string(const std::string& parser_str); @@ -119,4 +121,7 @@ std::string get_parser_lowercase_from_properties( std::string get_parser_stopwords_from_properties( const std::map& properties); +std::string get_parser_dict_compression_from_properties( + const std::map& properties); + } // namespace doris diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index 5db3d89378bcc2..765f67a07c7884 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -350,7 +350,7 @@ Status MemTable::_sort_by_cluster_keys() { } Tie tie = Tie(0, mutable_block.rows()); - for (auto cid : _tablet_schema->cluster_key_idxes()) { + for (auto cid : _tablet_schema->cluster_key_uids()) { auto index = _tablet_schema->field_index(cid); if (index == -1) { return Status::InternalError("could not find cluster key column with unique_id=" + @@ -619,7 +619,7 @@ Status MemTable::_to_block(std::unique_ptr* res) { (_skip_bitmap_col_idx == -1) ? 
_aggregate() : _aggregate(); } if (_keys_type == KeysType::UNIQUE_KEYS && _enable_unique_key_mow && - !_tablet_schema->cluster_key_idxes().empty()) { + !_tablet_schema->cluster_key_uids().empty()) { if (_partial_update_mode != UniqueKeyUpdateModePB::UPSERT) { return Status::InternalError( "Partial update for mow with cluster keys is not supported"); diff --git a/be/src/olap/memtable_writer.h b/be/src/olap/memtable_writer.h index fb07e740fa3cf6..713400793a1754 100644 --- a/be/src/olap/memtable_writer.h +++ b/be/src/olap/memtable_writer.h @@ -45,7 +45,6 @@ namespace doris { class FlushToken; class MemTable; -class MemTracker; class StorageEngine; class TupleDescriptor; class SlotDescriptor; diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index b207cc4c5ad22b..975aaa0bca3de5 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -86,7 +86,7 @@ Status Merger::vmerge_rowsets(BaseTabletSPtr tablet, ReaderType reader_type, merge_tablet_schema->merge_dropped_columns(*del_pred_rs->tablet_schema()); } reader_params.tablet_schema = merge_tablet_schema; - if (!tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!tablet->tablet_schema()->cluster_key_uids().empty()) { reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); } @@ -173,8 +173,8 @@ void Merger::vertical_split_columns(const TabletSchema& tablet_schema, if (delete_sign_idx != -1) { key_columns.emplace_back(delete_sign_idx); } - if (!tablet_schema.cluster_key_idxes().empty()) { - for (const auto& cid : tablet_schema.cluster_key_idxes()) { + if (!tablet_schema.cluster_key_uids().empty()) { + for (const auto& cid : tablet_schema.cluster_key_uids()) { auto idx = tablet_schema.field_index(cid); DCHECK(idx >= 0) << "could not find cluster key column with unique_id=" << cid << " in tablet schema, table_id=" << tablet_schema.table_id(); @@ -186,7 +186,7 @@ void Merger::vertical_split_columns(const TabletSchema& tablet_schema, // cluster key unique ids: [3, 1, 4] // the 
key_columns should be [0, 1, 3, 5] // the key_group_cluster_key_idxes should be [2, 1, 3] - for (const auto& cid : tablet_schema.cluster_key_idxes()) { + for (const auto& cid : tablet_schema.cluster_key_uids()) { auto idx = tablet_schema.field_index(cid); for (auto i = 0; i < key_columns.size(); ++i) { if (idx == key_columns[i]) { @@ -261,7 +261,7 @@ Status Merger::vertical_compact_one_group( reader_params.tablet_schema = merge_tablet_schema; bool has_cluster_key = false; - if (!tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!tablet->tablet_schema()->cluster_key_uids().empty()) { reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); has_cluster_key = true; } diff --git a/be/src/olap/metadata_adder.h b/be/src/olap/metadata_adder.h index 559c5db873b2f6..5b5ba16322490e 100644 --- a/be/src/olap/metadata_adder.h +++ b/be/src/olap/metadata_adder.h @@ -20,6 +20,8 @@ #include #include +#include "runtime/exec_env.h" +#include "runtime/memory/mem_tracker_limiter.h" #include "util/runtime_profile.h" namespace doris { @@ -27,8 +29,8 @@ namespace doris { inline bvar::Adder g_rowset_meta_mem_bytes("doris_rowset_meta_mem_bytes"); inline bvar::Adder g_rowset_meta_num("doris_rowset_meta_num"); -inline bvar::Adder g_all_rowsets_mem_bytes("doris_all_rowsets_mem_bytes"); -inline bvar::Adder g_all_rowsets_num("doris_all_rowsets_num"); +inline bvar::Adder g_rowset_mem_bytes("doris_rowset_mem_bytes"); +inline bvar::Adder g_rowset_num("doris_rowset_num"); inline bvar::Adder g_tablet_meta_mem_bytes("doris_tablet_meta_mem_bytes"); inline bvar::Adder g_tablet_meta_num("doris_tablet_meta_num"); @@ -42,8 +44,9 @@ inline bvar::Adder g_tablet_index_num("doris_tablet_index_num"); inline bvar::Adder g_tablet_schema_mem_bytes("doris_tablet_schema_mem_bytes"); inline bvar::Adder g_tablet_schema_num("doris_tablet_schema_num"); -inline bvar::Adder g_all_segments_mem_bytes("doris_all_segments_mem_bytes"); -inline bvar::Adder g_all_segments_num("doris_all_segments_num"); 
+inline bvar::Adder g_segment_mem_bytes("doris_segment_mem_bytes"); +inline bvar::Adder g_segment_num("doris_segment_num"); +inline bvar::Adder g_segment_estimate_mem_bytes("doris_segment_estimate_mem_bytes"); inline bvar::Adder g_column_reader_mem_bytes("doris_column_reader_mem_bytes"); inline bvar::Adder g_column_reader_num("doris_column_reader_num"); @@ -96,6 +99,10 @@ class ZoneMapIndexReader; When a derived Class extends MetadataAdder, then the Class's number and fixed length field's memory can be counted automatically. But if the Class has variable length field, then you should overwrite get_metadata_size and call update_metadata_size when the Class's memory changes. + get_metadata_size is only the memory of the metadata object itself, not include child objects, + for example, TabletMeta::get_metadata_size does not include the memory of TabletSchema. + Note, the memory allocated by Doris Allocator is not included. + There are some special situations that need to be noted: 1. when the derived Class override copy constructor, you'd better update memory size(call update_metadata_size) if derived class's memory changed in its copy constructor or you not call MetadataAdder's copy constructor. 
@@ -111,6 +118,31 @@ class MetadataAdder { static void dump_metadata_object(RuntimeProfile* object_heap_dump_snapshot); + static int64_t get_all_tablets_size() { + return g_tablet_meta_mem_bytes.get_value() + g_tablet_column_mem_bytes.get_value() + + g_tablet_index_mem_bytes.get_value() + g_tablet_schema_mem_bytes.get_value(); + } + + static int64_t get_all_rowsets_size() { + return g_rowset_meta_mem_bytes.get_value() + g_rowset_mem_bytes.get_value(); + } + + static int64_t get_all_segments_size() { + return g_segment_mem_bytes.get_value() + g_column_reader_mem_bytes.get_value() + + g_bitmap_index_reader_mem_bytes.get_value() + + g_bloom_filter_index_reader_mem_bytes.get_value() + + g_index_page_reader_mem_bytes.get_value() + + g_indexed_column_reader_mem_bytes.get_value() + + g_inverted_index_reader_mem_bytes.get_value() + + g_ordinal_index_reader_mem_bytes.get_value() + + g_zone_map_index_reader_mem_bytes.get_value(); + } + + // Doris currently uses the estimated segments memory as the basis, maybe it is more realistic. 
+ static int64_t get_all_segments_estimate_size() { + return g_segment_estimate_mem_bytes.get_value(); + } + protected: MetadataAdder(const MetadataAdder& other); @@ -122,7 +154,6 @@ class MetadataAdder { MetadataAdder& operator=(const MetadataAdder& other) = default; -private: int64_t _current_meta_size {0}; void add_mem_size(int64_t val); @@ -167,7 +198,7 @@ void MetadataAdder::add_mem_size(int64_t val) { if constexpr (std::is_same_v) { g_rowset_meta_mem_bytes << val; } else if constexpr (std::is_same_v) { - g_all_rowsets_mem_bytes << val; + g_rowset_mem_bytes << val; } else if constexpr (std::is_same_v) { g_tablet_meta_mem_bytes << val; } else if constexpr (std::is_same_v) { @@ -177,7 +208,7 @@ void MetadataAdder::add_mem_size(int64_t val) { } else if constexpr (std::is_same_v) { g_tablet_schema_mem_bytes << val; } else if constexpr (std::is_same_v) { - g_all_segments_mem_bytes << val; + g_segment_mem_bytes << val; } else if constexpr (std::is_same_v) { g_column_reader_mem_bytes << val; } else if constexpr (std::is_same_v) { @@ -208,7 +239,7 @@ void MetadataAdder::add_num(int64_t val) { if constexpr (std::is_same_v) { g_rowset_meta_num << val; } else if constexpr (std::is_same_v) { - g_all_rowsets_num << val; + g_rowset_num << val; } else if constexpr (std::is_same_v) { g_tablet_meta_num << val; } else if constexpr (std::is_same_v) { @@ -218,7 +249,7 @@ void MetadataAdder::add_num(int64_t val) { } else if constexpr (std::is_same_v) { g_tablet_schema_num << val; } else if constexpr (std::is_same_v) { - g_all_segments_num << val; + g_segment_num << val; } else if constexpr (std::is_same_v) { g_column_reader_num << val; } else if constexpr (std::is_same_v) { @@ -250,12 +281,12 @@ void MetadataAdder::dump_metadata_object(RuntimeProfile* object_heap_dump_sna COUNTER_SET(rowset_meta_mem_bytes_counter, g_rowset_meta_mem_bytes.get_value()); COUNTER_SET(rowset_meta_num_counter, g_rowset_meta_num.get_value()); - RuntimeProfile::Counter* all_rowsets_mem_bytes_counter = - 
ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsMemBytes", TUnit::BYTES); - RuntimeProfile::Counter* all_rowsets_num_counter = - ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsNum", TUnit::UNIT); - COUNTER_SET(all_rowsets_mem_bytes_counter, g_all_rowsets_mem_bytes.get_value()); - COUNTER_SET(all_rowsets_num_counter, g_all_rowsets_num.get_value()); + RuntimeProfile::Counter* rowset_mem_bytes_counter = + ADD_COUNTER(object_heap_dump_snapshot, "RowsetMemBytes", TUnit::BYTES); + RuntimeProfile::Counter* rowset_num_counter = + ADD_COUNTER(object_heap_dump_snapshot, "RowsetNum", TUnit::UNIT); + COUNTER_SET(rowset_mem_bytes_counter, g_rowset_mem_bytes.get_value()); + COUNTER_SET(rowset_num_counter, g_rowset_num.get_value()); RuntimeProfile::Counter* tablet_meta_mem_bytes_counter = ADD_COUNTER(object_heap_dump_snapshot, "TabletMetaMemBytes", TUnit::BYTES); @@ -285,12 +316,12 @@ void MetadataAdder::dump_metadata_object(RuntimeProfile* object_heap_dump_sna COUNTER_SET(tablet_schema_mem_bytes_counter, g_tablet_schema_mem_bytes.get_value()); COUNTER_SET(tablet_schema_num_counter, g_tablet_schema_num.get_value()); - RuntimeProfile::Counter* all_segments_mem_bytes_counter = - ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsMemBytes", TUnit::BYTES); - RuntimeProfile::Counter* all_segments_num_counter = - ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsNum", TUnit::UNIT); - COUNTER_SET(all_segments_mem_bytes_counter, g_all_segments_mem_bytes.get_value()); - COUNTER_SET(all_segments_num_counter, g_all_segments_num.get_value()); + RuntimeProfile::Counter* segment_mem_bytes_counter = + ADD_COUNTER(object_heap_dump_snapshot, "SegmentMemBytes", TUnit::BYTES); + RuntimeProfile::Counter* segment_num_counter = + ADD_COUNTER(object_heap_dump_snapshot, "SegmentNum", TUnit::UNIT); + COUNTER_SET(segment_mem_bytes_counter, g_segment_mem_bytes.get_value()); + COUNTER_SET(segment_num_counter, g_segment_num.get_value()); RuntimeProfile::Counter* column_reader_mem_bytes_counter = 
ADD_COUNTER(object_heap_dump_snapshot, "ColumnReaderMemBytes", TUnit::BYTES); diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 736bdaa99304d3..90d0883984e78b 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -1071,7 +1071,8 @@ Status StorageEngine::_submit_compaction_task(TabletSharedPtr tablet, if (!tablet->can_do_compaction(tablet->data_dir()->path_hash(), compaction_type)) { LOG(INFO) << "Tablet state has been changed, no need to begin this compaction " "task, tablet_id=" - << tablet->tablet_id() << "tablet_state=" << tablet->tablet_state(); + << tablet->tablet_id() << ", tablet_state=" << tablet->tablet_state(); + _pop_tablet_from_submitted_compaction(tablet, compaction_type); return; } tablet->compaction_stage = CompactionStage::EXECUTING; diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index bbb2ca72b4ae7f..cd52deed0c8a4d 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -703,10 +703,24 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value, rapidjson::Document::AllocatorType& allocator) { const auto& fs = _rowset_meta->fs(); auto storage_format = _schema->get_inverted_index_storage_format(); - auto format_str = storage_format == InvertedIndexStorageFormatPB::V1 ? 
"V1" : "V2"; + std::string format_str; + switch (storage_format) { + case InvertedIndexStorageFormatPB::V1: + format_str = "V1"; + break; + case InvertedIndexStorageFormatPB::V2: + format_str = "V2"; + break; + case InvertedIndexStorageFormatPB::V3: + format_str = "V3"; + break; + default: + return Status::InternalError("inverted index storage format error"); + break; + } auto rs_id = rowset_id().to_string(); rowset_value->AddMember("rowset_id", rapidjson::Value(rs_id.c_str(), allocator), allocator); - rowset_value->AddMember("index_storage_format", rapidjson::Value(format_str, allocator), + rowset_value->AddMember("index_storage_format", rapidjson::Value(format_str.c_str(), allocator), allocator); rapidjson::Value segments(rapidjson::kArrayType); for (int seg_id = 0; seg_id < num_segments(); ++seg_id) { diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 198b4e8595ed20..ab5bc48db80b00 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -60,6 +60,7 @@ #include "vec/data_types/data_type_factory.hpp" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; namespace { @@ -475,15 +476,15 @@ Status BetaRowsetWriter::_rename_compacted_segments(int64_t begin, int64_t end) return Status::OK(); } -void BetaRowsetWriter::_clear_statistics_for_deleting_segments_unsafe(uint64_t begin, - uint64_t end) { +void BetaRowsetWriter::_clear_statistics_for_deleting_segments_unsafe(uint32_t begin, + uint32_t end) { VLOG_DEBUG << "_segid_statistics_map clear record segid range from:" << begin << " to:" << end; - for (int i = begin; i <= end; ++i) { + for (uint32_t i = begin; i <= end; ++i) { _segid_statistics_map.erase(i); } } -Status BetaRowsetWriter::_rename_compacted_segment_plain(uint64_t seg_id) { +Status BetaRowsetWriter::_rename_compacted_segment_plain(uint32_t seg_id) { if (seg_id == _num_segcompacted) { ++_num_segcompacted; return Status::OK(); 
@@ -581,7 +582,7 @@ Status BetaRowsetWriter::_segcompaction_if_necessary() { Status status = Status::OK(); // if not doing segcompaction, just check segment number if (!config::enable_segcompaction || !_context.enable_segcompaction || - !_context.tablet_schema->cluster_key_idxes().empty() || + !_context.tablet_schema->cluster_key_uids().empty() || _context.tablet_schema->num_variant_columns() > 0) { return _check_segment_number_limit(_num_segment); } @@ -653,7 +654,7 @@ Status BaseBetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { _num_rows_written += rowset->num_rows(); _total_data_size += rowset->rowset_meta()->data_disk_size(); _total_index_size += rowset->rowset_meta()->index_disk_size(); - _num_segment += rowset->num_segments(); + _num_segment += cast_set(rowset->num_segments()); // append key_bounds to current rowset RETURN_IF_ERROR(rowset->get_segments_key_bounds(&_segments_encoded_key_bounds)); @@ -1043,7 +1044,7 @@ Status BaseBetaRowsetWriter::add_segment(uint32_t segment_id, const SegmentStati if (segment_id >= _segment_num_rows.size()) { _segment_num_rows.resize(segment_id + 1); } - _segment_num_rows[segid_offset] = segstat.row_num; + _segment_num_rows[segid_offset] = cast_set(segstat.row_num); } VLOG_DEBUG << "_segid_statistics_map add new record. 
segment_id:" << segment_id << " row_num:" << segstat.row_num << " data_size:" << segstat.data_size @@ -1111,4 +1112,5 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction( return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index d96301af22630d..a69d1063a55086 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -298,9 +298,9 @@ class BetaRowsetWriter : public BaseBetaRowsetWriter { Status _load_noncompacted_segment(segment_v2::SegmentSharedPtr& segment, int32_t segment_id); Status _find_longest_consecutive_small_segment(SegCompactionCandidatesSharedPtr& segments); Status _rename_compacted_segments(int64_t begin, int64_t end); - Status _rename_compacted_segment_plain(uint64_t seg_id); + Status _rename_compacted_segment_plain(uint32_t seg_id); Status _rename_compacted_indices(int64_t begin, int64_t end, uint64_t seg_id); - void _clear_statistics_for_deleting_segments_unsafe(uint64_t begin, uint64_t end); + void _clear_statistics_for_deleting_segments_unsafe(uint32_t begin, uint32_t end); StorageEngine& _engine; diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index c2a4469d97f324..e0eb7534123a86 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -115,8 +115,7 @@ Status SegmentFlusher::close() { bool SegmentFlusher::need_buffering() { // buffering variants for schema change return _context.write_type == DataWriteType::TYPE_SCHEMA_CHANGE && - (_context.tablet_schema->num_variant_columns() > 0 || - !_context.tablet_schema->cluster_key_idxes().empty()); + _context.tablet_schema->num_variant_columns() > 0; } Status SegmentFlusher::_add_rows(std::unique_ptr& segment_writer, diff --git a/be/src/olap/rowset/segment_v2/block_split_bloom_filter.h 
b/be/src/olap/rowset/segment_v2/block_split_bloom_filter.h index f68ddd7e74bfc5..8dc470d9da4f88 100644 --- a/be/src/olap/rowset/segment_v2/block_split_bloom_filter.h +++ b/be/src/olap/rowset/segment_v2/block_split_bloom_filter.h @@ -34,7 +34,6 @@ class BlockSplitBloomFilter : public BloomFilter { void add_hash(uint64_t hash) override; bool test_hash(uint64_t hash) const override; - bool contains(const BloomFilter&) const override { return true; } private: // Bytes in a tiny Bloom filter block. diff --git a/be/src/olap/rowset/segment_v2/bloom_filter.h b/be/src/olap/rowset/segment_v2/bloom_filter.h index a7845d1ca36704..4f4adf0fd12283 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter.h @@ -186,7 +186,7 @@ class BloomFilter { /// Checks if this contains everything from another bloom filter. /// Bloom filters must have equal size and seed. - virtual bool contains(const BloomFilter& bf_) const = 0; + virtual bool contains(const BloomFilter& bf_) const { return true; }; virtual char* data() const { return _data; } diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp index 609d21ce4f5c22..8c63c25d20acee 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp @@ -70,6 +70,7 @@ Status BloomFilterIndexIterator::read_bloom_filter(rowid_t ordinal, auto column = data_type->create_column(); RETURN_IF_ERROR(_bloom_filter_iter.seek_to_ordinal(ordinal)); + DCHECK(current_bloom_filter_index() == ordinal); size_t num_read = num_to_read; RETURN_IF_ERROR(_bloom_filter_iter.next_batch(&num_read, column)); DCHECK(num_to_read == num_read); diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp index edc6102703f492..3f9fb94df0a844 100644 --- 
a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -68,15 +69,12 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter { explicit BloomFilterIndexWriterImpl(const BloomFilterOptions& bf_options, const TypeInfo* type_info) - : _bf_options(bf_options), - _type_info(type_info), - _has_null(false), - _bf_buffer_size(0) {} + : _bf_options(bf_options), _type_info(type_info) {} ~BloomFilterIndexWriterImpl() override = default; Status add_values(const void* values, size_t count) override { - const CppType* v = (const CppType*)values; + const auto* v = (const CppType*)values; for (int i = 0; i < count; ++i) { if (_values.find(*v) == _values.end()) { if constexpr (_is_slice_type()) { @@ -105,7 +103,7 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter { bf->set_has_null(_has_null); for (auto& v : _values) { if constexpr (_is_slice_type()) { - Slice* s = (Slice*)&v; + auto* s = (Slice*)&v; bf->add_bytes(s->data, s->size); } else { bf->add_bytes((char*)&v, sizeof(CppType)); @@ -160,11 +158,11 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter { static constexpr bool _is_int128() { return field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT; } private: - BloomFilterOptions _bf_options; - const TypeInfo* _type_info; + BloomFilterOptions _bf_options {}; + const TypeInfo* _type_info = nullptr; vectorized::Arena _arena; - bool _has_null; - uint64_t _bf_buffer_size; + bool _has_null = false; + uint64_t _bf_buffer_size = 0; // distinct values ValueDict _values; std::vector> _bfs; @@ -173,7 +171,7 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter { } // namespace Status PrimaryKeyBloomFilterIndexWriterImpl::add_values(const void* values, size_t count) { - const Slice* v = (const Slice*)values; + const auto* v = (const Slice*)values; for (int i = 0; i < count; ++i) { Slice 
new_value; RETURN_IF_CATCH_EXCEPTION(_type_info->deep_copy(&new_value, v, &_arena)); @@ -189,7 +187,7 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::flush() { RETURN_IF_ERROR(bf->init(_values.size(), _bf_options.fpp, _bf_options.strategy)); bf->set_has_null(_has_null); for (auto& v : _values) { - Slice* s = (Slice*)&v; + auto* s = (Slice*)&v; bf->add_bytes(s->data, s->size); } _bf_buffer_size += bf->size(); @@ -205,7 +203,7 @@ Status PrimaryKeyBloomFilterIndexWriterImpl::flush() { Status PrimaryKeyBloomFilterIndexWriterImpl::finish(io::FileWriter* file_writer, ColumnIndexMetaPB* index_meta) { - if (_values.size() > 0) { + if (!_values.empty()) { RETURN_IF_ERROR(flush()); } index_meta->set_type(BLOOM_FILTER_INDEX); @@ -246,7 +244,7 @@ NGramBloomFilterIndexWriterImpl::NGramBloomFilterIndexWriterImpl( } Status NGramBloomFilterIndexWriterImpl::add_values(const void* values, size_t count) { - const Slice* src = reinterpret_cast(values); + const auto* src = reinterpret_cast(values); for (int i = 0; i < count; ++i, ++src) { if (src->size < _gram_size) { continue; @@ -339,7 +337,8 @@ Status NGramBloomFilterIndexWriterImpl::create(const BloomFilterOptions& bf_opti case FieldType::OLAP_FIELD_TYPE_CHAR: case FieldType::OLAP_FIELD_TYPE_VARCHAR: case FieldType::OLAP_FIELD_TYPE_STRING: - res->reset(new NGramBloomFilterIndexWriterImpl(bf_options, gram_size, gram_bf_size)); + *res = std::make_unique(bf_options, gram_size, + gram_bf_size); break; default: return Status::NotSupported("unsupported type for ngram bloom filter index:{}", diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp index 3028211f266157..da6beff5d8d6a2 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp @@ -81,7 +81,8 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, _sole_data_page = 
PagePointer(_meta.ordinal_index_meta().root_page()); } else { RETURN_IF_ERROR(load_index_page(_meta.ordinal_index_meta().root_page(), - &_ordinal_index_page_handle, &_ordinal_index_reader)); + &_ordinal_index_page_handle, + _ordinal_index_reader.get())); _has_index_page = true; } } @@ -92,7 +93,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, _sole_data_page = PagePointer(_meta.value_index_meta().root_page()); } else { RETURN_IF_ERROR(load_index_page(_meta.value_index_meta().root_page(), - &_value_index_page_handle, &_value_index_reader)); + &_value_index_page_handle, _value_index_reader.get())); _has_index_page = true; } } diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h index c3469f9f6bed0d..c9640c0007c153 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h @@ -50,9 +50,12 @@ class EncodingInfo; class IndexedColumnReader : public MetadataAdder { public: explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta) - : _file_reader(std::move(file_reader)), _meta(meta) {} + : _file_reader(std::move(file_reader)), _meta(meta) { + _ordinal_index_reader = std::make_unique(); + _value_index_reader = std::make_unique(); + } - ~IndexedColumnReader(); + ~IndexedColumnReader() override; Status load(bool use_page_cache, bool kept_in_memory, OlapReaderStatistics* index_load_stats = nullptr); @@ -90,8 +93,8 @@ class IndexedColumnReader : public MetadataAdder { bool _has_index_page = false; // valid only when the column contains only one data page PagePointer _sole_data_page; - IndexPageReader _ordinal_index_reader; - IndexPageReader _value_index_reader; + std::unique_ptr _ordinal_index_reader; + std::unique_ptr _value_index_reader; PageHandle _ordinal_index_page_handle; PageHandle _value_index_page_handle; @@ -108,8 +111,8 @@ class IndexedColumnIterator { explicit 
IndexedColumnIterator(const IndexedColumnReader* reader, OlapReaderStatistics* stats = nullptr) : _reader(reader), - _ordinal_iter(&reader->_ordinal_index_reader), - _value_iter(&reader->_value_index_reader), + _ordinal_iter(reader->_ordinal_index_reader.get()), + _value_iter(reader->_value_index_reader.get()), _stats(stats) {} // Seek to the given ordinal entry. Entry 0 is the first entry. diff --git a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h index d9e5080d2d584d..1e5e6f5d5cedd0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h @@ -17,7 +17,7 @@ #pragma once -#include +#include // IWYU pragma: keep #include #include diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp index 113833d560fd06..8d480829a0cd37 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp @@ -30,8 +30,8 @@ namespace doris::segment_v2 { Status InvertedIndexFileReader::init(int32_t read_buffer_size) { if (!_inited) { _read_buffer_size = read_buffer_size; - if (_storage_format == InvertedIndexStorageFormatPB::V2) { - auto st = _init_from_v2(read_buffer_size); + if (_storage_format >= InvertedIndexStorageFormatPB::V2) { + auto st = _init_from(read_buffer_size); if (!st.ok()) { return st; } @@ -41,7 +41,7 @@ Status InvertedIndexFileReader::init(int32_t read_buffer_size) { return Status::OK(); } -Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) { +Status InvertedIndexFileReader::_init_from(int32_t read_buffer_size) { auto index_file_full_path = InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix); std::unique_lock lock(_mutex); // Lock 
for writing @@ -79,7 +79,7 @@ Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) { // 3. read file int32_t version = _stream->readInt(); // Read version number - if (version == InvertedIndexStorageFormatPB::V2) { + if (version >= InvertedIndexStorageFormatPB::V2) { DCHECK(version == _storage_format); int32_t numIndices = _stream->readInt(); // Read number of indices ReaderFileEntry* entry = nullptr; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h index 3b7161c7643cef..443d40cfaf0d4f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h @@ -70,7 +70,7 @@ class InvertedIndexFileReader { int64_t get_inverted_file_size() const { return _stream == nullptr ? 0 : _stream->length(); } private: - Status _init_from_v2(int32_t read_buffer_size); + Status _init_from(int32_t read_buffer_size); Result> _open(int64_t index_id, const std::string& index_suffix) const; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp index bb373be5ee906a..4d6892aa78568f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp @@ -150,7 +150,7 @@ Status InvertedIndexFileWriter::close() { } } else { try { - RETURN_IF_ERROR(write_v2()); + RETURN_IF_ERROR(write()); for (const auto& entry : _indices_dirs) { const auto& dir = entry.second; // delete index path, which contains separated inverted index files @@ -293,7 +293,7 @@ Status InvertedIndexFileWriter::write_v1() { return Status::OK(); } -Status InvertedIndexFileWriter::write_v2() { +Status InvertedIndexFileWriter::write() { std::unique_ptr out_dir = nullptr; std::unique_ptr compound_file_output = nullptr; ErrorContext error_context; @@ -301,10 +301,10 @@ Status 
InvertedIndexFileWriter::write_v2() { // Calculate header length and initialize offset int64_t current_offset = headerLength(); // Prepare file metadata - auto file_metadata = prepare_file_metadata_v2(current_offset); + auto file_metadata = prepare_file_metadata(current_offset); // Create output stream - auto result = create_output_stream_v2(); + auto result = create_output_stream(); out_dir = std::move(result.first); compound_file_output = std::move(result.second); @@ -315,7 +315,7 @@ Status InvertedIndexFileWriter::write_v2() { write_index_headers_and_metadata(compound_file_output.get(), file_metadata); // Copy file data - copy_files_data_v2(compound_file_output.get(), file_metadata); + copy_files_data(compound_file_output.get(), file_metadata); _total_file_size = compound_file_output->getFilePointer(); _file_info.set_index_size(_total_file_size); @@ -470,7 +470,7 @@ void InvertedIndexFileWriter::write_header_and_data_v1(lucene::store::IndexOutpu std::pair, std::unique_ptr> -InvertedIndexFileWriter::create_output_stream_v2() { +InvertedIndexFileWriter::create_output_stream() { io::Path index_path {InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix)}; auto* out_dir = DorisFSDirectoryFactory::getDirectory(_fs, index_path.parent_path().c_str()); @@ -486,15 +486,15 @@ InvertedIndexFileWriter::create_output_stream_v2() { void InvertedIndexFileWriter::write_version_and_indices_count(lucene::store::IndexOutput* output) { // Write the version number - output->writeInt(InvertedIndexStorageFormatPB::V2); + output->writeInt(_storage_format); // Write the number of indices const auto num_indices = static_cast(_indices_dirs.size()); output->writeInt(num_indices); } -std::vector -InvertedIndexFileWriter::prepare_file_metadata_v2(int64_t& current_offset) { +std::vector InvertedIndexFileWriter::prepare_file_metadata( + int64_t& current_offset) { std::vector file_metadata; for (const auto& entry : _indices_dirs) { @@ -546,8 +546,8 @@ void 
InvertedIndexFileWriter::write_index_headers_and_metadata( } } -void InvertedIndexFileWriter::copy_files_data_v2(lucene::store::IndexOutput* output, - const std::vector& file_metadata) { +void InvertedIndexFileWriter::copy_files_data(lucene::store::IndexOutput* output, + const std::vector& file_metadata) { const int64_t buffer_length = 16384; uint8_t buffer[buffer_length]; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h index ba42ffdceb1475..ab7cdbff152460 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h @@ -71,7 +71,7 @@ class InvertedIndexFileWriter { Status delete_index(const TabletIndex* index_meta); Status initialize(InvertedIndexDirectoryMap& indices_dirs); virtual ~InvertedIndexFileWriter() = default; - Status write_v2(); + Status write(); Status write_v1(); Status close(); const InvertedIndexFileInfo* get_index_file_info() const { @@ -122,7 +122,7 @@ class InvertedIndexFileWriter { // Helper functions specific to write_v2 virtual std::pair, std::unique_ptr> - create_output_stream_v2(); + create_output_stream(); void write_version_and_indices_count(lucene::store::IndexOutput* output); struct FileMetadata { int64_t index_id; @@ -141,11 +141,11 @@ class InvertedIndexFileWriter { length(len), directory(dir) {} }; - std::vector prepare_file_metadata_v2(int64_t& current_offset); + std::vector prepare_file_metadata(int64_t& current_offset); virtual void write_index_headers_and_metadata(lucene::store::IndexOutput* output, const std::vector& file_metadata); - void copy_files_data_v2(lucene::store::IndexOutput* output, - const std::vector& file_metadata); + void copy_files_data(lucene::store::IndexOutput* output, + const std::vector& file_metadata); Status _insert_directory_into_map(int64_t index_id, const std::string& index_suffix, std::shared_ptr dir); // Member variables... 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 86a8f89e4c94e4..02edf2f1976e3e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -212,6 +212,28 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { (*field)->setOmitTermFreqAndPositions( !(get_parser_phrase_support_string_from_properties(_index_meta->properties()) == INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES)); + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::create_field_v3", { + if (_index_file_writer->get_storage_format() != InvertedIndexStorageFormatPB::V3) { + return Status::Error( + "debug point: InvertedIndexColumnWriterImpl::create_field_v3 error"); + } + }) + if (_index_file_writer->get_storage_format() >= InvertedIndexStorageFormatPB::V3) { + (*field)->setIndexVersion(IndexVersion::kV3); + // Only effective in v3 + std::string dict_compression = + get_parser_dict_compression_from_properties(_index_meta->properties()); + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::create_field_dic_compression", { + if (dict_compression != INVERTED_INDEX_PARSER_TRUE) { + return Status::Error( + "debug point: " + "InvertedIndexColumnWriterImpl::create_field_dic_compression error"); + } + }) + if (dict_compression == INVERTED_INDEX_PARSER_TRUE) { + (*field)->updateFlag(FlagBits::DICT_COMPRESS); + } + } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/page_handle.h b/be/src/olap/rowset/segment_v2/page_handle.h index b1e53ee808697e..d4dfdfb2ff3c55 100644 --- a/be/src/olap/rowset/segment_v2/page_handle.h +++ b/be/src/olap/rowset/segment_v2/page_handle.h @@ -23,6 +23,10 @@ #include "util/slice.h" // for Slice namespace doris { + +// After disable page cache, sometimes we need to know the percentage of data pages in query memory. 
+inline bvar::Adder g_page_no_cache_mem_bytes("doris_page_no_cache_mem_bytes"); + namespace segment_v2 { // When a column page is read into memory, we use this to store it. @@ -37,8 +41,7 @@ class PageHandle { // This class will take the ownership of input data's memory. It will // free it when deconstructs. PageHandle(DataPage* data) : _is_data_owner(true), _data(data) { - _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker(); - _page_tracker->consume(_data->capacity()); + g_page_no_cache_mem_bytes << _data->capacity(); } // This class will take the content of cache data, and will make input @@ -51,20 +54,18 @@ class PageHandle { // we can use std::exchange if we switch c++14 on std::swap(_is_data_owner, other._is_data_owner); std::swap(_data, other._data); - _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker(); } PageHandle& operator=(PageHandle&& other) noexcept { std::swap(_is_data_owner, other._is_data_owner); std::swap(_data, other._data); _cache_data = std::move(other._cache_data); - _page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker(); return *this; } ~PageHandle() { if (_is_data_owner) { - _page_tracker->release(_data->capacity()); + g_page_no_cache_mem_bytes << -_data->capacity(); delete _data; } else { DCHECK(_data == nullptr); @@ -85,7 +86,6 @@ class PageHandle { // otherwise _cache_data is valid, and data is belong to cache. 
bool _is_data_owner = false; DataPage* _data = nullptr; - std::shared_ptr _page_tracker; PageCacheHandle _cache_data; // Don't allow copy and assign diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 0ad799683fc458..513c0be4f8cd14 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -163,7 +163,11 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table _tablet_schema(std::move(tablet_schema)), _idx_file_info(idx_file_info) {} -Segment::~Segment() = default; +Segment::~Segment() { + g_segment_estimate_mem_bytes << -_tracked_meta_mem_usage; + // if failed, fix `_tracked_meta_mem_usage` accuracy + DCHECK(_tracked_meta_mem_usage == meta_mem_usage()); +} io::UInt128Wrapper Segment::file_cache_key(std::string_view rowset_id, uint32_t seg_id) { return io::BlockFileCache::hash(fmt::format("{}_{}.dat", rowset_id, seg_id)); @@ -174,6 +178,12 @@ int64_t Segment::get_metadata_size() const { (_pk_index_meta ? 
_pk_index_meta->ByteSizeLong() : 0); } +void Segment::update_metadata_size() { + MetadataAdder::update_metadata_size(); + g_segment_estimate_mem_bytes << _meta_mem_usage - _tracked_meta_mem_usage; + _tracked_meta_mem_usage = _meta_mem_usage; +} + Status Segment::_open() { _footer_pb = std::make_unique(); RETURN_IF_ERROR(_parse_footer(_footer_pb.get())); @@ -191,8 +201,6 @@ Status Segment::_open() { _meta_mem_usage += _pk_index_meta->ByteSizeLong(); } - update_metadata_size(); - _meta_mem_usage += sizeof(*this); _meta_mem_usage += _tablet_schema->num_columns() * config::estimated_mem_per_column_reader; @@ -201,6 +209,8 @@ Status Segment::_open() { // 0.01 comes from PrimaryKeyIndexBuilder::init _meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8; + update_metadata_size(); + return Status::OK(); } @@ -467,6 +477,7 @@ Status Segment::_load_pk_bloom_filter() { // for BE UT "segment_cache_test" return _load_pk_bf_once.call([this] { _meta_mem_usage += 100; + update_metadata_size(); return Status::OK(); }); } @@ -955,7 +966,7 @@ Status Segment::lookup_row_key(const Slice& key, const TabletSchema* latest_sche std::string* encoded_seq_value, OlapReaderStatistics* stats) { RETURN_IF_ERROR(load_pk_index_and_bf()); bool has_seq_col = latest_schema->has_sequence_col(); - bool has_rowid = !latest_schema->cluster_key_idxes().empty(); + bool has_rowid = !latest_schema->cluster_key_uids().empty(); size_t seq_col_length = 0; if (has_seq_col) { seq_col_length = latest_schema->column(latest_schema->sequence_col_idx()).length() + 1; @@ -1065,7 +1076,7 @@ Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) { RETURN_IF_ERROR(iter->next_batch(&num_read, index_column)); CHECK(num_read == 1); // trim row id - if (_tablet_schema->cluster_key_idxes().empty()) { + if (_tablet_schema->cluster_key_uids().empty()) { *key = index_column->get_data_at(0).to_string(); } else { Slice sought_key = diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h index bc5ab1e1fdc80a..1b20c1f066bdf9 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -57,7 +57,6 @@ class IDataType; class ShortKeyIndexDecoder; class Schema; class StorageReadOptions; -class MemTracker; class PrimaryKeyIndexReader; class RowwiseIterator; struct RowLocation; @@ -93,6 +92,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd ~Segment(); int64_t get_metadata_size() const override; + void update_metadata_size(); Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options, std::unique_ptr* iter); @@ -163,6 +163,8 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd io::FileReaderSPtr file_reader() { return _file_reader; } + // Including the column reader memory. + // another method `get_metadata_size` not include the column reader, only the segment object itself. int64_t meta_mem_usage() const { return _meta_mem_usage; } // Identify the column by unique id or path info @@ -249,9 +251,8 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd // 1. Tracking memory use by segment meta data such as footer or index page. // 2. Tracking memory use by segment column reader // The memory consumed by querying is tracked in segment iterator. - // TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value of SegmentMeta mem tracker - // is similar to `-2912341218700198079`. So, temporarily put it in experimental type tracker. 
int64_t _meta_mem_usage; + int64_t _tracked_meta_mem_usage = 0; RowsetId _rowset_id; TabletSchemaSPtr _tablet_schema; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 4ee73547c117e9..abdf9116756f0e 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -377,7 +377,7 @@ Status SegmentIterator::_lazy_init() { _row_bitmap.addRange(0, _segment->num_rows()); // z-order can not use prefix index if (_segment->_tablet_schema->sort_type() != SortType::ZORDER && - _segment->_tablet_schema->cluster_key_idxes().empty()) { + _segment->_tablet_schema->cluster_key_uids().empty()) { RETURN_IF_ERROR(_get_row_ranges_by_keys()); } RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); @@ -1193,7 +1193,7 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool bool has_seq_col = _segment->_tablet_schema->has_sequence_col(); // Used to get key range from primary key index, // for mow with cluster key table, we should get key range from short key index. 
- DCHECK(_segment->_tablet_schema->cluster_key_idxes().empty()); + DCHECK(_segment->_tablet_schema->cluster_key_uids().empty()); // if full key is exact_match, the primary key without sequence column should also the same if (has_seq_col && !exact_match) { @@ -2175,11 +2175,11 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { if (block->rows() == 0) { vectorized::MutableColumnPtr col0 = std::move(*block->get_by_position(0).column).mutate(); - auto res_column = vectorized::ColumnString::create(); - res_column->insert_data("", 0); - auto col_const = vectorized::ColumnConst::create(std::move(res_column), - selected_size); - block->replace_by_position(0, std::move(col_const)); + auto tmp_indicator_col = + block->get_by_position(0) + .type->create_column_const_with_default_value( + selected_size); + block->replace_by_position(0, std::move(tmp_indicator_col)); _output_index_result_column_for_expr(_sel_rowid_idx.data(), selected_size, block); block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0); diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index fc22c3570e52a2..fe465f98a2aad2 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -103,7 +103,7 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, << ", table_id=" << _tablet_schema->table_id() << ", num_key_columns=" << _num_sort_key_columns << ", num_short_key_columns=" << _num_short_key_columns - << ", cluster_key_columns=" << _tablet_schema->cluster_key_idxes().size(); + << ", cluster_key_columns=" << _tablet_schema->cluster_key_uids().size(); } for (size_t cid = 0; cid < _num_sort_key_columns; ++cid) { const auto& column = _tablet_schema->column(cid); @@ -125,8 +125,8 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, // cluster keys _key_coders.clear(); _key_index_size.clear(); - 
_num_sort_key_columns = _tablet_schema->cluster_key_idxes().size(); - for (auto cid : _tablet_schema->cluster_key_idxes()) { + _num_sort_key_columns = _tablet_schema->cluster_key_uids().size(); + for (auto cid : _tablet_schema->cluster_key_uids()) { const auto& column = _tablet_schema->column_by_uid(cid); _key_coders.push_back(get_key_coder(column.type())); _key_index_size.push_back(column.index_length()); @@ -545,6 +545,39 @@ Status SegmentWriter::probe_key_for_mow( return Status::OK(); } +Status SegmentWriter::partial_update_preconditions_check(size_t row_pos) { + if (!_is_mow()) { + auto msg = fmt::format( + "Can only do partial update on merge-on-write unique table, but found: " + "keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}", + _tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (_opts.rowset_ctx->partial_update_info == nullptr) { + auto msg = + fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (!_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()) { + auto msg = fmt::format( + "in fixed partial update code, but update_mode={}, please check, tablet_id={}", + _opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (row_pos != 0) { + auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + return Status::OK(); +} + // for partial update, we should do following steps to fill content of block: // 1. set block data to data convertor, and get all key_column's converted slice // 2. 
get pk of input block, and read missing columns @@ -562,11 +595,7 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* block->columns(), _tablet_schema->num_key_columns(), _tablet_schema->num_columns())); } - DCHECK(_is_mow()); - - DCHECK(_opts.rowset_ctx->partial_update_info); - DCHECK(_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()); - DCHECK(row_pos == 0); + RETURN_IF_ERROR(partial_update_preconditions_check(row_pos)); // find missing column cids const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids; @@ -788,7 +817,7 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po seq_column, num_rows, true)); // 2. generate short key index (use cluster key) key_columns.clear(); - for (const auto& cid : _tablet_schema->cluster_key_idxes()) { + for (const auto& cid : _tablet_schema->cluster_key_uids()) { // find cluster key index in tablet schema auto cluster_key_index = _tablet_schema->field_index(cid); if (cluster_key_index == -1) { @@ -1016,6 +1045,18 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) { *index_size = _file_writer->bytes_appended() - index_start; if (_has_key) { if (_is_mow_with_cluster_key()) { + // 1. sort primary keys + std::sort(_primary_keys.begin(), _primary_keys.end()); + // 2. write primary keys index + std::string last_key; + for (const auto& key : _primary_keys) { + DCHECK(key.compare(last_key) > 0) + << "found duplicate key or key is not sorted! current key: " << key + << ", last key: " << last_key; + RETURN_IF_ERROR(_primary_key_index_builder->add_item(key)); + last_key = key; + } + RETURN_IF_ERROR(_write_short_key_index()); *index_size = _file_writer->bytes_appended() - index_start; RETURN_IF_ERROR(_write_primary_key_index()); @@ -1236,27 +1277,16 @@ Status SegmentWriter::_generate_primary_key_index( last_key = std::move(key); } } else { // mow table with cluster key - // 1. 
generate primary keys in memory - std::vector primary_keys; + // generate primary keys in memory for (uint32_t pos = 0; pos < num_rows; pos++) { std::string key = _full_encode_keys(primary_key_coders, primary_key_columns, pos); _maybe_invalid_row_cache(key); if (_tablet_schema->has_sequence_col()) { _encode_seq_column(seq_column, pos, &key); } - _encode_rowid(pos, &key); - primary_keys.emplace_back(std::move(key)); - } - // 2. sort primary keys - std::sort(primary_keys.begin(), primary_keys.end()); - // 3. write primary keys index - std::string last_key; - for (const auto& key : primary_keys) { - DCHECK(key.compare(last_key) > 0) - << "found duplicate key or key is not sorted! current key: " << key - << ", last key: " << last_key; - RETURN_IF_ERROR(_primary_key_index_builder->add_item(key)); - last_key = key; + _encode_rowid(pos + _num_rows_written, &key); + _primary_keys_size += key.size(); + _primary_keys.emplace_back(std::move(key)); } } return Status::OK(); @@ -1289,7 +1319,7 @@ inline bool SegmentWriter::_is_mow() { } inline bool SegmentWriter::_is_mow_with_cluster_key() { - return _is_mow() && !_tablet_schema->cluster_key_idxes().empty(); + return _is_mow() && !_tablet_schema->cluster_key_uids().empty(); } } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 9a8af131087f92..60300383d7287d 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -105,6 +105,7 @@ class SegmentWriter { const std::function& found_cb, const std::function& not_found_cb, PartialUpdateStats& stats); + Status partial_update_preconditions_check(size_t row_pos); Status append_block_with_partial_content(const vectorized::Block* block, size_t row_pos, size_t num_rows); Status append_block_with_variant_subcolumns(vectorized::Block& data); @@ -155,6 +156,8 @@ class SegmentWriter { return Status::OK(); } + uint64_t 
primary_keys_size() const { return _primary_keys_size; } + private: DISALLOW_COPY_AND_ASSIGN(SegmentWriter); Status _create_column_writer(uint32_t cid, const TabletColumn& column, @@ -260,6 +263,8 @@ class SegmentWriter { std::map _rsid_to_rowset; // contains auto generated columns, should be nullptr if no variants's subcolumns TabletSchemaSPtr _flush_schema = nullptr; + std::vector _primary_keys; + uint64_t _primary_keys_size = 0; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index ce16e2d502b622..0846b0fc1186a8 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -109,7 +109,7 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 << ", table_id=" << _tablet_schema->table_id() << ", num_key_columns=" << _num_sort_key_columns << ", num_short_key_columns=" << _num_short_key_columns - << ", cluster_key_columns=" << _tablet_schema->cluster_key_idxes().size(); + << ", cluster_key_columns=" << _tablet_schema->cluster_key_uids().size(); } for (size_t cid = 0; cid < _num_sort_key_columns; ++cid) { const auto& column = _tablet_schema->column(cid); @@ -131,8 +131,8 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 // cluster keys _key_coders.clear(); _key_index_size.clear(); - _num_sort_key_columns = _tablet_schema->cluster_key_idxes().size(); - for (auto cid : _tablet_schema->cluster_key_idxes()) { + _num_sort_key_columns = _tablet_schema->cluster_key_uids().size(); + for (auto cid : _tablet_schema->cluster_key_uids()) { const auto& column = _tablet_schema->column_by_uid(cid); _key_coders.push_back(get_key_coder(column.type())); _key_index_size.push_back(column.index_length()); @@ -418,6 +418,51 @@ Status VerticalSegmentWriter::_probe_key_for_mow( return Status::OK(); } +Status 
VerticalSegmentWriter::_partial_update_preconditions_check(size_t row_pos, + bool is_flexible_update) { + if (!_is_mow()) { + auto msg = fmt::format( + "Can only do partial update on merge-on-write unique table, but found: " + "keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}", + _tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (_opts.rowset_ctx->partial_update_info == nullptr) { + auto msg = + fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (!is_flexible_update) { + if (!_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()) { + auto msg = fmt::format( + "in fixed partial update code, but update_mode={}, please check, tablet_id={}", + _opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + } else { + if (!_opts.rowset_ctx->partial_update_info->is_flexible_partial_update()) { + auto msg = fmt::format( + "in flexible partial update code, but update_mode={}, please check, " + "tablet_id={}", + _opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + } + if (row_pos != 0) { + auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + return Status::OK(); +} + // for partial update, we should do following steps to fill content of block: // 1. set block data to data convertor, and get all key_column's converted slice // 2. get pk of input block, and read missing columns @@ -427,11 +472,7 @@ Status VerticalSegmentWriter::_probe_key_for_mow( // 3. 
set columns to data convertor and then write all columns Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& data, vectorized::Block& full_block) { - DCHECK(_is_mow()); - DCHECK(_opts.rowset_ctx->partial_update_info != nullptr); - DCHECK(_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()); - DCHECK(data.row_pos == 0); - + RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos, false)); // create full block and fill with input columns full_block = _tablet_schema->create_block(); const auto& including_cids = _opts.rowset_ctx->partial_update_info->update_cids; @@ -580,10 +621,7 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da Status VerticalSegmentWriter::_append_block_with_flexible_partial_content( RowsInBlock& data, vectorized::Block& full_block) { - DCHECK(_is_mow()); - DCHECK(_opts.rowset_ctx->partial_update_info != nullptr); - DCHECK(_opts.rowset_ctx->partial_update_info->is_flexible_partial_update()); - DCHECK(data.row_pos == 0); + RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos, true)); // data.block has the same schema with full_block DCHECK(data.block->columns() == _tablet_schema->num_columns()); @@ -1149,9 +1187,9 @@ Status VerticalSegmentWriter::write_batch() { } auto column_unique_id = _tablet_schema->column(cid).unique_id(); if (_is_mow_with_cluster_key() && - std::find(_tablet_schema->cluster_key_idxes().begin(), - _tablet_schema->cluster_key_idxes().end(), - column_unique_id) != _tablet_schema->cluster_key_idxes().end()) { + std::find(_tablet_schema->cluster_key_uids().begin(), + _tablet_schema->cluster_key_uids().end(), + column_unique_id) != _tablet_schema->cluster_key_uids().end()) { cid_to_column[column_unique_id] = column; } RETURN_IF_ERROR(_column_writers[cid]->append(column->get_nullmap(), column->get_data(), @@ -1213,7 +1251,7 @@ Status VerticalSegmentWriter::_generate_key_index( data.num_rows, true)); // 2. 
generate short key index (use cluster key) std::vector short_key_columns; - for (const auto& cid : _tablet_schema->cluster_key_idxes()) { + for (const auto& cid : _tablet_schema->cluster_key_uids()) { short_key_columns.push_back(cid_to_column[cid]); } RETURN_IF_ERROR(_generate_short_key_index(short_key_columns, data.num_rows, short_key_pos)); @@ -1572,7 +1610,7 @@ inline bool VerticalSegmentWriter::_is_mow() { } inline bool VerticalSegmentWriter::_is_mow_with_cluster_key() { - return _is_mow() && !_tablet_schema->cluster_key_idxes().empty(); + return _is_mow() && !_tablet_schema->cluster_key_uids().empty(); } } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h index 951e9c2e2838c3..8cec6ed4d1abd6 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h @@ -175,6 +175,7 @@ class VerticalSegmentWriter { const std::function& found_cb, const std::function& not_found_cb, PartialUpdateStats& stats); + Status _partial_update_preconditions_check(size_t row_pos, bool is_flexible_update); Status _append_block_with_partial_content(RowsInBlock& data, vectorized::Block& full_block); Status _append_block_with_flexible_partial_content(RowsInBlock& data, vectorized::Block& full_block); diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index ee9bfd97745c9b..f493f21ac97fb7 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -72,10 +72,9 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, _cur_writer_idx = 0; RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows)); } else if (is_key) { - // TODO for cluster key, always create new segment writer because the primary keys are - // sorted in 
SegmentWriter::_generate_primary_key_index, will cause too many segments if (_segment_writers[_cur_writer_idx]->num_rows_written() > max_rows_per_segment || - has_cluster_key) { + (has_cluster_key && _segment_writers[_cur_writer_idx]->primary_keys_size() > + config::mow_primary_key_index_max_size_in_memory)) { // segment is full, need flush columns and create new segment writer RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get(), true)); @@ -181,6 +180,7 @@ Status VerticalBetaRowsetWriter::_create_segment_writer( writer_options.enable_unique_key_merge_on_write = context.enable_unique_key_merge_on_write; writer_options.rowset_ctx = &context; writer_options.max_rows_per_segment = context.max_rows_per_segment; + // TODO if support VerticalSegmentWriter, also need to handle cluster key primary key index *writer = std::make_unique( segment_file_writer.get(), seg_id, context.tablet_schema, context.tablet, context.data_dir, writer_options, inverted_index_file_writer.get()); diff --git a/be/src/olap/rowset_builder.h b/be/src/olap/rowset_builder.h index 7fd578037363a0..fb2294d1770cc4 100644 --- a/be/src/olap/rowset_builder.h +++ b/be/src/olap/rowset_builder.h @@ -38,7 +38,6 @@ namespace doris { class CalcDeleteBitmapToken; class FlushToken; class MemTable; -class MemTracker; class StorageEngine; class TupleDescriptor; class SlotDescriptor; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index ec291d8d2f0068..cdb637b1c42647 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -198,6 +198,21 @@ class MultiBlockMerger { pushed_row_refs.push_back(row_refs[i]); } } + if (!_tablet->tablet_schema()->cluster_key_uids().empty()) { + std::vector ids; + for (const auto& cid : _tablet->tablet_schema()->cluster_key_uids()) { + auto index = _tablet->tablet_schema()->field_index(cid); + if (index == -1) { + return Status::InternalError( + "could not find cluster key column with unique_id=" + + std::to_string(cid) + 
" in tablet schema"); + } + ids.push_back(index); + } + // sort by cluster key + std::stable_sort(pushed_row_refs.begin(), pushed_row_refs.end(), + ClusterKeyRowRefComparator(ids)); + } } // update real inserted row number @@ -249,6 +264,20 @@ class MultiBlockMerger { const size_t _num_columns; }; + struct ClusterKeyRowRefComparator { + ClusterKeyRowRefComparator(std::vector columns) : _columns(columns) {} + + int compare(const RowRef& lhs, const RowRef& rhs) const { + return lhs.block->compare_at(lhs.position, rhs.position, &_columns, *rhs.block, -1); + } + + bool operator()(const RowRef& lhs, const RowRef& rhs) const { + return compare(lhs, rhs) < 0; + } + + const std::vector _columns; + }; + BaseTabletSPtr _tablet; RowRefComparator _cmp; vectorized::Arena _arena; @@ -1158,6 +1187,7 @@ Status SchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParams& sc } context.write_type = DataWriteType::TYPE_SCHEMA_CHANGE; + // TODO if support VerticalSegmentWriter, also need to handle cluster key primary key index auto result = _new_tablet->create_rowset_writer(context, false); if (!result.has_value()) { res = Status::Error("create_rowset_writer failed, reason={}", diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 26ac54c699b81a..4240f7e250a06b 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -77,9 +77,8 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, } if (use_cache && !config::disable_segment_cache) { // memory of SegmentCache::CacheValue will be handled by SegmentCache - auto* cache_value = new SegmentCache::CacheValue(); + auto* cache_value = new SegmentCache::CacheValue(segment); _cache_mem_usage += segment->meta_mem_usage(); - cache_value->segment = std::move(segment); _segment_cache->insert(cache_key, *cache_value, cache_handle); } else { cache_handle->push_segment(std::move(segment)); diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index 
834906da93bf74..2c5b1ed200dde7 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -75,9 +75,9 @@ class SegmentCache : public LRUCachePolicy { // Holding all opened segments of a rowset. class CacheValue : public LRUCacheValueBase { public: - ~CacheValue() override { segment.reset(); } + CacheValue(segment_v2::SegmentSharedPtr segment_) : segment(std::move(segment_)) {} - segment_v2::SegmentSharedPtr segment; + const segment_v2::SegmentSharedPtr segment; }; SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit) @@ -124,8 +124,13 @@ class SegmentLoader { void erase_segments(const RowsetId& rowset_id, int64_t num_segments); - // Just used for BE UT - int64_t cache_mem_usage() const { return _cache_mem_usage; } + int64_t cache_mem_usage() const { +#ifdef BE_TEST + return _cache_mem_usage; +#else + return _segment_cache->value_mem_consumption(); +#endif + } private: SegmentLoader(); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 0d04984d0e06ba..379fb6eec3cb43 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1692,6 +1692,10 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info, // tablet may not have cooldowned data, but the storage policy is set tablet_info->__set_cooldown_term(_cooldown_conf.term); } + tablet_info->__set_local_index_size(_tablet_meta->tablet_local_index_size()); + tablet_info->__set_local_segment_size(_tablet_meta->tablet_local_segment_size()); + tablet_info->__set_remote_index_size(_tablet_meta->tablet_remote_index_size()); + tablet_info->__set_remote_segment_size(_tablet_meta->tablet_remote_segment_size()); } void Tablet::report_error(const Status& st) { diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index d6a944dbc39853..33fee7ca350900 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -57,8 +57,6 @@ #include "olap/tablet_schema.h" #include "olap/txn_manager.h" #include "runtime/exec_env.h" 
-#include "runtime/memory/mem_tracker.h" -#include "runtime/thread_context.h" #include "service/backend_options.h" #include "util/defer_op.h" #include "util/doris_metrics.h" @@ -83,28 +81,18 @@ using std::vector; namespace doris { using namespace ErrorCode; -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "tablet_meta"}})); - bvar::Adder g_tablet_meta_schema_columns_count("tablet_meta_schema_columns_count"); TabletManager::TabletManager(StorageEngine& engine, int32_t tablet_map_lock_shard_size) : _engine(engine), - _tablet_meta_mem_tracker(std::make_shared("TabletMeta(experimental)")), _tablets_shards_size(tablet_map_lock_shard_size), _tablets_shards_mask(tablet_map_lock_shard_size - 1) { CHECK_GT(_tablets_shards_size, 0); CHECK_EQ(_tablets_shards_size & _tablets_shards_mask, 0); _tablets_shards.resize(_tablets_shards_size); - REGISTER_HOOK_METRIC(tablet_meta_mem_consumption, - [this]() { return _tablet_meta_mem_tracker->consumption(); }); } -TabletManager::~TabletManager() { -#ifndef BE_TEST - DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); -#endif -} +TabletManager::~TabletManager() = default; Status TabletManager::_add_tablet_unlocked(TTabletId tablet_id, const TabletSharedPtr& tablet, bool update_meta, bool force, RuntimeProfile* profile) { @@ -242,10 +230,6 @@ Status TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, tablet_map_t& tablet_map = _get_tablet_map(tablet_id); tablet_map[tablet_id] = tablet; _add_tablet_to_partition(tablet); - // TODO: remove multiply 2 of tablet meta mem size - // Because table schema will copy in tablet, there will be double mem cost - // so here multiply 2 - _tablet_meta_mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2); g_tablet_meta_schema_columns_count << tablet->tablet_meta()->tablet_columns_num(); COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RegisterTabletInfo", "AddTablet"), static_cast(watch.reset())); @@ -599,7 +583,6 @@ 
Status TabletManager::_drop_tablet(TTabletId tablet_id, TReplicaId replica_id, b } to_drop_tablet->deregister_tablet_from_dir(); - _tablet_meta_mem_tracker->release(to_drop_tablet->tablet_meta()->mem_size() * 2); g_tablet_meta_schema_columns_count << -to_drop_tablet->tablet_meta()->tablet_columns_num(); return Status::OK(); } @@ -1083,6 +1066,10 @@ void TabletManager::build_all_report_tablets_info(std::map* t_tablet_stat.__set_total_version_count(tablet_info.total_version_count); t_tablet_stat.__set_visible_version_count(tablet_info.visible_version_count); t_tablet_stat.__set_visible_version(tablet_info.version); + t_tablet_stat.__set_local_index_size(tablet_info.local_index_size); + t_tablet_stat.__set_local_segment_size(tablet_info.local_segment_size); + t_tablet_stat.__set_remote_index_size(tablet_info.remote_index_size); + t_tablet_stat.__set_remote_segment_size(tablet_info.remote_segment_size); }; for_each_tablet(handler, filter_all_tablets); @@ -1183,14 +1170,14 @@ bool TabletManager::_move_tablet_to_trash(const TabletSharedPtr& tablet) { if (tablet_in_not_shutdown->tablet_path() != tablet->tablet_path()) { LOG(INFO) << "tablet path not eq shutdown tablet path, move it to trash, tablet_id=" << tablet_in_not_shutdown->tablet_id() - << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() - << " shutdown tablet path=" << tablet->tablet_path(); + << ", mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() + << ", shutdown tablet path=" << tablet->tablet_path(); return tablet->data_dir()->move_to_trash(tablet->tablet_path()); } else { LOG(INFO) << "tablet path eq shutdown tablet path, not move to trash, tablet_id=" << tablet_in_not_shutdown->tablet_id() - << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() - << " shutdown tablet path=" << tablet->tablet_path(); + << ", mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() + << ", shutdown tablet path=" << tablet->tablet_path(); return true; } } @@ 
-1295,7 +1282,7 @@ Status TabletManager::register_transition_tablet(int64_t tablet_id, std::string // not found shard.tablets_under_transition[tablet_id] = std::make_tuple(reason, thread_id, 1); LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason - << " lock times=1 thread_id_in_map=" << thread_id; + << ", lock times=1, thread_id_in_map=" << thread_id; return Status::OK(); } else { // found @@ -1303,15 +1290,15 @@ Status TabletManager::register_transition_tablet(int64_t tablet_id, std::string if (thread_id != thread_id_in_map) { // other thread, failed LOG(INFO) << "tablet_id = " << tablet_id << " is doing " << r - << " thread_id_in_map=" << thread_id_in_map << " , add reason=" << reason - << " thread_id=" << thread_id; + << ", thread_id_in_map=" << thread_id_in_map << " , add reason=" << reason + << ", thread_id=" << thread_id; return Status::InternalError("{} failed try later, tablet_id={}", reason, tablet_id); } // add lock times ++lock_times; LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason - << " lock times=" << lock_times << " thread_id_in_map=" << thread_id_in_map; + << ", lock times=" << lock_times << ", thread_id_in_map=" << thread_id_in_map; return Status::OK(); } } @@ -1335,10 +1322,10 @@ void TabletManager::unregister_transition_tablet(int64_t tablet_id, std::string --lock_times; if (lock_times != 0) { LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason - << " left=" << lock_times << " thread_id_in_map=" << thread_id_in_map; + << ", left=" << lock_times << ", thread_id_in_map=" << thread_id_in_map; } else { LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason - << " thread_id_in_map=" << thread_id_in_map; + << ", thread_id_in_map=" << thread_id_in_map; shard.tablets_under_transition.erase(tablet_id); } } diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index 42623cf05f2aea..6b6e7998f9cee1 100644 --- 
a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -251,9 +251,6 @@ class TabletManager { StorageEngine& _engine; - // TODO: memory size of TabletSchema cannot be accurately tracked. - std::shared_ptr _tablet_meta_mem_tracker; - const int32_t _tablets_shards_size; const int32_t _tablets_shards_mask; std::vector _tablets_shards; diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 0570aff349c583..3247f34656fb5d 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -57,6 +57,7 @@ using std::unordered_map; using std::vector; namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; TabletMetaSharedPtr TabletMeta::create( @@ -106,7 +107,7 @@ TabletMeta::TabletMeta() _delete_bitmap(new DeleteBitmap(_tablet_id)) {} TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, - int64_t replica_id, int32_t schema_hash, uint64_t shard_id, + int64_t replica_id, int32_t schema_hash, int32_t shard_id, const TTabletSchema& tablet_schema, uint32_t next_unique_id, const std::unordered_map& col_ordinal_to_unique_id, TabletUid tablet_uid, TTabletType::type tabletType, @@ -203,6 +204,9 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id case TInvertedIndexFileStorageFormat::V2: schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); break; + case TInvertedIndexFileStorageFormat::V3: + schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3); + break; default: schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); break; @@ -216,8 +220,8 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id schema->set_sort_type(SortType::LEXICAL); } schema->set_sort_col_num(tablet_schema.sort_col_num); - for (const auto& i : tablet_schema.cluster_key_idxes) { - schema->add_cluster_key_idxes(i); + for (const auto& i : tablet_schema.cluster_key_uids) { + 
schema->add_cluster_key_uids(i); } tablet_meta_pb.set_in_restore_mode(false); @@ -571,7 +575,8 @@ void TabletMeta::serialize(string* meta_binary) { Status TabletMeta::deserialize(std::string_view meta_binary) { TabletMetaPB tablet_meta_pb; - bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(), meta_binary.size()); + bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(), + static_cast(meta_binary.size())); if (!parsed) { return Status::Error("parse tablet meta failed"); } @@ -664,7 +669,7 @@ void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size(); CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size && seg_maps_size == versions_size); - for (size_t i = 0; i < rst_ids_size; ++i) { + for (int i = 0; i < rst_ids_size; ++i) { RowsetId rst_id; rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i)); auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i); @@ -781,12 +786,6 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { time_series_compaction_level_threshold()); } -int64_t TabletMeta::mem_size() const { - auto size = sizeof(TabletMeta); - size += _schema->mem_size(); - return size; -} - void TabletMeta::to_json(string* json_string, json2pb::Pb2JsonOptions& options) { TabletMetaPB tablet_meta_pb; to_meta_pb(&tablet_meta_pb); @@ -1312,4 +1311,5 @@ std::string tablet_state_name(TabletState state) { } } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index fb0895604a19fe..25f6bcd569be43 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -51,6 +51,7 @@ #include "util/uid_util.h" namespace json2pb { +#include "common/compile_check_begin.h" struct Pb2JsonOptions; } // namespace json2pb @@ -100,7 +101,7 @@ class TabletMeta : public MetadataAdder { TabletMeta(); TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, 
int64_t replica_id, - int32_t schema_hash, uint64_t shard_id, const TTabletSchema& tablet_schema, + int32_t schema_hash, int32_t shard_id, const TTabletSchema& tablet_schema, uint32_t next_unique_id, const std::unordered_map& col_ordinal_to_unique_id, TabletUid tablet_uid, TTabletType::type tabletType, @@ -140,10 +141,6 @@ class TabletMeta : public MetadataAdder { void to_meta_pb(TabletMetaPB* tablet_meta_pb); void to_json(std::string* json_string, json2pb::Pb2JsonOptions& options); - // Don't use. - // TODO: memory size of TabletSchema cannot be accurately tracked. - // In some places, temporarily use num_columns() as TabletSchema size. - int64_t mem_size() const; size_t tablet_columns_num() const { return _schema->num_columns(); } TabletTypePB tablet_type() const { return _tablet_type; } @@ -156,7 +153,7 @@ class TabletMeta : public MetadataAdder { int64_t replica_id() const; void set_replica_id(int64_t replica_id) { _replica_id = replica_id; } int32_t schema_hash() const; - int16_t shard_id() const; + int32_t shard_id() const; void set_shard_id(int32_t shard_id); int64_t creation_time() const; void set_creation_time(int64_t creation_time); @@ -170,6 +167,12 @@ class TabletMeta : public MetadataAdder { size_t tablet_local_size() const; // Remote disk space occupied by tablet. 
size_t tablet_remote_size() const; + + size_t tablet_local_index_size() const; + size_t tablet_local_segment_size() const; + size_t tablet_remote_index_size() const; + size_t tablet_remote_segment_size() const; + size_t version_count() const; size_t stale_version_count() const; size_t version_count_cross_with_range(const Version& range) const; @@ -608,7 +611,7 @@ inline int32_t TabletMeta::schema_hash() const { return _schema_hash; } -inline int16_t TabletMeta::shard_id() const { +inline int32_t TabletMeta::shard_id() const { return _shard_id; } @@ -668,6 +671,46 @@ inline size_t TabletMeta::tablet_remote_size() const { return total_size; } +inline size_t TabletMeta::tablet_local_index_size() const { + size_t total_size = 0; + for (auto& rs : _rs_metas) { + if (rs->is_local()) { + total_size += rs->index_disk_size(); + } + } + return total_size; +} + +inline size_t TabletMeta::tablet_local_segment_size() const { + size_t total_size = 0; + for (auto& rs : _rs_metas) { + if (rs->is_local()) { + total_size += rs->data_disk_size(); + } + } + return total_size; +} + +inline size_t TabletMeta::tablet_remote_index_size() const { + size_t total_size = 0; + for (auto& rs : _rs_metas) { + if (!rs->is_local()) { + total_size += rs->index_disk_size(); + } + } + return total_size; +} + +inline size_t TabletMeta::tablet_remote_segment_size() const { + size_t total_size = 0; + for (auto& rs : _rs_metas) { + if (!rs->is_local()) { + total_size += rs->data_disk_size(); + } + } + return total_size; +} + inline size_t TabletMeta::version_count() const { return _rs_metas.size(); } @@ -732,4 +775,5 @@ std::string tablet_state_name(TabletState state); bool operator==(const TabletMeta& a, const TabletMeta& b); bool operator!=(const TabletMeta& a, const TabletMeta& b); +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/olap/tablet_meta_manager.cpp b/be/src/olap/tablet_meta_manager.cpp index 6f27dd4db4e672..7c08d7856200f9 100644 --- 
a/be/src/olap/tablet_meta_manager.cpp +++ b/be/src/olap/tablet_meta_manager.cpp @@ -291,8 +291,7 @@ Status TabletMetaManager::remove_old_version_delete_bitmap(DataDir* store, TTabl return true; }; LOG(INFO) << "remove old version delete bitmap, tablet_id: " << tablet_id - << " version: " << version << " removed keys size: " << remove_keys.size(); - ; + << " version: " << version << ", removed keys size: " << remove_keys.size(); RETURN_IF_ERROR(meta->iterate(META_COLUMN_FAMILY_INDEX, begin_key, get_remove_keys_func)); return meta->remove(META_COLUMN_FAMILY_INDEX, remove_keys); } diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 7410b70f4aa471..a83e0bfdbf4c30 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -464,13 +464,39 @@ Status TabletReader::_init_orderby_keys_param(const ReaderParams& read_params) { // UNIQUE_KEYS will compare all keys as before if (_tablet_schema->keys_type() == DUP_KEYS || (_tablet_schema->keys_type() == UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write())) { - // find index in vector _return_columns - // for the read_orderby_key_num_prefix_columns orderby keys - for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { - for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { - if (_return_columns[idx] == i) { - _orderby_key_columns.push_back(idx); - break; + if (!_tablet_schema->cluster_key_uids().empty()) { + if (read_params.read_orderby_key_num_prefix_columns > + _tablet_schema->cluster_key_uids().size()) { + return Status::Error( + "read_orderby_key_num_prefix_columns={} > cluster_keys.size()={}", + read_params.read_orderby_key_num_prefix_columns, + _tablet_schema->cluster_key_uids().size()); + } + for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { + auto cid = _tablet_schema->cluster_key_uids()[i]; + auto index = _tablet_schema->field_index(cid); + if (index < 0) { + return Status::Error( + "could not find 
cluster key column with unique_id=" + + std::to_string(cid) + + " in tablet schema, tablet_id=" + std::to_string(_tablet->tablet_id())); + } + for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { + if (_return_columns[idx] == index) { + _orderby_key_columns.push_back(idx); + break; + } + } + } + } else { + // find index in vector _return_columns + // for the read_orderby_key_num_prefix_columns orderby keys + for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { + for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { + if (_return_columns[idx] == i) { + _orderby_key_columns.push_back(idx); + break; + } } } } @@ -579,8 +605,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate( return nullptr; } const TabletColumn& column = materialize_column(_tablet_schema->column(index)); - return create_column_predicate(index, bloom_filter.second, column.type(), - _reader_context.runtime_state->be_exec_version(), &column); + return create_column_predicate(index, bloom_filter.second, column.type(), &column); } ColumnPredicate* TabletReader::_parse_to_predicate( @@ -590,8 +615,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate( return nullptr; } const TabletColumn& column = materialize_column(_tablet_schema->column(index)); - return create_column_predicate(index, in_filter.second, column.type(), - _reader_context.runtime_state->be_exec_version(), &column); + return create_column_predicate(index, in_filter.second, column.type(), &column); } ColumnPredicate* TabletReader::_parse_to_predicate( @@ -601,8 +625,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate( return nullptr; } const TabletColumn& column = materialize_column(_tablet_schema->column(index)); - return create_column_predicate(index, bitmap_filter.second, column.type(), - _reader_context.runtime_state->be_exec_version(), &column); + return create_column_predicate(index, bitmap_filter.second, column.type(), &column); } ColumnPredicate* 
TabletReader::_parse_to_predicate(const FunctionFilter& function_filter) { @@ -612,8 +635,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const FunctionFilter& functio } const TabletColumn& column = materialize_column(_tablet_schema->column(index)); return create_column_predicate(index, std::make_shared(function_filter), - column.type(), _reader_context.runtime_state->be_exec_version(), - &column); + column.type(), &column); } Status TabletReader::_init_delete_condition(const ReaderParams& read_params) { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index c4f96e2214853d..3ec5d22166477f 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -41,8 +41,6 @@ #include "olap/tablet_column_object_pool.h" #include "olap/types.h" #include "olap/utils.h" -#include "runtime/memory/lru_cache_policy.h" -#include "runtime/thread_context.h" #include "tablet_meta.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/aggregate_functions/aggregate_function_state_union.h" @@ -975,10 +973,10 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _indexes.clear(); _field_name_to_index.clear(); _field_id_to_index.clear(); - _cluster_key_idxes.clear(); + _cluster_key_uids.clear(); clear_column_cache_handlers(); - for (const auto& i : schema.cluster_key_idxes()) { - _cluster_key_idxes.push_back(i); + for (const auto& i : schema.cluster_key_uids()) { + _cluster_key_uids.push_back(i); } for (auto& column_pb : schema.column()) { TabletColumnPtr column; @@ -1126,10 +1124,10 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _sequence_col_idx = -1; _version_col_idx = -1; _skip_bitmap_col_idx = -1; - _cluster_key_idxes.clear(); + _cluster_key_uids.clear(); clear_column_cache_handlers(); - for (const auto& i : ori_tablet_schema._cluster_key_idxes) { - _cluster_key_idxes.push_back(i); + for (const auto& i : 
ori_tablet_schema._cluster_key_uids) { + _cluster_key_uids.push_back(i); } for (auto& column : index->columns) { if (column->is_key()) { @@ -1237,8 +1235,8 @@ void TabletSchema::reserve_extracted_columns() { } void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { - for (const auto& i : _cluster_key_idxes) { - tablet_schema_pb->add_cluster_key_idxes(i); + for (const auto& i : _cluster_key_uids) { + tablet_schema_pb->add_cluster_key_uids(i); } tablet_schema_pb->set_keys_type(_keys_type); for (const auto& col : _cols) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 5fb3deafd77319..c813d6f0ef8722 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -332,10 +332,8 @@ class TabletSchema : public MetadataAdder { void copy_from(const TabletSchema& tablet_schema); void update_index_info_from(const TabletSchema& tablet_schema); std::string to_key() const; - // Don't use. - // TODO: memory size of TabletSchema cannot be accurately tracked. - // In some places, temporarily use num_columns() as TabletSchema size. - int64_t mem_size() const { return _mem_size; } + // get_metadata_size is only the memory of the TabletSchema itself, not include child objects. 
+ int64_t mem_size() const { return get_metadata_size(); } size_t row_size() const; int32_t field_index(const std::string& field_name) const; int32_t field_index(const vectorized::PathInData& path) const; @@ -351,7 +349,7 @@ class TabletSchema : public MetadataAdder { const std::vector& columns() const; size_t num_columns() const { return _num_columns; } size_t num_key_columns() const { return _num_key_columns; } - const std::vector& cluster_key_idxes() const { return _cluster_key_idxes; } + const std::vector& cluster_key_uids() const { return _cluster_key_uids; } size_t num_null_columns() const { return _num_null_columns; } size_t num_short_key_columns() const { return _num_short_key_columns; } size_t num_rows_per_row_block() const { return _num_rows_per_row_block; } @@ -550,7 +548,7 @@ class TabletSchema : public MetadataAdder { size_t _num_columns = 0; size_t _num_variant_columns = 0; size_t _num_key_columns = 0; - std::vector _cluster_key_idxes; + std::vector _cluster_key_uids; size_t _num_null_columns = 0; size_t _num_short_key_columns = 0; size_t _num_rows_per_row_block = 0; @@ -573,7 +571,6 @@ class TabletSchema : public MetadataAdder { int64_t _db_id = -1; bool _disable_auto_compaction = false; bool _enable_single_replica_compaction = false; - int64_t _mem_size = 0; bool _store_row_column = false; bool _skip_write_index_on_load = false; InvertedIndexStorageFormatPB _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; diff --git a/be/src/olap/tablet_schema_cache.cpp b/be/src/olap/tablet_schema_cache.cpp index fd238fa5affb3f..e044ef9c0426f4 100644 --- a/be/src/olap/tablet_schema_cache.cpp +++ b/be/src/olap/tablet_schema_cache.cpp @@ -56,7 +56,7 @@ std::pair TabletSchemaCache::insert(const std: tablet_schema_ptr->init_from_pb(pb, false, true); value->tablet_schema = tablet_schema_ptr; lru_handle = LRUCachePolicy::insert(key_signature, value, tablet_schema_ptr->num_columns(), - 0, CachePriority::NORMAL); + tablet_schema_ptr->mem_size(), 
CachePriority::NORMAL); g_tablet_schema_cache_count << 1; g_tablet_schema_cache_columns_count << tablet_schema_ptr->num_columns(); } diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index f7712625d3e9a6..ad018c8b4f8f3d 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -34,6 +34,7 @@ #include "pipeline/common/set_utils.h" #include "pipeline/exec/data_queue.h" #include "pipeline/exec/join/process_hash_table_probe.h" +#include "util/stack_util.h" #include "vec/common/sort/partition_sorter.h" #include "vec/common/sort/sorter.h" #include "vec/core/block.h" @@ -107,7 +108,7 @@ class Dependency : public std::enable_shared_from_this { // Which dependency current pipeline task is blocked by. `nullptr` if this dependency is ready. [[nodiscard]] virtual Dependency* is_blocked_by(PipelineTask* task = nullptr); // Notify downstream pipeline tasks this dependency is ready. - void set_ready(); + virtual void set_ready(); void set_ready_to_read() { DCHECK_EQ(_shared_state->source_deps.size(), 1) << debug_string(); _shared_state->source_deps.front()->set_ready(); diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index cec0c77da8a61d..b2a79a941f79e7 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -91,8 +91,8 @@ Status HashJoinBuildSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo RETURN_IF_ERROR(_hash_table_init(state)); _runtime_filters.resize(p._runtime_filter_descs.size()); for (size_t i = 0; i < p._runtime_filter_descs.size(); i++) { - RETURN_IF_ERROR(state->register_producer_runtime_filter( - p._runtime_filter_descs[i], &_runtime_filters[i], _build_expr_ctxs.size() == 1)); + RETURN_IF_ERROR(state->register_producer_runtime_filter(p._runtime_filter_descs[i], + &_runtime_filters[i])); } _runtime_filter_slots = @@ -139,35 +139,54 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, 
Status exec_statu return Base::close(state, exec_status); } - if (state->get_task()->wake_up_by_downstream()) { - if (_should_build_hash_table) { - // partitial ignore rf to make global rf work - RETURN_IF_ERROR(_runtime_filter_slots->send_filter_size(state, 0, _finish_dependency)); - RETURN_IF_ERROR(_runtime_filter_slots->ignore_all_filters()); + try { + if (state->get_task()->wake_up_by_downstream()) { + if (_should_build_hash_table) { + // partitial ignore rf to make global rf work + RETURN_IF_ERROR( + _runtime_filter_slots->send_filter_size(state, 0, _finish_dependency)); + RETURN_IF_ERROR(_runtime_filter_slots->ignore_all_filters()); + } else { + // do not publish filter coz local rf not inited and useless + return Base::close(state, exec_status); + } + } else if (_should_build_hash_table) { + if (p._shared_hashtable_controller && + !p._shared_hash_table_context->complete_build_stage) { + return Status::InternalError("close before sink meet eos"); + } + auto* block = _shared_state->build_block.get(); + uint64_t hash_table_size = block ? 
block->rows() : 0; + { + SCOPED_TIMER(_runtime_filter_init_timer); + RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); + RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); + } + if (hash_table_size > 1) { + SCOPED_TIMER(_runtime_filter_compute_timer); + _runtime_filter_slots->insert(block); + } + } else if ((p._shared_hashtable_controller && !p._shared_hash_table_context->signaled) || + (p._shared_hash_table_context && + !p._shared_hash_table_context->complete_build_stage)) { + throw Exception(ErrorCode::INTERNAL_ERROR, "build_sink::close meet error state"); } else { - // do not publish filter coz local rf not inited and useless - return Base::close(state, exec_status); - } - } else if (_should_build_hash_table) { - if (p._shared_hashtable_controller && !p._shared_hash_table_context->complete_build_stage) { - return Status::InternalError("close before sink meet eos"); + RETURN_IF_ERROR( + _runtime_filter_slots->copy_from_shared_context(p._shared_hash_table_context)); } - auto* block = _shared_state->build_block.get(); - uint64_t hash_table_size = block ? 
block->rows() : 0; - { - SCOPED_TIMER(_runtime_filter_init_timer); - RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); - RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); - } - if (hash_table_size > 1) { - SCOPED_TIMER(_runtime_filter_compute_timer); - _runtime_filter_slots->insert(block); - } - } - SCOPED_TIMER(_publish_runtime_filter_timer); - RETURN_IF_ERROR_OR_CATCH_EXCEPTION( - _runtime_filter_slots->publish(state, !_should_build_hash_table)); + SCOPED_TIMER(_publish_runtime_filter_timer); + RETURN_IF_ERROR(_runtime_filter_slots->publish(state, !_should_build_hash_table)); + } catch (Exception& e) { + return Status::InternalError( + "rf process meet error: {}, wake_up_by_downstream: {}, should_build_hash_table: " + "{}, _finish_dependency: {}, complete_build_stage: {}, shared_hash_table_signaled: " + "{}", + e.to_string(), state->get_task()->wake_up_by_downstream(), _should_build_hash_table, + _finish_dependency->debug_string(), + p._shared_hash_table_context && !p._shared_hash_table_context->complete_build_stage, + p._shared_hashtable_controller && !p._shared_hash_table_context->signaled); + } return Base::close(state, exec_status); } @@ -537,9 +556,6 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, vectorized::Block* return _shared_hash_table_context->status; } - RETURN_IF_ERROR(local_state._runtime_filter_slots->copy_from_shared_context( - _shared_hash_table_context)); - local_state.profile()->add_info_string( "SharedHashTableFrom", print_id( diff --git a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp index 9e3e8a08ca83a5..35b9de619f393d 100644 --- a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp @@ -67,7 +67,7 @@ Status NestedLoopJoinBuildSinkLocalState::init(RuntimeState* state, LocalSinkSta _runtime_filters.resize(p._runtime_filter_descs.size()); for (size_t i 
= 0; i < p._runtime_filter_descs.size(); i++) { RETURN_IF_ERROR(state->register_producer_runtime_filter(p._runtime_filter_descs[i], - &_runtime_filters[i], false)); + &_runtime_filters[i])); } return Status::OK(); } diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp index ab0a43f4a635cf..5273960a5c1c29 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp @@ -202,7 +202,7 @@ size_t PartitionedAggSinkOperatorX::revocable_mem_size(RuntimeState* state) cons Status PartitionedAggSinkLocalState::setup_in_memory_agg_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); _runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp b/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp index 655a6e19725a9b..cdc6ef881d436d 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp +++ b/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp @@ -166,7 +166,7 @@ Status PartitionedAggSourceOperatorX::get_block(RuntimeState* state, vectorized: Status PartitionedAggLocalState::setup_in_memory_agg_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); 
_runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp index 0e56acc1c574b2..20b25d54ff9f16 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp +++ b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp @@ -606,7 +606,7 @@ Status PartitionedHashJoinProbeOperatorX::_setup_internal_operators( } local_state._runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); local_state._runtime_state->set_task_execution_context( diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp index d221eaeed0faba..878c3870946f1c 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp @@ -438,7 +438,7 @@ Status PartitionedHashJoinSinkOperatorX::_setup_internal_operator(RuntimeState* auto& local_state = get_local_state(state); local_state._shared_state->inner_runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); local_state._shared_state->inner_runtime_state->set_task_execution_context( state->get_task_execution_context().lock()); diff --git a/be/src/pipeline/exec/schema_scan_operator.cpp b/be/src/pipeline/exec/schema_scan_operator.cpp index 006ecf8ad82e84..ddc2821cac14a1 100644 --- 
a/be/src/pipeline/exec/schema_scan_operator.cpp +++ b/be/src/pipeline/exec/schema_scan_operator.cpp @@ -48,7 +48,7 @@ Status SchemaScanLocalState::init(RuntimeState* state, LocalStateInfo& info) { // new one scanner _schema_scanner = SchemaScanner::create(schema_table->schema_table_type()); - _schema_scanner->set_dependency(_data_dependency, _finish_dependency); + _schema_scanner->set_dependency(_data_dependency); if (nullptr == _schema_scanner) { return Status::InternalError("schema scanner get nullptr pointer."); } @@ -266,9 +266,6 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state, vectorized::Block* bl } while (block->rows() == 0 && !*eos); local_state.reached_limit(block, eos); - if (*eos) { - local_state._finish_dependency->set_always_ready(); - } return Status::OK(); } diff --git a/be/src/pipeline/exec/schema_scan_operator.h b/be/src/pipeline/exec/schema_scan_operator.h index 03cf422fbc52e6..c8ddf885e98a0f 100644 --- a/be/src/pipeline/exec/schema_scan_operator.h +++ b/be/src/pipeline/exec/schema_scan_operator.h @@ -36,9 +36,6 @@ class SchemaScanLocalState final : public PipelineXLocalState<> { SchemaScanLocalState(RuntimeState* state, OperatorXBase* parent) : PipelineXLocalState<>(state, parent) { - _finish_dependency = - std::make_shared(parent->operator_id(), parent->node_id(), - parent->get_name() + "_FINISH_DEPENDENCY", true); _data_dependency = std::make_shared(parent->operator_id(), parent->node_id(), parent->get_name() + "_DEPENDENCY", true); } @@ -48,7 +45,6 @@ class SchemaScanLocalState final : public PipelineXLocalState<> { Status open(RuntimeState* state) override; - Dependency* finishdependency() override { return _finish_dependency.get(); } std::vector dependencies() const override { return {_data_dependency.get()}; } private: @@ -57,7 +53,6 @@ class SchemaScanLocalState final : public PipelineXLocalState<> { SchemaScannerParam _scanner_param; std::unique_ptr _schema_scanner; - std::shared_ptr _finish_dependency; std::shared_ptr 
_data_dependency; }; diff --git a/be/src/pipeline/exec/spill_sort_sink_operator.cpp b/be/src/pipeline/exec/spill_sort_sink_operator.cpp index 6e6689d4134deb..6071301c1d7bcc 100644 --- a/be/src/pipeline/exec/spill_sort_sink_operator.cpp +++ b/be/src/pipeline/exec/spill_sort_sink_operator.cpp @@ -80,7 +80,7 @@ Status SpillSortSinkLocalState::close(RuntimeState* state, Status execsink_statu Status SpillSortSinkLocalState::setup_in_memory_sort_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); _runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/exec/spill_sort_source_operator.cpp b/be/src/pipeline/exec/spill_sort_source_operator.cpp index e766cb27168de1..69ed816fa9142d 100644 --- a/be/src/pipeline/exec/spill_sort_source_operator.cpp +++ b/be/src/pipeline/exec/spill_sort_source_operator.cpp @@ -212,7 +212,7 @@ Status SpillSortLocalState::_create_intermediate_merger( } Status SpillSortLocalState::setup_in_memory_sort_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); _runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/local_exchange/local_exchanger.h b/be/src/pipeline/local_exchange/local_exchanger.h index f518e2649f89e6..4d699baa52fb8b 100644 --- a/be/src/pipeline/local_exchange/local_exchanger.h +++ 
b/be/src/pipeline/local_exchange/local_exchanger.h @@ -110,7 +110,11 @@ struct BlockQueue { : eos(other.eos.load()), data_queue(std::move(other.data_queue)) {} inline bool enqueue(BlockType const& item) { if (!eos) { - data_queue.enqueue(item); + if (!data_queue.enqueue(item)) [[unlikely]] { + throw Exception(ErrorCode::INTERNAL_ERROR, + "Exception occurs in data queue [size = {}] of local exchange.", + data_queue.size_approx()); + } return true; } return false; @@ -118,7 +122,11 @@ struct BlockQueue { inline bool enqueue(BlockType&& item) { if (!eos) { - data_queue.enqueue(std::move(item)); + if (!data_queue.enqueue(std::move(item))) [[unlikely]] { + throw Exception(ErrorCode::INTERNAL_ERROR, + "Exception occurs in data queue [size = {}] of local exchange.", + data_queue.size_approx()); + } return true; } return false; @@ -186,6 +194,8 @@ struct BlockWrapper { shared_state->exchanger->_free_block_limit * shared_state->exchanger->_num_sources) { data_block.clear_column_data(); + // Free blocks is used to improve memory efficiency. Failure during pushing back + // free block will not incur any bad result so just ignore the return value. 
shared_state->exchanger->_free_blocks.enqueue(std::move(data_block)); } } diff --git a/be/src/pipeline/pipeline.cpp b/be/src/pipeline/pipeline.cpp index 96da754daa5d98..e4678b7dcf3a83 100644 --- a/be/src/pipeline/pipeline.cpp +++ b/be/src/pipeline/pipeline.cpp @@ -112,7 +112,12 @@ void Pipeline::make_all_runnable() { if (_sink->count_down_destination()) { for (auto* task : _tasks) { if (task) { - task->clear_blocking_state(true); + task->set_wake_up_by_downstream(); + } + } + for (auto* task : _tasks) { + if (task) { + task->clear_blocking_state(); } } } diff --git a/be/src/pipeline/pipeline.h b/be/src/pipeline/pipeline.h index b969186b178bf7..afbe6c77596432 100644 --- a/be/src/pipeline/pipeline.h +++ b/be/src/pipeline/pipeline.h @@ -73,6 +73,14 @@ class Pipeline : public std::enable_shared_from_this { return idx == ExchangeType::HASH_SHUFFLE || idx == ExchangeType::BUCKET_HASH_SHUFFLE; } + // For HASH_SHUFFLE, BUCKET_HASH_SHUFFLE, and ADAPTIVE_PASSTHROUGH, + // data is processed and shuffled on the sink. + // Compared to PASSTHROUGH, this is a relatively heavy operation. 
+ static bool heavy_operations_on_the_sink(ExchangeType idx) { + return idx == ExchangeType::HASH_SHUFFLE || idx == ExchangeType::BUCKET_HASH_SHUFFLE || + idx == ExchangeType::ADAPTIVE_PASSTHROUGH; + } + bool need_to_local_exchange(const DataDistribution target_data_distribution, const int idx) const; void init_data_distribution() { diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index 0775ef3fb19826..8ceb63eb99324c 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -397,9 +397,9 @@ Status PipelineFragmentContext::_build_pipeline_tasks(const doris::TPipelineFrag << print_id(_task_runtime_states[pip_idx][i]->fragment_instance_id()) << " " << pipeline->debug_string(); _task_runtime_states[pip_idx][i] = RuntimeState::create_unique( - this, local_params.fragment_instance_id, request.query_id, - request.fragment_id, request.query_options, _query_ctx->query_globals, - _exec_env, _query_ctx.get()); + local_params.fragment_instance_id, request.query_id, request.fragment_id, + request.query_options, _query_ctx->query_globals, _exec_env, + _query_ctx.get()); auto& task_runtime_state = _task_runtime_states[pip_idx][i]; _runtime_filter_states[i]->set_state(task_runtime_state.get()); { @@ -814,7 +814,7 @@ Status PipelineFragmentContext::_add_local_exchange_impl( } case ExchangeType::ADAPTIVE_PASSTHROUGH: shared_state->exchanger = AdaptivePassthroughExchanger::create_unique( - cur_pipe->num_tasks(), _num_instances, + std::max(cur_pipe->num_tasks(), _num_instances), _num_instances, _runtime_state->query_options().__isset.local_exchange_free_blocks_limit ? 
cast_set( _runtime_state->query_options().local_exchange_free_blocks_limit) @@ -915,9 +915,13 @@ Status PipelineFragmentContext::_add_local_exchange( << " cur_pipe->operators().size(): " << cur_pipe->operators().size() << " new_pip->operators().size(): " << new_pip->operators().size(); - // Add passthrough local exchanger if necessary + // There are some local shuffles with relatively heavy operations on the sink. + // If the local sink concurrency is 1 and the local source concurrency is n, the sink becomes a bottleneck. + // Therefore, local passthrough is used to increase the concurrency of the sink. + // op -> local sink(1) -> local source (n) + // op -> local passthrough(1) -> local passthrough(n) -> local sink(n) -> local source (n) if (cur_pipe->num_tasks() > 1 && new_pip->num_tasks() == 1 && - Pipeline::is_hash_exchange(data_distribution.distribution_type)) { + Pipeline::heavy_operations_on_the_sink(data_distribution.distribution_type)) { RETURN_IF_ERROR(_add_local_exchange_impl( cast_set(new_pip->operators().size()), pool, new_pip, add_pipeline(new_pip, pip_idx + 2), DataDistribution(ExchangeType::PASSTHROUGH), diff --git a/be/src/pipeline/pipeline_task.h b/be/src/pipeline/pipeline_task.h index 3b4627f589dc54..4bb062122c0c08 100644 --- a/be/src/pipeline/pipeline_task.h +++ b/be/src/pipeline/pipeline_task.h @@ -135,11 +135,12 @@ class PipelineTask { int task_id() const { return _index; }; bool is_finalized() const { return _finalized; } - void clear_blocking_state(bool wake_up_by_downstream = false) { + void set_wake_up_by_downstream() { _wake_up_by_downstream = true; } + + void clear_blocking_state() { _state->get_query_ctx()->get_execution_dependency()->set_always_ready(); // We use a lock to assure all dependencies are not deconstructed here. 
std::unique_lock lc(_dependency_lock); - _wake_up_by_downstream = _wake_up_by_downstream || wake_up_by_downstream; if (!_finalized) { _execution_dep->set_always_ready(); for (auto* dep : _filter_dependencies) { diff --git a/be/src/runtime/buffer_control_block.cpp b/be/src/runtime/buffer_control_block.cpp index 1ed2836f8eb616..8c1ae79955f317 100644 --- a/be/src/runtime/buffer_control_block.cpp +++ b/be/src/runtime/buffer_control_block.cpp @@ -292,6 +292,9 @@ Status BufferControlBlock::get_arrow_batch(std::shared_ptr* r _arrow_data_arrival.wait_for(l, std::chrono::milliseconds(20)); } + if (!_status.ok()) { + return _status; + } if (_is_cancelled) { return Status::Cancelled(fmt::format("Cancelled ()", print_id(_fragment_id))); } @@ -311,9 +314,12 @@ Status BufferControlBlock::get_arrow_batch(std::shared_ptr* r // normal path end if (_is_close) { + if (!_status.ok()) { + return _status; + } std::stringstream ss; _profile.pretty_print(&ss); - VLOG_NOTICE << fmt::format( + LOG(INFO) << fmt::format( "BufferControlBlock finished, fragment_id={}, is_close={}, is_cancelled={}, " "packet_num={}, peak_memory_usage={}, profile={}", print_id(_fragment_id), _is_close, _is_cancelled, _packet_num, @@ -321,7 +327,7 @@ Status BufferControlBlock::get_arrow_batch(std::shared_ptr* r return Status::OK(); } return Status::InternalError( - fmt::format("Get Arrow Batch Abnormal Ending ()", print_id(_fragment_id))); + fmt::format("Get Arrow Batch Abnormal Ending (), ()", print_id(_fragment_id), _status)); } void BufferControlBlock::get_arrow_batch(GetArrowResultBatchCtx* ctx) { @@ -354,10 +360,14 @@ void BufferControlBlock::get_arrow_batch(GetArrowResultBatchCtx* ctx) { // normal path end if (_is_close) { + if (!_status.ok()) { + ctx->on_failure(_status); + return; + } ctx->on_close(_packet_num); std::stringstream ss; _profile.pretty_print(&ss); - VLOG_NOTICE << fmt::format( + LOG(INFO) << fmt::format( "BufferControlBlock finished, fragment_id={}, is_close={}, is_cancelled={}, " 
"packet_num={}, peak_memory_usage={}, profile={}", print_id(_fragment_id), _is_close, _is_cancelled, _packet_num, @@ -391,8 +401,8 @@ Status BufferControlBlock::find_arrow_schema(std::shared_ptr* arr if (_is_close) { return Status::RuntimeError(fmt::format("Closed ()", print_id(_fragment_id))); } - return Status::InternalError( - fmt::format("Get Arrow Schema Abnormal Ending ()", print_id(_fragment_id))); + return Status::InternalError(fmt::format("Get Arrow Schema Abnormal Ending (), ()", + print_id(_fragment_id), _status)); } Status BufferControlBlock::close(const TUniqueId& id, Status exec_status) { diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index a27936f5f0d88b..636ce2bf288b58 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -178,7 +178,6 @@ class ExecEnv { std::vector mem_tracker_limiter_pool; void init_mem_tracker(); std::shared_ptr orphan_mem_tracker() { return _orphan_mem_tracker; } - std::shared_ptr page_no_cache_mem_tracker() { return _page_no_cache_mem_tracker; } std::shared_ptr brpc_iobuf_block_memory_tracker() { return _brpc_iobuf_block_memory_tracker; } @@ -188,6 +187,15 @@ class ExecEnv { std::shared_ptr stream_load_pipe_tracker() { return _stream_load_pipe_tracker; } + std::shared_ptr tablets_no_cache_mem_tracker() { + return _tablets_no_cache_mem_tracker; + } + std::shared_ptr rowsets_no_cache_mem_tracker() { + return _rowsets_no_cache_mem_tracker; + } + std::shared_ptr segments_no_cache_mem_tracker() { + return _segments_no_cache_mem_tracker; + } std::shared_ptr point_query_executor_mem_tracker() { return _point_query_executor_mem_tracker; } @@ -377,13 +385,15 @@ class ExecEnv { // Ideally, all threads are expected to attach to the specified tracker, so that "all memory has its own ownership", // and the consumption of the orphan mem tracker is close to 0, but greater than 0. std::shared_ptr _orphan_mem_tracker; - // page size not in cache, data page/index page/etc. 
- std::shared_ptr _page_no_cache_mem_tracker; std::shared_ptr _brpc_iobuf_block_memory_tracker; // Count the memory consumption of segment compaction tasks. std::shared_ptr _segcompaction_mem_tracker; std::shared_ptr _stream_load_pipe_tracker; + std::shared_ptr _tablets_no_cache_mem_tracker; + std::shared_ptr _rowsets_no_cache_mem_tracker; + std::shared_ptr _segments_no_cache_mem_tracker; + // Tracking memory may be shared between multiple queries. std::shared_ptr _point_query_executor_mem_tracker; std::shared_ptr _block_compression_mem_tracker; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index b7f926cc3b4512..a371cdb947ff56 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -599,15 +599,20 @@ void ExecEnv::init_mem_tracker() { _s_tracking_memory = true; _orphan_mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "Orphan"); - _page_no_cache_mem_tracker = std::make_shared("PageNoCache"); _brpc_iobuf_block_memory_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "IOBufBlockMemory"); _segcompaction_mem_tracker = - MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "SegCompaction"); + MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::COMPACTION, "SegCompaction"); + _tablets_no_cache_mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::METADATA, "Tablets(not in SchemaCache, TabletSchemaCache)"); + _segments_no_cache_mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::METADATA, "Segments(not in SegmentCache)"); + _rowsets_no_cache_mem_tracker = + MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::METADATA, "Rowsets"); _point_query_executor_mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "PointQueryExecutor"); _query_cache_mem_tracker = - MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "QueryCache"); + 
MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::CACHE, "QueryCache"); _block_compression_mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "BlockCompression"); _rowid_storage_reader_tracker = @@ -716,7 +721,7 @@ void ExecEnv::destroy() { _file_cache_open_fd_cache.reset(); SAFE_STOP(_write_cooldown_meta_executors); - // StorageEngine must be destoried before _page_no_cache_mem_tracker.reset and _cache_manager destory + // StorageEngine must be destoried before _cache_manager destory SAFE_STOP(_storage_engine); _storage_engine.reset(); diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 1e72fa756d3dd3..f96e4152500808 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -269,8 +269,11 @@ void FragmentMgr::stop() { // Only me can delete { - std::lock_guard lock(_lock); + std::unique_lock lock(_query_ctx_map_mutex); _query_ctx_map.clear(); + } + { + std::unique_lock lock(_pipeline_map_mutex); _pipeline_map.clear(); } _thread_pool->shutdown(); @@ -583,11 +586,7 @@ Status FragmentMgr::start_query_execution(const PExecPlanFragmentStartRequest* r TUniqueId query_id; query_id.__set_hi(request->query_id().hi()); query_id.__set_lo(request->query_id().lo()); - std::shared_ptr q_ctx = nullptr; - { - std::lock_guard lock(_lock); - q_ctx = _get_or_erase_query_ctx(query_id); - } + auto q_ctx = get_query_ctx(query_id); if (q_ctx) { q_ctx->set_ready_to_execute(Status::OK()); LOG_INFO("Query {} start execution", print_id(query_id)); @@ -602,114 +601,107 @@ Status FragmentMgr::start_query_execution(const PExecPlanFragmentStartRequest* r void FragmentMgr::remove_pipeline_context( std::shared_ptr f_context) { - { - std::lock_guard lock(_lock); - auto query_id = f_context->get_query_id(); - int64 now = duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - g_fragment_executing_count << -1; - g_fragment_last_active_time.set_value(now); - 
_pipeline_map.erase({query_id, f_context->get_fragment_id()}); - } + auto query_id = f_context->get_query_id(); + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + g_fragment_executing_count << -1; + g_fragment_last_active_time.set_value(now); + + std::unique_lock lock(_pipeline_map_mutex); + _pipeline_map.erase({query_id, f_context->get_fragment_id()}); } -std::shared_ptr FragmentMgr::_get_or_erase_query_ctx(const TUniqueId& query_id) { +std::shared_ptr FragmentMgr::get_query_ctx(const TUniqueId& query_id) { + std::shared_lock lock(_query_ctx_map_mutex); auto search = _query_ctx_map.find(query_id); if (search != _query_ctx_map.end()) { if (auto q_ctx = search->second.lock()) { return q_ctx; - } else { - LOG(WARNING) << "Query context (query id = " << print_id(query_id) - << ") has been released."; - _query_ctx_map.erase(search); - return nullptr; } } return nullptr; } -std::shared_ptr FragmentMgr::get_or_erase_query_ctx_with_lock( - const TUniqueId& query_id) { - std::unique_lock lock(_lock); - return _get_or_erase_query_ctx(query_id); -} - -template -Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, bool pipeline, - QuerySource query_source, - std::shared_ptr& query_ctx) { +Status FragmentMgr::_get_or_create_query_ctx(const TPipelineFragmentParams& params, + TUniqueId query_id, bool pipeline, + QuerySource query_source, + std::shared_ptr& query_ctx) { DBUG_EXECUTE_IF("FragmentMgr._get_query_ctx.failed", { return Status::InternalError("FragmentMgr._get_query_ctx.failed, query id {}", print_id(query_id)); }); + + // Find _query_ctx_map, in case some other request has already + // create the query fragments context. 
+ query_ctx = get_query_ctx(query_id); if (params.is_simplified_param) { // Get common components from _query_ctx_map - std::lock_guard lock(_lock); - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { - query_ctx = q_ctx; - } else { + if (!query_ctx) { return Status::InternalError( "Failed to get query fragments context. Query {} may be timeout or be " "cancelled. host: {}", print_id(query_id), BackendOptions::get_localhost()); } } else { - // Find _query_ctx_map, in case some other request has already - // create the query fragments context. - std::lock_guard lock(_lock); - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { - query_ctx = q_ctx; - return Status::OK(); - } + if (!query_ctx) { + std::unique_lock lock(_query_ctx_map_mutex); + // Only one thread need create query ctx. other thread just get query_ctx in _query_ctx_map. + auto search = _query_ctx_map.find(query_id); + if (search != _query_ctx_map.end()) { + query_ctx = search->second.lock(); + } - // First time a fragment of a query arrived. print logs. - LOG(INFO) << "query_id: " << print_id(query_id) << ", coord_addr: " << params.coord - << ", total fragment num on current host: " << params.fragment_num_on_host - << ", fe process uuid: " << params.query_options.fe_process_uuid - << ", query type: " << params.query_options.query_type - << ", report audit fe:" << params.current_connect_fe; - - // This may be a first fragment request of the query. - // Create the query fragments context. 
- query_ctx = QueryContext::create_shared(query_id, _exec_env, params.query_options, - params.coord, params.is_nereids, - params.current_connect_fe, query_source); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_ctx->query_mem_tracker); - RETURN_IF_ERROR(DescriptorTbl::create(&(query_ctx->obj_pool), params.desc_tbl, - &(query_ctx->desc_tbl))); - // set file scan range params - if (params.__isset.file_scan_params) { - query_ctx->file_scan_range_params_map = params.file_scan_params; - } + if (!query_ctx) { + // First time a fragment of a query arrived. print logs. + LOG(INFO) << "query_id: " << print_id(query_id) << ", coord_addr: " << params.coord + << ", total fragment num on current host: " << params.fragment_num_on_host + << ", fe process uuid: " << params.query_options.fe_process_uuid + << ", query type: " << params.query_options.query_type + << ", report audit fe:" << params.current_connect_fe; + + // This may be a first fragment request of the query. + // Create the query fragments context. 
+ query_ctx = QueryContext::create_shared(query_id, _exec_env, params.query_options, + params.coord, params.is_nereids, + params.current_connect_fe, query_source); + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(query_ctx->query_mem_tracker); + RETURN_IF_ERROR(DescriptorTbl::create(&(query_ctx->obj_pool), params.desc_tbl, + &(query_ctx->desc_tbl))); + // set file scan range params + if (params.__isset.file_scan_params) { + query_ctx->file_scan_range_params_map = params.file_scan_params; + } - query_ctx->query_globals = params.query_globals; + query_ctx->query_globals = params.query_globals; - if (params.__isset.resource_info) { - query_ctx->user = params.resource_info.user; - query_ctx->group = params.resource_info.group; - query_ctx->set_rsc_info = true; - } + if (params.__isset.resource_info) { + query_ctx->user = params.resource_info.user; + query_ctx->group = params.resource_info.group; + query_ctx->set_rsc_info = true; + } - _set_scan_concurrency(params, query_ctx.get()); - - if (params.__isset.workload_groups && !params.workload_groups.empty()) { - uint64_t tg_id = params.workload_groups[0].id; - WorkloadGroupPtr workload_group_ptr = - _exec_env->workload_group_mgr()->get_task_group_by_id(tg_id); - if (workload_group_ptr != nullptr) { - RETURN_IF_ERROR(workload_group_ptr->add_query(query_id, query_ctx)); - RETURN_IF_ERROR(query_ctx->set_workload_group(workload_group_ptr)); - _exec_env->runtime_query_statistics_mgr()->set_workload_group_id(print_id(query_id), - tg_id); - } else { - LOG(WARNING) << "Query/load id: " << print_id(query_ctx->query_id()) - << "can't find its workload group " << tg_id; + _set_scan_concurrency(params, query_ctx.get()); + + if (params.__isset.workload_groups && !params.workload_groups.empty()) { + uint64_t tg_id = params.workload_groups[0].id; + WorkloadGroupPtr workload_group_ptr = + _exec_env->workload_group_mgr()->get_task_group_by_id(tg_id); + if (workload_group_ptr != nullptr) { + 
RETURN_IF_ERROR(workload_group_ptr->add_query(query_id, query_ctx)); + RETURN_IF_ERROR(query_ctx->set_workload_group(workload_group_ptr)); + _exec_env->runtime_query_statistics_mgr()->set_workload_group_id( + print_id(query_id), tg_id); + } else { + LOG(WARNING) << "Query/load id: " << print_id(query_ctx->query_id()) + << "can't find its workload group " << tg_id; + } + } + // There is some logic in query ctx's dctor, we could not check if exists and delete the + // temp query ctx now. For example, the query id maybe removed from workload group's queryset. + _query_ctx_map.insert({query_id, query_ctx}); } } - // There is some logic in query ctx's dctor, we could not check if exists and delete the - // temp query ctx now. For example, the query id maybe removed from workload group's queryset. - _query_ctx_map.insert(std::make_pair(query_ctx->query_id(), query_ctx)); } return Status::OK(); } @@ -723,13 +715,13 @@ std::string FragmentMgr::dump_pipeline_tasks(int64_t duration) { fmt::memory_buffer debug_string_buffer; size_t i = 0; { - std::lock_guard lock(_lock); fmt::format_to(debug_string_buffer, "{} pipeline fragment contexts are still running! 
duration_limit={}\n", _pipeline_map.size(), duration); - timespec now; clock_gettime(CLOCK_MONOTONIC, &now); + + std::shared_lock lock(_pipeline_map_mutex); for (auto& it : _pipeline_map) { auto elapsed = it.second->elapsed_time() / 1000000000.0; if (elapsed < duration) { @@ -748,7 +740,7 @@ std::string FragmentMgr::dump_pipeline_tasks(int64_t duration) { } std::string FragmentMgr::dump_pipeline_tasks(TUniqueId& query_id) { - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { + if (auto q_ctx = get_query_ctx(query_id)) { return q_ctx->print_all_pipeline_context(); } else { return fmt::format( @@ -767,7 +759,8 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, << apache::thrift::ThriftDebugString(params.query_options).c_str(); std::shared_ptr query_ctx; - RETURN_IF_ERROR(_get_query_ctx(params, params.query_id, true, query_source, query_ctx)); + RETURN_IF_ERROR( + _get_or_create_query_ctx(params, params.query_id, true, query_source, query_ctx)); SCOPED_ATTACH_TASK(query_ctx.get()); int64_t duration_ns = 0; std::shared_ptr context = @@ -800,16 +793,8 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, } { - // (query_id, fragment_id) is executed only on one BE, locks _pipeline_map. 
- std::lock_guard lock(_lock); for (const auto& local_param : params.local_params) { const TUniqueId& fragment_instance_id = local_param.fragment_instance_id; - auto iter = _pipeline_map.find({params.query_id, params.fragment_id}); - if (iter != _pipeline_map.end()) { - return Status::InternalError( - "exec_plan_fragment query_id({}) input duplicated fragment_id({})", - print_id(params.query_id), params.fragment_id); - } query_ctx->fragment_instance_ids.push_back(fragment_instance_id); } @@ -818,7 +803,15 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, .count(); g_fragment_executing_count << 1; g_fragment_last_active_time.set_value(now); - // TODO: simplify this mapping + + // (query_id, fragment_id) is executed only on one BE, locks _pipeline_map. + std::unique_lock lock(_pipeline_map_mutex); + auto iter = _pipeline_map.find({params.query_id, params.fragment_id}); + if (iter != _pipeline_map.end()) { + return Status::InternalError( + "exec_plan_fragment query_id({}) input duplicated fragment_id({})", + print_id(params.query_id), params.fragment_id); + } _pipeline_map.insert({{params.query_id, params.fragment_id}, context}); } @@ -848,8 +841,7 @@ void FragmentMgr::cancel_query(const TUniqueId query_id, const Status reason) { std::shared_ptr query_ctx = nullptr; std::vector all_instance_ids; { - std::lock_guard state_lock(_lock); - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { + if (auto q_ctx = get_query_ctx(query_id)) { query_ctx = q_ctx; // Copy instanceids to avoid concurrent modification. // And to reduce the scope of lock. 
@@ -862,7 +854,7 @@ void FragmentMgr::cancel_query(const TUniqueId query_id, const Status reason) { } query_ctx->cancel(reason); { - std::lock_guard state_lock(_lock); + std::unique_lock l(_query_ctx_map_mutex); _query_ctx_map.erase(query_id); } LOG(INFO) << "Query " << print_id(query_id) @@ -898,7 +890,7 @@ void FragmentMgr::cancel_worker() { std::vector> ctx; { - std::lock_guard lock(_lock); + std::shared_lock lock(_pipeline_map_mutex); ctx.reserve(_pipeline_map.size()); for (auto& pipeline_itr : _pipeline_map) { ctx.push_back(pipeline_itr.second); @@ -910,29 +902,34 @@ void FragmentMgr::cancel_worker() { std::unordered_map, BrpcItem> brpc_stub_with_queries; { - std::lock_guard lock(_lock); - for (auto it = _query_ctx_map.begin(); it != _query_ctx_map.end();) { - if (auto q_ctx = it->second.lock()) { - if (q_ctx->is_timeout(now)) { - LOG_WARNING("Query {} is timeout", print_id(it->first)); - queries_timeout.push_back(it->first); - } else if (config::enable_brpc_connection_check) { - auto brpc_stubs = q_ctx->get_using_brpc_stubs(); - for (auto& item : brpc_stubs) { - if (!brpc_stub_with_queries.contains(item.second)) { - brpc_stub_with_queries.emplace(item.second, - BrpcItem {item.first, {q_ctx}}); - } else { - brpc_stub_with_queries[item.second].queries.emplace_back(q_ctx); + { + // TODO: Now only the cancel worker do the GC the _query_ctx_map. each query must + // do erase the finish query unless in _query_ctx_map. 
Rethink the logic is ok + std::unique_lock lock(_query_ctx_map_mutex); + for (auto it = _query_ctx_map.begin(); it != _query_ctx_map.end();) { + if (auto q_ctx = it->second.lock()) { + if (q_ctx->is_timeout(now)) { + LOG_WARNING("Query {} is timeout", print_id(it->first)); + queries_timeout.push_back(it->first); + } else if (config::enable_brpc_connection_check) { + auto brpc_stubs = q_ctx->get_using_brpc_stubs(); + for (auto& item : brpc_stubs) { + if (!brpc_stub_with_queries.contains(item.second)) { + brpc_stub_with_queries.emplace(item.second, + BrpcItem {item.first, {q_ctx}}); + } else { + brpc_stub_with_queries[item.second].queries.emplace_back(q_ctx); + } } } + ++it; + } else { + it = _query_ctx_map.erase(it); } - ++it; - } else { - it = _query_ctx_map.erase(it); } } + std::shared_lock lock(_query_ctx_map_mutex); // We use a very conservative cancel strategy. // 0. If there are no running frontends, do not cancel any queries. // 1. If query's process uuid is zero, do not cancel @@ -1215,7 +1212,7 @@ Status FragmentMgr::apply_filterv2(const PPublishFilterRequestV2* request, const auto& fragment_ids = request->fragment_ids(); { - std::unique_lock lock(_lock); + std::shared_lock lock(_pipeline_map_mutex); for (auto fragment_id : fragment_ids) { auto iter = _pipeline_map.find({UniqueId(request->query_id()).to_thrift(), fragment_id}); @@ -1267,8 +1264,7 @@ Status FragmentMgr::send_filter_size(const PSendFilterSizeRequest* request) { TUniqueId query_id; query_id.__set_hi(queryid.hi); query_id.__set_lo(queryid.lo); - std::lock_guard lock(_lock); - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { + if (auto q_ctx = get_query_ctx(query_id)) { query_ctx = q_ctx; } else { return Status::EndOfFile( @@ -1291,8 +1287,7 @@ Status FragmentMgr::sync_filter_size(const PSyncFilterSizeRequest* request) { TUniqueId query_id; query_id.__set_hi(queryid.hi); query_id.__set_lo(queryid.lo); - std::lock_guard lock(_lock); - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { + 
if (auto q_ctx = get_query_ctx(query_id)) { query_ctx = q_ctx; } else { return Status::EndOfFile( @@ -1312,8 +1307,7 @@ Status FragmentMgr::merge_filter(const PMergeFilterRequest* request, TUniqueId query_id; query_id.__set_hi(queryid.hi); query_id.__set_lo(queryid.lo); - std::lock_guard lock(_lock); - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { + if (auto q_ctx = get_query_ctx(query_id)) { query_ctx = q_ctx; } else { return Status::EndOfFile( @@ -1330,7 +1324,7 @@ Status FragmentMgr::merge_filter(const PMergeFilterRequest* request, void FragmentMgr::get_runtime_query_info(std::vector* query_info_list) { { - std::lock_guard lock(_lock); + std::unique_lock lock(_query_ctx_map_mutex); for (auto iter = _query_ctx_map.begin(); iter != _query_ctx_map.end();) { if (auto q_ctx = iter->second.lock()) { WorkloadQueryInfo workload_query_info; @@ -1353,19 +1347,9 @@ Status FragmentMgr::get_realtime_exec_status(const TUniqueId& query_id, return Status::InvalidArgument("exes_status is nullptr"); } - std::shared_ptr query_context = nullptr; - - { - std::lock_guard lock(_lock); - if (auto q_ctx = _get_or_erase_query_ctx(query_id)) { - query_context = q_ctx; - } else { - return Status::NotFound("Query {} has been released", print_id(query_id)); - } - } - + std::shared_ptr query_context = get_query_ctx(query_id); if (query_context == nullptr) { - return Status::NotFound("Query {} not found", print_id(query_id)); + return Status::NotFound("Query {} not found or released", print_id(query_id)); } *exec_status = query_context->get_realtime_exec_status(); diff --git a/be/src/runtime/fragment_mgr.h b/be/src/runtime/fragment_mgr.h index 0eac0469683961..63d666788d0a5f 100644 --- a/be/src/runtime/fragment_mgr.h +++ b/be/src/runtime/fragment_mgr.h @@ -133,7 +133,7 @@ class FragmentMgr : public RestMonitorIface { ThreadPool* get_thread_pool() { return _thread_pool.get(); } int32_t running_query_num() { - std::unique_lock ctx_lock(_lock); + std::shared_lock 
lock(_query_ctx_map_mutex); return _query_ctx_map.size(); } @@ -145,7 +145,7 @@ class FragmentMgr : public RestMonitorIface { Status get_realtime_exec_status(const TUniqueId& query_id, TReportExecStatusParams* exec_status); - std::shared_ptr get_or_erase_query_ctx_with_lock(const TUniqueId& query_id); + std::shared_ptr get_query_ctx(const TUniqueId& query_id); private: struct BrpcItem { @@ -153,14 +153,12 @@ class FragmentMgr : public RestMonitorIface { std::vector> queries; }; - std::shared_ptr _get_or_erase_query_ctx(const TUniqueId& query_id); - template void _set_scan_concurrency(const Param& params, QueryContext* query_ctx); - template - Status _get_query_ctx(const Params& params, TUniqueId query_id, bool pipeline, - QuerySource query_type, std::shared_ptr& query_ctx); + Status _get_or_create_query_ctx(const TPipelineFragmentParams& params, TUniqueId query_id, + bool pipeline, QuerySource query_type, + std::shared_ptr& query_ctx); void _check_brpc_available(const std::shared_ptr& brpc_stub, const BrpcItem& brpc_item); @@ -168,20 +166,21 @@ class FragmentMgr : public RestMonitorIface { // This is input params ExecEnv* _exec_env = nullptr; + // The lock protect the `_pipeline_map` + std::shared_mutex _pipeline_map_mutex; + // (QueryID, FragmentID) -> PipelineFragmentContext + phmap::flat_hash_map, + std::shared_ptr> + _pipeline_map; + // The lock should only be used to protect the structures in fragment manager. Has to be // used in a very small scope because it may dead lock. For example, if the _lock is used // in prepare stage, the call path is prepare --> expr prepare --> may call allocator // when allocate failed, allocator may call query_is_cancelled, query is callced will also // call _lock, so that there is dead lock. 
- std::mutex _lock; - - // (QueryID, FragmentID) -> PipelineFragmentContext - std::unordered_map, - std::shared_ptr> - _pipeline_map; - + std::shared_mutex _query_ctx_map_mutex; // query id -> QueryContext - std::unordered_map> _query_ctx_map; + phmap::flat_hash_map> _query_ctx_map; std::unordered_map> _bf_size_map; CountDownLatch _stop_background_threads_latch; diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp index 0cb313747b0373..dd426f1ab81d3e 100644 --- a/be/src/runtime/load_channel.cpp +++ b/be/src/runtime/load_channel.cpp @@ -45,8 +45,7 @@ LoadChannel::LoadChannel(const UniqueId& load_id, int64_t timeout_s, bool is_hig _backend_id(backend_id), _enable_profile(enable_profile) { std::shared_ptr query_context = - ExecEnv::GetInstance()->fragment_mgr()->get_or_erase_query_ctx_with_lock( - _load_id.to_thrift()); + ExecEnv::GetInstance()->fragment_mgr()->get_query_ctx(_load_id.to_thrift()); std::shared_ptr mem_tracker = nullptr; WorkloadGroupPtr wg_ptr = nullptr; diff --git a/be/src/runtime/load_stream.cpp b/be/src/runtime/load_stream.cpp index 752e2ff95b2917..60da45fa685fbf 100644 --- a/be/src/runtime/load_stream.cpp +++ b/be/src/runtime/load_stream.cpp @@ -428,7 +428,7 @@ LoadStream::LoadStream(PUniqueId load_id, LoadStreamMgr* load_stream_mgr, bool e TUniqueId load_tid = ((UniqueId)load_id).to_thrift(); #ifndef BE_TEST std::shared_ptr query_context = - ExecEnv::GetInstance()->fragment_mgr()->get_or_erase_query_ctx_with_lock(load_tid); + ExecEnv::GetInstance()->fragment_mgr()->get_query_ctx(load_tid); if (query_context != nullptr) { _query_thread_context = {load_tid, query_context->query_mem_tracker, query_context->workload_group()}; diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index e7e1c73e7cbb41..8f077a4eb45bb1 100644 --- a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "util/runtime_profile.h" namespace 
doris { @@ -123,6 +125,7 @@ class CachePolicy { {"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE}, {"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE}, {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}, + {"QueryCache", CacheType::QUERY_CACHE}, {"TabletColumnObjectPool", CacheType::TABLET_COLUMN_OBJECT_POOL}}; static CacheType string_to_type(std::string type) { @@ -133,6 +136,9 @@ class CachePolicy { } } + inline static std::vector MetadataCache { + CacheType::SEGMENT_CACHE, CacheType::SCHEMA_CACHE, CacheType::TABLET_SCHEMA_CACHE}; + CachePolicy(CacheType type, size_t capacity, uint32_t stale_sweep_time_s, bool enable_prune); virtual ~CachePolicy(); diff --git a/be/src/runtime/memory/lru_cache_policy.h b/be/src/runtime/memory/lru_cache_policy.h index ea34e2837f1313..3fdb43facd7715 100644 --- a/be/src/runtime/memory/lru_cache_policy.h +++ b/be/src/runtime/memory/lru_cache_policy.h @@ -104,20 +104,26 @@ class LRUCachePolicy : public CachePolicy { return _mem_tracker->consumption(); } + int64_t value_mem_consumption() { + DCHECK(_value_mem_tracker != nullptr); + return _value_mem_tracker->consumption(); + } + // Insert will consume tracking_bytes to _mem_tracker and cache value destroy will release tracking_bytes. - // If LRUCacheType::SIZE, tracking_bytes usually equal to charge. - // If LRUCacheType::NUMBER, tracking_bytes usually not equal to charge, at this time charge is an weight. - // If LRUCacheType::SIZE and tracking_bytes equals 0, memory must be tracked in Doris Allocator, + // If LRUCacheType::SIZE, value_tracking_bytes usually equal to charge. + // If LRUCacheType::NUMBER, value_tracking_bytes usually not equal to charge, at this time charge is an weight. + // If LRUCacheType::SIZE and value_tracking_bytes equals 0, memory must be tracked in Doris Allocator, // cache value is allocated using Alloctor. 
- // If LRUCacheType::NUMBER and tracking_bytes equals 0, usually currently cannot accurately tracking memory size, + // If LRUCacheType::NUMBER and value_tracking_bytes equals 0, usually currently cannot accurately tracking memory size, // only tracking handle_size(106). - Cache::Handle* insert(const CacheKey& key, void* value, size_t charge, size_t tracking_bytes, + Cache::Handle* insert(const CacheKey& key, void* value, size_t charge, + size_t value_tracking_bytes, CachePriority priority = CachePriority::NORMAL) { - size_t tracking_bytes_with_handle = sizeof(LRUHandle) - 1 + key.size() + tracking_bytes; + size_t tracking_bytes = sizeof(LRUHandle) - 1 + key.size() + value_tracking_bytes; if (value != nullptr) { - mem_tracker()->consume(tracking_bytes_with_handle); ((LRUCacheValueBase*)value) - ->set_tracking_bytes(tracking_bytes_with_handle, _mem_tracker); + ->set_tracking_bytes(tracking_bytes, _mem_tracker, value_tracking_bytes, + _value_mem_tracker); } return _cache->insert(key, value, charge, priority); } @@ -265,9 +271,18 @@ class LRUCachePolicy : public CachePolicy { protected: void _init_mem_tracker(const std::string& type_name) { - _mem_tracker = MemTrackerLimiter::create_shared( - MemTrackerLimiter::Type::GLOBAL, - fmt::format("{}[{}]", type_string(_type), type_name)); + if (std::find(CachePolicy::MetadataCache.begin(), CachePolicy::MetadataCache.end(), + _type) == CachePolicy::MetadataCache.end()) { + _mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::CACHE, + fmt::format("{}[{}]", type_string(_type), type_name)); + } else { + _mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::METADATA, + fmt::format("{}[{}]", type_string(_type), type_name)); + } + _value_mem_tracker = std::make_shared( + fmt::format("{}::Value[{}]", type_string(_type), type_name)); } // if check_capacity failed, will return dummy lru cache, @@ -277,6 +292,7 @@ class LRUCachePolicy : public CachePolicy { LRUCacheType _lru_cache_type; 
std::shared_ptr _mem_tracker; + std::shared_ptr _value_mem_tracker; }; } // namespace doris diff --git a/be/src/runtime/memory/lru_cache_value_base.h b/be/src/runtime/memory/lru_cache_value_base.h index f9e534e6600df8..a9a3ae5ddab632 100644 --- a/be/src/runtime/memory/lru_cache_value_base.h +++ b/be/src/runtime/memory/lru_cache_value_base.h @@ -28,18 +28,27 @@ class LRUCacheValueBase { virtual ~LRUCacheValueBase() { if (_tracking_bytes > 0) { _mem_tracker->release(_tracking_bytes); + _value_mem_tracker->release(_value_tracking_bytes); } } void set_tracking_bytes(size_t tracking_bytes, - const std::shared_ptr& mem_tracker) { + const std::shared_ptr& mem_tracker, + size_t value_tracking_bytes, + const std::shared_ptr& value_mem_tracker) { this->_tracking_bytes = tracking_bytes; this->_mem_tracker = mem_tracker; + this->_value_tracking_bytes = value_tracking_bytes; + this->_value_mem_tracker = value_mem_tracker; + _mem_tracker->consume(_tracking_bytes); + _value_mem_tracker->consume(_value_tracking_bytes); } protected: size_t _tracking_bytes = 0; + size_t _value_tracking_bytes = 0; std::shared_ptr _mem_tracker; + std::shared_ptr _value_mem_tracker; }; } // namespace doris diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index 05ff13f0e7c646..ac4684835a670c 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -66,9 +66,13 @@ MemTrackerLimiter::MemTrackerLimiter(Type type, const std::string& label, int64_ _uid = UniqueId::gen_uid(); if (_type == Type::GLOBAL) { _group_num = 0; + } else if (_type == Type::METADATA) { + _group_num = 1; + } else if (_type == Type::CACHE) { + _group_num = 2; } else { _group_num = - mem_tracker_limiter_group_counter.fetch_add(1) % (MEM_TRACKER_GROUP_NUM - 1) + 1; + mem_tracker_limiter_group_counter.fetch_add(1) % (MEM_TRACKER_GROUP_NUM - 3) + 3; } // currently only select/load need runtime query statistics @@ -208,24 +212,20 
@@ std::string MemTrackerLimiter::print_address_sanitizers() { RuntimeProfile* MemTrackerLimiter::make_profile(RuntimeProfile* profile) const { RuntimeProfile* profile_snapshot = profile->create_child( fmt::format("{}@{}@id={}", _label, type_string(_type), _uid.to_string()), true, false); - RuntimeProfile::Counter* current_usage_counter = - ADD_COUNTER(profile_snapshot, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* peak_usage_counter = - ADD_COUNTER(profile_snapshot, "PeakUsage", TUnit::BYTES); - COUNTER_SET(current_usage_counter, consumption()); - COUNTER_SET(peak_usage_counter, peak_consumption()); + RuntimeProfile::HighWaterMarkCounter* usage_counter = + profile_snapshot->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + COUNTER_SET(usage_counter, peak_consumption()); + COUNTER_SET(usage_counter, consumption()); if (has_limit()) { RuntimeProfile::Counter* limit_counter = ADD_COUNTER(profile_snapshot, "Limit", TUnit::BYTES); COUNTER_SET(limit_counter, _limit); } if (reserved_peak_consumption() != 0) { - RuntimeProfile::Counter* reserved_counter = - ADD_COUNTER(profile_snapshot, "ReservedMemory", TUnit::BYTES); - RuntimeProfile::Counter* reserved_peak_counter = - ADD_COUNTER(profile_snapshot, "ReservedPeakMemory", TUnit::BYTES); + RuntimeProfile::HighWaterMarkCounter* reserved_counter = + profile_snapshot->AddHighWaterMarkCounter("ReservedMemory", TUnit::BYTES); + COUNTER_SET(reserved_counter, reserved_peak_consumption()); COUNTER_SET(reserved_counter, reserved_consumption()); - COUNTER_SET(reserved_peak_counter, reserved_peak_consumption()); } return profile_snapshot; } @@ -268,8 +268,26 @@ void MemTrackerLimiter::make_type_trackers_profile(RuntimeProfile* profile, tracker->make_profile(profile); } } + } else if (type == Type::METADATA) { + std::lock_guard l( + ExecEnv::GetInstance()->mem_tracker_limiter_pool[1].group_lock); + for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[1].trackers) { + auto tracker = trackerWptr.lock(); 
+ if (tracker != nullptr) { + tracker->make_profile(profile); + } + } + } else if (type == Type::CACHE) { + std::lock_guard l( + ExecEnv::GetInstance()->mem_tracker_limiter_pool[2].group_lock); + for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[2].trackers) { + auto tracker = trackerWptr.lock(); + if (tracker != nullptr) { + tracker->make_profile(profile); + } + } } else { - for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { + for (unsigned i = 3; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { @@ -296,8 +314,8 @@ void MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(RuntimeProfil std::unique_ptr tmp_profile_snapshot = std::make_unique("tmpSnapshot"); std::priority_queue> max_pq; - // start from 2, not include global type. - for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { + // start from 3, not include global/metadata/cache type. + for (unsigned i = 3; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { @@ -326,13 +344,19 @@ void MemTrackerLimiter::make_all_tasks_tracker_profile(RuntimeProfile* profile) types_profile[Type::SCHEMA_CHANGE] = profile->create_child("SchemaChangeTasks", true, false); types_profile[Type::OTHER] = profile->create_child("OtherTasks", true, false); - // start from 2, not include global type. - for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { + // start from 3, not include global/metadata/cache type. 
+ for (unsigned i = 3; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { auto tracker = trackerWptr.lock(); if (tracker != nullptr) { + // BufferControlBlock will continue to exist for 5 minutes after the query ends, even if the + // result buffer is empty, and will not be shown in the profile. of course, this code is tricky. + if (tracker->consumption() == 0 && + tracker->label().starts_with("BufferControlBlock")) { + continue; + } tracker->make_profile(types_profile[tracker->type()]); } } diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 445856b1f6af83..43b20a410ff27c 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -77,12 +77,14 @@ class MemTrackerLimiter final { enum class GCType { PROCESS = 0, WORK_LOAD_GROUP = 1 }; enum class Type { - GLOBAL = 0, // Life cycle is the same as the process, e.g. Cache and default Orphan + GLOBAL = 0, // Life cycle is the same as the process, except cache and metadata. QUERY = 1, // Count the memory consumption of all Query tasks. LOAD = 2, // Count the memory consumption of all Load tasks. COMPACTION = 3, // Count the memory consumption of all Base and Cumulative tasks. SCHEMA_CHANGE = 4, // Count the memory consumption of all SchemaChange tasks. - OTHER = 5, + METADATA = 5, // Count the memory consumption of all Metadata. + CACHE = 6, // Count the memory consumption of all Cache. + OTHER = 7, // Count the memory consumption of all other tasks, such as Clone, Snapshot, etc.. 
}; static std::string type_string(Type type) { @@ -97,8 +99,12 @@ class MemTrackerLimiter final { return "compaction"; case Type::SCHEMA_CHANGE: return "schema_change"; + case Type::METADATA: + return "metadata"; + case Type::CACHE: + return "cache"; case Type::OTHER: - return "other"; + return "other_task"; default: LOG(FATAL) << "not match type of mem tracker limiter :" << static_cast(type); } @@ -158,6 +164,8 @@ class MemTrackerLimiter final { int64_t consumption() const { return _mem_counter.current_value(); } int64_t peak_consumption() const { return _mem_counter.peak_value(); } + // Use carefully! only memory that cannot be allocated using Doris Allocator needs to be consumed manually. + // Ideally, all memory should use Doris Allocator. void consume(int64_t bytes) { _mem_counter.add(bytes); if (_query_statistics) { diff --git a/be/src/runtime/memory/memory_profile.cpp b/be/src/runtime/memory/memory_profile.cpp index 8dbdcbdd3af769..5d649c526014af 100644 --- a/be/src/runtime/memory/memory_profile.cpp +++ b/be/src/runtime/memory/memory_profile.cpp @@ -18,6 +18,9 @@ #include "runtime/memory/memory_profile.h" #include "bvar/reducer.h" +#include "olap/metadata_adder.h" +#include "olap/schema_cache.h" +#include "olap/tablet_schema_cache.h" #include "runtime/exec_env.h" #include "runtime/memory/global_memory_arbitrator.h" #include "runtime/memory/mem_tracker_limiter.h" @@ -28,6 +31,9 @@ namespace doris { static bvar::Adder memory_all_tracked_sum_bytes("memory_all_tracked_sum_bytes"); static bvar::Adder memory_global_trackers_sum_bytes("memory_global_trackers_sum_bytes"); +static bvar::Adder memory_metadata_trackers_sum_bytes( + "memory_metadata_trackers_sum_bytes"); +static bvar::Adder memory_cache_trackers_sum_bytes("memory_cache_trackers_sum_bytes"); static bvar::Adder memory_query_trackers_sum_bytes("memory_query_trackers_sum_bytes"); static bvar::Adder memory_load_trackers_sum_bytes("memory_load_trackers_sum_bytes"); static bvar::Adder 
memory_compaction_trackers_sum_bytes( @@ -40,140 +46,122 @@ static bvar::Adder memory_all_tasks_memory_bytes("memory_all_tasks_memo static bvar::Adder memory_untracked_memory_bytes("memory_untracked_memory_bytes"); MemoryProfile::MemoryProfile() { - _memory_overview_profile.set(std::make_unique("MemoryOverviewSnapshot")); +#ifdef ADDRESS_SANITIZER + _memory_overview_profile = std::make_unique("[ASAN]MemoryOverviewSnapshot"); +#else + _memory_overview_profile = std::make_unique("MemoryOverviewSnapshot"); +#endif _global_memory_profile.set(std::make_unique("GlobalMemorySnapshot")); + _metadata_memory_profile.set(std::make_unique("MetadataMemorySnapshot")); + _cache_memory_profile.set(std::make_unique("CacheMemorySnapshot")); _top_memory_tasks_profile.set(std::make_unique("TopMemoryTasksSnapshot")); _tasks_memory_profile.set(std::make_unique("TasksMemorySnapshot")); + init_memory_overview_counter(); } -void MemoryProfile::refresh_memory_overview_profile() { -#ifdef ADDRESS_SANITIZER - std::unique_ptr memory_overview_profile = - std::make_unique("[ASAN]MemoryOverviewSnapshot"); -#else - std::unique_ptr memory_overview_profile = - std::make_unique("MemoryOverviewSnapshot"); -#endif - std::unique_ptr global_memory_profile = - std::make_unique("GlobalMemorySnapshot"); - std::unique_ptr top_memory_tasks_profile = - std::make_unique("TopMemoryTasksSnapshot"); - - // 1. 
create profile +void MemoryProfile::init_memory_overview_counter() { RuntimeProfile* untracked_memory_profile = - memory_overview_profile->create_child("UntrackedMemory", true, false); + _memory_overview_profile->create_child("UntrackedMemory", true, false); RuntimeProfile* tracked_memory_profile = - memory_overview_profile->create_child("TrackedMemory", true, false); + _memory_overview_profile->create_child("TrackedMemory", true, false); RuntimeProfile* tasks_memory_overview_profile = tracked_memory_profile->create_child("TasksMemory", true, false); RuntimeProfile* tasks_memory_overview_details_profile = tasks_memory_overview_profile->create_child("Details", true, false); RuntimeProfile* global_memory_overview_profile = tracked_memory_profile->create_child("GlobalMemory", true, false); + RuntimeProfile* metadata_memory_overview_profile = + tracked_memory_profile->create_child("MetadataMemory", true, false); + RuntimeProfile* cache_memory_overview_profile = + tracked_memory_profile->create_child("CacheMemory", true, false); RuntimeProfile* jemalloc_memory_profile = tracked_memory_profile->create_child("JemallocMemory", true, false); RuntimeProfile* jemalloc_memory_details_profile = jemalloc_memory_profile->create_child("Details", true, false); - // 2. 
add counter - // 2.1 add process memory counter - RuntimeProfile::Counter* process_physical_memory_current_usage_counter = - ADD_COUNTER(memory_overview_profile, "PhysicalMemory(VmRSS)", TUnit::BYTES); - RuntimeProfile::Counter* process_physical_memory_peak_usage_counter = - memory_overview_profile->AddHighWaterMarkCounter("PhysicalMemoryPeak", TUnit::BYTES); - RuntimeProfile::Counter* process_virtual_memory_current_usage_counter = - ADD_COUNTER(memory_overview_profile, "VirtualMemory(VmSize)", TUnit::BYTES); - RuntimeProfile::Counter* process_virtual_memory_peak_usage_counter = - memory_overview_profile->AddHighWaterMarkCounter("VirtualMemoryPeak", TUnit::BYTES); - - // 2.2 add untracked memory counter - RuntimeProfile::Counter* untracked_memory_current_usage_counter = - ADD_COUNTER(untracked_memory_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* untracked_memory_peak_usage_counter = - untracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - - // 2.3 add tracked memory counter - RuntimeProfile::Counter* tracked_memory_current_usage_counter = - ADD_COUNTER(tracked_memory_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* tracked_memory_peak_usage_counter = - tracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - - // 2.4 add jemalloc memory counter - RuntimeProfile::Counter* jemalloc_memory_current_usage_counter = - ADD_COUNTER(jemalloc_memory_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_memory_peak_usage_counter = - jemalloc_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_cache_current_usage_counter = - ADD_COUNTER(jemalloc_memory_details_profile, "Cache", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_cache_peak_usage_counter = - jemalloc_memory_details_profile->AddHighWaterMarkCounter("CachePeak", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_metadata_current_usage_counter = - 
ADD_COUNTER(jemalloc_memory_details_profile, "Metadata", TUnit::BYTES); - RuntimeProfile::Counter* jemalloc_metadata_peak_usage_counter = - jemalloc_memory_details_profile->AddHighWaterMarkCounter("MetadataPeak", TUnit::BYTES); - - // 2.5 add global memory counter - RuntimeProfile::Counter* global_current_usage_counter = - ADD_COUNTER(global_memory_overview_profile, "CurrentUsage", TUnit::BYTES); - RuntimeProfile::Counter* global_peak_usage_counter = - global_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - - // 2.6 add tasks memory counter - RuntimeProfile::Counter* tasks_memory_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_profile, "CurrentUsage", TUnit::BYTES, 1); + // 1 add process memory counter + _process_physical_memory_usage_counter = _memory_overview_profile->AddHighWaterMarkCounter( + "PhysicalMemory(VmRSS)", TUnit::BYTES); + _process_virtual_memory_usage_counter = _memory_overview_profile->AddHighWaterMarkCounter( + "VirtualMemory(VmSize)", TUnit::BYTES); + + // 2 add untracked/tracked memory counter + _untracked_memory_usage_counter = + untracked_memory_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _tracked_memory_usage_counter = + tracked_memory_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + + // 3 add Jemalloc memory counter + _jemalloc_memory_usage_counter = + jemalloc_memory_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _jemalloc_cache_usage_counter = + jemalloc_memory_details_profile->AddHighWaterMarkCounter("Cache", TUnit::BYTES); + _jemalloc_metadata_usage_counter = + jemalloc_memory_details_profile->AddHighWaterMarkCounter("Metadata", TUnit::BYTES); + + // 4 add global/metadata/cache memory counter + _global_usage_counter = + global_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _metadata_usage_counter = + metadata_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + _cache_usage_counter = + 
cache_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); + + // 5 add tasks memory counter + _tasks_memory_usage_counter = + tasks_memory_overview_profile->AddHighWaterMarkCounter("Memory", TUnit::BYTES); // Reserved memory is the sum of all task reserved memory, is duplicated with all task memory counter. - RuntimeProfile::Counter* reserved_memory_current_usage_counter = ADD_CHILD_COUNTER_WITH_LEVEL( - tasks_memory_overview_profile, "ReservedMemory", TUnit::BYTES, "CurrentUsage", 1); - RuntimeProfile::Counter* reserved_memory_peak_usage_counter = - tasks_memory_overview_profile->AddHighWaterMarkCounter("ReservedMemoryPeak", - TUnit::BYTES, "CurrentUsage", 1); - RuntimeProfile::Counter* tasks_memory_peak_usage_counter = - tasks_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); - RuntimeProfile::Counter* query_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Query", TUnit::BYTES, 1); - RuntimeProfile::Counter* query_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "QueryPeak", TUnit::BYTES, "Query", 1); - RuntimeProfile::Counter* load_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Load", TUnit::BYTES, 1); - RuntimeProfile::Counter* load_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter("LoadPeak", TUnit::BYTES, - "Load", 1); - RuntimeProfile::Counter* load_all_memtables_current_usage_counter = - ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, - "AllMemTablesMemory", TUnit::BYTES, "Load", 1); - RuntimeProfile::Counter* load_all_memtables_peak_usage_counter = - ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, - "AllMemTablesMemoryPeak", TUnit::BYTES, "Load", 1); - RuntimeProfile::Counter* compaction_current_usage_counter = ADD_COUNTER_WITH_LEVEL( - tasks_memory_overview_details_profile, "Compaction", TUnit::BYTES, 1); - 
RuntimeProfile::Counter* compaction_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "CompactionPeak", TUnit::BYTES, "Compaction", 1); - RuntimeProfile::Counter* schema_change_current_usage_counter = ADD_COUNTER_WITH_LEVEL( - tasks_memory_overview_details_profile, "SchemaChange", TUnit::BYTES, 1); - RuntimeProfile::Counter* schema_change_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "SchemaChangePeak", TUnit::BYTES, "SchemaChange", 1); - RuntimeProfile::Counter* other_current_usage_counter = - ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Other", TUnit::BYTES, 1); - RuntimeProfile::Counter* other_peak_usage_counter = - tasks_memory_overview_details_profile->AddHighWaterMarkCounter( - "OtherPeak", TUnit::BYTES, "Other", 1); - // 3. refresh counter - // 3.1 refresh process memory counter - COUNTER_SET(process_physical_memory_current_usage_counter, + _reserved_memory_usage_counter = tasks_memory_overview_profile->AddHighWaterMarkCounter( + "ReservedMemory", TUnit::BYTES, "Memory", 1); + _query_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Query", TUnit::BYTES); + _load_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Load", TUnit::BYTES); + _load_all_memtables_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("AllMemTablesMemory", + TUnit::BYTES, "Load", 1); + _compaction_usage_counter = tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "Compaction", TUnit::BYTES); + _schema_change_usage_counter = tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "SchemaChange", TUnit::BYTES); + _other_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("Other", TUnit::BYTES); +} + +void MemoryProfile::refresh_memory_overview_profile() { + // 1 create profile + std::unique_ptr global_memory_profile = + 
std::make_unique("GlobalMemorySnapshot"); + std::unique_ptr metadata_memory_profile = + std::make_unique("MetadataMemorySnapshot"); + std::unique_ptr cache_memory_profile = + std::make_unique("CacheMemorySnapshot"); + std::unique_ptr top_memory_tasks_profile = + std::make_unique("TopMemoryTasksSnapshot"); + + // 2 refresh process memory counter + COUNTER_SET(_process_physical_memory_usage_counter, PerfCounters::get_vm_rss()); // from /proc VmRSS VmHWM - COUNTER_SET(process_physical_memory_peak_usage_counter, PerfCounters::get_vm_hwm()); - COUNTER_SET(process_virtual_memory_current_usage_counter, + COUNTER_SET(_process_virtual_memory_usage_counter, PerfCounters::get_vm_size()); // from /proc VmSize VmPeak - COUNTER_SET(process_virtual_memory_peak_usage_counter, PerfCounters::get_vm_peak()); - // 3.2 refresh tracked memory counter + // 2 refresh metadata memory tracker + ExecEnv::GetInstance()->tablets_no_cache_mem_tracker()->set_consumption( + MetadataAdder::get_all_tablets_size() - + TabletSchemaCache::instance()->value_mem_consumption() - + SchemaCache::instance()->value_mem_consumption()); + ExecEnv::GetInstance()->rowsets_no_cache_mem_tracker()->set_consumption( + MetadataAdder::get_all_rowsets_size()); + ExecEnv::GetInstance()->segments_no_cache_mem_tracker()->set_consumption( + MetadataAdder::get_all_segments_estimate_size() - + SegmentLoader::instance()->cache_mem_usage()); + + // 4 refresh tracked memory counter std::unordered_map type_mem_sum = { {MemTrackerLimiter::Type::GLOBAL, 0}, {MemTrackerLimiter::Type::QUERY, 0}, {MemTrackerLimiter::Type::LOAD, 0}, {MemTrackerLimiter::Type::COMPACTION, 0}, - {MemTrackerLimiter::Type::SCHEMA_CHANGE, 0}, {MemTrackerLimiter::Type::OTHER, 0}}; + {MemTrackerLimiter::Type::SCHEMA_CHANGE, 0}, {MemTrackerLimiter::Type::METADATA, 0}, + {MemTrackerLimiter::Type::CACHE, 0}, {MemTrackerLimiter::Type::OTHER, 0}}; // always ExecEnv::ready(), because Daemon::_stop_background_threads_latch for (auto& group : 
ExecEnv::GetInstance()->mem_tracker_limiter_pool) { std::lock_guard l(group.group_lock); @@ -191,42 +179,46 @@ void MemoryProfile::refresh_memory_overview_profile() { all_tracked_mem_sum += it.second; switch (it.first) { case MemTrackerLimiter::Type::GLOBAL: - COUNTER_SET(global_current_usage_counter, it.second); - COUNTER_SET(global_peak_usage_counter, it.second); + COUNTER_SET(_global_usage_counter, it.second); memory_global_trackers_sum_bytes << it.second - memory_global_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::QUERY: - COUNTER_SET(query_current_usage_counter, it.second); - COUNTER_SET(query_peak_usage_counter, it.second); + COUNTER_SET(_query_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_query_trackers_sum_bytes << it.second - memory_query_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::LOAD: - COUNTER_SET(load_current_usage_counter, it.second); - COUNTER_SET(load_peak_usage_counter, it.second); + COUNTER_SET(_load_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_load_trackers_sum_bytes << it.second - memory_load_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::COMPACTION: - COUNTER_SET(compaction_current_usage_counter, it.second); - COUNTER_SET(compaction_peak_usage_counter, it.second); + COUNTER_SET(_compaction_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_compaction_trackers_sum_bytes << it.second - memory_compaction_trackers_sum_bytes.get_value(); break; case MemTrackerLimiter::Type::SCHEMA_CHANGE: - COUNTER_SET(schema_change_current_usage_counter, it.second); - COUNTER_SET(schema_change_peak_usage_counter, it.second); + COUNTER_SET(_schema_change_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_schema_change_trackers_sum_bytes << it.second - memory_schema_change_trackers_sum_bytes.get_value(); break; + case MemTrackerLimiter::Type::METADATA: + COUNTER_SET(_metadata_usage_counter, it.second); + 
memory_metadata_trackers_sum_bytes + << it.second - memory_metadata_trackers_sum_bytes.get_value(); + break; + case MemTrackerLimiter::Type::CACHE: + COUNTER_SET(_cache_usage_counter, it.second); + memory_cache_trackers_sum_bytes + << it.second - memory_cache_trackers_sum_bytes.get_value(); + break; case MemTrackerLimiter::Type::OTHER: - COUNTER_SET(other_current_usage_counter, it.second); - COUNTER_SET(other_peak_usage_counter, it.second); + COUNTER_SET(_other_usage_counter, it.second); tasks_trackers_mem_sum += it.second; memory_other_trackers_sum_bytes << it.second - memory_other_trackers_sum_bytes.get_value(); @@ -235,60 +227,52 @@ void MemoryProfile::refresh_memory_overview_profile() { MemTrackerLimiter::make_type_trackers_profile(global_memory_profile.get(), MemTrackerLimiter::Type::GLOBAL); + MemTrackerLimiter::make_type_trackers_profile(metadata_memory_profile.get(), + MemTrackerLimiter::Type::METADATA); + MemTrackerLimiter::make_type_trackers_profile(cache_memory_profile.get(), + MemTrackerLimiter::Type::CACHE); MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(top_memory_tasks_profile.get(), 15); - COUNTER_SET(tasks_memory_current_usage_counter, tasks_trackers_mem_sum); - COUNTER_SET(tasks_memory_peak_usage_counter, tasks_trackers_mem_sum); + COUNTER_SET(_tasks_memory_usage_counter, tasks_trackers_mem_sum); memory_all_tasks_memory_bytes << tasks_trackers_mem_sum - memory_all_tasks_memory_bytes.get_value(); - COUNTER_SET(reserved_memory_current_usage_counter, - GlobalMemoryArbitrator::process_reserved_memory()); - COUNTER_SET(reserved_memory_peak_usage_counter, - GlobalMemoryArbitrator::process_reserved_memory()); + COUNTER_SET(_reserved_memory_usage_counter, GlobalMemoryArbitrator::process_reserved_memory()); memory_reserved_memory_bytes << GlobalMemoryArbitrator::process_reserved_memory() - memory_reserved_memory_bytes.get_value(); all_tracked_mem_sum += MemInfo::allocator_cache_mem(); - COUNTER_SET(jemalloc_cache_current_usage_counter, - 
static_cast(MemInfo::allocator_cache_mem())); - COUNTER_SET(jemalloc_cache_peak_usage_counter, + COUNTER_SET(_jemalloc_cache_usage_counter, static_cast(MemInfo::allocator_cache_mem())); all_tracked_mem_sum += MemInfo::allocator_metadata_mem(); - COUNTER_SET(jemalloc_metadata_current_usage_counter, - static_cast(MemInfo::allocator_metadata_mem())); - COUNTER_SET(jemalloc_metadata_peak_usage_counter, + COUNTER_SET(_jemalloc_metadata_usage_counter, static_cast(MemInfo::allocator_metadata_mem())); - COUNTER_SET(jemalloc_memory_current_usage_counter, - jemalloc_cache_current_usage_counter->value() + - jemalloc_metadata_current_usage_counter->value()); - COUNTER_SET(jemalloc_memory_peak_usage_counter, - jemalloc_cache_current_usage_counter->value() + - jemalloc_metadata_current_usage_counter->value()); - - COUNTER_SET(tracked_memory_current_usage_counter, all_tracked_mem_sum); - COUNTER_SET(tracked_memory_peak_usage_counter, all_tracked_mem_sum); + COUNTER_SET(_jemalloc_memory_usage_counter, + _jemalloc_cache_usage_counter->current_value() + + _jemalloc_metadata_usage_counter->current_value()); + + COUNTER_SET(_tracked_memory_usage_counter, all_tracked_mem_sum); memory_all_tracked_sum_bytes << all_tracked_mem_sum - memory_all_tracked_sum_bytes.get_value(); - // 3.3 refresh untracked memory counter + // 5 refresh untracked memory counter int64_t untracked_memory = - process_physical_memory_current_usage_counter->value() - all_tracked_mem_sum; - COUNTER_SET(untracked_memory_current_usage_counter, untracked_memory); - COUNTER_SET(untracked_memory_peak_usage_counter, untracked_memory); + _process_physical_memory_usage_counter->current_value() - all_tracked_mem_sum; + COUNTER_SET(_untracked_memory_usage_counter, untracked_memory); memory_untracked_memory_bytes << untracked_memory - memory_untracked_memory_bytes.get_value(); - // 3.4 refresh additional tracker printed when memory exceeds limit. 
- COUNTER_SET(load_all_memtables_current_usage_counter, - ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->consumption()); + // 6 refresh additional tracker printed when memory exceeds limit. COUNTER_SET( - load_all_memtables_peak_usage_counter, + _load_all_memtables_usage_counter, ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->peak_consumption()); + COUNTER_SET(_load_all_memtables_usage_counter, + ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->consumption()); - // 4. reset profile - _memory_overview_profile.set(std::move(memory_overview_profile)); + // 7. reset profile _global_memory_profile.set(std::move(global_memory_profile)); + _metadata_memory_profile.set(std::move(metadata_memory_profile)); + _cache_memory_profile.set(std::move(cache_memory_profile)); _top_memory_tasks_profile.set(std::move(top_memory_tasks_profile)); } @@ -302,16 +286,25 @@ void MemoryProfile::refresh_tasks_memory_profile() { void MemoryProfile::make_memory_profile(RuntimeProfile* profile) const { RuntimeProfile* memory_profile_snapshot = profile->create_child("MemoryProfile", true, false); - auto memory_overview_version_ptr = _memory_overview_profile.get(); RuntimeProfile* memory_overview_profile = - memory_profile_snapshot->create_child(memory_overview_version_ptr->name(), true, false); - memory_overview_profile->merge(const_cast(memory_overview_version_ptr.get())); + memory_profile_snapshot->create_child(_memory_overview_profile->name(), true, false); + memory_overview_profile->merge(const_cast(_memory_overview_profile.get())); auto global_memory_version_ptr = _global_memory_profile.get(); RuntimeProfile* global_memory_profile = memory_profile_snapshot->create_child(global_memory_version_ptr->name(), true, false); global_memory_profile->merge(const_cast(global_memory_version_ptr.get())); + auto metadata_memory_version_ptr = _metadata_memory_profile.get(); + RuntimeProfile* metadata_memory_profile = + 
memory_profile_snapshot->create_child(metadata_memory_version_ptr->name(), true, false); + metadata_memory_profile->merge(const_cast(metadata_memory_version_ptr.get())); + + auto cache_memory_version_ptr = _cache_memory_profile.get(); + RuntimeProfile* cache_memory_profile = + memory_profile_snapshot->create_child(cache_memory_version_ptr->name(), true, false); + cache_memory_profile->merge(const_cast(cache_memory_version_ptr.get())); + auto top_memory_tasks_version_ptr = _top_memory_tasks_profile.get(); RuntimeProfile* top_memory_tasks_profile = memory_profile_snapshot->create_child( top_memory_tasks_version_ptr->name(), true, false); @@ -346,6 +339,8 @@ void MemoryProfile::print_log_process_usage() { LOG(WARNING) << "Process Memory Summary: " + GlobalMemoryArbitrator::process_mem_log_str(); LOG(WARNING) << "\n" << print_memory_overview_profile(); LOG(WARNING) << "\n" << print_global_memory_profile(); + LOG(WARNING) << "\n" << print_metadata_memory_profile(); + LOG(WARNING) << "\n" << print_cache_memory_profile(); LOG(WARNING) << "\n" << print_top_memory_tasks_profile(); } } diff --git a/be/src/runtime/memory/memory_profile.h b/be/src/runtime/memory/memory_profile.h index 9f1bab0c02a802..c6aefb72f22e1a 100644 --- a/be/src/runtime/memory/memory_profile.h +++ b/be/src/runtime/memory/memory_profile.h @@ -33,31 +33,27 @@ class MemoryProfile { void make_memory_profile(RuntimeProfile* profile) const; std::string print_memory_overview_profile() const { - std::stringstream ss; - auto version_ptr = _memory_overview_profile.get(); - version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_memory_overview_profile.get()); } std::string print_global_memory_profile() const { - std::stringstream ss; - auto version_ptr = _global_memory_profile.get(); - version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_global_memory_profile.get().get()); + } + + std::string print_metadata_memory_profile() const { + return 
return_memory_profile_str(_metadata_memory_profile.get().get()); + } + + std::string print_cache_memory_profile() const { + return return_memory_profile_str(_cache_memory_profile.get().get()); } std::string print_top_memory_tasks_profile() const { - std::stringstream ss; - auto version_ptr = _top_memory_tasks_profile.get(); - version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_top_memory_tasks_profile.get().get()); } std::string print_tasks_memory_profile() const { - std::stringstream ss; - auto version_ptr = _tasks_memory_profile.get(); - version_ptr->pretty_print(&ss); - return ss.str(); + return return_memory_profile_str(_tasks_memory_profile.get().get()); } static int64_t query_current_usage(); @@ -71,11 +67,50 @@ class MemoryProfile { void print_log_process_usage(); private: - MultiVersion _memory_overview_profile; + std::string return_memory_profile_str(const RuntimeProfile* profile) const { + std::stringstream ss; + profile->pretty_print(&ss); + return ss.str(); + } + + void init_memory_overview_counter(); + + std::unique_ptr _memory_overview_profile; MultiVersion _global_memory_profile; + MultiVersion _metadata_memory_profile; + MultiVersion _cache_memory_profile; MultiVersion _top_memory_tasks_profile; MultiVersion _tasks_memory_profile; + // process memory counter + RuntimeProfile::HighWaterMarkCounter* _process_physical_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _process_virtual_memory_usage_counter; + + // untracked/tracked memory counter + RuntimeProfile::HighWaterMarkCounter* _untracked_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _tracked_memory_usage_counter; + + // Jemalloc memory counter + RuntimeProfile::HighWaterMarkCounter* _jemalloc_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _jemalloc_cache_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _jemalloc_metadata_usage_counter; + + // global/metadata/cache memory counter + 
RuntimeProfile::HighWaterMarkCounter* _global_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _metadata_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _cache_usage_counter; + + // tasks memory counter + RuntimeProfile::HighWaterMarkCounter* _tasks_memory_usage_counter; + // reserved memory is the sum of all task reserved memory, is duplicated with all task memory counter. + RuntimeProfile::HighWaterMarkCounter* _reserved_memory_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _query_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _load_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _load_all_memtables_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _compaction_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _schema_change_usage_counter; + RuntimeProfile::HighWaterMarkCounter* _other_usage_counter; + std::atomic _enable_print_log_process_usage {true}; }; diff --git a/be/src/runtime/routine_load/routine_load_task_executor.h b/be/src/runtime/routine_load/routine_load_task_executor.h index 0e597d796c9f77..b1196f7824afac 100644 --- a/be/src/runtime/routine_load/routine_load_task_executor.h +++ b/be/src/runtime/routine_load/routine_load_task_executor.h @@ -73,6 +73,8 @@ class RoutineLoadTaskExecutor { std::vector* partition_offsets, int timeout); + ThreadPool& get_thread_pool() { return *_thread_pool; } + private: // execute the task void exec_task(std::shared_ptr ctx, DataConsumerPool* pool, diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index bb100fcbb42ec5..c16db7c67d3420 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -90,7 +90,7 @@ std::vector> RuntimeFilterMgr::get_consume_filte Status RuntimeFilterMgr::register_consumer_filter(const TRuntimeFilterDesc& desc, const TQueryOptions& options, int node_id, std::shared_ptr* consumer_filter, - bool build_bf_exactly, bool need_local_merge) { + bool need_local_merge) { 
SCOPED_CONSUME_MEM_TRACKER(_tracker.get()); int32_t key = desc.filter_id; bool has_exist = false; @@ -110,7 +110,7 @@ Status RuntimeFilterMgr::register_consumer_filter(const TRuntimeFilterDesc& desc if (!has_exist) { std::shared_ptr filter; RETURN_IF_ERROR(IRuntimeFilter::create(_state, &desc, &options, RuntimeFilterRole::CONSUMER, - node_id, &filter, build_bf_exactly)); + node_id, &filter)); _consumer_map[key].emplace_back(node_id, filter); *consumer_filter = filter; } else if (!need_local_merge) { @@ -122,7 +122,7 @@ Status RuntimeFilterMgr::register_consumer_filter(const TRuntimeFilterDesc& desc Status RuntimeFilterMgr::register_local_merge_producer_filter( const doris::TRuntimeFilterDesc& desc, const doris::TQueryOptions& options, - std::shared_ptr producer_filter, bool build_bf_exactly) { + std::shared_ptr producer_filter) { DCHECK(_is_global); SCOPED_CONSUME_MEM_TRACKER(_tracker.get()); int32_t key = desc.filter_id; @@ -143,8 +143,7 @@ Status RuntimeFilterMgr::register_local_merge_producer_filter( if (iter->second.filters.empty()) { std::shared_ptr merge_filter; RETURN_IF_ERROR(IRuntimeFilter::create(_state, &desc, &options, - RuntimeFilterRole::PRODUCER, -1, &merge_filter, - build_bf_exactly)); + RuntimeFilterRole::PRODUCER, -1, &merge_filter)); merge_filter->set_ignored(); iter->second.filters.emplace_back(merge_filter); } @@ -181,10 +180,9 @@ doris::LocalMergeFilters* RuntimeFilterMgr::get_local_merge_producer_filters(int return &iter->second; } -Status RuntimeFilterMgr::register_producer_filter(const TRuntimeFilterDesc& desc, - const TQueryOptions& options, - std::shared_ptr* producer_filter, - bool build_bf_exactly) { +Status RuntimeFilterMgr::register_producer_filter( + const TRuntimeFilterDesc& desc, const TQueryOptions& options, + std::shared_ptr* producer_filter) { DCHECK(!_is_global); SCOPED_CONSUME_MEM_TRACKER(_tracker.get()); int32_t key = desc.filter_id; @@ -196,7 +194,7 @@ Status RuntimeFilterMgr::register_producer_filter(const 
TRuntimeFilterDesc& desc return Status::InvalidArgument("filter has registed"); } RETURN_IF_ERROR(IRuntimeFilter::create(_state, &desc, &options, RuntimeFilterRole::PRODUCER, -1, - producer_filter, build_bf_exactly)); + producer_filter)); _producer_map.emplace(key, *producer_filter); return Status::OK(); } @@ -233,8 +231,8 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( cnt_val->filter = cnt_val->pool->add(new IRuntimeFilter(_state, runtime_filter_desc)); auto filter_id = runtime_filter_desc->filter_id; - RETURN_IF_ERROR(cnt_val->filter->init_with_desc(&cnt_val->runtime_filter_desc, query_options, - -1, false)); + RETURN_IF_ERROR( + cnt_val->filter->init_with_desc(&cnt_val->runtime_filter_desc, query_options, -1)); cnt_val->filter->set_ignored(); _filter_map.emplace(filter_id, cnt_val); return Status::OK(); diff --git a/be/src/runtime/runtime_filter_mgr.h b/be/src/runtime/runtime_filter_mgr.h index 0a6f8318feaba0..9f4cf5f4e22a07 100644 --- a/be/src/runtime/runtime_filter_mgr.h +++ b/be/src/runtime/runtime_filter_mgr.h @@ -100,19 +100,17 @@ class RuntimeFilterMgr { // register filter Status register_consumer_filter(const TRuntimeFilterDesc& desc, const TQueryOptions& options, int node_id, std::shared_ptr* consumer_filter, - bool build_bf_exactly = false, bool need_local_merge = false); + bool need_local_merge = false); Status register_local_merge_producer_filter(const TRuntimeFilterDesc& desc, const TQueryOptions& options, - std::shared_ptr producer_filter, - bool build_bf_exactly = false); + std::shared_ptr producer_filter); Status get_local_merge_producer_filters(int filter_id, LocalMergeFilters** local_merge_filters); LocalMergeFilters* get_local_merge_producer_filters(int filter_id); Status register_producer_filter(const TRuntimeFilterDesc& desc, const TQueryOptions& options, - std::shared_ptr* producer_filter, - bool build_bf_exactly = false); + std::shared_ptr* producer_filter); // update filter by remote void set_runtime_filter_params(const 
TRuntimeFilterParams& runtime_filter_params); diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 344180bad771ac..f3376d06858ec0 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -123,37 +123,6 @@ RuntimeState::RuntimeState(const TUniqueId& instance_id, const TUniqueId& query_ DCHECK(_query_mem_tracker != nullptr && _query_mem_tracker->label() != "Orphan"); } -RuntimeState::RuntimeState(pipeline::PipelineFragmentContext*, const TUniqueId& instance_id, - const TUniqueId& query_id, int32_t fragment_id, - const TQueryOptions& query_options, const TQueryGlobals& query_globals, - ExecEnv* exec_env, QueryContext* ctx) - : _profile("Fragment " + print_id(instance_id)), - _load_channel_profile(""), - _obj_pool(new ObjectPool()), - _unreported_error_idx(0), - _query_id(query_id), - _fragment_id(fragment_id), - _per_fragment_instance_idx(0), - _num_rows_load_total(0), - _num_rows_load_filtered(0), - _num_rows_load_unselected(0), - _num_rows_filtered_in_strict_mode_partial_update(0), - _num_print_error_rows(0), - _num_bytes_load_total(0), - _num_finished_scan_range(0), - _error_row_number(0), - _query_ctx(ctx) { - [[maybe_unused]] auto status = init(instance_id, query_options, query_globals, exec_env); - _query_mem_tracker = ctx->query_mem_tracker; -#ifdef BE_TEST - if (_query_mem_tracker == nullptr) { - init_mem_trackers(); - } -#endif - DCHECK(_query_mem_tracker != nullptr && _query_mem_tracker->label() != "Orphan"); - DCHECK(status.ok()); -} - RuntimeState::RuntimeState(const TUniqueId& query_id, int32_t fragment_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx) @@ -295,7 +264,7 @@ Status RuntimeState::init(const TUniqueId& fragment_instance_id, const TQueryOpt } std::weak_ptr RuntimeState::get_query_ctx_weak() { - return _exec_env->fragment_mgr()->get_or_erase_query_ctx_with_lock(_query_ctx->query_id()); + return 
_exec_env->fragment_mgr()->get_query_ctx(_query_ctx->query_id()); } void RuntimeState::init_mem_trackers(const std::string& name, const TUniqueId& id) { @@ -516,14 +485,13 @@ RuntimeFilterMgr* RuntimeState::global_runtime_filter_mgr() { } Status RuntimeState::register_producer_runtime_filter( - const TRuntimeFilterDesc& desc, std::shared_ptr* producer_filter, - bool build_bf_exactly) { + const TRuntimeFilterDesc& desc, std::shared_ptr* producer_filter) { // Producers are created by local runtime filter mgr and shared by global runtime filter manager. // When RF is published, consumers in both global and local RF mgr will be found. - RETURN_IF_ERROR(local_runtime_filter_mgr()->register_producer_filter( - desc, query_options(), producer_filter, build_bf_exactly)); + RETURN_IF_ERROR(local_runtime_filter_mgr()->register_producer_filter(desc, query_options(), + producer_filter)); RETURN_IF_ERROR(global_runtime_filter_mgr()->register_local_merge_producer_filter( - desc, query_options(), *producer_filter, build_bf_exactly)); + desc, query_options(), *producer_filter)); return Status::OK(); } @@ -532,10 +500,10 @@ Status RuntimeState::register_consumer_runtime_filter( std::shared_ptr* consumer_filter) { if (desc.has_remote_targets || need_local_merge) { return global_runtime_filter_mgr()->register_consumer_filter(desc, query_options(), node_id, - consumer_filter, false, true); + consumer_filter, true); } else { return local_runtime_filter_mgr()->register_consumer_filter(desc, query_options(), node_id, - consumer_filter, false, false); + consumer_filter, false); } } diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 0bc81bca4d99a1..a49567109a3b31 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -85,12 +85,7 @@ class RuntimeState { const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx); - // for only use in pipelineX - 
RuntimeState(pipeline::PipelineFragmentContext*, const TUniqueId& instance_id, - const TUniqueId& query_id, int32 fragment_id, const TQueryOptions& query_options, - const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx); - - // Used by pipelineX. This runtime state is only used for setup. + // Used by pipeline. This runtime state is only used for setup. RuntimeState(const TUniqueId& query_id, int32 fragment_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx); @@ -561,8 +556,7 @@ class RuntimeState { } Status register_producer_runtime_filter(const doris::TRuntimeFilterDesc& desc, - std::shared_ptr* producer_filter, - bool build_bf_exactly); + std::shared_ptr* producer_filter); Status register_consumer_runtime_filter(const doris::TRuntimeFilterDesc& desc, bool need_local_merge, int node_id, diff --git a/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp b/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp index e935aff996d55e..c24fcb73384494 100644 --- a/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp +++ b/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp @@ -56,7 +56,7 @@ arrow::Status ArrowFlightBatchReaderBase::_return_invalid_status(const std::stri } ArrowFlightBatchReaderBase::~ArrowFlightBatchReaderBase() { - VLOG_NOTICE << fmt::format( + LOG(INFO) << fmt::format( "ArrowFlightBatchReader finished, packet_seq={}, result_addr={}:{}, finistId={}, " "convert_arrow_batch_timer={}, deserialize_block_timer={}, peak_memory_usage={}", _packet_seq, _statement->result_addr.hostname, _statement->result_addr.port, diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 57600d1f56aae9..912f9f5ff403e7 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -80,6 +80,7 @@ #include "util/doris_metrics.h" namespace doris { +#include "common/compile_check_begin.h" namespace { std::shared_ptr 
get_rate_limit_group(event_base* event_base) { auto rate_limit = config::download_binlog_rate_limit_kbs; @@ -473,4 +474,5 @@ int HttpService::get_real_port() const { return _ev_http_server->get_real_port(); } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index adcd07e7de7484..439f3f17faf00f 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -1240,7 +1240,10 @@ void PInternalService::report_stream_load_status(google::protobuf::RpcController void PInternalService::get_info(google::protobuf::RpcController* controller, const PProxyRequest* request, PProxyResult* response, google::protobuf::Closure* done) { - bool ret = _heavy_work_pool.try_offer([this, request, response, done]() { + bool ret = _exec_env->routine_load_task_executor()->get_thread_pool().submit_func([this, + request, + response, + done]() { brpc::ClosureGuard closure_guard(done); // PProxyRequest is defined in gensrc/proto/internal_service.proto // Currently it supports 2 kinds of requests: diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index d444daa8c68d11..fbf10b75ae02c0 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -38,14 +38,6 @@ namespace doris { // Utility class to compute hash values. 
class HashUtil { public: - template - static uint32_t fixed_len_to_uint32(T value) { - if constexpr (sizeof(T) <= sizeof(uint32_t)) { - return (uint32_t)value; - } - return std::hash()(value); - } - static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t hash) { return crc32(hash, (const unsigned char*)data, bytes); } @@ -305,7 +297,7 @@ class HashUtil { #endif } - static uint64_t hash64(const void* data, uint32_t bytes, uint64_t seed) { + static uint64_t hash64(const void* data, uint64_t bytes, uint64_t seed) { #ifdef _SSE4_2_ if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) { return crc_hash64(data, bytes, seed); diff --git a/be/src/util/runtime_profile.cpp b/be/src/util/runtime_profile.cpp index 45db607a342743..1df4d8b55c278e 100644 --- a/be/src/util/runtime_profile.cpp +++ b/be/src/util/runtime_profile.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include "common/object_pool.h" #include "util/container_util.hpp" @@ -72,8 +73,7 @@ void RuntimeProfile::merge(RuntimeProfile* other) { dst_iter = _counter_map.find(src_iter->first); if (dst_iter == _counter_map.end()) { - _counter_map[src_iter->first] = _pool->add( - new Counter(src_iter->second->type(), src_iter->second->value())); + _counter_map[src_iter->first] = _pool->add(src_iter->second->clone()); } else { DCHECK(dst_iter->second->type() == src_iter->second->type()); @@ -574,8 +574,6 @@ void RuntimeProfile::to_thrift(TRuntimeProfileTree* tree) { } void RuntimeProfile::to_thrift(std::vector* nodes) { - nodes->reserve(nodes->size() + _children.size()); - int index = nodes->size(); nodes->push_back(TRuntimeProfileNode()); TRuntimeProfileNode& node = (*nodes)[index]; @@ -602,10 +600,13 @@ void RuntimeProfile::to_thrift(std::vector* nodes) { ChildVector children; { + // _children may be modified during to_thrift(), + // so we have to lock and copy _children to avoid race condition std::lock_guard l(_children_lock); children = _children; } node.num_children = children.size(); + 
nodes->reserve(nodes->size() + children.size()); for (int i = 0; i < children.size(); ++i) { int child_idx = nodes->size(); diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h index 6e393ac673a628..7130acbd2f9427 100644 --- a/be/src/util/runtime_profile.h +++ b/be/src/util/runtime_profile.h @@ -100,6 +100,8 @@ class RuntimeProfile { : _value(value), _type(type), _level(level) {} virtual ~Counter() = default; + virtual Counter* clone() const { return new Counter(type(), value(), _level); } + virtual void update(int64_t delta) { _value.fetch_add(delta, std::memory_order_relaxed); } void bit_or(int64_t delta) { _value.fetch_or(delta, std::memory_order_relaxed); } @@ -137,7 +139,7 @@ class RuntimeProfile { TUnit::type type() const { return _type; } - virtual int64_t level() { return _level; } + virtual int64_t level() const { return _level; } private: friend class RuntimeProfile; @@ -151,8 +153,16 @@ class RuntimeProfile { /// as value()) and the current value. class HighWaterMarkCounter : public Counter { public: - HighWaterMarkCounter(TUnit::type unit, int64_t level, const std::string& parent_name) - : Counter(unit, 0, level), current_value_(0), _parent_name(parent_name) {} + HighWaterMarkCounter(TUnit::type unit, int64_t level, const std::string& parent_name, + int64_t value = 0, int64_t current_value = 0) + : Counter(unit, value, level), + current_value_(current_value), + _parent_name(parent_name) {} + + virtual Counter* clone() const override { + return new HighWaterMarkCounter(type(), level(), parent_name(), value(), + current_value()); + } void add(int64_t delta) { current_value_.fetch_add(delta, std::memory_order_relaxed); @@ -188,10 +198,9 @@ class RuntimeProfile { virtual void pretty_print(std::ostream* s, const std::string& prefix, const std::string& name) const override { std::ostream& stream = *s; - stream << prefix << " - " << name << ": " - << PrettyPrinter::print(current_value(), type()) << std::endl; - stream << prefix << " - " 
<< name << "Peak: " - << PrettyPrinter::print(_value.load(std::memory_order_relaxed), type()) + stream << prefix << " - " << name + << " Current: " << PrettyPrinter::print(current_value(), type()) << " (Peak: " + << PrettyPrinter::print(_value.load(std::memory_order_relaxed), type()) << ")" << std::endl; } @@ -217,6 +226,8 @@ class RuntimeProfile { int64_t current_value() const { return current_value_.load(std::memory_order_relaxed); } + std::string parent_name() const { return _parent_name; } + private: /// Set '_value' to 'v' if 'v' is larger than '_value'. The entire operation is /// atomic. @@ -247,8 +258,13 @@ class RuntimeProfile { // Do not call Set() and Update(). class DerivedCounter : public Counter { public: - DerivedCounter(TUnit::type type, const DerivedCounterFunction& counter_fn) - : Counter(type, 0), _counter_fn(counter_fn) {} + DerivedCounter(TUnit::type type, const DerivedCounterFunction& counter_fn, + int64_t value = 0, int64_t level = 1) + : Counter(type, value, level), _counter_fn(counter_fn) {} + + virtual Counter* clone() const override { + return new DerivedCounter(type(), _counter_fn, value(), level()); + } int64_t value() const override { return _counter_fn(); } @@ -259,8 +275,13 @@ class RuntimeProfile { // NonZeroCounter will not be converted to Thrift if the value is 0. 
class NonZeroCounter : public Counter { public: - NonZeroCounter(TUnit::type type, int64_t level, const std::string& parent_name) - : Counter(type, 0, level), _parent_name(parent_name) {} + NonZeroCounter(TUnit::type type, int64_t level, const std::string& parent_name, + int64_t value = 0) + : Counter(type, value, level), _parent_name(parent_name) {} + + virtual Counter* clone() const override { + return new NonZeroCounter(type(), level(), parent_name(), value()); + } void to_thrift(const std::string& name, std::vector& tcounters, std::map>& child_counters_map) override { @@ -272,6 +293,8 @@ class RuntimeProfile { } } + std::string parent_name() const { return _parent_name; } + private: const std::string _parent_name; }; diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h index 32fc9d5efce771..e0ec2bef62fc2a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function.h +++ b/be/src/vec/aggregate_functions/aggregate_function.h @@ -36,6 +36,7 @@ #include "vec/data_types/data_type_string.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" class Arena; class IColumn; @@ -598,3 +599,5 @@ class AggregateFunctionGuard { }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp index 18662bf66cf38c..8bf6c32c0872de 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.cpp @@ -29,6 +29,7 @@ #include "vec/functions/function.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" AggregateFunctionPtr create_aggregate_function_approx_count_distinct( const std::string& name, const DataTypes& argument_types, const bool result_is_nullable, diff --git 
a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h index d267499e059818..3ef22be9fca74c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h @@ -38,6 +38,7 @@ #include "vec/io/io_helper.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; class BufferReadable; @@ -64,8 +65,7 @@ struct AggregateFunctionApproxCountDistinctData { void write(BufferWritable& buf) const { std::string result; result.resize(hll_data.max_serialized_size()); - int size = hll_data.serialize((uint8_t*)result.data()); - result.resize(size); + result.resize(hll_data.serialize((uint8_t*)result.data())); write_binary(result, buf); } @@ -136,3 +136,5 @@ class AggregateFunctionApproxCountDistinct final }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.cpp b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp index 6a6711f90f983e..6109f0b0c601cd 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp @@ -25,6 +25,7 @@ #include "vec/core/field.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" template struct Avg { diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h b/be/src/vec/aggregate_functions/aggregate_function_avg.h index 62fbb8078ea949..8b24db692aef05 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h @@ -41,6 +41,7 @@ #include "vec/io/io_helper.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; class BufferReadable; @@ -72,7 +73,8 @@ struct AggregateFunctionAvgData { ResultT result() const { if 
constexpr (std::is_floating_point_v) { if constexpr (std::numeric_limits::is_iec559) { - return static_cast(sum) / count; /// allow division by zero + return static_cast(sum) / + static_cast(count); /// allow division by zero } } @@ -91,7 +93,7 @@ struct AggregateFunctionAvgData { if constexpr (IsDecimal256) { return static_cast(sum / T(count)); } else { - return static_cast(sum) / count; + return static_cast(sum) / static_cast(count); } } } @@ -124,7 +126,11 @@ class AggregateFunctionAvg final IsDecimalV2, ColumnDecimal, std::conditional_t, ColumnDecimal, ColumnFloat64>>; + // The result calculated by PercentileApprox is an approximate value, + // so the underlying storage uses float. The following calls will involve + // an implicit cast to float. + using DataType = typename Data::ResultType; /// ctor for native types AggregateFunctionAvg(const DataTypes& argument_types_) : IAggregateFunctionDataHelper>(argument_types_), @@ -148,9 +154,9 @@ class AggregateFunctionAvg final const auto& column = assert_cast(*columns[0]); if constexpr (IsDecimalNumber) { - this->data(place).sum += column.get_data()[row_num].value; + this->data(place).sum += (DataType)column.get_data()[row_num].value; } else { - this->data(place).sum += column.get_data()[row_num]; + this->data(place).sum += (DataType)column.get_data()[row_num]; } ++this->data(place).count; } @@ -282,3 +288,5 @@ class AggregateFunctionAvg final }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.cpp b/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.cpp index fc5df5303fd15d..70a707b02e992b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.cpp @@ -21,6 +21,7 @@ #include "vec/aggregate_functions/helpers.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" void 
register_aggregate_function_avg_weighted(AggregateFunctionSimpleFactory& factory) { factory.register_function_both("avg_weighted", creator_with_type::creator); diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h b/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h index b59a3dccf0cea8..d1a5921b45039f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h +++ b/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h @@ -35,6 +35,7 @@ #include "vec/io/io_helper.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; class BufferReadable; @@ -57,7 +58,7 @@ struct AggregateFunctionAvgWeightedData { DecimalV2Value value = binary_cast(data_val); data_sum = data_sum + (double(value) * weight_val); } else { - data_sum = data_sum + (data_val * weight_val); + data_sum = data_sum + (double(data_val) * weight_val); } weight_sum = weight_sum + weight_val; } @@ -138,3 +139,5 @@ class AggregateFunctionAvgWeight final }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_binary.h b/be/src/vec/aggregate_functions/aggregate_function_binary.h index 9fba9d11a1013a..fd5fc55d253661 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_binary.h +++ b/be/src/vec/aggregate_functions/aggregate_function_binary.h @@ -36,6 +36,7 @@ #include "vec/io/io_helper.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" template typename Moments> struct StatFunc { @@ -127,3 +128,5 @@ AggregateFunctionPtr create_with_two_basic_numeric_types(const DataTypePtr& firs } } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_bit.cpp b/be/src/vec/aggregate_functions/aggregate_function_bit.cpp index 97a6c0e92fa723..981ced1fbd5a46 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bit.cpp +++ 
b/be/src/vec/aggregate_functions/aggregate_function_bit.cpp @@ -24,6 +24,7 @@ #include "vec/aggregate_functions/helpers.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" void register_aggregate_function_bit(AggregateFunctionSimpleFactory& factory) { factory.register_function_both( diff --git a/be/src/vec/aggregate_functions/aggregate_function_bit.h b/be/src/vec/aggregate_functions/aggregate_function_bit.h index 1ab01b03ceea38..d9760fdd30080b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bit.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bit.h @@ -30,6 +30,7 @@ #include "vec/io/io_helper.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; class BufferReadable; @@ -142,4 +143,5 @@ class AggregateFunctionBitwise final } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp index e9c86d4b9556da..47ddf2d81b6a71 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp @@ -23,6 +23,7 @@ #include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" template class AggregateFunctionTemplate> AggregateFunctionPtr create_with_int_data_type(const DataTypes& argument_type) { @@ -33,7 +34,11 @@ AggregateFunctionPtr create_with_int_data_type(const DataTypes& argument_type) { return std::make_shared>>( \ argument_type); \ } - FOR_INTEGER_TYPES(DISPATCH) + // Keep consistent with the FE definition; the function does not have an int128 type. 
+ DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) #undef DISPATCH LOG(WARNING) << "with unknowed type, failed in create_with_int_data_type bitmap_union_int" << " and type is: " << argument_type[0]->get_name(); diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index b0619a63e1ffe8..fb17b0a80be092 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -38,6 +38,7 @@ #include "vec/data_types/data_type_number.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; class BufferReadable; @@ -432,4 +433,5 @@ AggregateFunctionPtr create_aggregate_function_bitmap_union(const std::string& n const DataTypes& argument_types, const bool result_is_nullable); -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp index 0b95ddfd46f0d5..2a2c86303f3000 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.cpp @@ -23,6 +23,7 @@ #include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" template AggregateFunctionPtr create_with_int_data_type(const DataTypes& argument_types) { @@ -32,7 +33,11 @@ AggregateFunctionPtr create_with_int_data_type(const DataTypes& argument_types) if (which.idx == TypeIndex::TYPE) { \ return std::make_shared>(argument_types); \ } - FOR_INTEGER_TYPES(DISPATCH) + // Keep consistent with the FE definition; the function does not have an int128 type. 
+ DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) #undef DISPATCH LOG(WARNING) << "with unknown type, failed in create_with_int_data_type bitmap_union_int" << " and type is: " << argument_types[0]->get_name(); diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h index 5747faf1b8e8c1..bff32aa606ccd2 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h @@ -31,6 +31,7 @@ #include "vec/data_types/data_type_bitmap.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; class BufferReadable; @@ -226,4 +227,5 @@ class AggregateFunctionBitmapAgg final } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp index d726b7c6355318..15806c739ed58c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp @@ -26,6 +26,7 @@ #include "vec/aggregate_functions/helpers.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" template AggregateFunctionPtr do_create_agg_function_collect(bool distinct, const DataTypes& argument_types, @@ -72,12 +73,18 @@ AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& n if (which.is_date_or_datetime()) { return do_create_agg_function_collect(distinct, argument_types, result_is_nullable); - } else if (which.is_date_v2() || which.is_ipv4()) { + } else if (which.is_date_v2()) { return do_create_agg_function_collect(distinct, argument_types, result_is_nullable); - } else if (which.is_date_time_v2() || which.is_ipv6()) { + } else if (which.is_date_time_v2()) { return 
do_create_agg_function_collect(distinct, argument_types, result_is_nullable); + } else if (which.is_ipv6()) { + return do_create_agg_function_collect(distinct, argument_types, + result_is_nullable); + } else if (which.is_ipv4()) { + return do_create_agg_function_collect(distinct, argument_types, + result_is_nullable); } else if (which.is_string()) { return do_create_agg_function_collect( distinct, argument_types, result_is_nullable); diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h index da310c6e0cc4c2..2d18a56313f3f9 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h @@ -46,6 +46,7 @@ #include "vec/io/var_int.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; } // namespace vectorized @@ -836,3 +837,5 @@ class AggregateFunctionCollect }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_combinator.h b/be/src/vec/aggregate_functions/aggregate_function_combinator.h index 1593d74ed4e59d..0908ac8d0278f1 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_combinator.h +++ b/be/src/vec/aggregate_functions/aggregate_function_combinator.h @@ -26,6 +26,7 @@ #include "vec/data_types/data_type.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" /** Aggregate function combinator allows to take one aggregate function * and transform it to another aggregate function. 
@@ -69,3 +70,5 @@ class IAggregateFunctionCombinator { }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_corr.cpp b/be/src/vec/aggregate_functions/aggregate_function_corr.cpp index cdaab6e086f4a5..e0a51ca6629a06 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_corr.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_corr.cpp @@ -21,6 +21,7 @@ #include "vec/core/types.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" template struct CorrMoment { diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.cpp b/be/src/vec/aggregate_functions/aggregate_function_count.cpp index 5cfe5af41982f6..72d12cf65fe9d0 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_count.cpp @@ -26,6 +26,7 @@ #include "vec/aggregate_functions/factory_helpers.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" AggregateFunctionPtr create_aggregate_function_count(const std::string& name, const DataTypes& argument_types, diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h index 7b54d074683b04..630994a7967957 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count.h +++ b/be/src/vec/aggregate_functions/aggregate_function_count.h @@ -41,6 +41,7 @@ #include "vec/io/var_int.h" namespace doris { +#include "common/compile_check_begin.h" namespace vectorized { class Arena; class BufferReadable; @@ -321,3 +322,5 @@ class AggregateFunctionCountNotNullUnary final }; } // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp index 093b31d57db554..20235d9e2ef2e9 100644 --- 
a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp @@ -26,6 +26,7 @@ #include "vec/core/types.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" AggregateFunctionPtr create_aggregate_function_count_by_enum(const std::string& name, const DataTypes& argument_types, diff --git a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h index 1f5093de68263e..543ae55f872da6 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h +++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h @@ -32,6 +32,7 @@ #include "vec/io/io_helper.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" struct CountByEnumData { std::unordered_map cbe; @@ -46,8 +47,7 @@ void build_json_from_vec(rapidjson::StringBuffer& buffer, doc.SetArray(); rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); - int vec_size_number = data_vec.size(); - for (int idx = 0; idx < vec_size_number; ++idx) { + for (size_t idx = 0; idx < data_vec.size(); ++idx) { rapidjson::Value obj(rapidjson::kObjectType); rapidjson::Value obj_cbe(rapidjson::kObjectType); @@ -239,4 +239,5 @@ class AggregateFunctionCountByEnum final size_t arg_count; }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_covar.cpp b/be/src/vec/aggregate_functions/aggregate_function_covar.cpp index 4c5fe1321952d6..d9c091fb601868 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_covar.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_covar.cpp @@ -28,6 +28,7 @@ #include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" template