Skip to content

Commit

Permalink
[fix](cloud-mow) Fix the issue of missing and removing some old version delete bitmap
Browse files Browse the repository at this point in the history
  • Loading branch information
hust-hhb committed Nov 22, 2024
1 parent ce55752 commit 1a5a788
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 76 deletions.
5 changes: 1 addition & 4 deletions be/src/cloud/cloud_cumulative_compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,9 @@ Status CloudCumulativeCompaction::process_old_version_delete_bitmap() {
rowset->rowset_id().to_string();
DeleteBitmap::BitmapKey start {rowset->rowset_id(), seg_id, 0};
DeleteBitmap::BitmapKey end {rowset->rowset_id(), seg_id, pre_max_version};
DeleteBitmap::BitmapKey before_end {rowset->rowset_id(), seg_id,
pre_max_version - 1};
auto d = _tablet->tablet_meta()->delete_bitmap().get_agg(
{rowset->rowset_id(), seg_id, pre_max_version});
to_remove_vec.emplace_back(
std::make_tuple(_tablet->tablet_id(), start, before_end));
to_remove_vec.emplace_back(std::make_tuple(_tablet->tablet_id(), start, end));
if (d->isEmpty()) {
continue;
}
Expand Down
61 changes: 56 additions & 5 deletions be/src/cloud/cloud_delete_bitmap_action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <thread>
#include <utility>

#include "cloud/cloud_meta_mgr.h"
#include "cloud/cloud_tablet.h"
#include "cloud/cloud_tablet_mgr.h"
#include "common/logging.h"
Expand Down Expand Up @@ -78,8 +79,8 @@ static Status _check_param(HttpRequest* req, uint64_t* tablet_id) {
return Status::OK();
}

Status CloudDeleteBitmapAction::_handle_show_delete_bitmap_count(HttpRequest* req,
std::string* json_result) {
Status CloudDeleteBitmapAction::_handle_show_local_delete_bitmap_count(HttpRequest* req,
std::string* json_result) {
uint64_t tablet_id = 0;
// check & retrieve tablet_id from req if it contains
RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), "check param failed");
Expand All @@ -95,7 +96,49 @@ Status CloudDeleteBitmapAction::_handle_show_delete_bitmap_count(HttpRequest* re
auto count = tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
auto cardinality = tablet->tablet_meta()->delete_bitmap().cardinality();
auto size = tablet->tablet_meta()->delete_bitmap().get_size();
LOG(INFO) << "show_delete_bitmap_count,tablet_id=" << tablet_id << ",count=" << count
LOG(INFO) << "show_local_delete_bitmap_count,tablet_id=" << tablet_id << ",count=" << count
<< ",cardinality=" << cardinality << ",size=" << size;

rapidjson::Document root;
root.SetObject();
root.AddMember("delete_bitmap_count", count, root.GetAllocator());
root.AddMember("cardinality", cardinality, root.GetAllocator());
root.AddMember("size", size, root.GetAllocator());

// to json string
rapidjson::StringBuffer strbuf;
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(strbuf);
root.Accept(writer);
*json_result = std::string(strbuf.GetString());

return Status::OK();
}

Status CloudDeleteBitmapAction::_handle_show_ms_delete_bitmap_count(HttpRequest* req,
std::string* json_result) {
uint64_t tablet_id = 0;
// check & retrieve tablet_id from req if it contains
RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), "check param failed");
if (tablet_id == 0) {
return Status::InternalError("check param failed: missing tablet_id");
}
TabletMetaSharedPtr tablet_meta;
auto st = _engine.meta_mgr().get_tablet_meta(tablet_id, &tablet_meta);
if (!st.ok()) {
LOG(WARNING) << "failed to get_tablet_meta tablet=" << tablet_id
<< ", st=" << st.to_string();
return st;
}
auto tablet = std::make_shared<CloudTablet>(_engine, std::move(tablet_meta));
st = _engine.meta_mgr().sync_tablet_rowsets(tablet.get(), false, true, true);
if (!st.ok()) {
LOG(WARNING) << "failed to sync tablet=" << tablet_id << ", st=" << st;
return st;
}
auto count = tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
auto cardinality = tablet->tablet_meta()->delete_bitmap().cardinality();
auto size = tablet->tablet_meta()->delete_bitmap().get_size();
LOG(INFO) << "show_ms_delete_bitmap_count,tablet_id=" << tablet_id << ",count=" << count
<< ",cardinality=" << cardinality << ",size=" << size;

rapidjson::Document root;
Expand All @@ -115,9 +158,17 @@ Status CloudDeleteBitmapAction::_handle_show_delete_bitmap_count(HttpRequest* re

void CloudDeleteBitmapAction::handle(HttpRequest* req) {
req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.data());
if (_delete_bitmap_action_type == DeleteBitmapActionType::COUNT_INFO) {
if (_delete_bitmap_action_type == DeleteBitmapActionType::COUNT_LOCAL) {
std::string json_result;
Status st = _handle_show_local_delete_bitmap_count(req, &json_result);
if (!st.ok()) {
HttpChannel::send_reply(req, HttpStatus::OK, st.to_json());
} else {
HttpChannel::send_reply(req, HttpStatus::OK, json_result);
}
} else if (_delete_bitmap_action_type == DeleteBitmapActionType::COUNT_MS) {
std::string json_result;
Status st = _handle_show_delete_bitmap_count(req, &json_result);
Status st = _handle_show_ms_delete_bitmap_count(req, &json_result);
if (!st.ok()) {
HttpChannel::send_reply(req, HttpStatus::OK, st.to_json());
} else {
Expand Down
5 changes: 3 additions & 2 deletions be/src/cloud/cloud_delete_bitmap_action.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class HttpRequest;

class ExecEnv;

enum class DeleteBitmapActionType { COUNT_INFO = 1 };
enum class DeleteBitmapActionType { COUNT_LOCAL = 1, COUNT_MS = 2 };

/// This action is used for viewing the delete bitmap status
class CloudDeleteBitmapAction : public HttpHandlerWithAuth {
Expand All @@ -45,7 +45,8 @@ class CloudDeleteBitmapAction : public HttpHandlerWithAuth {
void handle(HttpRequest* req) override;

private:
Status _handle_show_delete_bitmap_count(HttpRequest* req, std::string* json_result);
Status _handle_show_local_delete_bitmap_count(HttpRequest* req, std::string* json_result);
Status _handle_show_ms_delete_bitmap_count(HttpRequest* req, std::string* json_result);

private:
CloudStorageEngine& _engine;
Expand Down
15 changes: 10 additions & 5 deletions be/src/cloud/cloud_meta_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ Status CloudMetaMgr::get_tablet_meta(int64_t tablet_id, TabletMetaSharedPtr* tab
}

Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data,
bool sync_delete_bitmap) {
bool sync_delete_bitmap, bool full_sync) {
using namespace std::chrono;

TEST_SYNC_POINT_RETURN_WITH_VALUE("CloudMetaMgr::sync_tablet_rowsets", Status::OK(), tablet);
Expand All @@ -411,7 +411,11 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_
idx->set_partition_id(tablet->partition_id());
{
std::shared_lock rlock(tablet->get_header_lock());
req.set_start_version(tablet->max_version_unlocked() + 1);
if (full_sync) {
req.set_start_version(0);
} else {
req.set_start_version(tablet->max_version_unlocked() + 1);
}
req.set_base_compaction_cnt(tablet->base_compaction_cnt());
req.set_cumulative_compaction_cnt(tablet->cumulative_compaction_cnt());
req.set_cumulative_point(tablet->cumulative_layer_point());
Expand Down Expand Up @@ -471,7 +475,7 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_
DeleteBitmap delete_bitmap(tablet_id);
int64_t old_max_version = req.start_version() - 1;
auto st = sync_tablet_delete_bitmap(tablet, old_max_version, resp.rowset_meta(),
resp.stats(), req.idx(), &delete_bitmap);
resp.stats(), req.idx(), &delete_bitmap, full_sync);
if (st.is<ErrorCode::ROWSETS_EXPIRED>() && tried++ < retry_times) {
LOG_WARNING("rowset meta is expired, need to retry")
.tag("tablet", tablet->tablet_id())
Expand Down Expand Up @@ -617,12 +621,13 @@ bool CloudMetaMgr::sync_tablet_delete_bitmap_by_cache(CloudTablet* tablet, int64
Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_max_version,
std::ranges::range auto&& rs_metas,
const TabletStatsPB& stats, const TabletIndexPB& idx,
DeleteBitmap* delete_bitmap) {
DeleteBitmap* delete_bitmap, bool full_sync) {
if (rs_metas.empty()) {
return Status::OK();
}

if (sync_tablet_delete_bitmap_by_cache(tablet, old_max_version, rs_metas, delete_bitmap)) {
if (!full_sync &&
sync_tablet_delete_bitmap_by_cache(tablet, old_max_version, rs_metas, delete_bitmap)) {
return Status::OK();
} else {
LOG(WARNING) << "failed to sync delete bitmap by txn info. tablet_id="
Expand Down
5 changes: 3 additions & 2 deletions be/src/cloud/cloud_meta_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class CloudMetaMgr {
Status get_tablet_meta(int64_t tablet_id, std::shared_ptr<TabletMeta>* tablet_meta);

Status sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data = false,
bool sync_delete_bitmap = true);
bool sync_delete_bitmap = true, bool full_sync = false);

Status prepare_rowset(const RowsetMeta& rs_meta,
std::shared_ptr<RowsetMeta>* existed_rs_meta = nullptr);
Expand Down Expand Up @@ -116,7 +116,8 @@ class CloudMetaMgr {

Status sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_max_version,
std::ranges::range auto&& rs_metas, const TabletStatsPB& stats,
const TabletIndexPB& idx, DeleteBitmap* delete_bitmap);
const TabletIndexPB& idx, DeleteBitmap* delete_bitmap,
bool full_sync = false);
void check_table_size_correctness(const RowsetMeta& rs_meta);
int64_t get_segment_file_size(const RowsetMeta& rs_meta);
int64_t get_inverted_index_file_szie(const RowsetMeta& rs_meta);
Expand Down
6 changes: 5 additions & 1 deletion be/src/olap/tablet_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1205,9 +1205,13 @@ void DeleteBitmap::remove_stale_delete_bitmap_from_queue(const std::vector<std::
}
auto start_bmk = std::get<1>(delete_bitmap_tuple);
auto end_bmk = std::get<2>(delete_bitmap_tuple);
// The key range to be removed is [start_bmk, end_bmk).
// The local and MS removal paths define the right boundary differently,
// so use end_bmk as the right boundary when removing the local delete bitmap,
// and (end_bmk - 1) as the right boundary when removing the MS delete bitmap.
remove(start_bmk, end_bmk);
to_delete.emplace_back(std::make_tuple(std::get<0>(start_bmk).to_string(), 0,
std::get<2>(end_bmk)));
std::get<2>(end_bmk) - 1));
}
_stale_delete_bitmap.erase(version_str);
}
Expand Down
13 changes: 9 additions & 4 deletions be/src/service/http_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,16 @@ void HttpService::register_cloud_handler(CloudStorageEngine& engine) {
TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
_ev_http_server->register_handler(HttpMethod::GET, "/api/compaction/run_status",
run_status_compaction_action);
CloudDeleteBitmapAction* count_delete_bitmap_action =
_pool.add(new CloudDeleteBitmapAction(DeleteBitmapActionType::COUNT_INFO, _env, engine,
CloudDeleteBitmapAction* count_local_delete_bitmap_action =
_pool.add(new CloudDeleteBitmapAction(DeleteBitmapActionType::COUNT_LOCAL, _env, engine,
TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
_ev_http_server->register_handler(HttpMethod::GET, "/api/delete_bitmap/count",
count_delete_bitmap_action);
_ev_http_server->register_handler(HttpMethod::GET, "/api/delete_bitmap/count_local",
count_local_delete_bitmap_action);
CloudDeleteBitmapAction* count_ms_delete_bitmap_action =
_pool.add(new CloudDeleteBitmapAction(DeleteBitmapActionType::COUNT_MS, _env, engine,
TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
_ev_http_server->register_handler(HttpMethod::GET, "/api/delete_bitmap/count_ms",
count_ms_delete_bitmap_action);
#ifdef ENABLE_INJECTION_POINT
InjectionPointAction* injection_point_action = _pool.add(new InjectionPointAction);
_ev_http_server->register_handler(HttpMethod::GET, "/api/injection_point/{op}",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,78 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
0 0 0
1 8 8
0 0 8
1 1 1
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

-- !sql --
0 0 0
1 8 8
0 0 8
1 1 1
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

-- !sql --
0 0 0
0 0 13
1 13 13
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

-- !sql --
0 0 0
0 0 13
1 13 13
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

-- !sql --
0 0 0
0 0 18
1 23 23
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

-- !sql --
0 0 0
0 0 18
1 23 23
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

-- !sql --
0 0 0
0 5 5
1 28 28
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

Loading

0 comments on commit 1a5a788

Please sign in to comment.