From a0e7ca5bcce8649a51531ca1e39a1f9aa936231a Mon Sep 17 00:00:00 2001 From: abmdocrt Date: Fri, 22 Nov 2024 20:58:49 +0800 Subject: [PATCH] [Fix](checker) Fixed infinite loop after internal error in checker. (#44479) When the checker encounters an internal error, such as a transaction conflict, the return value is less than 0 and the function returns immediately, so the related instance is never removed from the map. Additionally, if the return value of do_check() is not 0, the inverted check is not performed. This PR fixes both of these issues. --- cloud/src/recycler/checker.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp index fe0887867b0052..19a10d61c12d35 100644 --- a/cloud/src/recycler/checker.cpp +++ b/cloud/src/recycler/checker.cpp @@ -168,25 +168,17 @@ int Checker::start() { auto ctime_ms = duration_cast(system_clock::now().time_since_epoch()).count(); g_bvar_checker_enqueue_cost_s.put(instance_id, ctime_ms / 1000 - enqueue_time_s); - ret = checker->do_check(); + int ret1 = checker->do_check(); + int ret2 = 0; if (config::enable_inverted_check) { - if (ret == 0) { - ret = checker->do_inverted_check(); - } - } - - if (ret < 0) { - // If ret < 0, it means that a temporary error occurred during the check process. - // The check job should not be considered finished, and the next round of check job - // should be retried as soon as possible. - return; + ret2 = checker->do_inverted_check(); } // If instance checker has been aborted, don't finish this job if (!checker->stopped()) { finish_instance_recycle_job(txn_kv_.get(), check_job_key, instance.instance_id(), - ip_port_, ret == 0, ctime_ms); + ip_port_, ret1 == 0 && ret2 == 0, ctime_ms); } { std::lock_guard lock(mtx_);