From ba55f247c2d2ce3447a514d6d2c578dbe337ca4c Mon Sep 17 00:00:00 2001 From: Kavitha Ramalingam Date: Wed, 13 Nov 2024 20:01:17 +0530 Subject: [PATCH] gNOI Warm Reboot - rebootbackend changes --- .../rebootbackend/reboot_thread.cpp | 32 +++++++++++++++-- .../rebootbackend/reboot_thread.h | 1 + .../rebootbackend/rebootbe.cpp | 8 +++++ .../rebootbackend/redis_utils.cpp | 13 +++++++ .../rebootbackend/redis_utils.h | 9 +++++ src/sonic-framework/tests/rebootbe_test.cpp | 34 ------------------- 6 files changed, 61 insertions(+), 36 deletions(-) diff --git a/src/sonic-framework/rebootbackend/reboot_thread.cpp b/src/sonic-framework/rebootbackend/reboot_thread.cpp index a57a241e302f..97b918744a67 100644 --- a/src/sonic-framework/rebootbackend/reboot_thread.cpp +++ b/src/sonic-framework/rebootbackend/reboot_thread.cpp @@ -115,6 +115,8 @@ void RebootThread::do_reboot(void) { if (m_request.method() == RebootMethod::COLD) { do_cold_reboot(s); + } else if (m_request.method() == RebootMethod::WARM) { + do_warm_reboot(s); } else { // This shouldn't be possible. Reference check_start_preconditions() SWSS_LOG_ERROR("Received unrecognized method type = %s", @@ -161,11 +163,28 @@ void RebootThread::do_cold_reboot(swss::Select &s) { // We shouldn't be here. Platform reboot should've killed us. log_error_and_set_non_retry_failure("platform failed to reboot"); - // Set critical state - //m_critical_interface.report_critical_state("platform failed to reboot"); return; } +void RebootThread::do_warm_reboot(swss::Select &s) { + SWSS_LOG_ENTER(); + SWSS_LOG_NOTICE("Sending warm reboot request to platform"); + if (send_dbus_reboot_request() == Progress::EXIT_EARLY) { + return; + } + + // Wait for warm reboot. If we return, reboot failed. + if (wait_for_platform_reboot(s) == Progress::EXIT_EARLY) { + return; + } + + // We shouldn't be here. Platform reboot should've killed us. + log_error_and_set_non_retry_failure("failed to warm reboot"); + + return; +} + + void RebootThread::reboot_thread(void) { SWSS_LOG_ENTER(); @@ -188,6 +207,15 @@ bool RebootThread::check_start_preconditions(const RebootRequest &request, request.method() != RebootMethod::WARM) { response.json_string = "RebootThread: Start rx'd unsupported method"; response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; + } else if (request.method() == RebootMethod::WARM) { + if (m_status.get_last_reboot_status() == + RebootStatus_Status::RebootStatus_Status_STATUS_FAILURE) { + // If the last reboot failed with a non-retriable failure, don't retry. + // But, we will allow a cold boot to recover. + response.json_string = + "RebootThread: last WARM reboot failed with non-retriable failure"; + response.status = swss::StatusCode::SWSS_RC_FAILED_PRECONDITION; + } } else if (request.delay() != 0) { response.json_string = "RebootThread: delayed start not supported"; response.status = swss::StatusCode::SWSS_RC_INVALID_PARAM; diff --git a/src/sonic-framework/rebootbackend/reboot_thread.h b/src/sonic-framework/rebootbackend/reboot_thread.h index bd0b54aed2a2..9a8bc1be65ae 100644 --- a/src/sonic-framework/rebootbackend/reboot_thread.h +++ b/src/sonic-framework/rebootbackend/reboot_thread.h @@ -165,6 +165,7 @@ class RebootThread { void do_reboot(void); Progress send_dbus_reboot_request(); void do_cold_reboot(swss::Select &s); + void do_warm_reboot(swss::Select &s); // Inner loop select handler to wait for platform reboot. // wait for timeout diff --git a/src/sonic-framework/rebootbackend/rebootbe.cpp b/src/sonic-framework/rebootbackend/rebootbe.cpp index be8a55230e02..deeb473e36ab 100644 --- a/src/sonic-framework/rebootbackend/rebootbe.cpp +++ b/src/sonic-framework/rebootbackend/rebootbe.cpp @@ -53,6 +53,14 @@ void RebootBE::Start() { s.addSelectable(&m_done); s.addSelectable(&m_reboot_thread_finished); + + if (swss::WarmStart::isWarmStart()) { + SWSS_LOG_NOTICE("Launching init thread for warm start"); + SetCurrentStatus(RebManagerStatus::WARM_INIT_WAIT); + } else { + SWSS_LOG_NOTICE("Warm restart not enabled"); + } + SWSS_LOG_NOTICE("RebootBE entering operational loop"); while (true) { swss::Selectable *sel; diff --git a/src/sonic-framework/rebootbackend/redis_utils.cpp b/src/sonic-framework/rebootbackend/redis_utils.cpp index 4010e99d423a..68abfcc9985d 100644 --- a/src/sonic-framework/rebootbackend/redis_utils.cpp +++ b/src/sonic-framework/rebootbackend/redis_utils.cpp @@ -77,4 +77,17 @@ bool get_docker_app_from_key(const std::string &key, return true; } +void set_warm_restart_counter(swss::DBConnector &db, int count) { + swss::Table table(&db, "BOOT_INFO"); + table.hset("system", "warmboot-count", std::to_string(count)); +} + +std::string get_warm_restart_counter(swss::DBConnector &db) { + swss::Table warmRestartTable(&db, "BOOT_INFO"); + std::string counter; + warmRestartTable.hget("system", "warmboot-count", counter); + return counter; +} + + } // namespace rebootbackend diff --git a/src/sonic-framework/rebootbackend/redis_utils.h b/src/sonic-framework/rebootbackend/redis_utils.h index 05d87c2aef1c..b169b69bf460 100644 --- a/src/sonic-framework/rebootbackend/redis_utils.h +++ b/src/sonic-framework/rebootbackend/redis_utils.h @@ -36,5 +36,14 @@ bool is_valid_key(const std::string &key, const std::string &separator); bool get_docker_app_from_key(const std::string &key, const std::string &separator, std::string &docker, std::string &app); +// Sets the warm restart count in the database. +void set_warm_restart_counter(swss::DBConnector &db, int count); + +// Returns the current warm restart count from the database. Returns an empty +// string if the warm restart count is not set, and a string representation +// of an integer otherwise. +std::string get_warm_restart_counter(swss::DBConnector &db); + + } // namespace rebootbackend diff --git a/src/sonic-framework/tests/rebootbe_test.cpp b/src/sonic-framework/tests/rebootbe_test.cpp index 7c23f996bad3..a4eb79b642c1 100644 --- a/src/sonic-framework/tests/rebootbe_test.cpp +++ b/src/sonic-framework/tests/rebootbe_test.cpp @@ -63,7 +63,6 @@ class RebootBETestWithoutStop : public ::testing::Test { m_rebootbeReponseChannel(&m_db, REBOOT_RESPONSE_NOTIFICATION_CHANNEL), m_rebootbe(m_dbus_interface) { sigterm_requested = false; -// TestUtils::clear_tables(m_db); m_s.addSelectable(&m_rebootbeReponseChannel); @@ -75,21 +74,6 @@ class RebootBETestWithoutStop : public ::testing::Test { } virtual ~RebootBETestWithoutStop() = default; - gnoi::system::RebootStatusResponse default_not_started_status() { - InitThreadStatus status; - return status.get_response(); - } - - gnoi::system::RebootStatusResponse default_done_status() { - InitThreadStatus status; - // We can't edit the status without it being active. - status.set_start_status(); - status.set_success(); - status.set_inactive(); - return status.get_response(); - } - - void start_rebootbe() { m_rebootbe_thread = std::make_unique(&RebootBE::Start, &m_rebootbe); @@ -227,26 +211,8 @@ class RebootBEAutoStartTest : public RebootBETest, RebootBEAutoStartTest() { //force_warm_start_state(GetParam()); - /* if (GetParam()) { - EXPECT_CALL(*m_init_thread, Start()) - .WillOnce(Return(swss::StatusCode::SWSS_RC_SUCCESS)); - EXPECT_CALL(*m_init_thread, Join()).WillOnce(Return(true)); - EXPECT_CALL(*m_init_thread, GetResponse()) - .WillOnce(Return(default_running_status())) - .WillRepeatedly(Return(default_done_status())); - } else { - EXPECT_CALL(*m_init_thread, GetResponse()) - .WillRepeatedly(Return(default_not_started_status())); - } */ - start_rebootbe(); -/* if (GetParam()) { - get_stack_unfrozen_select().notify(); - std::this_thread::sleep_for(std::chrono::milliseconds(50)); - get_init_done_select().notify(); - } */ - std::this_thread::sleep_for(std::chrono::milliseconds(50)); EXPECT_EQ(m_rebootbe.GetCurrentStatus(), RebootBE::RebManagerStatus::IDLE); }