From 1686dbe5a65c2f817e0b2dbd799014e73818dabf Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 4 Nov 2024 13:32:57 +0000 Subject: [PATCH 01/26] Extend reboot script for rebooting SmartSwitch --- scripts/reboot | 196 +++++++++++++++++++++++++++++++++++- scripts/reboot_helper.py | 193 +++++++++++++++++++++++++++++++++++ setup.py | 1 + tests/test_reboot_helper.py | 165 ++++++++++++++++++++++++++++++ utilities_common/chassis.py | 4 + 5 files changed, 557 insertions(+), 2 deletions(-) create mode 100644 scripts/reboot_helper.py create mode 100644 tests/test_reboot_helper.py diff --git a/scripts/reboot b/scripts/reboot index 044334af3e..70925e63d3 100755 --- a/scripts/reboot +++ b/scripts/reboot @@ -37,10 +37,17 @@ EXIT_NEXT_IMAGE_NOT_EXISTS=4 EXIT_SONIC_INSTALLER_VERIFY_REBOOT=21 EXIT_PLATFORM_FW_AU_FAILURE=22 PLATFORM_FWUTIL_AU_REBOOT_HANDLE="platform_fw_au_reboot_handle" +PLATFORM_JSON_FILE="platform.json" +PLATFORM_JSON_PATH="${DEVPATH}/${PLATFORM}/${PLATFORM_JSON_FILE}" REBOOT_SCRIPT_NAME=$(basename $0) REBOOT_TYPE="${REBOOT_SCRIPT_NAME}" TAG_LATEST=no REBOOT_FLAGS="" +FORCE_REBOOT="no" +SMART_SWITCH="no" +DPU_MODULE_NAME="" +REBOOT_DPU="no" +PRE_SHUTDOWN="no" function debug() { @@ -128,6 +135,8 @@ function show_help_and_exit() echo " " echo " Available options:" echo " -h, -? : getting this help" + echo " -d : DPU module name on a smart switch, option is invalid when on DPU" + echo " -p : Pre-shutdown steps on DPU, invalid on NPU" exit ${EXIT_SUCCESS} } @@ -154,7 +163,7 @@ function reboot_pre_check() ${DEVPATH}/${PLATFORM}/${PLATFORM_REBOOT_PRE_CHECK} [[ $? -ne 0 ]] && exit $? fi - + # Verify the next image by sonic-installer local message=$(sonic-installer verify-next-image 2>&1) if [ $? -ne 0 ]; then @@ -176,9 +185,128 @@ function check_conflict_boot_in_fw_update() fi } +# Function to retrieve DPU IP from CONFIG_DB +function get_dpu_ip() +{ + local DPU_NAME=$1 + dpu_ip=$(sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips@") + if [ $? -ne 0 ] || [ -z "$dpu_ip" ]; then + echo "Error: Failed to retrieve DPU IP address for ${DPU_NAME}" + exit ${EXIT_ERROR} + fi + debug "$DPU_NAME ip: $dpu_ip" +} + +# Function to retrieve GNMI port from CONFIG_DB +function get_gnmi_port() { + local DPU_NAME=$1 + port=$(sonic-db-cli CONFIG_DB HGET "DPU_PORT|$DPU_NAME" "gnmi") + if [ $? -ne 0 ] || [ -z "$port" ]; then + echo "Error: Failed to retrieve GNMI port" + exit ${EXIT_ERROR} + fi + debug "$DPU_NAME GNMI port:$port" +} + +# Function to get reboot status from DPU +function get_reboot_status() +{ + local dpu_ip=$1 + local port=$2 + reboot_status=$(docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc RebootStatus) + if [ $? -ne 0 ] || [ -z "$reboot_status" ]; then + echo "Error: Failed to send reboot status command to DPU ${DPU_NAME}" + exit ${EXIT_ERROR} + fi + debug "$reboot_status" +} + +# Function to retrieve DPU bus info from platform JSON +function get_dpu_bus_info() { + local DPU_NAME=$1 + DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") + if [ -z "$DPU_BUS_INFO" ]; then + echo "Error: bus_info not found for DPU ${DPU_NAME}" + exit ${EXIT_ERROR} + fi + debug "$DPU_NAME : $DPU_BUS_INFO" +} + +# Function to reboot the platform module +function reboot_platform_module() { + local DPU_NAME=$1 + reboot_status=$(python3 -c "import reboot_helper; reboot_helper.reboot_module('${DPU_NAME}')") + if [ -z "$reboot_status" ] || [ "$reboot_status" = "false" ]; then + echo "Error: Failed to reboot the platform" + exit ${EXIT_ERROR} + fi +} + +function reboot_dpu_module() +{ + local DPU_NAME=$1 + local DPU_INDEX=${DPU_NAME//[!0-9]/} + + debug "User requested rebooting device ${DPU_NAME} ..." + + # Retrieve DPU IP and GNMI port + dpu_ip=$(get_dpu_ip "${DPU_NAME}") + port=$(get_gnmi_port "${DPU_NAME}") + + if [ -z "$dpu_ip" ] || [ -z "$port" ]; then + echo "Error: Failed to retrieve DPU IP or GNMI port for ${DPU_NAME}" + exit ${EXIT_ERROR} + fi + + # Issue GNOI client command to reboot the DPU + docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc Reboot -jsonin '{"method":3}' + if [ $? -ne 0 ]; then + echo "Error: Failed to send reboot command to DPU ${DPU_NAME}" + exit ${EXIT_ERROR} + fi + + # Retrieve dpu_halt_services_timeout value using jq + dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) + if [ $? -ne 0 ]; then + echo "Error: Failed to retrieve dpu_halt_services_timeout from ${PLATFORM_JSON_PATH}" + exit ${EXIT_ERROR} + fi + + # Poll on reboot status response with a timeout mechanism + poll_interval=5 + waited_time=0 + while true; do + reboot_status=$(get_reboot_status "${dpu_ip}" "${port}") + debug "GNOI RebootStatus response ${reboot_status}" + is_reboot_active=$(echo "$reboot_status" | grep "active" | awk '{print $2}') + if [ "$is_reboot_active" == "false" ]; then + break + fi + + sleep "$poll_interval" + waited_time=$((waited_time + poll_interval)) + if [ $waited_time -ge $dpu_halt_services_timeout ]; then + echo "Error: Timeout waiting for DPU ${DPU_NAME} to finish rebooting" + exit ${EXIT_ERROR} + fi + done + + # Check if DPU exists and retrieve bus info + DPU_BUS_INFO=$(get_dpu_bus_info "${DPU_NAME}") + + # Update STATE_DB and handle PCIe removal and rescan + sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' + + echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove + reboot_platform_module "${DPU_NAME}" + echo 1 > /sys/bus/pci/rescan + + sonic-db-cli state_db del "PCIE_DETACH_INFO|${DPU_NAME}" +} + function parse_options() { - while getopts "h?vf" opt; do + while getopts "h?vfpd" opt; do case ${opt} in h|\? ) show_help_and_exit @@ -192,6 +320,13 @@ function parse_options() f ) REBOOT_FLAGS+=" -f" ;; + d ) + REBOOT_DPU="yes" + DPU_MODULE_NAME="$OPTARG" + ;; + p ) + PRE_SHUTDOWN="yes" + ;; esac done } @@ -215,6 +350,56 @@ function linecard_reboot_notify_supervisor() fi } +# Function to reboot all DPUs in parallel +function reboot_all_dpus() { + local NUM_DPU=$1 + + for (( i=0; i<"$NUM_DPU"; i++ )); do + echo "Rebooting DPU module dpu$i" + reboot_dpu_module "dpu$i" & + done + wait +} + +# Function to handle scenarios on smart switch +function handle_smart_switch() { + if [ -f "$PLATFORM_JSON_PATH" ]; then + NUM_DPU=$(jq -r '.DPUS | length' "$PLATFORM_JSON_PATH" 2>/dev/null) + if [ "$NUM_DPU" -gt 0 ]; then + SMART_SWITCH="yes" + fi + fi + + if [[ "$REBOOT_DPU" == "yes" ]]; then + if [[ "$SMART_SWITCH" == "yes" ]]; then + echo "User requested to reboot the device ${DPU_MODULE_NAME}" + reboot_dpu_module "$DPU_MODULE_NAME" + else + echo "Invalid '-d' option specified for a non-smart switch" + exit ${EXIT_ERROR} + fi + fi + + is_dpu=$(python3 -c "import reboot_helper; reboot_helper.is_dpu()") + debug "Is the platform DPU: $is_dpu" + + # Check if system is a DPU and handle -p option accordingly + if [[ "$is_dpu" == "True" && "$PRE_SHUTDOWN" != "yes" ]]; then + echo "Invalid, '-p' option not specified for a DPU" + exit ${EXIT_ERROR} + elif [[ "$is_dpu" != "True" && "$PRE_SHUTDOWN" == "yes" ]]; then + echo "Invalid '-p' option specified for a non-DPU" + exit ${EXIT_ERROR} + fi + + if [[ "$SMART_SWITCH" == "yes" ]]; then + # If not a DPU, reboot all DPUs in parallel + if [[ "$is_dpu" != "True" ]]; then + reboot_all_dpus "$NUM_DPU" + fi + fi +} + parse_options $@ # Exit if not superuser @@ -225,6 +410,8 @@ fi debug "User requested rebooting device ..." +handle_smart_switch + check_conflict_boot_in_fw_update setup_reboot_variables @@ -287,6 +474,11 @@ if [ -x ${WATCHDOG_UTIL} ]; then ${WATCHDOG_UTIL} arm fi +if [[ "${PRE_SHUTDOWN}" == "yes" ]]; then + echo "${DPU_MODULE_NAME} pre-shutdown steps are completed" + exit ${EXIT_SUCCESS} +fi + if [ -x ${DEVPATH}/${PLATFORM}/${PLAT_REBOOT} ]; then VERBOSE=yes debug "Rebooting with platform ${PLATFORM} specific tool ..." ${DEVPATH}/${PLATFORM}/${PLAT_REBOOT} $@ diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py new file mode 100644 index 0000000000..113b7d6c69 --- /dev/null +++ b/scripts/reboot_helper.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +# +# reboot_helper.py +# +# Utility helper for reboot within SONiC + +import os +import re +import sys +import json +import sonic_platform +from sonic_py_common import logger, device_info +from utilities_common.chassis import is_smartswitch + +SYSLOG_IDENTIFIER = "reboot_helper" + +EXIT_FAIL = -1 +EXIT_SUCCESS = 0 +ERROR_NOT_IMPLEMENTED = 1 +ERROR_EXCEPTION = 2 + +# Global logger instance +log = logger.Logger(SYSLOG_IDENTIFIER) + +# Global variable for platform chassis +platform_chassis = None + + +def get_all_dpus(): + """ + Retrieve a list of all DPUs (Data Processing Units) in the system. + This function checks if the platform is a smartswitch and then loads the platform.json + file to extract the DPUs dictionary. It converts the DPU names to uppercase and returns + them as a list. + + Returns: + list: A list of DPU names in uppercase. + """ + dpu_list = [] + + if not is_smartswitch(): + return dpu_list + + # Load platform.json + platform_info = device_info.get_platform_info() + platform = platform_info.get('platform') + if not platform: + log.log_error("Platform does not exist in platform_info") + return dpu_list + platform_json_path = os.path.join("/usr/share/sonic/device", platform, "platform.json") + try: + with open(platform_json_path, 'r') as platform_json: + config_data = json.load(platform_json) + + # Extract DPUs dictionary + dpus = config_data.get("DPUS", {}) + + # Convert DPU names to uppercase and append to the list + dpu_list = [dpu.upper() for dpu in dpus] + + except FileNotFoundError: + log.log_error("platform.json not found") + except json.JSONDecodeError: + log.log_error("Failed to parse platform.json") + sys.exit(ERROR_EXCEPTION) + + return dpu_list + + +def load_platform_chassis(): + """ + Load the platform chassis using the SONiC platform API. + + This function attempts to instantiate the platform chassis object. + If successful, it sets the global variable `platform_chassis` to the instantiated object. + + Returns: + bool: True if the platform chassis is successfully loaded, False otherwise. + """ + global platform_chassis + + # Load new platform API class + try: + platform_chassis = sonic_platform.platform.Platform().get_chassis() + except Exception as e: + log.log_error("Failed to instantiate Chassis due to {}".format(repr(e))) + sys.exit(ERROR_EXCEPTION) + + if not platform_chassis: + log.log_error("Platform chassis is not loaded") + sys.exit(EXIT_FAIL) + + return True + + +def reboot_module(module_name): + """ + Reboot the specified module by invoking the platform API. + + Args: + module_name (str): The name of the module to reboot. + + Returns: + bool: True if the reboot command was successfully sent, False otherwise. + """ + + # Load the platform chassis if not already loaded + load_platform_chassis() + + if not is_smartswitch(): + log.log_error("Platform is not a smartswitch to reboot module") + return False + + # Attempt to reboot the module + if hasattr(platform_chassis, 'reboot'): + platform_chassis.reboot(module_name) + else: + log.log_error("Reboot method not found in platform chassis") + return False + + if module_name.upper() not in get_all_dpus(): + log.log_error("Module {} not found".format(module_name)) + return False + + log.log_info("Rebooting module {}...".format(module_name)) + try: + platform_chassis.reboot(module_name) + log.log_info("Reboot command sent for module {}".format(module_name)) + except NotImplementedError: + log.log_error("Reboot not implemented on this platform") + sys.exit(ERROR_NOT_IMPLEMENTED) + except Exception as e: + log.log_error("An error occurred while rebooting module {}: {}".format(module_name, e)) + sys.exit(ERROR_EXCEPTION) + + return True + + +def is_dpu(): + """Check if script is running on DPU module""" + + # Load the platform chassis if not already loaded + load_platform_chassis() + + if not is_smartswitch(): + return False + + # Load platform.json + platform_info = device_info.get_platform_info() + platform = platform_info.get('platform') + if not platform: + log.log_error("Platform does not exist in platform_info") + return False + platform_json_path = os.path.join("/usr/share/sonic/device", platform, "platform.json") + try: + with open(platform_json_path, 'r') as platform_json: + config_data = json.load(platform_json) + + # Check for any key matching the .DPU pattern + for key in config_data.keys(): + if re.search(r'\.DPU$', key): + return True + except FileNotFoundError: + log.log_error("platform.json not found") + except json.JSONDecodeError: + log.log_error("Failed to parse platform.json") + sys.exit(ERROR_EXCEPTION) + + return False + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("Usage: reboot_helper.py ") + sys.exit(EXIT_FAIL) + + command = sys.argv[1] + module_name = sys.argv[2] + + if command == "reboot": + success = reboot_module(module_name) + if not success: + sys.exit(EXIT_FAIL) + else: + print("Reboot command sent for module {module_name}") + elif command == "is_dpu": + if is_dpu(): + print("Script is running on DPU module") + else: + sys.exit(EXIT_FAIL) + else: + print("Unknown command: {command}") + sys.exit(EXIT_FAIL) diff --git a/setup.py b/setup.py index dc5fa4a9b4..c1e579857c 100644 --- a/setup.py +++ b/setup.py @@ -161,6 +161,7 @@ 'scripts/psushow', 'scripts/queuestat', 'scripts/reboot', + 'scripts/reboot_helper.py', 'scripts/route_check.py', 'scripts/route_check_test.sh', 'scripts/vnet_route_check.py', diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py new file mode 100644 index 0000000000..5eaa7f07a8 --- /dev/null +++ b/tests/test_reboot_helper.py @@ -0,0 +1,165 @@ +import os +import sys +import unittest +from unittest.mock import patch, MagicMock, mock_open + +test_path = os.path.dirname(os.path.abspath(__file__)) +modules_path = os.path.dirname(test_path) +sys.path.insert(0, modules_path) + +sys.modules['sonic_platform'] = MagicMock() +import scripts.reboot_helper # noqa: E402 + + +class TestRebootHelper(unittest.TestCase): + + @patch( + 'scripts.reboot_helper.is_smartswitch', + MagicMock(return_value=False) + ) + def test_get_all_dpus_not_smartswitch(self): + dpu_list = scripts.reboot_helper.get_all_dpus() + self.assertEqual(dpu_list, []) + + @patch( + 'os.path.join', MagicMock(return_value="/mock/path/platform.json") + ) + @patch( + 'builtins.open', new_callable=mock_open, + read_data='{"DPUS": {"DPU1": {}, "DPU2": {}}}' + ) + @patch( + 'scripts.reboot_helper.device_info.get_platform_info', + MagicMock(return_value={'platform': 'mock_platform'}) + ) + @patch( + 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) + ) + def test_get_all_dpus_valid_json(self, mock_is_smartswitch): + dpu_list = scripts.reboot_helper.get_all_dpus() + self.assertEqual(dpu_list, ["DPU1", "DPU2"]) + + @patch( + 'scripts.reboot_helper.sonic_platform.platform.Platform.get_chassis' + ) + def test_load_platform_chassis_success(self, mock_get_chassis): + mock_get_chassis.return_value = MagicMock() + result = scripts.reboot_helper.load_platform_chassis() + self.assertTrue(result) + + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=False) + ) + def test_reboot_module_chassis_fail(self): + result = scripts.reboot_helper.reboot_module("DPU1") + self.assertFalse(result) + + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True) + ) + @patch( + 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=False) + ) + def test_reboot_module_not_smartswitch(self): + result = scripts.reboot_helper.reboot_module("DPU1") + self.assertFalse(result) + + @patch( + 'scripts.reboot_helper.get_all_dpus', MagicMock(return_value=["DPU1"]) + ) + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True) + ) + @patch( + 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) + ) + def test_reboot_module_not_found(self): + result = scripts.reboot_helper.reboot_module("DPU2") + self.assertFalse(result) + + @patch( + 'scripts.reboot_helper.get_all_dpus', MagicMock(return_value=["DPU1"]) + ) + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True) + ) + @patch( + 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) + ) + @patch( + 'scripts.reboot_helper.platform_chassis.reboot', + MagicMock(return_value=True) + ) + def test_reboot_module_success(self): + result = scripts.reboot_helper.reboot_module("DPU1") + self.assertTrue(result) + + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=False) + ) + def test_is_dpu_load_platform_chassis_fail(self): + result = scripts.reboot_helper.is_dpu() + self.assertFalse(result) + + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True) + ) + @patch( + 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=False) + ) + def test_is_dpu_not_smartswitch(self): + result = scripts.reboot_helper.is_dpu() + self.assertFalse(result) + + @patch( + 'os.path.join', MagicMock(return_value="/mock/path/platform.json") + ) + @patch( + 'builtins.open', new_callable=mock_open, + read_data='{".DPU": {}}' + ) + @patch( + 'scripts.reboot_helper.device_info.get_platform_info', + MagicMock(return_value={'platform': 'mock_platform'}) + ) + @patch('scripts.reboot_helper.load_platform_chassis') + @patch('scripts.reboot_helper.is_smartswitch') + def test_is_dpu_found(self, mock_is_smartswitch, + mock_load_platform_chassis, + mock_get_platform_info): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + result = scripts.reboot_helper.is_dpu() + self.assertTrue(result) + + @patch( + 'os.path.join', MagicMock(return_value="/mock/path/platform.json") + ) + @patch( + 'builtins.open', new_callable=mock_open, + read_data='{}' + ) + @patch( + 'scripts.reboot_helper.device_info.get_platform_info', + MagicMock(return_value={'platform': 'mock_platform'}) + ) + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True) + ) + @patch( + 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) + ) + def test_is_dpu_not_found(self, mock_is_smartswitch): + result = scripts.reboot_helper.is_dpu() + self.assertFalse(result) + + +if __name__ == '__main__': + unittest.main() diff --git a/utilities_common/chassis.py b/utilities_common/chassis.py index 1283bca580..667f2ab155 100644 --- a/utilities_common/chassis.py +++ b/utilities_common/chassis.py @@ -16,3 +16,7 @@ def get_chassis_local_interfaces(): lst = data[1].split(",") return lst return lst + + +def is_smartswitch(): + return hasattr(device_info, 'is_smartswitch') and device_info.is_smartswitch() From 23461b2b5fdfde3b92f09a88b029e52a8cd45734 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 4 Nov 2024 16:50:15 +0000 Subject: [PATCH 02/26] Add more coverage --- scripts/reboot_helper.py | 28 ++++++++------- tests/test_reboot_helper.py | 69 ++++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 113b7d6c69..7de03ccf9d 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -57,12 +57,15 @@ def get_all_dpus(): # Convert DPU names to uppercase and append to the list dpu_list = [dpu.upper() for dpu in dpus] - except FileNotFoundError: log.log_error("platform.json not found") + return dpu_list except json.JSONDecodeError: log.log_error("Failed to parse platform.json") - sys.exit(ERROR_EXCEPTION) + return dpu_list + except Exception as e: + log.log_error("Unexpected error occurred while getting DPUs: {}".format(e)) + return dpu_list return dpu_list @@ -83,12 +86,11 @@ def load_platform_chassis(): try: platform_chassis = sonic_platform.platform.Platform().get_chassis() except Exception as e: - log.log_error("Failed to instantiate Chassis due to {}".format(repr(e))) - sys.exit(ERROR_EXCEPTION) + raise RuntimeError("Failed to instantiate Chassis due to {}".format(str(e))) - if not platform_chassis: + if platform_chassis is None: log.log_error("Platform chassis is not loaded") - sys.exit(EXIT_FAIL) + return False return True @@ -127,10 +129,9 @@ def reboot_module(module_name): platform_chassis.reboot(module_name) log.log_info("Reboot command sent for module {}".format(module_name)) except NotImplementedError: - log.log_error("Reboot not implemented on this platform") - sys.exit(ERROR_NOT_IMPLEMENTED) + raise NotImplementedError("Reboot not implemented on this platform: {type(e).__name__}") except Exception as e: - log.log_error("An error occurred while rebooting module {}: {}".format(module_name, e)) + log.log_error("Unexpected error occurred while rebooting module {}: {}".format(module_name, e)) sys.exit(ERROR_EXCEPTION) return True @@ -164,7 +165,6 @@ def is_dpu(): log.log_error("platform.json not found") except json.JSONDecodeError: log.log_error("Failed to parse platform.json") - sys.exit(ERROR_EXCEPTION) return False @@ -179,10 +179,12 @@ def is_dpu(): if command == "reboot": success = reboot_module(module_name) - if not success: - sys.exit(EXIT_FAIL) + if is_dpu(): + print("Script is running on DPU module") else: - print("Reboot command sent for module {module_name}") + sys.exit(EXIT_FAIL) + if not is_dpu(): + sys.exit(ERROR_EXCEPTION) elif command == "is_dpu": if is_dpu(): print("Script is running on DPU module") diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 5eaa7f07a8..29740006c6 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -1,5 +1,6 @@ import os import sys +import pytest import unittest from unittest.mock import patch, MagicMock, mock_open @@ -21,6 +22,41 @@ def test_get_all_dpus_not_smartswitch(self): dpu_list = scripts.reboot_helper.get_all_dpus() self.assertEqual(dpu_list, []) + @patch('scripts.reboot_helper.device_info.get_platform_info') + @patch('scripts.reboot_helper.is_smartswitch') + def test_get_all_dpus_invalid_platform_info(self, mock_is_smartswitch, + mock_get_platform_info): + mock_is_smartswitch.return_value = True + mock_get_platform_info.return_value = {'platform': None} + # Simulate get_platform_info returning None or invalid data + dpu_list = scripts.reboot_helper.get_all_dpus() + self.assertEqual(dpu_list, []) + + @patch('builtins.open', side_effect=FileNotFoundError) + def test_get_all_dpus_file_not_found(self, mock_open): + # Simulate the FileNotFoundError scenario + dpu_list = scripts.reboot_helper.get_all_dpus() + self.assertEqual(dpu_list, []) + + @patch( + 'os.path.join', MagicMock(return_value="/mock/path/platform.json") + ) + @patch('builtins.open', new_callable=mock_open, read_data='Invalid JSON') + @patch( + 'scripts.reboot_helper.device_info.get_platform_info', + MagicMock(return_value={'platform': 'mock_platform'}) + ) + @patch( + 'scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True) + ) + @patch( + 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) + ) + def test_get_all_dpus_malformed_json(self, mock_is_smartswitch): + dpu_list = scripts.reboot_helper.get_all_dpus() + self.assertEqual(dpu_list, []) + @patch( 'os.path.join', MagicMock(return_value="/mock/path/platform.json") ) @@ -47,6 +83,13 @@ def test_load_platform_chassis_success(self, mock_get_chassis): result = scripts.reboot_helper.load_platform_chassis() self.assertTrue(result) + @patch('scripts.reboot_helper.sonic_platform.platform.Platform') + def test_load_platform_chassis_exception(self, mock_platform): + mock_platform.side_effect = RuntimeError + + with pytest.raises(RuntimeError): + scripts.reboot_helper.load_platform_chassis() + @patch( 'scripts.reboot_helper.load_platform_chassis', MagicMock(return_value=False) @@ -80,6 +123,27 @@ def test_reboot_module_not_found(self): result = scripts.reboot_helper.reboot_module("DPU2") self.assertFalse(result) + @patch('scripts.reboot_helper.get_all_dpus', + MagicMock(return_value=["DPU1"])) + @patch('scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True)) + @patch('scripts.reboot_helper.is_smartswitch', + MagicMock(return_value=True)) + @patch('scripts.reboot_helper.platform_chassis.reboot', + MagicMock(side_effect=NotImplementedError)) + def test_reboot_module_not_implemented(self): + with pytest.raises(RuntimeError): + scripts.reboot_helper.reboot_module("DPU1") + + @patch('scripts.reboot_helper.get_all_dpus', MagicMock(return_value=[])) + @patch('scripts.reboot_helper.load_platform_chassis', + MagicMock(return_value=True)) + @patch('scripts.reboot_helper.is_smartswitch', + MagicMock(return_value=True)) + def test_reboot_module_empty_dpu_list(self): + result = scripts.reboot_helper.reboot_module("DPU1") + self.assertFalse(result) + @patch( 'scripts.reboot_helper.get_all_dpus', MagicMock(return_value=["DPU1"]) ) @@ -141,10 +205,7 @@ def test_is_dpu_found(self, mock_is_smartswitch, @patch( 'os.path.join', MagicMock(return_value="/mock/path/platform.json") ) - @patch( - 'builtins.open', new_callable=mock_open, - read_data='{}' - ) + @patch('builtins.open', new_callable=mock_open, read_data='{}') @patch( 'scripts.reboot_helper.device_info.get_platform_info', MagicMock(return_value={'platform': 'mock_platform'}) From cef5de786b9b98a5138ac7af0974b68a640eb359 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 4 Nov 2024 17:46:01 +0000 Subject: [PATCH 03/26] Add more unittests and optimize tests file --- scripts/reboot | 12 +- scripts/reboot_helper.py | 183 ++++++++------ tests/test_reboot_helper.py | 476 +++++++++++++++++++++--------------- 3 files changed, 402 insertions(+), 269 deletions(-) diff --git a/scripts/reboot b/scripts/reboot index 70925e63d3..e4723503ae 100755 --- a/scripts/reboot +++ b/scripts/reboot @@ -232,6 +232,16 @@ function get_dpu_bus_info() { debug "$DPU_NAME : $DPU_BUS_INFO" } +#Function to detach PCI module +function pci_detach_module() { + local DPU_NAME=$1 + local DPU_BUS_INFO=$2 + status = $(python3 -c "import reboot_helper; reboot_helper.pci_detach_module('${DPU_NAME}')") + if [ -z "$status" ] || [ "$status" = "false" ]; then + echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove + fi +} + # Function to reboot the platform module function reboot_platform_module() { local DPU_NAME=$1 @@ -297,7 +307,7 @@ function reboot_dpu_module() # Update STATE_DB and handle PCIe removal and rescan sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' - echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove + pci_detach_module "${DPU_NAME}" "${DPU_BUS_INFO}" reboot_platform_module "${DPU_NAME}" echo 1 > /sys/bus/pci/rescan diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 7de03ccf9d..7f1506b728 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -16,8 +16,6 @@ EXIT_FAIL = -1 EXIT_SUCCESS = 0 -ERROR_NOT_IMPLEMENTED = 1 -ERROR_EXCEPTION = 2 # Global logger instance log = logger.Logger(SYSLOG_IDENTIFIER) @@ -26,73 +24,68 @@ platform_chassis = None -def get_all_dpus(): +def load_platform_chassis(): """ - Retrieve a list of all DPUs (Data Processing Units) in the system. - This function checks if the platform is a smartswitch and then loads the platform.json - file to extract the DPUs dictionary. It converts the DPU names to uppercase and returns - them as a list. + Load the platform chassis using the SONiC platform API. + + This function attempts to instantiate the platform chassis object. + If successful, it sets the global variable `platform_chassis` to the instantiated object. Returns: - list: A list of DPU names in uppercase. + bool: True if the platform chassis is successfully loaded, False otherwise. """ - dpu_list = [] + global platform_chassis - if not is_smartswitch(): - return dpu_list + # Load new platform API class + try: + platform_chassis = sonic_platform.platform.Platform().get_chassis() + if platform_chassis is None: + log.log_error("Platform chassis is not loaded") + return False + return True + except Exception as e: + log.log_error(f"Failed to instantiate Chassis: {e}") + return False - # Load platform.json - platform_info = device_info.get_platform_info() - platform = platform_info.get('platform') - if not platform: - log.log_error("Platform does not exist in platform_info") - return dpu_list + +def load_platform_json(platform): + """Load and return the content of platform.json.""" platform_json_path = os.path.join("/usr/share/sonic/device", platform, "platform.json") try: with open(platform_json_path, 'r') as platform_json: - config_data = json.load(platform_json) - - # Extract DPUs dictionary - dpus = config_data.get("DPUS", {}) - - # Convert DPU names to uppercase and append to the list - dpu_list = [dpu.upper() for dpu in dpus] + return json.load(platform_json) except FileNotFoundError: log.log_error("platform.json not found") - return dpu_list except json.JSONDecodeError: log.log_error("Failed to parse platform.json") - return dpu_list - except Exception as e: - log.log_error("Unexpected error occurred while getting DPUs: {}".format(e)) - return dpu_list + return None - return dpu_list - -def load_platform_chassis(): +def get_all_dpus(): """ - Load the platform chassis using the SONiC platform API. - - This function attempts to instantiate the platform chassis object. - If successful, it sets the global variable `platform_chassis` to the instantiated object. + Retrieve a list of all DPUs (Data Processing Units) in the system. + This function checks if the platform is a smartswitch and then loads the platform.json + file to extract the DPUs dictionary. It converts the DPU names to uppercase and returns + them as a list. Returns: - bool: True if the platform chassis is successfully loaded, False otherwise. + list: A list of DPU names in uppercase. """ - global platform_chassis + if not is_smartswitch(): + return [] - # Load new platform API class - try: - platform_chassis = sonic_platform.platform.Platform().get_chassis() - except Exception as e: - raise RuntimeError("Failed to instantiate Chassis due to {}".format(str(e))) + platform_info = device_info.get_platform_info() + platform = platform_info.get('platform') + if not platform: + log.log_error("Platform does not exist in platform_info") + return [] - if platform_chassis is None: - log.log_error("Platform chassis is not loaded") - return False + config_data = load_platform_json(platform) + if config_data is None: + return [] - return True + dpus = config_data.get("DPUS", []) + return [dpu.upper() for dpu in dpus] def reboot_module(module_name): @@ -105,7 +98,6 @@ def reboot_module(module_name): Returns: bool: True if the reboot command was successfully sent, False otherwise. """ - # Load the platform chassis if not already loaded load_platform_chassis() @@ -114,9 +106,7 @@ def reboot_module(module_name): return False # Attempt to reboot the module - if hasattr(platform_chassis, 'reboot'): - platform_chassis.reboot(module_name) - else: + if not hasattr(platform_chassis, 'reboot'): log.log_error("Reboot method not found in platform chassis") return False @@ -132,7 +122,7 @@ def reboot_module(module_name): raise NotImplementedError("Reboot not implemented on this platform: {type(e).__name__}") except Exception as e: log.log_error("Unexpected error occurred while rebooting module {}: {}".format(module_name, e)) - sys.exit(ERROR_EXCEPTION) + return False return True @@ -152,44 +142,83 @@ def is_dpu(): if not platform: log.log_error("Platform does not exist in platform_info") return False - platform_json_path = os.path.join("/usr/share/sonic/device", platform, "platform.json") - try: - with open(platform_json_path, 'r') as platform_json: - config_data = json.load(platform_json) - # Check for any key matching the .DPU pattern - for key in config_data.keys(): - if re.search(r'\.DPU$', key): - return True - except FileNotFoundError: - log.log_error("platform.json not found") - except json.JSONDecodeError: - log.log_error("Failed to parse platform.json") + config_data = load_platform_json(platform) + if config_data is None: + return False - return False + return any(re.search(r'\.DPU$', key) for key in config_data.keys()) -if __name__ == "__main__": - if len(sys.argv) < 3: - print("Usage: reboot_helper.py ") +def pci_detach_module(module_name): + """ + Detach the specified module by invoking the platform API. + + Args: + module_name (str): The name of the module to detach. + + Returns: + bool: True if the detach command was successfully sent, False otherwise. + """ + + # Load the platform chassis if not already loaded + load_platform_chassis() + + if not is_smartswitch(): + log.log_error("Platform is not a smartswitch to detach module") + return False + + # Attempt to detach the module + if not hasattr(platform_chassis, 'pci_detach'): + log.log_error("PCI detach method not found in platform chassis") + return False + + if module_name.upper() not in get_all_dpus(): + log.log_error("Module {} not found".format(module_name)) + return False + + log.log_info("Detaching module {}...".format(module_name)) + try: + platform_chassis.pci_detach(module_name) + log.log_info("Detach command sent for module {}".format(module_name)) + return True + except NotImplementedError: + raise NotImplementedError("PCI detach not implemented on this platform: {type(e).__name__}") + except Exception as e: + log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) + return False + + +def main(): + if len(sys.argv) < 2: + print("Usage: reboot_helper.py [module_name]") sys.exit(EXIT_FAIL) command = sys.argv[1] - module_name = sys.argv[2] - if command == "reboot": - success = reboot_module(module_name) - if is_dpu(): - print("Script is running on DPU module") - else: + if command == "reboot" or command == "pci_detach": + if len(sys.argv) < 3: + print("Usage: reboot_helper.py ") sys.exit(EXIT_FAIL) - if not is_dpu(): - sys.exit(ERROR_EXCEPTION) + module_name = sys.argv[2] + + if command == "reboot": + success = reboot_module(module_name) + if not success: + sys.exit(EXIT_FAIL) + elif command == "pci_detach": + success = pci_detach_module(module_name) + if not success: + sys.exit(EXIT_FAIL) elif command == "is_dpu": if is_dpu(): print("Script is running on DPU module") else: sys.exit(EXIT_FAIL) else: - print("Unknown command: {command}") + print(f"Unknown command: {command}") sys.exit(EXIT_FAIL) + + +if __name__ == "__main__": + main() diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 29740006c6..18e27613d3 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -1,7 +1,8 @@ +# tests/test_reboot_helper.py + import os import sys import pytest -import unittest from unittest.mock import patch, MagicMock, mock_open test_path = os.path.dirname(os.path.abspath(__file__)) @@ -12,215 +13,308 @@ import scripts.reboot_helper # noqa: E402 -class TestRebootHelper(unittest.TestCase): +@pytest.fixture +def mock_load_platform_chassis(): + with patch('scripts.reboot_helper.load_platform_chassis', return_value=True) as mock: + yield mock - @patch( - 'scripts.reboot_helper.is_smartswitch', - MagicMock(return_value=False) - ) - def test_get_all_dpus_not_smartswitch(self): - dpu_list = scripts.reboot_helper.get_all_dpus() - self.assertEqual(dpu_list, []) - - @patch('scripts.reboot_helper.device_info.get_platform_info') - @patch('scripts.reboot_helper.is_smartswitch') - def test_get_all_dpus_invalid_platform_info(self, mock_is_smartswitch, - mock_get_platform_info): - mock_is_smartswitch.return_value = True - mock_get_platform_info.return_value = {'platform': None} - # Simulate get_platform_info returning None or invalid data - dpu_list = scripts.reboot_helper.get_all_dpus() - self.assertEqual(dpu_list, []) - @patch('builtins.open', side_effect=FileNotFoundError) - def test_get_all_dpus_file_not_found(self, mock_open): - # Simulate the FileNotFoundError scenario +@pytest.fixture +def mock_is_smartswitch(): + with patch('scripts.reboot_helper.is_smartswitch', return_value=True) as mock: + yield mock + + +@pytest.fixture +def mock_get_platform_info(): + with patch('scripts.reboot_helper.device_info.get_platform_info') as mock: + yield mock + + +@pytest.fixture +def mock_open_file(): + return mock_open() + + +@pytest.fixture +def mock_platform_chassis(): + with patch('scripts.reboot_helper.platform_chassis') as mock: + yield mock + + +@pytest.fixture +def mock_platform(): + with patch('scripts.reboot_helper.sonic_platform.platform.Platform') as mock: + yield mock + + +def test_get_all_dpus_not_smartswitch(mock_is_smartswitch): + mock_is_smartswitch.return_value = False + dpu_list = scripts.reboot_helper.get_all_dpus() + assert dpu_list == [] + + +def test_get_all_dpus_invalid_platform_info(mock_is_smartswitch, mock_get_platform_info): + mock_is_smartswitch.return_value = True + mock_get_platform_info.return_value = {'platform': None} + dpu_list = scripts.reboot_helper.get_all_dpus() + assert dpu_list == [] + + +def test_get_all_dpus_file_not_found(mock_open_file): + with patch('builtins.open', side_effect=FileNotFoundError): dpu_list = scripts.reboot_helper.get_all_dpus() - self.assertEqual(dpu_list, []) - - @patch( - 'os.path.join', MagicMock(return_value="/mock/path/platform.json") - ) - @patch('builtins.open', new_callable=mock_open, read_data='Invalid JSON') - @patch( - 'scripts.reboot_helper.device_info.get_platform_info', - MagicMock(return_value={'platform': 'mock_platform'}) - ) - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True) - ) - @patch( - 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) - ) - def test_get_all_dpus_malformed_json(self, mock_is_smartswitch): + assert dpu_list == [] + + +def test_get_all_dpus_malformed_json(mock_is_smartswitch, mock_get_platform_info, mock_open_file): + mock_is_smartswitch.return_value = True + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + + with patch('builtins.open', mock_open(read_data='Invalid JSON')): dpu_list = scripts.reboot_helper.get_all_dpus() - self.assertEqual(dpu_list, []) - - @patch( - 'os.path.join', MagicMock(return_value="/mock/path/platform.json") - ) - @patch( - 'builtins.open', new_callable=mock_open, - read_data='{"DPUS": {"DPU1": {}, "DPU2": {}}}' - ) - @patch( - 'scripts.reboot_helper.device_info.get_platform_info', - MagicMock(return_value={'platform': 'mock_platform'}) - ) - @patch( - 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) - ) - def test_get_all_dpus_valid_json(self, mock_is_smartswitch): + assert dpu_list == [] + + +def test_get_all_dpus_valid_json(mock_is_smartswitch, mock_get_platform_info, mock_open_file): + mock_is_smartswitch.return_value = True + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + + with patch('builtins.open', mock_open(read_data='{"DPUS": {"DPU1": {}, "DPU2": {}}}')): dpu_list = scripts.reboot_helper.get_all_dpus() - self.assertEqual(dpu_list, ["DPU1", "DPU2"]) + assert dpu_list == ["DPU1", "DPU2"] - @patch( - 'scripts.reboot_helper.sonic_platform.platform.Platform.get_chassis' - ) - def test_load_platform_chassis_success(self, mock_get_chassis): - mock_get_chassis.return_value = MagicMock() - result = scripts.reboot_helper.load_platform_chassis() - self.assertTrue(result) - @patch('scripts.reboot_helper.sonic_platform.platform.Platform') - def test_load_platform_chassis_exception(self, mock_platform): - mock_platform.side_effect = RuntimeError +def test_load_platform_chassis_success(mock_platform_chassis): + mock_platform_chassis.get_chassis.return_value = MagicMock() + result = scripts.reboot_helper.load_platform_chassis() + assert result - with pytest.raises(RuntimeError): - scripts.reboot_helper.load_platform_chassis() - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=False) - ) - def test_reboot_module_chassis_fail(self): - result = scripts.reboot_helper.reboot_module("DPU1") - self.assertFalse(result) - - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True) - ) - @patch( - 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=False) - ) - def test_reboot_module_not_smartswitch(self): +def test_load_platform_chassis_exception(mock_platform): + mock_platform.side_effect = RuntimeError + result = scripts.reboot_helper.load_platform_chassis() + assert not result + + +def test_reboot_module_chassis_fail(mock_load_platform_chassis): + mock_load_platform_chassis.return_value = False + result = scripts.reboot_helper.reboot_module("DPU1") + assert not result + + +def test_reboot_module_not_smartswitch(mock_load_platform_chassis): + with patch('scripts.reboot_helper.is_smartswitch', return_value=False): result = scripts.reboot_helper.reboot_module("DPU1") - self.assertFalse(result) - - @patch( - 'scripts.reboot_helper.get_all_dpus', MagicMock(return_value=["DPU1"]) - ) - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True) - ) - @patch( - 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) - ) - def test_reboot_module_not_found(self): + assert not result + + +def test_reboot_module_not_found(mock_load_platform_chassis, mock_get_platform_info): + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): result = scripts.reboot_helper.reboot_module("DPU2") - self.assertFalse(result) - - @patch('scripts.reboot_helper.get_all_dpus', - MagicMock(return_value=["DPU1"])) - @patch('scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True)) - @patch('scripts.reboot_helper.is_smartswitch', - MagicMock(return_value=True)) - @patch('scripts.reboot_helper.platform_chassis.reboot', - MagicMock(side_effect=NotImplementedError)) - def test_reboot_module_not_implemented(self): + assert not result + + +def test_reboot_module_not_implemented(mock_load_platform_chassis, mock_is_smartswitch, + mock_get_platform_info, mock_platform_chassis): + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + mock_platform_chassis.reboot.side_effect = NotImplementedError + with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): with pytest.raises(RuntimeError): scripts.reboot_helper.reboot_module("DPU1") - @patch('scripts.reboot_helper.get_all_dpus', MagicMock(return_value=[])) - @patch('scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True)) - @patch('scripts.reboot_helper.is_smartswitch', - MagicMock(return_value=True)) - def test_reboot_module_empty_dpu_list(self): + +def test_reboot_module_empty_dpu_list(mock_load_platform_chassis): + with patch('scripts.reboot_helper.get_all_dpus', return_value=[]): result = scripts.reboot_helper.reboot_module("DPU1") - self.assertFalse(result) - - @patch( - 'scripts.reboot_helper.get_all_dpus', MagicMock(return_value=["DPU1"]) - ) - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True) - ) - @patch( - 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) - ) - @patch( - 'scripts.reboot_helper.platform_chassis.reboot', - MagicMock(return_value=True) - ) - def test_reboot_module_success(self): + assert not result + + +def test_reboot_module_success(mock_load_platform_chassis, mock_is_smartswitch, + mock_get_platform_info, mock_platform_chassis): + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): + mock_platform_chassis.reboot.return_value = True result = scripts.reboot_helper.reboot_module("DPU1") - self.assertTrue(result) + assert result - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=False) - ) - def test_is_dpu_load_platform_chassis_fail(self): - result = scripts.reboot_helper.is_dpu() - self.assertFalse(result) - - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True) - ) - @patch( - 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=False) - ) - def test_is_dpu_not_smartswitch(self): + +def test_is_dpu_load_platform_chassis_fail(mock_load_platform_chassis): + mock_load_platform_chassis.return_value = False + result = scripts.reboot_helper.is_dpu() + assert not result + + +def test_is_dpu_not_smartswitch(mock_load_platform_chassis): + mock_load_platform_chassis.return_value = True + with patch('scripts.reboot_helper.is_smartswitch', return_value=False): result = scripts.reboot_helper.is_dpu() - self.assertFalse(result) - - @patch( - 'os.path.join', MagicMock(return_value="/mock/path/platform.json") - ) - @patch( - 'builtins.open', new_callable=mock_open, - read_data='{".DPU": {}}' - ) - @patch( - 'scripts.reboot_helper.device_info.get_platform_info', - MagicMock(return_value={'platform': 'mock_platform'}) - ) - @patch('scripts.reboot_helper.load_platform_chassis') - @patch('scripts.reboot_helper.is_smartswitch') - def test_is_dpu_found(self, mock_is_smartswitch, - mock_load_platform_chassis, - mock_get_platform_info): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True + assert not result + + +def test_is_dpu_found(mock_is_smartswitch, mock_load_platform_chassis, mock_get_platform_info, mock_open_file): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + + with patch('builtins.open', mock_open(read_data='{".DPU": {}}')): result = scripts.reboot_helper.is_dpu() - self.assertTrue(result) - - @patch( - 'os.path.join', MagicMock(return_value="/mock/path/platform.json") - ) - @patch('builtins.open', new_callable=mock_open, read_data='{}') - @patch( - 'scripts.reboot_helper.device_info.get_platform_info', - MagicMock(return_value={'platform': 'mock_platform'}) - ) - @patch( - 'scripts.reboot_helper.load_platform_chassis', - MagicMock(return_value=True) - ) - @patch( - 'scripts.reboot_helper.is_smartswitch', MagicMock(return_value=True) - ) - def test_is_dpu_not_found(self, mock_is_smartswitch): + assert result + + +def test_is_dpu_not_found(mock_is_smartswitch, mock_load_platform_chassis, mock_get_platform_info, mock_open_file): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + + with patch('builtins.open', mock_open(read_data='{}')): result = scripts.reboot_helper.is_dpu() - self.assertFalse(result) + assert not result + + +def test_pci_detach_module_chassis_fail(mock_load_platform_chassis): + mock_load_platform_chassis.return_value = False + result = scripts.reboot_helper.pci_detach_module("DPU1") + assert not result + + +def test_pci_detach_module_not_smartswitch(mock_load_platform_chassis): + with patch('scripts.reboot_helper.is_smartswitch', return_value=False): + result = scripts.reboot_helper.pci_detach_module("DPU1") + assert not result + + +def test_pci_detach_module_not_found(mock_load_platform_chassis, mock_get_platform_info): + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): + result = scripts.reboot_helper.pci_detach_module("DPU2") + assert not result + + +def test_pci_detach_module_not_implemented(mock_load_platform_chassis, mock_is_smartswitch, + mock_get_platform_info, mock_platform_chassis): + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + mock_platform_chassis.pci_detach.side_effect = NotImplementedError + with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): + with pytest.raises(RuntimeError): + scripts.reboot_helper.pci_detach_module("DPU1") + + +def test_pci_detach_module_empty_dpu_list(mock_load_platform_chassis): + with patch('scripts.reboot_helper.get_all_dpus', return_value=[]): + result = scripts.reboot_helper.pci_detach_module("DPU1") + assert not result + + +def test_pci_detach_module_success(mock_load_platform_chassis, mock_is_smartswitch, + mock_get_platform_info, mock_platform_chassis): + mock_get_platform_info.return_value = {'platform': 'mock_platform'} + with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): + mock_platform_chassis.reboot.return_value = True + result = scripts.reboot_helper.pci_detach_module("DPU1") + assert result + + +def test_main_reboot_command_success(monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1']) + + # Mock dependencies + with patch('scripts.reboot_helper.reboot_module', return_value=True) as mock_reboot_module, \ + patch('builtins.print') as mock_print, \ + patch('sys.exit') as mock_exit: + + scripts.reboot_helper.main() + + # Check that reboot_module was called + mock_reboot_module.assert_called_once_with('DPU1') + mock_exit.assert_not_called() + + +def test_main_reboot_command_fail(monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1']) + + # Mock dependencies + with patch('scripts.reboot_helper.reboot_module', return_value=False), \ + patch('sys.exit') as mock_exit: + + scripts.reboot_helper.main() + + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) + + +def test_main_is_dpu_command_true(monkeypatch): + # Simulate command-line arguments for is_dpu + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'is_dpu']) + + # Mock is_dpu to return True + with patch('scripts.reboot_helper.is_dpu', return_value=True), \ + patch('builtins.print') as mock_print, \ + patch('sys.exit') as mock_exit, \ + patch('builtins.print') as mock_print: + scripts.reboot_helper.main() + + # Check that the message was printed + mock_print.assert_called_with("Script is running on DPU module") + mock_exit.assert_not_called() + + +def test_main_is_dpu_command_false(monkeypatch): + # Simulate command-line arguments for is_dpu + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'is_dpu']) + + # Mock is_dpu to return False + with patch('scripts.reboot_helper.is_dpu', return_value=False), \ + patch('sys.exit') as mock_exit, \ + patch('builtins.print') as mock_print: + scripts.reboot_helper.main() + + # Check that sys.exit was called with EXIT_FAIL + mock_print.assert_not_called() + mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) + + +def test_main_pci_detach_command_success(monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + + # Mock dependencies + with patch('scripts.reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ + patch('builtins.print') as mock_print, \ + patch('sys.exit') as mock_exit: + + scripts.reboot_helper.main() + + # Check that pci_detach_module was called + mock_reboot_module.assert_called_once_with('DPU1') + mock_exit.assert_not_called() + + +def test_main_pci_detach_command_fail(monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + + # Mock dependencies + with patch('scripts.reboot_helper.pci_detach_module', return_value=False), \ + patch('sys.exit') as mock_exit: + + scripts.reboot_helper.main() + + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) + + +def test_main_unknown_command(monkeypatch): + # Simulate an unknown command + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'unknown', 'DPU1']) + # Mock sys.exit and capture print output + with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: + scripts.reboot_helper.main() -if __name__ == '__main__': - unittest.main() + # Check that the unknown command message was printed + mock_print.assert_called_with("Unknown command: unknown") + mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) From d41bf4367e0f59be4e2cee48dc9c7de252a4c833 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 4 Nov 2024 18:44:40 +0000 Subject: [PATCH 04/26] Fix minor indentation --- tests/test_reboot_helper.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 18e27613d3..c40796967d 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -193,7 +193,7 @@ def test_pci_detach_module_not_found(mock_load_platform_chassis, mock_get_platfo def test_pci_detach_module_not_implemented(mock_load_platform_chassis, mock_is_smartswitch, - mock_get_platform_info, mock_platform_chassis): + mock_get_platform_info, mock_platform_chassis): mock_get_platform_info.return_value = {'platform': 'mock_platform'} mock_platform_chassis.pci_detach.side_effect = NotImplementedError with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): @@ -208,7 +208,7 @@ def test_pci_detach_module_empty_dpu_list(mock_load_platform_chassis): def test_pci_detach_module_success(mock_load_platform_chassis, mock_is_smartswitch, - mock_get_platform_info, mock_platform_chassis): + mock_get_platform_info, mock_platform_chassis): mock_get_platform_info.return_value = {'platform': 'mock_platform'} with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): mock_platform_chassis.reboot.return_value = True @@ -222,7 +222,6 @@ def test_main_reboot_command_success(monkeypatch): # Mock dependencies with patch('scripts.reboot_helper.reboot_module', return_value=True) as mock_reboot_module, \ - patch('builtins.print') as mock_print, \ patch('sys.exit') as mock_exit: scripts.reboot_helper.main() @@ -283,7 +282,6 @@ def test_main_pci_detach_command_success(monkeypatch): # Mock dependencies with patch('scripts.reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ - patch('builtins.print') as mock_print, \ patch('sys.exit') as mock_exit: scripts.reboot_helper.main() From 68e70abbc034346b0666c11bf9d97004f4f97f7e Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Wed, 6 Nov 2024 23:28:20 +0000 Subject: [PATCH 05/26] Move smartswitch helper functions to new reboot_smartswitch_helper.sh --- scripts/reboot | 186 +-------------- scripts/reboot_helper.py | 182 ++++++--------- scripts/reboot_smartswitch_helper | 264 +++++++++++++++++++++ setup.py | 1 + tests/test_reboot_helper.py | 370 ++++++++++++++++-------------- utilities_common/chassis.py | 9 + 6 files changed, 544 insertions(+), 468 deletions(-) create mode 100644 scripts/reboot_smartswitch_helper diff --git a/scripts/reboot b/scripts/reboot index e4723503ae..3de54703ad 100755 --- a/scripts/reboot +++ b/scripts/reboot @@ -5,6 +5,8 @@ declare -r EXIT_ERROR=1 declare -r WATCHDOG_UTIL="/usr/local/bin/watchdogutil" declare -r PRE_REBOOT_HOOK="pre_reboot_hook" +source reboot_smartswitch_helper + DEVPATH="/usr/share/sonic/device" PLAT_REBOOT="platform_reboot" PLATFORM_UPDATE_REBOOT_CAUSE="platform_update_reboot_cause" @@ -185,135 +187,6 @@ function check_conflict_boot_in_fw_update() fi } -# Function to retrieve DPU IP from CONFIG_DB -function get_dpu_ip() -{ - local DPU_NAME=$1 - dpu_ip=$(sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips@") - if [ $? -ne 0 ] || [ -z "$dpu_ip" ]; then - echo "Error: Failed to retrieve DPU IP address for ${DPU_NAME}" - exit ${EXIT_ERROR} - fi - debug "$DPU_NAME ip: $dpu_ip" -} - -# Function to retrieve GNMI port from CONFIG_DB -function get_gnmi_port() { - local DPU_NAME=$1 - port=$(sonic-db-cli CONFIG_DB HGET "DPU_PORT|$DPU_NAME" "gnmi") - if [ $? -ne 0 ] || [ -z "$port" ]; then - echo "Error: Failed to retrieve GNMI port" - exit ${EXIT_ERROR} - fi - debug "$DPU_NAME GNMI port:$port" -} - -# Function to get reboot status from DPU -function get_reboot_status() -{ - local dpu_ip=$1 - local port=$2 - reboot_status=$(docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc RebootStatus) - if [ $? -ne 0 ] || [ -z "$reboot_status" ]; then - echo "Error: Failed to send reboot status command to DPU ${DPU_NAME}" - exit ${EXIT_ERROR} - fi - debug "$reboot_status" -} - -# Function to retrieve DPU bus info from platform JSON -function get_dpu_bus_info() { - local DPU_NAME=$1 - DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") - if [ -z "$DPU_BUS_INFO" ]; then - echo "Error: bus_info not found for DPU ${DPU_NAME}" - exit ${EXIT_ERROR} - fi - debug "$DPU_NAME : $DPU_BUS_INFO" -} - -#Function to detach PCI module -function pci_detach_module() { - local DPU_NAME=$1 - local DPU_BUS_INFO=$2 - status = $(python3 -c "import reboot_helper; reboot_helper.pci_detach_module('${DPU_NAME}')") - if [ -z "$status" ] || [ "$status" = "false" ]; then - echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove - fi -} - -# Function to reboot the platform module -function reboot_platform_module() { - local DPU_NAME=$1 - reboot_status=$(python3 -c "import reboot_helper; reboot_helper.reboot_module('${DPU_NAME}')") - if [ -z "$reboot_status" ] || [ "$reboot_status" = "false" ]; then - echo "Error: Failed to reboot the platform" - exit ${EXIT_ERROR} - fi -} - -function reboot_dpu_module() -{ - local DPU_NAME=$1 - local DPU_INDEX=${DPU_NAME//[!0-9]/} - - debug "User requested rebooting device ${DPU_NAME} ..." - - # Retrieve DPU IP and GNMI port - dpu_ip=$(get_dpu_ip "${DPU_NAME}") - port=$(get_gnmi_port "${DPU_NAME}") - - if [ -z "$dpu_ip" ] || [ -z "$port" ]; then - echo "Error: Failed to retrieve DPU IP or GNMI port for ${DPU_NAME}" - exit ${EXIT_ERROR} - fi - - # Issue GNOI client command to reboot the DPU - docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc Reboot -jsonin '{"method":3}' - if [ $? -ne 0 ]; then - echo "Error: Failed to send reboot command to DPU ${DPU_NAME}" - exit ${EXIT_ERROR} - fi - - # Retrieve dpu_halt_services_timeout value using jq - dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) - if [ $? -ne 0 ]; then - echo "Error: Failed to retrieve dpu_halt_services_timeout from ${PLATFORM_JSON_PATH}" - exit ${EXIT_ERROR} - fi - - # Poll on reboot status response with a timeout mechanism - poll_interval=5 - waited_time=0 - while true; do - reboot_status=$(get_reboot_status "${dpu_ip}" "${port}") - debug "GNOI RebootStatus response ${reboot_status}" - is_reboot_active=$(echo "$reboot_status" | grep "active" | awk '{print $2}') - if [ "$is_reboot_active" == "false" ]; then - break - fi - - sleep "$poll_interval" - waited_time=$((waited_time + poll_interval)) - if [ $waited_time -ge $dpu_halt_services_timeout ]; then - echo "Error: Timeout waiting for DPU ${DPU_NAME} to finish rebooting" - exit ${EXIT_ERROR} - fi - done - - # Check if DPU exists and retrieve bus info - DPU_BUS_INFO=$(get_dpu_bus_info "${DPU_NAME}") - - # Update STATE_DB and handle PCIe removal and rescan - sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' - - pci_detach_module "${DPU_NAME}" "${DPU_BUS_INFO}" - reboot_platform_module "${DPU_NAME}" - echo 1 > /sys/bus/pci/rescan - - sonic-db-cli state_db del "PCIE_DETACH_INFO|${DPU_NAME}" -} - function parse_options() { while getopts "h?vfpd" opt; do @@ -360,56 +233,6 @@ function linecard_reboot_notify_supervisor() fi } -# Function to reboot all DPUs in parallel -function reboot_all_dpus() { - local NUM_DPU=$1 - - for (( i=0; i<"$NUM_DPU"; i++ )); do - echo "Rebooting DPU module dpu$i" - reboot_dpu_module "dpu$i" & - done - wait -} - -# Function to handle scenarios on smart switch -function handle_smart_switch() { - if [ -f "$PLATFORM_JSON_PATH" ]; then - NUM_DPU=$(jq -r '.DPUS | length' "$PLATFORM_JSON_PATH" 2>/dev/null) - if [ "$NUM_DPU" -gt 0 ]; then - SMART_SWITCH="yes" - fi - fi - - if [[ "$REBOOT_DPU" == "yes" ]]; then - if [[ "$SMART_SWITCH" == "yes" ]]; then - echo "User requested to reboot the device ${DPU_MODULE_NAME}" - reboot_dpu_module "$DPU_MODULE_NAME" - else - echo "Invalid '-d' option specified for a non-smart switch" - exit ${EXIT_ERROR} - fi - fi - - is_dpu=$(python3 -c "import reboot_helper; reboot_helper.is_dpu()") - debug "Is the platform DPU: $is_dpu" - - # Check if system is a DPU and handle -p option accordingly - if [[ "$is_dpu" == "True" && "$PRE_SHUTDOWN" != "yes" ]]; then - echo "Invalid, '-p' option not specified for a DPU" - exit ${EXIT_ERROR} - elif [[ "$is_dpu" != "True" && "$PRE_SHUTDOWN" == "yes" ]]; then - echo "Invalid '-p' option specified for a non-DPU" - exit ${EXIT_ERROR} - fi - - if [[ "$SMART_SWITCH" == "yes" ]]; then - # If not a DPU, reboot all DPUs in parallel - if [[ "$is_dpu" != "True" ]]; then - reboot_all_dpus "$NUM_DPU" - fi - fi -} - parse_options $@ # Exit if not superuser @@ -420,7 +243,10 @@ fi debug "User requested rebooting device ..." -handle_smart_switch +smart_switch_result=$(handle_smart_switch "$REBOOT_DPU" "$PRE_SHUTDOWN" "$DPU_MODULE_NAME") +if [[ $smart_switch_result -ne 0 ]]; then + exit $smart_switch_result +fi check_conflict_boot_in_fw_update diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 7f1506b728..78fe86c00b 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -4,13 +4,10 @@ # # Utility helper for reboot within SONiC -import os -import re import sys -import json import sonic_platform -from sonic_py_common import logger, device_info -from utilities_common.chassis import is_smartswitch +from sonic_py_common import logger +from utilities_common.chassis import is_smartswitch, get_dpu_list SYSLOG_IDENTIFIER = "reboot_helper" @@ -44,65 +41,48 @@ def load_platform_chassis(): return False return True except Exception as e: - log.log_error(f"Failed to instantiate Chassis: {e}") + log.log_error("Failed to instantiate Chassis: {}".format(str(e))) return False -def load_platform_json(platform): - """Load and return the content of platform.json.""" - platform_json_path = os.path.join("/usr/share/sonic/device", platform, "platform.json") - try: - with open(platform_json_path, 'r') as platform_json: - return json.load(platform_json) - except FileNotFoundError: - log.log_error("platform.json not found") - except json.JSONDecodeError: - log.log_error("Failed to parse platform.json") - return None - - -def get_all_dpus(): +def load_and_verify(module_name): """ - Retrieve a list of all DPUs (Data Processing Units) in the system. - This function checks if the platform is a smartswitch and then loads the platform.json - file to extract the DPUs dictionary. It converts the DPU names to uppercase and returns - them as a list. + Load the platform chassis and verify the required parameters. + + Args: + module_name (str): The name of the module to verify. Returns: - list: A list of DPU names in uppercase. + bool: True if platform chassis is successfully loaded and required parameters are verified, False otherwise. """ - if not is_smartswitch(): - return [] + # Load the platform chassis if not already loaded + if not load_platform_chassis(): + return False - platform_info = device_info.get_platform_info() - platform = platform_info.get('platform') - if not platform: - log.log_error("Platform does not exist in platform_info") - return [] + if not is_smartswitch(): + log.log_error("Platform is not a smartswitch") + return False - config_data = load_platform_json(platform) - if config_data is None: - return [] + dpu_list = get_dpu_list() + if module_name.lower() not in dpu_list: + log.log_error("Module {} not found".format(module_name)) + return False - dpus = config_data.get("DPUS", []) - return [dpu.upper() for dpu in dpus] + return True -def reboot_module(module_name): +def reboot_dpu(module_name, reboot_type): """ Reboot the specified module by invoking the platform API. Args: module_name (str): The name of the module to reboot. + reboot_type (str): The type of reboot requested for the module. Returns: bool: True if the reboot command was successfully sent, False otherwise. """ - # Load the platform chassis if not already loaded - load_platform_chassis() - - if not is_smartswitch(): - log.log_error("Platform is not a smartswitch to reboot module") + if not load_and_verify(module_name): return False # Attempt to reboot the module @@ -110,45 +90,15 @@ def reboot_module(module_name): log.log_error("Reboot method not found in platform chassis") return False - if module_name.upper() not in get_all_dpus(): - log.log_error("Module {} not found".format(module_name)) - return False - - log.log_info("Rebooting module {}...".format(module_name)) + log.log_info("Rebooting module {} with reboot_type {}...".format(module_name, reboot_type)) try: - platform_chassis.reboot(module_name) - log.log_info("Reboot command sent for module {}".format(module_name)) - except NotImplementedError: - raise NotImplementedError("Reboot not implemented on this platform: {type(e).__name__}") - except Exception as e: + platform_chassis.reboot(module_name, reboot_type) + log.log_info("Reboot command sent for module {} with reboot_type {}".format(module_name, reboot_type)) + return True + except (NotImplementedError, AttributeError) as e: log.log_error("Unexpected error occurred while rebooting module {}: {}".format(module_name, e)) return False - return True - - -def is_dpu(): - """Check if script is running on DPU module""" - - # Load the platform chassis if not already loaded - load_platform_chassis() - - if not is_smartswitch(): - return False - - # Load platform.json - platform_info = device_info.get_platform_info() - platform = platform_info.get('platform') - if not platform: - log.log_error("Platform does not exist in platform_info") - return False - - config_data = load_platform_json(platform) - if config_data is None: - return False - - return any(re.search(r'\.DPU$', key) for key in config_data.keys()) - def pci_detach_module(module_name): """ @@ -160,12 +110,7 @@ def pci_detach_module(module_name): Returns: bool: True if the detach command was successfully sent, False otherwise. """ - - # Load the platform chassis if not already loaded - load_platform_chassis() - - if not is_smartswitch(): - log.log_error("Platform is not a smartswitch to detach module") + if not load_and_verify(module_name): return False # Attempt to detach the module @@ -173,47 +118,64 @@ def pci_detach_module(module_name): log.log_error("PCI detach method not found in platform chassis") return False - if module_name.upper() not in get_all_dpus(): - log.log_error("Module {} not found".format(module_name)) - return False - log.log_info("Detaching module {}...".format(module_name)) try: platform_chassis.pci_detach(module_name) log.log_info("Detach command sent for module {}".format(module_name)) return True - except NotImplementedError: - raise NotImplementedError("PCI detach not implemented on this platform: {type(e).__name__}") - except Exception as e: + except (NotImplementedError, AttributeError) as e: + log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) + return False + + +def pci_reattach_module(module_name): + """ + Rescan the specified module by invoking the platform API. + + Args: + module_name (str): The name of the module to rescan. + + Returns: + bool: True if the rescan command was successfully sent, False otherwise. + """ + if not load_and_verify(module_name): + return False + + # Attempt to detach the module + if not hasattr(platform_chassis, 'pci_reattach'): + log.log_error("PCI reattach method not found in platform chassis") + return False + + log.log_info("Rescaning module {}...".format(module_name)) + try: + platform_chassis.pci_reattach(module_name) + log.log_info("Rescan command sent for module {}".format(module_name)) + return True + except (NotImplementedError, AttributeError) as e: log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) return False def main(): - if len(sys.argv) < 2: - print("Usage: reboot_helper.py [module_name]") + if len(sys.argv) < 3: + print("Usage: reboot_helper.py [reboot_type]") sys.exit(EXIT_FAIL) command = sys.argv[1] + module_name = sys.argv[2] - if command == "reboot" or command == "pci_detach": - if len(sys.argv) < 3: - print("Usage: reboot_helper.py ") + if command == "reboot": + if len(sys.argv) < 4: + print("Usage: reboot_helper.py reboot ") + sys.exit(EXIT_FAIL) + reboot_type = sys.argv[3] + if not reboot_dpu(module_name, reboot_type): + sys.exit(EXIT_FAIL) + elif command == "pci_detach": + if not pci_detach_module(module_name): sys.exit(EXIT_FAIL) - module_name = sys.argv[2] - - if command == "reboot": - success = reboot_module(module_name) - if not success: - sys.exit(EXIT_FAIL) - elif command == "pci_detach": - success = pci_detach_module(module_name) - if not success: - sys.exit(EXIT_FAIL) - elif command == "is_dpu": - if is_dpu(): - print("Script is running on DPU module") - else: + elif command == "pci_reattach": + if not pci_reattach_module(module_name): sys.exit(EXIT_FAIL) else: print(f"Unknown command: {command}") diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper new file mode 100644 index 0000000000..2c5a932b1b --- /dev/null +++ b/scripts/reboot_smartswitch_helper @@ -0,0 +1,264 @@ +#!/bin/bash + +declare -r GNMI_PORT=50052 +declare -r MODULE_REBOOT_DPU="DPU" +declare -r MODULE_REBOOT_SMARTSWITCH="SMARTSWITCH" + +# Function to check if running on smart switch +function is_smartswitch() +{ + return $(python3 -c "from utilities_common.chassis import is_smartswitch; is_smartswitch()") +} + +# Function to check if running on DPU +function is_dpu() +{ + return $(python3 -c "from utilities_common.chassis import is_dpu; is_dpu()") +} + +# Function to retrieve number of DPUs +function get_num_dpu() +{ + return $(python3 -c "from utilities_common.chassis import get_num_dpu; get_num_dpus()") +} + +# Function to retrieve DPU IP from CONFIG_DB +function get_dpu_ip() +{ + local DPU_NAME=$1 + dpu_ip=$(sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips@") + if [ $? -ne 0 ] || [ -z "$dpu_ip" ]; then + echo "Error: Failed to retrieve DPU IP address for ${DPU_NAME}" + return ${EXIT_ERROR} + fi + debug "$DPU_NAME ip: $dpu_ip" +} + +# Function to retrieve GNMI port from CONFIG_DB +function get_gnmi_port() { + local DPU_NAME=$1 + port=$(sonic-db-cli CONFIG_DB HGET "DPU_PORT|$DPU_NAME" "gnmi") + if [ $? -ne 0 ] || [ -z "$port" ]; then + debug "Error: Failed to retrieve GNMI port" + port=$GNMI_PORT #Hardcoded default GNMI port + return ${EXIT_ERROR} + fi + debug "$DPU_NAME GNMI port:$port" +} + +# Function to get reboot status from DPU +function get_reboot_status() +{ + local dpu_ip=$1 + local port=$2 + reboot_status=$(docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc RebootStatus) + if [ $? -ne 0 ] || [ -z "$reboot_status" ]; then + echo "Error: Failed to send reboot status command to DPU ${DPU_NAME}" + return ${EXIT_ERROR} + fi + debug "$reboot_status" +} + +# Function to retrieve DPU bus info from platform JSON +function get_dpu_bus_info() { + local DPU_NAME=$1 + DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") + if [ -z "$DPU_BUS_INFO" ]; then + echo "Error: bus_info not found for DPU ${DPU_NAME}" + exit ${EXIT_ERROR} + fi + debug "$DPU_NAME : $DPU_BUS_INFO" +} + +#Function to detach PCI module +function pci_detach_module() { + local DPU_NAME=$1 + + status = $(python3 -c "import reboot_helper; reboot_helper.pci_detach_module('${DPU_NAME}')") + if [ -z "$status" ] || [ "$status" = "false" ]; then + # Check if DPU exists and retrieve bus info + DPU_BUS_INFO=$(get_dpu_bus_info "${DPU_NAME}") + echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove + fi +} + +#Function to rescan PCI module +function pci_reattach_module() { + local DPU_NAME=$1 + + status = $(python3 -c "import reboot_helper; reboot_helper.pci_reattach_module('${DPU_NAME}')") + if [ -z "$status" ] || [ "$status" = "false" ]; then + echo 1 > /sys/bus/pci/rescan + fi +} + +# Function to reboot DPU +function reboot_dpu_platform() { + local DPU_NAME=$1 + local REBOOT_TYPE=$2 + + local reboot_status=$(python3 -c "import reboot_helper; reboot_helper.reboot_dpu('${DPU_NAME}', '${REBOOT_TYPE}')") + if [ -z "$reboot_status" ] || [ "$reboot_status" = "false" ]; then + echo "Error: Failed to reboot the DPU" + return ${EXIT_ERROR} + fi +} + +#Function to wait for DPU reboot status +function wait_for_dpu_reboot_status() { + local dpu_ip=$1 + local port=$2 + + # Retrieve dpu_halt_services_timeout value using jq + local dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) + if [ $? -ne 0 ]; then + echo "Error: Failed to retrieve dpu_halt_services_timeout from ${PLATFORM_JSON_PATH}" + return ${EXIT_ERROR} + fi + + # Poll on reboot status response with a timeout mechanism + local poll_interval=5 + local waited_time=0 + local reboot_status + while true; do + reboot_status=$(get_reboot_status "${dpu_ip}" "${port}") + debug "GNOI RebootStatus response ${reboot_status}" + is_reboot_active=$(echo "$reboot_status" | grep "active" | awk '{print $2}') + if [ "$is_reboot_active" == "false" ]; then + break + fi + + sleep "$poll_interval" + waited_time=$((waited_time + poll_interval)) + if [ $waited_time -ge $dpu_halt_services_timeout ]; then + echo "Error: Timeout waiting for DPU ${DPU_NAME} to finish rebooting" + return ${EXIT_ERROR} + fi + done +} + +# Function to send reboot command to DPU +function gnmi_reboot_dpu() +{ + # Retrieve DPU IP and GNMI port + dpu_ip=$(get_dpu_ip "${DPU_NAME}") + port=$(get_gnmi_port "${DPU_NAME}") + + if [ -z "$dpu_ip" ] || [ -z "$port" ]; then + echo "Error: Failed to retrieve DPU IP or GNMI port for ${DPU_NAME}" + return ${EXIT_ERROR} + fi + + docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc Reboot -jsonin '{"method":3}' + if [ $? -ne 0 ]; then + echo "Error: Failed to send reboot command to DPU ${DPU_NAME}" + return ${EXIT_ERROR} + fi + + wait_for_dpu_reboot_status "${dpu_ip}" "${port}" +} + +function reboot_dpu() +{ + local DPU_NAME=$1 + local REBOOT_TYPE=$2 + local DPU_INDEX=${DPU_NAME//[!0-9]/} + + debug "User requested rebooting device ${DPU_NAME} ..." + + # Send reboot command to DPU + gnoi_reboot_dpu "${dpu_ip}" "${port}" + + # Update STATE_DB and handle PCIe removal and rescan + sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' + + pci_detach_module "${DPU_NAME}" + reboot_dpu_platform "${DPU_NAME}" + pci_reattach_module "${DPU_NAME}" + + sonic-db-cli state_db del "PCIE_DETACH_INFO|${DPU_NAME}" +} + +# Function to reboot all DPUs in parallel +function reboot_all_dpus() { + local NUM_DPU=$1 + + if [[ -z $NUM_DPU ]]; then + echo "Error: Failed to retrieve number of DPUs or no DPUs found" + return + fi + + for (( i=0; i<"$NUM_DPU"; i++ )); do + echo "Rebooting DPU module dpu$i" + reboot_dpu "dpu$i" "$MODULE_REBOOT_SMARTSWITCH" & + done + wait +} + +# Function to verify DPU module name +function verify_dpu_module_name() { + local DPU_MODULE_NAME=$1 + local NUM_DPU=$2 + + if [[ -z "$DPU_MODULE_NAME" ]]; then + echo "Error: DPU module name not provided" + return $EXIT_ERROR + fi + + NUM_DPU=$((NUM_DPU - 1)) + if [[ ! "$DPU_MODULE_NAME" =~ ^dpu[0-$NUM_DPU]$ ]]; then + echo "Error: Invalid DPU module name provided" + return $EXIT_ERROR + fi +} + +# Function to handle scenarios on smart switch +function handle_smart_switch() { + local REBOOT_DPU=$1 + local PRE_SHUTDOWN=$2 + local DPU_NAME=$3 + + NUM_DPU=$(get_num_dpu) + + if [[ "$REBOOT_DPU" == "yes" ]]; then + if [[ "$PRE_SHUTDOWN" == "yes" ]]; then + echo "Invalid, '-p' option specified" + return $EXIT_ERROR + fi + + if is_smartswitch; then + if [[ -z $NUM_DPU ]]; then + echo "Error: Failed to retrieve number of DPUs or no DPUs found" + return $EXIT_ERROR + fi + + DPU_MODULE_NAME="${DPU_NAME,,}" + verify_dpu_module_name "$DPU_MODULE_NAME" "$NUM_DPU" + + echo "User requested to reboot the device ${DPU_MODULE_NAME}" + reboot_dpu "$DPU_MODULE_NAME" "$MODULE_REBOOT_DPU" + else + echo "Invalid '-d' option specified for a non-smart switch" + return $EXIT_ERROR + fi + fi + + debug "Is the platform DPU: $(is_dpu)" + + # Check if system is a DPU and handle -p option accordingly + if is_dpu; then + if [[ "$PRE_SHUTDOWN" != "yes" ]]; then + echo "Invalid, '-p' option not specified for a DPU" + return ${EXIT_ERROR} + elif [[ "$PRE_SHUTDOWN" == "yes" ]]; then + echo "Invalid '-p' option specified for a non-DPU" + return ${EXIT_ERROR} + fi + return ${EXIT_SUCCESS} + fi + + # If the system is a smart switch, reboot all DPUs in parallel + if is_smartswitch; then + reboot_all_dpus "$NUM_DPU" "$MODULE_REBOOT_SMARTSWITCH" + fi +} diff --git a/setup.py b/setup.py index c1e579857c..c0e5f006bb 100644 --- a/setup.py +++ b/setup.py @@ -161,6 +161,7 @@ 'scripts/psushow', 'scripts/queuestat', 'scripts/reboot', + 'scripts/reboot_smartswitch_helper', 'scripts/reboot_helper.py', 'scripts/route_check.py', 'scripts/route_check_test.sh', diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index c40796967d..3f46054c26 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -1,33 +1,29 @@ # tests/test_reboot_helper.py - import os import sys import pytest from unittest.mock import patch, MagicMock, mock_open -test_path = os.path.dirname(os.path.abspath(__file__)) -modules_path = os.path.dirname(test_path) -sys.path.insert(0, modules_path) - sys.modules['sonic_platform'] = MagicMock() -import scripts.reboot_helper # noqa: E402 +sys.path.append("scripts") +import reboot_helper @pytest.fixture def mock_load_platform_chassis(): - with patch('scripts.reboot_helper.load_platform_chassis', return_value=True) as mock: + with patch('reboot_helper.load_platform_chassis', return_value=True) as mock: yield mock @pytest.fixture -def mock_is_smartswitch(): - with patch('scripts.reboot_helper.is_smartswitch', return_value=True) as mock: +def mock_load_and_verify(): + with patch('reboot_helper.load_and_verify', return_value=True) as mock: yield mock @pytest.fixture -def mock_get_platform_info(): - with patch('scripts.reboot_helper.device_info.get_platform_info') as mock: +def mock_is_smartswitch(): + with patch('reboot_helper.is_smartswitch', return_value=True) as mock: yield mock @@ -38,242 +34,178 @@ def mock_open_file(): @pytest.fixture def mock_platform_chassis(): - with patch('scripts.reboot_helper.platform_chassis') as mock: + with patch('reboot_helper.platform_chassis') as mock: yield mock @pytest.fixture def mock_platform(): - with patch('scripts.reboot_helper.sonic_platform.platform.Platform') as mock: + with patch('reboot_helper.sonic_platform.platform.Platform') as mock: yield mock -def test_get_all_dpus_not_smartswitch(mock_is_smartswitch): - mock_is_smartswitch.return_value = False - dpu_list = scripts.reboot_helper.get_all_dpus() - assert dpu_list == [] - - -def test_get_all_dpus_invalid_platform_info(mock_is_smartswitch, mock_get_platform_info): - mock_is_smartswitch.return_value = True - mock_get_platform_info.return_value = {'platform': None} - dpu_list = scripts.reboot_helper.get_all_dpus() - assert dpu_list == [] - - -def test_get_all_dpus_file_not_found(mock_open_file): - with patch('builtins.open', side_effect=FileNotFoundError): - dpu_list = scripts.reboot_helper.get_all_dpus() - assert dpu_list == [] - - -def test_get_all_dpus_malformed_json(mock_is_smartswitch, mock_get_platform_info, mock_open_file): - mock_is_smartswitch.return_value = True - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - - with patch('builtins.open', mock_open(read_data='Invalid JSON')): - dpu_list = scripts.reboot_helper.get_all_dpus() - assert dpu_list == [] - - -def test_get_all_dpus_valid_json(mock_is_smartswitch, mock_get_platform_info, mock_open_file): - mock_is_smartswitch.return_value = True - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - - with patch('builtins.open', mock_open(read_data='{"DPUS": {"DPU1": {}, "DPU2": {}}}')): - dpu_list = scripts.reboot_helper.get_all_dpus() - assert dpu_list == ["DPU1", "DPU2"] +@pytest.fixture +def mock_get_dpu_list(): + with patch('reboot_helper.get_dpu_list', return_value=["dpu1"]) as mock: + yield mock def test_load_platform_chassis_success(mock_platform_chassis): mock_platform_chassis.get_chassis.return_value = MagicMock() - result = scripts.reboot_helper.load_platform_chassis() + result = reboot_helper.load_platform_chassis() assert result def test_load_platform_chassis_exception(mock_platform): mock_platform.side_effect = RuntimeError - result = scripts.reboot_helper.load_platform_chassis() + result = reboot_helper.load_platform_chassis() assert not result -def test_reboot_module_chassis_fail(mock_load_platform_chassis): +def test_load_and_verify_chassis_fail(): mock_load_platform_chassis.return_value = False - result = scripts.reboot_helper.reboot_module("DPU1") - assert not result + assert not reboot_helper.load_and_verify("DPU1") -def test_reboot_module_not_smartswitch(mock_load_platform_chassis): - with patch('scripts.reboot_helper.is_smartswitch', return_value=False): - result = scripts.reboot_helper.reboot_module("DPU1") - assert not result - - -def test_reboot_module_not_found(mock_load_platform_chassis, mock_get_platform_info): - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): - result = scripts.reboot_helper.reboot_module("DPU2") - assert not result +def test_load_and_verify_not_smartswitch(mock_is_smartswitch, mock_load_platform_chassis): + mock_is_smartswitch.return_value = False + mock_load_platform_chassis.return_value = True + assert not reboot_helper.load_and_verify("DPU1") -def test_reboot_module_not_implemented(mock_load_platform_chassis, mock_is_smartswitch, - mock_get_platform_info, mock_platform_chassis): - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - mock_platform_chassis.reboot.side_effect = NotImplementedError - with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): - with pytest.raises(RuntimeError): - scripts.reboot_helper.reboot_module("DPU1") +def test_load_and_verify_not_found(mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_dpu_list.return_value = ["dpu1"] + assert not reboot_helper.load_and_verify("DPU2") -def test_reboot_module_empty_dpu_list(mock_load_platform_chassis): - with patch('scripts.reboot_helper.get_all_dpus', return_value=[]): - result = scripts.reboot_helper.reboot_module("DPU1") - assert not result +def test_load_and_verify_empty_dpu_list(mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_dpu_list.return_value = [] -def test_reboot_module_success(mock_load_platform_chassis, mock_is_smartswitch, - mock_get_platform_info, mock_platform_chassis): - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): - mock_platform_chassis.reboot.return_value = True - result = scripts.reboot_helper.reboot_module("DPU1") - assert result + assert not reboot_helper.load_and_verify("DPU1") -def test_is_dpu_load_platform_chassis_fail(mock_load_platform_chassis): - mock_load_platform_chassis.return_value = False - result = scripts.reboot_helper.is_dpu() - assert not result +def test_load_and_verify_success(mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_dpu_list.return_value = ["dpu1"] + result = reboot_helper.load_and_verify("DPU1") + assert result -def test_is_dpu_not_smartswitch(mock_load_platform_chassis): - mock_load_platform_chassis.return_value = True - with patch('scripts.reboot_helper.is_smartswitch', return_value=False): - result = scripts.reboot_helper.is_dpu() - assert not result +def test_reboot_dpu_load_and_verify_fail(mock_load_and_verify): + mock_load_and_verify.return_value = False + result = reboot_helper.reboot_dpu("DPU1", "DPU") + assert not result -def test_is_dpu_found(mock_is_smartswitch, mock_load_platform_chassis, mock_get_platform_info, mock_open_file): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - with patch('builtins.open', mock_open(read_data='{".DPU": {}}')): - result = scripts.reboot_helper.is_dpu() - assert result +def test_reboot_dpu_not_implemented(mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.reboot.side_effect = NotImplementedError + assert not reboot_helper.reboot_dpu("DPU1", 'DPU') -def test_is_dpu_not_found(mock_is_smartswitch, mock_load_platform_chassis, mock_get_platform_info, mock_open_file): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_platform_info.return_value = {'platform': 'mock_platform'} +def test_reboot_dpu_success(mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.reboot.return_value = True + assert reboot_helper.reboot_dpu("DPU1", "DPU") - with patch('builtins.open', mock_open(read_data='{}')): - result = scripts.reboot_helper.is_dpu() - assert not result +def test_pci_detach_module_load_and_verify_fail(mock_load_and_verify): + mock_load_and_verify.return_value = False + assert not reboot_helper.pci_detach_module("DPU1") -def test_pci_detach_module_chassis_fail(mock_load_platform_chassis): - mock_load_platform_chassis.return_value = False - result = scripts.reboot_helper.pci_detach_module("DPU1") - assert not result +def test_pci_detach_module_not_implemented(mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.pci_detach.side_effect = NotImplementedError -def test_pci_detach_module_not_smartswitch(mock_load_platform_chassis): - with patch('scripts.reboot_helper.is_smartswitch', return_value=False): - result = scripts.reboot_helper.pci_detach_module("DPU1") - assert not result + assert not reboot_helper.pci_detach_module("DPU1") -def test_pci_detach_module_not_found(mock_load_platform_chassis, mock_get_platform_info): - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): - result = scripts.reboot_helper.pci_detach_module("DPU2") - assert not result +def test_pci_detach_module_success(mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.pci_detach.return_value = True + assert reboot_helper.pci_detach_module("DPU1") -def test_pci_detach_module_not_implemented(mock_load_platform_chassis, mock_is_smartswitch, - mock_get_platform_info, mock_platform_chassis): - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - mock_platform_chassis.pci_detach.side_effect = NotImplementedError - with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): - with pytest.raises(RuntimeError): - scripts.reboot_helper.pci_detach_module("DPU1") +def test_pci_reattach_module_load_and_verify_fail(mock_load_and_verify): + mock_load_and_verify.return_value = False + assert not reboot_helper.pci_reattach_module("DPU1") -def test_pci_detach_module_empty_dpu_list(mock_load_platform_chassis): - with patch('scripts.reboot_helper.get_all_dpus', return_value=[]): - result = scripts.reboot_helper.pci_detach_module("DPU1") - assert not result +def test_pci_reattach_module_not_implemented(mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.pci_reattach.side_effect = NotImplementedError + assert not reboot_helper.pci_reattach_module("DPU1") -def test_pci_detach_module_success(mock_load_platform_chassis, mock_is_smartswitch, - mock_get_platform_info, mock_platform_chassis): - mock_get_platform_info.return_value = {'platform': 'mock_platform'} - with patch('scripts.reboot_helper.get_all_dpus', return_value=["DPU1"]): - mock_platform_chassis.reboot.return_value = True - result = scripts.reboot_helper.pci_detach_module("DPU1") - assert result +def test_pci_reattach_module_success(mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.pci_reattach.return_value = True + assert reboot_helper.pci_reattach_module("DPU1") -def test_main_reboot_command_success(monkeypatch): +def test_main_reboot_command_fail(monkeypatch): # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1']) + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) # Mock dependencies - with patch('scripts.reboot_helper.reboot_module', return_value=True) as mock_reboot_module, \ + with patch('reboot_helper.reboot_dpu', return_value=False), \ patch('sys.exit') as mock_exit: - scripts.reboot_helper.main() + reboot_helper.main() - # Check that reboot_module was called - mock_reboot_module.assert_called_once_with('DPU1') - mock_exit.assert_not_called() + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) -def test_main_reboot_command_fail(monkeypatch): +def test_main_reboot_command_type_fail(monkeypatch): # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1']) + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'UNKNOWN']) # Mock dependencies - with patch('scripts.reboot_helper.reboot_module', return_value=False), \ + with patch('reboot_helper.reboot_dpu', return_value=False), \ patch('sys.exit') as mock_exit: - scripts.reboot_helper.main() + reboot_helper.main() # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + +def test_main_reboot_command_success(monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) -def test_main_is_dpu_command_true(monkeypatch): - # Simulate command-line arguments for is_dpu - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'is_dpu']) + # Mock dependencies + with patch('reboot_helper.reboot_dpu', return_value=True) as mock_reboot_dpu, \ + patch('sys.exit') as mock_exit: - # Mock is_dpu to return True - with patch('scripts.reboot_helper.is_dpu', return_value=True), \ - patch('builtins.print') as mock_print, \ - patch('sys.exit') as mock_exit, \ - patch('builtins.print') as mock_print: - scripts.reboot_helper.main() + reboot_helper.main() - # Check that the message was printed - mock_print.assert_called_with("Script is running on DPU module") + # Check that reboot_module was called + mock_reboot_dpu.assert_called_once_with('DPU1', 'DPU') mock_exit.assert_not_called() -def test_main_is_dpu_command_false(monkeypatch): - # Simulate command-line arguments for is_dpu - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'is_dpu']) +def test_main_pci_detach_command_fail(monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + + # Mock dependencies + with patch('reboot_helper.pci_detach_module', return_value=False), \ + patch('sys.exit') as mock_exit: - # Mock is_dpu to return False - with patch('scripts.reboot_helper.is_dpu', return_value=False), \ - patch('sys.exit') as mock_exit, \ - patch('builtins.print') as mock_print: - scripts.reboot_helper.main() + reboot_helper.main() # Check that sys.exit was called with EXIT_FAIL - mock_print.assert_not_called() - mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) def test_main_pci_detach_command_success(monkeypatch): @@ -281,28 +213,43 @@ def test_main_pci_detach_command_success(monkeypatch): monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) # Mock dependencies - with patch('scripts.reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ + with patch('reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ patch('sys.exit') as mock_exit: - scripts.reboot_helper.main() + reboot_helper.main() # Check that pci_detach_module was called mock_reboot_module.assert_called_once_with('DPU1') mock_exit.assert_not_called() -def test_main_pci_detach_command_fail(monkeypatch): +def test_main_pci_reattach_command_fail(monkeypatch): # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) # Mock dependencies - with patch('scripts.reboot_helper.pci_detach_module', return_value=False), \ + with patch('reboot_helper.pci_reattach_module', return_value=False), \ patch('sys.exit') as mock_exit: - scripts.reboot_helper.main() + reboot_helper.main() # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + + +def test_main_pci_reattach_command_success(monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) + + # Mock dependencies + with patch('reboot_helper.pci_reattach_module', return_value=True) as mock_reboot_module, \ + patch('sys.exit') as mock_exit: + + reboot_helper.main() + + # Check that pci_reattach_module was called + mock_reboot_module.assert_called_once_with('DPU1') + mock_exit.assert_not_called() def test_main_unknown_command(monkeypatch): @@ -311,8 +258,75 @@ def test_main_unknown_command(monkeypatch): # Mock sys.exit and capture print output with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: - scripts.reboot_helper.main() + reboot_helper.main() # Check that the unknown command message was printed mock_print.assert_called_with("Unknown command: unknown") - mock_exit.assert_called_once_with(scripts.reboot_helper.EXIT_FAIL) + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + + +def test_main_no_arguments(monkeypatch): + # Simulate no command-line arguments + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py']) + + # Mock sys.exit and capture print output + with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: + reboot_helper.main() + + # Check that the usage message was printed + mock_print.assert_called_with("Usage: reboot_helper.py [reboot_type]") + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + + +def test_main_reboot_missing_reboot_type(monkeypatch): + # Simulate command-line arguments for reboot without reboot_type + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1']) + + # Mock sys.exit and capture print output + with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: + reboot_helper.main() + + # Check that the usage message was printed + mock_print.assert_called_with("Usage: reboot_helper.py reboot ") + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + + +def test_main_reboot_invalid_reboot_type(monkeypatch): + # Simulate command-line arguments for reboot with invalid reboot_type + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'INVALID']) + + # Mock dependencies + with patch('reboot_helper.reboot_dpu', return_value=False), \ + patch('sys.exit') as mock_exit: + reboot_helper.main() + + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + + +def test_main_pci_detach_invalid_module(monkeypatch): + # Simulate command-line arguments for pci_detach with invalid module + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'INVALID']) + + # Mock dependencies + with patch('reboot_helper.pci_detach_module', return_value=False), \ + patch('sys.exit') as mock_exit: + + reboot_helper.main() + + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + + +def test_main_pci_reattach_invalid_module(monkeypatch): + # Simulate command-line arguments for pci_reattach with invalid module + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'INVALID']) + + # Mock dependencies + with patch('reboot_helper.pci_reattach_module', return_value=False), \ + patch('sys.exit') as mock_exit: + + reboot_helper.main() + + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) diff --git a/utilities_common/chassis.py b/utilities_common/chassis.py index 667f2ab155..9e3e8d717f 100644 --- a/utilities_common/chassis.py +++ b/utilities_common/chassis.py @@ -20,3 +20,12 @@ def get_chassis_local_interfaces(): def is_smartswitch(): return hasattr(device_info, 'is_smartswitch') and device_info.is_smartswitch() + +def is_dpu(): + return hasattr(device_info, 'is_dpu') and device_info.is_dpu() + +def get_dpu_list(): + if hasattr(device_info, 'get_dpu_list'): + return device_info.get_dpu_list() + + return [] From 3848b75e4b6ce673fc0be83b16d59ca27d3a1aff Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 8 Nov 2024 20:46:01 +0000 Subject: [PATCH 06/26] Fix pre-commit errors --- scripts/reboot_helper.py | 21 ++++++++++++--------- tests/test_reboot_helper.py | 10 +++++----- utilities_common/chassis.py | 2 ++ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 78fe86c00b..63d0470d43 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -92,10 +92,11 @@ def reboot_dpu(module_name, reboot_type): log.log_info("Rebooting module {} with reboot_type {}...".format(module_name, reboot_type)) try: - platform_chassis.reboot(module_name, reboot_type) - log.log_info("Reboot command sent for module {} with reboot_type {}".format(module_name, reboot_type)) + status = platform_chassis.reboot(module_name, reboot_type) + if not status: + log.log_error("Reboot status for module {}: {}".format(module_name, status)) return True - except (NotImplementedError, AttributeError) as e: + except Exception as e: log.log_error("Unexpected error occurred while rebooting module {}: {}".format(module_name, e)) return False @@ -120,10 +121,11 @@ def pci_detach_module(module_name): log.log_info("Detaching module {}...".format(module_name)) try: - platform_chassis.pci_detach(module_name) - log.log_info("Detach command sent for module {}".format(module_name)) + status = platform_chassis.pci_detach(module_name) + if not status: + log.log_error("PCI detach status for module {}: {}".format(module_name, status)) return True - except (NotImplementedError, AttributeError) as e: + except Exception as e: log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) return False @@ -148,10 +150,11 @@ def pci_reattach_module(module_name): log.log_info("Rescaning module {}...".format(module_name)) try: - platform_chassis.pci_reattach(module_name) - log.log_info("Rescan command sent for module {}".format(module_name)) + status = platform_chassis.pci_reattach(module_name) + if not status: + log.log_error("PCI detach status for module {}: {}".format(module_name, status)) return True - except (NotImplementedError, AttributeError) as e: + except Exception as e: log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) return False diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 3f46054c26..47076aca45 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -1,5 +1,4 @@ # tests/test_reboot_helper.py -import os import sys import pytest from unittest.mock import patch, MagicMock, mock_open @@ -7,7 +6,8 @@ sys.modules['sonic_platform'] = MagicMock() sys.path.append("scripts") -import reboot_helper +import reboot_helper # noqa: E402 + @pytest.fixture def mock_load_platform_chassis(): @@ -297,7 +297,7 @@ def test_main_reboot_invalid_reboot_type(monkeypatch): # Mock dependencies with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: reboot_helper.main() # Check that sys.exit was called with EXIT_FAIL @@ -310,7 +310,7 @@ def test_main_pci_detach_invalid_module(monkeypatch): # Mock dependencies with patch('reboot_helper.pci_detach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: reboot_helper.main() @@ -324,7 +324,7 @@ def test_main_pci_reattach_invalid_module(monkeypatch): # Mock dependencies with patch('reboot_helper.pci_reattach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: reboot_helper.main() diff --git a/utilities_common/chassis.py b/utilities_common/chassis.py index 9e3e8d717f..400cec11fa 100644 --- a/utilities_common/chassis.py +++ b/utilities_common/chassis.py @@ -21,9 +21,11 @@ def get_chassis_local_interfaces(): def is_smartswitch(): return hasattr(device_info, 'is_smartswitch') and device_info.is_smartswitch() + def is_dpu(): return hasattr(device_info, 'is_dpu') and device_info.is_dpu() + def get_dpu_list(): if hasattr(device_info, 'get_dpu_list'): return device_info.get_dpu_list() From 84d9e50382b3057e80166d2d5cc496ae7e03c0e0 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 8 Nov 2024 21:11:45 +0000 Subject: [PATCH 07/26] Fix few more indentation errors --- tests/test_reboot_helper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 47076aca45..62c05084c9 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -297,7 +297,7 @@ def test_main_reboot_invalid_reboot_type(monkeypatch): # Mock dependencies with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: reboot_helper.main() # Check that sys.exit was called with EXIT_FAIL @@ -310,7 +310,7 @@ def test_main_pci_detach_invalid_module(monkeypatch): # Mock dependencies with patch('reboot_helper.pci_detach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: reboot_helper.main() @@ -324,7 +324,7 @@ def test_main_pci_reattach_invalid_module(monkeypatch): # Mock dependencies with patch('reboot_helper.pci_reattach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: reboot_helper.main() From a849e41b103b45aa6b2bf3007a60b102d5116825 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 25 Nov 2024 11:11:29 +0000 Subject: [PATCH 08/26] Add a new API in chassis.py --- scripts/reboot_smartswitch_helper | 15 +++++---------- utilities_common/chassis.py | 9 +++++---- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index 2c5a932b1b..b27cb7f0d0 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -17,9 +17,9 @@ function is_dpu() } # Function to retrieve number of DPUs -function get_num_dpu() +function get_num_dpus() { - return $(python3 -c "from utilities_common.chassis import get_num_dpu; get_num_dpus()") + return $(python3 -c "from utilities_common.chassis import get_num_dpus; get_num_dpus()") } # Function to retrieve DPU IP from CONFIG_DB @@ -218,11 +218,11 @@ function handle_smart_switch() { local PRE_SHUTDOWN=$2 local DPU_NAME=$3 - NUM_DPU=$(get_num_dpu) + NUM_DPU=$(get_num_dpus) if [[ "$REBOOT_DPU" == "yes" ]]; then if [[ "$PRE_SHUTDOWN" == "yes" ]]; then - echo "Invalid, '-p' option specified" + echo "Invalid, '-p' option specified on the smart switch" return $EXIT_ERROR fi @@ -238,21 +238,16 @@ function handle_smart_switch() { echo "User requested to reboot the device ${DPU_MODULE_NAME}" reboot_dpu "$DPU_MODULE_NAME" "$MODULE_REBOOT_DPU" else - echo "Invalid '-d' option specified for a non-smart switch" + echo "Invalid '-d' option specified for a non-smart-switch" return $EXIT_ERROR fi fi - debug "Is the platform DPU: $(is_dpu)" - # Check if system is a DPU and handle -p option accordingly if is_dpu; then if [[ "$PRE_SHUTDOWN" != "yes" ]]; then echo "Invalid, '-p' option not specified for a DPU" return ${EXIT_ERROR} - elif [[ "$PRE_SHUTDOWN" == "yes" ]]; then - echo "Invalid '-p' option specified for a non-DPU" - return ${EXIT_ERROR} fi return ${EXIT_SUCCESS} fi diff --git a/utilities_common/chassis.py b/utilities_common/chassis.py index 400cec11fa..c2582b911f 100644 --- a/utilities_common/chassis.py +++ b/utilities_common/chassis.py @@ -26,8 +26,9 @@ def is_dpu(): return hasattr(device_info, 'is_dpu') and device_info.is_dpu() -def get_dpu_list(): - if hasattr(device_info, 'get_dpu_list'): - return device_info.get_dpu_list() +def get_num_dpus(): + return hasattr(device_info, 'get_num_dpus') and device_info.get_num_dpus() + - return [] +def get_dpu_list(): + return hasattr(device_info, 'get_dpu_list') and device_info.get_dpu_list() From 4975ac0830440ec5aa176c68dccccf64b2e5246e Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 25 Nov 2024 12:51:36 +0000 Subject: [PATCH 09/26] Fix issues while testing --- scripts/reboot | 5 +- scripts/reboot_helper.py | 5 +- scripts/reboot_smartswitch_helper | 188 +++++++------- tests/test_reboot_helper.py | 410 +++++++++++++----------------- 4 files changed, 287 insertions(+), 321 deletions(-) diff --git a/scripts/reboot b/scripts/reboot index 3de54703ad..08e85b751f 100755 --- a/scripts/reboot +++ b/scripts/reboot @@ -189,7 +189,7 @@ function check_conflict_boot_in_fw_update() function parse_options() { - while getopts "h?vfpd" opt; do + while getopts "h?vfpd:" opt; do case ${opt} in h|\? ) show_help_and_exit @@ -243,7 +243,8 @@ fi debug "User requested rebooting device ..." -smart_switch_result=$(handle_smart_switch "$REBOOT_DPU" "$PRE_SHUTDOWN" "$DPU_MODULE_NAME") +handle_smart_switch "$REBOOT_DPU" "$PRE_SHUTDOWN" "$DPU_MODULE_NAME" +smart_switch_result=$? if [[ $smart_switch_result -ne 0 ]]; then exit $smart_switch_result fi diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 63d0470d43..7521c491a3 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -95,6 +95,7 @@ def reboot_dpu(module_name, reboot_type): status = platform_chassis.reboot(module_name, reboot_type) if not status: log.log_error("Reboot status for module {}: {}".format(module_name, status)) + return False return True except Exception as e: log.log_error("Unexpected error occurred while rebooting module {}: {}".format(module_name, e)) @@ -159,7 +160,7 @@ def pci_reattach_module(module_name): return False -def main(): +def parse_args(): if len(sys.argv) < 3: print("Usage: reboot_helper.py [reboot_type]") sys.exit(EXIT_FAIL) @@ -186,4 +187,4 @@ def main(): if __name__ == "__main__": - main() + parse_args() diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index b27cb7f0d0..1a090a4350 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -4,46 +4,42 @@ declare -r GNMI_PORT=50052 declare -r MODULE_REBOOT_DPU="DPU" declare -r MODULE_REBOOT_SMARTSWITCH="SMARTSWITCH" +# Function to print debug message +function log_message() { + local message=$1 + echo "$(date '+%Y-%m-%d %H:%M:%S') - $message" >&2 +} + # Function to check if running on smart switch function is_smartswitch() { - return $(python3 -c "from utilities_common.chassis import is_smartswitch; is_smartswitch()") + python3 -c "from utilities_common.chassis import is_smartswitch; print(is_smartswitch())" | grep -q "True" } # Function to check if running on DPU function is_dpu() { - return $(python3 -c "from utilities_common.chassis import is_dpu; is_dpu()") + python3 -c "from utilities_common.chassis import is_dpu; print(is_dpu())" | grep -q "True" } # Function to retrieve number of DPUs function get_num_dpus() { - return $(python3 -c "from utilities_common.chassis import get_num_dpus; get_num_dpus()") + python3 -c "from utilities_common.chassis import get_num_dpus; print(get_num_dpus())" } # Function to retrieve DPU IP from CONFIG_DB function get_dpu_ip() { local DPU_NAME=$1 - dpu_ip=$(sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips@") - if [ $? -ne 0 ] || [ -z "$dpu_ip" ]; then - echo "Error: Failed to retrieve DPU IP address for ${DPU_NAME}" - return ${EXIT_ERROR} - fi - debug "$DPU_NAME ip: $dpu_ip" + sonic-db-cli CONFIG_DB HGET "DHCP_SERVER_IPV4_PORT|bridge-midplane|${DPU_NAME}" "ips@" } # Function to retrieve GNMI port from CONFIG_DB -function get_gnmi_port() { +function get_gnmi_port() +{ local DPU_NAME=$1 - port=$(sonic-db-cli CONFIG_DB HGET "DPU_PORT|$DPU_NAME" "gnmi") - if [ $? -ne 0 ] || [ -z "$port" ]; then - debug "Error: Failed to retrieve GNMI port" - port=$GNMI_PORT #Hardcoded default GNMI port - return ${EXIT_ERROR} - fi - debug "$DPU_NAME GNMI port:$port" + sonic-db-cli CONFIG_DB HGET "DPU_PORT|$DPU_NAME" "gnmi" } # Function to get reboot status from DPU @@ -51,87 +47,75 @@ function get_reboot_status() { local dpu_ip=$1 local port=$2 - reboot_status=$(docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc RebootStatus) + local reboot_status + reboot_status=$(docker exec -i gnmi gnoi_client -target "${dpu_ip}:${port}" -logtostderr -insecure -rpc RebootStatus 2>/dev/null) if [ $? -ne 0 ] || [ -z "$reboot_status" ]; then - echo "Error: Failed to send reboot status command to DPU ${DPU_NAME}" + log_message "Error: Failed to send reboot status command to DPU ${DPU_NAME}" return ${EXIT_ERROR} fi - debug "$reboot_status" -} - -# Function to retrieve DPU bus info from platform JSON -function get_dpu_bus_info() { - local DPU_NAME=$1 - DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") - if [ -z "$DPU_BUS_INFO" ]; then - echo "Error: bus_info not found for DPU ${DPU_NAME}" - exit ${EXIT_ERROR} + local is_reboot_active + is_reboot_active=$(echo "$reboot_status" | grep "active" | awk '{print $2}') + if [ "$is_reboot_active" == "false" ]; then + log_message "DPU ${DPU_NAME} has finished rebooting" + return ${EXIT_SUCCESS} fi - debug "$DPU_NAME : $DPU_BUS_INFO" + return ${EXIT_ERROR} } -#Function to detach PCI module -function pci_detach_module() { +# Function to detach PCI module +function pci_detach_module() +{ local DPU_NAME=$1 - - status = $(python3 -c "import reboot_helper; reboot_helper.pci_detach_module('${DPU_NAME}')") - if [ -z "$status" ] || [ "$status" = "false" ]; then - # Check if DPU exists and retrieve bus info - DPU_BUS_INFO=$(get_dpu_bus_info "${DPU_NAME}") - echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove - fi + python3 -c "import reboot_helper; reboot_helper.pci_detach_module('${DPU_NAME}')" } -#Function to rescan PCI module -function pci_reattach_module() { +# Function to rescan PCI module +function pci_reattach_module() +{ local DPU_NAME=$1 - - status = $(python3 -c "import reboot_helper; reboot_helper.pci_reattach_module('${DPU_NAME}')") - if [ -z "$status" ] || [ "$status" = "false" ]; then - echo 1 > /sys/bus/pci/rescan - fi + python3 -c "import reboot_helper; reboot_helper.pci_reattach_module('${DPU_NAME}')" } # Function to reboot DPU -function reboot_dpu_platform() { +function reboot_dpu_platform() +{ local DPU_NAME=$1 local REBOOT_TYPE=$2 - - local reboot_status=$(python3 -c "import reboot_helper; reboot_helper.reboot_dpu('${DPU_NAME}', '${REBOOT_TYPE}')") - if [ -z "$reboot_status" ] || [ "$reboot_status" = "false" ]; then - echo "Error: Failed to reboot the DPU" - return ${EXIT_ERROR} - fi + python3 -c "import reboot_helper; reboot_helper.reboot_dpu('${DPU_NAME}', '${REBOOT_TYPE}')" } -#Function to wait for DPU reboot status -function wait_for_dpu_reboot_status() { +# Function to wait for DPU reboot status +function wait_for_dpu_reboot_status() +{ local dpu_ip=$1 local port=$2 - # Retrieve dpu_halt_services_timeout value using jq - local dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) + if [[ -z "$PLATFORM_JSON_PATH" ]]; then + log_message "Error: PLATFORM_JSON_PATH is not defined" + exit $EXIT_ERROR + fi + + local dpu_halt_services_timeou + dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) if [ $? -ne 0 ]; then - echo "Error: Failed to retrieve dpu_halt_services_timeout from ${PLATFORM_JSON_PATH}" + log_message "Error: Failed to retrieve dpu_halt_services_timeout from ${PLATFORM_JSON_PATH}" return ${EXIT_ERROR} fi - # Poll on reboot status response with a timeout mechanism local poll_interval=5 local waited_time=0 - local reboot_status while true; do - reboot_status=$(get_reboot_status "${dpu_ip}" "${port}") - debug "GNOI RebootStatus response ${reboot_status}" - is_reboot_active=$(echo "$reboot_status" | grep "active" | awk '{print $2}') - if [ "$is_reboot_active" == "false" ]; then + local reboot_status + get_reboot_status "${dpu_ip}" "${port}" + reboot_status=$? + if [ $reboot_status -eq ${EXIT_SUCCESS} ]; then break fi sleep "$poll_interval" waited_time=$((waited_time + poll_interval)) if [ $waited_time -ge $dpu_halt_services_timeout ]; then - echo "Error: Timeout waiting for DPU ${DPU_NAME} to finish rebooting" + log_message "Error: Timeout waiting for DPU ${DPU_NAME} to finish rebooting" return ${EXIT_ERROR} fi done @@ -142,16 +126,21 @@ function gnmi_reboot_dpu() { # Retrieve DPU IP and GNMI port dpu_ip=$(get_dpu_ip "${DPU_NAME}") + log_message "DPU IP ${DPU_NAME}: $dpu_ip" port=$(get_gnmi_port "${DPU_NAME}") + if [ -z "$port" ]; then + port=$GNMI_PORT # Default GNMI port + fi + log_message "GNMI port ${DPU_NAME}: $port" - if [ -z "$dpu_ip" ] || [ -z "$port" ]; then - echo "Error: Failed to retrieve DPU IP or GNMI port for ${DPU_NAME}" + if [ -z "$dpu_ip" ]; then + log_message "Error: Failed to retrieve DPU IP for ${DPU_NAME}" return ${EXIT_ERROR} fi docker exec -i gnmi gnoi_client -target ${dpu_ip}:${port} -logtostderr -insecure -rpc Reboot -jsonin '{"method":3}' if [ $? -ne 0 ]; then - echo "Error: Failed to send reboot command to DPU ${DPU_NAME}" + log_message "Error: Failed to send reboot command to DPU ${DPU_NAME}" return ${EXIT_ERROR} fi @@ -167,7 +156,11 @@ function reboot_dpu() debug "User requested rebooting device ${DPU_NAME} ..." # Send reboot command to DPU - gnoi_reboot_dpu "${dpu_ip}" "${port}" + gnmi_reboot_dpu "${DPU_NAME}" + if [ $? -ne 0 ]; then + log_message "Error: Failed to reboot DPU ${DPU_NAME}" + return ${EXIT_ERROR} + fi # Update STATE_DB and handle PCIe removal and rescan sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' @@ -184,15 +177,20 @@ function reboot_all_dpus() { local NUM_DPU=$1 if [[ -z $NUM_DPU ]]; then - echo "Error: Failed to retrieve number of DPUs or no DPUs found" + log_message "Error: Failed to retrieve number of DPUs or no DPUs found" return fi + local failures=0 for (( i=0; i<"$NUM_DPU"; i++ )); do - echo "Rebooting DPU module dpu$i" + log_message "Rebooting DPU module dpu$i" reboot_dpu "dpu$i" "$MODULE_REBOOT_SMARTSWITCH" & + if [ $? -ne 0 ]; then + ((failures++)) + fi done wait + return $failures } # Function to verify DPU module name @@ -201,13 +199,13 @@ function verify_dpu_module_name() { local NUM_DPU=$2 if [[ -z "$DPU_MODULE_NAME" ]]; then - echo "Error: DPU module name not provided" + log_message "Error: DPU module name not provided" return $EXIT_ERROR fi NUM_DPU=$((NUM_DPU - 1)) if [[ ! "$DPU_MODULE_NAME" =~ ^dpu[0-$NUM_DPU]$ ]]; then - echo "Error: Invalid DPU module name provided" + log_message "Error: Invalid DPU module name provided" return $EXIT_ERROR fi } @@ -220,40 +218,50 @@ function handle_smart_switch() { NUM_DPU=$(get_num_dpus) - if [[ "$REBOOT_DPU" == "yes" ]]; then - if [[ "$PRE_SHUTDOWN" == "yes" ]]; then - echo "Invalid, '-p' option specified on the smart switch" + if is_dpu; then + if [[ "$PRE_SHUTDOWN" != "yes" ]]; then + log_message "Error: '-p' option not specified for a DPU" + return $EXIT_ERROR + elif [[ "$REBOOT_DPU" == "yes" ]]; then + log_message "Error: '-d' option specified for a DPU" return $EXIT_ERROR fi + return $EXIT_SUCCESS + fi + if [[ "$PRE_SHUTDOWN" == "yes" ]]; then + log_message "Error: '-p' option specified for a non-DPU" + return $EXIT_ERROR + fi + + if [[ "$REBOOT_DPU" == "yes" ]]; then if is_smartswitch; then - if [[ -z $NUM_DPU ]]; then - echo "Error: Failed to retrieve number of DPUs or no DPUs found" - return $EXIT_ERROR + if [[ -z $NUM_DPU ]]; then + log_message "Error: Failed to retrieve number of DPUs or no DPUs found" + return $EXIT_ERROR fi DPU_MODULE_NAME="${DPU_NAME,,}" verify_dpu_module_name "$DPU_MODULE_NAME" "$NUM_DPU" + result=$? + if [[ $result -ne $EXIT_SUCCESS ]]; then + return $result + fi - echo "User requested to reboot the device ${DPU_MODULE_NAME}" + log_message "Rebooting device ${DPU_MODULE_NAME}" reboot_dpu "$DPU_MODULE_NAME" "$MODULE_REBOOT_DPU" + result=$? + return $result else - echo "Invalid '-d' option specified for a non-smart-switch" + log_message "Error: '-d' option specified for a non-smart-switch" return $EXIT_ERROR fi fi - # Check if system is a DPU and handle -p option accordingly - if is_dpu; then - if [[ "$PRE_SHUTDOWN" != "yes" ]]; then - echo "Invalid, '-p' option not specified for a DPU" - return ${EXIT_ERROR} - fi - return ${EXIT_SUCCESS} - fi - # If the system is a smart switch, reboot all DPUs in parallel if is_smartswitch; then reboot_all_dpus "$NUM_DPU" "$MODULE_REBOOT_SMARTSWITCH" + result=$? + return $result fi } diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 62c05084c9..e7c2ac06bd 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -9,324 +9,280 @@ import reboot_helper # noqa: E402 -@pytest.fixture -def mock_load_platform_chassis(): - with patch('reboot_helper.load_platform_chassis', return_value=True) as mock: - yield mock +class TestRebootHelper(object): + @pytest.fixture(scope='class') + def mock_load_platform_chassis(self): + with patch('reboot_helper.load_platform_chassis', return_value=True) as mock: + yield mock -@pytest.fixture -def mock_load_and_verify(): - with patch('reboot_helper.load_and_verify', return_value=True) as mock: - yield mock + @pytest.fixture(scope='class') + def mock_load_and_verify(self): + with patch('reboot_helper.load_and_verify', return_value=True) as mock: + yield mock -@pytest.fixture -def mock_is_smartswitch(): - with patch('reboot_helper.is_smartswitch', return_value=True) as mock: - yield mock + @pytest.fixture(scope='class') + def mock_is_smartswitch(self): + with patch('reboot_helper.is_smartswitch', return_value=True) as mock: + yield mock -@pytest.fixture -def mock_open_file(): - return mock_open() + @pytest.fixture(scope='class') + def mock_open_file(self): + return mock_open() -@pytest.fixture -def mock_platform_chassis(): - with patch('reboot_helper.platform_chassis') as mock: - yield mock + @pytest.fixture(scope='class') + def mock_platform_chassis(self): + with patch('reboot_helper.platform_chassis') as mock: + yield mock -@pytest.fixture -def mock_platform(): - with patch('reboot_helper.sonic_platform.platform.Platform') as mock: - yield mock + @pytest.fixture(scope='class') + def mock_platform(self): + with patch('reboot_helper.sonic_platform.platform.Platform') as mock: + yield mock -@pytest.fixture -def mock_get_dpu_list(): - with patch('reboot_helper.get_dpu_list', return_value=["dpu1"]) as mock: - yield mock + @pytest.fixture(scope='class') + def mock_get_dpu_list(self): + with patch('reboot_helper.get_dpu_list', return_value=["dpu1"]) as mock: + yield mock -def test_load_platform_chassis_success(mock_platform_chassis): - mock_platform_chassis.get_chassis.return_value = MagicMock() - result = reboot_helper.load_platform_chassis() - assert result + def test_load_platform_chassis_success(self, mock_platform_chassis): + mock_platform_chassis.get_chassis.return_value = MagicMock() + result = reboot_helper.load_platform_chassis() + assert result -def test_load_platform_chassis_exception(mock_platform): - mock_platform.side_effect = RuntimeError - result = reboot_helper.load_platform_chassis() - assert not result + def test_load_platform_chassis_exception(self, mock_platform): + mock_platform.side_effect = RuntimeError + result = reboot_helper.load_platform_chassis() + assert not result -def test_load_and_verify_chassis_fail(): - mock_load_platform_chassis.return_value = False - assert not reboot_helper.load_and_verify("DPU1") + def test_load_and_verify_chassis_fail(self, mock_load_platform_chassis): + mock_load_platform_chassis.return_value = False + assert not reboot_helper.load_and_verify("DPU1") -def test_load_and_verify_not_smartswitch(mock_is_smartswitch, mock_load_platform_chassis): - mock_is_smartswitch.return_value = False - mock_load_platform_chassis.return_value = True - assert not reboot_helper.load_and_verify("DPU1") + def test_load_and_verify_not_smartswitch(self, mock_is_smartswitch, mock_load_platform_chassis): + mock_is_smartswitch.return_value = False + mock_load_platform_chassis.return_value = True + assert not reboot_helper.load_and_verify("DPU1") -def test_load_and_verify_not_found(mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_dpu_list.return_value = ["dpu1"] + def test_load_and_verify_not_found(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_dpu_list.return_value = ["dpu1"] - assert not reboot_helper.load_and_verify("DPU2") + assert not reboot_helper.load_and_verify("DPU2") -def test_load_and_verify_empty_dpu_list(mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_dpu_list.return_value = [] + def test_load_and_verify_empty_dpu_list(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_dpu_list.return_value = [] - assert not reboot_helper.load_and_verify("DPU1") + assert not reboot_helper.load_and_verify("DPU1") -def test_load_and_verify_success(mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_dpu_list.return_value = ["dpu1"] + def test_load_and_verify_success(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): + mock_is_smartswitch.return_value = True + mock_load_platform_chassis.return_value = True + mock_get_dpu_list.return_value = ["dpu1"] - result = reboot_helper.load_and_verify("DPU1") - assert result + result = reboot_helper.load_and_verify("DPU1") + assert result -def test_reboot_dpu_load_and_verify_fail(mock_load_and_verify): - mock_load_and_verify.return_value = False - result = reboot_helper.reboot_dpu("DPU1", "DPU") - assert not result + def test_reboot_dpu_load_and_verify_fail(self, mock_load_and_verify): + mock_load_and_verify.return_value = False + result = reboot_helper.reboot_dpu("DPU1", "DPU") + assert not result -def test_reboot_dpu_not_implemented(mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.reboot.side_effect = NotImplementedError - assert not reboot_helper.reboot_dpu("DPU1", 'DPU') + def test_reboot_dpu_success(self, mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.reboot.return_value = True + assert reboot_helper.reboot_dpu("DPU1", "DPU") -def test_reboot_dpu_success(mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.reboot.return_value = True - assert reboot_helper.reboot_dpu("DPU1", "DPU") + def test_pci_detach_module_load_and_verify_fail(self, mock_load_and_verify): + mock_load_and_verify.return_value = False + assert not reboot_helper.pci_detach_module("DPU1") -def test_pci_detach_module_load_and_verify_fail(mock_load_and_verify): - mock_load_and_verify.return_value = False - assert not reboot_helper.pci_detach_module("DPU1") + def test_pci_detach_module_success(self, mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.pci_detach.return_value = True + assert reboot_helper.pci_detach_module("DPU1") -def test_pci_detach_module_not_implemented(mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.pci_detach.side_effect = NotImplementedError + def test_pci_reattach_module_load_and_verify_fail(self, mock_load_and_verify): + mock_load_and_verify.return_value = False + assert not reboot_helper.pci_reattach_module("DPU1") - assert not reboot_helper.pci_detach_module("DPU1") + def test_pci_reattach_module_success(self, mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + mock_platform_chassis.pci_reattach.return_value = True + assert reboot_helper.pci_reattach_module("DPU1") -def test_pci_detach_module_success(mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.pci_detach.return_value = True - assert reboot_helper.pci_detach_module("DPU1") + def test_parse_args_reboot_command_fail(self, monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) -def test_pci_reattach_module_load_and_verify_fail(mock_load_and_verify): - mock_load_and_verify.return_value = False - assert not reboot_helper.pci_reattach_module("DPU1") + # Mock dependencies + with patch('reboot_helper.reboot_dpu', return_value=False), \ + patch('sys.exit') as mock_exit: + reboot_helper.parse_args() -def test_pci_reattach_module_not_implemented(mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.pci_reattach.side_effect = NotImplementedError - assert not reboot_helper.pci_reattach_module("DPU1") + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) -def test_pci_reattach_module_success(mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.pci_reattach.return_value = True - assert reboot_helper.pci_reattach_module("DPU1") + def test_parse_args_reboot_command_type_fail(self, monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'UNKNOWN']) + # Mock dependencies + with patch('reboot_helper.reboot_dpu', return_value=False), \ + patch('sys.exit') as mock_exit: -def test_main_reboot_command_fail(monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) + reboot_helper.parse_args() - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - reboot_helper.main() - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + def test_parse_args_reboot_command_success(self, monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) + # Mock dependencies + with patch('reboot_helper.reboot_dpu', return_value=True) as mock_reboot_dpu, \ + patch('sys.exit') as mock_exit: -def test_main_reboot_command_type_fail(monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'UNKNOWN']) + reboot_helper.parse_args() - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: + # Check that reboot_module was called + mock_reboot_dpu.assert_called_once_with('DPU1', 'DPU') + mock_exit.assert_not_called() - reboot_helper.main() - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + def test_parse_args_pci_detach_command_fail(self, monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + # Mock dependencies + with patch('reboot_helper.pci_detach_module', return_value=False), \ + patch('sys.exit') as mock_exit: -def test_main_reboot_command_success(monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) + reboot_helper.parse_args() - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=True) as mock_reboot_dpu, \ - patch('sys.exit') as mock_exit: + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - reboot_helper.main() - # Check that reboot_module was called - mock_reboot_dpu.assert_called_once_with('DPU1', 'DPU') - mock_exit.assert_not_called() + def test_parse_args_pci_detach_command_success(self, monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + # Mock dependencies + with patch('reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ + patch('sys.exit') as mock_exit: -def test_main_pci_detach_command_fail(monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + reboot_helper.parse_args() - # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + # Check that pci_detach_module was called + mock_reboot_module.assert_called_once_with('DPU1') + mock_exit.assert_not_called() - reboot_helper.main() - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + def test_parse_args_pci_reattach_command_fail(self, monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) + # Mock dependencies + with patch('reboot_helper.pci_reattach_module', return_value=False), \ + patch('sys.exit') as mock_exit: -def test_main_pci_detach_command_success(monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) + reboot_helper.parse_args() - # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ - patch('sys.exit') as mock_exit: + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - reboot_helper.main() - # Check that pci_detach_module was called - mock_reboot_module.assert_called_once_with('DPU1') - mock_exit.assert_not_called() + def test_parse_args_pci_reattach_command_success(self, monkeypatch): + # Simulate command-line arguments for reboot + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) + # Mock dependencies + with patch('reboot_helper.pci_reattach_module', return_value=True) as mock_reboot_module, \ + patch('sys.exit') as mock_exit: -def test_main_pci_reattach_command_fail(monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) + reboot_helper.parse_args() - # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + # Check that pci_reattach_module was called + mock_reboot_module.assert_called_once_with('DPU1') + mock_exit.assert_not_called() - reboot_helper.main() - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + def test_parse_args_unknown_command(self, monkeypatch): + # Simulate an unknown command + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'unknown', 'DPU1']) + # Mock sys.exit and capture print output + with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: + reboot_helper.parse_args() -def test_main_pci_reattach_command_success(monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) + # Check that the unknown command message was printed + mock_print.assert_called_with("Unknown command: unknown") + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=True) as mock_reboot_module, \ - patch('sys.exit') as mock_exit: - reboot_helper.main() + def test_parse_args_reboot_invalid_reboot_type(self, monkeypatch): + # Simulate command-line arguments for reboot with invalid reboot_type + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'INVALID']) - # Check that pci_reattach_module was called - mock_reboot_module.assert_called_once_with('DPU1') - mock_exit.assert_not_called() + # Mock dependencies + with patch('reboot_helper.reboot_dpu', return_value=False), \ + patch('sys.exit') as mock_exit: + reboot_helper.parse_args() + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) -def test_main_unknown_command(monkeypatch): - # Simulate an unknown command - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'unknown', 'DPU1']) - # Mock sys.exit and capture print output - with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: - reboot_helper.main() + def test_parse_args_pci_detach_invalid_module(self, monkeypatch): + # Simulate command-line arguments for pci_detach with invalid module + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'INVALID']) - # Check that the unknown command message was printed - mock_print.assert_called_with("Unknown command: unknown") - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + # Mock dependencies + with patch('reboot_helper.pci_detach_module', return_value=False), \ + patch('sys.exit') as mock_exit: + reboot_helper.parse_args() -def test_main_no_arguments(monkeypatch): - # Simulate no command-line arguments - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py']) + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - # Mock sys.exit and capture print output - with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: - reboot_helper.main() - # Check that the usage message was printed - mock_print.assert_called_with("Usage: reboot_helper.py [reboot_type]") - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + def test_parse_args_pci_reattach_invalid_module(self, monkeypatch): + # Simulate command-line arguments for pci_reattach with invalid module + monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'INVALID']) + # Mock dependencies + with patch('reboot_helper.pci_reattach_module', return_value=False), \ + patch('sys.exit') as mock_exit: -def test_main_reboot_missing_reboot_type(monkeypatch): - # Simulate command-line arguments for reboot without reboot_type - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1']) + reboot_helper.parse_args() - # Mock sys.exit and capture print output - with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: - reboot_helper.main() - - # Check that the usage message was printed - mock_print.assert_called_with("Usage: reboot_helper.py reboot ") - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - -def test_main_reboot_invalid_reboot_type(monkeypatch): - # Simulate command-line arguments for reboot with invalid reboot_type - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'INVALID']) - - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: - reboot_helper.main() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - -def test_main_pci_detach_invalid_module(monkeypatch): - # Simulate command-line arguments for pci_detach with invalid module - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'INVALID']) - - # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=False), \ - patch('sys.exit') as mock_exit: - - reboot_helper.main() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - -def test_main_pci_reattach_invalid_module(monkeypatch): - # Simulate command-line arguments for pci_reattach with invalid module - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'INVALID']) - - # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=False), \ - patch('sys.exit') as mock_exit: - - reboot_helper.main() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) + # Check that sys.exit was called with EXIT_FAIL + mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) From bead103158e9f72dbc2532b388e183bd9ffc4adf Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 25 Nov 2024 14:55:25 +0000 Subject: [PATCH 10/26] Fix indentation errors --- tests/test_reboot_helper.py | 59 +++++++------------------------------ 1 file changed, 10 insertions(+), 49 deletions(-) diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index e7c2ac06bd..9cfe0f1846 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -15,65 +15,54 @@ def mock_load_platform_chassis(self): with patch('reboot_helper.load_platform_chassis', return_value=True) as mock: yield mock - @pytest.fixture(scope='class') def mock_load_and_verify(self): with patch('reboot_helper.load_and_verify', return_value=True) as mock: yield mock - @pytest.fixture(scope='class') def mock_is_smartswitch(self): with patch('reboot_helper.is_smartswitch', return_value=True) as mock: yield mock - @pytest.fixture(scope='class') def mock_open_file(self): return mock_open() - @pytest.fixture(scope='class') def mock_platform_chassis(self): with patch('reboot_helper.platform_chassis') as mock: yield mock - @pytest.fixture(scope='class') def mock_platform(self): with patch('reboot_helper.sonic_platform.platform.Platform') as mock: yield mock - @pytest.fixture(scope='class') def mock_get_dpu_list(self): with patch('reboot_helper.get_dpu_list', return_value=["dpu1"]) as mock: yield mock - def test_load_platform_chassis_success(self, mock_platform_chassis): mock_platform_chassis.get_chassis.return_value = MagicMock() result = reboot_helper.load_platform_chassis() assert result - def test_load_platform_chassis_exception(self, mock_platform): mock_platform.side_effect = RuntimeError result = reboot_helper.load_platform_chassis() assert not result - def test_load_and_verify_chassis_fail(self, mock_load_platform_chassis): mock_load_platform_chassis.return_value = False assert not reboot_helper.load_and_verify("DPU1") - def test_load_and_verify_not_smartswitch(self, mock_is_smartswitch, mock_load_platform_chassis): mock_is_smartswitch.return_value = False mock_load_platform_chassis.return_value = True assert not reboot_helper.load_and_verify("DPU1") - def test_load_and_verify_not_found(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): mock_is_smartswitch.return_value = True mock_load_platform_chassis.return_value = True @@ -81,7 +70,6 @@ def test_load_and_verify_not_found(self, mock_is_smartswitch, mock_load_platform assert not reboot_helper.load_and_verify("DPU2") - def test_load_and_verify_empty_dpu_list(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): mock_is_smartswitch.return_value = True mock_load_platform_chassis.return_value = True @@ -89,7 +77,6 @@ def test_load_and_verify_empty_dpu_list(self, mock_is_smartswitch, mock_load_pla assert not reboot_helper.load_and_verify("DPU1") - def test_load_and_verify_success(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): mock_is_smartswitch.return_value = True mock_load_platform_chassis.return_value = True @@ -98,76 +85,63 @@ def test_load_and_verify_success(self, mock_is_smartswitch, mock_load_platform_c result = reboot_helper.load_and_verify("DPU1") assert result - def test_reboot_dpu_load_and_verify_fail(self, mock_load_and_verify): mock_load_and_verify.return_value = False result = reboot_helper.reboot_dpu("DPU1", "DPU") assert not result - def test_reboot_dpu_success(self, mock_load_and_verify, mock_platform_chassis): mock_load_and_verify.return_value = True mock_platform_chassis.reboot.return_value = True assert reboot_helper.reboot_dpu("DPU1", "DPU") - def test_pci_detach_module_load_and_verify_fail(self, mock_load_and_verify): mock_load_and_verify.return_value = False assert not reboot_helper.pci_detach_module("DPU1") - def test_pci_detach_module_success(self, mock_load_and_verify, mock_platform_chassis): mock_load_and_verify.return_value = True mock_platform_chassis.pci_detach.return_value = True assert reboot_helper.pci_detach_module("DPU1") - def test_pci_reattach_module_load_and_verify_fail(self, mock_load_and_verify): mock_load_and_verify.return_value = False assert not reboot_helper.pci_reattach_module("DPU1") - def test_pci_reattach_module_success(self, mock_load_and_verify, mock_platform_chassis): mock_load_and_verify.return_value = True mock_platform_chassis.pci_reattach.return_value = True assert reboot_helper.pci_reattach_module("DPU1") - def test_parse_args_reboot_command_fail(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: - + with patch('reboot_helper.reboot_dpu', return_value=False), patch('sys.exit') as mock_exit: reboot_helper.parse_args() # Check that sys.exit was called with EXIT_FAIL mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - def test_parse_args_reboot_command_type_fail(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'UNKNOWN']) # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: + with patch('reboot_helper.reboot_dpu', return_value=False), patch('sys.exit') as mock_exit: reboot_helper.parse_args() # Check that sys.exit was called with EXIT_FAIL mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - def test_parse_args_reboot_command_success(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=True) as mock_reboot_dpu, \ - patch('sys.exit') as mock_exit: + with patch('reboot_helper.reboot_dpu', return_value=True) as mock_reboot_dpu, patch('sys.exit') as mock_exit: reboot_helper.parse_args() @@ -175,28 +149,25 @@ def test_parse_args_reboot_command_success(self, monkeypatch): mock_reboot_dpu.assert_called_once_with('DPU1', 'DPU') mock_exit.assert_not_called() - def test_parse_args_pci_detach_command_fail(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + with patch('reboot_helper.pci_detach_module', return_value=False), patch('sys.exit') as mock_exit: reboot_helper.parse_args() # Check that sys.exit was called with EXIT_FAIL mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - def test_parse_args_pci_detach_command_success(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) # Mock dependencies with patch('reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: # noqa: E125 reboot_helper.parse_args() @@ -204,28 +175,25 @@ def test_parse_args_pci_detach_command_success(self, monkeypatch): mock_reboot_module.assert_called_once_with('DPU1') mock_exit.assert_not_called() - def test_parse_args_pci_reattach_command_fail(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + with patch('reboot_helper.pci_reattach_module', return_value=False), patch('sys.exit') as mock_exit: reboot_helper.parse_args() # Check that sys.exit was called with EXIT_FAIL mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - def test_parse_args_pci_reattach_command_success(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) # Mock dependencies with patch('reboot_helper.pci_reattach_module', return_value=True) as mock_reboot_module, \ - patch('sys.exit') as mock_exit: + patch('sys.exit') as mock_exit: # noqa: E125 reboot_helper.parse_args() @@ -233,7 +201,6 @@ def test_parse_args_pci_reattach_command_success(self, monkeypatch): mock_reboot_module.assert_called_once_with('DPU1') mock_exit.assert_not_called() - def test_parse_args_unknown_command(self, monkeypatch): # Simulate an unknown command monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'unknown', 'DPU1']) @@ -246,41 +213,35 @@ def test_parse_args_unknown_command(self, monkeypatch): mock_print.assert_called_with("Unknown command: unknown") mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - def test_parse_args_reboot_invalid_reboot_type(self, monkeypatch): # Simulate command-line arguments for reboot with invalid reboot_type monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'INVALID']) # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), \ - patch('sys.exit') as mock_exit: + with patch('reboot_helper.reboot_dpu', return_value=False), patch('sys.exit') as mock_exit: reboot_helper.parse_args() # Check that sys.exit was called with EXIT_FAIL mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - def test_parse_args_pci_detach_invalid_module(self, monkeypatch): # Simulate command-line arguments for pci_detach with invalid module monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'INVALID']) # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + with patch('reboot_helper.pci_detach_module', return_value=False), patch('sys.exit') as mock_exit: reboot_helper.parse_args() # Check that sys.exit was called with EXIT_FAIL mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - def test_parse_args_pci_reattach_invalid_module(self, monkeypatch): # Simulate command-line arguments for pci_reattach with invalid module monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'INVALID']) # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=False), \ - patch('sys.exit') as mock_exit: + with patch('reboot_helper.pci_reattach_module', return_value=False), patch('sys.exit') as mock_exit: reboot_helper.parse_args() From f88491aba880611b54601ef3f1a38e05da8542f5 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 26 Nov 2024 06:29:29 +0000 Subject: [PATCH 11/26] Add DPU_BUS_INFO Increase code coverage as well --- scripts/reboot_helper.py | 2 ++ scripts/reboot_smartswitch_helper | 20 ++++++++++++---- tests/test_chassis.py | 38 +++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 tests/test_chassis.py diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 7521c491a3..676bbd693a 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -125,6 +125,7 @@ def pci_detach_module(module_name): status = platform_chassis.pci_detach(module_name) if not status: log.log_error("PCI detach status for module {}: {}".format(module_name, status)) + return False return True except Exception as e: log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) @@ -154,6 +155,7 @@ def pci_reattach_module(module_name): status = platform_chassis.pci_reattach(module_name) if not status: log.log_error("PCI detach status for module {}: {}".format(module_name, status)) + return False return True except Exception as e: log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index 1a090a4350..aa9b2acd7e 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -66,14 +66,24 @@ function get_reboot_status() function pci_detach_module() { local DPU_NAME=$1 + local DPU_BUS_INFO=$2 python3 -c "import reboot_helper; reboot_helper.pci_detach_module('${DPU_NAME}')" + if [ $? -ne 0 ]; then + log_message "Error: PCI detach vendor API is not available" + echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove + fi } # Function to rescan PCI module function pci_reattach_module() { local DPU_NAME=$1 + local DPU_BUS_INFO=$2 python3 -c "import reboot_helper; reboot_helper.pci_reattach_module('${DPU_NAME}')" + if [ $? -ne 0 ]; then + log_message "Error: PCI reattach vendor API is not available" + echo 1 > /sys/bus/pci/rescan + fi } # Function to reboot DPU @@ -162,14 +172,16 @@ function reboot_dpu() return ${EXIT_ERROR} fi + DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") + # Update STATE_DB and handle PCIe removal and rescan - sonic-db-cli state_db set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' + sonic-db-cli STATE_DB set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' - pci_detach_module "${DPU_NAME}" + pci_detach_module "${DPU_NAME} ${DPU_BUS_INFO}" reboot_dpu_platform "${DPU_NAME}" - pci_reattach_module "${DPU_NAME}" + pci_reattach_module "${DPU_NAME} ${DPU_BUS_INFO}" - sonic-db-cli state_db del "PCIE_DETACH_INFO|${DPU_NAME}" + sonic-db-cli STATE_DB del "PCIE_DETACH_INFO|${DPU_NAME}" } # Function to reboot all DPUs in parallel diff --git a/tests/test_chassis.py b/tests/test_chassis.py new file mode 100644 index 0000000000..cd9db158ee --- /dev/null +++ b/tests/test_chassis.py @@ -0,0 +1,38 @@ +import os +import pytest +from unittest import mock +from utilities_common import chassis + +class TestChassis: + @pytest.fixture + def mock_device_info(self): + with mock.patch('utilities_common.chassis.device_info') as mock_device_info: + yield mock_device_info + + def test_is_smartswitch(self, mock_device_info): + mock_device_info.is_smartswitch = mock.Mock(return_value=True) + assert chassis.is_smartswitch() == True + + mock_device_info.is_smartswitch = mock.Mock(return_value=False) + assert chassis.is_smartswitch() == False + + def test_is_dpu(self, mock_device_info): + mock_device_info.is_dpu = mock.Mock(return_value=True) + assert chassis.is_dpu() == True + + mock_device_info.is_dpu = mock.Mock(return_value=False) + assert chassis.is_dpu() == False + + def test_get_num_dpus(self, mock_device_info): + mock_device_info.get_num_dpus = mock.Mock(return_value=4) + assert chassis.get_num_dpus() == 4 + + del mock_device_info.get_num_dpus + assert chassis.get_num_dpus() == False + + def test_get_dpu_list(self, mock_device_info): + mock_device_info.get_dpu_list = mock.Mock(return_value=['dpu1', 'dpu2']) + assert chassis.get_dpu_list() == ['dpu1', 'dpu2'] + + del mock_device_info.get_dpu_list + assert chassis.get_dpu_list() == False From b3dbc0fd15ad6f3ca8b5ff1f218b7aa7c81ee825 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 26 Nov 2024 07:51:38 +0000 Subject: [PATCH 12/26] Fix pre-commit errors --- tests/test_chassis.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_chassis.py b/tests/test_chassis.py index cd9db158ee..e843e70066 100644 --- a/tests/test_chassis.py +++ b/tests/test_chassis.py @@ -1,8 +1,8 @@ -import os import pytest from unittest import mock from utilities_common import chassis + class TestChassis: @pytest.fixture def mock_device_info(self): @@ -11,28 +11,28 @@ def mock_device_info(self): def test_is_smartswitch(self, mock_device_info): mock_device_info.is_smartswitch = mock.Mock(return_value=True) - assert chassis.is_smartswitch() == True + assert chassis.is_smartswitch() == True # noqa: E712 mock_device_info.is_smartswitch = mock.Mock(return_value=False) - assert chassis.is_smartswitch() == False + assert chassis.is_smartswitch() == False # noqa: E712 def test_is_dpu(self, mock_device_info): mock_device_info.is_dpu = mock.Mock(return_value=True) - assert chassis.is_dpu() == True + assert chassis.is_dpu() == True # noqa: E712 mock_device_info.is_dpu = mock.Mock(return_value=False) - assert chassis.is_dpu() == False + assert chassis.is_dpu() == False # noqa: E712 def test_get_num_dpus(self, mock_device_info): mock_device_info.get_num_dpus = mock.Mock(return_value=4) assert chassis.get_num_dpus() == 4 del mock_device_info.get_num_dpus - assert chassis.get_num_dpus() == False + assert chassis.get_num_dpus() == False # noqa: E712 def test_get_dpu_list(self, mock_device_info): mock_device_info.get_dpu_list = mock.Mock(return_value=['dpu1', 'dpu2']) assert chassis.get_dpu_list() == ['dpu1', 'dpu2'] del mock_device_info.get_dpu_list - assert chassis.get_dpu_list() == False + assert chassis.get_dpu_list() == False # noqa: E712 From 2d8b9080edb8b388965201701677c497c831bdb6 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 26 Nov 2024 08:24:07 +0000 Subject: [PATCH 13/26] Add more error handling scenarios and increase more coverage --- scripts/reboot_smartswitch_helper | 20 +++++++++++++++++--- tests/test_reboot_helper.py | 20 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index aa9b2acd7e..7603f37724 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -67,7 +67,7 @@ function pci_detach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "import reboot_helper; reboot_helper.pci_detach_module('${DPU_NAME}')" + python3 -c "from reboot_helper import pci_detach_module; pci_detach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI detach vendor API is not available" echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove @@ -79,7 +79,7 @@ function pci_reattach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "import reboot_helper; reboot_helper.pci_reattach_module('${DPU_NAME}')" + python3 -c "from reboot_helper import pci_reattach_module; pci_reattach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI reattach vendor API is not available" echo 1 > /sys/bus/pci/rescan @@ -168,17 +168,31 @@ function reboot_dpu() # Send reboot command to DPU gnmi_reboot_dpu "${DPU_NAME}" if [ $? -ne 0 ]; then - log_message "Error: Failed to reboot DPU ${DPU_NAME}" + log_message "Error: Failed to send gnoi command to reboot DPU ${DPU_NAME}" return ${EXIT_ERROR} fi DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") + if [ -z "$DPU_BUS_INFO" ]; then + log_message "Error: Failed to retrieve bus info for DPU ${DPU_NAME}" + return ${EXIT_ERROR} + fi # Update STATE_DB and handle PCIe removal and rescan sonic-db-cli STATE_DB set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' pci_detach_module "${DPU_NAME} ${DPU_BUS_INFO}" + if [ $? -ne 0 ]; then + log_message "Error: Failed to detach PCI module for DPU ${DPU_NAME}" + return ${EXIT_ERROR} + fi + reboot_dpu_platform "${DPU_NAME}" + if [ $? -ne 0 ]; then + log_message "Error: Failed to send platform command to reboot DPU ${DPU_NAME}" + return ${EXIT_ERROR} + fi + pci_reattach_module "${DPU_NAME} ${DPU_BUS_INFO}" sonic-db-cli STATE_DB del "PCIE_DETACH_INFO|${DPU_NAME}" diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 9cfe0f1846..2a491b8f9a 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -54,6 +54,11 @@ def test_load_platform_chassis_exception(self, mock_platform): result = reboot_helper.load_platform_chassis() assert not result + def test_load_platform_chassis_none(self, mock_platform_chassis): + mock_platform_chassis.get_chassis.return_value = None + result = reboot_helper.load_platform_chassis() + assert not result + def test_load_and_verify_chassis_fail(self, mock_load_platform_chassis): mock_load_platform_chassis.return_value = False assert not reboot_helper.load_and_verify("DPU1") @@ -95,6 +100,11 @@ def test_reboot_dpu_success(self, mock_load_and_verify, mock_platform_chassis): mock_platform_chassis.reboot.return_value = True assert reboot_helper.reboot_dpu("DPU1", "DPU") + def test_reboot_dpu_no_reboot_method(self, mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + with patch('reboot_helper.hasattr', return_value=False): + assert not reboot_helper.reboot_dpu("DPU1", "DPU") + def test_pci_detach_module_load_and_verify_fail(self, mock_load_and_verify): mock_load_and_verify.return_value = False assert not reboot_helper.pci_detach_module("DPU1") @@ -104,6 +114,11 @@ def test_pci_detach_module_success(self, mock_load_and_verify, mock_platform_cha mock_platform_chassis.pci_detach.return_value = True assert reboot_helper.pci_detach_module("DPU1") + def test_pci_detach_module_no_detach_method(self, mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + with patch('reboot_helper.hasattr', return_value=False): + assert not reboot_helper.pci_detach_module("DPU1") + def test_pci_reattach_module_load_and_verify_fail(self, mock_load_and_verify): mock_load_and_verify.return_value = False assert not reboot_helper.pci_reattach_module("DPU1") @@ -113,6 +128,11 @@ def test_pci_reattach_module_success(self, mock_load_and_verify, mock_platform_c mock_platform_chassis.pci_reattach.return_value = True assert reboot_helper.pci_reattach_module("DPU1") + def test_pci_reattach_module_no_reattach_method(self, mock_load_and_verify, mock_platform_chassis): + mock_load_and_verify.return_value = True + with patch('reboot_helper.hasattr', return_value=False): + assert not reboot_helper.pci_reattach_module("DPU1") + def test_parse_args_reboot_command_fail(self, monkeypatch): # Simulate command-line arguments for reboot monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) From 1a6ef0406e424418feb433014d58e2e0c192ff90 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 26 Nov 2024 09:53:39 +0000 Subject: [PATCH 14/26] parse_args function is not required --- scripts/reboot_helper.py | 30 -------- tests/test_reboot_helper.py | 135 ------------------------------------ 2 files changed, 165 deletions(-) diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 676bbd693a..13dfb75b6e 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -160,33 +160,3 @@ def pci_reattach_module(module_name): except Exception as e: log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) return False - - -def parse_args(): - if len(sys.argv) < 3: - print("Usage: reboot_helper.py [reboot_type]") - sys.exit(EXIT_FAIL) - - command = sys.argv[1] - module_name = sys.argv[2] - - if command == "reboot": - if len(sys.argv) < 4: - print("Usage: reboot_helper.py reboot ") - sys.exit(EXIT_FAIL) - reboot_type = sys.argv[3] - if not reboot_dpu(module_name, reboot_type): - sys.exit(EXIT_FAIL) - elif command == "pci_detach": - if not pci_detach_module(module_name): - sys.exit(EXIT_FAIL) - elif command == "pci_reattach": - if not pci_reattach_module(module_name): - sys.exit(EXIT_FAIL) - else: - print(f"Unknown command: {command}") - sys.exit(EXIT_FAIL) - - -if __name__ == "__main__": - parse_args() diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py index 2a491b8f9a..b6c8c776e7 100644 --- a/tests/test_reboot_helper.py +++ b/tests/test_reboot_helper.py @@ -132,138 +132,3 @@ def test_pci_reattach_module_no_reattach_method(self, mock_load_and_verify, mock mock_load_and_verify.return_value = True with patch('reboot_helper.hasattr', return_value=False): assert not reboot_helper.pci_reattach_module("DPU1") - - def test_parse_args_reboot_command_fail(self, monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) - - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), patch('sys.exit') as mock_exit: - reboot_helper.parse_args() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - def test_parse_args_reboot_command_type_fail(self, monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'UNKNOWN']) - - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), patch('sys.exit') as mock_exit: - - reboot_helper.parse_args() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - def test_parse_args_reboot_command_success(self, monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'DPU']) - - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=True) as mock_reboot_dpu, patch('sys.exit') as mock_exit: - - reboot_helper.parse_args() - - # Check that reboot_module was called - mock_reboot_dpu.assert_called_once_with('DPU1', 'DPU') - mock_exit.assert_not_called() - - def test_parse_args_pci_detach_command_fail(self, monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) - - # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=False), patch('sys.exit') as mock_exit: - - reboot_helper.parse_args() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - def test_parse_args_pci_detach_command_success(self, monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'DPU1']) - - # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=True) as mock_reboot_module, \ - patch('sys.exit') as mock_exit: # noqa: E125 - - reboot_helper.parse_args() - - # Check that pci_detach_module was called - mock_reboot_module.assert_called_once_with('DPU1') - mock_exit.assert_not_called() - - def test_parse_args_pci_reattach_command_fail(self, monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) - - # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=False), patch('sys.exit') as mock_exit: - - reboot_helper.parse_args() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - def test_parse_args_pci_reattach_command_success(self, monkeypatch): - # Simulate command-line arguments for reboot - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'DPU1']) - - # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=True) as mock_reboot_module, \ - patch('sys.exit') as mock_exit: # noqa: E125 - - reboot_helper.parse_args() - - # Check that pci_reattach_module was called - mock_reboot_module.assert_called_once_with('DPU1') - mock_exit.assert_not_called() - - def test_parse_args_unknown_command(self, monkeypatch): - # Simulate an unknown command - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'unknown', 'DPU1']) - - # Mock sys.exit and capture print output - with patch('sys.exit') as mock_exit, patch('builtins.print') as mock_print: - reboot_helper.parse_args() - - # Check that the unknown command message was printed - mock_print.assert_called_with("Unknown command: unknown") - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - def test_parse_args_reboot_invalid_reboot_type(self, monkeypatch): - # Simulate command-line arguments for reboot with invalid reboot_type - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'reboot', 'DPU1', 'INVALID']) - - # Mock dependencies - with patch('reboot_helper.reboot_dpu', return_value=False), patch('sys.exit') as mock_exit: - reboot_helper.parse_args() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - def test_parse_args_pci_detach_invalid_module(self, monkeypatch): - # Simulate command-line arguments for pci_detach with invalid module - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_detach', 'INVALID']) - - # Mock dependencies - with patch('reboot_helper.pci_detach_module', return_value=False), patch('sys.exit') as mock_exit: - - reboot_helper.parse_args() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) - - def test_parse_args_pci_reattach_invalid_module(self, monkeypatch): - # Simulate command-line arguments for pci_reattach with invalid module - monkeypatch.setattr(sys, 'argv', ['reboot_helper.py', 'pci_reattach', 'INVALID']) - - # Mock dependencies - with patch('reboot_helper.pci_reattach_module', return_value=False), patch('sys.exit') as mock_exit: - - reboot_helper.parse_args() - - # Check that sys.exit was called with EXIT_FAIL - mock_exit.assert_called_once_with(reboot_helper.EXIT_FAIL) From ec21d6f3e20b4353e79138c6033b68b186760409 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 26 Nov 2024 12:53:10 +0000 Subject: [PATCH 15/26] Fix indentation --- scripts/reboot_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py index 13dfb75b6e..edbfe17256 100644 --- a/scripts/reboot_helper.py +++ b/scripts/reboot_helper.py @@ -4,7 +4,6 @@ # # Utility helper for reboot within SONiC -import sys import sonic_platform from sonic_py_common import logger from utilities_common.chassis import is_smartswitch, get_dpu_list From 8d59222b664a2f25f4f5a53ad1eca91d25026b67 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Wed, 27 Nov 2024 02:05:21 +0000 Subject: [PATCH 16/26] Address review comments 1. Move module base functions to utilties_common/module_base.py 2. Use load_platform_chassis from utilities_common/util_base.py 3. Write unit tests accordingly --- scripts/reboot_helper.py | 161 ------------------------------ scripts/reboot_smartswitch_helper | 20 ++-- setup.py | 1 - tests/test_module_base.py | 67 +++++++++++++ tests/test_reboot_helper.py | 134 ------------------------- utilities_common/module_base.py | 109 ++++++++++++++++++++ 6 files changed, 185 insertions(+), 307 deletions(-) delete mode 100644 scripts/reboot_helper.py create mode 100644 tests/test_module_base.py delete mode 100644 tests/test_reboot_helper.py create mode 100644 utilities_common/module_base.py diff --git a/scripts/reboot_helper.py b/scripts/reboot_helper.py deleted file mode 100644 index edbfe17256..0000000000 --- a/scripts/reboot_helper.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python3 -# -# reboot_helper.py -# -# Utility helper for reboot within SONiC - -import sonic_platform -from sonic_py_common import logger -from utilities_common.chassis import is_smartswitch, get_dpu_list - -SYSLOG_IDENTIFIER = "reboot_helper" - -EXIT_FAIL = -1 -EXIT_SUCCESS = 0 - -# Global logger instance -log = logger.Logger(SYSLOG_IDENTIFIER) - -# Global variable for platform chassis -platform_chassis = None - - -def load_platform_chassis(): - """ - Load the platform chassis using the SONiC platform API. - - This function attempts to instantiate the platform chassis object. - If successful, it sets the global variable `platform_chassis` to the instantiated object. - - Returns: - bool: True if the platform chassis is successfully loaded, False otherwise. - """ - global platform_chassis - - # Load new platform API class - try: - platform_chassis = sonic_platform.platform.Platform().get_chassis() - if platform_chassis is None: - log.log_error("Platform chassis is not loaded") - return False - return True - except Exception as e: - log.log_error("Failed to instantiate Chassis: {}".format(str(e))) - return False - - -def load_and_verify(module_name): - """ - Load the platform chassis and verify the required parameters. - - Args: - module_name (str): The name of the module to verify. - - Returns: - bool: True if platform chassis is successfully loaded and required parameters are verified, False otherwise. - """ - # Load the platform chassis if not already loaded - if not load_platform_chassis(): - return False - - if not is_smartswitch(): - log.log_error("Platform is not a smartswitch") - return False - - dpu_list = get_dpu_list() - if module_name.lower() not in dpu_list: - log.log_error("Module {} not found".format(module_name)) - return False - - return True - - -def reboot_dpu(module_name, reboot_type): - """ - Reboot the specified module by invoking the platform API. - - Args: - module_name (str): The name of the module to reboot. - reboot_type (str): The type of reboot requested for the module. - - Returns: - bool: True if the reboot command was successfully sent, False otherwise. - """ - if not load_and_verify(module_name): - return False - - # Attempt to reboot the module - if not hasattr(platform_chassis, 'reboot'): - log.log_error("Reboot method not found in platform chassis") - return False - - log.log_info("Rebooting module {} with reboot_type {}...".format(module_name, reboot_type)) - try: - status = platform_chassis.reboot(module_name, reboot_type) - if not status: - log.log_error("Reboot status for module {}: {}".format(module_name, status)) - return False - return True - except Exception as e: - log.log_error("Unexpected error occurred while rebooting module {}: {}".format(module_name, e)) - return False - - -def pci_detach_module(module_name): - """ - Detach the specified module by invoking the platform API. - - Args: - module_name (str): The name of the module to detach. - - Returns: - bool: True if the detach command was successfully sent, False otherwise. - """ - if not load_and_verify(module_name): - return False - - # Attempt to detach the module - if not hasattr(platform_chassis, 'pci_detach'): - log.log_error("PCI detach method not found in platform chassis") - return False - - log.log_info("Detaching module {}...".format(module_name)) - try: - status = platform_chassis.pci_detach(module_name) - if not status: - log.log_error("PCI detach status for module {}: {}".format(module_name, status)) - return False - return True - except Exception as e: - log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) - return False - - -def pci_reattach_module(module_name): - """ - Rescan the specified module by invoking the platform API. - - Args: - module_name (str): The name of the module to rescan. - - Returns: - bool: True if the rescan command was successfully sent, False otherwise. - """ - if not load_and_verify(module_name): - return False - - # Attempt to detach the module - if not hasattr(platform_chassis, 'pci_reattach'): - log.log_error("PCI reattach method not found in platform chassis") - return False - - log.log_info("Rescaning module {}...".format(module_name)) - try: - status = platform_chassis.pci_reattach(module_name) - if not status: - log.log_error("PCI detach status for module {}: {}".format(module_name, status)) - return False - return True - except Exception as e: - log.log_error("Unexpected error occurred while detaching module {}: {}".format(module_name, e)) - return False diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index 7603f37724..785d0f80c4 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -67,7 +67,7 @@ function pci_detach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "from reboot_helper import pci_detach_module; pci_detach_module('${DPU_NAME}')" + python3 -c "from utilities_common.module_base import ModuleHelper; ModuleHelper.pci_detach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI detach vendor API is not available" echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove @@ -79,7 +79,7 @@ function pci_reattach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "from reboot_helper import pci_reattach_module; pci_reattach_module('${DPU_NAME}')" + python3 -c "from utilties_common.module_base import ModuleHelper; ModuleHelper.pci_reattach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI reattach vendor API is not available" echo 1 > /sys/bus/pci/rescan @@ -91,7 +91,7 @@ function reboot_dpu_platform() { local DPU_NAME=$1 local REBOOT_TYPE=$2 - python3 -c "import reboot_helper; reboot_helper.reboot_dpu('${DPU_NAME}', '${REBOOT_TYPE}')" + python3 -c "rom utilties_common.module_base import ModuleHelper; ModuleHelper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}')" } # Function to wait for DPU reboot status @@ -105,8 +105,7 @@ function wait_for_dpu_reboot_status() exit $EXIT_ERROR fi - local dpu_halt_services_timeou - dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) + local dpu_halt_services_timeout=$(jq -r '.dpu_halt_services_timeout' "$PLATFORM_JSON_PATH" 2>/dev/null) if [ $? -ne 0 ]; then log_message "Error: Failed to retrieve dpu_halt_services_timeout from ${PLATFORM_JSON_PATH}" return ${EXIT_ERROR} @@ -169,11 +168,10 @@ function reboot_dpu() gnmi_reboot_dpu "${DPU_NAME}" if [ $? -ne 0 ]; then log_message "Error: Failed to send gnoi command to reboot DPU ${DPU_NAME}" - return ${EXIT_ERROR} fi - DPU_BUS_INFO=$(jq -r --arg DPU_NAME "${DPU_NAME}" '.DPUS[] | select(has($DPU_NAME)) | .[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") - if [ -z "$DPU_BUS_INFO" ]; then + local DPU_BUS_INFO=$(jq -r --arg DPU_NAME "$DPU_NAME" '.DPUS[$DPU_NAME].bus_info' "$PLATFORM_JSON_PATH") + if [ -z "$DPU_BUS_INFO" ] || [ "$DPU_BUS_INFO" = "null" ]; then log_message "Error: Failed to retrieve bus info for DPU ${DPU_NAME}" return ${EXIT_ERROR} fi @@ -181,19 +179,19 @@ function reboot_dpu() # Update STATE_DB and handle PCIe removal and rescan sonic-db-cli STATE_DB set "PCIE_DETACH_INFO|${DPU_NAME}" '{"dpu_id": "'${DPU_INDEX}'", "dpu_state": "detaching", "bus_info": "'${DPU_BUS_INFO}'"}' - pci_detach_module "${DPU_NAME} ${DPU_BUS_INFO}" + pci_detach_module ${DPU_NAME} ${DPU_BUS_INFO} if [ $? -ne 0 ]; then log_message "Error: Failed to detach PCI module for DPU ${DPU_NAME}" return ${EXIT_ERROR} fi - reboot_dpu_platform "${DPU_NAME}" + reboot_dpu_platform ${DPU_NAME} if [ $? -ne 0 ]; then log_message "Error: Failed to send platform command to reboot DPU ${DPU_NAME}" return ${EXIT_ERROR} fi - pci_reattach_module "${DPU_NAME} ${DPU_BUS_INFO}" + pci_reattach_module ${DPU_NAME} ${DPU_BUS_INFO} sonic-db-cli STATE_DB del "PCIE_DETACH_INFO|${DPU_NAME}" } diff --git a/setup.py b/setup.py index 2deb44a39e..3815f19a9f 100644 --- a/setup.py +++ b/setup.py @@ -163,7 +163,6 @@ 'scripts/queuestat', 'scripts/reboot', 'scripts/reboot_smartswitch_helper', - 'scripts/reboot_helper.py', 'scripts/route_check.py', 'scripts/route_check_test.sh', 'scripts/vnet_route_check.py', diff --git a/tests/test_module_base.py b/tests/test_module_base.py new file mode 100644 index 0000000000..d864db7e88 --- /dev/null +++ b/tests/test_module_base.py @@ -0,0 +1,67 @@ +import sys +import pytest +from unittest import mock +from utilities_common.util_base import UtilHelper +from utilities_common.module_base import ModuleHelper, INVALID_MODULE_INDEX + +sys.modules['sonic_platform'] = mock.MagicMock() + +util = UtilHelper() +module_helper = ModuleHelper() + + +class TestModuleHelper: + @pytest.fixture + def mock_load_platform_chassis(self): + with mock.patch('utilities_common.module_base.util.load_platform_chassis') as mock_load_platform_chassis: + yield mock_load_platform_chassis + + @pytest.fixture + def mock_try_get(self): + with mock.patch('utilities_common.module_base.util.try_get') as mock_try_get: + yield mock_try_get + + def test_reboot_module_success(self, mock_load_platform_chassis, mock_try_get): + mock_try_get.return_value = True + mock_load_platform_chassis.return_value.get_module_index.return_value = 1 + mock_load_platform_chassis.return_value.get_module.return_value.reboot.return_value = True + + result = module_helper.reboot_module("DPU1", "cold") + assert result is True + + def test_reboot_module_invalid_index(self, mock_load_platform_chassis, mock_try_get): + mock_try_get.return_value = INVALID_MODULE_INDEX + mock_load_platform_chassis.return_value.get_module_index.return_value = INVALID_MODULE_INDEX + + result = module_helper.reboot_module("DPU1", "cold") + assert result is False + + def test_pci_detach_module_success(self, mock_load_platform_chassis, mock_try_get): + mock_try_get.return_value = True + mock_load_platform_chassis.return_value.get_module_index.return_value = 1 + mock_load_platform_chassis.return_value.get_module.return_value.pci_detach.return_value = True + + result = module_helper.pci_detach_module("DPU1") + assert result is True + + def test_pci_detach_module_invalid_index(self, mock_load_platform_chassis, mock_try_get): + mock_try_get.return_value = INVALID_MODULE_INDEX + mock_load_platform_chassis.return_value.get_module_index.return_value = INVALID_MODULE_INDEX + + result = module_helper.pci_detach_module("DPU1") + assert result is False + + def test_pci_reattach_module_success(self, mock_load_platform_chassis, mock_try_get): + mock_try_get.return_value = True + mock_load_platform_chassis.return_value.get_module_index.return_value = 1 + mock_load_platform_chassis.return_value.get_module.return_value.pci_reattach.return_value = True + + result = module_helper.pci_reattach_module("DPU1") + assert result is True + + def test_pci_reattach_module_invalid_index(self, mock_load_platform_chassis, mock_try_get): + mock_try_get.return_value = INVALID_MODULE_INDEX + mock_load_platform_chassis.return_value.get_module_index.return_value = INVALID_MODULE_INDEX + + result = module_helper.pci_reattach_module("DPU1") + assert result is False diff --git a/tests/test_reboot_helper.py b/tests/test_reboot_helper.py deleted file mode 100644 index b6c8c776e7..0000000000 --- a/tests/test_reboot_helper.py +++ /dev/null @@ -1,134 +0,0 @@ -# tests/test_reboot_helper.py -import sys -import pytest -from unittest.mock import patch, MagicMock, mock_open - -sys.modules['sonic_platform'] = MagicMock() - -sys.path.append("scripts") -import reboot_helper # noqa: E402 - - -class TestRebootHelper(object): - @pytest.fixture(scope='class') - def mock_load_platform_chassis(self): - with patch('reboot_helper.load_platform_chassis', return_value=True) as mock: - yield mock - - @pytest.fixture(scope='class') - def mock_load_and_verify(self): - with patch('reboot_helper.load_and_verify', return_value=True) as mock: - yield mock - - @pytest.fixture(scope='class') - def mock_is_smartswitch(self): - with patch('reboot_helper.is_smartswitch', return_value=True) as mock: - yield mock - - @pytest.fixture(scope='class') - def mock_open_file(self): - return mock_open() - - @pytest.fixture(scope='class') - def mock_platform_chassis(self): - with patch('reboot_helper.platform_chassis') as mock: - yield mock - - @pytest.fixture(scope='class') - def mock_platform(self): - with patch('reboot_helper.sonic_platform.platform.Platform') as mock: - yield mock - - @pytest.fixture(scope='class') - def mock_get_dpu_list(self): - with patch('reboot_helper.get_dpu_list', return_value=["dpu1"]) as mock: - yield mock - - def test_load_platform_chassis_success(self, mock_platform_chassis): - mock_platform_chassis.get_chassis.return_value = MagicMock() - result = reboot_helper.load_platform_chassis() - assert result - - def test_load_platform_chassis_exception(self, mock_platform): - mock_platform.side_effect = RuntimeError - result = reboot_helper.load_platform_chassis() - assert not result - - def test_load_platform_chassis_none(self, mock_platform_chassis): - mock_platform_chassis.get_chassis.return_value = None - result = reboot_helper.load_platform_chassis() - assert not result - - def test_load_and_verify_chassis_fail(self, mock_load_platform_chassis): - mock_load_platform_chassis.return_value = False - assert not reboot_helper.load_and_verify("DPU1") - - def test_load_and_verify_not_smartswitch(self, mock_is_smartswitch, mock_load_platform_chassis): - mock_is_smartswitch.return_value = False - mock_load_platform_chassis.return_value = True - assert not reboot_helper.load_and_verify("DPU1") - - def test_load_and_verify_not_found(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_dpu_list.return_value = ["dpu1"] - - assert not reboot_helper.load_and_verify("DPU2") - - def test_load_and_verify_empty_dpu_list(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_dpu_list.return_value = [] - - assert not reboot_helper.load_and_verify("DPU1") - - def test_load_and_verify_success(self, mock_is_smartswitch, mock_load_platform_chassis, mock_get_dpu_list): - mock_is_smartswitch.return_value = True - mock_load_platform_chassis.return_value = True - mock_get_dpu_list.return_value = ["dpu1"] - - result = reboot_helper.load_and_verify("DPU1") - assert result - - def test_reboot_dpu_load_and_verify_fail(self, mock_load_and_verify): - mock_load_and_verify.return_value = False - result = reboot_helper.reboot_dpu("DPU1", "DPU") - assert not result - - def test_reboot_dpu_success(self, mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.reboot.return_value = True - assert reboot_helper.reboot_dpu("DPU1", "DPU") - - def test_reboot_dpu_no_reboot_method(self, mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - with patch('reboot_helper.hasattr', return_value=False): - assert not reboot_helper.reboot_dpu("DPU1", "DPU") - - def test_pci_detach_module_load_and_verify_fail(self, mock_load_and_verify): - mock_load_and_verify.return_value = False - assert not reboot_helper.pci_detach_module("DPU1") - - def test_pci_detach_module_success(self, mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.pci_detach.return_value = True - assert reboot_helper.pci_detach_module("DPU1") - - def test_pci_detach_module_no_detach_method(self, mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - with patch('reboot_helper.hasattr', return_value=False): - assert not reboot_helper.pci_detach_module("DPU1") - - def test_pci_reattach_module_load_and_verify_fail(self, mock_load_and_verify): - mock_load_and_verify.return_value = False - assert not reboot_helper.pci_reattach_module("DPU1") - - def test_pci_reattach_module_success(self, mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - mock_platform_chassis.pci_reattach.return_value = True - assert reboot_helper.pci_reattach_module("DPU1") - - def test_pci_reattach_module_no_reattach_method(self, mock_load_and_verify, mock_platform_chassis): - mock_load_and_verify.return_value = True - with patch('reboot_helper.hasattr', return_value=False): - assert not reboot_helper.pci_reattach_module("DPU1") diff --git a/utilities_common/module_base.py b/utilities_common/module_base.py new file mode 100644 index 0000000000..eb87f0f44a --- /dev/null +++ b/utilities_common/module_base.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# +# module_base.py +# +# Utility helper for modules within SONiC + +from sonic_py_common import logger +from utilities_common.util_base import UtilHelper + +SYSLOG_IDENTIFIER = "module_base" +NOT_AVAILABLE = 'N/A' +INVALID_MODULE_INDEX = -1 + +# Load the UtilHelper class +util = UtilHelper() + +# Global logger instance +log = logger.Logger(SYSLOG_IDENTIFIER) + + +class ModuleHelper: + def __init__(self): + # Global variable for platform chassis + self.platform_chassis = util.load_platform_chassis() + if not self.platform_chassis: + log.log_error("Failed to load platform chassis") + return False + + def reboot_module(self, module_name, reboot_type): + """ + Reboot the specified module by invoking the platform API. + + Args: + module_name (str): The name of the module to reboot. + reboot_type (str): The type of reboot requested for the module. + + Returns: + bool: True if the reboot command was successfully sent, False otherwise. + """ + module_index = util.try_get(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + if module_index < 0: + log.log_error("Unable to get module-index for {}". format(module_name)) + return False + + if not hasattr(self.platform_chassis.get_module(module_index), 'reboot'): + log.log_error("Reboot method not found in platform chassis") + return False + + log.log_info("Rebooting module {} with reboot_type {}...".format(module_name, reboot_type)) + status = util.try_get(self.platform_chassis.get_module(module_index).reboot, reboot_type, False) + if not status: + log.log_error("Reboot status for module {}: {}".format(module_name, status)) + return False + + return True + + def pci_detach_module(self, module_name): + """ + Detach the specified module by invoking the platform API. + + Args: + module_name (str): The name of the module to detach. + + Returns: + bool: True if the detach command was successfully sent, False otherwise. + """ + module_index = util.try_get(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + if module_index < 0: + log.log_error("Unable to get module-index for {}". format(module_name)) + return False + + if not hasattr(self.platform_chassis.get_module(module_index), 'pci_detach'): + log.log_error("PCI detach method not found in platform chassis") + return False + + log.log_info("Detaching module {}...".format(module_name)) + status = util.try_get(self.platform_chassis.get_module(module_index).pci_detach, False) + if not status: + log.log_error("PCI detach status for module {}: {}".format(module_name, status)) + return False + + return True + + def pci_reattach_module(self, module_name): + """ + Rescan the specified module by invoking the platform API. + + Args: + module_name (str): The name of the module to rescan. + + Returns: + bool: True if the rescan command was successfully sent, False otherwise. + """ + module_index = util.try_get(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + if module_index < 0: + log.log_error("Unable to get module-index for {}". format(module_name)) + return False + + if not hasattr(self.platform_chassis.get_module(module_index), 'pci_reattach'): + log.log_error("PCI reattach method not found in platform chassis") + return False + + log.log_info("Rescanning module {}...".format(module_name)) + status = util.try_get(self.platform_chassis.get_module(module_index).pci_reattach, False) + if not status: + log.log_error("PCI rescan status for module {}: {}".format(module_name, status)) + return False + + return True From 98406c7c8bb0e9f6520f44f78b58401755fe2fe6 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Wed, 27 Nov 2024 13:23:01 +0000 Subject: [PATCH 17/26] Increase code coverage --- tests/test_module_base.py | 16 ++++++++++++++++ utilities_common/module_base.py | 1 - 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/test_module_base.py b/tests/test_module_base.py index d864db7e88..ffac0c71d4 100644 --- a/tests/test_module_base.py +++ b/tests/test_module_base.py @@ -21,6 +21,22 @@ def mock_try_get(self): with mock.patch('utilities_common.module_base.util.try_get') as mock_try_get: yield mock_try_get + @pytest.fixture + def mock_log_error(self): + with mock.patch('utilities_common.module_base.log.log_error') as mock_log_error: + yield mock_log_error + + def test_init_success(self, mock_load_platform_chassis): + mock_load_platform_chassis.return_value = mock.MagicMock() + module_helper = ModuleHelper() + assert module_helper.platform_chassis is not None + + def test_init_failure(self, mock_load_platform_chassis, mock_log_error): + mock_load_platform_chassis.return_value = None + module_helper = ModuleHelper() + mock_log_error.assert_called_once_with("Failed to load platform chassis") + assert module_helper.platform_chassis is None + def test_reboot_module_success(self, mock_load_platform_chassis, mock_try_get): mock_try_get.return_value = True mock_load_platform_chassis.return_value.get_module_index.return_value = 1 diff --git a/utilities_common/module_base.py b/utilities_common/module_base.py index eb87f0f44a..510e8aca88 100644 --- a/utilities_common/module_base.py +++ b/utilities_common/module_base.py @@ -24,7 +24,6 @@ def __init__(self): self.platform_chassis = util.load_platform_chassis() if not self.platform_chassis: log.log_error("Failed to load platform chassis") - return False def reboot_module(self, module_name, reboot_type): """ From a6f771e657e20a1d2867024a8ece1ef0c97367fc Mon Sep 17 00:00:00 2001 From: Vasundhara Volam <163894573+vvolam@users.noreply.github.com> Date: Wed, 27 Nov 2024 17:26:57 -0800 Subject: [PATCH 18/26] Update scripts/reboot_smartswitch_helper Co-authored-by: Gagan Punathil Ellath --- scripts/reboot_smartswitch_helper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index 785d0f80c4..c34d26fcff 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -79,7 +79,7 @@ function pci_reattach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "from utilties_common.module_base import ModuleHelper; ModuleHelper.pci_reattach_module('${DPU_NAME}')" + python3 -c "from utilities_common.module_base import ModuleHelper; ModuleHelper.pci_reattach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI reattach vendor API is not available" echo 1 > /sys/bus/pci/rescan From a3f8af7e26b37fb380aad0fa0e471ac0d3ccacbe Mon Sep 17 00:00:00 2001 From: Vasundhara Volam <163894573+vvolam@users.noreply.github.com> Date: Wed, 27 Nov 2024 17:27:32 -0800 Subject: [PATCH 19/26] Update scripts/reboot_smartswitch_helper Co-authored-by: Gagan Punathil Ellath --- scripts/reboot_smartswitch_helper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index c34d26fcff..4e6e35685d 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -91,7 +91,7 @@ function reboot_dpu_platform() { local DPU_NAME=$1 local REBOOT_TYPE=$2 - python3 -c "rom utilties_common.module_base import ModuleHelper; ModuleHelper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}')" + python3 -c "from utilities_common.module_base import ModuleHelper; ModuleHelper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}')" } # Function to wait for DPU reboot status From 36ecf1b1629215128d24df9cb258a8e01b1a04cc Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 28 Nov 2024 03:21:24 +0000 Subject: [PATCH 20/26] Rename module_base.py to module.py Instantiate ModuleHelper() before calling its functions --- scripts/reboot_smartswitch_helper | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index 4e6e35685d..7d9275ac68 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -67,7 +67,7 @@ function pci_detach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "from utilities_common.module_base import ModuleHelper; ModuleHelper.pci_detach_module('${DPU_NAME}')" + python3 -c "from utilities_common.module_base import ModuleHelper; helper = ModuleHelper(); helper.pci_detach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI detach vendor API is not available" echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove @@ -79,7 +79,7 @@ function pci_reattach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "from utilities_common.module_base import ModuleHelper; ModuleHelper.pci_reattach_module('${DPU_NAME}')" + python3 -c "from utilities_common.module_base import ModuleHelper; helper = ModuleHelper(); helper.pci_reattach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI reattach vendor API is not available" echo 1 > /sys/bus/pci/rescan @@ -91,7 +91,11 @@ function reboot_dpu_platform() { local DPU_NAME=$1 local REBOOT_TYPE=$2 - python3 -c "from utilities_common.module_base import ModuleHelper; ModuleHelper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}')" + python3 -c " + from utilities_common.module_base import ModuleHelper + helper = ModuleHelper() + helper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}') + " } # Function to wait for DPU reboot status From 67e78178e3cd27f4e8b12399c3dd06f1648bd5c1 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 28 Nov 2024 04:21:40 +0000 Subject: [PATCH 21/26] Committing missed files in previous commit --- scripts/reboot_smartswitch_helper | 6 +++--- tests/{test_module_base.py => test_module.py} | 2 +- utilities_common/{module_base.py => module.py} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename tests/{test_module_base.py => test_module.py} (97%) rename utilities_common/{module_base.py => module.py} (100%) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index 7d9275ac68..ffdd29f935 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -67,7 +67,7 @@ function pci_detach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "from utilities_common.module_base import ModuleHelper; helper = ModuleHelper(); helper.pci_detach_module('${DPU_NAME}')" + python3 -c "from utilities_common.module import ModuleHelper; helper = ModuleHelper(); helper.pci_detach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI detach vendor API is not available" echo 1 > /sys/bus/pci/devices/${DPU_BUS_INFO}/remove @@ -79,7 +79,7 @@ function pci_reattach_module() { local DPU_NAME=$1 local DPU_BUS_INFO=$2 - python3 -c "from utilities_common.module_base import ModuleHelper; helper = ModuleHelper(); helper.pci_reattach_module('${DPU_NAME}')" + python3 -c "from utilities_common.module import ModuleHelper; helper = ModuleHelper(); helper.pci_reattach_module('${DPU_NAME}')" if [ $? -ne 0 ]; then log_message "Error: PCI reattach vendor API is not available" echo 1 > /sys/bus/pci/rescan @@ -92,7 +92,7 @@ function reboot_dpu_platform() local DPU_NAME=$1 local REBOOT_TYPE=$2 python3 -c " - from utilities_common.module_base import ModuleHelper + from utilities_common.module import ModuleHelper helper = ModuleHelper() helper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}') " diff --git a/tests/test_module_base.py b/tests/test_module.py similarity index 97% rename from tests/test_module_base.py rename to tests/test_module.py index ffac0c71d4..8bdf598661 100644 --- a/tests/test_module_base.py +++ b/tests/test_module.py @@ -2,7 +2,7 @@ import pytest from unittest import mock from utilities_common.util_base import UtilHelper -from utilities_common.module_base import ModuleHelper, INVALID_MODULE_INDEX +from utilities_common.module import ModuleHelper, INVALID_MODULE_INDEX sys.modules['sonic_platform'] = mock.MagicMock() diff --git a/utilities_common/module_base.py b/utilities_common/module.py similarity index 100% rename from utilities_common/module_base.py rename to utilities_common/module.py From 88af21da0ac7a48a21d293efa0a79994527e53c9 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 28 Nov 2024 04:55:17 +0000 Subject: [PATCH 22/26] Define a new try_get_args() which takes arguments as inputs --- tests/test_module.py | 33 ++++++++++++++++++++------------- utilities_common/module.py | 26 ++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/tests/test_module.py b/tests/test_module.py index 8bdf598661..2033fa84d4 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -13,17 +13,22 @@ class TestModuleHelper: @pytest.fixture def mock_load_platform_chassis(self): - with mock.patch('utilities_common.module_base.util.load_platform_chassis') as mock_load_platform_chassis: + with mock.patch('utilities_common.module.util.load_platform_chassis') as mock_load_platform_chassis: yield mock_load_platform_chassis @pytest.fixture def mock_try_get(self): - with mock.patch('utilities_common.module_base.util.try_get') as mock_try_get: + with mock.patch('utilities_common.module.util.try_get') as mock_try_get: yield mock_try_get + @pytest.fixture + def mock_try_get_args(self): + with mock.patch.object(ModuleHelper, 'try_get_args') as mock_try_get_args: + yield mock_try_get_args + @pytest.fixture def mock_log_error(self): - with mock.patch('utilities_common.module_base.log.log_error') as mock_log_error: + with mock.patch('utilities_common.module.log.log_error') as mock_log_error: yield mock_log_error def test_init_success(self, mock_load_platform_chassis): @@ -37,22 +42,23 @@ def test_init_failure(self, mock_load_platform_chassis, mock_log_error): mock_log_error.assert_called_once_with("Failed to load platform chassis") assert module_helper.platform_chassis is None - def test_reboot_module_success(self, mock_load_platform_chassis, mock_try_get): - mock_try_get.return_value = True + def test_reboot_module_success(self, mock_load_platform_chassis, mock_try_get_args): + mock_try_get_args.return_value = True mock_load_platform_chassis.return_value.get_module_index.return_value = 1 mock_load_platform_chassis.return_value.get_module.return_value.reboot.return_value = True result = module_helper.reboot_module("DPU1", "cold") assert result is True - def test_reboot_module_invalid_index(self, mock_load_platform_chassis, mock_try_get): - mock_try_get.return_value = INVALID_MODULE_INDEX + def test_reboot_module_invalid_index(self, mock_load_platform_chassis, mock_try_get_args): + mock_try_get_args.return_value = INVALID_MODULE_INDEX mock_load_platform_chassis.return_value.get_module_index.return_value = INVALID_MODULE_INDEX result = module_helper.reboot_module("DPU1", "cold") assert result is False - def test_pci_detach_module_success(self, mock_load_platform_chassis, mock_try_get): + def test_pci_detach_module_success(self, mock_load_platform_chassis, mock_try_get_args, mock_try_get): + mock_try_get_args.return_value = True mock_try_get.return_value = True mock_load_platform_chassis.return_value.get_module_index.return_value = 1 mock_load_platform_chassis.return_value.get_module.return_value.pci_detach.return_value = True @@ -60,14 +66,15 @@ def test_pci_detach_module_success(self, mock_load_platform_chassis, mock_try_ge result = module_helper.pci_detach_module("DPU1") assert result is True - def test_pci_detach_module_invalid_index(self, mock_load_platform_chassis, mock_try_get): - mock_try_get.return_value = INVALID_MODULE_INDEX + def test_pci_detach_module_invalid_index(self, mock_load_platform_chassis, mock_try_get_args): + mock_try_get_args.return_value = INVALID_MODULE_INDEX mock_load_platform_chassis.return_value.get_module_index.return_value = INVALID_MODULE_INDEX result = module_helper.pci_detach_module("DPU1") assert result is False - def test_pci_reattach_module_success(self, mock_load_platform_chassis, mock_try_get): + def test_pci_reattach_module_success(self, mock_load_platform_chassis, mock_try_get_args, mock_try_get): + mock_try_get_args.return_value = True mock_try_get.return_value = True mock_load_platform_chassis.return_value.get_module_index.return_value = 1 mock_load_platform_chassis.return_value.get_module.return_value.pci_reattach.return_value = True @@ -75,8 +82,8 @@ def test_pci_reattach_module_success(self, mock_load_platform_chassis, mock_try_ result = module_helper.pci_reattach_module("DPU1") assert result is True - def test_pci_reattach_module_invalid_index(self, mock_load_platform_chassis, mock_try_get): - mock_try_get.return_value = INVALID_MODULE_INDEX + def test_pci_reattach_module_invalid_index(self, mock_load_platform_chassis, mock_try_get_args): + mock_try_get_args.return_value = INVALID_MODULE_INDEX mock_load_platform_chassis.return_value.get_module_index.return_value = INVALID_MODULE_INDEX result = module_helper.pci_reattach_module("DPU1") diff --git a/utilities_common/module.py b/utilities_common/module.py index 510e8aca88..63d6bb4f0f 100644 --- a/utilities_common/module.py +++ b/utilities_common/module.py @@ -25,6 +25,24 @@ def __init__(self): if not self.platform_chassis: log.log_error("Failed to load platform chassis") + def try_get_args(self, callback, *args, **kwargs): + """ + Handy function to invoke the callback and catch NotImplementedError + :param callback: Callback to be invoked + :param args: Arguments to be passed to callback + :param kwargs: Default return value if exception occur + :return: Default return value if exception occur else return value of the callback + """ + default = kwargs.get('default', NOT_AVAILABLE) + try: + ret = callback(*args) + if ret is None: + ret = default + except NotImplementedError: + ret = default + + return ret + def reboot_module(self, module_name, reboot_type): """ Reboot the specified module by invoking the platform API. @@ -36,7 +54,7 @@ def reboot_module(self, module_name, reboot_type): Returns: bool: True if the reboot command was successfully sent, False otherwise. """ - module_index = util.try_get(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) if module_index < 0: log.log_error("Unable to get module-index for {}". format(module_name)) return False @@ -46,7 +64,7 @@ def reboot_module(self, module_name, reboot_type): return False log.log_info("Rebooting module {} with reboot_type {}...".format(module_name, reboot_type)) - status = util.try_get(self.platform_chassis.get_module(module_index).reboot, reboot_type, False) + status = self.try_get_args(self.platform_chassis.get_module(module_index).reboot, reboot_type, False) if not status: log.log_error("Reboot status for module {}: {}".format(module_name, status)) return False @@ -63,7 +81,7 @@ def pci_detach_module(self, module_name): Returns: bool: True if the detach command was successfully sent, False otherwise. """ - module_index = util.try_get(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) if module_index < 0: log.log_error("Unable to get module-index for {}". format(module_name)) return False @@ -90,7 +108,7 @@ def pci_reattach_module(self, module_name): Returns: bool: True if the rescan command was successfully sent, False otherwise. """ - module_index = util.try_get(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) if module_index < 0: log.log_error("Unable to get module-index for {}". format(module_name)) return False From e4bcc950d14860e55b623356cad304ea4f46b506 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 3 Dec 2024 17:42:50 +0000 Subject: [PATCH 23/26] Fix some arguments Increase code coverage as well. --- scripts/reboot_smartswitch_helper | 8 ++------ tests/test_module.py | 21 +++++++++++++++++++++ utilities_common/module.py | 6 +++--- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/scripts/reboot_smartswitch_helper b/scripts/reboot_smartswitch_helper index ffdd29f935..4ced30c335 100644 --- a/scripts/reboot_smartswitch_helper +++ b/scripts/reboot_smartswitch_helper @@ -91,11 +91,7 @@ function reboot_dpu_platform() { local DPU_NAME=$1 local REBOOT_TYPE=$2 - python3 -c " - from utilities_common.module import ModuleHelper - helper = ModuleHelper() - helper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}') - " + python3 -c "from utilities_common.module import ModuleHelper; helper = ModuleHelper(); helper.reboot_module('${DPU_NAME}', '${REBOOT_TYPE}')" } # Function to wait for DPU reboot status @@ -189,7 +185,7 @@ function reboot_dpu() return ${EXIT_ERROR} fi - reboot_dpu_platform ${DPU_NAME} + reboot_dpu_platform ${DPU_NAME} ${REBOOT_TYPE} if [ $? -ne 0 ]; then log_message "Error: Failed to send platform command to reboot DPU ${DPU_NAME}" return ${EXIT_ERROR} diff --git a/tests/test_module.py b/tests/test_module.py index 2033fa84d4..71ba28be2e 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -31,6 +31,27 @@ def mock_log_error(self): with mock.patch('utilities_common.module.log.log_error') as mock_log_error: yield mock_log_error + def test_try_get_args_success(self): + def mock_callback(arg): + return arg + + result = module_helper.try_get_args(mock_callback, "test_arg") + assert result == "test_arg" + + def test_try_get_args_none_return(self): + def mock_callback(arg): + return None + + result = module_helper.try_get_args(mock_callback, "test_arg", default="default_value") + assert result == "default_value" + + def test_try_get_args_not_implemented_error(self): + def mock_callback(arg): + raise NotImplementedError + + result = module_helper.try_get_args(mock_callback, "test_arg", default="default_value") + assert result == "default_value" + def test_init_success(self, mock_load_platform_chassis): mock_load_platform_chassis.return_value = mock.MagicMock() module_helper = ModuleHelper() diff --git a/utilities_common/module.py b/utilities_common/module.py index 63d6bb4f0f..b9756bf219 100644 --- a/utilities_common/module.py +++ b/utilities_common/module.py @@ -64,7 +64,7 @@ def reboot_module(self, module_name, reboot_type): return False log.log_info("Rebooting module {} with reboot_type {}...".format(module_name, reboot_type)) - status = self.try_get_args(self.platform_chassis.get_module(module_index).reboot, reboot_type, False) + status = self.try_get_args(self.platform_chassis.get_module(module_index).reboot, reboot_type, default=False) if not status: log.log_error("Reboot status for module {}: {}".format(module_name, status)) return False @@ -91,7 +91,7 @@ def pci_detach_module(self, module_name): return False log.log_info("Detaching module {}...".format(module_name)) - status = util.try_get(self.platform_chassis.get_module(module_index).pci_detach, False) + status = util.try_get(self.platform_chassis.get_module(module_index).pci_detach, default=False) if not status: log.log_error("PCI detach status for module {}: {}".format(module_name, status)) return False @@ -118,7 +118,7 @@ def pci_reattach_module(self, module_name): return False log.log_info("Rescanning module {}...".format(module_name)) - status = util.try_get(self.platform_chassis.get_module(module_index).pci_reattach, False) + status = util.try_get(self.platform_chassis.get_module(module_index).pci_reattach, default=False) if not status: log.log_error("PCI rescan status for module {}: {}".format(module_name, status)) return False From edaa0dee02cec496a6fc82e53891d4b6712bc04c Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 3 Dec 2024 17:56:12 +0000 Subject: [PATCH 24/26] Exit the reboot script after completing DPU reboot --- scripts/reboot | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/reboot b/scripts/reboot index 08e85b751f..f48fd14c8c 100755 --- a/scripts/reboot +++ b/scripts/reboot @@ -249,6 +249,13 @@ if [[ $smart_switch_result -ne 0 ]]; then exit $smart_switch_result fi +# On a smartswitch, complete the DPU reboot and exit +is_smartswitch=$(is_smartswitch) +if [ "$is_smartswitch" == "True" ] && [ "$REBOOT_DPU" == "yes" ]; then + exit $smart_switch_result +fi +fi + check_conflict_boot_in_fw_update setup_reboot_variables From 119d83b550aa6d674b92dcd28c2317e36e6c2a94 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Wed, 4 Dec 2024 03:44:24 +0000 Subject: [PATCH 25/26] Fix long lines - Use SysLogger instead of Logger - Fix the return type of get_num_dpus() and get_dpu_list() --- utilities_common/chassis.py | 8 ++++++-- utilities_common/module.py | 17 ++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/utilities_common/chassis.py b/utilities_common/chassis.py index c2582b911f..264e5cf661 100644 --- a/utilities_common/chassis.py +++ b/utilities_common/chassis.py @@ -27,8 +27,12 @@ def is_dpu(): def get_num_dpus(): - return hasattr(device_info, 'get_num_dpus') and device_info.get_num_dpus() + if hasattr(device_info, 'get_num_dpus'): + return device_info.get_num_dpus() + return 0 def get_dpu_list(): - return hasattr(device_info, 'get_dpu_list') and device_info.get_dpu_list() + if hasattr(device_info, 'get_dpu_list'): + return device_info.get_dpu_list() + return [] diff --git a/utilities_common/module.py b/utilities_common/module.py index b9756bf219..36953c3ab5 100644 --- a/utilities_common/module.py +++ b/utilities_common/module.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 # -# module_base.py +# module.py # # Utility helper for modules within SONiC -from sonic_py_common import logger +from sonic_py_common import syslogger from utilities_common.util_base import UtilHelper -SYSLOG_IDENTIFIER = "module_base" +SYSLOG_IDENTIFIER = "module" NOT_AVAILABLE = 'N/A' INVALID_MODULE_INDEX = -1 @@ -15,7 +15,7 @@ util = UtilHelper() # Global logger instance -log = logger.Logger(SYSLOG_IDENTIFIER) +log = syslogger.SysLogger(SYSLOG_IDENTIFIER) class ModuleHelper: @@ -54,7 +54,8 @@ def reboot_module(self, module_name, reboot_type): Returns: bool: True if the reboot command was successfully sent, False otherwise. """ - module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, + default=INVALID_MODULE_INDEX) if module_index < 0: log.log_error("Unable to get module-index for {}". format(module_name)) return False @@ -81,7 +82,8 @@ def pci_detach_module(self, module_name): Returns: bool: True if the detach command was successfully sent, False otherwise. """ - module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, + default=INVALID_MODULE_INDEX) if module_index < 0: log.log_error("Unable to get module-index for {}". format(module_name)) return False @@ -108,7 +110,8 @@ def pci_reattach_module(self, module_name): Returns: bool: True if the rescan command was successfully sent, False otherwise. """ - module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, default=INVALID_MODULE_INDEX) + module_index = self.try_get_args(self.platform_chassis.get_module_index, module_name, + default=INVALID_MODULE_INDEX) if module_index < 0: log.log_error("Unable to get module-index for {}". format(module_name)) return False From 75f4c26163076f707b8a660e35a23ff8bd03df8d Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 6 Dec 2024 08:18:22 +0000 Subject: [PATCH 26/26] Update unit tests for update function code --- tests/test_chassis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_chassis.py b/tests/test_chassis.py index e843e70066..57fa418018 100644 --- a/tests/test_chassis.py +++ b/tests/test_chassis.py @@ -28,11 +28,11 @@ def test_get_num_dpus(self, mock_device_info): assert chassis.get_num_dpus() == 4 del mock_device_info.get_num_dpus - assert chassis.get_num_dpus() == False # noqa: E712 + assert chassis.get_num_dpus() == 0 # noqa: E712 def test_get_dpu_list(self, mock_device_info): mock_device_info.get_dpu_list = mock.Mock(return_value=['dpu1', 'dpu2']) assert chassis.get_dpu_list() == ['dpu1', 'dpu2'] del mock_device_info.get_dpu_list - assert chassis.get_dpu_list() == False # noqa: E712 + assert chassis.get_dpu_list() == [] # noqa: E712