From f994b052db6bae9b484797aa403fde8b230775c2 Mon Sep 17 00:00:00 2001 From: rraghav-cisco <58446052+rraghav-cisco@users.noreply.github.com> Date: Wed, 20 Nov 2024 02:48:50 -0800 Subject: [PATCH] Fixing service-restart testcases. (#15560) Description of PR Summary: The pfcwd_basic service-restart cases keep failing due to: sonic-net/sonic-buildimage#20637 The ask is not to restart swss multiple times without doing a config reload in between. So in this PR: we are doing config-reload for every iteration of the test The swss restart is done only once in one DUT. The asic is randomly picked, and the swss of that ASIC is restarted instead of doing the restart for all asics. Also added checks to make sure the services, interfaces and bgp are up before proceding with the ixia traffic. Approach What is the motivation for this PR? The issue: sonic-net/sonic-buildimage#20637 How did you do it? Pls see the description. How did you verify/test it? Ran it on my TB. =========================================================================================================================== PASSES =========================================================================================================================== ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-True-swss] _____________________________________________________________________________________ ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-False-swss] ____________________________________________________________________________________ ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-True-swss] _____________________________________________________________________________________ ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-False-swss] ____________________________________________________________________________________ ----------------------------------------------------------------------------- generated xml file: /run_logs/ixia/restart-service/2024-11-14-00-05-11/tr_2024-11-14-00-05-11.xml ------------------------------------------------------------------------------ INFO:root:Can not get Allure report URL. Please check logs ------------------------------------------------------------------------------------------------------------------- live log sessionfinish ------------------------------------------------------------------------------------------------------------------- 01:31:34 __init__.pytest_terminal_summary L0067 INFO | Can not get Allure report URL. Please check logs ================================================================================================================== short test summary info =================================================================================================================== PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-True-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-False-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-True-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-False-swss] ========================================================================================================= 4 passed, 7 warnings in 5180.68s (1:26:20) ========================================================================================================= sonic@ixia-sonic-mgmt-whitebox:/data/tests$ -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================================================================================================================== PASSES =========================================================================================================================== ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-True-swss] _____________________________________________________________________________________ ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-False-swss] ____________________________________________________________________________________ ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-True-swss] _____________________________________________________________________________________ ____________________________________________________________________________________ test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-False-swss] ____________________________________________________________________________________ ---------------------------------------------------------------------------- generated xml file: /run_logs/ixia/restart-service-2/2024-11-14-02-47-47/tr_2024-11-14-02-47-47.xml ----------------------------------------------------------------------------- INFO:root:Can not get Allure report URL. Please check logs ------------------------------------------------------------------------------------------------------------------- live log sessionfinish ------------------------------------------------------------------------------------------------------------------- 04:14:03 __init__.pytest_terminal_summary L0067 INFO | Can not get Allure report URL. Please check logs ================================================================================================================== short test summary info =================================================================================================================== PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-True-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info0-False-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-True-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_multi_lossless_prio_restart_service[multidut_port_info1-False-swss] ========================================================================================================= 4 passed, 7 warnings in 5173.22s (1:26:13) ========================================================================================================= sonic@ixia-sonic-mgmt-whitebox:/data/tests$ =========================================================================================================================== PASSES =========================================================================================================================== ____________________________________________________________________________________ test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info0-True-swss] ____________________________________________________________________________________ ___________________________________________________________________________________ test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info0-False-swss] ____________________________________________________________________________________ ____________________________________________________________________________________ test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info1-True-swss] ____________________________________________________________________________________ ___________________________________________________________________________________ test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info1-False-swss] ____________________________________________________________________________________ ---------------------------------------------------------------------------- generated xml file: /run_logs/ixia/restart-service-2/2024-11-14-06-39-15/tr_2024-11-14-06-39-15.xml ----------------------------------------------------------------------------- INFO:root:Can not get Allure report URL. Please check logs ------------------------------------------------------------------------------------------------------------------- live log sessionfinish ------------------------------------------------------------------------------------------------------------------- 08:10:42 __init__.pytest_terminal_summary L0067 INFO | Can not get Allure report URL. Please check logs ================================================================================================================== short test summary info =================================================================================================================== PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info0-True-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info0-False-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info1-True-swss] PASSED snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py::test_pfcwd_basic_single_lossless_prio_service_restart[multidut_port_info1-False-swss] ========================================================================================================= 4 passed, 7 warnings in 5484.86s (1:31:24) ========================================================================================================= sonic@ixia-sonic-mgmt-whitebox:/data/tests$ co-authorized by: jianquanye@microsoft.com --- .../test_multidut_pfcwd_basic_with_snappi.py | 63 +++++++++++++++---- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/tests/snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py b/tests/snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py index daf00e18751..9c09f674b45 100644 --- a/tests/snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py +++ b/tests/snappi_tests/multidut/pfcwd/test_multidut_pfcwd_basic_with_snappi.py @@ -1,6 +1,7 @@ import pytest import random import logging +import time import re from collections import defaultdict from tests.common.helpers.assertions import pytest_require, pytest_assert # noqa: F401 @@ -13,6 +14,8 @@ from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, lossless_prio_list # noqa F401 from tests.common.reboot import reboot # noqa: F401 from tests.common.utilities import wait_until # noqa: F401 +from tests.common.config_reload import config_reload +from tests.common.platform.interface_utils import check_interface_status_of_up_ports from tests.snappi_tests.multidut.pfcwd.files.pfcwd_multidut_basic_helper import run_pfcwd_basic_test from tests.common.snappi_tests.snappi_test_params import SnappiTestParams from tests.snappi_tests.files.helper import skip_pfcwd_test, reboot_duts, \ @@ -29,6 +32,26 @@ def number_of_tx_rx_ports(): yield (1, 1) +@pytest.fixture(autouse=False) +def save_restore_config(setup_ports_and_dut): + testbed_config, port_config_list, snappi_ports = setup_ports_and_dut + timestamp = time.time() + dest = f'~/{timestamp}' + + for duthost in list(set([snappi_ports[0]['duthost'], snappi_ports[1]['duthost']])): + duthost.shell(f"sudo mkdir {dest}; sudo cp /etc/sonic/config*.json {dest}") + duthost.shell("sudo config save -y") + + yield + + for duthost in list(set([snappi_ports[0]['duthost'], snappi_ports[1]['duthost']])): + duthost.shell(f"sudo cp {dest}/config_db*json /etc/sonic/") + duthost.shell("sudo config save -y") + + for duthost in list(set([snappi_ports[0]['duthost'], snappi_ports[1]['duthost']])): + config_reload(duthost) + + @pytest.mark.parametrize("trigger_pfcwd", [True, False]) def test_pfcwd_basic_single_lossless_prio(snappi_api, # noqa: F811 conn_graph_facts, # noqa: F811 @@ -221,7 +244,8 @@ def test_pfcwd_basic_single_lossless_prio_service_restart(snappi_api, prio_dscp_map, # noqa: F811 restart_service, trigger_pfcwd, - setup_ports_and_dut): # noqa: F811 + setup_ports_and_dut, # noqa: F811 + save_restore_config): """ Verify PFC watchdog basic test works on a single lossless priority after various service restarts @@ -251,6 +275,7 @@ def test_pfcwd_basic_single_lossless_prio_service_restart(snappi_api, logger.info('Port dictionary:{}'.format(ports_dict)) for duthost in list(set([snappi_ports[0]['duthost'], snappi_ports[1]['duthost']])): + up_bgp_neighbors = duthost.get_bgp_neighbors_per_asic("established") # Record current state of critical services. duthost.critical_services_fully_started() @@ -264,6 +289,11 @@ def test_pfcwd_basic_single_lossless_prio_service_restart(snappi_api, logger.info("Wait until the system is stable") pytest_assert(wait_until(WAIT_TIME, INTERVAL, 0, duthost.critical_services_fully_started), "Not all critical services are fully started") + pytest_assert(wait_until(WAIT_TIME, INTERVAL, 0, check_interface_status_of_up_ports, duthost), + "Not all interfaces are up.") + pytest_assert(wait_until( + WAIT_TIME, INTERVAL, 0, duthost.check_bgp_session_state_all_asics, up_bgp_neighbors, "established")) + else: for duthost in list(set([snappi_ports[0]['duthost'], snappi_ports[1]['duthost']])): logger.info("Issuing a restart of service {} on the dut {}".format(restart_service, duthost.hostname)) @@ -300,7 +330,8 @@ def test_pfcwd_basic_multi_lossless_prio_restart_service(snappi_api, prio_dscp_map, # noqa F811 restart_service, setup_ports_and_dut, # noqa: F811 - trigger_pfcwd): + trigger_pfcwd, + save_restore_config): """ Verify PFC watchdog basic test works on multiple lossless priorities after various service restarts @@ -330,16 +361,26 @@ def test_pfcwd_basic_multi_lossless_prio_restart_service(snappi_api, logger.info('Port dictionary:{}'.format(ports_dict)) for duthost in list(set([snappi_ports[0]['duthost'], snappi_ports[1]['duthost']])): + up_bgp_neighbors = duthost.get_bgp_neighbors_per_asic("established") + # Record current state of critical services. + duthost.critical_services_fully_started() + asic_list = ports_dict[duthost.hostname] - for asic in asic_list: - asic_id = re.match(r"(asic)(\d+)", asic).group(2) - proc = 'swss@' + asic_id - logger.info("Issuing a restart of service {} on the dut {}".format(proc, duthost.hostname)) - duthost.command("sudo systemctl reset-failed {}".format(proc)) - duthost.command("sudo systemctl restart {}".format(proc)) - logger.info("Wait until the system is stable") - pytest_assert(wait_until(WAIT_TIME, INTERVAL, 0, duthost.critical_services_fully_started), - "Not all critical services are fully started") + asic = random.sample(asic_list, 1)[0] + asic_id = re.match(r"(asic)(\d+)", asic).group(2) + proc = 'swss@' + asic_id + + logger.info("Issuing a restart of service {} on the dut {}".format(proc, duthost.hostname)) + duthost.command("sudo systemctl reset-failed {}".format(proc)) + duthost.command("sudo systemctl restart {}".format(proc)) + logger.info("Wait until the system is stable") + pytest_assert(wait_until(WAIT_TIME, INTERVAL, 0, duthost.critical_services_fully_started), + "Not all critical services are fully started") + pytest_assert(wait_until(WAIT_TIME, INTERVAL, 0, check_interface_status_of_up_ports, duthost), + "Not all interfaces are up.") + pytest_assert(wait_until( + WAIT_TIME, INTERVAL, 0, duthost.check_bgp_session_state_all_asics, up_bgp_neighbors, "established")) + else: for duthost in list(set([snappi_ports[0]['duthost'], snappi_ports[1]['duthost']])): logger.info("Issuing a restart of service {} on the dut {}".format(restart_service, duthost.hostname))