import logging
import time

import ptf
from ptf.base_tests import BaseTest
from ptf.testutils import test_params_get, simple_ip_packet, send_packet


class PfcWdBackgroundTrafficTest(BaseTest):
    """PTF test that keeps injecting IP background traffic for a fixed duration.

    One packet is built per (source port, destination port, queue)
    combination and the whole set is re-sent in a loop until the configured
    duration has elapsed.

    Test parameters (read through ``test_params_get()``):
        router_mac:  destination MAC written into every packet.
        pkt_count:   number of copies sent by each ``send_packet`` call.
        src_ports:   PTF port indices the traffic is injected on.
        dst_ports:   PTF port indices paired with ``src_ports`` by position.
        src_ips:     source IP addresses, paired with ``src_ports`` by position.
        dst_ips:     destination IP addresses, paired with ``dst_ports``.
        queues:      optional; each value is used as the DSCP of a packet
                     (default ``[3, 4]``).
        bidirection: optional; when true the reverse direction is sent as
                     well (default ``True``).
        duration:    optional; seconds to keep sending (default 500).
    """

    DEFAULT_DURATION_SEC = 500
    IP_TTL = 64

    def __init__(self):
        BaseTest.__init__(self)
        self.test_params = test_params_get()

    def setUp(self):
        """Read the test parameters into instance attributes."""
        params = self.test_params
        self.dataplane = ptf.dataplane_instance
        self.router_mac = params['router_mac']
        self.pkt_count = int(params['pkt_count'])
        self.src_ports = params['src_ports']
        self.dst_ports = params['dst_ports']
        self.src_ips = params['src_ips']
        self.dst_ips = params['dst_ips']
        self.queues = params.get('queues', [3, 4])
        self.bidirection = params.get('bidirection', True)
        self.duration = float(params.get('duration', self.DEFAULT_DURATION_SEC))

    def _ip_packet(self, eth_src, ip_src, ip_dst, queue):
        """Build one IP packet addressed to the router MAC with DSCP ``queue``."""
        return simple_ip_packet(
            eth_src=eth_src,
            eth_dst=self.router_mac,
            ip_src=ip_src,
            ip_dst=ip_dst,
            ip_dscp=queue,
            ip_ecn=0,
            ip_ttl=self.IP_TTL
        )

    def _build_packets(self):
        """Return ``{ptf_port: [packet, ...]}`` for every port pair and queue.

        Raises:
            IndexError: if there are more source ports than destination
                ports, or destinations are given without any source.
        """
        # Work on copies so the lists owned by test_params are never modified.
        src_ports = list(self.src_ports)
        src_ips = list(self.src_ips)

        # Every destination needs a source; reuse the first source for each
        # destination that has none of its own.
        missing = len(self.dst_ports) - len(src_ports)
        if missing > 0:
            src_ports.extend([src_ports[0]] * missing)
            src_ips.extend([src_ips[0]] * missing)

        pkts_by_port = {}
        for idx, raw_src_port in enumerate(src_ports):
            src_port = int(raw_src_port)
            dst_port = int(self.dst_ports[idx])
            src_pkts = pkts_by_port.setdefault(src_port, [])
            dst_pkts = pkts_by_port.setdefault(dst_port, [])
            src_mac = self.dataplane.get_mac(0, src_port)
            dst_mac = self.dataplane.get_mac(0, dst_port)
            for queue in self.queues:
                print(f"traffic from {src_port} to {dst_port}: {queue} ")
                logging.info(f"traffic from {src_port} to {dst_port}: {queue} ")
                src_pkts.append(
                    self._ip_packet(src_mac, src_ips[idx], self.dst_ips[idx], queue))
                if self.bidirection:
                    print(f"traffic from {dst_port} to {src_port}: {queue} ")
                    logging.info(f"traffic from {dst_port} to {src_port}: {queue} ")
                    dst_pkts.append(
                        self._ip_packet(dst_mac, self.dst_ips[idx], src_ips[idx], queue))
        return pkts_by_port

    def runTest(self):
        """Send the prepared packets repeatedly until ``duration`` has elapsed."""
        pkts_by_port = self._build_packets()

        logging.info("Start to send the background traffic")
        print("Start to send the background traffic")
        # A monotonic clock keeps the duration correct if the wall clock is adjusted.
        deadline = time.monotonic() + self.duration
        while True:
            # The deadline is checked after a full pass, so every packet is
            # sent at least once.
            for port, pkts in pkts_by_port.items():
                for pkt in pkts:
                    send_packet(self, port, pkt, self.pkt_count)

            if time.monotonic() > deadline:
                break
@contextlib.contextmanager
def send_background_traffic(duthost, ptfhost, storm_hndle, selected_test_ports, test_ports_info):
    """Run background traffic on the PTF host for the duration of a ``with`` block.

    The traffic is only started when ``is_mellanox_device(duthost)`` is true;
    otherwise the context manager does nothing. Once started, the traffic is
    always stopped when the block exits, including when the block raises.

    Args:
        duthost: DUT host object; used for the platform check and to look up
            the router MAC.
        ptfhost: PTF host object the traffic process is started on.
        storm_hndle: despite the name, this value is forwarded unchanged as
            the ``queues`` list of the background traffic.
        selected_test_ports: names of the DUT ports that receive traffic.
        test_ports_info: mapping of port name to that port's info dict.
    """
    background_traffic_log = None
    # Evaluate the platform check once so start and stop cannot disagree.
    if is_mellanox_device(duthost):
        background_traffic_params = _prepare_background_traffic_params(
            duthost, storm_hndle, selected_test_ports, test_ports_info)
        background_traffic_log = _send_background_traffic(ptfhost, background_traffic_params)
    try:
        yield
    finally:
        # Without the finally, a failure inside the with-block would leave the
        # background traffic process running on the PTF host.
        if background_traffic_log is not None:
            _stop_background_traffic(ptfhost, background_traffic_log)


def _prepare_background_traffic_params(duthost, queues, selected_test_ports, test_ports_info):
    """Build the PTF parameter dict for ``PfcWdBackgroundTrafficTest``.

    For each selected port the info dict supplies ``rx_port_id`` (an int or a
    list whose first element is used), ``test_port_id``,
    ``test_neighbor_addr`` and ``rx_neighbor_addr``.

    Raises:
        IndexError: if ``selected_test_ports`` is empty, because the router
            MAC is taken from its first entry.
        KeyError: if a selected port is missing from ``test_ports_info``.
    """
    src_ports = []
    dst_ports = []
    src_ips = []
    dst_ips = []
    for port_name in selected_test_ports:
        port_info = test_ports_info[port_name]
        rx_port_id = port_info["rx_port_id"]
        # rx_port_id may be a list of ids; only the first one is used.
        src_ports.append(rx_port_id[0] if isinstance(rx_port_id, list) else rx_port_id)
        dst_ports.append(port_info["test_port_id"])
        dst_ips.append(port_info["test_neighbor_addr"])
        src_ips.append(port_info["rx_neighbor_addr"])

    return {
        'router_mac': duthost.get_dut_iface_mac(selected_test_ports[0]),
        'src_ports': src_ports,
        'dst_ports': dst_ports,
        'src_ips': src_ips,
        'dst_ips': dst_ips,
        'queues': queues,
        'bidirection': False,
        'pkt_count': 1000,
    }


def _send_background_traffic(ptfhost, ptf_params):
    """Start the background traffic PTF test asynchronously.

    Returns:
        The path of the timestamped log file passed to ``ptf_runner``; the
        same path is later used to find the process to stop.
    """
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    log_file = "/tmp/pfc_wd_background_traffic.PfcWdBackgroundTrafficTest.{}.log".format(timestamp)
    ptf_runner(ptfhost, "ptftests", "pfc_wd_background_traffic.PfcWdBackgroundTrafficTest", "/root/ptftests",
               params=ptf_params, log_file=log_file, is_python3=True, async_mode=True)

    return log_file


def _stop_background_traffic(ptfhost, background_traffic_log):
    """Kill every PTF-host process whose command line matches the log path.

    Both shell calls ignore errors so that cleanup is best effort.
    """
    # pgrep exits non-zero when nothing matches, e.g. the traffic test already
    # finished. NOTE(review): presumably ptfhost.shell raises on a non-zero rc
    # unless module_ignore_errors is set - confirm against the host wrapper.
    result = ptfhost.shell(f"pgrep -f {background_traffic_log}", module_ignore_errors=True)
    for pid in (result or {}).get("stdout_lines", []):
        ptfhost.shell(f"kill -9 {pid}", module_ignore_errors=True)
enum_rand_one_per_hwsku_frontend_hostname, - storm_test_setup_restore): + storm_test_setup_restore, setup_pfc_test, ptfhost): """ Tests PFC storm/restore on all ports @@ -162,12 +163,27 @@ def test_all_port_storm_restore(self, duthosts, enum_rand_one_per_hwsku_frontend duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] storm_hndle = storm_test_setup_restore logger.info("--- Testing if PFC storm is detected on all ports ---") - self.run_test(duthost, - storm_hndle, - expect_regex=[EXPECT_PFC_WD_DETECT_RE + fetch_vendor_specific_diagnosis_re(duthost)], - syslog_marker="all_port_storm", - action="storm") + # get all the tested ports + queues = [] + for peer in storm_hndle.peer_params.keys(): + fanout_intfs = storm_hndle.peer_params[peer]['intfs'].split(',') + device_conn = storm_hndle.fanout_graph[peer]['device_conn'] + queues.append(storm_hndle.storm_handle[peer].pfc_queue_idx) + queues = list(set(queues)) + selected_test_ports = [] + + for intf in fanout_intfs: + test_port = device_conn[intf]['peerport'] + if test_port in setup_pfc_test['test_ports']: + selected_test_ports.append(test_port) + + with send_background_traffic(duthost, ptfhost, queues, selected_test_ports, setup_pfc_test['test_ports']): + self.run_test(duthost, + storm_hndle, + expect_regex=[EXPECT_PFC_WD_DETECT_RE + fetch_vendor_specific_diagnosis_re(duthost)], + syslog_marker="all_port_storm", + action="storm") logger.info("--- Testing if PFC storm is restored on all ports ---") self.run_test(duthost, storm_hndle, expect_regex=[EXPECT_PFC_WD_RESTORE_RE], syslog_marker="all_port_storm_restore", action="restore") diff --git a/tests/pfcwd/test_pfcwd_function.py b/tests/pfcwd/test_pfcwd_function.py index 8b39852ef85..987db273745 100644 --- a/tests/pfcwd/test_pfcwd_function.py +++ b/tests/pfcwd/test_pfcwd_function.py @@ -17,6 +17,7 @@ from tests.common import constants from tests.common.dualtor.dual_tor_utils import is_tunnel_qos_remap_enabled, dualtor_ports # noqa F401 from 
tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_enum_rand_one_per_hwsku_frontend_host_m # noqa F401, E501 +from .files.pfcwd_helper import send_background_traffic PTF_PORT_MAPPING_MODE = 'use_orig_interface' @@ -707,22 +708,27 @@ def storm_detect_path(self, dut, port, action): loganalyzer.expect_regex.extend([EXPECT_PFC_WD_DETECT_RE + fetch_vendor_specific_diagnosis_re(dut)]) loganalyzer.match_regex = [] - if action != "dontcare": - start_wd_on_ports(dut, port, restore_time, detect_time, action) + selected_test_ports = [self.pfc_wd['rx_port'][0]] + test_ports_info = {self.pfc_wd['rx_port'][0]: self.pfc_wd} + queues = [self.storm_hndle.pfc_queue_idx] - if not self.pfc_wd['fake_storm']: - self.storm_hndle.start_storm() + with send_background_traffic(dut, self.ptf, queues, selected_test_ports, test_ports_info): + if action != "dontcare": + start_wd_on_ports(dut, port, restore_time, detect_time, action) - if action == "dontcare": - self.traffic_inst.fill_buffer() - start_wd_on_ports(dut, port, restore_time, detect_time, "drop") + if not self.pfc_wd['fake_storm']: + self.storm_hndle.start_storm() - # placing this here to cover all action types. for 'dontcare' action, - # wd is started much later after the pfc storm is started - if self.pfc_wd['fake_storm']: - PfcCmd.set_storm_status(dut, self.queue_oid, "enabled") + if action == "dontcare": + self.traffic_inst.fill_buffer() + start_wd_on_ports(dut, port, restore_time, detect_time, "drop") + + # placing this here to cover all action types. 
for 'dontcare' action, + # wd is started much later after the pfc storm is started + if self.pfc_wd['fake_storm']: + PfcCmd.set_storm_status(dut, self.queue_oid, "enabled") - time.sleep(5) + time.sleep(5) # storm detect logger.info("Verify if PFC storm is detected on port {}".format(port)) diff --git a/tests/pfcwd/test_pfcwd_timer_accuracy.py b/tests/pfcwd/test_pfcwd_timer_accuracy.py index 9144fcc10bc..9a64ab9f2fa 100644 --- a/tests/pfcwd/test_pfcwd_timer_accuracy.py +++ b/tests/pfcwd/test_pfcwd_timer_accuracy.py @@ -8,6 +8,7 @@ from .files.pfcwd_helper import start_wd_on_ports, start_background_traffic # noqa F401 from tests.common.plugins.loganalyzer import DisableLogrotateCronContext +from .files.pfcwd_helper import send_background_traffic pytestmark = [ @@ -102,7 +103,9 @@ def pfcwd_timer_setup_restore(setup_pfc_test, enum_fanout_graph_facts, duthosts, logger.info("--- Pfcwd Timer Testrun ---") yield {'timers': timers, - 'storm_handle': storm_handle + 'storm_handle': storm_handle, + 'test_ports': test_ports, + 'selected_test_port': pfc_wd_test_port } logger.info("--- Pfcwd timer test cleanup ---") @@ -165,16 +168,25 @@ def set_storm_params(dut, fanout_info, fanout, peer_params): @pytest.mark.usefixtures('pfcwd_timer_setup_restore', 'start_background_traffic') class TestPfcwdAllTimer(object): """ PFCwd timer test class """ - def run_test(self): + def run_test(self, setup_info): """ Test execution """ with DisableLogrotateCronContext(self.dut): logger.info("Flush logs") self.dut.shell("logrotate -f /etc/logrotate.conf") - self.storm_handle.start_storm() - logger.info("Wait for queue to recover from PFC storm") - time.sleep(16) + + selected_test_ports = [setup_info['selected_test_port']] + test_ports_info = setup_info['test_ports'] + queues = [self.storm_handle.pfc_queue_idx] + + with send_background_traffic(self.dut, self.ptf, queues, selected_test_ports, test_ports_info): + self.storm_handle.start_storm() + logger.info("Wait for queue to recover from PFC 
storm") + time.sleep(8) + self.storm_handle.stop_storm() + time.sleep(16) + if self.dut.topo_type == 't2' and self.storm_handle.peer_device.os == 'sonic': storm_detect_ms = self.retrieve_timestamp("[d]etected PFC storm") else: @@ -266,7 +278,7 @@ def retrieve_timestamp(self, pattern): timestamp_ms = self.dut.shell("date -d {} +%s%3N".format(timestamp))['stdout'] return int(timestamp_ms) - def test_pfcwd_timer_accuracy(self, duthosts, enum_rand_one_per_hwsku_frontend_hostname, + def test_pfcwd_timer_accuracy(self, duthosts, ptfhost, enum_rand_one_per_hwsku_frontend_hostname, pfcwd_timer_setup_restore): """ Tests PFCwd timer accuracy @@ -280,6 +292,7 @@ def test_pfcwd_timer_accuracy(self, duthosts, enum_rand_one_per_hwsku_frontend_h self.storm_handle = setup_info['storm_handle'] self.timers = setup_info['timers'] self.dut = duthost + self.ptf = ptfhost self.all_detect_time = list() self.all_restore_time = list() self.all_dut_detect_restore_time = list() @@ -287,7 +300,7 @@ def test_pfcwd_timer_accuracy(self, duthosts, enum_rand_one_per_hwsku_frontend_h if self.dut.topo_type == 't2' and self.storm_handle.peer_device.os == 'sonic': for i in range(1, 11): logger.info("--- Pfcwd Timer Test iteration #{}".format(i)) - self.run_test() + self.run_test(setup_info) self.verify_pfcwd_timers_t2() else: for i in range(1, 20): @@ -301,7 +314,7 @@ def test_pfcwd_timer_accuracy(self, duthosts, enum_rand_one_per_hwsku_frontend_h pfcwd_cmd_response = self.dut.shell(cmd, module_ignore_errors=True) logger.debug("loop {} cmd {} rsp {}".format(i, cmd, pfcwd_cmd_response.get('stdout', None))) - self.run_test() + self.run_test(setup_info) self.verify_pfcwd_timers() except Exception as e: diff --git a/tests/pfcwd/test_pfcwd_warm_reboot.py b/tests/pfcwd/test_pfcwd_warm_reboot.py index 852f6e9d345..144e1e03e95 100644 --- a/tests/pfcwd/test_pfcwd_warm_reboot.py +++ b/tests/pfcwd/test_pfcwd_warm_reboot.py @@ -17,6 +17,7 @@ from tests.common.utilities import join_all from tests.ptf_runner import 
ptf_runner from .files.pfcwd_helper import EXPECT_PFC_WD_DETECT_RE, EXPECT_PFC_WD_RESTORE_RE +from .files.pfcwd_helper import send_background_traffic TEMPLATES_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates") TESTCASE_INFO = {'no_storm': {'test_sequence': ["detect", "restore", "warm-reboot", "detect", "restore"], @@ -336,19 +337,23 @@ def storm_detect_path(self, port, queue, first_detect_after_wb=False): self.loganalyzer.expect_regex.extend([EXPECT_PFC_WD_DETECT_RE]) self.loganalyzer.match_regex = [] - # ongoing storm. no need to start a new one - if not first_detect_after_wb: - if not self.pfc_wd['fake_storm']: - self.storm_handle[port][queue].start_storm() - time.sleep(15 * len(self.pfc_wd['queue_indices'])) + selected_test_ports = [self.pfc_wd['rx_port'][0]] + test_ports_info = {self.pfc_wd['rx_port'][0]: self.pfc_wd} + + with send_background_traffic(self.dut, self.ptf, [queue], selected_test_ports, test_ports_info): + # ongoing storm. no need to start a new one + if not first_detect_after_wb: + if not self.pfc_wd['fake_storm']: + self.storm_handle[port][queue].start_storm() + time.sleep(15 * len(self.pfc_wd['queue_indices'])) + else: + logger.info("Enable DEBUG fake storm on port {} queue {}".format(port, queue)) + PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "enabled") + time.sleep(5) else: - logger.info("Enable DEBUG fake storm on port {} queue {}".format(port, queue)) - PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "enabled") - time.sleep(5) - else: - # for the first iteration after wb, check the log for detect msgs for the ongoing storms - self.loganalyzer.expected_matches_target = len(self.ports) * len(self.pfc_wd['queue_indices']) - time.sleep(20) + # for the first iteration after wb, check the log for detect msgs for the ongoing storms + self.loganalyzer.expected_matches_target = len(self.ports) * len(self.pfc_wd['queue_indices']) + time.sleep(20) # storm detect check logger.info("Verify if 
PFC storm is detected on port {} queue {}".format(port, queue)) diff --git a/tests/ptf_runner.py b/tests/ptf_runner.py index 9633552ba68..24c17fc80a4 100644 --- a/tests/ptf_runner.py +++ b/tests/ptf_runner.py @@ -49,7 +49,7 @@ def get_dut_type(host): def ptf_runner(host, testdir, testname, platform_dir=None, params={}, platform="remote", qlen=0, relax=True, debug_level="info", socket_recv_size=None, log_file=None, device_sockets=[], timeout=0, custom_options="", - module_ignore_errors=False, is_python3=False): + module_ignore_errors=False, is_python3=False, async_mode=False): # Call virtual env ptf for migrated py3 scripts. # ptf will load all scripts under ptftests, it will throw error for py2 scripts. # So move migrated scripts to seperated py3 folder avoid impacting py2 scripts. @@ -111,12 +111,12 @@ def ptf_runner(host, testdir, testname, platform_dir=None, params={}, host.create_macsec_info() try: - result = host.shell(cmd, chdir="/root", module_ignore_errors=module_ignore_errors) - if log_file: - # when ptf cmd execution result is 0 (success), we need to skip collecting pcap file - ptf_collect(host, log_file, result is not None and result.get("rc", -1) == 0) - if result: - allure.attach(json.dumps(result, indent=4), 'ptf_console_result', allure.attachment_type.TEXT) + result = host.shell(cmd, chdir="/root", module_ignore_errors=module_ignore_errors, module_async=async_mode) + if not async_mode: + if log_file: + ptf_collect(host, log_file) + if result: + allure.attach(json.dumps(result, indent=4), 'ptf_console_result', allure.attachment_type.TEXT) if module_ignore_errors: if result["rc"] != 0: return result