diff --git a/ansible/roles/test/files/ptftests/device_connection.py b/ansible/roles/test/files/ptftests/device_connection.py index a331210d422..19ad85ebb59 100644 --- a/ansible/roles/test/files/ptftests/device_connection.py +++ b/ansible/roles/test/files/ptftests/device_connection.py @@ -93,3 +93,30 @@ def execCommand(self, cmd, timeout=DEFAULT_CMD_EXECUTION_TIMEOUT_SEC): client.close() return stdOut, stdErr, retValue + + @retry( + stop_max_attempt_number=2, + retry_on_exception=lambda e: isinstance(e, AuthenticationException) + ) + def fetch(self, remote_path, local_path): + """ + Fetch the file from the remote device + @param remote_path: the full path of the file to fetch + @param local_path: the full path of the file to be saved locally + """ + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + try: + client.connect(self.hostname, username=self.username, password=self.password, allow_agent=False) + ftp_client = client.open_sftp() + ftp_client.get(remote_path, local_path) + ftp_client.close() + except AuthenticationException as authenticationException: + logger.error('SSH Authentication failure with message: %s' % + authenticationException) + if self.alt_password is not None: + # attempt retry with alt_password + self.password = self.alt_password + raise AuthenticationException + finally: + client.close() diff --git a/ansible/roles/test/files/ptftests/py3/advanced-reboot.py b/ansible/roles/test/files/ptftests/py3/advanced-reboot.py index 3faa038dc89..f2301794ba6 100644 --- a/ansible/roles/test/files/ptftests/py3/advanced-reboot.py +++ b/ansible/roles/test/files/ptftests/py3/advanced-reboot.py @@ -145,6 +145,10 @@ def __init__(self): self.check_param('dut_username', '', required=True) self.check_param('dut_password', '', required=True) self.check_param('dut_hostname', '', required=True) + self.check_param('vmhost_username', '', required=False) + self.check_param('vmhost_password', '', required=False) + self.check_param('vmhost_mgmt_ip', '', required=False) + self.check_param('vmhost_external_port', '', required=False) self.check_param('reboot_limit_in_seconds', 30, required=False) self.check_param('reboot_type', 'fast-reboot', required=False) self.check_param('graceful_limit', 240, required=False) @@ -275,8 +279,17 @@ def __init__(self): alt_password=self.test_params.get('alt_password') ) + self.vmhost_external_port = self.test_params['vmhost_external_port'] + if self.vmhost_external_port: + self.vmhost_connection = DeviceConnection( + self.test_params['vmhost_mgmt_ip'], + self.test_params['vmhost_username'], + password=self.test_params['vmhost_password'] + ) + self.sender_thr = threading.Thread(target=self.send_in_background) self.sniff_thr = threading.Thread(target=self.sniff_in_background) + self.start_sender_delay = 30 # Check if platform type is kvm stdout, stderr, return_code = self.dut_connection.execCommand( @@ -661,6 +674,15 @@ def setUp(self): if self.kvm_test: self.log("This test is for KVM platform") + self.capture_pcap = ("/tmp/capture_%s.pcap" % self.logfile_suffix + if self.logfile_suffix is not None else "/tmp/capture.pcap") + if self.vmhost_external_port: + self.log("Test will collect tcpdump on the vmhost external port") + remote_capture_pcap = self.capture_pcap + f"_{self.test_params['dut_hostname']}" + self.remote_capture_pcap = remote_capture_pcap + self.vmhost_connection.execCommand(f"sudo rm -rf {self.remote_capture_pcap}") + self.log(f"The pcap file on vmhost will be located in {remote_capture_pcap}") + # get VM info if isinstance(self.test_params['arista_vms'], list): arista_vms = self.test_params['arista_vms'] @@ -754,6 +776,9 @@ def tearDown(self): self.log("Disabling arp_responder") self.cmd(["supervisorctl", "stop", "arp_responder"]) + if self.vmhost_external_port: + self.log("Remove the tcpdump pcap on the vm host.") + self.vmhost_connection.execCommand(f"sudo rm -rf {self.remote_capture_pcap}") # Stop watching DUT self.watching = False @@ -1681,7 +1706,7 @@ def send_in_background(self, packets_list=None): """ if not packets_list: packets_list = self.packets_list - self.sniffer_started.wait(timeout=10) + self.sniffer_started.wait(timeout=self.start_sender_delay) with self.dataplane_io_lock: # While running fast data plane sender thread there are two reasons for filter to be applied # 1. filter out data plane traffic which is tcp to free up the load @@ -1749,8 +1774,22 @@ def sniff_in_background(self, wait=None): sniffer = threading.Thread(target=self.tcpdump_sniff, kwargs={ 'wait': wait, 'sniff_filter': sniff_filter}) sniffer.start() - # Let the scapy sniff initialize completely. - time.sleep(2) + # Let the scapy sniff initialize completely. Need to wait more time when capturing on the vmhost. + base_tcpdump_delay = 2 + time.sleep(base_tcpdump_delay) + if self.vmhost_external_port: + elapsed_time = 0 + while elapsed_time < self.start_sender_delay - base_tcpdump_delay: + elapsed_time += 1 + time.sleep(1) + stdout_lines, stderr_lines, _ = self.vmhost_connection.execCommand(f"ls {self.remote_capture_pcap}") + if (self.remote_capture_pcap + '\n') in stdout_lines and len(stderr_lines) == 0: + self.log(f"The pcap file on the vmhost is created: {self.remote_capture_pcap}") + break + else: + self.log(f"Error: the pcap file on the vmhost is not created in {self.start_sender_delay}s.") + raise Exception("Tcpdump on the vmhost failed to start, test is aborted.") + # Unblock waiter for the send_in_background. self.sniffer_started.set() sniffer.join() @@ -1760,25 +1799,50 @@ def sniff_in_background(self, wait=None): def tcpdump_sniff(self, wait=300, sniff_filter=''): """ - @summary: PTF runner - runs a sniffer in PTF container. + @summary: PTF runner - runs a sniffer in vmhost(server) or the PTF container. Args: wait (int): Duration in seconds to sniff the traffic sniff_filter (str): Filter that tcpdump will use to collect only relevant packets """ try: - capture_pcap = ("/tmp/capture_%s.pcap" % self.logfile_suffix - if self.logfile_suffix is not None else "/tmp/capture.pcap") - subprocess.call(["rm", "-rf", capture_pcap]) # remove old capture + subprocess.call(["rm", "-rf", self.capture_pcap]) self.kill_sniffer = False - self.start_sniffer(capture_pcap, sniff_filter, wait) - self.create_single_pcap(capture_pcap) - self.packets = scapyall.rdpcap(capture_pcap) + + if self.vmhost_external_port: + self.start_sniffer_on_vmhost(self.remote_capture_pcap, sniff_filter, wait) + self.vmhost_connection.fetch(self.remote_capture_pcap, self.capture_pcap) + else: + self.start_sniffer_on_ptf(self.capture_pcap, sniff_filter, wait) + self.create_single_pcap(self.capture_pcap) + + self.packets = scapyall.rdpcap(self.capture_pcap) self.log("Number of all packets captured: {}".format(len(self.packets))) except Exception: traceback_msg = traceback.format_exc() self.log("Error in tcpdump_sniff: {}".format(traceback_msg)) - def start_sniffer(self, pcap_path, tcpdump_filter, timeout): + def start_sniffer_on_vmhost(self, pcap_path, tcpdump_filter, timeout): + """ + Start tcpdump sniffer on all data interfaces, and kill them after a specified timeout + """ + interface = self.test_params['vmhost_external_port'] + cmd = f"sudo nohup tcpdump -i {interface} {tcpdump_filter} -w {pcap_path}" + self.vmhost_connection.execCommand(cmd + " > /dev/null 2>&1 &") + self.log(f'Tcpdump sniffer starting on vmhost interface: {interface}') + + time_start = time.time() + while not self.kill_sniffer: + time.sleep(1) + curr_time = time.time() + if curr_time - time_start > timeout: + break + time_start = curr_time + + self.log("Going to kill the tcpdump process by SIGTERM") + self.vmhost_connection.execCommand(f'sudo pkill -f "{cmd}"') + self.log("Killed the tcpdump process") + + def start_sniffer_on_ptf(self, pcap_path, tcpdump_filter, timeout): """ Start tcpdump sniffer on all data interfaces, and kill them after a specified timeout """ diff --git a/tests/common/devices/vmhost.py b/tests/common/devices/vmhost.py index 680d5f52135..3350f052e91 100644 --- a/tests/common/devices/vmhost.py +++ b/tests/common/devices/vmhost.py @@ -17,5 +17,5 @@ def external_port(self): vm = self.host.options["variable_manager"] im = self.host.options["inventory_manager"] hostvars = vm.get_vars(host=im.get_host(self.hostname), include_delegate_to=False) - setattr(self, "_external_port", hostvars["external_port"]) + setattr(self, "_external_port", hostvars.get("external_port", '')) return getattr(self, "_external_port") diff --git a/tests/common/fixtures/advanced_reboot.py b/tests/common/fixtures/advanced_reboot.py index 74b4d4827b7..4ae3b8cfe43 100644 --- a/tests/common/fixtures/advanced_reboot.py +++ b/tests/common/fixtures/advanced_reboot.py @@ -40,7 +40,7 @@ class AdvancedReboot: Test cases can trigger test start utilizing runRebootTestcase API. """ - def __init__(self, request, duthosts, duthost, ptfhost, localhost, tbinfo, creds, **kwargs): + def __init__(self, request, duthosts, duthost, ptfhost, localhost, vmhost, tbinfo, creds, **kwargs): """ Class constructor. @param request: pytest request object @@ -85,6 +85,7 @@ def __init__(self, request, duthosts, duthost, ptfhost, localhost, tbinfo, creds self.duthost = duthost self.ptfhost = ptfhost self.localhost = localhost + self.vmhost = vmhost self.tbinfo = tbinfo self.creds = creds self.moduleIgnoreErrors = kwargs["allow_fail"] if "allow_fail" in kwargs else False @@ -99,6 +100,7 @@ def __init__(self, request, duthosts, duthost, ptfhost, localhost, tbinfo, creds self.lagMemberCnt = 0 self.vlanMaxCnt = 0 self.hostMaxCnt = HOST_MAX_COUNT + self.capture_on_vmhost = True if vmhost.external_port else False if "dualtor" in self.getTestbedType(): self.dual_tor_mode = True peer_duthost = get_peerhost(duthosts, duthost) @@ -184,6 +186,13 @@ def __buildTestbedData(self, tbinfo): attr['mgmt_addr'] for dev, attr in list(self.mgFacts['minigraph_devices'].items()) if attr['hwsku'] == 'Arista-VM' ] + if self.capture_on_vmhost: + self.rebootData['vmhost_mgmt_ip'] = self.vmhost.mgmt_ip + self.rebootData['vmhost_external_port'] = self.vmhost.external_port + self.rebootData['vmhost_username'] = \ + self.duthost.host.options['variable_manager']._hostvars[self.vmhost.hostname]['vm_host_user'] + self.rebootData['vmhost_password'] = \ + self.duthost.host.options['variable_manager']._hostvars[self.vmhost.hostname]['vm_host_password'] self.hostMaxLen = len(self.rebootData['arista_vms']) - 1 self.lagMemberCnt = len(list(self.mgFacts['minigraph_portchannels'].values())[0]['members']) @@ -736,6 +745,14 @@ def __runPtfRunner(self, rebootOper=None): "neighbor_type": self.neighborType, } + if self.capture_on_vmhost: + params.update({ + "vmhost_username": self.rebootData['vmhost_username'], + "vmhost_password": self.rebootData['vmhost_password'], + "vmhost_mgmt_ip": self.rebootData['vmhost_mgmt_ip'], + "vmhost_external_port": self.rebootData['vmhost_external_port'] + }) + if self.dual_tor_mode: params.update({ "peer_ports_file": self.rebootData['peer_ports_file'], @@ -875,8 +892,8 @@ def tearDown(self): @pytest.fixture -def get_advanced_reboot(request, duthosts, enum_rand_one_per_hwsku_frontend_hostname, ptfhost, localhost, tbinfo, - creds): +def get_advanced_reboot(request, duthosts, enum_rand_one_per_hwsku_frontend_hostname, ptfhost, localhost, vmhost, + tbinfo, creds): """ Pytest test fixture that provides access to AdvancedReboot test fixture @param request: pytest request object @@ -884,6 +901,7 @@ def get_advanced_reboot(request, duthosts, enum_rand_one_per_hwsku_frontend_host @param ptfhost: PTFHost for interacting with PTF through ansible @param localhost: Localhost for interacting with localhost through ansible @param tbinfo: fixture provides information about testbed + @param vmhost: AnsibleHost instance of the test server """ duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] instances = [] @@ -893,7 +911,7 @@ def get_advanced_reboot(**kwargs): API that returns instances of AdvancedReboot class """ assert len(instances) == 0, "Only one instance of reboot data is allowed" - advancedReboot = AdvancedReboot(request, duthosts, duthost, ptfhost, localhost, tbinfo, creds, **kwargs) + advancedReboot = AdvancedReboot(request, duthosts, duthost, ptfhost, localhost, vmhost, tbinfo, creds, **kwargs) instances.append(advancedReboot) return advancedReboot diff --git a/tests/platform_tests/test_cont_warm_reboot.py b/tests/platform_tests/test_cont_warm_reboot.py index c08d85c0a92..ca7443e375e 100644 --- a/tests/platform_tests/test_cont_warm_reboot.py +++ b/tests/platform_tests/test_cont_warm_reboot.py @@ -293,7 +293,7 @@ def create_test_report(self): pytest_assert(self.test_failures == 0, "Continuous reboot test failed {}/{} times". format(self.test_failures, self.reboot_count)) - def start_continuous_reboot(self, request, duthosts, duthost, ptfhost, localhost, tbinfo, creds): + def start_continuous_reboot(self, request, duthosts, duthost, ptfhost, localhost, vmhost, tbinfo, creds): self.test_set_up() # Start continuous warm/fast reboot on the DUT for count in range(self.continuous_reboot_count): @@ -306,8 +306,8 @@ def start_continuous_reboot(self, request, duthosts, duthost, ptfhost, localhost .format(self.reboot_count, self.continuous_reboot_count, self.reboot_type)) reboot_type = self.reboot_type + "-reboot" try: - self.advancedReboot = AdvancedReboot(request, duthosts, duthost, ptfhost, localhost, tbinfo, creds, - rebootType=reboot_type, moduleIgnoreErrors=True) + self.advancedReboot = AdvancedReboot(request, duthosts, duthost, ptfhost, localhost, vmhost, tbinfo, + creds, rebootType=reboot_type, moduleIgnoreErrors=True) except Exception: self.sub_test_result = False self.test_failures = self.test_failures + 1 @@ -355,7 +355,7 @@ def test_teardown(self): @pytest.mark.device_type('vs') def test_continuous_reboot(request, duthosts, enum_rand_one_per_hwsku_frontend_hostname, - ptfhost, localhost, conn_graph_facts, tbinfo, creds): + ptfhost, localhost, vmhost, conn_graph_facts, tbinfo, creds): """ @summary: This test performs continuous reboot cycles on images that are provided as an input. Supported parameters for this test can be modified at runtime: @@ -380,5 +380,5 @@ def test_continuous_reboot(request, duthosts, enum_rand_one_per_hwsku_frontend_h continuous_reboot = ContinuousReboot( request, duthost, ptfhost, localhost, conn_graph_facts) continuous_reboot.start_continuous_reboot( - request, duthosts, duthost, ptfhost, localhost, tbinfo, creds) + request, duthosts, duthost, ptfhost, localhost, vmhost, tbinfo, creds) continuous_reboot.test_teardown()