From 71edef4d234e57b1e030a5d0094f62814580da1e Mon Sep 17 00:00:00 2001 From: Ye Jianquan Date: Fri, 17 May 2024 09:16:14 +0800 Subject: [PATCH] [Feat] Enhance upgrade image script to support chassis devices (#12871) Description of PR Summary: Enhance the upgrade image script to support chassis devices. For a chassis device, we first need to upgrade the image on the supervisor cards, then upgrade the image on the line cards. Approach What is the motivation for this PR? Enhance the upgrade_image script to support chassis devices. How did you do it? Upgrade the image on the supervisor cards, then wait 900s for the supervisor cards to be ready. Upgrade the image on the line cards, then wait 300s for the line cards to be ready. By default, sonichosts runs commands on all supervisor cards and line cards at the same time, so the framework is enhanced to be able to target specific hosts. How did you verify/test it? Ran the upgrade image script on a chassis device and a pizza-box device; both work well. Co-authored-by: jianquanye@microsoft.com --- ansible/devutil/devices/ansible_hosts.py | 6 +++- ansible/devutil/devices/chassis_utils.py | 26 ++++++++++++++++++ ansible/devutil/devices/sonic.py | 35 ++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 ansible/devutil/devices/chassis_utils.py diff --git a/ansible/devutil/devices/ansible_hosts.py b/ansible/devutil/devices/ansible_hosts.py index 33118522b5a..2cec9217967 100644 --- a/ansible/devutil/devices/ansible_hosts.py +++ b/ansible/devutil/devices/ansible_hosts.py @@ -701,6 +701,7 @@ def _run_ansible_module(self, *args, **kwargs): } """ caller_info = kwargs.pop("caller_info", None) + target_hosts = kwargs.pop("target_hosts", None) if not caller_info: previous_frame = inspect.currentframe().f_back caller_info = inspect.getframeinfo(previous_frame) @@ -723,7 +724,10 @@ def _run_ansible_module(self, *args, **kwargs): self._log_modules(caller_info, module_info, verbosity) task = self.build_task(**module_info) 
- results = self.run_tasks(self.host_pattern, self.loader, self.im, self.vm, self.options, tasks=[task]) + host_pattern = self.host_pattern + if target_hosts: + host_pattern = target_hosts + results = self.run_tasks(host_pattern, self.loader, self.im, self.vm, self.options, tasks=[task]) self._log_results(caller_info, module_info, results, verbosity) self._check_results(caller_info, module_info, results, module_ignore_errors, verbosity) diff --git a/ansible/devutil/devices/chassis_utils.py b/ansible/devutil/devices/chassis_utils.py new file mode 100644 index 00000000000..8e2721c8659 --- /dev/null +++ b/ansible/devutil/devices/chassis_utils.py @@ -0,0 +1,26 @@ +import enum + + +class ChassisCardType(str, enum.Enum): + # Sample: lab-1111-sup-1 + SUPERVISOR_CARD = "-sup-" + # Sample: lab-1111-lc1-1 + LINE_CARD = "-lc" + + +def is_chassis(sonichosts): + supervisor_card_exists, line_card_exists = False, False + for hostname in sonichosts.hostnames: + if ChassisCardType.SUPERVISOR_CARD.value in hostname: + supervisor_card_exists = True + if ChassisCardType.LINE_CARD.value in hostname: + line_card_exists = True + return supervisor_card_exists and line_card_exists + + +def get_chassis_hostnames(sonichosts, chassis_card_type: ChassisCardType): + res = [] + for hostname in sonichosts.hostnames: + if chassis_card_type.value in hostname: + res.append(hostname) + return res diff --git a/ansible/devutil/devices/sonic.py b/ansible/devutil/devices/sonic.py index 7a179481f84..e7f63e4b085 100644 --- a/ansible/devutil/devices/sonic.py +++ b/ansible/devutil/devices/sonic.py @@ -1,8 +1,11 @@ import logging +import time + import yaml from .ansible_hosts import AnsibleHosts from .ansible_hosts import RunAnsibleModuleFailed +from .chassis_utils import is_chassis, get_chassis_hostnames, ChassisCardType logger = logging.getLogger(__name__) @@ -26,14 +29,40 @@ def sonic_version(self): return {} -def upgrade_by_sonic(sonichosts, image_url, disk_used_percent): +def upgrade_by_sonic(sonichosts, 
localhost, image_url, disk_used_percent): try: sonichosts.reduce_and_add_sonic_images( disk_used_pcent=disk_used_percent, new_image_url=image_url, module_attrs={"become": True} ) - sonichosts.shell("reboot", module_attrs={"become": True, "async": 300, "poll": 0}) + if is_chassis(sonichosts): + logger.info("Upgrading image on chassis device...") + # Chassis DUT need to firstly upgrade and reboot supervisor cards. + # Until supervisor cards back online, then upgrade and reboot line cards. + rp_hostnames = get_chassis_hostnames(sonichosts, ChassisCardType.SUPERVISOR_CARD) + lc_hostnames = get_chassis_hostnames(sonichosts, ChassisCardType.LINE_CARD) + sonichosts.shell("reboot", target_hosts=rp_hostnames, + module_attrs={"become": True, "async": 300, "poll": 0}) + logger.info("Sleep 900s to wait for supervisor card to be ready...") + time.sleep(900) + for i in range(len(sonichosts.ips)): + localhost.wait_for( + host=sonichosts.ips[i], + port=22, + state="started", + search_regex="OpenSSH", + delay=0, + timeout=600, + module_attrs={"changed_when": False} + ) + sonichosts.shell("reboot", target_hosts=lc_hostnames, + module_attrs={"become": True, "async": 300, "poll": 0}) + logger.info("Sleep 300s to wait for line cards to be ready...") + time.sleep(300) + else: + sonichosts.shell("reboot", module_attrs={"become": True, "async": 300, "poll": 0}) + return True except RunAnsibleModuleFailed as e: logger.error( @@ -205,7 +234,7 @@ def upgrade_image(sonichosts, localhost, image_url, upgrade_type="sonic", disk_u return False if upgrade_type == "sonic": - upgrade_result = upgrade_by_sonic(sonichosts, image_url, disk_used_percent) + upgrade_result = upgrade_by_sonic(sonichosts, localhost, image_url, disk_used_percent) elif upgrade_type == "onie": upgrade_result = upgrade_by_onie(sonichosts, localhost, image_url, onie_pause_time) if not upgrade_result: