diff --git a/ceph/rados/core_workflows.py b/ceph/rados/core_workflows.py
index 8439ef141c..562e1dc8c5 100644
--- a/ceph/rados/core_workflows.py
+++ b/ceph/rados/core_workflows.py
@@ -2628,25 +2628,26 @@ def get_heap_dump(self, osd_list):
             heap_dump[osd_id] = out.strip()
         return heap_dump

-    def list_orch_services(self, service_type=None) -> list:
+    def list_orch_services(self, service_type=None, export=None) -> list:
         """
         Retrieves the list of orch services
         Args:
             service_type(optional): service name | e.g. mon, mgr, osd, etc
-
+            export(optional): if set, return the full service specs via --export
         Returns:
-            list of service names using ceph orch ls []
+            list of service names via "ceph orch ls [service_type]", or the service specs with --export
         """
-        service_name_ls = []
         base_cmd = "ceph orch ls"
         cmd = f"{base_cmd} {service_type}" if service_type else base_cmd
-        orch_ls_op = self.run_ceph_command(cmd=cmd)
+        cmd = f"{cmd} --export" if export else cmd
+        orch_ls_op = self.run_ceph_command(cmd=cmd, client_exec=True)
+
+        if export:
+            return orch_ls_op
         if orch_ls_op:
-            for service in orch_ls_op:
-                service_name_ls.append(service["service_name"])
-            return service_name_ls
+            return [service["service_name"] for service in orch_ls_op]

     def check_host_status(self, hostname, status: str = None) -> bool:
         """
@@ -4616,21 +4617,6 @@ def get_rados_df(self, pool_name: str = None):

         return out["pools"][0] if pool_name else out

-    def get_service_list(self, service_type=None):
-        """
-        Method to get the service list
-        Args:
-            service_type : service types are- mon,mgr,osd,rgw, mds
-        returns:
-            service list
-        """
-
-        cmd_service = "ceph orch ls"
-        if service_type:
-            cmd_service = f"{cmd_service} {service_type}"
-        out = self.run_ceph_command(cmd=cmd_service)
-        return out
-
     def set_service_managed_type(self, service_type, unmanaged) -> bool:
         """
         Method to set the service to either managed or unmanaged
@@ -4668,8 +4654,8 @@ def set_service_managed_type(self, service_type, unmanaged) -> bool:
         log.info(f"Applying the spec file via cmd : {apply_cmd}")
         self.client.exec_command(cmd=apply_cmd, sudo=True)
         time.sleep(10)
-        out = self.get_service_list("osd")
-        status = out[0].get("unmanaged", False)
+        out = self.list_orch_services(service_type="osd", export=True)[0]
+        status = out.get("unmanaged", False)
         if status == "false":
             unmanaged_check = False
         else:
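For reference, a minimal standalone sketch of the consolidated helper's control flow. Here run_ceph_command() is a hypothetical stub returning canned, parsed JSON (the real method executes the command on the cluster), so the snippet only illustrates how the export flag switches both the command line and the return shape:

    # Standalone sketch of the consolidated list_orch_services() flow.
    # run_ceph_command() is a hypothetical stub; the real helper shells out
    # to the cluster and parses the JSON output.

    def run_ceph_command(cmd, client_exec=False):
        if "--export" in cmd:
            # "ceph orch ls osd --export" returns full service specs
            return [{"service_type": "osd", "service_name": "osd.all", "unmanaged": True}]
        # "ceph orch ls osd" returns the summarized service listing
        return [{"service_name": "osd.all"}]

    def list_orch_services(service_type=None, export=None):
        base_cmd = "ceph orch ls"
        cmd = f"{base_cmd} {service_type}" if service_type else base_cmd
        # conditional reassignment: a bare 'cmd += " --export" if export else cmd'
        # would append the whole command to itself when export is unset
        cmd = f"{cmd} --export" if export else cmd
        orch_ls_op = run_ceph_command(cmd=cmd, client_exec=True)
        if export:
            return orch_ls_op  # full spec dicts, e.g. to read the "unmanaged" flag
        if orch_ls_op:
            return [service["service_name"] for service in orch_ls_op]
        return []

    print(list_orch_services("osd"))               # ['osd.all']
    print(list_orch_services("osd", export=True))  # [{'service_type': 'osd', ...}]

Returning the raw export payload is what lets set_service_managed_type() read the unmanaged flag directly, replacing the get_service_list() helper removed above.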
- "In this case the json.loads method throws the JSONDecodeError exception.This is the " - "confirmation that the OSDs removal are completed. " - ) + log.debug(f"{txt_osd_removed_logic}") while end_time > datetime.datetime.now(): out, err = self.cephadm.shell([status_cmd]) try: diff --git a/tests/rados/test_node_drain_customer_bug.py b/tests/rados/test_node_drain_customer_bug.py index 551141a850..8ee31981db 100644 --- a/tests/rados/test_node_drain_customer_bug.py +++ b/tests/rados/test_node_drain_customer_bug.py @@ -62,7 +62,13 @@ def run(ceph_cluster, **kw): cmd_host_ls = "ceph orch host ls" out = rados_obj.run_ceph_command(cmd=cmd_host_ls) log.debug(f"The hosts in the cluster before starting the test are - {out}") - + txt_version_logic = """ The logic developed to select the drain host is- + 1. Select the cluster node that has the _no_schedule label.This check is included because in few + scenarios(7.1z0) first the issue is reproducing and upgrading to the latest version and again + checking the bug + 2. Select the node with OSD weight/reweight are 0 if none of the hosts have the _no_schedule label + 3. If both 1&2 failed then select a random OSD node + """ mgr_host_object_list = [] for node in ceph_nodes: if node.role == "mgr": @@ -98,15 +104,8 @@ def run(ceph_cluster, **kw): log.info(f"The bug exists and the ceph version is - {ceph_version}") else: log.info(f"The bug not exists and the ceph version is - {ceph_version}") + log.info(f"{txt_version_logic}") - log.info( - "The logic developed to select the drain host is-" - "1. Select the cluster node that has the _no_schedule label.This check is included because in few " - " scenarios(7.1z0) first the issue is reproducing and upgrading to the latest version and again " - " checking the bug" - "2. Select the node with OSD weight/reweight are 0 if none of the hosts have the _no_schedule label" - "3. If both 1&2 failed then select a random OSD node" - ) mon_obj.set_config(section="mgr", name="debug_mgr", value="20/20") cmd_host_ls = "ceph orch host ls" out = rados_obj.run_ceph_command(cmd=cmd_host_ls) diff --git a/tests/rados/test_rados_preempt_scrub.py b/tests/rados/test_rados_preempt_scrub.py index d0e2793a86..03f9ab3e57 100644 --- a/tests/rados/test_rados_preempt_scrub.py +++ b/tests/rados/test_rados_preempt_scrub.py @@ -85,7 +85,7 @@ def run(ceph_cluster, **kw): log_lines = [ "head preempted", - "WaitReplicas::react(const GotReplicas&) PREEMPTED!", + "WaitReplicas::react(const GotReplicas&) PREEMPTED", ] init_time, _ = installer.exec_command(cmd="sudo date '+%Y-%m-%d %H:%M:%S'") @@ -162,7 +162,7 @@ def run(ceph_cluster, **kw): log.info(traceback.format_exc()) return 1 finally: - log.info("===================Execution of finally block===================") + log.info("Execution of finally block") if config.get("delete_pool"): method_should_succeed(rados_object.delete_pool, entry["pool_name"]) log.info("deleted the pool successfully")