Skip to content

Commit

Permalink
Enhance system test automation for parallelism and crash configuration
Browse files Browse the repository at this point in the history
Signed-off-by: Suma R <[email protected]>
  • Loading branch information
Suma R authored and Suma R committed Nov 26, 2024
1 parent 0cb1077 commit 948c949
Show file tree
Hide file tree
Showing 5 changed files with 670 additions and 243 deletions.
19 changes: 19 additions & 0 deletions suites/squid/cephfs/tier-3_cephfs_system_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ tests:
ENABLE_LOGS : 1
daemon_list : ['mds','client','osd','mgr','mon']
daemon_dbg_level : {'mds':20,'client':20,'osd':10,'mgr':10,'mon':10}
-
test:
abort-on-fail: false
desc: "Setup Crash configuration"
module: cephfs_crash_util.py
name: cephfs-crash-setup
config:
crash_setup : 1
daemon_list : ['mds','osd','mgr','mon']

- test:
name: CephFS_System_test
module: test_parallel.py
Expand All @@ -242,6 +252,15 @@ tests:
name: "CephFS System Test Client IO 7"
config:
test_name : io_test_workflow_7
-
test:
abort-on-fail: false
desc: "Check for Crash"
module: cephfs_crash_util.py
name: cephfs-crash-check
config:
crash_check : 1
daemon_list : ['mds','osd','mgr','mon']
-
test:
abort-on-fail: false
Expand Down
80 changes: 80 additions & 0 deletions tests/cephfs/cephfs_crash_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import os
import traceback

from tests.cephfs.cephfs_system.cephfs_system_utils import CephFSSystemUtils
from utility.log import Log

log = Log(__name__)


def run(ceph_cluster, **kw):
    """
    Wrapper to configure crash collection and to check for / collect crash files.

    It can be included prior to test case execution to configure crash and
    post test case execution to collect crash files.

    PRETEST: To configure crash
    ---------------------------
    -
      test:
        abort-on-fail: false
        desc: "Setup Crash configuration"
        module: cephfs_crash_util.py
        name: cephfs-crash-setup
        config:
          crash_setup : 1
          daemon_list : ['mds','osd','mgr','mon']

    POSTTEST: To check for crash and upload crash files to logdir
    -------------------------------------------------------------
    -
      test:
        abort-on-fail: false
        desc: "Check for Crash"
        module: cephfs_crash_util.py
        name: cephfs-crash-check
        config:
          crash_check : 1
          daemon_list : ['mds','osd','mgr','mon']

    The config params read here are:
      crash_setup (default 0): when 1, call CephFSSystemUtils.crash_setup
      crash_check (default 0): when 1, call CephFSSystemUtils.crash_check
      crash_copy  (default 1): forwarded to crash_check
      daemon_list (default ['mds']): daemon types whose nodes are processed

    Args:
        ceph_cluster: cluster object providing get_ceph_objects()
        **kw: test keyword args; "config" holds the params listed above

    Returns:
        0 on success, 1 when no client node is available or any exception
        is raised while running the requested steps.
    """
    try:
        log.info("checking Pre-requisites")
        clients = ceph_cluster.get_ceph_objects("client")

        # Validate before indexing: the original indexed clients[1] first, so
        # this guard could never report a missing client cleanly.
        if not clients:
            log.info(
                f"This test requires minimum 1 client nodes.This has only {len(clients)} clients"
            )
            return 1

        # Keep using the second client when one exists (previous behaviour);
        # fall back to the only client so a single-client cluster still works.
        client = clients[1] if len(clients) > 1 else clients[0]

        fs_system_utils = CephFSSystemUtils(ceph_cluster)
        config = kw.get("config")

        daemon_list = config.get("daemon_list", ["mds"])
        crash_setup = config.get("crash_setup", 0)
        crash_check = config.get("crash_check", 0)
        crash_copy = config.get("crash_copy", 1)
        log_str = (
            f"Test Params : Crash Setup : {crash_setup}, Crash check:{crash_check}"
        )
        log_str += f", daemon_list : {daemon_list}"
        log.info(log_str)

        if crash_setup == 1:
            log.info(f"Setup Crash configuration for : {daemon_list}")
            fs_system_utils.crash_setup(client, daemon_list=daemon_list)

        if crash_check == 1:
            # Logged for reference only; crash_check derives the same
            # directory itself when copying crash files.
            log_dir = os.path.dirname(log.logger.handlers[0].baseFilename)
            log.info(f"log path:{log_dir}")
            log.info(f"Check for crash from : {daemon_list}")
            fs_system_utils.crash_check(
                client, crash_copy=crash_copy, daemon_list=daemon_list
            )
        return 0

    except Exception as e:
        # Top-level test boundary: report the failure via the return code.
        log.info(e)
        log.info(traceback.format_exc())
        return 1
130 changes: 119 additions & 11 deletions tests/cephfs/cephfs_system/cephfs_system_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import random
import threading
import time

from tests.cephfs.cephfs_utilsV1 import FsUtils
from utility.log import Log
Expand All @@ -26,7 +27,7 @@ def __init__(self, ceph_cluster):
"""
self.mons = ceph_cluster.get_ceph_objects("mon")
self.mgrs = ceph_cluster.get_ceph_objects("mgr")
self._mdss = ceph_cluster.get_ceph_objects("mds")
self.mdss = ceph_cluster.get_ceph_objects("mds")
self.osds = ceph_cluster.get_ceph_objects("osd")
self.clients = ceph_cluster.get_ceph_objects("client")
self.fs_util = FsUtils(ceph_cluster)
Expand All @@ -45,16 +46,17 @@ def get_test_object(self, cephfs_config, req_type="shared"):
"""
sv_objs = []
for i in cephfs_config:
for j in cephfs_config[i]["group"]:
sv_info = cephfs_config[i]["group"][j][req_type]
for k in sv_info:
if k not in ["sv_prefix", "sv_cnt"]:
sv_obj = {}
sv_obj.update({k: sv_info[k]})
sv_obj[k].update({"fs_name": i})
if "default" not in j:
sv_obj[k].update({"group_name": j})
sv_objs.append(sv_obj)
if "CLUS_MONITOR" not in i:
for j in cephfs_config[i]["group"]:
sv_info = cephfs_config[i]["group"][j][req_type]
for k in sv_info:
if k not in ["sv_prefix", "sv_cnt"]:
sv_obj = {}
sv_obj.update({k: sv_info[k]})
sv_obj[k].update({"fs_name": i})
if "default" not in j:
sv_obj[k].update({"group_name": j})
sv_objs.append(sv_obj)

sv_obj = random.choice(sv_objs)
if req_type == "unique":
Expand Down Expand Up @@ -122,3 +124,109 @@ def get_mds_requests(self, fs_name, client):
return max(mds_reqs)
else:
return 0

def crash_setup(self, client, daemon_list=None):
    """
    Enable crash module, create crash user and copy keyring file to cluster nodes

    Steps:
      1. Enable the mgr "crash" module through the given client.
      2. Download ceph.conf and the admin keyring from the client into the
         local log directory.
      3. For every node hosting one of the requested daemons (each hostname
         handled once), check for /etc/ceph/ceph.client.crash.keyring. When
         the check fails with "No such file", upload the two files, install
         ceph-common and create the client.crash keyring on that node.

    Args:
        client: client node object used to run ceph commands
        daemon_list: daemon types to process; keys of the internal map are
            "mds", "mgr", "mon", "osd". Defaults to ["mds"].

    Returns:
        0 always; failures of the provisioning commands propagate as
        exceptions.

    Raises:
        KeyError: if daemon_list holds a name other than the four above.
    """
    # None sentinel instead of a shared mutable default list.
    if daemon_list is None:
        daemon_list = ["mds"]

    client.exec_command(sudo=True, cmd="ceph mgr module enable crash")
    daemon_nodes = {
        "mds": self.mdss,
        "mgr": self.mgrs,
        "mon": self.mons,
        "osd": self.osds,
    }
    log_base_dir = os.path.dirname(log.logger.handlers[0].baseFilename)

    admin_files = ("ceph.conf", "ceph.client.admin.keyring")
    for file_name in admin_files:
        client.download_file(
            src=f"/etc/ceph/{file_name}",
            dst=f"{log_base_dir}/{file_name}",
            sudo=True,
        )

    # A host may carry several daemon types; provision it only once.
    crash_ready_nodes = set()
    for daemon in daemon_list:
        for node in daemon_nodes[daemon]:
            hostname = node.node.hostname
            if hostname in crash_ready_nodes:
                continue
            try:
                node.exec_command(
                    sudo=True, cmd="ls /etc/ceph/ceph.client.crash.keyring"
                )
            except Exception as ex:
                # Exception (not BaseException) so Ctrl-C / SystemExit are
                # not swallowed here.
                if "No such file" not in str(ex):
                    # Previously skipped silently; leave a trace and move on.
                    log.info(
                        f"Could not verify crash keyring on {hostname}, skipping: {ex}"
                    )
                    continue
                for file_name in admin_files:
                    node.upload_file(
                        src=f"{log_base_dir}/{file_name}",
                        dst=f"/etc/ceph/{file_name}",
                        sudo=True,
                    )
                node.exec_command(
                    sudo=True,
                    cmd="yum install -y --nogpgcheck ceph-common",
                )
                cmd = "ceph auth get-or-create client.crash mon 'profile crash' mgr 'profile crash'"
                cmd += " > /etc/ceph/ceph.client.crash.keyring"
                node.exec_command(sudo=True, cmd=cmd)
            crash_ready_nodes.add(hostname)
    return 0

def crash_check(self, client, crash_copy=1, daemon_list=None):
    """
    Check if Crash dir exists in all daemon hosting nodes, save meta file if crash exists

    For every node hosting one of the requested daemons (each hostname
    handled once) the directory /var/lib/ceph/<fsid>/crash is listed. Every
    entry other than "posted" is treated as a crash: its meta file is
    submitted with "ceph crash post" and the entry is recorded for the node.
    When crash_copy is 1, all files of each recorded crash are downloaded to
    <log dir>/crash_info/<hostname>/<crash entry>/.

    Args:
        client: client node object used to read the cluster fsid
        crash_copy: 1 to download the crash files, any other value to skip
        daemon_list: daemon types to process; keys of the internal map are
            "mds", "mgr", "mon", "osd". Defaults to ["mds"].

    Returns:
        0 always, whether or not crashes were found; command and download
        failures propagate as exceptions.

    Raises:
        KeyError: if daemon_list holds a name other than the four above.
    """
    # None sentinel instead of a shared mutable default list.
    if daemon_list is None:
        daemon_list = ["mds"]

    daemon_nodes = {
        "mds": self.mdss,
        "mgr": self.mgrs,
        "mon": self.mons,
        "osd": self.osds,
    }

    out, _ = client.exec_command(sudo=True, cmd="ceph fsid")
    fsid = out.strip()
    crash_dir = f"/var/lib/ceph/{fsid}/crash"

    crash_data = {}
    crash_checked_nodes = set()
    for daemon in daemon_list:
        for node in daemon_nodes[daemon]:
            hostname = node.node.hostname
            if hostname in crash_checked_nodes:
                continue
            out, _ = node.exec_command(sudo=True, cmd=f"ls {crash_dir}")
            # Filter instead of list.remove(): remove() raised ValueError
            # whenever the listing had no "posted" entry.
            crash_items = [item for item in out.split() if item != "posted"]
            for crash_item in crash_items:
                node.exec_command(
                    sudo=True,
                    cmd=f"ceph crash post -i {crash_dir}/{crash_item}/meta",
                )
            if crash_items:
                crash_data[node] = crash_items
            crash_checked_nodes.add(hostname)

    log_base_dir = os.path.dirname(log.logger.handlers[0].baseFilename)
    crash_log_path = os.path.join(log_base_dir, "crash_info")
    try:
        # exist_ok: the check may run more than once against one log dir.
        os.makedirs(crash_log_path, exist_ok=True)
    except OSError as ex:
        # Best effort, as before: report and carry on.
        log.info(ex)
    log.info(f"crash_data:{crash_data}")

    if crash_copy == 1:
        for crash_node, crash_list in crash_data.items():
            node_name = crash_node.node.hostname
            for crash_item in crash_list:
                crash_dst_path = os.path.join(crash_log_path, node_name, crash_item)
                # Creates the per-node directory too and tolerates re-runs.
                os.makedirs(crash_dst_path, exist_ok=True)
                crash_path = f"{crash_dir}/{crash_item}"

                out, _ = crash_node.exec_command(sudo=True, cmd=f"ls {crash_path}")
                for crash_file in out.split():
                    crash_node.download_file(
                        src=f"{crash_path}/{crash_file}",
                        dst=os.path.join(crash_dst_path, crash_file),
                        sudo=True,
                    )
                log.info(f"Copied {crash_path} to {crash_dst_path}")
    return 0
Loading

0 comments on commit 948c949

Please sign in to comment.