Skip to content

Commit

Permalink
Enhance system test automation for parallelism and crash configuration
Browse files Browse the repository at this point in the history
Signed-off-by: Suma R <[email protected]>
  • Loading branch information
Suma R authored and Suma R committed Nov 26, 2024
1 parent 0cb1077 commit 2e2e2e0
Show file tree
Hide file tree
Showing 5 changed files with 671 additions and 243 deletions.
19 changes: 19 additions & 0 deletions suites/squid/cephfs/tier-3_cephfs_system_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ tests:
ENABLE_LOGS : 1
daemon_list : ['mds','client','osd','mgr','mon']
daemon_dbg_level : {'mds':20,'client':20,'osd':10,'mgr':10,'mon':10}
-
test:
abort-on-fail: false
desc: "Setup Crash configuration"
module: cephfs_crash_util.py
name: cephfs-crash-setup
config:
crash_setup : 1
daemon_list : ['mds','osd','mgr','mon']

- test:
name: CephFS_System_test
module: test_parallel.py
Expand All @@ -242,6 +252,15 @@ tests:
name: "CephFS System Test Client IO 7"
config:
test_name : io_test_workflow_7
-
test:
abort-on-fail: false
desc: "Check for Crash"
module: cephfs_crash_util.py
name: cephfs-crash-check
config:
crash_check : 1
daemon_list : ['mds','osd','mgr','mon']
-
test:
abort-on-fail: false
Expand Down
81 changes: 81 additions & 0 deletions tests/cephfs/cephfs_crash_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import os
import traceback

from tests.cephfs.cephfs_system.cephfs_system_utils import CephFSSystemUtils
from utility.log import Log

log = Log(__name__)


def run(ceph_cluster, **kw):
    """
    This script is a wrapper to the crash setup and crash check helpers of CephFSSystemUtils.
    It can be included prior to test case execution to set up crash configuration and post
    testcase execution to check for daemon crashes,
    PRETEST: To set up crash configuration
    --------------------------------------
    - test:
        name: cephfs-crash-setup
        module: cephfs_crash_util.py
        config:
          crash_setup : 1
          daemon_list : ['mds','osd','mgr','mon']
    POSTTEST: To check for crashes
    ------------------------------
    - test:
        name: cephfs-crash-check
        module: cephfs_crash_util.py
        config:
          crash_check : 1
          daemon_list : ['mds','osd','mgr','mon']
    Config params read by this script:
      crash_setup : 1 to invoke CephFSSystemUtils.crash_setup (default 0)
      crash_check : 1 to invoke CephFSSystemUtils.crash_check (default 0)
      crash_copy  : forwarded to crash_check as its crash_copy argument (default 1)
      daemon_list : daemon types passed to the helpers (default ['mds'])

    Returns:
        0 when the requested steps complete, 1 when no client node is available or
        any step raises an exception (the exception and traceback are logged).
    """
    try:
        fs_system_utils = CephFSSystemUtils(ceph_cluster)
        config = kw.get("config")
        clients = ceph_cluster.get_ceph_objects("client")
        log.info("checking Pre-requisites")

        # Validate before indexing: an empty client list must hit this message,
        # not an IndexError.
        if not clients:
            log.info(
                f"This test requires minimum 1 client nodes.This has only {len(clients)} clients"
            )
            return 1
        # Keep using the second client when one exists (previous behaviour), but
        # fall back to the only client so the stated minimum of 1 really works.
        client = clients[1] if len(clients) > 1 else clients[0]

        daemon_list = config.get("daemon_list", ["mds"])
        crash_setup = config.get("crash_setup", 0)
        crash_check = config.get("crash_check", 0)
        crash_copy = config.get("crash_copy", 1)
        log_str = (
            f"Test Params : Crash Setup : {crash_setup}, Crash check:{crash_check}"
        )
        log_str += f", daemon_list : {daemon_list}"
        log.info(log_str)
        if crash_setup == 1:
            log.info(f"Setup Crash configuration for : {daemon_list}")
            fs_system_utils.crash_setup(client, daemon_list=daemon_list)

        if crash_check == 1:
            # Logged only for reference; crash_check derives the same directory itself.
            log_dir = os.path.dirname(log.logger.handlers[0].baseFilename)
            log.info(f"log path:{log_dir}")
            log.info(f"Check for crash from : {daemon_list}")
            fs_system_utils.crash_check(
                client, crash_copy=crash_copy, daemon_list=daemon_list
            )
        return 0

    except Exception as e:
        log.info(e)
        log.info(traceback.format_exc())
        return 1
130 changes: 119 additions & 11 deletions tests/cephfs/cephfs_system/cephfs_system_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import random
import threading
import time

from tests.cephfs.cephfs_utilsV1 import FsUtils
from utility.log import Log
Expand All @@ -26,7 +27,7 @@ def __init__(self, ceph_cluster):
"""
self.mons = ceph_cluster.get_ceph_objects("mon")
self.mgrs = ceph_cluster.get_ceph_objects("mgr")
self._mdss = ceph_cluster.get_ceph_objects("mds")
self.mdss = ceph_cluster.get_ceph_objects("mds")
self.osds = ceph_cluster.get_ceph_objects("osd")
self.clients = ceph_cluster.get_ceph_objects("client")
self.fs_util = FsUtils(ceph_cluster)
Expand All @@ -45,16 +46,17 @@ def get_test_object(self, cephfs_config, req_type="shared"):
"""
sv_objs = []
for i in cephfs_config:
for j in cephfs_config[i]["group"]:
sv_info = cephfs_config[i]["group"][j][req_type]
for k in sv_info:
if k not in ["sv_prefix", "sv_cnt"]:
sv_obj = {}
sv_obj.update({k: sv_info[k]})
sv_obj[k].update({"fs_name": i})
if "default" not in j:
sv_obj[k].update({"group_name": j})
sv_objs.append(sv_obj)
if "CLUS_MONITOR" not in i:
for j in cephfs_config[i]["group"]:
sv_info = cephfs_config[i]["group"][j][req_type]
for k in sv_info:
if k not in ["sv_prefix", "sv_cnt"]:
sv_obj = {}
sv_obj.update({k: sv_info[k]})
sv_obj[k].update({"fs_name": i})
if "default" not in j:
sv_obj[k].update({"group_name": j})
sv_objs.append(sv_obj)

sv_obj = random.choice(sv_objs)
if req_type == "unique":
Expand Down Expand Up @@ -122,3 +124,109 @@ def get_mds_requests(self, fs_name, client):
return max(mds_reqs)
else:
return 0

def crash_setup(self, client, daemon_list=None):
    """
    Enable crash module, create crash user and copy keyring file to cluster nodes

    Args:
        client: client node object; used to enable the mgr crash module and as the
            source of ceph.conf and ceph.client.admin.keyring
        daemon_list: daemon types ('mds', 'mgr', 'mon', 'osd') whose hosting nodes
            are prepared. Defaults to ['mds'].
    Returns:
        0 once every node of the requested daemon types has been visited
    Raises:
        KeyError: if daemon_list contains a type other than the four listed above
    """
    # None sentinel instead of a shared mutable default list.
    if daemon_list is None:
        daemon_list = ["mds"]

    cmd = "ceph mgr module enable crash"
    client.exec_command(sudo=True, cmd=cmd)
    daemon_nodes = {
        "mds": self.mdss,
        "mgr": self.mgrs,
        "mon": self.mons,
        "osd": self.osds,
    }
    log_base_dir = os.path.dirname(log.logger.handlers[0].baseFilename)
    conf_files = ["ceph.conf", "ceph.client.admin.keyring"]

    # Stage the client's conf and admin keyring in the local log dir so they can
    # be pushed to nodes that lack them.
    # NOTE(review): this leaves the admin keyring in the log directory - confirm
    # that directory is not published as-is.
    for file_name in conf_files:
        dst_path = f"{log_base_dir}/{file_name}"
        src_path = f"/etc/ceph/{file_name}"
        client.download_file(src=src_path, dst=dst_path, sudo=True)

    # A host can run several daemon types; track hostnames to set it up only once.
    crash_ready_nodes = set()
    for daemon in daemon_list:
        for node in daemon_nodes[daemon]:
            hostname = node.node.hostname
            if hostname in crash_ready_nodes:
                continue
            try:
                # Probe for an existing crash keyring on the node.
                node.exec_command(
                    sudo=True, cmd="ls /etc/ceph/ceph.client.crash.keyring"
                )
            except Exception as ex:
                if "No such file" not in str(ex):
                    # Unexpected probe failure: record it instead of dropping it
                    # silently; the host stays unmarked so a later daemon type
                    # listing the same host retries it.
                    log.info(f"Crash keyring probe failed on {hostname}: {ex}")
                    continue
                # Keyring missing: push conf + admin keyring, install the ceph
                # CLI and create the client.crash keyring on the node.
                for file_name in conf_files:
                    src_path = f"{log_base_dir}/{file_name}"
                    dst_path = f"/etc/ceph/{file_name}"
                    node.upload_file(src=src_path, dst=dst_path, sudo=True)
                node.exec_command(
                    sudo=True,
                    cmd="yum install -y --nogpgcheck ceph-common",
                )
                cmd = "ceph auth get-or-create client.crash mon 'profile crash' mgr 'profile crash'"
                cmd += " > /etc/ceph/ceph.client.crash.keyring"
                node.exec_command(sudo=True, cmd=cmd)
            crash_ready_nodes.add(hostname)
    return 0

def crash_check(self, client, crash_copy=1, daemon_list=None):
    """
    Check if Crash dir exists in all daemon hosting nodes, save meta file if crash exists

    Args:
        client: client node object; used to read the cluster fsid
        crash_copy: when 1, download the files of every crash found into
            <log dir>/crash_info/<hostname>/<crash id>/
        daemon_list: daemon types ('mds', 'mgr', 'mon', 'osd') whose hosting nodes
            are inspected. Defaults to ['mds'].
    Returns:
        0 always; crashes that are found are logged (and optionally copied), they
        do not change the return value
    Raises:
        KeyError: if daemon_list contains a type other than the four listed above
    """
    # None sentinel instead of a shared mutable default list.
    if daemon_list is None:
        daemon_list = ["mds"]

    daemon_nodes = {
        "mds": self.mdss,
        "mgr": self.mgrs,
        "mon": self.mons,
        "osd": self.osds,
    }

    out, _ = client.exec_command(sudo=True, cmd="ceph fsid")
    fsid = out.strip()
    crash_dir = f"/var/lib/ceph/{fsid}/crash"
    crash_data = {}
    # A host can run several daemon types; inspect each hostname only once.
    crash_checked_nodes = set()
    for daemon in daemon_list:
        for node in daemon_nodes[daemon]:
            hostname = node.node.hostname
            if hostname in crash_checked_nodes:
                continue
            out, _ = node.exec_command(sudo=True, cmd=f"ls {crash_dir}")
            # Everything except the "posted" entry is treated as a crash.
            # Filtering (rather than list.remove) tolerates a crash dir that has
            # no "posted" entry.
            crash_items = [item for item in out.split() if item != "posted"]
            for crash_item in crash_items:
                crash_path = f"{crash_dir}/{crash_item}"
                node.exec_command(
                    sudo=True, cmd=f"ceph crash post -i {crash_path}/meta"
                )
            if crash_items:
                crash_data[node] = crash_items
            crash_checked_nodes.add(hostname)

    log_base_dir = os.path.dirname(log.logger.handlers[0].baseFilename)
    crash_log_path = f"{log_base_dir}/crash_info/"
    try:
        # exist_ok: the check may run more than once against the same log dir.
        os.makedirs(crash_log_path, exist_ok=True)
    except OSError as ex:
        log.info(ex)
    log.info(f"crash_data:{crash_data}")

    if crash_copy == 1:
        for crash_node, crash_list in crash_data.items():
            node_name = crash_node.node.hostname
            for crash_item in crash_list:
                crash_dst_path = f"{crash_log_path}/{node_name}/{crash_item}"
                # Creates the per-node directory as well; safe on repeated runs.
                os.makedirs(crash_dst_path, exist_ok=True)
                crash_path = f"{crash_dir}/{crash_item}"

                out, _ = crash_node.exec_command(sudo=True, cmd=f"ls {crash_path}")
                for crash_file in out.split():
                    src_path = f"{crash_path}/{crash_file}"
                    dst_path = f"{crash_dst_path}/{crash_file}"
                    crash_node.download_file(src=src_path, dst=dst_path, sudo=True)
                log.info(f"Copied {crash_path} to {crash_dst_path}")
    return 0
Loading

0 comments on commit 2e2e2e0

Please sign in to comment.