Skip to content

Commit

Permalink
Add CPU hotplug feature case
Browse files Browse the repository at this point in the history
Signed-off-by: qwang59 <[email protected]>
  • Loading branch information
qwang59 committed Nov 13, 2024
1 parent 2187c57 commit 2ea939c
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 0 deletions.
32 changes: 32 additions & 0 deletions BM/hotplug/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Release Notes

In software terms, CPU hotplug and unplug refer to bringing CPUs
online and offline through the '/sys/devices/system/cpu' interface.

The Python script uses the Avocado Test Framework, which must be installed first.

## Installing Avocado from source code
```
git clone https://github.com/avocado-framework/avocado.git
cd avocado
pip install .
```

or

## Installing Avocado via pip
```
pip3 install --user avocado-framework
```

## The command to run the case
### Running with 'runtests.py'
```
cd ..
./runtests.py -f hotplug -t hotplug/tests
```

### Running with avocado framework
```
avocado run cpu_off_on_stress.py
```
132 changes: 132 additions & 0 deletions BM/hotplug/cpu_off_on_stress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (c) 2024 Intel Corporation

"""
This script performs CPU offline/online stress test for the specified number of cycles.
Prerequisites:
Install the avocado framework and the required dependencies with the commands below:
git clone https://github.com/avocado-framework/avocado.git
cd avocado
pip install .
"""

import subprocess
import time
import os

from avocado.core.nrunner.runnable import Runnable

__author__ = "Wendy Wang"
__copyright__ = "GPL-2.0-only"
__license__ = "GPL version 2"

# Absolute path of the directory that contains this script
script_dir = os.path.dirname(os.path.abspath(__file__))

# Construct the path to the shared common.sh file, relative to the script directory
common_sh_path = os.path.join(script_dir, '../../common/common.sh')

class ShellCommandRunnable(Runnable):
    """Run one shell command line through /bin/bash and keep its output.

    Attributes:
        command: The command line handed to the shell.
        stdout: Captured standard output of the last run (None before run()).
        stderr: Captured standard error of the last run (None before run()).
    """

    def __init__(self, command):
        # NOTE(review): the Runnable base initializer is not invoked here;
        # confirm that none of its state is needed by users of this class.
        self.command = command
        self.stdout = None
        self.stderr = None

    def run(self):
        """Execute the command and return its exit status.

        The captured output is stored on ``self.stdout`` / ``self.stderr``
        whether the command succeeds or fails.

        Returns:
            The exit status of the command: 0 on success, otherwise the
            non-zero status reported by ``subprocess.CalledProcessError``.
        """
        try:
            completed = subprocess.run(
                self.command,
                shell=True,
                check=True,
                capture_output=True,
                text=True,
                executable='/bin/bash',
            )
        except subprocess.CalledProcessError as err:
            # check=True turns a non-zero exit status into this exception;
            # keep whatever output the command produced before failing.
            self.stderr = err.stderr
            self.stdout = err.stdout
            print (f"Error occurred: {self.stderr}")
            return err.returncode

        self.stdout = completed.stdout
        self.stderr = completed.stderr
        print(f"command '{self.command}' executed successfully.")
        return completed.returncode

def get_online_cpu_count():
    """Return the highest on-line CPU id plus one, or 0 on failure.

    The value is derived from the last field of the ``On-line CPU`` line
    printed by ``lscpu``. That field is parsed as a comma-separated list of
    single ids and inclusive ``first-last`` ranges (for example ``0-7``,
    ``0-3,8-11`` or ``0,2-7``), so non-contiguous lists are handled too.

    Returns:
        int: Highest listed CPU id + 1, so that ``range()`` over the result
        covers every on-line CPU id. 0 when the command fails, prints
        nothing, or prints something that cannot be parsed.
    """
    try:
        # Run 'lscpu' and keep only the list of on-line CPUs
        lscpu_command = "lscpu | grep 'On-line CPU' | awk '{print $NF}'"
        result = ShellCommandRunnable(lscpu_command)

        return_code = result.run()
        if return_code != 0:
            raise Exception ("Failed to get CPU count")

        # The pipeline's exit status is awk's, so a grep that matched nothing
        # still looks successful; an empty list has to be rejected explicitly.
        cpu_range = (result.stdout or "").strip()
        if not cpu_range:
            raise Exception ("No output from lscpu command")

        print(f"cpu range: {cpu_range}")
        # For every "a-b" or "n" entry take the upper bound, then the maximum.
        highest_cpu_id = max(
            int(entry.rpartition('-')[2]) for entry in cpu_range.split(',')
        )
        return highest_cpu_id + 1
    except Exception as e:
        # Callers treat 0 as "CPU count unavailable".
        print (f"Error getting cpu count:{e}")
        return 0

def check_dmesg_error():
    """Return the extracted dmesg log if it contains a failure keyword.

    Sources ``common.sh`` and calls its ``extract_case_dmesg`` shell function
    (defined outside this file; presumably it prints the kernel log lines
    that belong to the current test case - confirm against common.sh).

    Returns:
        str | None: The captured output when it contains one of the failure
        keywords, otherwise None (also when nothing was captured).
    """
    import shlex

    # Quote the path so a checkout directory containing spaces or other
    # shell metacharacters does not break the 'source' command.
    result = ShellCommandRunnable(
        f"source {shlex.quote(common_sh_path)} && extract_case_dmesg"
    )
    # NOTE(review): the exit status is not checked here; a failing helper is
    # only visible through the error printed by ShellCommandRunnable.run().
    result.run()
    dmesg_log = result.stdout

    # Check any failure, error, bug in the dmesg log when stress is running.
    # The match is case-sensitive, so the upper-case "BUG" spelling used by
    # kernel reports ("BUG: ...") is listed next to "Bug".
    failure_keywords = ("fail", "error", "Call Trace", "Bug", "BUG")
    if dmesg_log and any(keyword in dmesg_log for keyword in failure_keywords):
        return dmesg_log
    return None

def _set_cpu_online(cpu_id, online):
    """Write 1 (online) or 0 (offline) to the CPU's sysfs 'online' file and return the shell exit status."""
    value = 1 if online else 0
    runner = ShellCommandRunnable(
        f"echo {value} > /sys/devices/system/cpu/cpu{cpu_id}/online"
    )
    return runner.run()


def cpu_off_on_stress(cycle):
    """Perform CPU offline/online stress test for the specified number of cycles.

    In each cycle every CPU from 1 up to the value reported by
    get_online_cpu_count() (exclusive) is taken offline, then, after a
    one-second pause, brought back online. CPU0 is never touched. Afterwards
    the kernel log is checked through check_dmesg_error().

    Args:
        cycle: Number of offline/online cycles to run.

    Raises:
        Exception: If the kernel log shows a failure keyword, or - after the
            kernel log has been checked - if the CPU count could not be
            determined or any CPU failed to go offline/online.
    """
    stress_error = None
    hotplug_cpus = range(0)
    try:
        cpu_num = get_online_cpu_count()
        if cpu_num == 0:
            raise Exception("On-line CPU is not available.")

        print (f"The max CPU number is: {cpu_num}")
        hotplug_cpus = range(1, cpu_num)  # CPU0 is skipped

        # Start stress test cycle
        for i in range(1, cycle + 1):
            print(f"CPUs offline online stress cycle {i}")

            for cpu_id in hotplug_cpus:
                print(f"Offline CPU{cpu_id}")
                if _set_cpu_online(cpu_id, False) != 0:
                    raise Exception(f"Failed to bring CPU{cpu_id} offline")

            # Pause between the offline and the online phase.
            time.sleep(1)

            for cpu_id in hotplug_cpus:
                print(f"Online CPU{cpu_id}")
                if _set_cpu_online(cpu_id, True) != 0:
                    raise Exception(f"Failed to bring CPU{cpu_id} online")

    except Exception as e:
        print(f"Error during CPU stress testing:{e}")
        # Remember the failure so it is reported after the dmesg check
        # instead of being silently dropped.
        stress_error = e
        # Best effort: do not leave the machine with CPUs offline because the
        # cycle was aborted half-way. Exit statuses are ignored on purpose.
        for cpu_id in hotplug_cpus:
            _set_cpu_online(cpu_id, True)

    # Check dmesg log
    dmesg_log = check_dmesg_error()
    if dmesg_log:
        print(f"Kernel dmesg shows failure after CPU offline/online stress: {dmesg_log}")
        raise Exception("Kernel dmesg show failure") from stress_error
    print("Kernel dmesg shows Okay after CPU offline/online stress.")

    if stress_error is not None:
        # A clean kernel log must not hide a failed hotplug operation.
        raise stress_error


if __name__ == '__main__':
    import sys

    # An optional first command-line argument sets the number of stress
    # cycles; without it the script keeps running the default of 5 cycles.
    cpu_off_on_stress(int(sys.argv[1]) if len(sys.argv) > 1 else 5)
3 changes: 3 additions & 0 deletions BM/hotplug/tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This script performs CPU offline/online stress test for the specified number of cycles

python3 cpu_off_on_stress.py

0 comments on commit 2ea939c

Please sign in to comment.