Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Guest test kexec 240808 #369

Merged
merged 2 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 55 additions & 8 deletions BM/guest-test/guest.test_launcher.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@ echo "$SCRIPT_DIR"
GCOV="off"
# timeout control in case of TD VM booting hang
SECONDS=0
TIMEOUT=900
TIMEOUT=1200
# EXEC_FLAG=0 shows test_executor being called
EXEC_FLAG=1
# KEXEC_CNT=0 by default, pos-integer values indicate normal kexec test cycle count
KEXEC_CNT=0
# MEM_DRAIN=NA by default, only used for tdx guest kexec test with yes or no option
MEM_DRAIN="NA"

# ERR_STR and ERR_FLAG definition
# for any unexpected error/warning/call trace handling
Expand Down Expand Up @@ -68,6 +72,8 @@ NOTE!! args passed here will override params in json config file
-i [optional] path under guest-test to standalone common.json file
-j [optional] path under guest-test to standalone qemu.config.json file
-r [optioanl] abs. path to single rpm file: kernel-img, kernel-devel or kernel-headers
-o [optional] memory drained option yes or no
-k [optional] default value 0, non-zero value matches normal kexec test cycle count
-h HELP info
EOF
}
Expand Down Expand Up @@ -114,7 +120,7 @@ echo PORT="$PORT" > "$SCRIPT_DIR"/test_params.py
# used across test_launcher.sh, qemu_runner.py, test_executor.sh

# get args for QEMU boot configurable parameters
while getopts :v:s:m:d:t:e:f:x:c:p:g:i:j:r:h arg; do
while getopts :v:s:m:d:t:e:f:x:c:p:g:i:j:r:o:k:h arg; do
case $arg in
v)
VCPU=$OPTARG
Expand Down Expand Up @@ -172,6 +178,14 @@ while getopts :v:s:m:d:t:e:f:x:c:p:g:i:j:r:h arg; do
RPM=$OPTARG
echo RPM="\"$RPM\"" >> "$SCRIPT_DIR"/test_params.py
;;
o)
MEM_DRAIN=$OPTARG
echo MEM_DRAIN="\"$MEM_DRAIN\"" >> "$SCRIPT_DIR"/test_params.py
;;
k)
KEXEC_CNT=$OPTARG
echo KEXEC_CNT="$KEXEC_CNT" >> "$SCRIPT_DIR"/test_params.py
;;
h)
usage && exit 0
;;
Expand Down Expand Up @@ -235,15 +249,42 @@ export GCOV
cd "$SCRIPT_DIR" || die "fail to switch to $SCRIPT_DIR"
rm -rf /root/.ssh/known_hosts
while read -r line; do
echo "[${VM_TYPE}_vm]: $line"
if [[ "$MEM_DRAIN" != "NA" ]]; then
echo "[${VM_TYPE}_vm_$KEXEC_CNT]: $line"
else
echo "[${VM_TYPE}_vm]: $line"
fi
# within $TIMEOUT but bypass the very first 2 seconds to avoid unexpected $BOOT_PATTERN match (from parameter handling logic)
if [[ $SECONDS -lt $TIMEOUT ]] && [[ $SECONDS -ge 2 ]]; then
if [[ "$line" == @($BOOT_PATTERN) ]] && [[ $EXEC_FLAG -ne 0 ]]; then
test_print_trc "VM_TYPE: $VM_TYPE, VCPU: $VCPU, SOCKETS: $SOCKETS, MEM: $MEM, DEBUG: $DEBUG, PMU: $PMU, CMDLINE: $CMDLINE, \
FEATURE: $FEATURE, TESTCASE: $TESTCASE, SECONDS: $SECONDS"
EXEC_FLAG=0
if ! ./guest.test_executor.sh; then EXEC_FLAG=1 && break; fi # break while read loop in case of guest.test_executor.sh test failure
if [[ "$GCOV" == "on" ]]; then break; fi # break in case of GCOV on and move to VM lifecycle management since VM keep alive
FEATURE: $FEATURE, TESTCASE: $TESTCASE, SECONDS: $SECONDS, MEM_DRAIN: $MEM_DRAIN, KEXEC_CNT: $KEXEC_CNT"
if [[ "$KEXEC_CNT" -eq 0 ]]; then
if [[ "$MEM_DRAIN" == "yes" ]]; then
TESTCASE=TD_KEXEC_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_"$KEXEC_CNT"
elif [[ "$MEM_DRAIN" == "no" ]]; then
TESTCASE=TD_KEXEC_NO_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_"$KEXEC_CNT"
fi
echo TESTCASE="\"$TESTCASE\"" >> "$SCRIPT_DIR"/test_params.py
echo KEXEC_CNT="$KEXEC_CNT" >> "$SCRIPT_DIR"/test_params.py
EXEC_FLAG=0
if ! ./guest.test_executor.sh; then EXEC_FLAG=1 && break; fi # break while read loop in case of guest.test_executor.sh test failure
if [[ "$GCOV" == "on" ]]; then break; fi # break in case of GCOV on and move to VM lifecycle management since VM keep alive
# keep in while read loop in case of pos-integer $KEXEC_CNT value
elif [[ "$KEXEC_CNT" -gt 0 ]]; then # $KEXEC_CNT pos-integer value indicates normal kexec test cycle count
./guest.test_executor.sh &
test_print_trc "guest kernel kexec test triggered @cycles: $KEXEC_CNT"
if [[ "$MEM_DRAIN" == "yes" ]]; then
TESTCASE=TD_KEXEC_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_"$KEXEC_CNT"
elif [[ "$MEM_DRAIN" == "no" ]]; then
TESTCASE=TD_KEXEC_NO_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_"$KEXEC_CNT"
fi
echo TESTCASE="\"$TESTCASE\"" >> "$SCRIPT_DIR"/test_params.py
echo KEXEC_CNT="$KEXEC_CNT" >> "$SCRIPT_DIR"/test_params.py
KEXEC_CNT=$((KEXEC_CNT-1))
else
die "abnormal kexec test cycle count $KEXEC_CNT, please check"
fi
# err_handlers string matching
elif [[ "$line" == @($ERR_STR1) ]]; then
test_print_err "There is $ERR_STR1, test is not fully PASS"
Expand All @@ -269,7 +310,13 @@ done < <(
if [ "$GCOV" == "off" ]; then
# keep timeout process run foreground for direct script execution correctness
# handle timeout effect case SIGTERM impact on terminal no type-in prompt issue
timeout --foreground "$TIMEOUT" ./guest.qemu_runner.sh || reset
if ! timeout --foreground "$TIMEOUT" ./guest.qemu_runner.sh; then
reset
test_print_err "${VM_TYPE}vm_$PORT $TESTCASE TIMEOUT!!"
sleep 3
# terminate all test processes in case of timeout
kill 0
fi
else
test_print_trc "${VM_TYPE}vm_$PORT keep alive for gcov data collection" && ./guest.qemu_runner.sh
fi
Expand Down
30 changes: 30 additions & 0 deletions BM/tdx-guest/tdx.guest_test_executor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,36 @@ case "$TESTCASE" in
guest_test_close
fi
;;
# continuous TD KEXEC test group with $VCPU VCPU $MEM GB mem and no mem drain
TD_KEXEC_NO_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_2 | TD_KEXEC_NO_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_1)
guest_test_prepare tdx_kexec_test.sh
guest_test_entry tdx_kexec_test.sh "-v $VCPU -m $MEM -o no -k $KEXEC_CNT" || \
die "Failed on $TESTCASE tdx_kexec_test.sh -v $VCPU -m $MEM -o no -k $KEXEC_CNT"
;;
TD_KEXEC_NO_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_0)
guest_test_prepare tdx_kexec_test.sh
guest_test_entry tdx_kexec_test.sh "-v $VCPU -m $MEM -o no -k $KEXEC_CNT" || \
die "Failed on $TESTCASE tdx_kexec_test.sh -v $VCPU -m $MEM -o no -k $KEXEC_CNT"
if [[ "$GCOV" == "off" ]]; then
guest_test_close
fi
;;
# continuous TD KEXEC test group with $VCPU VCPU $MEM GB mem and mem drain
TD_KEXEC_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_2 | TD_KEXEC_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_1)
guest_test_prepare tdx_kexec_test.sh
guest_test_entry tdx_kexec_test.sh "-v $VCPU -m $MEM -o yes -k $KEXEC_CNT" || \
die "Failed on $TESTCASE tdx_kexec_test.sh -v $VCPU -m $MEM -o yes -k $KEXEC_CNT"
guest_test_entry tdx_kexec_test.sh "-s yes" || \
die "Failed on $TESTCASE tdx_kexec_test.sh -s yes"
;;
TD_KEXEC_MEM_DRAIN_"$VCPU"C_"$MEM"G_CYCLE_0)
guest_test_prepare tdx_kexec_test.sh
guest_test_entry tdx_kexec_test.sh "-v $VCPU -m $MEM -o yes -k $KEXEC_CNT" || \
die "Failed on $TESTCASE tdx_kexec_test.sh -v $VCPU -m $MEM -o yes -k $KEXEC_CNT"
if [[ "$GCOV" == "off" ]]; then
guest_test_close
fi
;;
:)
test_print_err "Must specify the test scenario option by [-t]"
usage && exit 1
Expand Down
172 changes: 172 additions & 0 deletions BM/tdx-guest/tdx_kexec_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/usr/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (c) 2024 Intel Corporation

# Author: Hongyu Ning <[email protected]>
#
# History: 16, Aug., 2024 - Hongyu Ning - creation


# @desc This script runs kexec related tests and checks in TDX Guest VM

###################### Variables ######################
# directory this script was started from, printed for reference
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
echo "$SCRIPT_DIR"
# NOTE(review): common.sh presumably provides test_print_trc, test_print_err
# and die used throughout this script - confirm
source common.sh

# Parse command line options:
#   -v  expected number of vcpus in the guest
#   -m  expected guest memory size in GB
#   -o  memory drain option, yes or no
#   -k  kexec test cycle count
#   -s  yes to only run the kexec load and switch step
# The leading ':' of the optstring selects silent error reporting: a missing
# option argument is returned as ':' and an unknown option as '?', in both
# cases with the offending option letter stored in $OPTARG.
while getopts :v:m:o:k:s: arg; do
  case $arg in
    v)
      VCPU=$OPTARG
      ;;
    m)
      MEM=$OPTARG
      ;;
    o)
      MEM_DRAIN=$OPTARG
      ;;
    k)
      KEXEC_CNT=$OPTARG
      ;;
    s)
      KEXEC_SSH=$OPTARG
      ;;
    :)
      test_print_err "Must supply an argument to -$OPTARG."
      exit 1
      ;;
    *)
      # unknown option: do not blame a missing argument
      test_print_err "Invalid option: -$OPTARG."
      exit 1
      ;;
  esac
done

###################### Functions ######################
# function to check unaccepted memory and do mem drain
# Globals written: unaccepted_mem - 2nd field of the /proc/meminfo line
#                  matching "naccepted"
# When unaccepted memory is left, "tail /dev/zero" is started to drain it
# NOTE(review): presumably tail keeps buffering the endless NUL stream until
# it gets killed on memory exhaustion - confirm
memory_drain() {
  # check unaccepted memory
  unaccepted_mem=$(awk '/naccepted/ {print $2}' /proc/meminfo)

  if [[ "$unaccepted_mem" -eq 0 ]]; then
    # nothing left to drain
    test_print_trc "unaccepted memory is zero now"
  elif [[ "$unaccepted_mem" -gt 0 ]]; then
    # drain memory if unaccepted memory is not zero
    tail /dev/zero
  else
    die "unaccepted memory check failed with unaccepted mem: $unaccepted_mem"
  fi
}

# function to load kexec target kernel image and trigger kexec switch to target kernel
# The target is the kernel and initramfs under /boot matching the running
# kernel release; the switch is started in background after a 45s delay
kexec_load_switch() {
  local kernel_release
  kernel_release=$(uname -r)

  # load kexec target kernel image and initrd, reuse kernel cmdline
  if kexec -d -l "/boot/vmlinuz-${kernel_release}" --initrd="/boot/initramfs-${kernel_release}.img" --reuse-cmdline; then
    test_print_trc "kexec target kernel image loaded"
  else
    die "failed to load kexec target kernel image, test failed"
  fi

  sleep 45

  # trigger kexec switch to target kernel, run in background
  test_print_trc "kexec switch to target kernel triggered"
  kexec -d -e &
}

# function to check VCPU and MEMORY size in TDX guest
# Globals read:    VCPU - expected vcpu count
#                  MEM  - expected memory size in GB
# Globals written: vcpu_td - 2nd field of the first lscpu line with "CPU(s)"
#                  mem_td  - MemTotal of /proc/meminfo rounded to whole GB
# Calls die on vcpu mismatch, on memory size mismatch and when the guest
# memory size could not be determined
vcpu_mem_check() {
  # check vcpu number
  vcpu_td=$(lscpu | grep "CPU(s)" | head -1 | awk '{print $2}')
  test_print_trc "vcpu_td: $vcpu_td"

  if [[ "$vcpu_td" -ne "$VCPU" ]]; then
    die "Guest TD VM boot with vcpu: $vcpu_td (expected $VCPU)"
  fi

  # check memory size, MemTotal converted from kB to GB and rounded
  mem_td=$(grep "MemTotal" /proc/meminfo | awk '$3=="kB" {printf "%.0f\n", $2/(1024*1024)}')
  test_print_trc "mem_td: $mem_td"

  # mem_td is the divisor of the checks below and must be a positive integer:
  # an empty value (MemTotal line not parsed) or 0 (value rounded down to 0 GB)
  # would end in a bash "division by 0" error instead of a clear test failure
  if [[ ! "$mem_td" =~ ^[1-9][0-9]*$ ]]; then
    die "Guest TD VM boot with memory: $mem_td GB (expected $MEM GB)"
  # $MEM less than or equal to 4GB need special memory size check,
  # integer quotient MEM / mem_td of 1 or 2 is accepted
  elif [[ $MEM -le 4 ]]; then
    if [[ $(( MEM / mem_td )) -lt 1 ]] || [[ $(( MEM / mem_td )) -gt 2 ]]; then
      die "Guest TD VM boot with memory: $mem_td GB (expected $MEM GB)"
    fi
  # $MEM more than 4GB use general memory size check,
  # integer quotient MEM / mem_td must be exactly 1
  else
    if [[ $(( MEM / mem_td )) -ne 1 ]]; then
      die "Guest TD VM boot with memory: $mem_td GB (expected $MEM GB)"
    fi
  fi

  test_print_trc "Guest TD VM boot up successfully with config:"
  test_print_trc "vcpu $VCPU, memory $MEM GB"
}

# function to free memory by clear memory page caches
# Returns: non-zero when writing to /proc/sys/vm/drop_caches failed, so that
#          callers using "clear_mem_cache || die ..." actually see the failure
clear_mem_cache() {
  test_print_trc "Start to clear memory page caches"
  # flush dirty data first
  sync
  sleep 1
  # free page cache, dentries and inodes
  if ! echo 3 > /proc/sys/vm/drop_caches; then
    test_print_err "Failed to write /proc/sys/vm/drop_caches"
    return 1
  fi
  test_print_trc "Free memory by clear memory page caches done"
}

# function to increase swap space for more virtual memory
# Creates /swapfile (2GB), restricts its permission, formats and enables it
# Returns: non-zero as soon as one of the steps fails, remaining steps are
#          skipped, so that "increase_swap_space || die ..." sees the failure
increase_swap_space() {
  test_print_trc "Start to increase swap space"
  # create swap file with 2GB size
  if ! dd if=/dev/zero of=/swapfile bs=1M count=2048; then
    test_print_err "Failed to create /swapfile"
    return 1
  fi
  if ! chmod 600 /swapfile; then
    test_print_err "Failed to change permission of /swapfile"
    return 1
  fi
  if ! mkswap /swapfile; then
    test_print_err "Failed to set up swap area on /swapfile"
    return 1
  fi
  if ! swapon /swapfile; then
    test_print_err "Failed to enable swap on /swapfile"
    return 1
  fi
  test_print_trc "Increase swap space done"
}

###################### Do Works ######################
# Main flow, two modes selected by $KEXEC_SSH (option -s):
#   anything but "yes": optional memory drain, vcpu/memory check, then
#                       kexec preparation or kexec load and switch
#   "yes":              only kexec load and switch
if [[ "$KEXEC_SSH" != "yes" ]]; then
test_print_trc "start kexec test with vcpu: $VCPU, memory: $MEM, mem_drain: $MEM_DRAIN, kexec_cnt: $KEXEC_CNT, kexec_ssh: $KEXEC_SSH"
# do memory_drain if $MEM_DRAIN option enabled "yes"
if [[ "$MEM_DRAIN" == "yes" ]]; then
test_print_trc "start memory drain before kexec test"
memory_drain
elif [[ "$MEM_DRAIN" == "no" ]]; then
# only log the skip while kexec cycles are left, the final loop is handled below
if [[ "$KEXEC_CNT" -ne 0 ]]; then
test_print_trc "skip memory drain before kexec test"
fi
else
# any other value than yes or no is rejected
die "unsupported memory drain option: $MEM_DRAIN"
fi

# do memory_drain in final test loop ($KEXEC_CNT equals 0) even if $MEM_DRAIN not enabled,
# then read unaccepted memory again and fail if it is still above zero
if [[ "$KEXEC_CNT" -eq 0 ]] && [[ "$MEM_DRAIN" == "no" ]]; then
test_print_trc "Start memory drain in final test loop in case of no mem_drain before kexec test"
memory_drain
unaccepted_mem=$(grep "naccepted" /proc/meminfo | awk '{print $2}')
if [[ "$unaccepted_mem" -gt 0 ]]; then
die "unaccepted memory is not drained: $unaccepted_mem"
elif [[ "$unaccepted_mem" -eq 0 ]]; then
test_print_trc "unaccepted memory is zero now"
else
die "unaccepted memory check failed with unaccepted mem: $unaccepted_mem"
fi
fi

# do VCPU and MEM check
vcpu_mem_check || die "failed on vcpu_mem_check"

# do kexec load and switch
if [[ "$KEXEC_CNT" -gt 0 ]]; then
if [[ "$MEM_DRAIN" == "yes" ]]; then
# mem drain case: only free page caches and add swap space here,
# kexec_load_switch is not called in this branch
clear_mem_cache || die "failed on clear_mem_cache"
sleep 3
increase_swap_space || die "failed on increase_swap_space"
sleep 3
elif [[ "$MEM_DRAIN" == "no" ]]; then
kexec_load_switch || die "failed on kexec_load_switch"
fi
elif [[ "$KEXEC_CNT" -eq 0 ]]; then
test_print_trc "skip kexec load and switch in final test loop"
else
# neither above zero nor zero, e.g. a negative cycle count
die "unsupported kexec test cycle count: $KEXEC_CNT"
fi
else # $KEXEC_SSH == "yes", run kexec via ssh standalone to bypass abnormal kexec stuck
test_print_trc "run kexec_load_switch via ssh standalone"
# wait before load; kexec_load_switch itself waits another 45s between load and switch
sleep 45
kexec_load_switch || die "failed on kexec_load_switch ssh standalone"
fi
74 changes: 74 additions & 0 deletions BM/tdx-guest/tdx_kexec_test_entry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (c) 2024 Intel Corporation

# Author: Hongyu Ning <[email protected]>
#
# History: 15, Aug., 2024 - Hongyu Ning - creation


# @desc This script is the general TDX guest kexec test entry

###################### Variables ######################
# NOTE(review): common.sh presumably provides die and test_print_err used
# below - confirm
source common.sh
# debug switch, handed to guest.test_launcher.sh through its -d option
DEBUG=on

###################### Functions ######################
# helper function: print the option summary of this script to stdout
usage() {
  local script_name=${0##*/}

  printf '%s\n' \
    "usage: ./${script_name}" \
    "-v number of vcpus" \
    "-m memory size in GB" \
    "-o memory drained option yes or no" \
    "-k pos-integer value for normal kexec test cycle count" \
    "-r abs. path to single rpm file: kernel-img, kernel-devel or kernel-headers" \
    "-h HELP info"
}

# Parse command line options, see usage() for their meaning.
# The leading ':' of the optstring selects silent error reporting: a missing
# option argument is returned as ':' and an unknown option as '?', in both
# cases with the offending option letter stored in $OPTARG.
while getopts :v:m:o:k:r:h arg; do
  case $arg in
    v)
      VCPU=$OPTARG
      ;;
    m)
      MEM=$OPTARG
      ;;
    o)
      MEM_DRAIN=$OPTARG
      ;;
    k)
      KEXEC_CNT=$OPTARG
      ;;
    r)
      RPM=$OPTARG
      ;;
    h)
      usage && exit 0
      ;;
    :)
      test_print_err "Must supply an argument to -$OPTARG."
      exit 1
      ;;
    *)
      # unknown option: do not blame a missing argument
      test_print_err "Invalid option: -$OPTARG."
      exit 1
      ;;
  esac
done

###################### Do Works ######################
# Start guest.test_launcher.sh for one kexec test run.
#   $1 - test case name passed with -x
#   $2 - extra kernel command line passed with -c
# vcpu, memory, debug, mem drain and kexec cycle settings are taken from the
# globals filled by the option parsing; returns the launcher exit status
launch_kexec_test() {
  ./guest-test/guest.test_launcher.sh -v "$VCPU" -s 1 -m "$MEM" -d "$DEBUG" -t tdx -e tdx-guest -f tdx \
    -x "$1" -c "$2" -p off -o "$MEM_DRAIN" -k "$KEXEC_CNT"
}

# step 1: install kexec test kernel rpm in target TDX guest OS image
./guest-test/guest.test_launcher.sh -v 1 -s 1 -m 16 -d "$DEBUG" -t tdx -e tdx-guest -f tdx \
  -x TD_RPM_INSTALL -c "accept_memory=lazy" -p off -r "$RPM" || \
  die "Failed on kexec test kernel rpm install"
sleep 3

# step 2: prepare and trigger kexec in target TDX guest OS image,
# test case name and kernel cmdline depend on the memory drain option
case "$MEM_DRAIN" in
  yes)
    launch_kexec_test "TD_KEXEC_MEM_DRAIN_${VCPU}C_${MEM}G_CYCLE_${KEXEC_CNT}" \
      "accept_memory=lazy crashkernel=1G-4G:256M,4G-64G:384M,64G-:512M" || \
      die "Failed on kexec test"
    ;;
  no)
    launch_kexec_test "TD_KEXEC_NO_MEM_DRAIN_${VCPU}C_${MEM}G_CYCLE_${KEXEC_CNT}" \
      "accept_memory=lazy" || \
      die "Failed on kexec test"
    ;;
  *)
    die "Invalid memory drained option"
    ;;
esac
Loading
Loading