Skip to content

Commit

Permalink
Add CPU Topology sched_domain name generic test case
Browse files Browse the repository at this point in the history
Signed-off-by: Wendy Wang <[email protected]>
  • Loading branch information
qwang59 authored and ysun committed Jan 23, 2024
1 parent d2e666a commit 464a0c8
Show file tree
Hide file tree
Showing 3 changed files with 337 additions and 0 deletions.
335 changes: 335 additions & 0 deletions topology/cpu_topology.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
cd "$(dirname "$0")" 2>/dev/null || exit 1
source ../.env

# Platform detection results, filled in by the get_* helpers of this script
# and read by generic_sched_domain_names.

# Core type of the most recently queried CPU: "Core", "Atom" or "0x0"
# (written by get_core_type).
TYPE_VALUE=""
# "true" or "false" (written by get_hybrid_sku).
HYBRID_VALUE=""
# "false" when cpuid reports CPUs with no L3 cache, otherwise "true"
# (written by get_ecore_wo_llc).
LL3_VALUE=""
# "true" when cpuid leaf 0x1f reports a die level, otherwise "false"
# (written by get_die_level_type).
DIE_VALUE=""
# "disabled" when the NUMA node count equals the socket count, otherwise
# "enabled" (written by get_snc_disable).
SNC_VALUE=""
# "yes" when lstopo shows more L3 caches than packages, otherwise "no"
# (written by get_ll3_num).
LL3_PER_SOCKET=""

: "${CASE_NAME:=""}"

usage() {
Expand Down Expand Up @@ -172,6 +179,331 @@ bit width of level & previous levels are aligned."
fi
}

#######################################
# Detect whether cpuid reports the platform as a hybrid part.
# Globals:
#   HYBRID_VALUE (written) - "true" or "false", the last field of the
#                            "hybrid" line printed by `cpuid -l 0x07`
# Arguments:
#   None are read; callers may pass a CPU index, which is ignored.
# Returns:
#   0 once a true/false answer has been recorded; 1 after block_test when
#   the value is anything else (including empty, e.g. cpuid unavailable).
#######################################
get_hybrid_sku() {
  local hybrid_type
  local cmd_output

  # sort -u collapses the identical per-CPU lines into a single value.
  hybrid_type=$(cpuid -l 0x07 | grep hybrid | sort -u | awk '{print $NF}')

  case "$hybrid_type" in
  true | false)
    HYBRID_VALUE=$hybrid_type
    cmd_output="hybrid $hybrid_type"
    test_print_trc "Current CPU is $cmd_output"
    ;;
  *)
    block_test "Unknown hybrid SKU"
    return 1
    ;;
  esac
  do_cmd "echo $cmd_output"
  return 0
}

#######################################
# Determine the core type of one CPU from `cpuid -l 0x1a`.
# Globals:
#   TYPE_VALUE (written) - "Core", "Atom", or "0x0" when cpuid prints "(0)"
#                          as the fifth field of the core type line
# Arguments:
#   $1 - CPU index as printed by cpuid in its "CPU <n>:" header
# Returns:
#   0 when the type was recognised; 1 after block_test otherwise.
#######################################
get_core_type() {
  local cpu_id=$1
  local core_type
  local cmd_output

  # Match the header including its trailing colon: a bare "CPU 1" would also
  # hit "CPU 10:", "CPU 11:", ... and tail would then pick the wrong CPU.
  # The core type is on the third line below the header.
  core_type=$(cpuid -l 0x1a | grep -A 3 "CPU ${cpu_id}:" | tail -n 1 | awk '{print $5}')

  case "$core_type" in
  Core)
    TYPE_VALUE="Core"
    cmd_output="Intel Core"
    ;;
  Atom)
    TYPE_VALUE="Atom"
    cmd_output="Intel Atom"
    ;;
  "(0)")
    TYPE_VALUE="0x0"
    cmd_output="Intel Pcore only"
    ;;
  *)
    block_test "Unknown CPU Core Type"
    return 1
    ;;
  esac
  test_print_trc "Current CPU is $cmd_output"
  do_cmd "echo $cmd_output"
  return 0
}

#######################################
# Record whether any CPU is reported without an L3 cache.
# Looks for "no more caches" in the output of `cpuid -l 0x04 -s 3`.
# Globals:
#   LL3_VALUE (written) - "false" when such CPUs exist, "true" otherwise
# Returns:
#   CPUs without L3 found: 0 if the trace call succeeded, else 1.
#   All CPUs have L3:      1 if the trace call succeeded, else 0.
#######################################
get_ecore_wo_llc() {
  local missing_llc_report

  missing_llc_report=$(cpuid -l 0x04 -s 3 | grep -B 2 "no more caches")

  if [[ -z $missing_llc_report ]]; then
    LL3_VALUE="true"
    if test_print_trc "All CPUs have L3 Cache"; then
      return 1
    fi
    return 0
  fi

  LL3_VALUE="false"
  if test_print_trc "SKU has CPUs no L3 Cache"; then
    return 0
  fi
  return 1
}

#######################################
# Record whether cpuid leaf 0x1f (subleaf 3) mentions a "die" level.
# Globals:
#   DIE_VALUE (written) - "true" when a die line was found, "false" otherwise
# Returns:
#   Die level found:     0 if the trace call succeeded, else 1.
#   Die level not found: 1 if the trace call succeeded, else 0.
#######################################
get_die_level_type() {
  local die_level_field

  die_level_field=$(cpuid -l 0x1f -s 3 | grep "die" | sort -u | awk '{print $4}')

  if [[ -z $die_level_field ]]; then
    DIE_VALUE="false"
    if test_print_trc "CPU does not support Die level type"; then
      return 1
    fi
    return 0
  fi

  DIE_VALUE="true"
  if test_print_trc "CPU supports Die level type"; then
    return 0
  fi
  return 1
}

#######################################
# Decide whether SNC is enabled by comparing two lscpu counters: the
# "NUMA node(s)" value against the "Socket(s)" value.
# Globals:
#   SNC_VALUE  (written) - "disabled" when both counts are equal,
#                          "enabled" otherwise
#   numa_nodes (written) - NUMA node count; intentionally left global as in
#                          the original, since other code may read it
# Returns:
#   SNC disabled: 0 if the trace call succeeded, else 1.
#   SNC enabled:  1 if the trace call succeeded, else 0.
#######################################
get_snc_disable() {
  local socket_num

  socket_num=$(lscpu | awk '/Socket\(s\)/ {print $NF}')
  test_print_trc "Sockets number: $socket_num"
  numa_nodes=$(lscpu | awk '/NUMA node\(s\)/ {print $NF}')
  test_print_trc "NUMA Nodes: $numa_nodes"

  if [[ $numa_nodes -eq $socket_num ]]; then
    SNC_VALUE="disabled"
    if test_print_trc "CPU SNC is disabled"; then
      return 0
    fi
    return 1
  fi

  SNC_VALUE="enabled"
  if test_print_trc "CPU SNC is enabled"; then
    return 1
  fi
  return 0
}

#######################################
# Turn on sched_domain verbose output and dump topology information into
# the trace: the NUMA node count from lscpu, CPU0's sched_domain names from
# debugfs, and the `lstopo --no-io` rendering (saved to topology.log in the
# current directory).
# Globals:
#   numa_num, sched_domain_names, sched_domain_proc (written) - left global
#   as in the original, because the rest of the script is not visible here
#   and may read them.
# Notes:
#   block_test is invoked when the debugfs names are empty or lstopo cannot
#   run. NOTE(review): block_test is defined outside this view; presumably
#   it ends the test - confirm.
#######################################
get_lstopo_outputs() {
  test_print_trc "Enable the sched_domain verbose"
  do_cmd "echo Y > /sys/kernel/debug/sched/verbose"

  # Third field of the "NUMA node(s):   <n>" line.
  numa_num=$(lscpu | grep "NUMA node(s)" | awk '{print $3}')
  test_print_trc "lscpu shows numa node num: $numa_num"

  # grep prefixes each match with its file name; keep only the name itself.
  sched_domain_names=$(grep . /sys/kernel/debug/sched/domains/cpu0/domain*/name | awk -F ":" '{print $NF}')
  test_print_trc "CPU0 sched_domain names: $sched_domain_names"
  # Captured so the blocking message can show what /proc/schedstat offers.
  sched_domain_proc=$(cat /proc/schedstat)
  if [[ -z "$sched_domain_names" ]]; then
    block_test "sched_domain debugfs is not available, need to check /proc/schedstat: $sched_domain_proc"
  fi

  test_print_trc "Will run lstopo --no-io command to get topology outputs"
  # Probe run: only the exit status matters, output is discarded on purpose.
  lstopo --no-io 1>/dev/null 2>&1 || block_test "Please install hwloc-gui.x86_64 package to get lstopo tool"
  do_cmd "lstopo --no-io > topology.log"
  test_print_trc "lstopo output:"
  do_cmd "cat topology.log"
}

#######################################
# Compare the number of L3 caches with the number of packages, both taken
# from the "depth" summary lines of `lstopo -v --no-io`, which is written
# to topology_verbose.log in the current directory.
# Globals:
#   LL3_PER_SOCKET (written) - "yes" when there are more L3 caches than
#                              packages, "no" otherwise
# Returns:
#   Multiple L3 per package: 0 if the trace call succeeded, else 1.
#   Otherwise:               1 if the trace call succeeded, else 0.
#######################################
get_ll3_num() {
  local pkg_num
  local llc_num
  local -r verbose_log="topology_verbose.log"

  test_print_trc "Check if the platform supports multiple LLC in one package:"
  lstopo --no-io 1>/dev/null 2>&1 || block_test "Please install hwloc-gui.x86_64 package to get lstopo tool"
  do_cmd "lstopo -v --no-io > topology_verbose.log"

  if [[ ! -f $verbose_log ]]; then
    block_test "topology_verbose.log is not available."
  else
    # The count is the first word after the colon of the matching depth line.
    pkg_num=$(awk -F ":" '/Package/ && /depth/ {print $2}' "$verbose_log" | awk '{print $1}')
    test_print_trc "Package number is: $pkg_num"
    llc_num=$(awk -F ":" '/L3Cache/ && /depth/ {print $2}' "$verbose_log" | awk '{print $1}')
    test_print_trc "L3 Cache number is: $llc_num"
  fi

  if [[ $llc_num -gt $pkg_num ]]; then
    LL3_PER_SOCKET="yes"
    if test_print_trc "CPU supports multiple L3 Cache per package."; then
      return 0
    fi
    return 1
  fi

  LL3_PER_SOCKET="no"
  if test_print_trc "CPU supports 1 L3 Cache per package."; then
    return 1
  fi
  return 0
}

#######################################
# Switch sched_domain verbose output back off once testing is finished by
# writing N to /sys/kernel/debug/sched/verbose through do_cmd.
# Returns:
#   The status of the do_cmd call.
#######################################
disable_sched_domain_debug() {
  local -r notice="Disable the sched_domain verbose"
  local -r reset_cmd="echo N > /sys/kernel/debug/sched/verbose"

  test_print_trc "$notice"
  do_cmd "$reset_cmd"
}

#######################################
# Failure path shared by the sched_domain name checks: switch the
# sched_domain verbose output back off, then report through die.
# NOTE(review): die is defined outside this view; presumably it ends the
# test - confirm.
# Arguments:
#   $1 - failure message handed to die
#######################################
_sched_domain_name_fail() {
  disable_sched_domain_debug
  die "$1"
}

#######################################
# Check one sched_domain name of one CPU against the detected platform
# capabilities. Levels above 4 are not checked.
# Globals (read):
#   TYPE_VALUE, HYBRID_VALUE, LL3_VALUE, DIE_VALUE, SNC_VALUE, LL3_PER_SOCKET
# Arguments:
#   $1 - CPU index
#   $2 - sched_domain level (0 is the lowest domain)
#   $3 - sched_domain name read from debugfs for that level
#   $4 - content of /sys/devices/system/cpu/smt/active (1 means SMT active)
#######################################
_check_sched_domain_name() {
  local cpu=$1
  local level=$2
  local name=$3
  local smt_enable=$4
  local where="CPU$cpu sched_domain$level"

  case "$level" in
  0)
    # Only a non-Atom core with SMT active may expose an SMT domain.
    if [[ $name == SMT && $smt_enable -eq 1 && $TYPE_VALUE != Atom ]]; then
      test_print_trc "$where name $name is expected."
    elif [[ $name == SMT && $smt_enable -ne 1 ]]; then
      _sched_domain_name_fail "$where name $name is NOT expected as SMT is disabled"
    elif [[ $name == SMT && $TYPE_VALUE == Atom ]]; then
      _sched_domain_name_fail "$where name $name is NOT expected as Atom type"
    # CLS: CPUs sharing an L2 cache. Accepted on Atom cores, or on
    # non-hybrid Pcore-only SKUs that report a die level.
    elif [[ $name == CLS && $TYPE_VALUE == Atom ]]; then
      test_print_trc "$where name $name is expected as Atom type."
    elif [[ $HYBRID_VALUE == false && $name == CLS &&
      $TYPE_VALUE == 0x0 && $DIE_VALUE == true ]]; then
      test_print_trc "$where name $name is expected."
    elif [[ $name == CLS ]]; then
      _sched_domain_name_fail "$where name $name is on unknown SKU."
    elif [[ $name == MC && $TYPE_VALUE != Atom ]]; then
      test_print_trc "$where name $name is expected on SMT disable Pcore"
    else
      _sched_domain_name_fail "$where name shows $name is on unknown SKU."
    fi
    ;;
  1)
    # When all CPUs share the L3 cache, PKG would duplicate MC.
    if [[ -z $name && $HYBRID_VALUE == true && $LL3_VALUE == true ]]; then
      test_print_trc "$where name does not exist is expected on CPUs sharing LL3 SKU."
    elif [[ $name == MC ]]; then
      test_print_trc "$where name $name is expected on most SKU."
    elif [[ $name == PKG && $HYBRID_VALUE == true && $LL3_VALUE == false ]]; then
      test_print_trc "$where name $name is expected on CPUs lack of LL3 SKU."
    else
      _sched_domain_name_fail "$where name $name is on unknown SKU."
    fi
    ;;
  2)
    # Server SKUs show NUMA here instead of PKG.
    if [[ $name == NUMA && $HYBRID_VALUE == false && $DIE_VALUE == false ]]; then
      test_print_trc "$where name $name is expected on Server"
    # SKUs with multiple dies show a DIE domain.
    elif [[ $name == DIE && $HYBRID_VALUE == false && $DIE_VALUE == true &&
      $SNC_VALUE == disabled ]]; then
      test_print_trc "$where name $name is expected on SNC disabled CBB topology Server"
    # Servers with several L3 caches per package also show DIE.
    elif [[ $name == DIE && $HYBRID_VALUE == false && $LL3_PER_SOCKET == yes ]]; then
      test_print_trc "$where name $name is expected on SNC disabled multiple LL3 SKU per package"
    # Hybrid client SKUs show PKG.
    elif [[ $name == PKG && $HYBRID_VALUE == true ]]; then
      test_print_trc "$where name $name is expected on hybrid Client SKU"
    else
      _sched_domain_name_fail "$where name $name is on unknown SKU."
    fi
    ;;
  3)
    # DIE_VALUE only ever holds "true"/"false"; comparing it with "yes"
    # made the multi-die branch unreachable.
    if [[ $name == NUMA && $HYBRID_VALUE == false &&
      $LL3_PER_SOCKET == yes && $SNC_VALUE == disabled ]]; then
      test_print_trc "$where name $name is expected on SNC disabled multiple LL3 SKU per package"
    elif [[ $name == NUMA && $HYBRID_VALUE == false &&
      $DIE_VALUE == true && $SNC_VALUE == disabled ]]; then
      test_print_trc "$where name $name is expected on SNC disabled multiple die SKU"
    elif [[ $name == NUMA && $HYBRID_VALUE == false && $SNC_VALUE == enabled ]]; then
      test_print_trc "$where name $name is expected on SNC enabled SKU"
    else
      _sched_domain_name_fail "$where name $name is on unknown SKU."
    fi
    ;;
  4)
    # Same DIE_VALUE correction as level 3 ("yes"/"no" -> "true"/"false").
    if [[ $name == NUMA && $HYBRID_VALUE == false &&
      $LL3_PER_SOCKET == yes && $SNC_VALUE == enabled ]]; then
      test_print_trc "$where name $name is expected on SNC enabled multiple LL3 SKU per package"
    elif [[ $name == NUMA && $HYBRID_VALUE == false &&
      $DIE_VALUE == true && $SNC_VALUE == enabled ]]; then
      test_print_trc "$where name $name is expected on SNC enabled multiple die SKU"
    elif [[ $name == NUMA && $HYBRID_VALUE == false &&
      $DIE_VALUE == false && $SNC_VALUE == enabled ]]; then
      test_print_trc "$where name $name is expected on SNC enabled SKU"
    else
      _sched_domain_name_fail "$where name $name is on unknown SKU."
    fi
    ;;
  esac
}

#######################################
# Generic sched_domain name check.
# Detects the platform capabilities (L3 cache, die level, SNC, L3 caches per
# package), then walks every CPU listed by cpuid. A CPU's sched_domain names
# are verified whenever one of them differs from the name at the same level
# on the previous CPU, so runs of CPUs with identical domains are checked
# only once.
# Globals:
#   TYPE_VALUE, HYBRID_VALUE (written via get_core_type / get_hybrid_sku)
#   LL3_VALUE, DIE_VALUE, SNC_VALUE, LL3_PER_SOCKET (written via get_* calls)
# Arguments:
#   None
# Notes:
#   A mismatch is reported through die after the sched_domain verbose
#   output has been switched off again.
#######################################
generic_sched_domain_names() {
  local cpu_last
  local smt_enable
  local i
  local j
  local k
  local changed
  local names_af=""
  local names_af_array=()
  local names_bf_array=()

  # Highest CPU index: last "CPU <n>:" header printed by cpuid.
  cpu_last=$(cpuid -l 0x1f | grep "CPU" | tail -1 | sed 's/:$//' | awk '{print $NF}')
  smt_enable=$(cat /sys/devices/system/cpu/smt/active)

  # Enable sched_domain debug verbose and print the lstopo logs, then
  # collect the CPU capabilities covering L3 cache, die level type and SNC.
  get_lstopo_outputs
  get_ecore_wo_llc
  get_die_level_type
  get_snc_disable
  get_ll3_num
  test_print_trc

  for ((i = 0; i <= cpu_last; i++)); do
    # -h drops the file-name prefix grep adds when several domain
    # directories match, so one or many domains are handled alike.
    names_af=$(grep -h . /sys/kernel/debug/sched/domains/cpu"$i"/domain*/name)
    test_print_trc "CPU$i shows sched_domain name: $names_af"
    # Word splitting is intended: one sched_domain name per line.
    # shellcheck disable=SC2206
    names_af_array=($names_af)
    test_print_trc "Names_af_array value: ${names_af_array[*]}"

    # Has any level changed compared with the previous CPU?
    changed=0
    for ((k = 0; k < ${#names_af_array[@]}; k++)); do
      if [[ "${names_bf_array[k]}" != "${names_af_array[k]}" ]]; then
        changed=1
        break
      fi
    done

    if ((changed)); then
      test_print_trc
      test_print_trc "###### CPU$i sched_domain name check ######"

      # Hybrid flag and core type are per-CPU facts: query them once,
      # not once per domain level.
      get_hybrid_sku "$i"
      get_core_type "$i"

      for ((j = 0; j < ${#names_af_array[@]}; j++)); do
        _check_sched_domain_name "$i" "$j" "${names_af_array[j]}" "$smt_enable"
      done
    fi

    # Remember this CPU's names for the comparison on the next CPU.
    names_bf_array=("${names_af_array[@]}")
    test_print_trc "CPU$i shows sched_domain name: $names_af"
    test_print_trc "Names_bf_array value: ${names_bf_array[*]}"
  done
  disable_sched_domain_debug
}

cpu_topology_test() {
case $TEST_SCENARIO in
numa_nodes_compare)
Expand All @@ -189,6 +521,9 @@ cpu_topology_test() {
verify_level_type)
level_type
;;
verify_sched_domain_names)
generic_sched_domain_names
;;
esac
return 0
}
Expand Down
1 change: 1 addition & 0 deletions topology/tests-client
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ cpu_topology.sh -t verify_thread_per_core
cpu_topology.sh -t verify_cores_per_socket
cpu_topology.sh -t verify_socket_num
cpu_topology.sh -t verify_level_type
cpu_topology.sh -t verify_sched_domain_names
1 change: 1 addition & 0 deletions topology/tests-server
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ cpu_topology.sh -t verify_thread_per_core
cpu_topology.sh -t verify_cores_per_socket
cpu_topology.sh -t verify_socket_num
cpu_topology.sh -t verify_level_type
cpu_topology.sh -t verify_sched_domain_names

0 comments on commit 464a0c8

Please sign in to comment.