Skip to content

Commit

Permalink
more fixes for cpu-part
Browse files Browse the repository at this point in the history
  • Loading branch information
atheurer committed Dec 5, 2023
1 parent 4488458 commit 8e7b718
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
11 changes: 9 additions & 2 deletions endpoints/base
Original file line number Diff line number Diff line change
Expand Up @@ -340,11 +340,15 @@ function addto_clients_servers() {
}

function set_total_cpupart() {
# To be called only once at endpoint-deploy
local engine_label cpu_partitioning

# globals used:
# cpu_part_idx clients servers cpuPartitioning
# cpu_part_idx clients servers cpuPartitioning total_cpu_partitions

# When multiple osruntimes are hosted, each one needs to differentiate
# its cpu-allocation by an assigned index. Every osruntime launched
# with cpu-partitioning must increment this index.
cpu_part_idx=0
for engine_label in ${clients[@]} ${servers[@]}; do
set +u
Expand Down Expand Up @@ -375,7 +379,10 @@ function set_osruntime_numanode_cpupart() {
os_runtime=${osruntime[default]}
fi

if [ ! -z "${cpuPartitioning[$this_cs_label]}" ]; then
# profilers never use cpu-partitioning
if echo $this_cs_label | grep -P '^profiler-\w+-\d+-\w+-\d+$'; then
cpu_partitioning=0
elif [ ! -z "${cpuPartitioning[$this_cs_label]}" ]; then
cpu_partitioning=${cpuPartitioning[$this_cs_label]}
elif [ ! -z "${cpuPartitioning[default]}" ]; then
cpu_partitioning=${cpuPartitioning[default]}
Expand Down
21 changes: 12 additions & 9 deletions endpoints/remotehost/remotehost
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ function exec_pod() {
# globals used:
# os_runtime cpu_partitioning numa_node controller_ipaddr ssh_id cs_rb_opts
# endpoint_run_dir base_run_dir max_rb_attempts total_cpu_partitions
# max_sample_failures host_mounts user host remote_cfg_dir
# max_sample_failures host_mounts user host remote_cfg_dir cpu_part_idx

set_osruntime_numanode_cpupart $engine_label

Expand All @@ -355,10 +355,13 @@ function exec_pod() {
# why is this not handled in cs_rb_opt?
echo "max_rb_attempts=$max_rb_attempts" >> $endpoint_run_dir/$env_file

# TODO
#echo "cpu_partitioning=$cpu_partitioning" >> ${endpoint_run_dir}/${env_file}
#echo "cpu_partitions=${total_cpu_partitions}" >> ${endpoint_run_dir}/${env_file}
#echo "cpu_partition_index=${count}" >> ${endpoint_run_dir}/${env_file}
echo "cpu_partitioning=$cpu_partitioning" >> ${endpoint_run_dir}/${env_file}
if [ "$cpu_partitioning" == "1" ]; then
echo "cpu_partitions=$total_cpu_partitions" >> $endpoint_run_dir/$env_file
echo "cpu_partition_index=$cpu_part_idx" >> ${endpoint_run_dir}/${env_file}
let cpu_part_idx=$cpu_part_idx+1
fi


# client and server engines never run tools
if echo $engine_label | grep -v -P '^profiler' >/dev/null; then
Expand Down Expand Up @@ -470,7 +473,7 @@ function exec_chroot() {
# globals used:
# user host controller_ipaddr endpoint_run_dir cs_rb_opts base_run_dir
# max_sample_failures max_rb_attempts total_cpu_partitions engine_script_start_timeout
# os_runtime numa_node cpu_partitioning chroot_rbind_mounts
# os_runtime numa_node cpu_partitioning chroot_rbind_mounts cpu_part_idx

set_osruntime_numanode_cpupart $engine_label
echo "Preparing to chroot $engine_label"
Expand Down Expand Up @@ -527,10 +530,10 @@ function exec_chroot() {
base_cmd+=" --max-sample-failures=$max_sample_failures"
base_cmd+=" --max-rb-attempts=$max_rb_attempts"

if echo $engine_label | grep -P '$(client|server)-\d+$' >/dev/null; then
base_cmd+=" --cpu-partitions=$total_cpu_partitions"
base_cmd+=" --cpu-partition-index=$cpu_part_idx"
base_cmd+=" --cpu-partitions=$total_cpu_partitions"
if [ "$cpu_partitioning" == "1" ]; then
base_cmd+=" --cpu-partitioning=$cpu_partitioning"
base_cmd+=" --cpu-partition-index=$cpu_part_idx"
let cpu_part_idx=$cpu_part_idx+1
fi

Expand Down

0 comments on commit 8e7b718

Please sign in to comment.