Skip to content

Commit

Permalink
Run one tool per engine for remotehost
Browse files Browse the repository at this point in the history
- engine naming needs to be resolved to allow multiple remotehost
  endpoints
  • Loading branch information
atheurer committed Nov 21, 2023
1 parent 3125528 commit 64e17dc
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 23 deletions.
116 changes: 104 additions & 12 deletions endpoints/remotehost/remotehost
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ image_cache_size=3
osruntime[default]="chroot"
host_mounts=""
hypervisor_host="none" # Default is no hypervisor
new_remotehost_followers=""

function endpoint_remotehost_engine_init() {
echo "Running endpoint_engine_init"
Expand Down Expand Up @@ -160,6 +161,18 @@ function cleanup_osruntime() {
do_ssh $user@$host podman mount
echo

# Clean up tool engines

echo "Going to remove a pod for each tool"
local tools=`awk -F: '{print $1}' $config_dir/tool-cmds/profiler/start`
tool_count=1
for tool in $tools; do
engine_label=profiler-$tool_count
container_name="${endpoint_label}_${run_id}_${engine_label}_${os_runtime}"
do_ssh $user@$host podman rm ${container_name}
let tool_count=$tool_count+1
done

for this_cs_label in ${clients[@]} ${servers[@]} ${collectors[@]}; do
set_osruntime_numanode_cpupart ${this_cs_label}

Expand Down Expand Up @@ -282,6 +295,70 @@ function remotehost_req_check() {
fi
}

# Launch one detached, privileged podman container on the remote host that
# runs a single tool (one "profiler" engine per tool).
#
# Arguments:
#   $1  engine_label  engine label such as "profiler-1"; written to the env
#                     file as cs_label and embedded in the container name
#   $2  tool          tool name; written to the env file as tool_name
#
# Reads from the enclosing scope: controller_ipaddr, endpoint_run_dir,
# base_run_dir, max_rb_attempts, ssh_id, cs_rb_opts, user, host,
# remote_cfg_dir, remote_data_dir, host_mounts, endpoint_label, run_id.
# Calls get_image, do_scp and do_ssh, which are defined outside this function.
#
# Side effects: appends to ${endpoint_run_dir}/${engine_label}_env.txt, copies
# that file to the remote host, and sets this_image (through get_image).
# Exits the whole script with status 1 on any failure.
function exec_pod() {
    local engine_label=$1
    local tool=$2
    local os_runtime="pod"
    local env_tool_file env_path engine_id cs_rb_opt arg value
    local tool_cs_cmd ssh_rc fs oldIFS

    # Without both arguments the env file name and container name are malformed.
    if [ -z "$engine_label" ] || [ -z "$tool" ]; then
        echo "exec_pod: an engine label and a tool name are both required"
        exit 1
    fi

    env_tool_file="${engine_label}_env.txt"
    env_path="${endpoint_run_dir}/${env_tool_file}"

    # Take the image id from the label itself (second '-' separated field)
    # rather than from whatever the caller's loop counter currently holds.
    # Fall back to the caller's tool_count if the label carries no id.
    engine_id=$(echo "$engine_label" | awk -F- '{print $2}')
    engine_id=${engine_id:-$tool_count}
    get_image profiler "$engine_id" this_image

    {
        echo "rickshaw_host=$controller_ipaddr"
        echo "endpoint_run_dir=$endpoint_run_dir"
        echo "cs_label=$engine_label"
        echo "tool_name=$tool"
        echo "base_run_dir=$base_run_dir"
        echo "endpoint=remotehost"
        echo "max_rb_attempts=$max_rb_attempts"
        echo "ssh_id=${ssh_id}"

        # roadblock opts like redis server
        for cs_rb_opt in $cs_rb_opts; do
            # Split on the FIRST '=' only, so a value that itself contains
            # '=' (for example a password) is kept whole.
            arg=${cs_rb_opt%%=*}
            if [ "$arg" == "$cs_rb_opt" ]; then
                value=""    # option given without any "=value" part
            else
                value=${cs_rb_opt#*=}
            fi
            # "--some-opt" becomes "some_opt"
            arg=${arg#--}
            arg=${arg//-/_}
            echo "${arg}=${value}"
        done
    } >> "$env_path"

    # do_scp is handed a relative file name, so run it from the run directory.
    if ! pushd "${endpoint_run_dir}" >/dev/null; then
        echo "Failed to pushd to ${endpoint_run_dir} to scp env file"
        exit 1
    fi
    echo "Copying ${endpoint_run_dir}/${env_tool_file} to ${user}@${host}:${remote_cfg_dir}"
    do_scp "" "${env_tool_file}" "${user}@${host}" "${remote_cfg_dir}"
    popd >/dev/null

    # NOTE(review): the container name suffix is the literal "podman", while
    # cleanup_osruntime builds the name to remove from ${os_runtime} -- confirm
    # the two agree when the endpoint's os_runtime is not "podman".
    tool_cs_cmd="podman run"
    tool_cs_cmd+=" --detach=true"
    tool_cs_cmd+=" --name=${endpoint_label}_${run_id}_${engine_label}_podman"
    tool_cs_cmd+=" --env-file ${remote_cfg_dir}/${env_tool_file}"
    tool_cs_cmd+=" --privileged --ipc=host --pid=host --net=host --security-opt=label=disable"
    tool_cs_cmd+=" --mount=type=bind,source=${remote_data_dir},destination=/tmp"
    tool_cs_cmd+=" --mount=type=bind,source=/lib/firmware,destination=/lib/firmware"
    tool_cs_cmd+=" --mount=type=bind,source=/lib/modules,destination=/lib/modules"
    tool_cs_cmd+=" --mount=type=bind,source=/usr/src,destination=/usr/src"
    if [ "$host_mounts" != "" ]; then
        # host_mounts is a space-separated list; split on spaces only.
        oldIFS=$IFS
        IFS=" "
        for fs in $host_mounts; do
            tool_cs_cmd+=" --mount=type=bind,source=$fs,destination=$fs"
        done
        IFS=$oldIFS
    fi
    tool_cs_cmd+=" ${this_image}"

    echo -e "About to run:\ndo_ssh $user@$host ${tool_cs_cmd}\n"
    # Note: this never really captures non-zero exits
    do_ssh $user@$host "${tool_cs_cmd}"
    ssh_rc=$?
    if [ ${ssh_rc} -gt 0 ]; then
        echo "running ${os_runtime} failed"
        exit 1
    fi
}

function launch_osruntime() {
local this_cs_label this_cs_log_file base_cmd cs_cmd cs_rb_env env_file
local env_opts existing_container container_id container_mount container_name fs
Expand All @@ -304,30 +381,40 @@ function launch_osruntime() {

set_total_cpupart

# Launch the tool engines (support only pods)
local tools=`awk -F: '{print $1}' $config_dir/tool-cmds/profiler/start`
echo "Creating a pod for each of these tools: $tools"
tool_count=1
echo "Going to create a pod for each tool"
for tool in $tools; do
# This label will not work with multiple remotehost endpoints!
engine_label=profiler-$tool_count
exec_pod $engine_label $tool
new_remotehost_followers+=" $engine_label"
let tool_count=$tool_count+1
done

# For each client and server launch the actual script which will run it.
count=1
ssh_id=$(sed -z 's/\n/\\n/g' ${config_dir}/rickshaw_id.rsa)
for this_cs_label in ${clients[@]} ${servers[@]} ${collectors[@]}; do

this_cs_type=`echo $this_cs_label | awk -F- '{print $1}'`
this_cs_id=`echo $this_cs_label | awk -F- '{print $2}'`
get_image $this_cs_type $this_cs_id this_image
set_osruntime_numanode_cpupart $this_cs_label

echo "Preparing to launch $this_cs_label"
container_name="${endpoint_label}_${run_id}_${this_cs_label}_${os_runtime}"
existing_container=`do_ssh $user@$host podman ps --all --format "{{.Names}}" | grep ^$container_name$`
if [ ! -z "$existing_container" ]; then
echo "WARNING: found existing container '$existing_container', deleting"
do_ssh $user@$host podman stop $container_name
do_ssh $user@$host podman kill $container_name
do_ssh $user@$host podman rm $container_name
fi
this_cs_log_file="$this_cs_label.txt"

if [ $count -gt 1 ]; then
# Only the first client/server needs to run tools
echo "Skipping tools execution on $this_cs_label because a previous client/server is running tools on this host"
this_disable_tools="1"
else
this_disable_tools="$disable_tools"
fi

if [ "${os_runtime}" == "chroot" ]; then
echo "using chroot"

Expand Down Expand Up @@ -387,7 +474,9 @@ function launch_osruntime() {
base_cmd+=" --cpu-partition-index=${count}"
base_cmd+=" --cpu-partitioning=$cpu_partitioning"
base_cmd+=" --engine-script-start-timeout=$engine_script_start_timeout"
base_cmd+=" --disable-tools=$this_disable_tools"
#base_cmd+=" --disable-tools=$this_disable_tools"
# For one-tool-per-engine, client and server engines never run tools
base_cmd+=" --disable-tools=1"
if [ $numa_node -gt -1 ]; then
base_cmd="numactl -N $numa_node -m $numa_node $base_cmd"
fi
Expand All @@ -412,7 +501,9 @@ function launch_osruntime() {
echo "max_sample_failures=$max_sample_failures" >> ${endpoint_run_dir}/${env_file}
echo "max_rb_attempts=$max_rb_attempts" >> ${endpoint_run_dir}/${env_file}
echo "ssh_id=${ssh_id}" >> ${endpoint_run_dir}/${env_file}
echo "disable_tools=$this_disable_tools" >> ${endpoint_run_dir}/${env_file}
#echo "disable_tools=$this_disable_tools" >> ${endpoint_run_dir}/${env_file}
# For one-tool-per-engine, client and server engines never run tools
echo "disable_tools=1" >> ${endpoint_run_dir}/${env_file}

for cs_rb_opt in $cs_rb_opts; do
arg=$(echo $cs_rb_opt | awk -F'=' '{print $1}')
Expand Down Expand Up @@ -457,7 +548,7 @@ function launch_osruntime() {
cs_cmd+=" ${this_image}"
fi

echo -e "About to run:\n${cs_cmd}\n"
echo -e "About to run:\ndo_ssh $user@$host ${cs_cmd}\n"
do_ssh $user@$host "${cs_cmd}"
ssh_rc=$?
if [ ${ssh_rc} -gt 0 ]; then
Expand Down Expand Up @@ -557,4 +648,5 @@ fi
ssh_id=$(sed -z 's/\n/\\n/g' ${config_dir}/rickshaw_id.rsa)
base_req_check
launch_osruntime
process_roadblocks remotehost
echo "about to call: process_roadblocks remotehost $new_remotehost_followers"
process_roadblocks remotehost $new_remotehost_followers
17 changes: 12 additions & 5 deletions engine/engine-script
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,26 @@ do_roadblock start-tools-begin ${default_timeout}
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}

start_tools
start_stop_tools_opt=""
if [ "$cs_type" == "profiler" ]; then
if [ -z "$tool_name" ]; then
echo "env:"
env
exit_error "tool_name not defined [$tool_name], exiting"
fi
start_stop_tools_opt=$tool_name
fi

start_tools $start_stop_tools_opt

do_roadblock start-tools-end ${default_timeout}
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}


process_bench_roadblocks


do_roadblock stop-tools-begin ${default_timeout}
do_roadblock stop-tools-end ${default_timeout} wait-for "/usr/local/bin/engine-script-library stop_tools '$(pwd)' '${tool_stop_cmds}' '${disable_tools}'"

do_roadblock stop-tools-end ${default_timeout} wait-for "/usr/local/bin/engine-script-library stop_tools '$(pwd)' '${tool_stop_cmds}' '${disable_tools}' '${start_stop_tools_opt}'"

do_roadblock send-data-begin ${default_timeout}
do_roadblock send-data-end ${default_timeout} wait-for "/usr/local/bin/engine-script-library send_data '${ssh_id_file}' '${cs_dir}' '${rickshaw_host}' '${archives_dir}/${cs_label}-data.tgz'"
Expand Down
17 changes: 14 additions & 3 deletions engine/engine-script-library
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,11 @@ function validate_core_env() {
if [ -z "$cs_label" ]; then
exit_error "The client/server label (--cs-label) was not defined"
fi
if echo $cs_label | grep -q -P '^(\w+)-\d+$'; then
regex='^\w+-\d+(-\w+){0,1}$'
if echo $cs_label | grep -q -P $regex; then
echo "engine-label \"$cs_label\" is valid"
else
exit_error 'cs_label "'$cs_label'" does not adhere to regex /^(\w+)-\d+$/'
exit_error 'cs_label "'$cs_label'" does not adhere to regex '$regex
fi

if [ -z "${max_rb_attempts}" ]; then
Expand Down Expand Up @@ -357,7 +358,7 @@ function setup_core_env() {

function get_data() {
# Get files required to run benchmark and tools
if [ $cs_type == "client" -o $cs_type == "server" -o $cs_type == "profiler" ]; then
if [ $cs_type == "client" -o $cs_type == "server" ]; then
cs_files_list="$cs_type-$cs_id-files-list"
else # worker and master do not get id-specific files-list, at least not yet
cs_files_list="$cs_type-files-list"
Expand Down Expand Up @@ -443,6 +444,7 @@ function collect_sysinfo() {
}

function start_tools() {
local one_tool=$1; shift
local tool_name tool_cmd tool_cmd_rc total_tools

echo "running start_tools()"
Expand All @@ -462,6 +464,10 @@ function start_tools() {
while read -u 9 line; do
tool_name=`echo $line | awk -F: '{print $1}'`
tool_cmd=`echo $line | sed -e s/^$tool_name://`
if [ ! -z "$one_tool" -a "$one_tool" != "$tool_name" ]; then
echo "Not starting $tool_name because this engine only runs one tool, $one_tool"
continue
fi
(( total_tools += 1 ))
/bin/mkdir -p $tool_name
if pushd $tool_name >/dev/null; then
Expand Down Expand Up @@ -1080,6 +1086,7 @@ function stop_tools() {
working_directory=${1}; shift
tool_stop_cmds_file=${1}; shift
tools_disabled=${1}; shift
local one_tool=$1; shift

echo "running stop_tools()"
echo "pwd: `/bin/pwd`"
Expand All @@ -1093,6 +1100,10 @@ function stop_tools() {
while read -u 9 line; do
tool_name=`echo $line | awk -F: '{print $1}'`
tool_cmd=`echo $line | sed -e s/^$tool_name://`
if [ ! -z "$one_tool" -a "$one_tool" != "$tool_name" ]; then
echo "Not starting $tool_name because this engine only runs one tool, $one_tool"
continue
fi
(( total_tools += 1 ))
if pushd $tool_name >/dev/null; then
echo "Stopping tool '${tool_name}' with command '${tool_cmd}'"
Expand Down
6 changes: 3 additions & 3 deletions rickshaw-run
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use toolbox::logging;
use toolbox::run;
use toolbox::jsonsettings;

$toolbox::logging::debug = 0;
$toolbox::logging::debug = 1;

my $ug = Data::UUID->new;
my %defaults = ( "num-samples" => 1, "tool-group" => "default", "test-order" => "s",
Expand Down Expand Up @@ -896,7 +896,7 @@ sub source_container_image {
if ($count == 0) {
$userenv_arg = " --userenv " . $rickshaw_project_dir . "/userenvs/" . $userenv . ".json";
$req_arg = "";
$skip_update = "false";
$skip_update = "true";
} else {
$req_arg = shift(@requirements);
$skip_update = "true";
Expand Down Expand Up @@ -2053,7 +2053,7 @@ sub prepare_bench_tool_engines() {
# The "engine-script" will first scp the list (client-files-list or server-files-list).
# then it will read this list to know what other files to copy over)
foreach my $cs_type (keys %clients_servers, @all_collector_types) {
if ($cs_type =~ /^client|server|profiler?/) {
if ($cs_type =~ /^client|server?/) {
foreach my $cs_ref (@{ $clients_servers{$cs_type} }) {
if (! defined $$cs_ref{'id'}) {
printf "cs_type: [%s] cs_ref{'id'} not defined, skipping\n", $cs_type;
Expand Down

0 comments on commit 64e17dc

Please sign in to comment.