diff --git a/endpoints/remotehost/remotehost b/endpoints/remotehost/remotehost
index 390fe303..c5493014 100755
--- a/endpoints/remotehost/remotehost
+++ b/endpoints/remotehost/remotehost
@@ -47,6 +47,7 @@ image_cache_size=3
 osruntime[default]="chroot"
 host_mounts=""
 hypervisor_host="none" # Default is no hypervisor
+new_remotehost_followers=""
 
 function endpoint_remotehost_engine_init() {
     echo "Running endpoint_engine_init"
@@ -160,6 +161,18 @@ function cleanup_osruntime() {
     do_ssh $user@$host podman mount
     echo
 
+    # Clean up tool engines (names must match the --name used by exec_pod)
+
+    echo "Going to remove a pod for each tool"
+    local tools=`awk -F: '{print $1}' $config_dir/tool-cmds/profiler/start`
+    tool_count=1
+    for tool in $tools; do
+        engine_label=profiler-$tool_count
+        container_name="${endpoint_label}_${run_id}_${engine_label}_podman"
+        do_ssh $user@$host podman rm ${container_name}
+        let tool_count=$tool_count+1
+    done
+
     for this_cs_label in ${clients[@]} ${servers[@]} ${collectors[@]}; do
         set_osruntime_numanode_cpupart ${this_cs_label}
 
@@ -282,6 +295,77 @@ function remotehost_req_check() {
     fi
 }
 
+# Launch one detached podman container ("tool engine") on the remote host
+# for a single profiler tool.
+#   $1 - engine label; written as cs_label and used in the container name
+#   $2 - name of the tool this engine runs; written as tool_name
+# Reads $tool_count, set by the caller, and passes it to get_image.
+# Exits with status 1 if the env file cannot be staged from
+# $endpoint_run_dir or if the remote podman command returns non-zero.
+function exec_pod() {
+    local engine_label=$1; shift
+    local tool=""
+    tool=$1; shift
+    local os_runtime="pod"
+
+    env_tool_file="${engine_label}_env.txt"
+    get_image profiler $tool_count this_image
+
+    echo "rickshaw_host=$controller_ipaddr" >> ${endpoint_run_dir}/${env_tool_file}
+    echo "endpoint_run_dir=$endpoint_run_dir" >> ${endpoint_run_dir}/${env_tool_file}
+    echo "cs_label=$engine_label" >> ${endpoint_run_dir}/${env_tool_file}
+    echo "tool_name=$tool" >> ${endpoint_run_dir}/${env_tool_file}
+    echo "base_run_dir=$base_run_dir" >> ${endpoint_run_dir}/${env_tool_file}
+    echo "endpoint=remotehost" >> ${endpoint_run_dir}/${env_tool_file}
+    echo "max_rb_attempts=$max_rb_attempts" >> ${endpoint_run_dir}/${env_tool_file}
+    echo "ssh_id=${ssh_id}" >> ${endpoint_run_dir}/${env_tool_file}
+
+    # roadblock opts like redis server
+    for cs_rb_opt in $cs_rb_opts; do
+        arg=$(echo $cs_rb_opt | awk -F'=' '{print $1}')
+        value=$(echo $cs_rb_opt | awk -F'=' '{print $2}')
+        arg=$(echo ${arg} | sed -e 's/^--//' -e 's/-/_/g' )
+        echo "${arg}=${value}" >> ${endpoint_run_dir}/${env_tool_file}
+    done
+
+    if pushd ${endpoint_run_dir} >/dev/null; then
+        echo "Copying ${endpoint_run_dir}/${env_tool_file} to ${user}@${host}:${remote_cfg_dir}"
+        do_scp "" "${env_tool_file}" "${user}@${host}" "${remote_cfg_dir}"
+        popd >/dev/null
+    else
+        echo "Failed to pushd to ${endpoint_run_dir} to scp env file"
+        exit 1
+    fi
+
+    tool_cs_cmd="podman run"
+    tool_cs_cmd+=" --detach=true"
+    tool_cs_cmd+=" --name=${endpoint_label}_${run_id}_${engine_label}_podman"
+    tool_cs_cmd+=" --env-file ${remote_cfg_dir}/${env_tool_file}"
+    tool_cs_cmd+=" --privileged --ipc=host --pid=host --net=host --security-opt=label=disable"
+    tool_cs_cmd+=" --mount=type=bind,source=${remote_data_dir},destination=/tmp"
+    tool_cs_cmd+=" --mount=type=bind,source=/lib/firmware,destination=/lib/firmware"
+    tool_cs_cmd+=" --mount=type=bind,source=/lib/modules,destination=/lib/modules"
+    tool_cs_cmd+=" --mount=type=bind,source=/usr/src,destination=/usr/src"
+    if [ "$host_mounts" != "" ]; then
+        local oldIFS=$IFS
+        IFS=" "
+        for fs in $host_mounts; do
+            tool_cs_cmd+=" --mount=type=bind,source=$fs,destination=$fs"
+        done
+        IFS=$oldIFS
+    fi
+    tool_cs_cmd+=" ${this_image}"
+
+    echo -e "About to run:\ndo_ssh $user@$host ${tool_cs_cmd}\n"
+    # Note: this never really captures non-zero exits
+    do_ssh $user@$host "${tool_cs_cmd}"
+    ssh_rc=$?
+    if [ ${ssh_rc} -gt 0 ]; then
+        echo "running ${os_runtime} failed"
+        exit 1
+    fi
+}
+
 function launch_osruntime() {
     local this_cs_label this_cs_log_file base_cmd cs_cmd cs_rb_env env_file
     local env_opts existing_container container_id container_mount container_name fs
@@ -304,30 +388,40 @@ function launch_osruntime() {
 
     set_total_cpupart
 
+    # Launch the tool engines (support only pods)
+    local tools=`awk -F: '{print $1}' $config_dir/tool-cmds/profiler/start`
+    echo "Creating a pod for each of these tools: $tools"
+    tool_count=1
+    echo "Going to create a pod for each tool"
+    for tool in $tools; do
+        # This label will not work with multiple remotehost endpoints!
+        engine_label=profiler-$tool_count
+        exec_pod $engine_label $tool
+        new_remotehost_followers+=" $engine_label"
+        let tool_count=$tool_count+1
+    done
+
     # For each client and server launch the actual script which will run it.
     count=1
+    ssh_id=$(sed -z 's/\n/\\n/g' ${config_dir}/rickshaw_id.rsa)
     for this_cs_label in ${clients[@]} ${servers[@]} ${collectors[@]}; do
+
         this_cs_type=`echo $this_cs_label | awk -F- '{print $1}'`
         this_cs_id=`echo $this_cs_label | awk -F- '{print $2}'`
         get_image $this_cs_type $this_cs_id this_image
         set_osruntime_numanode_cpupart $this_cs_label
+        echo "Preparing to launch $this_cs_label"
         container_name="${endpoint_label}_${run_id}_${this_cs_label}_${os_runtime}"
         existing_container=`do_ssh $user@$host podman ps --all --format "{{.Names}}" | grep ^$container_name$`
         if [ ! -z "$existing_container" ]; then
             echo "WARNING: found existing container '$existing_container', deleting"
+            do_ssh $user@$host podman stop $container_name
+            do_ssh $user@$host podman kill $container_name
             do_ssh $user@$host podman rm $container_name
         fi
 
         this_cs_log_file="$this_cs_label.txt"
 
-        if [ $count -gt 1 ]; then
-            # Only the first client/server needs to run tools
-            echo "Skipping tools execution on $this_cs_label because a previous client/server is running tools on this host"
-            this_disable_tools="1"
-        else
-            this_disable_tools="$disable_tools"
-        fi
-
         if [ "${os_runtime}" == "chroot" ]; then
             echo "using chroot"
 
@@ -387,7 +481,9 @@ function launch_osruntime() {
             base_cmd+=" --cpu-partition-index=${count}"
             base_cmd+=" --cpu-partitioning=$cpu_partitioning"
             base_cmd+=" --engine-script-start-timeout=$engine_script_start_timeout"
-            base_cmd+=" --disable-tools=$this_disable_tools"
+            #base_cmd+=" --disable-tools=$this_disable_tools"
+            # For one-tool-per-engine, client and server engines never run tools
+            base_cmd+=" --disable-tools=1"
             if [ $numa_node -gt -1 ]; then
                 base_cmd="numactl -N $numa_node -m $numa_node $base_cmd"
             fi
@@ -412,7 +508,9 @@ function launch_osruntime() {
             echo "max_sample_failures=$max_sample_failures" >> ${endpoint_run_dir}/${env_file}
             echo "max_rb_attempts=$max_rb_attempts" >> ${endpoint_run_dir}/${env_file}
             echo "ssh_id=${ssh_id}" >> ${endpoint_run_dir}/${env_file}
-            echo "disable_tools=$this_disable_tools" >> ${endpoint_run_dir}/${env_file}
+            #echo "disable_tools=$this_disable_tools" >> ${endpoint_run_dir}/${env_file}
+            # For one-tool-per-engine, client and server engines never run tools
+            echo "disable_tools=1" >> ${endpoint_run_dir}/${env_file}
 
             for cs_rb_opt in $cs_rb_opts; do
                 arg=$(echo $cs_rb_opt | awk -F'=' '{print $1}')
@@ -457,7 +555,7 @@ function launch_osruntime() {
             cs_cmd+=" ${this_image}"
         fi
 
-        echo -e "About to run:\n${cs_cmd}\n"
+        echo -e "About to run:\ndo_ssh $user@$host ${cs_cmd}\n"
         do_ssh $user@$host "${cs_cmd}"
         ssh_rc=$?
         if [ ${ssh_rc} -gt 0 ]; then
@@ -557,4 +655,5 @@ fi
 ssh_id=$(sed -z 's/\n/\\n/g' ${config_dir}/rickshaw_id.rsa)
 base_req_check
 launch_osruntime
-process_roadblocks remotehost
+echo "about to call: process_roadblocks remotehost $new_remotehost_followers"
+process_roadblocks remotehost $new_remotehost_followers
diff --git a/engine/engine-script b/engine/engine-script
index 77f180dd..906049a7 100755
--- a/engine/engine-script
+++ b/engine/engine-script
@@ -113,19 +113,26 @@ do_roadblock start-tools-begin ${default_timeout}
 roadblock_rc=$?
 roadblock_exit_on_error ${roadblock_rc}
 
-start_tools
+start_stop_tools_opt=""
+if [ "$cs_type" == "profiler" ]; then
+    if [ -z "$tool_name" ]; then
+        echo "env:"
+        env
+        exit_error "tool_name not defined [$tool_name], exiting"
+    fi
+    start_stop_tools_opt=$tool_name
+fi
+
+start_tools "$start_stop_tools_opt"
 
 do_roadblock start-tools-end ${default_timeout}
 roadblock_rc=$?
 roadblock_exit_on_error ${roadblock_rc}
 
-
 process_bench_roadblocks
-
 do_roadblock stop-tools-begin ${default_timeout}
 
-do_roadblock stop-tools-end ${default_timeout} wait-for "/usr/local/bin/engine-script-library stop_tools '$(pwd)' '${tool_stop_cmds}' '${disable_tools}'"
-
+do_roadblock stop-tools-end ${default_timeout} wait-for "/usr/local/bin/engine-script-library stop_tools '$(pwd)' '${tool_stop_cmds}' '${disable_tools}' '${start_stop_tools_opt}'"
 do_roadblock send-data-begin ${default_timeout}
 do_roadblock send-data-end ${default_timeout} wait-for "/usr/local/bin/engine-script-library send_data '${ssh_id_file}' '${cs_dir}' '${rickshaw_host}' '${archives_dir}/${cs_label}-data.tgz'"
 
diff --git a/engine/engine-script-library b/engine/engine-script-library
index 7e6dee41..6c2172f8 100644
--- a/engine/engine-script-library
+++ b/engine/engine-script-library
@@ -302,10 +302,11 @@ function validate_core_env() {
     if [ -z "$cs_label" ]; then
         exit_error "The client/server label (--cs-label) was not defined"
     fi
-    if echo $cs_label | grep -q -P '^(\w+)-\d+$'; then
+    local regex='^\w+-\d+(-\w+){0,1}$'
+    if echo "$cs_label" | grep -q -P "$regex"; then
         echo "engine-label \"$cs_label\" is valid"
     else
-        exit_error 'cs_label "'$cs_label'" does not adhere to regex /^(\w+)-\d+$/'
+        exit_error "cs_label \"$cs_label\" does not adhere to regex $regex"
     fi
 
     if [ -z "${max_rb_attempts}" ]; then
@@ -357,7 +358,7 @@ function setup_core_env() {
 
 function get_data() {
     # Get files required to run benchmark and tools
-    if [ $cs_type == "client" -o $cs_type == "server" -o $cs_type == "profiler" ]; then
+    if [ "$cs_type" == "client" ] || [ "$cs_type" == "server" ]; then
         cs_files_list="$cs_type-$cs_id-files-list"
     else # worker and master do not get id-specific files-list, at least not yet
         cs_files_list="$cs_type-files-list"
@@ -443,6 +444,7 @@ function collect_sysinfo() {
 }
 
 function start_tools() {
+    local one_tool=${1:-}; if [ $# -gt 0 ]; then shift; fi
     local tool_name tool_cmd tool_cmd_rc total_tools
 
     echo "running start_tools()"
@@ -462,6 +464,10 @@ function start_tools() {
         while read -u 9 line; do
             tool_name=`echo $line | awk -F: '{print $1}'`
             tool_cmd=`echo $line | sed -e s/^$tool_name://`
+            if [ -n "$one_tool" ] && [ "$one_tool" != "$tool_name" ]; then
+                echo "Not starting $tool_name because this engine only runs one tool, $one_tool"
+                continue
+            fi
             (( total_tools += 1 ))
             /bin/mkdir -p $tool_name
             if pushd $tool_name >/dev/null; then
@@ -1080,6 +1086,7 @@ function stop_tools() {
     working_directory=${1}; shift
     tool_stop_cmds_file=${1}; shift
     tools_disabled=${1}; shift
+    local one_tool=${1:-}; if [ $# -gt 0 ]; then shift; fi
 
     echo "running stop_tools()"
     echo "pwd: `/bin/pwd`"
@@ -1093,6 +1100,10 @@ function stop_tools() {
         while read -u 9 line; do
             tool_name=`echo $line | awk -F: '{print $1}'`
             tool_cmd=`echo $line | sed -e s/^$tool_name://`
+            if [ -n "$one_tool" ] && [ "$one_tool" != "$tool_name" ]; then
+                echo "Not stopping $tool_name because this engine only runs one tool, $one_tool"
+                continue
+            fi
             (( total_tools += 1 ))
             if pushd $tool_name >/dev/null; then
                 echo "Stopping tool '${tool_name}' with command '${tool_cmd}'"
diff --git a/rickshaw-run b/rickshaw-run
index b8e34791..c2c00c4c 100755
--- a/rickshaw-run
+++ b/rickshaw-run
@@ -2053,7 +2053,7 @@ sub prepare_bench_tool_engines() {
     # The "engine-script" will first scp the list (client-files-list or server-files-list).
     # then it will read this list to know what other files to copy over)
     foreach my $cs_type (keys %clients_servers, @all_collector_types) {
-        if ($cs_type =~ /^client|server|profiler?/) {
+        if ($cs_type =~ /^(?:client|server)$/) {
             foreach my $cs_ref (@{ $clients_servers{$cs_type} }) {
                 if (! defined $$cs_ref{'id'}) {
                     printf "cs_type: [%s] cs_ref{'id'} not defined, skipping\n", $cs_type;