diff --git a/endpoints/base b/endpoints/base index 4f40e4ee..37fcfa14 100755 --- a/endpoints/base +++ b/endpoints/base @@ -989,11 +989,14 @@ function process_roadblocks() { roadblock_exit_on_error ${roadblock_rc} + do_roadblock "engine-init-begin" ${engine_script_start_timeout} roadblock_rc=$? roadblock_exit_on_error ${roadblock_rc} - do_roadblock "engine-init-end" ${engine_script_start_timeout} + call_endpoint_specific_function "${endpoint_type}" "engine_init" "engine-init-end" + + do_roadblock "engine-init-end" ${engine_script_start_timeout} messages "${msg_file}" roadblock_rc=$? roadblock_exit_on_error ${roadblock_rc} diff --git a/endpoints/k8s/k8s b/endpoints/k8s/k8s index 48e28831..8da3c9e0 100755 --- a/endpoints/k8s/k8s +++ b/endpoints/k8s/k8s @@ -54,6 +54,7 @@ endpoint_name="k8s" # TODO: instead of using a prefix in the pods' names, use a unique k8s project pod_prefix="rickshaw" project_name="crucible-rickshaw" +hypervisor_host="none" unique_project="0" hostNetwork="0" hugepage="0" @@ -84,6 +85,31 @@ function cleanup_json() { fi } +function endpoint_k8s_engine_init() { + echo "Running endpoint_engine_init" + msg_file="$endpoint_run_dir/env-vars.json" + echo '[' >$msg_file + local count=0 + for this_cs_label in $all_pods; do + + hosted_by=`jq -r .spec.nodeName $endpoint_run_dir/kubectl-get-pod-$this_cs_label.json` + + if [ $count -gt 0 ]; then + printf "," >>$msg_file + fi + echo '{"recipient":{"type":"follower","id":"'$this_cs_label'"},"user-object":{"env-vars":{' >>$msg_file + echo '"endpoint_label": "'$endpoint_label'",' >>$msg_file + echo '"hosted_by": "'$hosted_by'",' >>$msg_file + echo '"hypervisor_host": "'$hypervisor_host'",' >>$msg_file + echo '"userenv": "'$userenv'",' >>$msg_file + echo '"osruntime": "'$os_runtime'"' >>$msg_file + echo '}}}' >>$msg_file + + let count=$count+1 + done + echo ']' >>$msg_file +} + function endpoint_k8s_test_stop() { local msgs_dir="$1"; shift local test_id="$1"; shift @@ -1233,7 +1259,7 @@ if [ 
"${masters_tool_collect}" == "1" ]; then verify_pods_running tool_master_nodes $master_tool_pods echo "These nodes are hosting the master-tool pods: $master_nodes" new_k8s_followers+=" ${master_tool_pods}" - all_pods+=" $worker_tool_pods" + all_pods+=" $master_tool_pods" fi process_roadblocks k8s ${new_k8s_followers} diff --git a/endpoints/remotehost/remotehost b/endpoints/remotehost/remotehost index 3a92f69b..5490eb61 100755 --- a/endpoints/remotehost/remotehost +++ b/endpoints/remotehost/remotehost @@ -48,6 +48,29 @@ osruntime[default]="chroot" host_mounts="" hypervisor_host="none" # Default is no hypervisor +function endpoint_remotehost_engine_init() { + echo "Running endpoint_engine_init" + + msg_file="$endpoint_run_dir/env-vars.json" + echo '[' >$msg_file + local count=0 + for this_cs_label in ${clients[@]} ${servers[@]} ${collectors[@]}; do + if [ $count -gt 0 ]; then + printf "," >>$msg_file + fi + echo '{"recipient":{"type":"follower","id":"'$this_cs_label'"},"user-object":{"env-vars":{' >>$msg_file + echo '"endpoint_label": "'$endpoint_label'",' >>$msg_file + echo '"hosted_by": "'$host'",' >>$msg_file + echo '"hypervisor_host": "'$hypervisor_host'",' >>$msg_file + echo '"userenv": "'$userenv'",' >>$msg_file + echo '"osruntime": "'$os_runtime'"' >>$msg_file + echo '}}}' >>$msg_file + + let count=$count+1 + done + echo ']' >>$msg_file +} + function endpoint_remotehost_test_stop() { echo "Running endpoint_remotehost_test_stop" } @@ -358,7 +381,6 @@ function launch_osruntime() { base_cmd+=" --cs-label=$this_cs_label" base_cmd+=" --base-run-dir=$base_run_dir" base_cmd+=" --endpoint=remotehost" - base_cmd+=" --osruntime=${os_runtime}" base_cmd+=" --max-sample-failures=$max_sample_failures" base_cmd+=" --max-rb-attempts=$max_rb_attempts" base_cmd+=" --cpu-partitions=${total_cpu_partitions}" @@ -366,8 +388,6 @@ function launch_osruntime() { base_cmd+=" --cpu-partitioning=$cpu_partitioning" base_cmd+=" --engine-script-start-timeout=$engine_script_start_timeout" 
base_cmd+=" --disable-tools=$this_disable_tools" - base_cmd+=" --hosted-by=$host" # The host that runs the osruntime for this crucible engine - base_cmd+=" --hypervisor-host=$hypervisor_host" # The hypervisor host that runs the VM (if this remotehost happens to be a VM) if [ $numa_node -gt -1 ]; then base_cmd="numactl -N $numa_node -m $numa_node $base_cmd" fi @@ -389,13 +409,10 @@ function launch_osruntime() { echo "cpu_partitions=${total_cpu_partitions}" >> ${endpoint_run_dir}/${env_file} echo "cpu_partition_index=${count}" >> ${endpoint_run_dir}/${env_file} echo "endpoint=remotehost" >> ${endpoint_run_dir}/${env_file} - echo "osruntime=${os_runtime}" >> ${endpoint_run_dir}/${env_file} echo "max_sample_failures=$max_sample_failures" >> ${endpoint_run_dir}/${env_file} echo "max_rb_attempts=$max_rb_attempts" >> ${endpoint_run_dir}/${env_file} echo "ssh_id=${ssh_id}" >> ${endpoint_run_dir}/${env_file} echo "disable_tools=$this_disable_tools" >> ${endpoint_run_dir}/${env_file} - echo "hosted_by=$host" >> ${endpoint_run_dir}/${env_file} - echo "hypervisor_host=$hypervisor_host" >> ${endpoint_run_dir}/${env_file} for cs_rb_opt in $cs_rb_opts; do arg=$(echo $cs_rb_opt | awk -F'=' '{print $1}') diff --git a/engine/bootstrap b/engine/bootstrap index d16192fa..7c401ba3 100755 --- a/engine/bootstrap +++ b/engine/bootstrap @@ -129,16 +129,6 @@ while true; do export rickshaw_host="$1" shift; ;; - --hosted-by) - shift; - export hosted_by="$1" - shift; - ;; - --hypervisor-host) - shift; - export hypervisor_host="$1" - shift; - ;; --base-run-dir) shift; export base_run_dir=$1 diff --git a/engine/engine-script b/engine/engine-script index bcf0231c..77f180dd 100755 --- a/engine/engine-script +++ b/engine/engine-script @@ -45,8 +45,6 @@ if ! 
pushd $cs_dir; then abort_error "Could not chdir to $cs_dir" engine-init-begin exit 1 fi -# There are a number of env vars needed for post-processing metrics -env >engine-env.txt if [ "${cpu_partitioning}" == "1" ]; then if [ -z "${HK_CPUS}" ]; then @@ -67,6 +65,26 @@ roadblock_exit_on_error ${roadblock_rc} do_roadblock engine-init-end ${engine_script_start_timeout} roadblock_rc=$? roadblock_exit_on_error ${roadblock_rc} +msgs_log_file="${roadblock_msgs_dir}/engine-init-end.json" +if [ -f "${msgs_log_file}" ]; then + env_vars=`jq -r '.received[] | .payload.message."user-object"."env-vars" | select(. != null)' "${msgs_log_file}"` + if [ ! -z "${env_vars}" ]; then + printf "Found new env vars:" + printf "%s\n" "${env_vars}" + eval "$(echo "${env_vars}" | jq -r 'to_entries | .[] | "export \(.key)=\(.value | @sh)"')" # @sh shell-quotes each value before it is evaluated + echo "${env_vars}" | jq -r 'to_entries | .[] | "export \(.key)=\(.value | @sh)"' + else + printf "Could not find new env vars\n" + fi +else + printf "Could not find %s\n" "${msgs_log_file}" + echo '#/bin/ls -l '$roadblock_msgs_dir + /bin/ls -l $roadblock_msgs_dir +fi + +# There are a number of env vars needed for post-processing metrics +# Do this after engine-init, as some vars can be passed to the engine in that roadblock +env >engine-env.txt do_roadblock get-data-begin ${default_timeout} diff --git a/engine/engine-script-library b/engine/engine-script-library index 2f795be2..7e6dee41 100644 --- a/engine/engine-script-library +++ b/engine/engine-script-library @@ -1082,6 +1082,8 @@ function stop_tools() { tools_disabled=${1}; shift echo "running stop_tools()" + echo "pwd: `/bin/pwd`" + local old_dir=`/bin/pwd` if [ "${tools_disabled}" == "1" ]; then echo "Not stopping tools because --disable-tools=1 was used" @@ -1097,7 +1099,8 @@ function stop_tools() { eval $tool_cmd tool_cmd_rc=$? echo "Stopping tool '${tool_name}' with command '${tool_cmd}' returned ${tool_cmd_rc}" - + echo "Copying $old_dir/engine-env.txt to `/bin/pwd`" + /bin/cp $old_dir/engine-env.txt . 
popd >/dev/null else abort_error "stop_tools: Failed to pushd to ${tool_name}" stop-tools-end diff --git a/rickshaw-index b/rickshaw-index index 7f777718..047e2b8c 100755 --- a/rickshaw-index +++ b/rickshaw-index @@ -39,7 +39,7 @@ $toolbox::logging::debug = 0; my @pids; my $index_tools = 1; -my @suported_cdm_vers = ('v5dev', 'v6dev'); +my @suported_cdm_vers = ('v6dev', 'v7dev'); my %result; my $base_run_dir; my %cdm = ( 'ver' => '' ); @@ -369,13 +369,56 @@ sub index_metrics { { my $dir = pushd($metr_dir); - my $eng_env_file = "engine-env.txt"; - if (! -e $eng_env_file) { - $eng_env_file .= ".xz"; - } - if (! -e $eng_env_file) { - printf "pwd: %s\nCould not find %s, exiting\n", getcwd(), $eng_env_file; - exit 1; + my %eng_env_vars; + if ($cdm{'ver'} eq 'v7dev') { + # engine-type and engine-id replace cstype and csid, but cstype/id to be removed later + $eng_env_vars{'engine-type'} = $cstype; + $eng_env_vars{'engine-id'} = $csid; + # engine-role is the engine's primary purpose (to run a benchmark, to collect tool data, etc) + if ($cstype =~ /^client$|^server$/) { + $eng_env_vars{'engine-role'} = 'benchmarker'; + $eng_env_vars{'benchmark-role'} = $cstype; + # the following to be properly defined in a future enhancement + $eng_env_vars{'benchmark-name'} = 'unknown'; + $eng_env_vars{'benchmark-group'} = 'unknown'; + # to be 'none' once one-tool-per-engine is implemented (and a benchmark engine no longer runs tools) + $eng_env_vars{'tool-name'} = 'unknown'; + } elsif ($cstype =~ /^worker$|^master$|^profiler$/) { + $eng_env_vars{'engine-role'} = 'profiler'; + $eng_env_vars{'benchmark-role'} = 'none'; + $eng_env_vars{'benchmark-name'} = 'none'; + # the following to be properly defined once one-tool-per-engine is implemented + $eng_env_vars{'tool-name'} = 'unknown'; + } + + # Note that CDM being v7 should not assume all metrics have engine-env.txt because one may be re-postprocessing older crucible runs + my $eng_env_file = "engine-env.txt"; + if (! 
-e $eng_env_file) { + $eng_env_file .= ".xz"; + } + if (! -e $eng_env_file) { + printf "pwd: %s\nCould not find %s, will not use engine env vars for metadata\n", getcwd(), $eng_env_file; + } else { + my $eng_env_fh = IO::Uncompress::UnXz->new($eng_env_file, Transparent => 1) or die "[ERROR]could not open file " . $eng_env_file; + # Cull out env vars which we want as metadata + # Metadata keys use '-' in place of '_' (HOSTNAME becomes 'hostname'); @varnames itself is never modified + my @varnames = ('HOSTNAME', 'engine_type', 'engine_role', 'benchmark_group', 'benchmark_role', 'hosted_by', 'hypervisor_host', 'osruntime', 'endpoint_label', 'userenv'); + while (<$eng_env_fh>) { + chomp; + foreach my $varname (@varnames) { + if (/^$varname=(.*)$/) { + my $val = $1; + (my $key = $varname) =~ s/_/-/g; + if ($varname eq "HOSTNAME") { + $key = "hostname"; + } + $eng_env_vars{$key} = $val; + last; + } + } + } + close $eng_env_fh; + } } # Copy data from 'parent' doc so querying directly for metric_desc with @@ -427,30 +470,14 @@ sub index_metrics { # this is where we add engine-related metadata $metr_desc_doc{'metric_desc'}{'names'}{'cstype'} = $cstype; $metr_desc_doc{'metric_desc'}{'names'}{'csid'} = $csid; - my $eng_env_fh = new IO::Uncompress::UnXz $eng_env_file, Transparent => 1 || die "[ERROR]could not open file " . 
$eng_env_file; - # Cull out env vars which we want as metadata - printf "Looking for engine env vars\n"; - my @varnames = ('hosted_by', 'hypervisor_host', 'osruntime'); - while (<$eng_env_fh>) { - - chomp; - printf "env: %s\n", $_; - - foreach my $varname (@varnames) { - if (/^$varname=(.*)$/) { - my $val = $1; - $varname =~ s/_/-/g; - $metr_desc_doc{'metric_desc'}{'names'}{$varname} = $val; - printf "found %s = %s\n", $varname, $val; - next; - } - } + foreach my $env_var (keys %eng_env_vars) { + $metr_desc_doc{'metric_desc'}{'names'}{$env_var} = $eng_env_vars{$env_var}; } - close $eng_env_fh; my @names_list = sort(keys(%{ $metr_desc_doc{'metric_desc'}{'names'} })); $metr_desc_doc{'metric_desc'}{'names-list'} = \@names_list; my $metr_desc_doc_json = $coder->encode(\%metr_desc_doc); + #printf "metric_desc_doc:\n %s\n", $metr_desc_doc_json; # We do not use index_es_doc() here because that requires getting all info from the %result, # and %result (rickshaw-run.json) by design does not include any metric data, as it would be # way too large. 
@@ -657,6 +684,7 @@ if (ref $idx_resp_ref eq ref [] ) { push(@all_indices, $idx_name); } my @sorted_vers = sort(keys(%vers)); + printf "sorted_vers: @sorted_vers \n"; $latest_ver = $sorted_vers[0]; if (defined $latest_ver) { printf "Latest CDM version found in local ES instance is %s\n", $latest_ver; @@ -688,6 +716,7 @@ if (ref $idx_resp_ref eq ref [] ) { } $cdm{'ver'} = $latest_ver; +printf "Latest CDM version found in local ES instance is %s\n", $latest_ver; if (not grep(/^$cdm{'ver'}$/, @suported_cdm_vers)) { printf "The version of CDM used in ES [%s] is not one that is supported by rickshaw-index: [%s]\n", $cdm{'ver'}, join(" ", @suported_cdm_vers); diff --git a/rickshaw-run b/rickshaw-run index 681693cf..290fe43f 100755 --- a/rickshaw-run +++ b/rickshaw-run @@ -34,7 +34,7 @@ use toolbox::logging; use toolbox::run; use toolbox::jsonsettings; -$toolbox::logging::debug = 1; +$toolbox::logging::debug = 0; my $ug = Data::UUID->new; my %defaults = ( "num-samples" => 1, "tool-group" => "default", "test-order" => "s", @@ -896,7 +896,8 @@ sub source_container_image { if ($count == 0) { $userenv_arg = " --userenv " . $rickshaw_project_dir . "/userenvs/" . $userenv . ".json"; $req_arg = ""; - $skip_update = "false"; + # NOTE(review): this was $skip_update = "false"; both branches now set "true" - confirm that is intended + $skip_update = "true"; } else { $req_arg = shift(@requirements); $skip_update = "true"; diff --git a/rickshaw-settings.json b/rickshaw-settings.json index 2e80a7ee..49e41c45 100644 --- a/rickshaw-settings.json +++ b/rickshaw-settings.json @@ -3,7 +3,7 @@ "timeouts": { "default": 240, "endpoint-deploy": 1440, - "collect-sysinfo": 600, + "collect-sysinfo": 1200, "engine-start": 1440, "move-data": 300 }