Merge pull request #426 from perftool-incubator/cdmv7

Support Common Data Model "v7dev"
perftool-incubator · Nov 6, 2023 · 3125528 · 3125528
2 parents 5e3ee9d + fcd4566
commit 3125528
Show file tree

Hide file tree

Showing 6 changed files with 155 additions and 13 deletions.
diff --git a/endpoints/base b/endpoints/base
@@ -989,11 +989,14 @@ function process_roadblocks() {
     roadblock_exit_on_error ${roadblock_rc}
 
 
+
     do_roadblock "engine-init-begin" ${engine_script_start_timeout}
     roadblock_rc=$?
     roadblock_exit_on_error ${roadblock_rc}
 
-    do_roadblock "engine-init-end" ${engine_script_start_timeout}
+    call_endpoint_specific_function "${endpoint_type}" "engine_init" "engine-init-end"
+
+    do_roadblock "engine-init-end" ${engine_script_start_timeout} messages "${msg_file}"
     roadblock_rc=$?
     roadblock_exit_on_error ${roadblock_rc}
 

diff --git a/endpoints/k8s/k8s b/endpoints/k8s/k8s
@@ -54,6 +54,7 @@ endpoint_name="k8s"
 # TODO: instead of using a prefix in the pods' names, use a unique k8s project
 pod_prefix="rickshaw"
 project_name="crucible-rickshaw"
+hypervisor_host="none"
 unique_project="0"
 hostNetwork="0"
 hugepage="0"
@@ -84,6 +85,31 @@ function cleanup_json() {
     fi
 }
 
+function endpoint_k8s_engine_init() {
+    # Write $endpoint_run_dir/env-vars.json: a JSON array holding one message
+    # per label in $all_pods. Each message is addressed to that follower and
+    # carries an "env-vars" object with endpoint_label, hosted_by,
+    # hypervisor_host, userenv and osruntime.
+    #
+    # msg_file is deliberately NOT local: process_roadblocks passes
+    # "${msg_file}" to the engine-init-end roadblock after the
+    # endpoint-specific engine_init function has been called.
+    # NOTE(review): values are inserted verbatim, without JSON escaping.
+    echo "Running endpoint_engine_init"
+    msg_file="$endpoint_run_dir/env-vars.json"
+    echo '[' >"$msg_file"
+    local count=0
+    # $all_pods is a whitespace-separated list, so it is intentionally unquoted.
+    for this_cs_label in $all_pods; do
+
+        # Read .spec.nodeName from this pod's kubectl-get-pod-<label>.json file.
+        hosted_by=$(jq -r .spec.nodeName "$endpoint_run_dir/kubectl-get-pod-$this_cs_label.json")
+
+        # Separate array elements with a comma (none before the first one).
+        if [ "$count" -gt 0 ]; then
+            printf "," >>"$msg_file"
+        fi
+        # Values are passed as quoted printf arguments so they are not subject
+        # to word splitting or filename expansion.
+        {
+            printf '{"recipient":{"type":"follower","id":"%s"},"user-object":{"env-vars":{\n' "$this_cs_label"
+            printf '"endpoint_label": "%s",\n' "$endpoint_label"
+            printf '"hosted_by": "%s",\n' "$hosted_by"
+            printf '"hypervisor_host": "%s",\n' "$hypervisor_host"
+            printf '"userenv": "%s",\n' "$userenv"
+            printf '"osruntime": "%s"\n' "$os_runtime"
+            printf '}}}\n'
+        } >>"$msg_file"
+
+        count=$((count + 1))
+    done
+    echo ']' >>"$msg_file"
+}
+
 function endpoint_k8s_test_stop() {
     local msgs_dir="$1"; shift
     local test_id="$1"; shift
@@ -1233,7 +1259,7 @@ if [ "${masters_tool_collect}" == "1" ]; then
     verify_pods_running tool_master_nodes $master_tool_pods
     echo "These nodes are hosting the master-tool pods: $master_nodes"
     new_k8s_followers+=" ${master_tool_pods}"
-    all_pods+=" $worker_tool_pods"
+    all_pods+=" $master_tool_pods"
 fi
 
 process_roadblocks k8s ${new_k8s_followers}
diff --git a/endpoints/remotehost/remotehost b/endpoints/remotehost/remotehost
@@ -46,6 +46,30 @@ endpoint_name="remotehost"
 image_cache_size=3
 osruntime[default]="chroot"
 host_mounts=""
+hypervisor_host="none" # Default is no hypervisor
+
+function endpoint_remotehost_engine_init() {
+    # Write $endpoint_run_dir/env-vars.json: a JSON array holding one message
+    # per client, server and collector label. Each message is addressed to
+    # that follower and carries an "env-vars" object with endpoint_label,
+    # hosted_by (the remote host), hypervisor_host, userenv and osruntime.
+    #
+    # msg_file is deliberately NOT local; presumably the shared roadblock code
+    # reads it after this function returns -- confirm against the caller.
+    # NOTE(review): values are inserted verbatim, without JSON escaping.
+    echo "Running endpoint_engine_init"
+
+    msg_file="$endpoint_run_dir/env-vars.json"
+    echo '[' >"$msg_file"
+    local count=0
+    # The list is left unquoted on purpose: quoting would change iteration if
+    # any of these variables is a whitespace-separated string, not an array.
+    for this_cs_label in ${clients[@]} ${servers[@]} ${collectors[@]}; do
+        # Separate array elements with a comma (none before the first one).
+        if [ "$count" -gt 0 ]; then
+            printf "," >>"$msg_file"
+        fi
+        # Values are passed as quoted printf arguments so they are not subject
+        # to word splitting or filename expansion.
+        {
+            printf '{"recipient":{"type":"follower","id":"%s"},"user-object":{"env-vars":{\n' "$this_cs_label"
+            printf '"endpoint_label": "%s",\n' "$endpoint_label"
+            printf '"hosted_by": "%s",\n' "$host"
+            printf '"hypervisor_host": "%s",\n' "$hypervisor_host"
+            printf '"userenv": "%s",\n' "$userenv"
+            printf '"osruntime": "%s"\n' "$os_runtime"
+            printf '}}}\n'
+        } >>"$msg_file"
+
+        count=$((count + 1))
+    done
+    echo ']' >>"$msg_file"
+}
 
 function endpoint_remotehost_test_stop() {
     echo "Running endpoint_remotehost_test_stop"
@@ -209,6 +233,9 @@ function process_remotehost_opts() {
                     controller_ipaddr=`get_controller_ip $host`
                 fi
                 ;;
+            hypervisor-host)
+                hypervisor_host=$val
+                ;;
             controller-ip)
                 controller_ipaddr=$val
                 ;;
@@ -354,7 +381,6 @@ function launch_osruntime() {
             base_cmd+=" --cs-label=$this_cs_label"
             base_cmd+=" --base-run-dir=$base_run_dir"
             base_cmd+=" --endpoint=remotehost"
-            base_cmd+=" --osruntime=${os_runtime}"
             base_cmd+=" --max-sample-failures=$max_sample_failures"
             base_cmd+=" --max-rb-attempts=$max_rb_attempts"
             base_cmd+=" --cpu-partitions=${total_cpu_partitions}"
@@ -383,7 +409,6 @@ function launch_osruntime() {
             echo "cpu_partitions=${total_cpu_partitions}"   >> ${endpoint_run_dir}/${env_file}
             echo "cpu_partition_index=${count}"             >> ${endpoint_run_dir}/${env_file}
             echo "endpoint=remotehost"                      >> ${endpoint_run_dir}/${env_file}
-            echo "osruntime=${os_runtime}"                  >> ${endpoint_run_dir}/${env_file}
             echo "max_sample_failures=$max_sample_failures" >> ${endpoint_run_dir}/${env_file}
             echo "max_rb_attempts=$max_rb_attempts"         >> ${endpoint_run_dir}/${env_file}
             echo "ssh_id=${ssh_id}"                         >> ${endpoint_run_dir}/${env_file}

diff --git a/engine/engine-script b/engine/engine-script
@@ -65,6 +65,26 @@ roadblock_exit_on_error ${roadblock_rc}
 do_roadblock engine-init-end ${engine_script_start_timeout}
 roadblock_rc=$?
 roadblock_exit_on_error ${roadblock_rc}
+msgs_log_file="${roadblock_msgs_dir}/engine-init-end.json"
+if [ -f ${msgs_log_file} ]; then
+    env_vars=`jq -r '.received[] | .payload.message."user-object"."env-vars" ' ${msgs_log_file} | grep -v null`
+    if [ ! -z "${env_vars}" ]; then
+        printf "Found new env vars:"
+        printf "%s\n" "${env_vars}"
+        eval `echo $env_vars | jq -r 'to_entries | .[] | "export " + .key + "=" + .value'`
+        echo $env_vars | jq -r 'to_entries | .[] | "export " + .key + "=" + .value'
+    else
+        printf "Could not find new env vars\n"
+    fi
+else
+    printf "Could not find %s\n" ${msgs_log_file}
+    echo '#/bin/ls -l '$roadblock_msgs_dir
+    /bin/ls -l $roadblock_msgs_dir
+fi
+
+# There are a number of env vars needed for post-processing metrics
+# Do this after engine-init, as some vars can be passed to the engine in that RB
+env >engine-env.txt
 
 
 do_roadblock get-data-begin ${default_timeout}

diff --git a/engine/engine-script-library b/engine/engine-script-library
@@ -1082,6 +1082,8 @@ function stop_tools() {
     tools_disabled=${1}; shift
 
     echo "running stop_tools()"
+    echo "pwd: `/bin/pwd`"
+    local old_dir=`/bin/pwd`
 
     if [ "${tools_disabled}" == "1" ]; then
         echo "Not stopping tools because --disable-tools=1 was used"
@@ -1097,7 +1099,8 @@ function stop_tools() {
                     eval $tool_cmd
                     tool_cmd_rc=$?
                     echo "Stopping tool '${tool_name}' with command '${tool_cmd}' returned ${tool_cmd_rc}"
-
+                    echo "Copying $old_dir/engine-env.txt to `/bin/pwd`"
+                    /bin/cp $old_dir/engine-env.txt .
                     popd >/dev/null
                 else
                     abort_error "stop_tools: Failed to pushd to ${tool_name}" stop-tools-end

diff --git a/rickshaw-index b/rickshaw-index
@@ -39,7 +39,7 @@ $toolbox::logging::debug = 0;
 
 my @pids;
 my $index_tools = 1;
-my @suported_cdm_vers = ('v5dev', 'v6dev');
+my @suported_cdm_vers = ('v6dev', 'v7dev');
 my %result;
 my $base_run_dir;
 my %cdm = ( 'ver' => '' );
@@ -352,8 +352,9 @@ sub wait_for_metric_descs {
 # with create_es_doc().  Metrics can be indexed from either a benchmark sample directory or a tool
 # directory.
 sub index_metrics {
-    my $index_or_queue = shift;
-    my $metr_file = shift; # filename without .json or .csv
+    my $index_or_queue = shift; # what action to take, index = submit to ES, queue = enqueue to file for bulk index later
+    my $metr_dir = shift; # directory where metric files exist
+    my $metr_file = shift; # metric filename without .json or .csv
     my $cstype = shift;
     my $csid = shift;
     my $base_doc_ref = shift; # metric_desc doc gets populated with this, usually a run doc or period doc
@@ -364,6 +365,60 @@ sub index_metrics {
     my $earliest_begin;
     my $latest_end;
     my $coder = JSON::XS->new->canonical;
+
+    my $dir =  pushd($metr_dir);
+    my %eng_env_vars;
+    if ($cdm{'ver'} eq 'v7dev') {
+        # engine-type and engine-id replace cstype and csid, but cstype/id to be removed later
+        $eng_env_vars{'engine-type'} = $cstype;
+        $eng_env_vars{'engine-id'} = $csid;
+        # engine-role is the engine's primary purpose (to run a benchmark, to collect tool data, etc)
+        if ($cstype =~ /^client$|^server$/) {
+            $eng_env_vars{'engine-role'} = 'benchmarker';
+            $eng_env_vars{'benchmark-role'} = $cstype;
+            # the following to be properly defined in a future enhancement
+            $eng_env_vars{'benchmark-name'} = 'unknown';
+            $eng_env_vars{'benchmark-group'} = 'unknown';
+            # to be 'none' once one-tool-per engine is implemented (and a benchmark engine no longer runs tools)
+            $eng_env_vars{'tool-name'} = 'unknown';
+        } elsif ($cstype =~ /^worker$|^master$|^profiler$/) {
+            $eng_env_vars{'engine-role'} = 'profiler';
+            $eng_env_vars{'benchmark-role'} = 'none';
+            $eng_env_vars{'benchmark-name'} = 'none';
+            # the following to be properly defined once one-tool-per engine is implemented
+            $eng_env_vars{'tool-name'} = 'unknown';
+        }
+
+        # Note: even when the CDM is v7dev, do not assume every metric directory has engine-env.txt, because one may be re-postprocessing older crucible runs
+        my $eng_env_file = "engine-env.txt";
+        if (! -e $eng_env_file) {
+            $eng_env_file .= ".xz";
+        }
+        if (! -e $eng_env_file) {
+            printf "pwd: %s\nCould not find %s, will not use engine env vars for metadata\n", getcwd(), $eng_env_file;
+        } else {
+            my $eng_env_fh = new IO::Uncompress::UnXz $eng_env_file, Transparent => 1 || die "[ERROR]could not open file " . $eng_env_file;
+            # Cull out env vars which we want as metadata
+            #printf "Looking for engine env vars\n";
+            my @varnames = ('HOSTNAME', 'engine_type', 'engine_role', 'benchmark_group', 'benchmark_role', 'hosted_by', 'hypervisor_host', 'osruntime', 'endpoint_label', 'userenv');
+            while (<$eng_env_fh>) {
+                chomp;
+                foreach my $varname (@varnames) {
+                    if (/^$varname=(.*)$/) {
+                        my $val = $1;
+                        $varname =~ s/_/-/g;
+                        if ($varname eq "HOSTNAME") {
+                            $varname = "hostname";
+                        }
+                        $eng_env_vars{$varname} = $val;
+                        next;
+                    }
+                }
+            }
+            close $eng_env_fh;
+        }
+    }
+
     # Copy data from 'parent' doc so querying directly for metric_desc with
     # run data is possible
     my $metr_json_file = $metr_file . ".json";
@@ -372,7 +427,7 @@ sub index_metrics {
         $metr_json_file .= ".xz";
     }
     if (! -e $metr_json_file) {
-        printf "Could not find %s, exiting\n", $metr_json_file;
+        printf "pwd: %s\nCould not find %s, exiting\n", getcwd(), $metr_json_file;
         exit 1;
     }
     if (! -e $metr_csv_file) {
@@ -410,11 +465,17 @@ sub index_metrics {
         if ( exists $$this_metr{'values'} ) {
             $metr_desc_doc{'metric_desc'}{'values'} = $$this_metr{'values'};
         }
+        # this is where we add engine-related metadata
         $metr_desc_doc{'metric_desc'}{'names'}{'cstype'} = $cstype;
         $metr_desc_doc{'metric_desc'}{'names'}{'csid'} = $csid;
+        foreach my $env_var (keys %eng_env_vars) {
+            $metr_desc_doc{'metric_desc'}{'names'}{$env_var} = $eng_env_vars{$env_var};
+        }
+
         my @names_list = sort(keys(%{ $metr_desc_doc{'metric_desc'}{'names'} }));
         $metr_desc_doc{'metric_desc'}{'names-list'} = \@names_list;
         my $metr_desc_doc_json = $coder->encode(\%metr_desc_doc);
+        #printf "metric_desc_doc:\n %s\n", $metr_desc_doc_json;
         # We do not use index_es_doc() here because that requires getting all info from the %result,
         # and %result (rickshaw-run.json) by design does not include any metric data, as it would be
         # way too large.
@@ -619,6 +680,7 @@ if (ref $idx_resp_ref eq ref [] ) {
         push(@all_indices, $idx_name);
     }
     my @sorted_vers = sort(keys(%vers));
+    printf "sorted_vers: @sorted_vers \n";
     $latest_ver = $sorted_vers[0];
     if (defined $latest_ver) {
         printf "Latest CDM version found in local ES instance is %s\n", $latest_ver;
@@ -650,6 +712,7 @@ if (ref $idx_resp_ref eq ref [] ) {
 }
 
 $cdm{'ver'} = $latest_ver;
+printf "Latest CDM version found in local ES instance is %s\n", $latest_ver;
 if (not grep(/^$cdm{'ver'}$/, @suported_cdm_vers)) {
     printf "The version of CDM used in ES [%s] is not one that is supported by rickshaw-index: [%s]\n",
            $cdm{'ver'}, join(" ",  @suported_cdm_vers);
@@ -702,7 +765,8 @@ if (-e $tool_dir and $index_tools == 1) {
                                     $tool_file =~ s/(metric-data-\S+)\.json.*/$1/;
                                     printf "Working on tool_file: %s\n", $tool_file;
 
-                                    my %job_args = ( 'tool-file' => $tool_dir . "/" . $tool_file,
+                                    my %job_args = ( 'tool-dir' => $tool_dir, 
+                                                     'tool-file' => $tool_file,
                                                      'collector' => $collector,
                                                      'num' => $num,
                                                      'doc-ref' => $base_metric_doc_ref );
@@ -724,7 +788,7 @@ foreach my $job_args (@jobs) {
         push(@pids, $pid);
         $num_jobs++;
     } else {
-        my $num_metric_docs_submitted = index_metrics('index', $$job_args{'tool-file'}, $$job_args{'collector'}, $$job_args{'num'}, $$job_args{'doc-ref'});
+        my $num_metric_docs_submitted = index_metrics('index', $$job_args{'tool-dir'}, $$job_args{'tool-file'}, $$job_args{'collector'}, $$job_args{'num'}, $$job_args{'doc-ref'});
         #$tool_dir . "/" . $tool_file, $collector, $num, $base_metric_doc_ref);
         exit 0;
     }
@@ -880,11 +944,12 @@ if (exists $result{'iterations'}) {
                                                     for (my $j = 0; $j < scalar(@{ $data{'periods'}[$k]{'metric-files'} }); $j++) {
                                                         # Metric data is still in other file(s).  For each member in 'metric-files' array,
                                                         # there should be 2 files with the same prefix
-                                                        my $metric_file_prefix = $run_dir . "/" . $cs_id_dir . "/" . $data{'periods'}[$k]{'metric-files'}[$j];
+                                                        my $metric_file_prefix = $data{'periods'}[$k]{'metric-files'}[$j];
+                                                        my $metric_dir = $run_dir . "/" . $cs_id_dir;
                                                         my $this_begin;
                                                         my $this_end;
                                                         # index_metrics() returns the earliest-begin and latest-end for metric types matching the primary-metric
-                                                        (my $num_metric_docs_submitted, $this_begin, $this_end) = index_metrics('queue', $metric_file_prefix, $cs_name, $cs_id, $base_metric_doc_ref, $data{'benchmark'}, $data{'primary-metric'});
+                                                        (my $num_metric_docs_submitted, $this_begin, $this_end) = index_metrics('queue', $metric_dir, $metric_file_prefix, $cs_name, $cs_id, $base_metric_doc_ref, $data{'benchmark'}, $data{'primary-metric'});
                                                         # From processing all metric files, get the very-earliest-begin and very-latest-end
                                                         if (defined $this_begin and defined $this_end) {
                                                             if (not defined $earliest_begin or $earliest_begin > $this_begin) {