Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
atheurer committed Nov 3, 2023
1 parent 0d698b2 commit 7a4fc6b
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 51 deletions.
5 changes: 4 additions & 1 deletion endpoints/base
Original file line number Diff line number Diff line change
Expand Up @@ -989,11 +989,14 @@ function process_roadblocks() {
roadblock_exit_on_error ${roadblock_rc}



do_roadblock "engine-init-begin" ${engine_script_start_timeout}
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}

do_roadblock "engine-init-end" ${engine_script_start_timeout}
call_endpoint_specific_function "${endpoint_type}" "engine_init" "engine-init-end"

do_roadblock "engine-init-end" ${engine_script_start_timeout} messages "${msg_file}"
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}

Expand Down
28 changes: 27 additions & 1 deletion endpoints/k8s/k8s
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ endpoint_name="k8s"
# TODO: instead of using a prefix in the pods' names, use a unique k8s project
pod_prefix="rickshaw"
project_name="crucible-rickshaw"
hypervisor_host="none"
unique_project="0"
hostNetwork="0"
hugepage="0"
Expand Down Expand Up @@ -84,6 +85,31 @@ function cleanup_json() {
fi
}

# Build the message file that carries per-pod environment variables to the
# engines.
#
# Writes a JSON array to "$endpoint_run_dir/env-vars.json" containing one
# message per pod label found in $all_pods. Each message is addressed to the
# follower with that label and holds an "env-vars" object with:
#   endpoint_label, hosted_by, hypervisor_host, userenv, osruntime
# hosted_by is the pod's .spec.nodeName, read with jq from
# "$endpoint_run_dir/kubectl-get-pod-<label>.json".
#
# Globals read:    endpoint_run_dir, all_pods, endpoint_label, hypervisor_host,
#                  userenv, os_runtime
# Globals written: msg_file, hosted_by, this_cs_label
#   msg_file is intentionally not local: process_roadblocks (endpoints/base)
#   hands "${msg_file}" to the engine-init-end roadblock as its messages file.
#
# NOTE(review): values are inserted into the JSON verbatim, with no escaping;
# a value containing a double quote or backslash would produce invalid JSON.
function endpoint_k8s_engine_init() {
    echo "Running endpoint_engine_init"
    msg_file="$endpoint_run_dir/env-vars.json"
    echo '[' >"$msg_file"
    local count=0
    # $all_pods is a whitespace-separated list of labels, so it is left
    # unquoted on purpose to get one iteration per label.
    for this_cs_label in $all_pods; do

        hosted_by=$(jq -r .spec.nodeName "$endpoint_run_dir/kubectl-get-pod-$this_cs_label.json")

        # Every message after the first is preceded by a comma separator.
        if [ "$count" -gt 0 ]; then
            printf "," >>"$msg_file"
        fi
        # All expansions are quoted so values are written exactly as stored
        # (no word splitting or glob expansion on the way into the file).
        {
            echo '{"recipient":{"type":"follower","id":"'"$this_cs_label"'"},"user-object":{"env-vars":{'
            echo '"endpoint_label": "'"$endpoint_label"'",'
            echo '"hosted_by": "'"$hosted_by"'",'
            echo '"hypervisor_host": "'"$hypervisor_host"'",'
            echo '"userenv": "'"$userenv"'",'
            echo '"osruntime": "'"$os_runtime"'"'
            echo '}}}'
        } >>"$msg_file"

        count=$((count + 1))
    done
    echo ']' >>"$msg_file"
}

function endpoint_k8s_test_stop() {
local msgs_dir="$1"; shift
local test_id="$1"; shift
Expand Down Expand Up @@ -1233,7 +1259,7 @@ if [ "${masters_tool_collect}" == "1" ]; then
verify_pods_running tool_master_nodes $master_tool_pods
echo "These nodes are hosting the master-tool pods: $master_nodes"
new_k8s_followers+=" ${master_tool_pods}"
all_pods+=" $worker_tool_pods"
all_pods+=" $master_tool_pods"
fi

process_roadblocks k8s ${new_k8s_followers}
29 changes: 23 additions & 6 deletions endpoints/remotehost/remotehost
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,29 @@ osruntime[default]="chroot"
host_mounts=""
hypervisor_host="none" # Default is no hypervisor

# Build the message file that carries per-engine environment variables to the
# engines.
#
# Writes a JSON array to "$endpoint_run_dir/env-vars.json" containing one
# message per label in the clients, servers and collectors arrays. Each
# message is addressed to the follower with that label and holds an
# "env-vars" object with:
#   endpoint_label, hosted_by ($host), hypervisor_host, userenv, osruntime
#
# Globals read:    endpoint_run_dir, clients, servers, collectors,
#                  endpoint_label, host, hypervisor_host, userenv, os_runtime
# Globals written: msg_file, this_cs_label
#   msg_file is intentionally not local: process_roadblocks (endpoints/base)
#   hands "${msg_file}" to the engine-init-end roadblock as its messages file.
#
# NOTE(review): values are inserted into the JSON verbatim, with no escaping;
# a value containing a double quote or backslash would produce invalid JSON.
function endpoint_remotehost_engine_init() {
    echo "Running endpoint_engine_init"

    msg_file="$endpoint_run_dir/env-vars.json"
    echo '[' >"$msg_file"
    local count=0
    for this_cs_label in "${clients[@]}" "${servers[@]}" "${collectors[@]}"; do
        # Every message after the first is preceded by a comma separator.
        # The space before "]" is required: without it the test command
        # fails and the separator is never written.
        if [ "$count" -gt 0 ]; then
            printf "," >>"$msg_file"
        fi
        # All expansions are quoted so values are written exactly as stored
        # (no word splitting or glob expansion on the way into the file).
        {
            echo '{"recipient":{"type":"follower","id":"'"$this_cs_label"'"},"user-object":{"env-vars":{'
            echo '"endpoint_label": "'"$endpoint_label"'",'
            echo '"hosted_by": "'"$host"'",'
            echo '"hypervisor_host": "'"$hypervisor_host"'",'
            echo '"userenv": "'"$userenv"'",'
            echo '"osruntime": "'"$os_runtime"'"'
            echo '}}}'
        } >>"$msg_file"

        count=$((count + 1))
    done
    echo ']' >>"$msg_file"
}

# Logs that it was called and does nothing else; any arguments are ignored.
function endpoint_remotehost_test_stop() {
    printf '%s\n' "Running endpoint_remotehost_test_stop"
}
Expand Down Expand Up @@ -358,16 +381,13 @@ function launch_osruntime() {
base_cmd+=" --cs-label=$this_cs_label"
base_cmd+=" --base-run-dir=$base_run_dir"
base_cmd+=" --endpoint=remotehost"
base_cmd+=" --osruntime=${os_runtime}"
base_cmd+=" --max-sample-failures=$max_sample_failures"
base_cmd+=" --max-rb-attempts=$max_rb_attempts"
base_cmd+=" --cpu-partitions=${total_cpu_partitions}"
base_cmd+=" --cpu-partition-index=${count}"
base_cmd+=" --cpu-partitioning=$cpu_partitioning"
base_cmd+=" --engine-script-start-timeout=$engine_script_start_timeout"
base_cmd+=" --disable-tools=$this_disable_tools"
base_cmd+=" --hosted-by=$host" # The host that runs the osruntime for this crucible engine
base_cmd+=" --hypervisor-host=$hypervisor_host" # The hypervisor host that runs the VM (if this remotehost happens to be a VM)
if [ $numa_node -gt -1 ]; then
base_cmd="numactl -N $numa_node -m $numa_node $base_cmd"
fi
Expand All @@ -389,13 +409,10 @@ function launch_osruntime() {
echo "cpu_partitions=${total_cpu_partitions}" >> ${endpoint_run_dir}/${env_file}
echo "cpu_partition_index=${count}" >> ${endpoint_run_dir}/${env_file}
echo "endpoint=remotehost" >> ${endpoint_run_dir}/${env_file}
echo "osruntime=${os_runtime}" >> ${endpoint_run_dir}/${env_file}
echo "max_sample_failures=$max_sample_failures" >> ${endpoint_run_dir}/${env_file}
echo "max_rb_attempts=$max_rb_attempts" >> ${endpoint_run_dir}/${env_file}
echo "ssh_id=${ssh_id}" >> ${endpoint_run_dir}/${env_file}
echo "disable_tools=$this_disable_tools" >> ${endpoint_run_dir}/${env_file}
echo "hosted_by=$host" >> ${endpoint_run_dir}/${env_file}
echo "hypervisor_host=$hypervisor_host" >> ${endpoint_run_dir}/${env_file}

for cs_rb_opt in $cs_rb_opts; do
arg=$(echo $cs_rb_opt | awk -F'=' '{print $1}')
Expand Down
10 changes: 0 additions & 10 deletions engine/bootstrap
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,6 @@ while true; do
export rickshaw_host="$1"
shift;
;;
--hosted-by)
shift;
export hosted_by="$1"
shift;
;;
--hypervisor-host)
shift;
export hypervisor_host="$1"
shift;
;;
--base-run-dir)
shift;
export base_run_dir=$1
Expand Down
22 changes: 20 additions & 2 deletions engine/engine-script
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ if ! pushd $cs_dir; then
abort_error "Could not chdir to $cs_dir" engine-init-begin
exit 1
fi
# There are a number of env vars needed for post-processing metrics
env >engine-env.txt

if [ "${cpu_partitioning}" == "1" ]; then
if [ -z "${HK_CPUS}" ]; then
Expand All @@ -67,6 +65,26 @@ roadblock_exit_on_error ${roadblock_rc}
do_roadblock engine-init-end ${engine_script_start_timeout}
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}
# Import environment variables delivered with the engine-init-end roadblock.
# The roadblock's message log is expected at
# ${roadblock_msgs_dir}/engine-init-end.json; every received message that has
# a "user-object"."env-vars" object contributes its key/value pairs, which are
# exported into this script's environment.
msgs_log_file="${roadblock_msgs_dir}/engine-init-end.json"
if [ -f "${msgs_log_file}" ]; then
    # Messages without env-vars are dropped inside jq. Filtering the rendered
    # text with grep would also remove any line that merely contains "null".
    env_vars=$(jq -r '.received[] | .payload.message."user-object"."env-vars" | select(. != null)' "${msgs_log_file}")
    if [ -n "${env_vars}" ]; then
        printf "Found new env vars:"
        printf "%s\n" "${env_vars}"
        # The generated text is passed to eval, so it must be safe shell:
        # only keys that are valid identifiers are accepted, and every value
        # is shell-quoted by jq (@sh) so whitespace and metacharacters in a
        # value are kept literal instead of being interpreted.
        env_exports=$(printf '%s\n' "${env_vars}" | jq -r 'to_entries | .[] | select(.key | test("^[A-Za-z_][A-Za-z0-9_]*$")) | "export " + .key + "=" + (.value | @sh)')
        eval "${env_exports}"
        # Show exactly what was evaluated.
        printf '%s\n' "${env_exports}"
    else
        printf "Could not find new env vars\n"
    fi
else
    printf "Could not find %s\n" "${msgs_log_file}"
    # List the messages directory to help diagnose the missing file.
    echo '#/bin/ls -l '"$roadblock_msgs_dir"
    /bin/ls -l "$roadblock_msgs_dir"
fi

# There are a number of env vars needed for post-processing metrics
# Do this after engine-init, as some vars can be passed to the engine in that RB
env >engine-env.txt


do_roadblock get-data-begin ${default_timeout}
Expand Down
5 changes: 4 additions & 1 deletion engine/engine-script-library
Original file line number Diff line number Diff line change
Expand Up @@ -1082,6 +1082,8 @@ function stop_tools() {
tools_disabled=${1}; shift

echo "running stop_tools()"
echo "pwd: `/bin/pwd`"
local old_dir=`/bin/pwd`

if [ "${tools_disabled}" == "1" ]; then
echo "Not stopping tools because --disable-tools=1 was used"
Expand All @@ -1097,7 +1099,8 @@ function stop_tools() {
eval $tool_cmd
tool_cmd_rc=$?
echo "Stopping tool '${tool_name}' with command '${tool_cmd}' returned ${tool_cmd_rc}"

echo "Copying $old_dir/engine-env.txt to `/bin/pwd`"
/bin/cp $old_dir/engine-env.txt .
popd >/dev/null
else
abort_error "stop_tools: Failed to pushd to ${tool_name}" stop-tools-end
Expand Down
83 changes: 56 additions & 27 deletions rickshaw-index
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ $toolbox::logging::debug = 0;

my @pids;
my $index_tools = 1;
my @suported_cdm_vers = ('v5dev', 'v6dev');
my @suported_cdm_vers = ('v6dev', 'v7dev');
my %result;
my $base_run_dir;
my %cdm = ( 'ver' => '' );
Expand Down Expand Up @@ -369,13 +369,56 @@ sub index_metrics {
{
my $dir = pushd($metr_dir);

my $eng_env_file = "engine-env.txt";
if (! -e $eng_env_file) {
$eng_env_file .= ".xz";
}
if (! -e $eng_env_file) {
printf "pwd: %s\nCould not find %s, exiting\n", getcwd(), $eng_env_file;
exit 1;
my %eng_env_vars;
if ($cdm{'ver'} eq 'v7dev') {
# engine-type and engine-id replace cstype and csid, but cstype/id to be removed later
$eng_env_vars{'engine-type'} = $cstype;
$eng_env_vars{'engine-id'} = $csid;
# engine-role is the engine's primary purpose (to run a benchmark, to collect tool data, etc)
if ($cstype =~ /^client$|^server$/) {
$eng_env_vars{'engine-role'} = 'benchmarker';
$eng_env_vars{'benchmark-role'} = $cstype;
# the following to be properly defined in a future enhancement
$eng_env_vars{'benchmark-name'} = 'unknown';
$eng_env_vars{'benchmark-group'} = 'unknown';
# to be 'none' once one-tool-per engine is implemented (and a benchmark engine no longer runs tools)
$eng_env_vars{'tool-name'} = 'unknown';
} elsif ($cstype =~ /^worker$|^master$|^profiler$/) {
$eng_env_vars{'engine-role'} = 'profiler';
$eng_env_vars{'benchmark-role'} = 'none';
$eng_env_vars{'benchmark-name'} = 'none';
# the following to be properly defined once one-tool-per engine is implemented
$eng_env_vars{'tool-name'} = 'unknown';
}

# Note that CDM being v7 should not assume all metrics have engine-env.txt because one may be re-postprocessing older crucible runs
my $eng_env_file = "engine-env.txt";
if (! -e $eng_env_file) {
$eng_env_file .= ".xz";
}
if (! -e $eng_env_file) {
printf "pwd: %s\nCould not find %s, will not use engine env vars for metadata\n", getcwd(), $eng_env_file;
} else {
my $eng_env_fh = new IO::Uncompress::UnXz $eng_env_file, Transparent => 1 || die "[ERROR]could not open file " . $eng_env_file;
# Cull out env vars which we want as metadata
#printf "Looking for engine env vars\n";
my @varnames = ('HOSTNAME', 'engine_type', 'engine_role', 'benchmark_group', 'benchmark_role', 'hosted_by', 'hypervisor_host', 'osruntime', 'endpoint_label', 'userenv');
while (<$eng_env_fh>) {
chomp;
foreach my $varname (@varnames) {
if (/^$varname=(.*)$/) {
my $val = $1;
$varname =~ s/_/-/g;
if ($varname eq "HOSTNAME") {
$varname = "hostname";
}
$eng_env_vars{$varname} = $val;
next;
}
}
}
close $eng_env_fh;
}
}

# Copy data from 'parent' doc so querying directly for metric_desc with
Expand Down Expand Up @@ -427,30 +470,14 @@ sub index_metrics {
# this is where we add engine-related metadata
$metr_desc_doc{'metric_desc'}{'names'}{'cstype'} = $cstype;
$metr_desc_doc{'metric_desc'}{'names'}{'csid'} = $csid;
my $eng_env_fh = new IO::Uncompress::UnXz $eng_env_file, Transparent => 1 || die "[ERROR]could not open file " . $eng_env_file;
# Cull out env vars which we want as metadata
printf "Looking for engine env vars\n";
my @varnames = ('hosted_by', 'hypervisor_host', 'osruntime');
while (<$eng_env_fh>) {

chomp;
printf "env: %s\n", $_;

foreach my $varname (@varnames) {
if (/^$varname=(.*)$/) {
my $val = $1;
$varname =~ s/_/-/g;
$metr_desc_doc{'metric_desc'}{'names'}{$varname} = $val;
printf "found %s = %s\n", $varname, $val;
next;
}
}
foreach my $env_var (keys %eng_env_vars) {
$metr_desc_doc{'metric_desc'}{'names'}{$env_var} = $eng_env_vars{$env_var};
}
close $eng_env_fh;

my @names_list = sort(keys(%{ $metr_desc_doc{'metric_desc'}{'names'} }));
$metr_desc_doc{'metric_desc'}{'names-list'} = \@names_list;
my $metr_desc_doc_json = $coder->encode(\%metr_desc_doc);
#printf "metric_desc_doc:\n %s\n", $metr_desc_doc_json;
# We do not use index_es_doc() here because that requires getting all info from the %result,
# and %result (rickshaw-run.json) by design does not include any metric data, as it would be
# way too large.
Expand Down Expand Up @@ -657,6 +684,7 @@ if (ref $idx_resp_ref eq ref [] ) {
push(@all_indices, $idx_name);
}
my @sorted_vers = sort(keys(%vers));
printf "sorted_vers: @sorted_vers \n";
$latest_ver = $sorted_vers[0];
if (defined $latest_ver) {
printf "Latest CDM version found in local ES instance is %s\n", $latest_ver;
Expand Down Expand Up @@ -688,6 +716,7 @@ if (ref $idx_resp_ref eq ref [] ) {
}

$cdm{'ver'} = $latest_ver;
printf "Latest CDM version found in local ES instance is %s\n", $latest_ver;
if (not grep(/^$cdm{'ver'}$/, @suported_cdm_vers)) {
printf "The version of CDM used in ES [%s] is not one that is supported by rickshaw-index: [%s]\n",
$cdm{'ver'}, join(" ", @suported_cdm_vers);
Expand Down
5 changes: 3 additions & 2 deletions rickshaw-run
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use toolbox::logging;
use toolbox::run;
use toolbox::jsonsettings;

$toolbox::logging::debug = 1;
$toolbox::logging::debug = 0;

my $ug = Data::UUID->new;
my %defaults = ( "num-samples" => 1, "tool-group" => "default", "test-order" => "s",
Expand Down Expand Up @@ -896,7 +896,8 @@ sub source_container_image {
if ($count == 0) {
$userenv_arg = " --userenv " . $rickshaw_project_dir . "/userenvs/" . $userenv . ".json";
$req_arg = "";
$skip_update = "false";
#$skip_update = "false";
$skip_update = "true";
} else {
$req_arg = shift(@requirements);
$skip_update = "true";
Expand Down
2 changes: 1 addition & 1 deletion rickshaw-settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"timeouts": {
"default": 240,
"endpoint-deploy": 1440,
"collect-sysinfo": 600,
"collect-sysinfo": 1200,
"engine-start": 1440,
"move-data": 300
}
Expand Down

0 comments on commit 7a4fc6b

Please sign in to comment.