Skip to content

Commit

Permalink
improve wait-for-scan logic
Browse files Browse the repository at this point in the history
  • Loading branch information
ric-evans committed Sep 27, 2024
1 parent 1901674 commit ee2385d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
13 changes: 0 additions & 13 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,6 @@ jobs:
find $CI_TEST_RUN_STDOUT_STDERR_DIR/worker-2/pilot-* -name "stderrfile" -o -name "stdoutfile" | xargs more | cat
echo "::::::::::::::" && tree $CI_TEST_RUN_STDOUT_STDERR_DIR/worker-2/pilot-*
- name: reco-icetray logs
if: always()
run: |
sudo apt install tree
tree $SKYSCAN_DEBUG_DIR
find $SKYSCAN_DEBUG_DIR -type f -not -name "*.i3" -not -name "*.pkl" -exec "more" {} + | cat # recursively cats with filenames (delimited by :::::::)
- name: rabbitmq logs
if: always()
Expand Down Expand Up @@ -574,13 +568,6 @@ jobs:
find $CI_TEST_RUN_STDOUT_STDERR_DIR/worker-2/pilot-* -name "stderrfile" -o -name "stdoutfile" | xargs more | cat
echo "::::::::::::::" && tree $CI_TEST_RUN_STDOUT_STDERR_DIR/worker-2/pilot-*
- name: reco-icetray logs
if: always()
run: |
sudo apt install tree
tree $SKYSCAN_DEBUG_DIR
find $SKYSCAN_DEBUG_DIR -type f -not -name "*.i3" -not -name "*.pkl" -exec "more" {} + | cat # recursively cats with filenames (delimited by :::::::)
- name: rabbitmq logs
if: always()
run: |
Expand Down
21 changes: 16 additions & 5 deletions resources/launch_scripts/local-scan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ else
arg_predictive_scanning_threshold="--predictive-scanning-threshold $_PREDICTIVE_SCANNING_THRESHOLD"
fi

declare -A pidmap # map of background pids to wait on

# Launch Server
./docker/launch_server.sh \
--reco-algo $_RECO_ALGO \
Expand All @@ -54,7 +56,7 @@ fi
--real-event \
2>&1 | tee "$outdir"/server.out \
&
server_pid=$!
pidmap["$!"]="central server"

# Wait for startup.json
export CI_SKYSCAN_STARTUP_JSON="$(realpath "./startup.json")"
Expand All @@ -73,10 +75,19 @@ for i in $(seq 1 $nworkers); do
--debug-directory $SKYSCAN_DEBUG_DIR \
2>&1 | tee $dir/pilot.out \
&
pidmap["$!"]="worker #$i"
echo -e "\tworker #$i launched"
done

# Wait for scan
# -- we don't actually care about the workers, if they fail or not
# -- if all the workers fail, then the server times out and we can look at worker logs
wait $server_pid
# Wait for scan components to finish

#######################################
# Report a failed component, stop the remaining ones, and abort the script.
# Globals:   pidmap (read) - map of background pid -> component label
# Arguments: $1 - label of the failed component
#            $2 - exit status that was collected for it
# Outputs:   writes an error message to stdout
# Returns:   never returns; exits the shell with status 1
#######################################
_abort_scan() {
  echo "ERROR: component '$1' failed (exit code $2)"
  sleep 5 # May need to wait for output files to be written
  if ((${#pidmap[@]} > 0)); then
    # Best effort: some of these may already be gone, so a non-zero status
    # from 'kill' is expected and must not mask the 'exit 1' below.
    # NOTE(review): each pid was recorded from "$!" right after a
    # '... | tee ... &' pipeline, so it is the pid of the pipeline's last
    # command (tee) -- confirm this is enough to stop the component itself.
    kill "${!pidmap[@]}" 2>/dev/null || true
  fi
  exit 1
}

#######################################
# Block until every background component registered in 'pidmap' has exited.
# Fails fast: the first component that exits non-zero aborts the whole scan.
# Globals:   pidmap (read; entries are removed as components finish)
# Arguments: none
# Returns:   0 once all components exited with status 0; otherwise the
#            shell exits with status 1 (see _abort_scan)
# Requires:  bash >= 5.1 for 'wait -n -p'
#######################################
wait_for_scan_components() {
  local finished_pid status pid

  while ((${#pidmap[@]} > 0)); do
    # 'wait -n' has to run in *this* shell: inside $(...) it would run in a
    # subshell that owns none of the background jobs, and 'wait' does not
    # print a pid anyway. '-p' stores the pid of the job whose status is
    # being returned.
    finished_pid=""
    status=0
    wait -n -p finished_pid || status=$?

    if [[ -z "${finished_pid:-}" ]]; then
      # 'wait -n' had nothing to report, e.g. the remaining components had
      # already terminated and been cleaned out of the job table while an
      # earlier foreground command was running. Collect them by pid instead.
      for pid in "${!pidmap[@]}"; do
        status=0
        wait "$pid" || status=$?
        if ((status != 0)); then
          _abort_scan "${pidmap[$pid]}" "$status"
        fi
        unset "pidmap[$pid]"
      done
      continue
    fi

    if ((status != 0)); then
      _abort_scan "${pidmap[$finished_pid]:-unknown (pid $finished_pid)}" "$status"
    fi

    # Remove the finished PID from the associative array
    unset "pidmap[$finished_pid]"
  done
}

# NOTE(review): the components are launched as '... 2>&1 | tee <file> &';
# unless 'pipefail' is enabled elsewhere in this script, the status seen here
# is that of 'tee', not of the component -- confirm.
wait_for_scan_components

0 comments on commit ee2385d

Please sign in to comment.