From 058d7ce60c708e1fc1d1f59fde331a8665f3618e Mon Sep 17 00:00:00 2001 From: mu2epro Date: Wed, 28 Jun 2023 21:59:17 -0500 Subject: [PATCH 1/4] handle a few error cases better --- Campaigns/Demo/reco.sh | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Campaigns/Demo/reco.sh b/Campaigns/Demo/reco.sh index 200f01f..fd6abbc 100755 --- a/Campaigns/Demo/reco.sh +++ b/Campaigns/Demo/reco.sh @@ -10,8 +10,6 @@ RCT=0 source /cvmfs/mu2e.opensciencegrid.org/bin/OfflineOps/functions.sh RCT=$((RCT+$?)) -tee_date "Starting Demo reco.sh" - if [ ! "$OFFLINEOPS_DIR" ]; then echo "ERROR - OFFLINEOPS_DIR needs to be defined before this script runs" RCT=$((RCT+$?)) @@ -41,7 +39,7 @@ else LOCTXT=scratch fi -control_summary exe +control_summary start get_next_SAM_file RCT=$((RCT+$?)) @@ -67,12 +65,9 @@ if [[ $RCT -eq 0 && -n "$MOO_INPUT" ]]; then echo "services.DbService.version : $DBV" >> local.fcl echo "services.DbService.verbose : $DBE" >> local.fcl - NEVARG="" [ "$MOO_FAKE" == "true" ] && NEVARG="-n 5" - tee_date "processing $MOO_INPUT" - mu2e $NEVARG -s $MOO_INPUT -o $RAFN -c local.fcl RC=$? @@ -100,7 +95,9 @@ if [ $RCT -ne 0 ]; then tee_date "removing data files from output list" rm output.txt fi -echo "$LOCTXT $LGFN none" >> output.txt +if [ "$LGFN" ]; then + echo "$LOCTXT $LGFN none" >> output.txt +fi tee_date "Final ls" ls -l @@ -112,7 +109,11 @@ control_summary final if [ "$MOO_LOCAL" ]; then RCP=0 else - pushOutput output.txt + if [ -a output.txt ]; then + pushOutput output.txt + else + tee_date "skipping pushOutput, no files to move" + fi RCP=$? 
fi From 2e49dcc36ce31c4453d6e8055be9e4ce50645102 Mon Sep 17 00:00:00 2001 From: mu2epro Date: Wed, 28 Jun 2023 22:00:40 -0500 Subject: [PATCH 2/4] a more complete understanding of SAM get-next-file timeouts, better logging --- Util/functions.sh | 65 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/Util/functions.sh b/Util/functions.sh index 7ef0487..4bcba8f 100644 --- a/Util/functions.sh +++ b/Util/functions.sh @@ -113,8 +113,13 @@ node_summary() { echo "pwd: $PWD" if [ $VERBOSE -gt 0 ]; then - echo "df on system:" - df -h + if [ $VERBOSE -gt 1 ]; then + echo "df on system:" + df -h + else + echo "df on system (skipping cvmfs partitions):" + df -h | grep -v cvmfs + fi echo "ulimit:" ulimit -a @@ -224,6 +229,12 @@ create_config() { # get_next_SAM_file() { + [ $MOO_VERBOSE -ge 1 ] && tee_date "starting get-next-file" + + [ $MOO_VERBOSE -ge 2 ] && printenv | grep SAM + + export MOO_INPUT="" + if [ -n "$MOO_LOCAL_INPUT" ]; then export MOO_INPUT="" @@ -240,27 +251,57 @@ get_next_SAM_file() { fi if ! command -v samweb > /dev/null 2>&1 ; then - echo "ERROR - get_next_SAM_file called without samweb available" + tee_date "ERROR - get_next_SAM_file called without samweb available" return 1 fi if [[ -z "$SAM_PROJECT" || -z "$SAM_CONSUMER_ID" ]]; then - echo "ERROR - get_next_SAM_file called without SAM consumer environmentals" + tee_date "ERROR - get_next_SAM_file called without SAM consumer environmentals" return 1 fi - local TEMP=$(samweb get-next-file $SAM_PROJECT $SAM_CONSUMER_ID 2>&1 ) + + local TMPS=$(mktemp) + local TMPE=$(mktemp) + + samweb get-next-file $SAM_PROJECT $SAM_CONSUMER_ID 1>$TMPS 2>$TMPE local TT=$? 
- if [[ $TT -eq 0 ]]; then - export MOO_INPUT="$TEMP" - if [ -n "$TEMP" ]; then - export MOO_INPUT_LIST=${MOO_INPUT_LIST:+$MOO_INPUT_LIST,}$MOO_INPUT + local STDO=$(cat $TMPS) + local STDE=$(cat $TMPE) + rm -f $TMPS $TMPE + + # if command timeout, TT=0 but output contains Traceback + local RC=0 + if [ $TT -eq 0 ]; then + if [[ "$STDE" =~ "Traceback" ]]; then + # case of final timeout + RC=1 + else + # case of a file delivered + # case of no more files (STDO="") + RC=0 fi else - export MOO_INPUT="" - [ $MOO_VERBOSE -ge 1 ] && echo "get-next-file output: $TEMP" + RC=1 fi - return $TT + if [[ $MOO_VERBOSE -ge 2 || $RC -ne 0 || -n "$STDE" ]]; then + echo "[$(date)] get-next-file returned:" + echo "stdout=$STDO" + echo "stderr=$STDE" + echo "command rc=$TT" + fi + + if [ $RC -eq 0 ]; then + # blank STDO might just mean end of input files + export MOO_INPUT="$STDO" + if [ -n "$MOO_INPUT" ]; then + export MOO_INPUT_LIST=${MOO_INPUT_LIST:+$MOO_INPUT_LIST,}$MOO_INPUT + fi + fi + + [ $MOO_VERBOSE -ge 1 ] && tee_date "returning get-next-file RC=$RC MOO_INPUT=$MOO_INPUT" + + return $RC } From 5103b8873907f50874a26d4670c708d2f57fdf39 Mon Sep 17 00:00:00 2001 From: mu2epro Date: Wed, 28 Jun 2023 22:01:45 -0500 Subject: [PATCH 3/4] more logical local workflow --- Util/wrapper.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Util/wrapper.sh b/Util/wrapper.sh index 7a93f1a..4a16874 100755 --- a/Util/wrapper.sh +++ b/Util/wrapper.sh @@ -86,10 +86,10 @@ fi # # create a config string out of POMS, cfg and input versions # -if [[ -z "$MOO_CAMPAIGN" && -n "$POMS4_CAMPAIGN_NAME" ]]; then +if [ -n "$POMS4_CAMPAIGN_NAME" ]; then export MOO_CAMPAIGN="$POMS4_CAMPAIGN_NAME" fi -if [[ -z "MOO_CAMPAIGN_STAGE" && -n "POMS4_CAMPAIGN_STAGE_NAME" ]]; then +if [ -n "$POMS4_CAMPAIGN_STAGE_NAME" ]; then export MOO_CAMPAIGN_STAGE=$POMS4_CAMPAIGN_STAGE_NAME fi @@ -101,7 +101,7 @@ save_environment wrapper_end # run the executable script # -tee_date start script 
$OFFLINEOPS_DIR/Campaigns/$MOO_SCRIPT +tee_date "************ start script $OFFLINEOPS_DIR/Campaigns/$MOO_SCRIPT" $OFFLINEOPS_DIR/Campaigns/$MOO_SCRIPT RC=$? tee_date OfflineOps/wrapper exiting with RC=$RC From b6a26981ab5c925b10355705e4263d8cc7c96d3a Mon Sep 17 00:00:00 2001 From: mu2epro Date: Wed, 28 Jun 2023 22:26:16 -0500 Subject: [PATCH 4/4] change for v7 --- Campaigns/CRVWB/reco.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Campaigns/CRVWB/reco.sh b/Campaigns/CRVWB/reco.sh index d32c6d5..c12dbe0 100755 --- a/Campaigns/CRVWB/reco.sh +++ b/Campaigns/CRVWB/reco.sh @@ -19,7 +19,7 @@ tee_date "args are: $@" setup mu2e setup CRVTeststand $MOO_CRVTESTSTAND -muse setup Offline v10_20_00 +muse setup Offline v10_22_01 setup -B mu2etools setup -B mu2efiletools setup -B sam_web_client @@ -89,8 +89,8 @@ do parserCrv $SEQ RCT=$((RCT+$?)) - tee_date "Running calibCrv $SEQ" - calibCrv $SEQ + tee_date "Running calibCrv -a -0.13 $SEQ" + calibCrv -a -0.13 $SEQ RCT=$((RCT+$?)) FLAG=""