Skip to content

Commit

Permalink
Merge pull request #210 from HSF/flin
Browse files Browse the repository at this point in the history
v0.3.4; fix duplicate templates; wrapper exit code message
  • Loading branch information
mightqxc authored Jan 26, 2024
2 parents 8ca0b44 + 178ed5d commit b77511b
Show file tree
Hide file tree
Showing 17 changed files with 81 additions and 121 deletions.
98 changes: 0 additions & 98 deletions package/setup.py

This file was deleted.

2 changes: 1 addition & 1 deletion pandaharvester/commit_timestamp.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
timestamp = "22-01-2024 10:23:33 on hatchbuild (by mightqxc)"
timestamp = "26-01-2024 12:22:14 on flin (by mightqxc)"
12 changes: 9 additions & 3 deletions pandaharvester/harvestermessenger/shared_file_messenger.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
except ImportError:
from scandir import scandir, walk

import distutils.spawn
try:
from shutil import which
except ImportError:
# before python 3.3
from distutils.spawn import find_executable as which

import fnmatch
import multiprocessing
import os.path
Expand All @@ -30,10 +35,11 @@
from concurrent.futures import ThreadPoolExecutor as Pool

from future.utils import iteritems
from past.builtins import long

from pandaharvester.harvesterconfig import harvester_config
from pandaharvester.harvestercore import core_utils
from pandaharvester.harvestercore.work_spec import WorkSpec
from past.builtins import long

from .base_messenger import BaseMessenger

Expand Down Expand Up @@ -141,7 +147,7 @@ def tar_directory(dir_name, tar_name=None, max_depth=None, extra_files=None, sub
com += r"-type f \( " + filter_log_tgz(extra_files) + r"\) "
com += r'| grep -v {0} | tr "\n" "\0" | '.format(jobSpecFileName)
com += "tar "
if distutils.spawn.find_executable("pigz") is None:
if which("pigz") is None:
com += "-z "
else:
com += "-I pigz "
Expand Down
19 changes: 16 additions & 3 deletions pandaharvester/harvestermonitor/htcondor_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from concurrent.futures import ThreadPoolExecutor as Pool

import six

from pandaharvester.harvesterconfig import harvester_config
from pandaharvester.harvestercore import core_utils
from pandaharvester.harvestercore.pilot_errors import PilotErrors
Expand All @@ -15,6 +16,7 @@
condor_job_id_from_workspec,
get_host_batchid_map,
)
from pandaharvester.harvestermonitor.monitor_common import get_payload_errstr_from_ec

# logger
baseLogger = core_utils.setup_logger("htcondor_monitor")
Expand Down Expand Up @@ -44,7 +46,7 @@


# Check one worker
def _check_one_worker(workspec, job_ads_all_dict, cancel_unknown=False, held_timeout=3600):
def _check_one_worker(workspec, job_ads_all_dict, cancel_unknown=False, held_timeout=3600, payload_type=None):
# Make logger for one single worker
tmpLog = core_utils.make_logger(baseLogger, f"workerID={workspec.workerID}", method_name="_check_one_worker")
# Initialize newStatus
Expand Down Expand Up @@ -160,7 +162,11 @@ def _check_one_worker(workspec, job_ads_all_dict, cancel_unknown=False, held_tim
else:
# Other return codes are considered failed
newStatus = WorkSpec.ST_failed
errStr = f"Payload execution error: returned non-zero {payloadExitCode}"
errStr = ""
if payload_type:
errStr = get_payload_errstr_from_ec(payload_type, payloadExitCode)
if not errStr:
errStr = f"Payload execution error: returned non-zero {payloadExitCode}"
tmpLog.debug(errStr)
# Map return code to Pilot error code
reduced_exit_code = payloadExitCode // 256 if (payloadExitCode % 256 == 0) else payloadExitCode
Expand Down Expand Up @@ -233,6 +239,10 @@ def __init__(self, **kwarg):
self.condorHostConfig_list
except AttributeError:
self.condorHostConfig_list = []
try:
self.payloadType
except AttributeError:
self.payloadType = None

# check workers
def check_workers(self, workspec_list):
Expand All @@ -256,7 +266,10 @@ def check_workers(self, workspec_list):
# Check for all workers
with Pool(self.nProcesses) as _pool:
retIterator = _pool.map(
lambda _x: _check_one_worker(_x, job_ads_all_dict, cancel_unknown=self.cancelUnknown, held_timeout=self.heldTimeout), workspec_list
lambda _x: _check_one_worker(
_x, job_ads_all_dict, cancel_unknown=self.cancelUnknown, held_timeout=self.heldTimeout, payload_type=self.payloadType
),
workspec_list,
)
retList = list(retIterator)
tmpLog.debug("done")
Expand Down
16 changes: 16 additions & 0 deletions pandaharvester/harvestermonitor/monitor_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#########################
# Maps of payload exit code to error message
#########################

# Keyed first by payload type, then by the integer exit code of the payload;
# each leaf is the human-readable error message for that exit code.
ec_em_map = {
    "atlas_pilot_wrapper": {
        1: "wrapper fault",
        2: "wrapper killed stuck pilot",
    },
}


def get_payload_errstr_from_ec(payload_type, exit_code):
    """Return the error message registered for a payload exit code.

    Args:
        payload_type: Key into ``ec_em_map`` selecting the per-payload table.
        exit_code: Exit code to look up inside that table.

    Returns:
        The mapped message, or an empty string when either the payload type
        or the exit code has no entry in ``ec_em_map``.
    """
    try:
        return ec_em_map[payload_type][exit_code]
    except KeyError:
        # Unknown payload type or unknown exit code: no specific message.
        return ""
2 changes: 1 addition & 1 deletion pandaharvester/panda_pkg_info.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
release_version = "0.3.3"
release_version = "0.3.4"
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,11 @@ directory = "dist"
packages = ["pandaharvester"]

[tool.hatch.build.targets.wheel.shared-data]
"templates" = "etc/panda"
"templates/panda" = "etc/panda"
"templates/sysconfig" = "etc/sysconfig"
"templates/init.d" = "etc/rc.d/init.d"
"templates/systemd" = "etc/systemd/system"
"templates/logrotate.d" = "etc/logrotate.d"
"templates/bin" = "local/bin"

[tool.hatch.build.targets.wheel.hooks.custom]
Expand Down
17 changes: 12 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,10 @@
(
"etc/panda",
[
"templates/panda_harvester.cfg.rpmnew.template",
"templates/logrotate.d/panda_harvester",
"templates/panda_harvester-httpd.conf.rpmnew.template",
"templates/panda_supervisord.cfg.rpmnew.template",
"templates/panda_harvester-uwsgi.ini.rpmnew.template",
"templates/panda/panda_harvester.cfg.rpmnew.template",
"templates/panda/panda_harvester-httpd.conf.rpmnew.template",
"templates/panda/panda_supervisord.cfg.rpmnew.template",
"templates/panda/panda_harvester-uwsgi.ini.rpmnew.template",
],
),
# sysconfig
Expand All @@ -80,6 +79,14 @@
# systemd
(
"etc/systemd",
[
"templates/logrotate.d/panda_harvester",
"templates/logrotate.d/panda_harvester_service",
],
),
# logrotate
(
"etc/logrotate.d",
[
"templates/systemd/panda_harvester-uwsgi.service.template",
],
Expand Down
13 changes: 10 additions & 3 deletions templates/bin/harvester-admin.rpmnew.template
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#======= START CONFIGURATION SECTION ==========================

# user to run harvester-admin
userName="#FIXME"
userName="@@panda_user@@"

# setup python and virtual env
VIRTUAL_ENV="/#FIXME"
VIRTUAL_ENV="@@virtual_env@@"

#======= END CONFIGURATION SECTION ============================

Expand All @@ -20,11 +20,18 @@ if [ -r ${VIRTUAL_ENV}/etc/sysconfig/panda_harvester ]; then
. ${VIRTUAL_ENV}/etc/sysconfig/panda_harvester
fi

# source harvester env variables (new)
if [ -r ${VIRTUAL_ENV}/etc/sysconfig/panda_harvester_env ]; then
set -a
. ${VIRTUAL_ENV}/etc/sysconfig/panda_harvester_env
set +a
fi

# path
HARVESTER_ADMIN_PY="${SITE_PACKAGES_PATH}/pandaharvester/harvesterscripts/harvester_admin.py"

# vars
if [ "x${userName}y" == "xy" -o "x${userName}y" == "x#FIXMEy" ]; then
if [ "x${userName}y" == "xy" ]; then
userName=`whoami`
fi

Expand Down
2 changes: 1 addition & 1 deletion templates/bin/panda_jedi-renice
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
# import env vars from sysconfig
source ${VIRTUAL_ENV}/etc/sysconfig/panda_harvester

SITE_PACKAGES_PATH=`python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"`
SITE_PACKAGES_PATH=`python -c "from sysconfig import get_path; print(get_path('purelib'))"`
python ${SITE_PACKAGES_PATH}/pandaharvester/harvestertest/renice.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ master-fifo = $(UWSGI_FIFO)
processes = 2
threads = 4

worker-reload-mercy = 1
worker-reload-mercy = 2

buffer = 32768
post-buffering = 32768
Expand Down
2 changes: 1 addition & 1 deletion templates/sysconfig/panda_harvester.rpmnew.template
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export PANDA_HOME=${VIRTUAL_ENV}
export TZ=UTC

# python site packages
export SITE_PACKAGES_PATH=`python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"`
export SITE_PACKAGES_PATH=`python -c "from sysconfig import get_path; print(get_path('purelib'))"`

# import panda modules
export PYTHONPATH=${SITE_PACKAGES_PATH}/pandacommon:${SITE_PACKAGES_PATH}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,12 @@ PYTHONPATH=@@install_purelib@@/pandacommon:@@install_purelib@@

# option for frontend
OPTIONS="-f @@virtual_env@@/etc/panda/panda_harvester-httpd.conf"

# log dir
LOG_DIR="/var/log/harvester"

# pid file
PIDFILE="/var/log/harvester/panda_harvester.pid"

# uWSGI master fifo
UWSGI_FIFO="/var/log/harvester/uwsgi-panda_harvester.fifo"
5 changes: 2 additions & 3 deletions templates/systemd/panda_harvester-uwsgi.service.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ After=network.target remote-fs.target nss-lookup.target
[Service]
Type=exec
RemainAfterExit=true
Environment="LOG_DIR=/var/log/harvester" "PIDFILE=/var/log/harvester/panda_harvester.pid" "UWSGI_FIFO=/var/log/harvester/uwsgi-panda_harvester.fifo"
EnvironmentFile=@@virtual_env@@/etc/sysconfig/panda_harvester_env
ExecStartPre=/bin/sh -c '${VIRTUAL_ENV}/bin/python @@install_purelib@@/pandaharvester/harvesterscripts/prescript.py -f ${LOG_DIR}/.harvester_info'
ExecStartPre=/bin/sh -c '${VIRTUAL_ENV}/bin/python ${SITE_PACKAGES_PATH}/pandaharvester/harvesterscripts/prescript.py -f ${LOG_DIR}/.harvester_info'
ExecStart=/bin/sh -c '${VIRTUAL_ENV}/bin/uwsgi ${VIRTUAL_ENV}/etc/panda/panda_harvester-uwsgi.ini >> ${LOG_DIR}/panda_harvester_stdout.log 2>> ${LOG_DIR}/panda_harvester_stderr.log'
ExecReload=/bin/sh -c '/bin/echo "c" > ${UWSGI_FIFO}'
ExecStop=/bin/sh -c '/bin/echo "Q" > ${UWSGI_FIFO}'
ExecStop=/bin/sh -c '/bin/echo "q" > ${UWSGI_FIFO}'
Restart=on-failure

[Install]
Expand Down

0 comments on commit b77511b

Please sign in to comment.