From 66968724ea594c05b530b94fae341898ec0fcaac Mon Sep 17 00:00:00 2001 From: Lincoln Bryant Date: Mon, 24 Jul 2023 10:37:33 -0500 Subject: [PATCH 1/2] Upgrade to EL6 base, HTCondor 10, remove CVMFSExec, clean Sysview --- Dockerfile | 22 +-- sysview-client/client/__init__.py | 0 sysview-client/client/client.py | 285 ------------------------------ sysview-client/sysclient | 35 ---- 4 files changed, 1 insertion(+), 341 deletions(-) delete mode 100644 sysview-client/client/__init__.py delete mode 100644 sysview-client/client/client.py delete mode 100755 sysview-client/sysclient diff --git a/Dockerfile b/Dockerfile index f0e54dd..a67b483 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=opensciencegrid/software-base:3.5-el7-release +ARG BASE_IMAGE=hub.opensciencegrid.org/opensciencegrid/software-base:3.6-el7-release FROM ${BASE_IMAGE} ARG BASE_IMAGE @@ -44,31 +44,11 @@ RUN yum install -y docker-ce-cli RUN yum install -y http://mirror.grid.uchicago.edu/pub/mwt2/sw/el7/mwt2-sysview-worker-2.0.3-1.noarch.rpm RUN yum install -y python36-tabulate -# Add CVMFSEXEC -RUN git clone https://github.com/cvmfs/cvmfsexec /cvmfsexec \ - && cd /cvmfsexec \ - && ./makedist osg \ - # /cvmfs-cache and /cvmfs-logs is where the cache and logs will go; possibly bind-mounted. \ - # Needs to be 1777 so the unpriv user can use it. \ - # (Can't just chown, don't know the UID of the unpriv user.) \ - && mkdir -p /cvmfs-cache /cvmfs-logs \ - && chmod 1777 /cvmfs-cache /cvmfs-logs \ - && rm -rf dist/var/lib/cvmfs log \ - && ln -s /cvmfs-cache dist/var/lib/cvmfs \ - && ln -s /cvmfs-logs log \ - # tar up and delete the contents of /cvmfsexec so the unpriv user can extract it and own the files. \ - && tar -czf /cvmfsexec.tar.gz ./* \ - && rm -rf ./* \ - # Again, needs to be 1777 so the unpriv user can extract into it. \ - && chmod 1777 /cvmfsexec - COPY condor/*.conf /etc/condor/config.d/ COPY cron/* /etc/cron.d/ COPY supervisor/* /etc/supervisord.d/ COPY image-config/* /etc/osg/image-config.d/ COPY libexec/* /usr/local/libexec/ -COPY sysview-client/sysclient /bin/ -COPY sysview-client/client /usr/lib/python3.6/site-packages/sysview/client COPY scripts/condor_node_check.sh /usr/local/sbin/ COPY scripts/entrypoint.sh /bin/entrypoint.sh diff --git a/sysview-client/client/__init__.py b/sysview-client/client/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/sysview-client/client/client.py b/sysview-client/client/client.py deleted file mode 100644 index 3e20efb..0000000 --- a/sysview-client/client/client.py +++ /dev/null @@ -1,285 +0,0 @@ -import os -import pwd -import time -import logging -import argparse -import tabulate - -from sysview.common.utils import get_config -from sysview.common.utils import expand_hostlist -from sysview.common.utils import get_base_parser - -""" -Parser for systools -""" -def get_parser(): - parser = get_base_parser('sysclient') - - subparsers = parser.add_subparsers(help='Subcommand to run') - - parser_hoststatus = subparsers.add_parser('hoststatus', help='Get current manual status of a host list') - parser_hoststatus.add_argument('hostlist', help='Host(s) to query') - parser_hoststatus.set_defaults(func=hoststatus) - - parser_nodestatus = subparsers.add_parser('nodestatus', help='Get current machine status of a host list') - parser_nodestatus.add_argument('hostlist', help='Host(s) to query') - parser_nodestatus.set_defaults(func=nodestatus) - - parser_online = subparsers.add_parser('online', help='Mark a list of hosts "online" in the cache') - parser_online.add_argument('hostlist', help='Host(s) to mark online') - parser_online.set_defaults(func=online) - - parser_offline = subparsers.add_parser('offline', help='Mark a list of hosts "offline" in the cache with the reason "Reason"') - parser_offline.add_argument('hostlist', help='Host(s) to mark offline') - parser_offline.add_argument('-r', '--reason', help="Reason") - parser_offline.set_defaults(func=offline) - - parser_backfill = subparsers.add_parser('backfill', help='Mark a list of hosts "backfill" in the cache with the reason "Reason"') - parser_backfill.add_argument('hostlist', help='Host(s) to mark backfill') - parser_backfill.add_argument('-r', '--reason', help="Reason") - parser_backfill.set_defaults(func=backfill) - - parser_dumpsite = subparsers.add_parser('dump_site', help='Dump site information from the cache') - parser_dumpsite.add_argument('site', help='Site name') - parser_dumpsite.add_argument('--filename', help='Filename for output (write to stdout if unspecified)') - parser_dumpsite.set_defaults(func=dump_site) - - parser_loadsite = subparsers.add_parser('load_site', help='Load site information in dump_site format back into the cache') - parser_loadsite.add_argument('filename', help='Filename for input') - parser_loadsite.set_defaults(func=load_site) - - return parser - - -""" -Get current status of a host list -""" -def get_status(keynames, hostlist, cache): - logger = logging.getLogger(__name__) - logger.info('Running get_status') - data = [] - header = [ - 'Node', - 'TimeStamp', - 'Last updated by user on date', - 'State', - 'Reason'] - hosts = expand_hostlist(hostlist) - keys = ['%s.%s' % (host, keyname) for host in hosts for keyname in keynames.values()] - values = cache.get_multi(keys) - - logger.debug('Hostlist: %s' % hosts) - logger.debug('Keys: %s' % keynames) - - for host in hosts: - try: - timestamp = time.strftime( - "%F %T", - time.localtime( - int(values["%s.%s" % (host, keynames['timestamp'])]))) - except KeyError: - timestamp = 'UNDEF' - try: - mtimestamp = time.strftime( - "%F %T", - time.localtime( - int(values["%s.%s" % (host, keynames['mtimestamp'])]))) - except KeyError: - mtimestamp = 'UNDEF' - try: - status = values["%s.%s" % (host, keynames['status'])] - except KeyError: - status = 'UNDEF' - try: - reason = values["%s.%s" % (host, keynames['message'])] - except KeyError: - reason = 'UNDEF' - try: - user = values["%s.%s" % (host, keynames['user'])] - except KeyError: - user = 'UNDEF' - data.append((host, timestamp, ' '.join((user, mtimestamp)), status, reason)) - - print(tabulate.tabulate(data, headers=header, tablefmt='orgtbl')) - - -""" -Get current manual status of a host list -""" -def hoststatus(args, cache): - logger = logging.getLogger(__name__) - logger.info('Running hoststatus') - keynames = { - 'status': 'manualstatus', - 'message': 'manualreason', - 'user': 'manualuser', - 'mtimestamp': 'manualtimestamp', - 'timestamp': 'timestamp'} - get_status(keynames=keynames, hostlist=args.hostlist, cache=cache) - - -""" -Get current machine status of a host list -""" -def nodestatus(args, cache): - logger = logging.getLogger(__name__) - logger.info('Running nodestatus') - keynames = { - 'status': 'status', - 'message': 'message', - 'user': 'manualuser', - 'mtimestamp': 'manualtimestamp', - 'timestamp': 'timestamp'} - get_status(keynames=keynames, hostlist=args.hostlist, cache=cache) - - -""" -Update the status of a given hostname (or list of hostnames) -""" -def update_status(hostlist, status, reason, cache): - logger = logging.getLogger(__name__) - logger.info('Running update_status') - data = {} - hosts = expand_hostlist(hostlist) - mu = pwd.getpwuid(os.getuid()).pw_name - mts = int(time.time()) - - logger.debug('Hostlist: %s' % hosts) - logger.debug('Status: %s' % status) - logger.debug('Reason: %s' % reason) - logger.debug('User: %s' % mu) - logger.debug('Timestamp: %s' % mts) - - for host in hosts: - data['%s.manualstatus' % host] = status - data['%s.manualreason' % host] = reason - data['%s.manualuser' % host] = mu - data['%s.manualtimestamp' % host] = mts - data['%s.timestamp' % host] = mts - - cache.set_multi(data) - - -""" -Set nodes to online -""" -def online(args, cache): - logger = logging.getLogger(__name__) - logger.info('Running online') - reason = '' - update_status( - hostlist=args.hostlist, - status='online', - reason=reason, - cache=cache) - - -""" -Set nodes to offline -""" -def offline(args, cache): - logger = logging.getLogger(__name__) - logger.info('Running offline') - reason = args.reason or '*Reason Not Set By User*' - update_status( - hostlist=args.hostlist, - status='offline', - reason=reason, - cache=cache) - - -""" -Set nodes to backfill -""" -def backfill(args, cache): - logger = logging.getLogger(__name__) - logger.info('Running backfill') - reason = args.reason or '*Reason Not Set By User*' - update_status( - hostlist=args.hostlist, - status='backfill', - reason=reason, - cache=cache) - - -""" -Dump site information from the cache - -The dump is of the format Node:State:Reason:User:Timestamp - - Node Short name of the node (such as uct2-c267, iut2-c199, mwt2-c103 - State State of the node, online|backfill|offline - Reason Reason a node is not online - User User who updated the state - Timestamp Time the node state was last updated -""" -def dump_site(args, cache): - logger = logging.getLogger(__name__) - logger.info('Running dump_site') - - config = get_config(args.config_file) - keynames = ['manualstatus', 'manualreason', 'manualuser', 'manualtimestamp'] - hosts = [] - - if not config.has_section(args.site): - logger.critical("Configuration file is missing site '%s'" % args.site) - logger.critical("Valid sites: %s" % " ".join([c[0] for c in config.items('collectors')])) - exit(1) - - if args.filename: - f = open(args.filename, 'w') - - for hostlist in config.items(args.site): - hosts.extend(expand_hostlist(hostlist[0])) - - keys = ["%s.%s" % (host, keyname) for host in hosts for keyname in keynames] - - values = cache.get_multi(keys) - - for host in hosts: - try: - status = values['%s.manualstatus' % host] or values['%s.status' % host] - except KeyError: - status = 'Unknown' - try: - reason = values['%s.manualreason' % host] or values['%s.message' % host] - except KeyError: - reason = 'Unknown' - try: - user = values['%s.manualuser' % host] - except KeyError: - user = 'Unknown' - try: - mts = values['%s.manualtimestamp' % host] - except KeyError: - mts = 0 - - if args.filename: - f.write('%s:%s:%s:%s:%s\n' % (host, status, reason, user, mts)) - else: - print('%s:%s:%s:%s:%s' % (host, status, reason, user, mts)) - - if args.filename: - f.close() - - -""" -Load line in dump_site format back into memcache - -The dump is of the format Node:State:Reason - - Node Short name of the node (such as uct2-c267, iut2-c199, mwt2-c103 - State State of the node, online|backfill|offline - Reason Reason a node is not online - User User who updated the state - Timestamp Time the node state was last updated -""" -def load_site(args, cache): - logger = logging.getLogger(__name__) - logger.info('Running load_site') - - with open(args.filename, 'r') as f: - for line in f: - host, status, reason, user, timestamp = line.split(':') - update_status(host, status, reason, cache) - diff --git a/sysview-client/sysclient b/sysview-client/sysclient deleted file mode 100755 index 134adf7..0000000 --- a/sysview-client/sysclient +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 - -import logging -from sysview.common.utils import get_config -from sysview.common.cache import get_cache -from sysview.client.client import get_parser - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - - if args.verbose >= 2: - loglevel = logging.DEBUG - elif args.verbose == 1: - loglevel = logging.INFO - else: - loglevel = logging.WARNING - - logging.basicConfig( - level=loglevel, - format='%(levelname)s %(module)s.%(funcName)s(): %(message)s' - ) - logger = logging.getLogger(__name__) - - logger.info('Logger level: %d' % logger.getEffectiveLevel()) - - config = get_config(args.config_file) - cache = get_cache(args=args, config=config) - - try: - args.func(args=args, cache=cache) - except AttributeError as e: - print(e) - parser.print_help() - From 09f36a159a53bb041e47a2fc3d569aecef796c18 Mon Sep 17 00:00:00 2001 From: Lincoln Bryant Date: Mon, 24 Jul 2023 10:38:14 -0500 Subject: [PATCH 2/2] update sysview client version --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a67b483..cc3985e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,7 @@ RUN yum install --enablerepo=osg-upcoming -y condor RUN yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo RUN yum install -y docker-ce-cli -RUN yum install -y http://mirror.grid.uchicago.edu/pub/mwt2/sw/el7/mwt2-sysview-worker-2.0.3-1.noarch.rpm +RUN yum install -y http://mirror.grid.uchicago.edu/pub/mwt2/sw/el7/mwt2-sysview-worker-2.0.5-1.noarch.rpm RUN yum install -y python36-tabulate COPY condor/*.conf /etc/condor/config.d/