From 35012e9d4321fe9c4819a1773fdd5cd63aa168b4 Mon Sep 17 00:00:00 2001 From: David Schultz Date: Tue, 7 Nov 2023 15:25:46 -0600 Subject: [PATCH] add condor startd hold expressions --- iceprod/server/plugins/condor_direct.py | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/iceprod/server/plugins/condor_direct.py b/iceprod/server/plugins/condor_direct.py index aa7f65d9..b3035ec3 100644 --- a/iceprod/server/plugins/condor_direct.py +++ b/iceprod/server/plugins/condor_direct.py @@ -224,6 +224,15 @@ async def get_hold_reason(self, submit_dir, resources=None): if submit_dir is None: submit_dir = '' reason = None + submit_filename = os.path.join(submit_dir, 'condor.submit') + submit_data = {} + if os.path.exists(submit_filename): + with open(submit_filename) as f: + for line in f: + line = line.strip().lower() + if line: + key, value = line.split('=', 1) + submit_data[key.strip()] = value.strip() filename = os.path.join(submit_dir, 'condor.log') if os.path.exists(filename): with open(filename) as f: @@ -278,6 +287,34 @@ async def get_hold_reason(self, submit_dir, resources=None): if resource_type in resources: reason += f'{resources[resource_type]}' break + elif 'cpu usage exceeded request_cpus' in line: + reason = 'Resource overusage for cpu: ' + try: + reason += str(int(submit_data['request_cpus'])) + except Exception: + pass + break + elif 'memory usage exceeded request_memory' in line: + reason = 'Resource overusage for memory: ' + try: + reason += str(float(submit_data['request_memory'])/1000.) + except Exception: + pass + break + elif 'disk usage exceeded request_disk' in line: + reason = 'Resource overusage for disk: ' + try: + reason += str(float(submit_data['request_disk'])/1000000.) + except Exception: + pass + break + elif 'runtime exceeded maximum' in line: + reason = 'Resource overusage for time: ' + try: + reason += str(float(line.split()[-2].strip('('))/3600) + except Exception: + pass + break elif 'Transfer output files failure' in line: reason = 'Failed to transfer output files' break