Skip to content

Commit

Permalink
add condor startd hold expressions
Browse files: browse the repository at this point in the history
  • Loading branch information
dsschult committed Nov 7, 2023
1 parent cbf3173 commit 35012e9
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions iceprod/server/plugins/condor_direct.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,15 @@ async def get_hold_reason(self, submit_dir, resources=None):
if submit_dir is None:
submit_dir = ''
reason = None
submit_filename = os.path.join(submit_dir, 'condor.submit')
submit_data = {}
if os.path.exists(submit_filename):
with open(submit_filename) as f:
for line in f:
line = line.strip().lower()
if line:
key, value = line.split('=', 1)
submit_data[key.strip()] = value.strip()
filename = os.path.join(submit_dir, 'condor.log')
if os.path.exists(filename):
with open(filename) as f:
Expand Down Expand Up @@ -278,6 +287,34 @@ async def get_hold_reason(self, submit_dir, resources=None):
if resource_type in resources:
reason += f'{resources[resource_type]}'
break
elif 'cpu usage exceeded request_cpus' in line:
reason = 'Resource overusage for cpu: '
try:
reason += str(int(submit_data['request_cpus']))
except Exception:
pass
break
elif 'memory usage exceeded request_memory' in line:
reason = 'Resource overusage for memory: '
try:
reason += str(float(submit_data['request_memory'])/1000.)
except Exception:
pass
break
elif 'disk usage exceeded request_disk' in line:
reason = 'Resource overusage for disk: '
try:
reason += str(float(submit_data['request_disk'])/1000000.)
except Exception:
pass
break
elif 'runtime exceeded maximum' in line:
reason = 'Resource overusage for time: '
try:
reason += str(float(line.split()[-2].strip('('))/3600)
except Exception:
pass
break
elif 'Transfer output files failure' in line:
reason = 'Failed to transfer output files'
break
Expand Down

0 comments on commit 35012e9

Please sign in to comment.