-
Notifications
You must be signed in to change notification settings - Fork 2
/
vectorize
executable file
·421 lines (334 loc) · 11.7 KB
/
vectorize
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
#!/bin/tcsh -f
## given a file of commands (one command per line), runs them in
## parallel on NCAR machines using MPMD approach
## Option defaults.  QUEUE and WALLTIME are the values used when -q and
## -w are not given.  The default* flags start out as yes and are switched
## to no by the option parser when the user supplies a queue, an
## environment file, or a memory request.
set QUEUE    = casper
set WALLTIME = 00:10:00
set help     = no
set submit   = yes
set defaultq = yes
set defaulte = yes
set defaultm = yes
## NOTE(review): memory is not read anywhere else in this script; the
## memory request itself is stored in MEMORY.
set memory   = no
## Argument parsing, modelled on the getopt-parse.tcsh example from
## util-linux-ng.  The getopt output goes into a scratch variable because
## eval would overwrite the getopt return value, and :q copies the argv
## list without performing any substitutions.
set parsed=(`getopt -a -n vectorize -s tcsh -o d:e:hj:m:np:q:w: --long dir:,env:,help,jobname:,memory:,nosub,project:,queue:,walltime: -- $argv:q`)
if ($status != 0) then
    echo "Terminating..." >/dev/stderr
    exit 1
endif
## The parentheses are escaped so that they are evaluated only when eval
## runs, turning the result back into a list; :q prevents substitutions.
eval set argv=\($parsed:q\)
## Consume one option (plus its value, when it takes one) per iteration
## until the -- separator is reached; whatever remains in argv afterwards
## is the list of positional arguments.
while (1)
    switch ($1:q)
    case -d:
    case --dir:
        set dir = $2:q
        shift
        shift
        breaksw
    case -e:
    case --env:
        set defaulte = no
        set env = $2:q
        shift
        shift
        breaksw
    case -h:
    case --help:
        set help = yes
        shift
        breaksw
    case -j:
    case --jobname:
        set JOBNAME = $2:q
        shift
        shift
        breaksw
    case -m:
    case --memory:
        set defaultm = no
        set MEMORY = $2:q
        shift
        shift
        breaksw
    case -n:
    case --nosub:
        set submit = no
        shift
        breaksw
    case -p:
    case --project:
        set PROJECT = $2:q
        shift
        shift
        breaksw
    case -q:
    case --queue:
        set defaultq = no
        set QUEUE = $2:q
        shift
        shift
        breaksw
    case -w:
    case --walltime:
        set WALLTIME = $2:q
        shift
        shift
        breaksw
    case --:
        shift
        break
    default:
        echo "vectorize: Internal error!"
        exit 1
    endsw
end
## Check for bad arguments and print the usage message.
## A project code must be available (from -p/--project or an existing
## PROJECT variable) and exactly one positional argument, the command
## file, must remain after option parsing.  When any check fails, or when
## -h/--help was given, the usage text is printed and the script exits
## with status 1.
if (! $?PROJECT) then
    set PROJECT = none
    echo "vectorize: ERROR: project code not defined"
endif
if ($#argv < 1) then
    echo "vectorize: ERROR: no commandfile"
endif
if ($#argv > 1) then
    echo "vectorize: ERROR: too many arguments"
endif
## The here-document is unquoted, so variable and command substitution
## still happen inside it.  Backquotes and dollar signs that must be
## printed literally are backslash-escaped; without the escapes the
## basename example would be executed and replaced by its output.
if ($help == yes || $PROJECT == none || $#argv != 1) then
cat <<EOUSAGE
Usage: vectorize [-d dir] [-e env] [-h] [-j jobname] [-m mem] [-n] [-p proj] [-q queue] [-w walltime] cmdfile
-d, --dir: directory for output; defaults to \`mktemp -d\`
-e, --env: environment file; defaults to ~/.vectorize/<queue>
-h, --help: prints this usage message and exit
-j, --jobname: PBS jobname; defaults to \`basename cmdfile .txt\`
-m, --memory: memory needed per task in GB
-n, --nosub: don't submit job, just set everything up
-p, --project: project code; defaults to PROJECT envariable (if set)
-q, --queue: queue to submit to; defaults to 'casper'
-w, --walltime: wallclock limit for job; defaults to 00:10:00
cmdfile: a file with one command per line, to be run in parallel
Vectorize is a utility for running many independent single-threaded
commands in parallel on casper or cheyenne using MPMD parallelism.
Given a file with one command (task) per line, it builds a PBS job
script that will run each command on a separate processor and submits
it to the scheduler. If you only want the PBS script, use the --nosub
flag to set everything up without submitting the job. You can also
use --nosub to build a job to be submitted as dependent on another
job.
Stdout and stderr from the tasks are captured in dir/out and dir/err,
respectively. NOTE: currently this doesn't work correctly on
cheyenne; the tasks will all run (I believe), but only a single nodes'
worth of output is captured. The commandfile and the PBS job script
are also stored in dir as dir/cmd and dir/pbs, respectively.
To allow customization of the environment that the jobs run in, the
contents of the environment file are inserted directly into the PBS
job script. Put any commands like "module load" in an environment
file. The default environment file is ~/.vectorize/<queue>. Note
that the PBS job script is written in tcsh.
On the share queue, commandfile is run in serial on a single
processor. On casper, the commandfile is split into one job per
command and run using a job array. On cheyenne, vectorize will figure
out how many nodes your job needs, splitting them up as evenly as
possible while making sure that the total memory requirements (if
specified) don't exceed the node capacity. It will warn you if the
job will only fit on large-memory nodes, and won't create a job that
has less than 40% (15/36) utilization of the CPUs on the node.
Note that tasks running on casper and the share queue are piped as-is
through the user's default shell (\$SHELL) to run; tasks running on
cheyenne have to be wrapped in a bash invocation, like so:
bash -c '<task>'
If you need to delete a job that uses a job array, note that the
square brackets are part of the job id.
EOUSAGE
exit 1
endif
#### FIXME ####
## The job array approach doesn't mix well with MPMD, because using a
## job array requires a single output file, but when the jobs are
## running on multiple nodes, everything runs (I think) but you only
## end up with output from a single node (as best I can tell.) Which
## is to say, this new version works on casper and the share queue,
## but not on cheyenne.
## The solution is to create a subdirectory for each node, split the
## commandfile into pieces (one for each node, which we're doing
## already), and then recursively call the script again on each of the
## splits. This shouldn't be too bad to implement, but I don't have
## time to deal with it right now, because while it seems pretty
## straightforward, if I hit any speed bumps it could extend much
## longer than the time I have available. So I'm putting this on hold
## for now.
## Any queue other than casper or share gets a warning that task output
## is not captured properly.
if (! ($QUEUE == "casper" || $QUEUE == "share")) then
    echo "vectorize: WARNING: running on cheyenne should work, but output is not captured properly."
    echo "You will probably only get output from one node. Proceed with caution."
endif
## Bail out early when the command file is missing or has zero size.
if (! -e $1) then
    echo "vectorize: ERROR: no such command file: '$1'; exiting"
    exit 1
endif
if (-z $1) then
    echo "vectorize: ERROR: empty command file; exiting"
    exit 1
endif
## Set memstring, the ":mem=<N>GB" suffix for the PBS resource request,
## and reject bad memory specifications.  memstring stays empty when no
## -m/--memory option was given.
##
## The =~ and !~ operators compare against glob patterns, not regular
## expressions, so they cannot verify that a value is numeric.  The check
## is done by perl instead, with the value passed as an argument rather
## than pasted into perl source.  The value must also be greater than
## zero because it is used as a divisor when sizing jobs.
set memstring = ''
if ($defaultm == no) then
    perl -e 'exit(($ARGV[0] =~ /^([0-9]+[.]?[0-9]*|[.][0-9]+)$/ && $ARGV[0] > 0) ? 0 : 1)' -- $MEMORY:q
    if ($status != 0) then
        echo "vectorize: ERROR: --memory option must be a positive number"
        exit 1
    endif
    if ($QUEUE == casper) then
        ## max memory on casper ~= 1.5 TB
        set memstring = `perl -e "print($MEMORY>1500?'bad':':mem=' . $MEMORY . 'GB')"`
    else if ($QUEUE == share) then
        ## max available memory on share = 109 GB; warn if asking for more than half
        set memstring = `perl -e "print($MEMORY>109 ? 'bad' : ':mem=' . $MEMORY . 'GB')"`
        if ($memstring != bad) then
            set warning = '"vectorize: WARNING: requesting more than half the available memory on share queue nodes\n"'
            perl -e "if($MEMORY > 109/2){print STDOUT $warning}"
        endif
    else
        ## 3 GB = 45 GB / 15 CPU = max memory @ 40% utilization on std cheyenne nodes
        ## 7.27 = 109 GB / 15 CPU = max memory @ 40% util on large-memory nodes
        set memstring = `perl -e "print($MEMORY>109/15 ? 'bad' : ':mem=' . $MEMORY . 'GB')"`
        if ($memstring != bad) then
            set warning = '"vectorize: WARNING: job will only fit on large-memory nodes\n"'
            perl -e "if($MEMORY > 3){print STDOUT $warning}"
        endif
    endif
    ## The perl snippets print the literal word bad when the request is
    ## over the limit for the selected queue.
    if ($memstring == bad) then
        echo "vectorize: ERROR: memory request ($MEMORY GB) exceeds available resources for queue $QUEUE"
        echo "(casper: 1500 GB; share: 109 GB; cheyenne: 7.26 GB/CPU @ 40% utilization)"
        exit 1
    endif
endif
## Pick the environment file: the per-queue default under ~/.vectorize
## unless one was given with -e/--env.
if ($defaulte == yes) set env = "~/.vectorize/$QUEUE"
## A missing environment file is fatal; the message depends on whether
## the file was given explicitly or is the per-queue default.
if (! -e $env) then
    if ($defaulte == no) then
        echo "vectorize: ERROR: no such environment file: '$env'; exiting"
    else
        echo "vectorize: ERROR: no default environment for queue '$QUEUE'; exiting"
        echo "To resolve this error, create file ~/.vectorize/$QUEUE containing"
        echo "any 'module load' commands, etc. needed in the PBS script."
    endif
    exit 1
endif
## Output directory: a fresh temporary directory when -d/--dir was not
## given, otherwise the requested directory (created if needed).
if (! $?dir) then
    set dir = `mktemp -d`
else
    mkdir -p $dir
endif
## figure out how many jobs and process commandfiles accordingly
## share: one job, run commandfile as-is
## casper: one job per task, split commandfile into single lines
## other: split cmdfile into chunks & wrap in shell invocation for MPMD
## no memory requirement = use all 36 CPUs / node
## otherwise fill each node as full as possible
## normal nodes: ~45 GB memory, large nodes: ~109 GB
## (available of 64 and 128 GB, respectively)
##
## Tasks are counted with grep -c on the empty pattern, which matches
## every line.  Unlike wc -l, this also counts a final line that has no
## trailing newline, so the job count agrees with the number of pieces
## that split produces.  Reading through cat keeps the filename out of
## the output.
set NNODES = 1
if ($QUEUE == share) then
    set NJOBS = 1
    set NCPUS = 1
else if ($QUEUE == casper) then
    set NJOBS = `cat $1 | grep -c ''`
    set NCPUS = 1
else
    set ntasks = `cat $1 | grep -c ''`
    if ($defaultm == yes) then
        set NJOBS = `perl -e "use POSIX; print ceil($ntasks/36)"`
        set NCPUS = 36
    else
        set NCPUS = `perl -e "use POSIX; print floor(109/$MEMORY)"`
        ## A node has 36 CPUs, so a small per-task memory request must
        ## not push the CPU count per node above that.
        if ($NCPUS > 36) set NCPUS = 36
        set NJOBS = `perl -e "use POSIX; print(ceil($ntasks/$NCPUS))"`
    endif
    set NNODES = $NJOBS
    unset ntasks
endif
## This is only because we have to do the rename thing below: the split
## pieces get 4-digit suffixes.
if ($NJOBS > 9999) then
    echo "vectorize: ERROR: too many jobs ($NJOBS)"
    exit 1
endif
## Stage the command file as dir/cmd and, when more than one job is
## needed, split it into dir/cmd.1, dir/cmd.2, ... for the job array.
mkdir -p $dir
cp $1 $dir/cmd
if ($QUEUE != share && $QUEUE != casper) then
    ## wrap each command in a shell invocation for MPMD
    sed -i "s|\(.*\)|bash -c '\1'|g" $dir/cmd
endif
## The pieces are cut from the staged copy, not from the original
## command file, so that the MPMD wrapping applied above is present in
## every piece.  On casper the staged copy is identical to the original.
if ($NJOBS > 1 && $QUEUE != share) then
    if ($QUEUE == casper) then
        split -a 4 --numeric-suffix=1 --lines=1 $dir/cmd $dir/cmd.
        # using -nl/N, where N = number of lines in file, gets weird with
        # very long lines, and can generate empty files (which then hang
        # until they timeout)
    else
        split -a 4 --numeric-suffix=1 -nl/$NJOBS $dir/cmd $dir/cmd.
    endif
    ## need to strip leading zeros for PBS job array handling; each pass
    ## removes one zero, and a 4-digit suffix has at most three
    rename cmd.0 cmd. $dir/cmd*
    rename cmd.0 cmd. $dir/cmd*
    rename cmd.0 cmd. $dir/cmd*
endif
## Paths inside the output directory: the PBS script itself, the staged
## command file, and the files named in the PBS stdout/stderr directives.
set pbsfile = $dir/pbs
set CMDFILE = $dir/cmd
set OUTFILE = $dir/out
set ERRFILE = $dir/err
## The job name defaults to the command file name with any directory
## part and a trailing .txt removed.
if (! $?JOBNAME) then
    set JOBNAME = `basename $1 .txt`
endif
## create PBS batch script
## The here-document is unquoted, so PROJECT, QUEUE, JOBNAME, OUTFILE,
## ERRFILE, WALLTIME, NNODES, NCPUS and memstring are substituted as the
## text is written.  pbsfile is created (or overwritten) here; the steps
## that follow append to it.
cat <<EOF > $pbsfile
#!/bin/tcsh
##### parameters for PBS scheduling via qsub
##### submit jobs as "qsub scriptname"
# account code to charge to
#PBS -A $PROJECT
# which queue to use
#PBS -q $QUEUE
# job name
#PBS -N $JOBNAME
# stdout file
#PBS -o $OUTFILE
# stderr file
#PBS -e $ERRFILE
# runtime limit
#PBS -l walltime=$WALLTIME
# resource request
# select: num nodes
# ncpus: num cpus per node to use for this job
# (36 for cheyenne, 1 for share or casper)
# mpiprocs: matches ncpus (i.e., 1 process per CPU)
# ompthreads: num threads per process (always 1)
# memory (if set): memory required per cpu
# will affect which nodes get scheduled
#PBS -l select=${NNODES}:ncpus=${NCPUS}:mpiprocs=${NCPUS}:ompthreads=1$memstring
EOF
## A job array directive covering indices 1 through NJOBS is appended
## only when more than one job is needed.
if($NJOBS > 1) then
cat <<EOFJOBS >> $pbsfile
# job array
#PBS -J 1-$NJOBS
EOFJOBS
endif
## Append the contents of the environment file (module load commands,
## etc.) to the job script, followed by a separator written with echo.
cat $env >> $pbsfile
echo "\n" >> $pbsfile
## Append the line that actually runs the tasks.  share and casper feed
## the command file to the shell named by SHELL; every other queue goes
## through mpiexec_mpt and launch_cf.sh.  With more than one job the
## file name ends in the PBS array index, which is written out literally
## so that it is expanded when the job runs.
switch ($QUEUE)
case share:
    echo $SHELL $CMDFILE >> $pbsfile
    breaksw
case casper:
    if ($NJOBS <= 1) then
        echo $SHELL $CMDFILE >> $pbsfile
    else
        echo $SHELL $CMDFILE'.$PBS_ARRAY_INDEX' >> $pbsfile
    endif
    breaksw
default:
    echo 'setenv MPI_SHEPHERD true' >> $pbsfile
    if ($NJOBS <= 1) then
        echo 'mpiexec_mpt -n '$NCPUS' launch_cf.sh '$CMDFILE >> $pbsfile
    else
        echo 'mpiexec_mpt -n '$NCPUS' launch_cf.sh '$CMDFILE'.$PBS_ARRAY_INDEX' >> $pbsfile
    endif
    breaksw
endsw
## submit job (or not)
## With --nosub nothing is submitted and the script exits 0.  Otherwise
## the job script is handed to qsubcasper when the casper queue is
## targeted from a host whose NCAR_HOST is cheyenne, and to qsub in every
## other case.  A failed submission is reported on stderr and its exit
## status becomes the exit status of this script.
if ($submit == yes) then
    ## Referencing an unset variable aborts the script, so NCAR_HOST is
    ## copied into a local variable only after checking that it exists.
    if ($?NCAR_HOST) then
        set ncar_host = "$NCAR_HOST"
    else
        set ncar_host = unknown
    endif
    if ($QUEUE == casper && "$ncar_host" == cheyenne) then
        qsubcasper $pbsfile
        set rc = $status
    else
        qsub $pbsfile
        set rc = $status
    endif
    if ($rc != 0) then
        echo "vectorize: ERROR: job submission failed with exit status $rc" >/dev/stderr
        exit $rc
    endif
endif
exit 0
# Copyright 2018 Univ. Corp. for Atmos. Research
# Author: Seth McGinnis, [email protected]