-
Notifications
You must be signed in to change notification settings - Fork 1
/
jobs.py
181 lines (146 loc) · 6.05 KB
/
jobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
"""Generate training jobs scripts.
Quick Start:
.. code-block:: python
# make sure you have the dependencies
pip install slurmjobs numpy fire
# NOTE: slurmjobs does not require slurm - it's just templating
# this will generate folders for all job formats
python jobs.py
# or you can just generate for one type (shell)
python jobs.py shell
This script supports 3 different output formats:
- ``singularity``: Use this if you have an sbatch environment with Singularity (e.g. NYU Greene)
- ``sbatch``: Use this if you have an sbatch environment without Singularity (e.g. like NYU Prince (R.I.P.))
- ``shell``: Use this if you just want to run the scripts with bash. This will run wherever!
Unless you are on an HPC environment, you're probably just going to want the shell version.
This will generate shell scripts
You can train over any of the command line flags in each of the scripts. Here
we're focusing on:
- ``num_regions``: range(3, 12, 2) - the number of horizontal regions to localize over.
- ``folds``: range(0, 5) - The validation folds to use. The other 4 are used for training.
This will generate a grid as a product of each of these variables. So since there are 5 each, the
total number of generated jobs is 25 for training and a corresponding 25 for testing.
Copyright (c) 2022 Magdalena Fuentes, Bea Steers, Julia Wilkins
All rights reserved.
This source code is licensed under the BSD-3-Clause license found in the
LICENSE file in the root directory of this source tree.
"""
# import os
import slurmjobs
# You should run this script from inside localize_sound
# sets the default CLI generation format
slurmjobs.Jobs.cli = 'argparse'
# sbatch and singularity config
# sbatch arguments are placed at the top of your sbatch file.
# The output format is: ``--{key}={value}``
sbatch = {
# 'email': f'{os.getenv("USER")}@nyu.edu',
'mem': '96GB',
'time': '1-23:00',
'gres': 'gpu:1',
'cpus_per_task': 4,
}
# job search grid config
fov = 120
num_regions = [5]
in_dur = 4
point_sources = [True, None]
filter_confirmed = [True]
audio_filtered_labels = [None]
train_dataset = 'Urbansas'
val_dataset = 'Urbansas'
folds = [0, 1, 2, 3, 4]
batch_size = 128
metric = 'val_auc_pr'
loss = ['WeightedBinaryCrossentropy']
#########################
# Job Formats
# setup the different job formats
class Singularity(slurmjobs.Singularity):
options = dict(
slurmjobs.Singularity.options,
# override default options
sif='cuda11.2.2-cudnn8-devel-ubuntu20.04.sif',
overlay='/scratch/mf3734/share/urbansas/urbansas-5GB-200K.ext3',
sbatch=sbatch,
)
class Slurm(slurmjobs.Slurm):
options = dict(
slurmjobs.Singularity.options,
sbatch=sbatch
)
class Shell(slurmjobs.Shell):
options = dict(
slurmjobs.Shell.options,
# setting this as True will launch jobs in the background (using nohup) which allows
# you to launch all jobs at once. I figure that behavior may be surprising
# so it is disabled by default. But if you want a more sbatch-like environment, you
# can set this as true
background=False,
)
#########################
# Parameter Grids
# create the training grid
# # train folds are everything not in validation
train_folds = [folds[:i] + folds[i+1:] for i in range(len(folds))]
grid = slurmjobs.Grid([
('point_sources', point_sources),
('filter_confirmed', filter_confirmed),
('audio_filtered_labels', audio_filtered_labels),
(('train_folds', 'val_folds'), (train_folds, folds))
])
############################
# Generate Functions
def generate(kind, name=None):
# get the job format class
if kind.lower() in {'singularity', 'sing'}:
cls = Singularity
elif kind.lower() in {'slurm', 'sbatch'}:
cls = Slurm
elif kind.lower() in {'shell', 'bash'}:
cls = Shell
else:
raise ValueError("Invalid job kind. Pick one of: {singularity, slurm, shell}")
# this is where to save the output scripts
root_dir = f'jobs/{kind}'
# train jobs - writes to jobs/{kind}/train_model
batch_train = cls('python train_model.py', job_id='config_name', root_dir=root_dir, name=f'train_model{f"-{name}" if name else ""}')
# generate the jobs and print out a summary
run_script, job_paths = batch_train.generate(
grid,
train_dataset=train_dataset,
val_dataset=val_dataset,
overwrite=True)
slurmjobs.util.summary(run_script, job_paths)
# predict jobs - writes to jobs/{kind}/predict_model
batch_predict = cls('python predict_model.py', job_id=False, root_dir=root_dir, name=f'predict_model{f"-{name}" if name else ""}')
# create a grid for prediction using the training grid
predict_grid = slurmjobs.LiteralGrid([
{
'folds': d['val_folds'],
'config_name': batch_predict.format_job_id(d, name=f'train_model{f"-{name}" if name else ""}'),
'point_sources': d['point_sources'],
'filter_confirmed': d['filter_confirmed']
}
for d in grid
])
print(predict_grid)
# generate the jobs and print out a summary
run_script, job_paths = batch_predict.generate(
predict_grid, overwrite=True)
slurmjobs.util.summary(run_script, job_paths)
def main(*kinds, name=None):
'''Generate job formats. The generated files are located under ``jobs/{kind}``.
Arguments:
*kinds: The names of the job types to use. Available options:
- ``singularity``: Use this if you have an sbatch environment with Singularity (e.g. NYU Greene)
- ``sbatch``: Use this if you have an sbatch environment without Singularity (e.g. like NYU Prince (R.I.P.))
- ``shell``: Use this if you just want to run the scripts with bash. This will launch the script as a
background task.
name: name extension to append to folder name (python jobs.py --name exp1 results in rain_model-exp1 folder)
'''
for k in kinds or ['singularity', 'shell']:
generate(k, name)
if __name__ == '__main__':
import fire
fire.Fire(main)