-
Notifications
You must be signed in to change notification settings - Fork 1
/
launch_jobs.py
61 lines (51 loc) · 2.21 KB
/
launch_jobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""This is a script to repeatedly launch experiments (i.e. invoke
`run_experiment.py`) and generate the full set of results from the paper.

NOTE: Some of this code is specific to Harvard Odyssey, and would need to be
rewritten to work on different research clusters."""
# NOTE(review): the docstring above was originally placed after the imports,
# where it is a no-op expression rather than the module __doc__; moved to top.
import os
import argparse
import numpy as np  # NOTE(review): unused in this script — kept to avoid changing imports

# Command-line configuration shared by every launched job.
parser = argparse.ArgumentParser()
parser.add_argument("--base_dir", type=str)    # root directory for all experiment output
parser.add_argument("--conda_env", type=str)   # conda env activated inside each job
parser.add_argument("--partition", type=str)   # Slurm partition to submit to
parser.add_argument("--mem_limit", type=str, default='20000')  # per-job memory (MB)
FLAGS = parser.parse_args()

# Per-job Slurm batch-script template; placeholders are filled by launch_job().
slurm_template = """#!/bin/bash
#SBATCH --mem={mem_limit}
#SBATCH -t {time_limit}
#SBATCH -p {partition}
#SBATCH -o {out_file}
#SBATCH -e {err_file}
module load Anaconda3/5.0.1-fasrc01
source activate {conda_env}
{job_command}
"""
def launch_job(restart, dataset, n_models, split, time_limit=None, mem_limit=None):
    """Write a Slurm batch script for one experiment configuration and submit it.

    Args:
        restart: 0-based restart index (the saved directory name is 1-based).
        dataset: dataset name forwarded to run_experiment.py.
        n_models: model count; also scales the default time limit.
        split: split strategy forwarded to run_experiment.py.
        time_limit: Slurm time limit string; defaults to 2h per model, capped
            at 24h, in 'days-HH:MM' form.
        mem_limit: Slurm memory limit (MB); defaults to FLAGS.mem_limit.
    """
    if time_limit is None:
        # Two hours per model, capped at 24 hours.
        time_limit = '0-{0:02d}:00'.format(min(24, n_models * 2))
    if mem_limit is None:
        mem_limit = FLAGS.mem_limit
    save_dir = 'restart-{}__dataset-{}__n-models-{}__split-{}/'.format(
        restart + 1, dataset, n_models, split)
    save_dir = os.path.join(FLAGS.base_dir, save_dir)
    # '%j' is expanded by Slurm to the numeric job ID.
    out_file = os.path.join(save_dir, 'job-%j.out')
    err_file = os.path.join(save_dir, 'job-%j.err')
    slurm_file = os.path.join(save_dir, 'job.slurm')
    # Fix: create the directory in-process rather than shelling out to
    # `mkdir -p` via os.system — same semantics (no error if it exists),
    # no subshell, and safe for paths containing shell metacharacters.
    os.makedirs(save_dir, exist_ok=True)
    job_command = ("python -u run_experiment.py --save_dir={} --n_models={} "
                   "--dataset={} --split={}").format(save_dir, n_models, dataset, split)
    slurm_command = slurm_template.format(
        job_command=job_command,
        time_limit=time_limit,
        mem_limit=mem_limit,
        partition=FLAGS.partition,
        conda_env=FLAGS.conda_env,
        out_file=out_file,
        err_file=err_file)
    with open(slurm_file, "w") as f:
        f.write(slurm_command)
    # Submit the generated script (cluster-specific; requires sbatch on PATH).
    os.system("cat {} | sbatch".format(slurm_file))
# Datasets evaluated in the paper.
datasets = ['covertype', 'ionosphere', 'sonar', 'spectf', 'mushroom', 'electricity']
datasets += ['icu']  # available on request if you have access to MIMIC-III.

# Full sweep: 10 restarts x model-count grid x datasets x applicable splits.
for restart in range(10):
    for n_models in [2, 3, 5, 8, 13]:
        for dataset in datasets:
            # The ICU dataset uses the 'limit' split; all others use 'norm'.
            if 'icu' in dataset:
                splits = ['none', 'limit']
            else:
                splits = ['none', 'norm']
            for split in splits:
                launch_job(restart, dataset, n_models, split)