Skip to content

Commit

Permalink
Add new slurm sbatch profile
Browse files Browse the repository at this point in the history
  • Loading branch information
pierre.delaunay committed Jun 20, 2024
1 parent 7246295 commit b67bf0d
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 24 deletions.
53 changes: 53 additions & 0 deletions config/slurm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#
# sbatch arguments for the different run profiles
#

multi-node-full:
# DGX run: 2 nodes x 8 A100 80GB SXM4, pinned to cn-d003 and cn-d004
# NOTE(review): --ntasks=1 is combined with --nodes=2 and --ntasks-per-node=1;
# sbatch documents --ntasks as taking precedence over --ntasks-per-node, so
# this may request a single task (8 GPUs total) rather than one task per
# node -- confirm against the cluster before relying on a 2-node allocation.
- --partition=staff-idt
- -w cn-d[003-004]
- --ntasks=1
- --gpus-per-task=a100l:8
- --exclusive
- --nodes=2
- --cpus-per-task=128
- --time=1:30:00
- --ntasks-per-node=1
- --mem=0

single-node-full:
# DGX run: 1 node x 8 A100 80GB SXM4
# NOTE(review): -w names two hosts (cn-d003 and cn-d004) while --nodes=1;
# sbatch documents --nodelist as hosts the job will contain, so the two
# options look contradictory -- confirm the scheduler accepts this.
- --partition=staff-idt
- -w cn-d[003-004]
- --ntasks=1
- --gpus-per-task=a100l:8
- --exclusive
- --nodes=1
- --cpus-per-task=128
- --time=1:30:00
- --ntasks-per-node=1
- --mem=0

multi-node-small:
# Any GPU type: 2 nodes x 2 GPUs
# NOTE(review): --ntasks=1 is combined with --nodes=2; with a single task and
# --gpus-per-task=2 this may not yield 2 GPUs on each of the 2 nodes --
# confirm the intended task count.
- --partition=staff-idt
- --ntasks=1
- --gpus-per-task=2
- --exclusive
- --nodes=2
- --cpus-per-task=16
- --time=1:30:00
- --ntasks-per-node=1
- --mem=64G

single-node-small:
# Any GPU type: 1 node x 2 GPUs, 16 CPUs, 64G of memory, 1h30 time limit
- --partition=staff-idt
- --ntasks=1
- --gpus-per-task=2
- --exclusive
- --nodes=1
- --cpus-per-task=16
- --time=1:30:00
- --ntasks-per-node=1
- --mem=64G
6 changes: 3 additions & 3 deletions milabench/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""This file is generated, do not modify"""

__tag__ = "v0.1.0-12-g39e7cce9"
__commit__ = "39e7cce9aec8a9e1ae7713137f287353ce718875"
__date__ = "2024-06-17 13:41:35 -0400"
__tag__ = "v0.1.0-20-g7246295a"
__commit__ = "7246295a356186b55fa4b2b75480e3700c279b15"
__date__ = "2024-06-20 09:18:17 -0400"
41 changes: 26 additions & 15 deletions milabench/cli/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import importlib_resources
import requests
import yaml
from coleo import Option, tooled


Expand All @@ -14,6 +15,7 @@ class Arguments:
sync: bool = False
dry : bool = False
args: list = field(default_factory=list)
profile: str = None
# fmt: on


Expand All @@ -25,11 +27,29 @@ def arguments():
# Print the command and return without running it
dry: Option & bool = False

# sbatch run profile
profile: Option & str = None

# pip arguments
# [remainder]
args: Option = []

return Arguments(sync, dry, args)
return Arguments(sync, dry, args, profile)


def get_sbatch_profiles(profile, default):
    """Return the list of sbatch arguments for a named run profile.

    Profiles are read from ``config/slurm.yaml``, located two directories
    above this module. Each top-level key of that file is a profile name.

    Args:
        profile: Name of the requested profile. May be ``None``.
        default: Name of the profile used when ``profile`` is not present in
            the file (or maps to a null value).

    Returns:
        The value stored under the selected profile name.

    Raises:
        ValueError: If neither ``profile`` nor ``default`` is defined in the
            profile file. Failing here gives a clear message instead of
            handing ``None`` to the caller.
        OSError: If the profile file cannot be opened.
    """
    # NOTE(review): this path is relative to the source tree; presumably it
    # only resolves for in-repository / editable installs -- confirm.
    root = os.path.dirname(__file__)
    profiles_path = os.path.join(root, "..", "..", "config", "slurm.yaml")

    with open(profiles_path, "r", encoding="utf-8") as fp:
        # safe_load returns None for an empty document; normalise to a dict
        # so the lookups below cannot fail with AttributeError.
        sbatch_profiles = yaml.safe_load(fp) or {}

    args = sbatch_profiles.get(profile)

    # Unknown (or unspecified) profile: fall back to the default one.
    if args is None:
        args = sbatch_profiles.get(default)

    if args is None:
        available = ", ".join(sorted(str(name) for name in sbatch_profiles))
        raise ValueError(
            f"sbatch profile {profile!r} and default profile {default!r} "
            f"are both missing from {profiles_path} "
            f"(available profiles: {available or 'none'})"
        )

    return args


@tooled
Expand All @@ -39,9 +59,9 @@ def cli_schedule(args=None):
if args is None:
args = arguments()

launch_milabench(args.args, sbatch_args=None, dry=args.dry, sync=args.sync)

sbatch_args = get_sbatch_profiles(args.profile, "single-node-small")

launch_milabench(args.args, sbatch_args=sbatch_args, dry=args.dry, sync=args.sync)


def popen(cmd, callback=None):
Expand Down Expand Up @@ -120,6 +140,7 @@ class SetupOptions:
config: str = "milabench/config/standard.yaml"
env: str = "./env"
python: str = "3.9"
fun: str = "run"

def deduce_remote(self, current_branch):
prefix = "refs/heads/"
Expand Down Expand Up @@ -164,6 +185,8 @@ def arguments(self):
self.env,
"-p",
self.python,
"-f",
self.fun
]


Expand All @@ -173,18 +196,6 @@ def launch_milabench(args, sbatch_args=None, dry: bool = False, sync: bool = Fal
)
sbatch_script = str(sbatch_script)

# salloc --gres=gpu:rtx8000:1 --mem=64G --cpus-per-gpu=4

if sbatch_args is None:
sbatch_args = [
"--ntasks=1",
"--gpus-per-task=rtx8000:2",
"--cpus-per-task=8",
"--time=01:30:00",
"--ntasks-per-node=1",
"--mem=64G",
]

script_args = SetupOptions()
script_args.deduce_from_repository()
script_args = script_args.arguments()
Expand Down
Empty file removed milabench/schedule.py
Empty file.
23 changes: 17 additions & 6 deletions milabench/scripts/milabench_run.bash
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,16 @@ function parse_args() {
BASE="$LOC/base"
ENV="./env"
REMAINING_ARGS=""
FUN="run"

while getopts ":hm:p:e:b:o:c:" opt; do
while getopts ":hm:p:e:b:o:c:f:" opt; do
case $opt in
h)
usage
;;
f)
FUN="$OPTARG"
;;
p)
PYTHON="$OPTARG"
;;
Expand Down Expand Up @@ -119,8 +123,6 @@ function setup() {
}

function pin() {
parse_args

conda_env

setup
Expand All @@ -138,8 +140,6 @@ function pin() {
}

function run() {
parse_args

conda_env

setup
Expand Down Expand Up @@ -178,4 +178,15 @@ function run() {
echo "----"
echo "Done after $SECONDS"
echo ""
}
}

# Forward the script's command line to parse_args: getopts inside a function
# reads that function's positional parameters, so a bare `parse_args` call
# would never see -f (or any other option) and FUN would stay at its default.
parse_args "$@"

# Dispatch to the function selected with -f (FUN defaults to "run").
case "$FUN" in
    run)
        run
        ;;
    pin)
        pin
        ;;
    *)
        # Fail loudly instead of exiting 0 without having done anything.
        echo "Unknown function '$FUN' (expected 'run' or 'pin')" >&2
        exit 1
        ;;
esac

0 comments on commit b67bf0d

Please sign in to comment.