Skip to content

Commit

Permalink
Add tool to display environment variables
Browse files Browse the repository at this point in the history
  • Loading branch information
pierre.delaunay committed Jul 23, 2024
1 parent ab47881 commit 5bc0df7
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 8 deletions.
70 changes: 63 additions & 7 deletions config/slurm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ multi-node-full:
- --time=2:00:00
- --ntasks-per-node=1
- --mem=0
- --export=ALL,MILABENCH_SIZER_AUTO=0

single-node-full:
# DGX run: 1 node x 8 A100 80Go SXM4
Expand All @@ -27,27 +28,82 @@ single-node-full:
- --time=1:30:00
- --ntasks-per-node=1
- --mem=0
- --export=ALL,MILABENCH_SIZER_AUTO=0

multi-node-small:
# Any GPU, 2 nodes x 2 GPU
#
#
#
single-node-small:
# Any GPU, 1 node x 2 GPU
- --partition=staff-idt
- --ntasks=1
- --gpus-per-task=2
- --exclusive
- --nodes=2
- --nodes=1
- --cpus-per-task=16
- --time=1:30:00
- --ntasks-per-node=1
- --mem=128G
- --export=ALL,MILABENCH_SIZER_AUTO=1,MILABENCH_SIZER_MULTIPLE=8

single-node-small:
# Any GPU, 1 node x 2 GPU

multi-node-small:
# rtx8000, 2 nodes x 2 GPU
- --partition=staff-idt
- --gpus-per-task=rtx8000:2
- --ntasks=1
- --gpus-per-task=2
- --exclusive
- --nodes=1
- --nodes=2
- --cpus-per-task=16
- --time=1:30:00
- --ntasks-per-node=1
- --mem=128G
- --export=ALL,MILABENCH_SIZER_AUTO=1,MILABENCH_SIZER_MULTIPLE=8

#
# RTX 48Go
#
multi-node-rtx:
# rtx8000: 2 nodes, 8 GPUs per task
- --partition=staff-idt
- --gpus-per-task=rtx8000:8
- --ntasks=1
- --exclusive
- --nodes=2
- --cpus-per-task=64
- --time=1:30:00
- --ntasks-per-node=1
- --mem=0
- --export=ALL,MILABENCH_SIZER_AUTO=1,MILABENCH_SIZER_MULTIPLE=8

#
# V100 - 32 Go
#
multi-node-v100:
# v100: 2 nodes, 8 GPUs per task
- --partition=staff-idt
- --gpus-per-task=v100:8
- --ntasks=1
- --exclusive
- --nodes=2
- --cpus-per-task=40
- --time=1:30:00
- --ntasks-per-node=1
- --mem=0
- --export=ALL,MILABENCH_SIZER_AUTO=1,MILABENCH_SIZER_MULTIPLE=8

#
# Small A100 - 40Go
#
multi-node-a100:
# a100: 2 nodes, 8 GPUs per task
- --partition=staff-idt
- --gpus-per-task=a100:8
- --ntasks=1
- --exclusive
- --nodes=2
- --cpus-per-task=128
- --time=1:30:00
- --ntasks-per-node=1
- --mem=0
- --export=ALL,MILABENCH_SIZER_AUTO=1,MILABENCH_SIZER_MULTIPLE=8
83 changes: 82 additions & 1 deletion milabench/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,27 @@ def _print():
warn_no_config = print_once("No system config found, using defaults")


# Registry of the options seen so far, keyed by their dotted name.
# Each entry stores the option's type, default and resolved value.
_global_options = {}


def _track_options(name, type, default, value):
    """Record an option so the command line can display it.

    Tracking is best-effort bookkeeping: it must never prevent the caller
    from resolving the option itself.

    Args:
        name: dotted option name, used as the registry key.
        type: type the option is converted to (shadows the builtin; the
            parameter name is kept for backward compatibility).
        default: value used when nothing overrides the option.
        value: value the option finally resolved to.
    """
    try:
        _global_options[name] = {
            "type": type,
            "default": default,
            "value": value,
        }
    except TypeError:
        # An unhashable name cannot be used as a key; skip tracking rather
        # than fail. Anything else (e.g. KeyboardInterrupt) propagates.
        pass


def as_environment_variable(name):
    """Return the environment variable name matching a dotted option name.

    The name is upper-cased, dots become underscores and the result is
    prefixed with ``MILABENCH_`` (``"sizer.auto"`` -> ``"MILABENCH_SIZER_AUTO"``).
    """
    prefix = "MILABENCH_"
    return prefix + name.upper().replace(".", "_")


def option(name, etype, default=None):
options = dict()
system = system_global.get()
Expand All @@ -49,7 +70,7 @@ def option(name, etype, default=None):
warn_no_config()

frags = name.split(".")
env_name = "MILABENCH_" + "_".join(map(str.upper, frags))
env_name = as_environment_variable(name)
env_value = getenv(env_name, etype)

lookup = options
Expand All @@ -59,6 +80,8 @@ def option(name, etype, default=None):
system_value = lookup.get(frags[-1], None)
final_value = env_value or system_value or default

_track_options(name, etype, default, final_value)

if final_value is None:
return None
try:
Expand Down Expand Up @@ -169,15 +192,27 @@ class Nodes:
user: str


@dataclass
class Github:
    """Github related settings."""
    # The default is resolved by option() under the dotted name "github.path",
    # i.e. the MILABENCH_GITHUB_PATH environment variable.
    # NOTE(review): the field is called `pat` while the option key is
    # "github.path" - possibly a typo for "github.pat"; confirm the intended
    # key before relying on the environment variable name.
    pat: str = option("github.path", str, None)


@dataclass
class SystemConfig:
    """This is meant to be an exhaustive list of all the environment overrides"""
    # Read from the MILABENCH_GPU_ARCH environment variable when the class
    # body is executed.
    arch: str = getenv("MILABENCH_GPU_ARCH", str)
    sshkey: str = None
    docker_image: str = None
    # default_factory gives every instance its own list.
    nodes: list[Nodes] = field(default_factory=list)
    gpu: GPUConfig = None
    options: Options = None

    # The defaults below are computed by option() once, when the class body
    # is executed; each call also goes through _track_options so the option
    # can be listed by show_overrides(). Their environment variable names
    # follow as_environment_variable(), e.g. "base" -> MILABENCH_BASE.
    base: str = option("base", str, None)
    config: str = option("config", str, None)
    # The bool defaults are passed as the ints 1 / 0.
    dash: bool = option("dash", bool, 1)
    noterm: bool = option("noterm", bool, 0)
    github: Github = None


def check_node_config(nodes):
mandatory_fields = ["name", "ip", "user"]
Expand Down Expand Up @@ -353,3 +388,49 @@ def build_system_config(config_file, defaults=None, gpu=True):
system["self"] = self

return config


def show_overrides(to_json=False):
    """Print every option recorded in ``_global_options``.

    Dotted option names are expanded into a nested dictionary. Each leaf
    holds the option's type name, default, resolved value and the name of
    its environment variable.

    Args:
        to_json: when true, dump the nested dictionary as indented JSON;
            otherwise print an indented listing with one option per line.
    """
    import copy
    import json

    tree = {}
    for dotted, tracked in _global_options.items():
        *parents, leaf = dotted.split('.')

        # Walk (and create on demand) the nested sections for this option.
        node = tree
        for part in parents:
            node = node.setdefault(part, dict())

        # Work on a copy so the registry itself is never modified.
        entry = copy.deepcopy(tracked)
        entry["type"] = str(entry["type"].__name__)
        node[leaf] = entry
        entry["env_name"] = as_environment_variable(dotted)

    def compact(section, depth):
        for key, item in section.items():
            pad = " " * depth

            # Only leaves carry "env_name"; anything else is a sub-section.
            if "env_name" not in item:
                print(f"{pad}{key}:")
                compact(item, depth + 1)
                continue

            value = item["value"]
            default = item["default"]
            # Overridden options show their default, others show the
            # environment variable that can override them.
            if value != default:
                suffix = f"(default={default})"
            else:
                suffix = f"{item['env_name']}"
            print(f"{pad}{key:<{30 - len(pad)}}: {str(value):<40} {suffix}")

    if not to_json:
        compact(tree, 0)
    else:
        print(json.dumps(tree, indent=2))




# When the module is run as a script, print the tracked options in the
# plain-text (non JSON) format.
if __name__ == "__main__":
    show_overrides()

0 comments on commit 5bc0df7

Please sign in to comment.