diff --git a/milabench/executors.py b/milabench/executors.py index 3307aea37..8e1fb4ff7 100644 --- a/milabench/executors.py +++ b/milabench/executors.py @@ -126,7 +126,7 @@ async def execute(self, phase="run", timeout=False, timeout_delay=600, **kwargs) timeout_tasks = [] for pack, argv, _kwargs in self.commands(): await pack.send(event="config", data=pack.config) - await pack.send(event="meta", data=machine_metadata()) + await pack.send(event="meta", data=machine_metadata(pack)) fut = pack.execute(*argv, **{**_kwargs, **kwargs}) coro.append(fut) diff --git a/milabench/metadata.py b/milabench/metadata.py index 9bba3763b..7e9068da7 100644 --- a/milabench/metadata.py +++ b/milabench/metadata.py @@ -1,76 +1,17 @@ import os from datetime import datetime import cpuinfo +import subprocess import traceback +import json from voir.instruments.gpu import get_gpu_info from ._version import __commit__, __tag__, __date__ -from .vcs import retrieve_git_versions +from .scripts.vcs import retrieve_git_versions +import milabench.scripts.torchversion as torchversion -def get_pytorch_version(): - - def clean(k: str): - pad = ' - ' - if k.startswith(pad): - return k[len(pad):].strip() - return k.strip() - - def find_config(lines, key): - for line in lines: - if key in line: - return clean(line) - - return None - - def parse_build_settings(settings): - flags = dict() - - if settings is None: - return flags - - _, settings = settings.split(':') - for setting in settings.split(','): - try: - k, v = setting.split('=', maxsplit=1) - flags[k.strip()] = v.strip() - except ValueError: - pass - - return flags - - try: - import torch - conf = torch.__config__.show().split('\n') - - compiler = conf[1] - cpp = find_config(conf, 'C++ Version') - intel = find_config(conf, 'oneAPI') - mkl = find_config(conf, 'OpenMP') - openmp = find_config(conf, 'OpenMP') - lapack = find_config(conf, 'LAPACK') - nnpack = find_config(conf, 'NNPACK') - cpu = find_config(conf, 'CPU') - build_settings = 
def fetch_torch_version(pack):
    """Collect PyTorch build information from inside a pack's environment.

    Runs ``python <torchversion script>`` with the environment returned by
    ``pack.full_env`` and with ``pack.dirs.code`` as the working directory,
    then decodes the JSON document the script prints on stdout.

    Collection is best effort, like ``_get_gpu_info`` which also falls back
    to an empty dict: a failure here is reported but never aborts the caller.

    Args:
        pack: object exposing ``dirs.code`` and ``full_env(env)``.

    Returns:
        dict: the decoded JSON document, or ``{}`` when the interpreter could
        not be started, the script exited with a non-zero status, or its
        output was not valid JSON.
    """
    command = ["python", str(torchversion.__file__)]

    try:
        result = subprocess.run(
            command,
            env=pack.full_env(dict()),
            cwd=pack.dirs.code,
            capture_output=True,
            # Turn a non-zero exit status into CalledProcessError instead of
            # trying to decode whatever partial output was produced.
            check=True,
        )
        return json.loads(result.stdout)
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: interpreter or working directory missing.
        # SubprocessError: the script exited with an error.
        # ValueError: stdout was not decodable JSON.
        traceback.print_exc()
        return {}
def get_pytorch_version():
    """Summarize the build configuration of the importable PyTorch.

    The summary is parsed from the text returned by
    ``torch.__config__.show()``.

    Returns:
        dict: ``{}`` when ``torch`` cannot be imported.  Otherwise a mapping
        with these keys:

        * ``torch``: ``torch.__version__``.
        * ``compiler``: the second line of the report, or ``None`` when the
          report has fewer than two lines.
        * ``cpp``, ``intel``, ``mkl``, ``openmp``, ``lapack``, ``nnpack``,
          ``cpu``: the first report line containing the matching keyword,
          or ``None`` when no line contains it.
        * ``build_settings``: dict of the ``KEY=VALUE`` pairs listed on the
          "Build settings" line (empty when that line is absent).
    """

    def clean(k):
        # Drop surrounding whitespace and the leading "-" bullet marker,
        # whatever the indentation in front of the bullet is.
        if k is None:
            return None
        text = k.strip()
        if text.startswith("-"):
            text = text[1:].strip()
        return text

    def find_config(lines, key):
        # First line mentioning `key`, already cleaned; None if there is none.
        return next((clean(line) for line in lines if key in line), None)

    def parse_build_settings(settings):
        flags = dict()

        if settings is None:
            return flags

        # Split on the first ":" only: flag values may contain colons.
        _, _, pairs = settings.partition(":")
        for setting in pairs.split(","):
            k, sep, v = setting.partition("=")
            if sep:  # entries without "=" are not KEY=VALUE pairs
                flags[k.strip()] = v.strip()

        return flags

    try:
        import torch

        conf = torch.__config__.show().split("\n")

        return dict(
            torch=torch.__version__,
            compiler=clean(conf[1]) if len(conf) > 1 else None,
            cpp=find_config(conf, "C++ Version"),
            intel=find_config(conf, "oneAPI"),
            mkl=find_config(conf, "MKL"),
            openmp=find_config(conf, "OpenMP"),
            lapack=find_config(conf, "LAPACK"),
            nnpack=find_config(conf, "NNPACK"),
            cpu=find_config(conf, "CPU"),
            build_settings=parse_build_settings(
                find_config(conf, "Build settings")
            ),
        )

    except ImportError:
        return dict()


if __name__ == "__main__":
    # Run as a script: print the summary as a JSON document on stdout.
    import json

    print(json.dumps(get_pytorch_version()))
# Environment setup for a milabench run.
# Source this file (do not execute it) so the exports reach the calling shell:
#
#   . scripts/schedule.sh && milabench run --select resnet50

# Paths are resolved against the directory this file is sourced from.
WORKSPACE="$(pwd)/workspace"

ARCH="cuda"
# NOTE(review): PYTHON, BRANCH, ORIGIN and LOC are not referenced again in
# this file; presumably they are read by whatever sources it -- confirm.
PYTHON="3.9"
BRANCH="master"
ORIGIN="https://github.com/mila-iqia/milabench.git"
LOC="$SLURM_TMPDIR"
CONFIG="$(pwd)/config/standard.yaml"
BASE="$WORKSPACE"

# All cache locations share one directory under BASE.
# Expansions are quoted so a path containing spaces is never word-split.
export HF_HOME="$BASE/cache"
export HF_DATASETS_CACHE="$BASE/cache"
export TORCH_HOME="$BASE/cache"
export XDG_CACHE_HOME="$BASE/cache"

export MILABENCH_GPU_ARCH="$ARCH"
export MILABENCH_DASH=no
export PYTHONUNBUFFERED=1
export MILABENCH_BASE="$BASE"
export MILABENCH_CONFIG="$CONFIG"