From 8e62d37035b022ab918213497dc0f139e1412f49 Mon Sep 17 00:00:00 2001 From: Setepenre Date: Mon, 6 Nov 2023 09:37:45 -0500 Subject: [PATCH] Add flops benchmark (#169) --- .gitignore | 2 + .pin/constraints-cuda-torch.txt | 31 ++- .pin/constraints-rocm-torch.txt | 43 ++-- .../accelerate_opt/requirements.cuda.txt | 14 +- .../accelerate_opt/requirements.rocm.txt | 16 +- benchmarks/dlrm/requirements.cuda.txt | 17 +- benchmarks/dlrm/requirements.rocm.txt | 19 +- benchmarks/flops/activator | 7 + benchmarks/flops/benchfile.py | 19 ++ benchmarks/flops/main.py | 184 ++++++++++++++++++ benchmarks/flops/prepare.py | 1 + benchmarks/flops/requirements.cuda.txt | 153 +++++++++++++++ benchmarks/flops/requirements.in | 4 + benchmarks/flops/requirements.rocm.txt | 162 +++++++++++++++ benchmarks/huggingface/requirements.cuda.txt | 8 +- benchmarks/huggingface/requirements.rocm.txt | 10 +- benchmarks/rwkv/requirements.cuda.txt | 8 +- benchmarks/rwkv/requirements.rocm.txt | 10 +- benchmarks/stargan/requirements.cuda.txt | 8 +- benchmarks/stargan/requirements.rocm.txt | 10 +- benchmarks/super-slomo/requirements.cuda.txt | 8 +- benchmarks/super-slomo/requirements.rocm.txt | 10 +- benchmarks/timm/requirements.cuda.txt | 8 +- benchmarks/timm/requirements.rocm.txt | 10 +- benchmarks/torchvision/requirements.cuda.txt | 8 +- benchmarks/torchvision/requirements.rocm.txt | 10 +- config/base.yaml | 56 +++++- config/standard.yaml | 16 ++ constraints/cuda.txt | 3 +- constraints/rocm.txt | 2 +- milabench/_version.py | 6 +- milabench/dashboard/__init__.py | 0 milabench/dashboard/live_report.py | 0 milabench/dashboard/rawoutput.py | 0 milabench/executors.py | 11 ++ milabench/schedule.py | 10 +- milabench/scripts/milabench_docker.bash | 5 + milabench/scripts/milabench_run.bash | 16 +- milabench/scripts/setup.bash | 111 +++++++++++ 39 files changed, 876 insertions(+), 140 deletions(-) create mode 100755 benchmarks/flops/activator create mode 100644 benchmarks/flops/benchfile.py create mode 100755 benchmarks/flops/main.py create mode 100755 benchmarks/flops/prepare.py create mode 100644 benchmarks/flops/requirements.cuda.txt create mode 100644 benchmarks/flops/requirements.in create mode 100644 benchmarks/flops/requirements.rocm.txt create mode 100644 milabench/dashboard/__init__.py create mode 100644 milabench/dashboard/live_report.py create mode 100644 milabench/dashboard/rawoutput.py create mode 100644 milabench/scripts/milabench_docker.bash create mode 100644 milabench/scripts/setup.bash diff --git a/.gitignore b/.gitignore index 8e6de4a30..18dafb9c6 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ sqlite.db .no_report trash/ +workspace/ +slurm-* diff --git a/.pin/constraints-cuda-torch.txt b/.pin/constraints-cuda-torch.txt index bb30508fa..f52d2aa9d 100644 --- a/.pin/constraints-cuda-torch.txt +++ b/.pin/constraints-cuda-torch.txt @@ -2,13 +2,13 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=.pin/constraints-cuda-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in +# pip-compile --config=pyproject.toml --output-file=.pin/constraints-cuda-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 absl-py==2.0.0 # via tensorboard -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -28,7 +28,7 @@ cachetools==5.3.2 # via google-auth certifi==2023.7.22 # via requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # aiohttp # requests @@ -57,7 +57,7 @@ executing==1.2.0 # via varname fbgemm-gpu==0.5.0+cu118 # via torchrec -filelock==3.12.4 +filelock==3.13.1 # via # huggingface-hub # torch @@ -82,7 +82,7 @@ giving==0.4.2 # via # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # google-auth-oauthlib # tensorboard @@ -90,7 +90,7 @@ google-auth-oauthlib==1.1.0 # via tensorboard graphviz==0.20.1 # via torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via tensorboard hjson==3.1.0 # via deepspeed @@ -116,7 +116,7 @@ lightning-utilities==0.9.0 # via # pytorch-lightning # torchmetrics -markdown==3.5 +markdown==3.5.1 # via tensorboard markdown-it-py==3.0.0 # via rich @@ -138,7 +138,7 @@ multiprocess==0.70.15 # evaluate mypy-extensions==1.0.0 # via typing-inspect -networkx==3.2 +networkx==3.2.1 # via torch ninja==1.11.1.1 # via @@ -167,7 +167,7 @@ oauthlib==3.2.2 # via requests-oauthlib omegaconf==2.3.0 # via voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in opencv-python==4.8.1.78 # via -r benchmarks/super-slomo/requirements.in @@ -185,7 +185,7 @@ packaging==23.2 # pytorch-lightning # torchmetrics # transformers -pandas==2.1.1 +pandas==2.1.2 # via # datasets # evaluate @@ -203,7 +203,7 @@ ptera==1.4.1 # via voir py-cpuinfo==9.0.0 # via deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via datasets pyasn1==0.5.0 # via @@ -294,7 +294,7 @@ tokenizers==0.14.1 # via transformers torch==2.1.0+cu118 # via - # -r benchmarks/huggingface/requirements.in + # -r benchmarks/stargan/requirements.in # -r benchmarks/super-slomo/requirements.in # accelerate # deepspeed @@ -313,8 +313,8 @@ torchrec==0.5.0+cu118 # via -r benchmarks/dlrm/requirements.in torchvision==0.16.0+cu118 # via + # -r benchmarks/stargan/requirements.in # -r benchmarks/super-slomo/requirements.in - # -r benchmarks/torchvision/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.6.0 @@ -340,7 +340,6 @@ typing-extensions==4.8.0 # via # huggingface-hub # lightning-utilities - # onnx # pydantic # pyre-extensions # pytorch-lightning @@ -359,9 +358,9 @@ urllib3==1.26.18 # torchx varname==0.10.0 # via giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via - # -r benchmarks/huggingface/requirements.in + # -r benchmarks/stargan/requirements.in # -r benchmarks/super-slomo/requirements.in websocket-client==1.6.4 # via docker diff --git a/.pin/constraints-rocm-torch.txt b/.pin/constraints-rocm-torch.txt index 484e52a77..c50a448fe 100644 --- a/.pin/constraints-rocm-torch.txt +++ b/.pin/constraints-rocm-torch.txt @@ -2,13 +2,13 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=.pin/constraints-rocm-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in +# pip-compile --config=pyproject.toml --output-file=.pin/constraints-rocm-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in # --extra-index-url https://download.pytorch.org/whl/rocm5.6/ absl-py==2.0.0 # via tensorboard -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -28,7 +28,7 @@ cachetools==5.3.2 # via google-auth certifi==2023.7.22 # via requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # aiohttp # requests @@ -59,7 +59,7 @@ executing==1.2.0 # via varname fbgemm-gpu==0.5.0 # via torchrec -filelock==3.12.4 +filelock==3.13.1 # via # huggingface-hub # pytorch-triton-rocm @@ -84,7 +84,7 @@ giving==0.4.2 # via # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # google-auth-oauthlib # tensorboard @@ -92,7 +92,7 @@ google-auth-oauthlib==1.1.0 # via tensorboard graphviz==0.20.1 # via torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via tensorboard hjson==3.1.0 # via deepspeed @@ -118,9 +118,9 @@ lightning-utilities==0.9.0 # via # pytorch-lightning # torchmetrics -lit==17.0.3 +lit==17.0.4 # via pytorch-triton-rocm -markdown==3.5 +markdown==3.5.1 # via tensorboard markdown-it-py==3.0.0 # via rich @@ -142,7 +142,7 @@ multiprocess==0.70.15 # evaluate mypy-extensions==1.0.0 # via typing-inspect -networkx==3.2 +networkx==3.2.1 # via torch ninja==1.11.1.1 # via @@ -151,7 +151,7 @@ ninja==1.11.1.1 numpy==1.26.1 # via # -r benchmarks/dlrm/requirements.in - # -r benchmarks/stargan/requirements.in + # -r benchmarks/rwkv/requirements.in # accelerate # datasets # deepspeed @@ -172,7 +172,7 @@ oauthlib==3.2.2 # via requests-oauthlib omegaconf==2.3.0 # via voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in opencv-python==4.8.1.78 # via -r benchmarks/super-slomo/requirements.in @@ -190,7 +190,7 @@ packaging==23.2 # pytorch-lightning # torchmetrics # transformers -pandas==2.1.1 +pandas==2.1.2 # via # datasets # evaluate @@ -208,7 +208,7 @@ ptera==1.4.1 # via voir py-cpuinfo==9.0.0 # via deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via datasets pyasn1==0.5.0 # via @@ -301,8 +301,8 @@ tokenizers==0.14.1 # via transformers torch==2.1.0+rocm5.6 # via - # -r benchmarks/stargan/requirements.in - # -r benchmarks/timm/requirements.in + # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/flops/requirements.in # accelerate # deepspeed # pytorch-lightning @@ -321,8 +321,8 @@ torchrec==0.5.0 # via -r benchmarks/dlrm/requirements.in torchvision==0.16.0+rocm5.6 # via - # -r benchmarks/stargan/requirements.in - # -r benchmarks/timm/requirements.in + # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/flops/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.6.0 @@ -330,7 +330,7 @@ torchx==0.6.0 tqdm==4.66.1 # via # -r benchmarks/dlrm/requirements.in - # -r benchmarks/torchvision/requirements.in + # -r benchmarks/flops/requirements.in # datasets # deepspeed # evaluate @@ -346,7 +346,6 @@ typing-extensions==4.8.0 # via # huggingface-hub # lightning-utilities - # onnx # pydantic # pyre-extensions # pytorch-lightning @@ -365,10 +364,10 @@ urllib3==1.26.18 # torchx varname==0.10.0 # via giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via - # -r benchmarks/stargan/requirements.in - # -r benchmarks/timm/requirements.in + # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/flops/requirements.in websocket-client==1.6.4 # via docker werkzeug==3.0.1 diff --git a/benchmarks/accelerate_opt/requirements.cuda.txt b/benchmarks/accelerate_opt/requirements.cuda.txt index 563d7a85b..552fcc115 100644 --- a/benchmarks/accelerate_opt/requirements.cuda.txt +++ b/benchmarks/accelerate_opt/requirements.cuda.txt @@ -6,7 +6,7 @@ # --extra-index-url https://download.pytorch.org/whl/cu118 -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -37,7 +37,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -64,7 +64,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -135,7 +135,7 @@ multiprocess==0.70.15 # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # evaluate -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -171,7 +171,7 @@ packaging==23.2 # evaluate # huggingface-hub # transformers -pandas==2.1.1 +pandas==2.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets @@ -193,7 +193,7 @@ py-cpuinfo==9.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets @@ -313,7 +313,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/accelerate_opt/requirements.in xxhash==3.4.1 # via diff --git a/benchmarks/accelerate_opt/requirements.rocm.txt b/benchmarks/accelerate_opt/requirements.rocm.txt index c6c6f501d..8fc0ca376 100644 --- a/benchmarks/accelerate_opt/requirements.rocm.txt +++ b/benchmarks/accelerate_opt/requirements.rocm.txt @@ -6,7 +6,7 @@ # --extra-index-url https://download.pytorch.org/whl/rocm5.6/ -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -37,7 +37,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp @@ -68,7 +68,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -113,7 +113,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -143,7 +143,7 @@ multiprocess==0.70.15 # -c .pin/../.pin/constraints-rocm-torch.txt # datasets # evaluate -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -179,7 +179,7 @@ packaging==23.2 # evaluate # huggingface-hub # transformers -pandas==2.1.1 +pandas==2.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets @@ -201,7 +201,7 @@ py-cpuinfo==9.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets @@ -322,7 +322,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/accelerate_opt/requirements.in xxhash==3.4.1 # via diff --git a/benchmarks/dlrm/requirements.cuda.txt b/benchmarks/dlrm/requirements.cuda.txt index 438da3a4d..8e1993d6a 100644 --- a/benchmarks/dlrm/requirements.cuda.txt +++ b/benchmarks/dlrm/requirements.cuda.txt @@ -26,7 +26,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -50,7 +50,7 @@ fbgemm-gpu==0.5.0+cu118 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchrec -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ giving==0.4.2 # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # google-auth-oauthlib @@ -81,7 +81,7 @@ graphviz==0.20.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -105,7 +105,7 @@ lightning-utilities==0.9.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchmetrics -markdown==3.5 +markdown==3.5.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -130,7 +130,7 @@ mypy-extensions==1.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # typing-inspect -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -150,7 +150,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in ovld==0.3.2 # via @@ -281,7 +281,6 @@ typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # lightning-utilities - # onnx # pyre-extensions # reactivex # torch @@ -300,7 +299,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/dlrm/requirements.in websocket-client==1.6.4 # via diff --git a/benchmarks/dlrm/requirements.rocm.txt b/benchmarks/dlrm/requirements.rocm.txt index 4ce758a16..fc2a93ad0 100644 --- a/benchmarks/dlrm/requirements.rocm.txt +++ b/benchmarks/dlrm/requirements.rocm.txt @@ -26,7 +26,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -54,7 +54,7 @@ fbgemm-gpu==0.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchrec -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -72,7 +72,7 @@ giving==0.4.2 # -c .pin/../.pin/constraints-rocm-torch.txt # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # google-auth-oauthlib @@ -85,7 +85,7 @@ graphviz==0.20.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard @@ -109,11 +109,11 @@ lightning-utilities==0.9.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchmetrics -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown==3.5 +markdown==3.5.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard @@ -138,7 +138,7 @@ mypy-extensions==1.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # typing-inspect -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -159,7 +159,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in ovld==0.3.2 # via @@ -291,7 +291,6 @@ typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # lightning-utilities - # onnx # pyre-extensions # reactivex # torch @@ -310,7 +309,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/dlrm/requirements.in websocket-client==1.6.4 # via diff --git a/benchmarks/flops/activator b/benchmarks/flops/activator new file mode 100755 index 000000000..083c28cb1 --- /dev/null +++ b/benchmarks/flops/activator @@ -0,0 +1,7 @@ +#!/bin/bash + +venv="$1" +shift + +source "$venv"/bin/activate +exec "$@" diff --git a/benchmarks/flops/benchfile.py b/benchmarks/flops/benchfile.py new file mode 100644 index 000000000..b00415f0f --- /dev/null +++ b/benchmarks/flops/benchfile.py @@ -0,0 +1,19 @@ +from milabench.pack import Package + + +class FlopsBenchmarch(Package): + base_requirements = "requirements.in" + prepare_script = "prepare.py" + main_script = "main.py" + + def build_run_plan(self) -> "execs.Executor": + import milabench.executors as execs + + main = self.dirs.code / self.main_script + pack = execs.PackExecutor(self, *self.argv, lazy=True) + # pack = execs.VoirExecutor(pack, cwd=main.parent) + pack = execs.ActivatorExecutor(pack, use_stdout=True) + return pack + + +__pack__ = FlopsBenchmarch diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py new file mode 100755 index 000000000..d72bf7186 --- /dev/null +++ b/benchmarks/flops/main.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python + +from argparse import ArgumentParser +import json +import time +import sys +import multiprocessing + +import torch + +from voir.smuggle import SmuggleWriter +from voir.instruments.gpu import get_gpu_info +from voir.instruments.utils import Monitor + +KILO = 1e3 +MEGA = 1e6 +GIGA = 1e9 +TERA = 1e12 +EXA = 1e18 + + +def _worker(state, queue, func, delay): + import time + + while state['running']: + queue.put(func()) + time.sleep(delay) + +class Monitor: + def __init__(self, delay, func): + self.manager = multiprocessing.Manager() + self.state = self.manager.dict() + self.state['running'] = True + self.results = multiprocessing.Queue() + self.process = multiprocessing.Process( + target=_worker, + args=(self.state, self.results, func, delay), + ) + + def start(self): + self.process.start() + + def stop(self): + self.state['running'] = False + self.process.join() + + +def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA): + # Not sure how much thop is correct in its computation + # it says it return MAC but I feel its methods is wrong + from thop import profile + + # MAC: Multiply–accumulate operation + batch = torch.randn(*shape, dtype=dtype, device="cuda:0") + + flops, _ = profile(model, inputs=(batch,)) + + with torch.no_grad(): + # Prepare + torch.cuda.empty_cache() + + batch = batch.cuda() + model = model.to(dtype=dtype, device="cuda:0") + + torch.cuda.synchronize() + + # Start + start = time.time() + + for i in range(repeat): + _ = model(batch) + + torch.cuda.synchronize() + end = time.time() + # -- + + return (flops * repeat) / (end - start) / unit + + + +def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32, log=None): + torch.cuda.empty_cache() + a = torch.eye(n, dtype=dtype, device="cuda:0") + x = torch.randn((m, n), dtype=dtype, device="cuda:0") + y = torch.zeros_like(x) + + F = N * (2 * m * n * n + 2 * m * n * n) + + for i in range(R): + torch.cuda.synchronize() + ts = -time.time() + + for _ in range(N): + # No allocation in main loop using dual-out strategy + y = torch.mm(x, a, out=y) + x = torch.mm(y, a, out=x) + + torch.cuda.synchronize() + ts += time.time() + + if log is not None: + log({ + "task": "train", + "rate": F / ts / unit, + "units": "Tflops" + }) + + torch.cuda.empty_cache() + + +def setupvoir(): + # wtf this do + data_file = SmuggleWriter(sys.stdout) + # data_file = sys.stdout + + def log(data): + if data_file is not None: + data["t"] = time.time() + print(json.dumps(data), file=data_file) + + while not monitor.results.empty(): + print(json.dumps(monitor.results.get()), file=data_file) + + def monitor_fn(): + data = { + gpu["device"]: { + "memory": [ + gpu["memory"]["used"], + gpu["memory"]["total"], + ], + "load": gpu["utilization"]["compute"], + "temperature": gpu["temperature"], + "power": gpu["power"] + } + for gpu in get_gpu_info()["gpus"].values() + } + return {"task": "main", "gpudata": data, "t": time.time()} + + monitor = Monitor(0.5, monitor_fn) + monitor.start() + return log, monitor + + + +def main(): + dtypes = { + 'bf16': torch.bfloat16, + 'fp16': torch.float16, + 'fp32': torch.float32, + } + + parser = ArgumentParser() + parser.add_argument('--repeat', type=int, default=100) + parser.add_argument('--number', type=int, default=100) + parser.add_argument('--m', type=int, default=256) + parser.add_argument('--n', type=int, default=256) + parser.add_argument('--dtype', type=str, default='fp32', choices=dtypes.keys()) + parser.add_argument('--tf32', action='store_true', default=False) + + args = parser.parse_args() + + torch.backends.cuda.matmul.allow_tf32 = False + if args.tf32: + torch.backends.cuda.matmul.allow_tf32 = True + + log, monitor = setupvoir() + + f( + args.number, + args.repeat, + args.m, + args.n, + TERA, + dtypes[args.dtype], + log + ) + + monitor.stop() + +if __name__ == "__main__": + main() + + + diff --git a/benchmarks/flops/prepare.py b/benchmarks/flops/prepare.py new file mode 100755 index 000000000..4265cc3e6 --- /dev/null +++ b/benchmarks/flops/prepare.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/benchmarks/flops/requirements.cuda.txt b/benchmarks/flops/requirements.cuda.txt new file mode 100644 index 000000000..65c830e7a --- /dev/null +++ b/benchmarks/flops/requirements.cuda.txt @@ -0,0 +1,153 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-flops.txt benchmarks/flops/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/cu118 + +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +certifi==2023.7.22 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +codefind==0.1.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera +executing==1.2.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # varname +filelock==3.13.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch + # triton +fsspec==2023.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +giving==0.4.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera + # voir +idna==3.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +jinja2==3.1.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich +markupsafe==2.1.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # sympy +networkx==3.2.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +numpy==1.26.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +ovld==0.3.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pillow==10.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pygments==2.16.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich +pynvml==11.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pyyaml==6.0.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +requests==2.31.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +rich==13.6.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # asttokens +sympy==1.12 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +torch==2.1.0+cu118 + # via + # -r benchmarks/flops/requirements.in + # torchvision +torchvision==0.16.0+cu118 + # via -r benchmarks/flops/requirements.in +tqdm==4.66.1 + # via -r benchmarks/flops/requirements.in +triton==2.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +typing-extensions==4.8.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # reactivex + # torch +urllib3==1.26.18 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +varname==0.10.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +voir==0.2.11 + # via -r benchmarks/flops/requirements.in diff --git a/benchmarks/flops/requirements.in b/benchmarks/flops/requirements.in new file mode 100644 index 000000000..7d30d94e7 --- /dev/null +++ b/benchmarks/flops/requirements.in @@ -0,0 +1,4 @@ +torch +torchvision +tqdm +voir diff --git a/benchmarks/flops/requirements.rocm.txt b/benchmarks/flops/requirements.rocm.txt new file mode 100644 index 000000000..86e259787 --- /dev/null +++ b/benchmarks/flops/requirements.rocm.txt @@ -0,0 +1,162 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-flops.txt benchmarks/flops/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ + +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving +certifi==2023.7.22 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +charset-normalizer==3.3.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +cmake==3.27.7 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm +codefind==0.1.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # ptera +executing==1.2.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # varname +filelock==3.13.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm + # torch +fsspec==2023.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +giving==0.4.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # ptera + # voir +idna==3.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +jinja2==3.1.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +lit==17.0.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # rich +markupsafe==2.1.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # markdown-it-py +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # sympy +networkx==3.2.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +numpy==1.26.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +ovld==0.3.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +pillow==10.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +pygments==2.16.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # rich +pynvml==11.5.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +pytorch-triton-rocm==2.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +pyyaml==6.0.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving +requests==2.31.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +rich==13.6.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # asttokens +sympy==1.12 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +torch==2.1.0+rocm5.6 + # via + # -r benchmarks/flops/requirements.in + # pytorch-triton-rocm + # torchvision +torchvision==0.16.0+rocm5.6 + # via -r benchmarks/flops/requirements.in +tqdm==4.66.1 + # via -r benchmarks/flops/requirements.in +typing-extensions==4.8.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # reactivex + # torch +urllib3==1.26.18 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +varname==0.10.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving +voir==0.2.11 + # via -r benchmarks/flops/requirements.in diff --git a/benchmarks/huggingface/requirements.cuda.txt b/benchmarks/huggingface/requirements.cuda.txt index ce100130c..d4c17b767 100644 --- a/benchmarks/huggingface/requirements.cuda.txt +++ b/benchmarks/huggingface/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -175,5 +175,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/huggingface/requirements.in diff --git a/benchmarks/huggingface/requirements.rocm.txt b/benchmarks/huggingface/requirements.rocm.txt index 8c1366c32..963defffa 100644 --- a/benchmarks/huggingface/requirements.rocm.txt +++ b/benchmarks/huggingface/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -64,7 +64,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -84,7 +84,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -185,5 +185,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/huggingface/requirements.in diff --git a/benchmarks/rwkv/requirements.cuda.txt b/benchmarks/rwkv/requirements.cuda.txt index 13e39de84..b32c5f2f4 100644 --- a/benchmarks/rwkv/requirements.cuda.txt +++ b/benchmarks/rwkv/requirements.cuda.txt @@ -34,7 +34,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -49,7 +49,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -108,7 +108,7 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # yarl -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -223,7 +223,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/rwkv/requirements.in yarl==1.9.2 # via diff --git a/benchmarks/rwkv/requirements.rocm.txt b/benchmarks/rwkv/requirements.rocm.txt index 1e7449465..bf060a8a4 100644 --- a/benchmarks/rwkv/requirements.rocm.txt +++ b/benchmarks/rwkv/requirements.rocm.txt @@ -34,7 +34,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp @@ -53,7 +53,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -91,7 +91,7 @@ lightning-utilities==0.9.0 # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-lightning # torchmetrics -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -116,7 +116,7 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # yarl -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -232,7 +232,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/rwkv/requirements.in yarl==1.9.2 # via diff --git a/benchmarks/stargan/requirements.cuda.txt b/benchmarks/stargan/requirements.cuda.txt index 435795cc8..23572d865 100644 --- a/benchmarks/stargan/requirements.cuda.txt +++ b/benchmarks/stargan/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -147,5 +147,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/stargan/requirements.in diff --git a/benchmarks/stargan/requirements.rocm.txt b/benchmarks/stargan/requirements.rocm.txt index bbea36f98..1e0f5eccf 100644 --- a/benchmarks/stargan/requirements.rocm.txt +++ b/benchmarks/stargan/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -56,7 +56,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -156,5 +156,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/stargan/requirements.in diff --git a/benchmarks/super-slomo/requirements.cuda.txt b/benchmarks/super-slomo/requirements.cuda.txt index c4c93813e..657aa0053 100644 --- a/benchmarks/super-slomo/requirements.cuda.txt +++ b/benchmarks/super-slomo/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -152,5 +152,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/super-slomo/requirements.in diff --git a/benchmarks/super-slomo/requirements.rocm.txt b/benchmarks/super-slomo/requirements.rocm.txt index 30b2bd53c..230461051 100644 --- a/benchmarks/super-slomo/requirements.rocm.txt +++ b/benchmarks/super-slomo/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -56,7 +56,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -161,5 +161,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/super-slomo/requirements.in diff --git a/benchmarks/timm/requirements.cuda.txt b/benchmarks/timm/requirements.cuda.txt index 7a66f12e8..619a16aba 100644 --- a/benchmarks/timm/requirements.cuda.txt +++ b/benchmarks/timm/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -72,7 +72,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -164,5 +164,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/timm/requirements.in diff --git a/benchmarks/timm/requirements.rocm.txt b/benchmarks/timm/requirements.rocm.txt index e53283a33..6c09c25b0 100644 --- a/benchmarks/timm/requirements.rocm.txt +++ b/benchmarks/timm/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -60,7 +60,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -80,7 +80,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -173,5 +173,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/timm/requirements.in diff --git a/benchmarks/torchvision/requirements.cuda.txt b/benchmarks/torchvision/requirements.cuda.txt index bb149eec3..e9740262c 100644 --- a/benchmarks/torchvision/requirements.cuda.txt +++ b/benchmarks/torchvision/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -149,5 +149,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/torchvision/requirements.in diff --git a/benchmarks/torchvision/requirements.rocm.txt b/benchmarks/torchvision/requirements.rocm.txt index 58fbe4cd8..40a8ade9b 100644 --- a/benchmarks/torchvision/requirements.rocm.txt +++ b/benchmarks/torchvision/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -56,7 +56,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -158,5 +158,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/torchvision/requirements.in diff --git a/config/base.yaml b/config/base.yaml index 109d3ee10..40595c674 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -23,6 +23,22 @@ _torchvision: --no-stdout: true --epochs: 50 +_flops: + inherits: _defaults + definition: ../benchmarks/flops + group: flops + install_group: torch + plan: + method: per_gpu + + tags: + - diagnostic + - flops + + argv: + --number: 10 + --repeat: 90 + _hf: inherits: _defaults definition: ../benchmarks/huggingface @@ -86,6 +102,44 @@ _accelerate_opt: use_deepspeed: true num_machines: 1 + +fp16: + inherits: _flops + + argv: + --number: 30 + --repeat: 90 + --m: 8192 + --n: 8192 + --dtype: fp16 + + +bf16: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: bf16 + +tf32: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: fp32 + --tf32: true + +fp32: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: fp32 + + resnet50: inherits: _torchvision tags: @@ -100,7 +154,7 @@ resnet50: efficientnet_b4: inherits: _torchvision - tags: + tags: - vision - classification diff --git a/config/standard.yaml b/config/standard.yaml index 809f0a134..2b2363934 100644 --- a/config/standard.yaml +++ b/config/standard.yaml @@ -121,6 +121,22 @@ rwkv: enabled: true weight: 1.0 +fp16: + enabled: true + weight: 0.0 + +bf16: + enabled: true + weight: 0.0 + +tf32: + enabled: true + weight: 0.0 + +fp32: + enabled: true + weight: 0.0 + ################## # Disabled tests # ################## diff --git a/constraints/cuda.txt b/constraints/cuda.txt index c21d70b5e..cb2bbd770 100644 --- a/constraints/cuda.txt +++ b/constraints/cuda.txt @@ -1,2 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -voir @ git+https://github.com/breuleux/voir.git +voir > 0.2.10 + diff --git a/constraints/rocm.txt b/constraints/rocm.txt index 07a8feac5..9b46f6813 100644 --- a/constraints/rocm.txt +++ b/constraints/rocm.txt @@ -1,2 +1,2 @@ --extra-index-url https://download.pytorch.org/whl/rocm5.6/ -voir @ git+https://github.com/breuleux/voir.git +voir > 0.2.10 \ No newline at end of file diff --git a/milabench/_version.py b/milabench/_version.py index 8fc180822..f6782f930 100644 --- a/milabench/_version.py +++ b/milabench/_version.py @@ -1,5 +1,5 @@ """This file is generated, do not modify""" -__tag__ = "v0.0.6-43-g89f56f6" -__commit__ = "89f56f670db0f22880d057262a320c935c217d77" -__date__ = "2023-10-19 19:29:36 -0400" +__tag__ = "v0.0.6-33-ga23bd12" +__commit__ = "a23bd123851402b63116293dc7634b231b7e21b4" +__date__ = "2023-10-31 13:04:11 -0400" diff --git a/milabench/dashboard/__init__.py b/milabench/dashboard/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/dashboard/live_report.py b/milabench/dashboard/live_report.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/dashboard/rawoutput.py b/milabench/dashboard/rawoutput.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/executors.py b/milabench/executors.py index 41d308919..b109c896b 100644 --- a/milabench/executors.py +++ b/milabench/executors.py @@ -630,6 +630,17 @@ def __init__(self, executor: Executor, gpus: list = None, **kwargs) -> None: super().__init__(*executors, **kwargs) +class ActivatorExecutor(SingleCmdExecutor): + def __init__(self, pack: pack.BasePackage, **kwargs): + super().__init__(pack, **kwargs) + + def _argv(self, **_) -> List: + return [ + f"{self.pack.dirs.code / 'activator'}", + f"{self.pack.dirs.venv}" + ] + + # Accelerate class AccelerateLaunchExecutor(SingleCmdExecutor): """Execute a `BasePackage` with Accelerate diff --git a/milabench/schedule.py b/milabench/schedule.py index c572e0e38..21f4b25ed 100644 --- a/milabench/schedule.py +++ b/milabench/schedule.py @@ -23,10 +23,10 @@ def println(line): ) as process: def readoutput(): process.stdout.flush() - line = process.stdout.readline() + for line in process.stdout.readline(): - if callback: - callback(line) + if callback: + callback(line) try: while process.poll() is None: @@ -130,10 +130,12 @@ def launch_milabench(args, sbatch_args=None, dry: bool = False, sync: bool = Fal sbatch_script = importlib_resources.files(__name__) / "scripts" / "milabench_run.bash" sbatch_script = str(sbatch_script) + # salloc --gres=gpu:rtx8000:1 --mem=64G --cpus-per-gpu=4 + if sbatch_args is None: sbatch_args = [ "--ntasks=1", - "--gpus-per-task=rtx8000:1", + "--gpus-per-task=4g.40gb:1", "--cpus-per-task=4", "--time=01:30:00", "--ntasks-per-node=1", diff --git a/milabench/scripts/milabench_docker.bash b/milabench/scripts/milabench_docker.bash new file mode 100644 index 000000000..7a9bfcc19 --- /dev/null +++ b/milabench/scripts/milabench_docker.bash @@ -0,0 +1,5 @@ +#!/bin/bash + + +# CPU only + diff --git a/milabench/scripts/milabench_run.bash b/milabench/scripts/milabench_run.bash index cf502cbf4..693a80139 100755 --- a/milabench/scripts/milabench_run.bash +++ b/milabench/scripts/milabench_run.bash @@ -17,11 +17,13 @@ ARCH="cuda" PYTHON="3.9" BRANCH="master" ORIGIN="https://github.com/mila-iqia/milabench.git" -CONFIG="$SLURM_TMPDIR/milabench/config/standard.yaml" -BASE="$SLURM_TMPDIR/base" +LOC="$SLURM_TMPDIR" +CONFIG="$LOC/milabench/config/standard.yaml" +BASE="$LOC/base" ENV="./env" REMAINING_ARGS="" + while getopts ":hm:p:e:b:o:c:" opt; do case $opt in h) @@ -45,6 +47,12 @@ while getopts ":hm:p:e:b:o:c:" opt; do a) ARCH="$OPTARG" ;; + l) + # FIX ME + LOC="$OPTARG" + CONFIG="$LOC/milabench/config/standard.yaml" + BASE="$LOC/base" + ;; :) echo "Option -$OPTARG requires an argument." >&2 usage @@ -72,7 +80,7 @@ if [ -e $HOME/.credentials.env ]; then source $HOME/.credentials.env fi -cd $SLURM_TMPDIR +cd $LOC # # Create a new environment # @@ -97,7 +105,7 @@ export MILABENCH_CONFIG=$CONFIG git clone --single-branch --depth 1 -b $BRANCH $ORIGIN python -m pip install -e ./milabench -SYSTEM="$SLURM_TMPDIR/system.yaml" +SYSTEM="$LOC/system.yaml" echo "" echo "System" diff --git a/milabench/scripts/setup.bash b/milabench/scripts/setup.bash new file mode 100644 index 000000000..dd3e3f496 --- /dev/null +++ b/milabench/scripts/setup.bash @@ -0,0 +1,111 @@ +#!/bin/bash + +function usage() { + echo "Usage: $0 [-m] [-p]" + echo " -h Display this help message." + echo " -b arch GPU arch (default: cuda)" + echo " -b BRANCH Branch to checkout (default: master)" + echo " -o ORIGIN Origin to use (default: github/mila/milabench)" + echo " -c CONFIG Configuration (default: milabench/config/standard.yaml)" + echo " -e ENV Environment (default: ./env)" + echo " -p PYTHON Python version (default: 3.9)" + echo " ARGUMENT Any additional argument you want to process." + exit 1 +} + +ARCH="cuda" +PYTHON="3.9" +BRANCH="master" +ORIGIN="https://github.com/mila-iqia/milabench.git" +CONFIG="$SLURM_TMPDIR/milabench/config/standard.yaml" +BASE="$SLURM_TMPDIR/base" +ENV="./env" +REMAINING_ARGS="" + +while getopts ":hm:p:e:b:o:c:" opt; do + case $opt in + h) + usage + ;; + p) + PYTHON="$OPTARG" + ;; + b) + BRANCH="$OPTARG" + ;; + o) + ORIGIN="$OPTARG" + ;; + c) + CONFIG="$OPTARG" + ;; + e) + ENV="$OPTARG" + ;; + a) + ARCH="$OPTARG" + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + usage + ;; + esac +done + +shift "$((OPTIND-1))" +REMAINING_ARGS="$@" + +echo " PYTHON: $PYTHON" +echo " branch: $BRANCH" +echo " origin: $ORIGIN" +echo " config: $CONFIG" +echo " env: $ENV" +echo " args: $REMAINING_ARGS" +# +# Fix problem with conda saying it is not "init properly" +# +CONDA_EXEC="$(which conda)" +CONDA_BASE=$(dirname $CONDA_EXEC) +source $CONDA_BASE/../etc/profile.d/conda.sh + +if [ -e $HOME/.credentials.env ]; then + source $HOME/.credentials.env +fi + +cd $SLURM_TMPDIR +# +# Create a new environment +# +if [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! -d "$CONDA_ENVS/$ENV" ]; then + conda create --prefix $ENV python=$PYTHON -y +fi +conda activate $ENV + +export HF_HOME=$BASE/cache +export HF_DATASETS_CACHE=$BASE/cache +export TORCH_HOME=$BASE/cache +export XDG_CACHE_HOME=$BASE/cache +export MILABENCH_GPU_ARCH=$ARCH + +export MILABENCH_DASH=no +export PYTHONUNBUFFERED=1 +export MILABENCH_BASE=$BASE +export MILABENCH_CONFIG=$CONFIG + +# +# Fetch the repo +# +git clone --single-branch --depth 1 -b $BRANCH $ORIGIN +python -m pip install -e ./milabench + +SYSTEM="$SLURM_TMPDIR/system.yaml" + +echo "" +echo "System" +echo "------" + +milabench slurm_system +milabench slurm_system > $SYSTEM + +module load gcc/9.3.0 +module load cuda/11.8