From 08069b3398a29300a5515799925e2f1924a188fe Mon Sep 17 00:00:00 2001 From: Pierre Delaunay Date: Mon, 16 Oct 2023 13:57:55 -0400 Subject: [PATCH 01/10] Add flops benchmark --- benchmarks/flops/benchfile.py | 10 +++ benchmarks/flops/main.py | 109 +++++++++++++++++++++++++++++++ benchmarks/flops/prepare.py | 0 benchmarks/flops/requirements.in | 4 ++ config/base.yaml | 45 +++++++++++++ config/standard.yaml | 16 +++++ 6 files changed, 184 insertions(+) create mode 100644 benchmarks/flops/benchfile.py create mode 100644 benchmarks/flops/main.py create mode 100644 benchmarks/flops/prepare.py create mode 100644 benchmarks/flops/requirements.in diff --git a/benchmarks/flops/benchfile.py b/benchmarks/flops/benchfile.py new file mode 100644 index 000000000..16542b17a --- /dev/null +++ b/benchmarks/flops/benchfile.py @@ -0,0 +1,10 @@ +from milabench.pack import Package + + +class FlopsBenchmarch(Package): + base_requirements = "requirements.in" + prepare_script = "prepare.py" + main_script = "main.py" + + +__pack__ = FlopsBenchmarch diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py new file mode 100644 index 000000000..f7e3265a0 --- /dev/null +++ b/benchmarks/flops/main.py @@ -0,0 +1,109 @@ +from argparse import ArgumentParser +import json +import time +import sys + +import torch + +from voir.smuggle import SmuggleWriter +from voir.instruments.gpu import get_gpu_info +from voir.instruments.utils import Monitor + +KILO = 1e3 +MEGA = 1e6 +GIGA = 1e9 +TERA = 1e12 +EXA = 1e18 + + +def f(N, m=5000000, n=256, unit=TERA, dtype=torch.float32): + torch.cuda.empty_cache() + a = torch.eye(n, dtype=dtype, device="cuda:0") + x = torch.randn((m, n), dtype=dtype, device="cuda:0") + y = torch.zeros_like(x) + + torch.cuda.synchronize() + ts = -time.time() + + for _ in range(N): + # No allocation in main loop using dual-out strategy + y = torch.mm(x, a, out=y) + x = torch.mm(y, a, out=x) + + torch.cuda.synchronize() + ts += time.time() + torch.cuda.empty_cache() + F = N * (2 * m * n * n + 2 * m * n * n) + return F / ts / unit + + + +def setupvoir(): + data_file = SmuggleWriter(sys.stdout) + def log(data): + if data_file is not None: + print(json.dumps(data), file=data_file) + + def monitor_fn(): + data = { + gpu["device"]: { + "memory": [ + gpu["memory"]["used"], + gpu["memory"]["total"], + ], + "load": gpu["utilization"]["compute"], + "temperature": gpu["temperature"], + } + for gpu in get_gpu_info()["gpus"].values() + } + log({"task": "main", "gpudata": data}) + + monitor = Monitor(3, monitor_fn) + monitor.start() + return log, monitor + + + +def main(): + dtypes = { + 'bf16': torch.bfloat16, + 'fp16': torch.float16, + 'fp32': torch.float32, + } + + parser = ArgumentParser() + parser.add_argument('--repeat', type=int, default=100) + parser.add_argument('--m', type=int, default=256) + parser.add_argument('--n', type=int, default=256) + parser.add_argument('--dtype', type=str, default='fp32', choices=dtypes.keys()) + parser.add_argument('--unit', default='TERA') + parser.add_argument('--tf32', action='store_true', default=False) + + args = parser.parse_args() + + torch.backends.cuda.matmul.allow_tf32 = False + if args.tf32: + torch.backends.cuda.matmul.allow_tf32 = True + + log, monitor = setupvoir() + + flops = f( + args.repeat, + args.m, + args.n, + args.unit, + dtypes[args.dtype] + ) + + log({ + "task": "train", + "rate": flops, + "units": "Tflops" + }) + + monitor.stop() + + + + + diff --git a/benchmarks/flops/prepare.py b/benchmarks/flops/prepare.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/flops/requirements.in b/benchmarks/flops/requirements.in new file mode 100644 index 000000000..bfda01636 --- /dev/null +++ b/benchmarks/flops/requirements.in @@ -0,0 +1,4 @@ +torch +torchvision +tqdm +voir>=0.2.9,<0.3 diff --git a/config/base.yaml b/config/base.yaml index d6f5c074d..85b0659c0 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -23,6 +23,16 @@ _torchvision: --no-stdout: true --epochs: 50 +_flops: + inherits: _defaults + definition: ../benchmarks/flops + group: torchvision + install_group: torch + plan: + method: per_gpu + argv: + --repeat: 100 + _hf: inherits: _defaults definition: ../benchmarks/huggingface @@ -86,6 +96,41 @@ _accelerate_opt: use_deepspeed: true num_machines: 1 + +fp16: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: fp16 + +bf16: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: bf16 + +tf32: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: fp32 + --tf32: true + +fp32: + inherits: _flops + + argv: + --m: 8192 + --n: 8192 + --dtype: fp32 + + resnet50: inherits: _torchvision tags: diff --git a/config/standard.yaml b/config/standard.yaml index 809f0a134..b6d3cc752 100644 --- a/config/standard.yaml +++ b/config/standard.yaml @@ -121,6 +121,22 @@ rwkv: enabled: true weight: 1.0 +fp16: + enabled: true + weight: 0.0 + +bf16: + enabled: true + weight: 0.0 + +tf322: + enabled: true + weight: 0.0 + +fp32: + enabled: true + weight: 0.0 + ################## # Disabled tests # ################## From 8e80c847b19b9afd90878f3ca9d691fee5c5aa88 Mon Sep 17 00:00:00 2001 From: Pierre Delaunay Date: Tue, 17 Oct 2023 11:11:37 -0400 Subject: [PATCH 02/10] Add model flops --- benchmarks/flops/main.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py index f7e3265a0..5fb193472 100644 --- a/benchmarks/flops/main.py +++ b/benchmarks/flops/main.py @@ -16,6 +16,39 @@ EXA = 1e18 +def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA): + # Not sure how much thop is correct in its computation + # it says it return MAC but I feel its methods is wrong + from thop import profile + + # MAC: Multiply–accumulate operation + batch = torch.randn(*shape, dtype=dtype, device="cuda:0") + + flops, _ = profile(model, inputs=(batch,)) + + with torch.no_grad(): + # Prepare + torch.cuda.empty_cache() + + batch = batch.cuda() + model = model.to(dtype=dtype, device="cuda:0") + + torch.cuda.synchronize() + + # Start + start = time.time() + + for i in range(repeat): + _ = model(batch) + + torch.cuda.synchronize() + end = time.time() + # -- + + return (flops * repeat) / (end - start) / unit + + + def f(N, m=5000000, n=256, unit=TERA, dtype=torch.float32): torch.cuda.empty_cache() a = torch.eye(n, dtype=dtype, device="cuda:0") From 7199fbe08a3db1c2ea825418ba33b1736700679c Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Thu, 26 Oct 2023 12:50:52 -0400 Subject: [PATCH 03/10] - --- config/standard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/standard.yaml b/config/standard.yaml index b6d3cc752..2b2363934 100644 --- a/config/standard.yaml +++ b/config/standard.yaml @@ -129,7 +129,7 @@ bf16: enabled: true weight: 0.0 -tf322: +tf32: enabled: true weight: 0.0 From eea024931b2426b55ef0e863bb36c937c7690bc6 Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Thu, 26 Oct 2023 13:17:25 -0400 Subject: [PATCH 04/10] Generate pinned dependencies --- .pin/constraints-cuda-torch.txt | 10 +-- .pin/constraints-rocm-torch.txt | 14 ++-- benchmarks/flops/prepare.py | 1 + benchmarks/flops/requirements.cuda.txt | 81 +++++++++++++++++++++++ benchmarks/flops/requirements.in | 1 - benchmarks/flops/requirements.rocm.txt | 90 ++++++++++++++++++++++++++ benchmarks/flops/voirfile.py | 40 ++++++++++++ config/base.yaml | 2 +- 8 files changed, 225 insertions(+), 14 deletions(-) mode change 100644 => 100755 benchmarks/flops/prepare.py create mode 100644 benchmarks/flops/requirements.cuda.txt create mode 100644 benchmarks/flops/requirements.rocm.txt create mode 100644 benchmarks/flops/voirfile.py diff --git a/.pin/constraints-cuda-torch.txt b/.pin/constraints-cuda-torch.txt index bb30508fa..b291d5f1f 100644 --- a/.pin/constraints-cuda-torch.txt +++ b/.pin/constraints-cuda-torch.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=.pin/constraints-cuda-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in +# pip-compile --config=pyproject.toml --output-file=.pin/constraints-cuda-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 @@ -146,7 +146,7 @@ ninja==1.11.1.1 # deepspeed numpy==1.26.1 # via - # -r benchmarks/stargan/requirements.in + # -r benchmarks/rwkv/requirements.in # -r benchmarks/super-slomo/requirements.in # accelerate # datasets @@ -294,7 +294,7 @@ tokenizers==0.14.1 # via transformers torch==2.1.0+cu118 # via - # -r benchmarks/huggingface/requirements.in + # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/super-slomo/requirements.in # accelerate # deepspeed @@ -313,8 +313,8 @@ torchrec==0.5.0+cu118 # via -r benchmarks/dlrm/requirements.in torchvision==0.16.0+cu118 # via + # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/super-slomo/requirements.in - # -r benchmarks/torchvision/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.6.0 @@ -361,7 +361,7 @@ varname==0.10.0 # via giving voir @ git+https://github.com/breuleux/voir.git # via - # -r benchmarks/huggingface/requirements.in + # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/super-slomo/requirements.in websocket-client==1.6.4 # via docker diff --git a/.pin/constraints-rocm-torch.txt b/.pin/constraints-rocm-torch.txt index 484e52a77..4857e582c 100644 --- a/.pin/constraints-rocm-torch.txt +++ b/.pin/constraints-rocm-torch.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=.pin/constraints-rocm-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in +# pip-compile --config=pyproject.toml --output-file=.pin/constraints-rocm-torch.txt --resolver=backtracking .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/rwkv/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in # --extra-index-url https://download.pytorch.org/whl/rocm5.6/ @@ -150,8 +150,8 @@ ninja==1.11.1.1 # deepspeed numpy==1.26.1 # via - # -r benchmarks/dlrm/requirements.in # -r benchmarks/stargan/requirements.in + # -r benchmarks/super-slomo/requirements.in # accelerate # datasets # deepspeed @@ -302,7 +302,7 @@ tokenizers==0.14.1 torch==2.1.0+rocm5.6 # via # -r benchmarks/stargan/requirements.in - # -r benchmarks/timm/requirements.in + # -r benchmarks/super-slomo/requirements.in # accelerate # deepspeed # pytorch-lightning @@ -322,15 +322,15 @@ torchrec==0.5.0 torchvision==0.16.0+rocm5.6 # via # -r benchmarks/stargan/requirements.in - # -r benchmarks/timm/requirements.in + # -r benchmarks/super-slomo/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.6.0 # via -r benchmarks/dlrm/requirements.in tqdm==4.66.1 # via - # -r benchmarks/dlrm/requirements.in - # -r benchmarks/torchvision/requirements.in + # -r benchmarks/flops/requirements.in + # -r benchmarks/super-slomo/requirements.in # datasets # deepspeed # evaluate @@ -368,7 +368,7 @@ varname==0.10.0 voir @ git+https://github.com/breuleux/voir.git # via # -r benchmarks/stargan/requirements.in - # -r benchmarks/timm/requirements.in + # -r benchmarks/super-slomo/requirements.in websocket-client==1.6.4 # via docker werkzeug==3.0.1 diff --git a/benchmarks/flops/prepare.py b/benchmarks/flops/prepare.py old mode 100644 new mode 100755 index e69de29bb..4265cc3e6 --- a/benchmarks/flops/prepare.py +++ b/benchmarks/flops/prepare.py @@ -0,0 +1 @@ +#!/usr/bin/env python diff --git a/benchmarks/flops/requirements.cuda.txt b/benchmarks/flops/requirements.cuda.txt new file mode 100644 index 000000000..a836b4c7f --- /dev/null +++ b/benchmarks/flops/requirements.cuda.txt @@ -0,0 +1,81 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-torchvision.txt benchmarks/flops/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/cu118 + +certifi==2023.7.22 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +charset-normalizer==3.3.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +filelock==3.12.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch + # triton +fsspec==2023.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +idna==3.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests +jinja2==3.1.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +markupsafe==2.1.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # jinja2 +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # sympy +networkx==3.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +numpy==1.26.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +pillow==10.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +requests==2.31.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchvision +sympy==1.12 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +torch==2.1.0+cu118 + # via + # -r benchmarks/flops/requirements.in + # torchvision +torchvision==0.16.0+cu118 + # via -r benchmarks/flops/requirements.in +tqdm==4.66.1 + # via -r benchmarks/flops/requirements.in +triton==2.1.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +typing-extensions==4.8.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torch +urllib3==1.26.18 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # requests diff --git a/benchmarks/flops/requirements.in b/benchmarks/flops/requirements.in index bfda01636..be3d29759 100644 --- a/benchmarks/flops/requirements.in +++ b/benchmarks/flops/requirements.in @@ -1,4 +1,3 @@ torch torchvision tqdm -voir>=0.2.9,<0.3 diff --git a/benchmarks/flops/requirements.rocm.txt b/benchmarks/flops/requirements.rocm.txt new file mode 100644 index 000000000..b680b543a --- /dev/null +++ b/benchmarks/flops/requirements.rocm.txt @@ -0,0 +1,90 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-torchvision.txt benchmarks/flops/requirements.in +# +--extra-index-url https://download.pytorch.org/whl/rocm5.6/ + +certifi==2023.7.22 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +charset-normalizer==3.3.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +cmake==3.27.7 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm +filelock==3.12.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm + # torch +fsspec==2023.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +idna==3.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests +jinja2==3.1.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +lit==17.0.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # pytorch-triton-rocm +markupsafe==2.1.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # jinja2 +mpmath==1.3.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # sympy +networkx==3.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +numpy==1.26.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +pillow==10.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +pytorch-triton-rocm==2.1.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +requests==2.31.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torchvision +sympy==1.12 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +torch==2.1.0+rocm5.6 + # via + # -r benchmarks/flops/requirements.in + # pytorch-triton-rocm + # torchvision +torchvision==0.16.0+rocm5.6 + # via -r benchmarks/flops/requirements.in +tqdm==4.66.1 + # via -r benchmarks/flops/requirements.in +typing-extensions==4.8.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # torch +urllib3==1.26.18 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # requests diff --git a/benchmarks/flops/voirfile.py b/benchmarks/flops/voirfile.py new file mode 100644 index 000000000..0ef0854ac --- /dev/null +++ b/benchmarks/flops/voirfile.py @@ -0,0 +1,40 @@ +from dataclasses import dataclass + +from voir import configurable +from voir.instruments import dash, early_stop, gpu_monitor, log, rate + + +@dataclass +class Config: + """voir configuration""" + + # Whether to display the dash or not + dash: bool = False + + # How often to log the rates + interval: str = "1s" + + # Number of rates to skip before logging + skip: int = 5 + + # Number of rates to log before stopping + stop: int = 1 + + # Number of seconds between each gpu poll + gpu_poll: int = 3 + + +@configurable +def instrument_main(ov, options: Config): + import torch + + yield ov.phases.init + + if options.dash: + ov.require(dash) + + ov.require( + log("value", "progress", "rate", "units", "loss", "gpudata", context="task"), + early_stop(n=options.stop, key="rate", task="train"), + gpu_monitor(poll_interval=options.gpu_poll), + ) diff --git a/config/base.yaml b/config/base.yaml index 60bba3c12..861a607e0 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -26,7 +26,7 @@ _torchvision: _flops: inherits: _defaults definition: ../benchmarks/flops - group: torchvision + group: flops install_group: torch plan: method: per_gpu From ecc19d6897b1da5883a5b45636f66958e21646df Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Tue, 31 Oct 2023 12:46:13 -0400 Subject: [PATCH 05/10] Add repeat & number args --- benchmarks/flops/main.py | 61 +++++++------ benchmarks/flops/voirfile.py | 40 --------- config/base.yaml | 3 +- milabench/dashboard/__init__.py | 0 milabench/dashboard/live_report.py | 0 milabench/dashboard/rawoutput.py | 0 milabench/scripts/milabench_docker.bash | 5 ++ milabench/scripts/milabench_run.bash | 16 +++- milabench/scripts/setup.bash | 111 ++++++++++++++++++++++++ 9 files changed, 164 insertions(+), 72 deletions(-) delete mode 100644 benchmarks/flops/voirfile.py create mode 100644 milabench/dashboard/__init__.py create mode 100644 milabench/dashboard/live_report.py create mode 100644 milabench/dashboard/rawoutput.py create mode 100644 milabench/scripts/milabench_docker.bash create mode 100644 milabench/scripts/setup.bash diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py index 5fb193472..31bac438b 100644 --- a/benchmarks/flops/main.py +++ b/benchmarks/flops/main.py @@ -49,26 +49,31 @@ def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, un -def f(N, m=5000000, n=256, unit=TERA, dtype=torch.float32): +def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32): torch.cuda.empty_cache() a = torch.eye(n, dtype=dtype, device="cuda:0") x = torch.randn((m, n), dtype=dtype, device="cuda:0") y = torch.zeros_like(x) - torch.cuda.synchronize() - ts = -time.time() - - for _ in range(N): - # No allocation in main loop using dual-out strategy - y = torch.mm(x, a, out=y) - x = torch.mm(y, a, out=x) - - torch.cuda.synchronize() - ts += time.time() - torch.cuda.empty_cache() F = N * (2 * m * n * n + 2 * m * n * n) - return F / ts / unit + results = [0 for _ in range(R)] + + for i in range(R): + torch.cuda.synchronize() + ts = -time.time() + + for _ in range(N): + # No allocation in main loop using dual-out strategy + y = torch.mm(x, a, out=y) + x = torch.mm(y, a, out=x) + + torch.cuda.synchronize() + ts += time.time() + + results[i] = F / ts / unit + torch.cuda.empty_cache() + return results def setupvoir(): @@ -106,6 +111,7 @@ def main(): parser = ArgumentParser() parser.add_argument('--repeat', type=int, default=100) + parser.add_argument('--number', type=int, default=100) parser.add_argument('--m', type=int, default=256) parser.add_argument('--n', type=int, default=256) parser.add_argument('--dtype', type=str, default='fp32', choices=dtypes.keys()) @@ -120,20 +126,21 @@ def main(): log, monitor = setupvoir() - flops = f( - args.repeat, - args.m, - args.n, - args.unit, - dtypes[args.dtype] - ) - - log({ - "task": "train", - "rate": flops, - "units": "Tflops" - }) - + for flops in f( + args.number, + args.repeat, + args.m, + args.n, + args.unit, + dtypes[args.dtype] + ): + + log({ + "task": "train", + "rate": flops, + "units": "Tflops" + }) + monitor.stop() diff --git a/benchmarks/flops/voirfile.py b/benchmarks/flops/voirfile.py deleted file mode 100644 index 0ef0854ac..000000000 --- a/benchmarks/flops/voirfile.py +++ /dev/null @@ -1,40 +0,0 @@ -from dataclasses import dataclass - -from voir import configurable -from voir.instruments import dash, early_stop, gpu_monitor, log, rate - - -@dataclass -class Config: - """voir configuration""" - - # Whether to display the dash or not - dash: bool = False - - # How often to log the rates - interval: str = "1s" - - # Number of rates to skip before logging - skip: int = 5 - - # Number of rates to log before stopping - stop: int = 1 - - # Number of seconds between each gpu poll - gpu_poll: int = 3 - - -@configurable -def instrument_main(ov, options: Config): - import torch - - yield ov.phases.init - - if options.dash: - ov.require(dash) - - ov.require( - log("value", "progress", "rate", "units", "loss", "gpudata", context="task"), - early_stop(n=options.stop, key="rate", task="train"), - gpu_monitor(poll_interval=options.gpu_poll), - ) diff --git a/config/base.yaml b/config/base.yaml index 861a607e0..2c69db738 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -31,7 +31,8 @@ _flops: plan: method: per_gpu argv: - --repeat: 100 + --number: 30 + --repeat: 30 _hf: inherits: _defaults diff --git a/milabench/dashboard/__init__.py b/milabench/dashboard/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/dashboard/live_report.py b/milabench/dashboard/live_report.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/dashboard/rawoutput.py b/milabench/dashboard/rawoutput.py new file mode 100644 index 000000000..e69de29bb diff --git a/milabench/scripts/milabench_docker.bash b/milabench/scripts/milabench_docker.bash new file mode 100644 index 000000000..7a9bfcc19 --- /dev/null +++ b/milabench/scripts/milabench_docker.bash @@ -0,0 +1,5 @@ +#!/bin/bash + + +# CPU only + diff --git a/milabench/scripts/milabench_run.bash b/milabench/scripts/milabench_run.bash index cf502cbf4..693a80139 100755 --- a/milabench/scripts/milabench_run.bash +++ b/milabench/scripts/milabench_run.bash @@ -17,11 +17,13 @@ ARCH="cuda" PYTHON="3.9" BRANCH="master" ORIGIN="https://github.com/mila-iqia/milabench.git" -CONFIG="$SLURM_TMPDIR/milabench/config/standard.yaml" -BASE="$SLURM_TMPDIR/base" +LOC="$SLURM_TMPDIR" +CONFIG="$LOC/milabench/config/standard.yaml" +BASE="$LOC/base" ENV="./env" REMAINING_ARGS="" + while getopts ":hm:p:e:b:o:c:" opt; do case $opt in h) @@ -45,6 +47,12 @@ while getopts ":hm:p:e:b:o:c:" opt; do a) ARCH="$OPTARG" ;; + l) + # FIX ME + LOC="$OPTARG" + CONFIG="$LOC/milabench/config/standard.yaml" + BASE="$LOC/base" + ;; :) echo "Option -$OPTARG requires an argument." >&2 usage @@ -72,7 +80,7 @@ if [ -e $HOME/.credentials.env ]; then source $HOME/.credentials.env fi -cd $SLURM_TMPDIR +cd $LOC # # Create a new environment # @@ -97,7 +105,7 @@ export MILABENCH_CONFIG=$CONFIG git clone --single-branch --depth 1 -b $BRANCH $ORIGIN python -m pip install -e ./milabench -SYSTEM="$SLURM_TMPDIR/system.yaml" +SYSTEM="$LOC/system.yaml" echo "" echo "System" diff --git a/milabench/scripts/setup.bash b/milabench/scripts/setup.bash new file mode 100644 index 000000000..dd3e3f496 --- /dev/null +++ b/milabench/scripts/setup.bash @@ -0,0 +1,111 @@ +#!/bin/bash + +function usage() { + echo "Usage: $0 [-m] [-p]" + echo " -h Display this help message." + echo " -b arch GPU arch (default: cuda)" + echo " -b BRANCH Branch to checkout (default: master)" + echo " -o ORIGIN Origin to use (default: github/mila/milabench)" + echo " -c CONFIG Configuration (default: milabench/config/standard.yaml)" + echo " -e ENV Environment (default: ./env)" + echo " -p PYTHON Python version (default: 3.9)" + echo " ARGUMENT Any additional argument you want to process." + exit 1 +} + +ARCH="cuda" +PYTHON="3.9" +BRANCH="master" +ORIGIN="https://github.com/mila-iqia/milabench.git" +CONFIG="$SLURM_TMPDIR/milabench/config/standard.yaml" +BASE="$SLURM_TMPDIR/base" +ENV="./env" +REMAINING_ARGS="" + +while getopts ":hm:p:e:b:o:c:" opt; do + case $opt in + h) + usage + ;; + p) + PYTHON="$OPTARG" + ;; + b) + BRANCH="$OPTARG" + ;; + o) + ORIGIN="$OPTARG" + ;; + c) + CONFIG="$OPTARG" + ;; + e) + ENV="$OPTARG" + ;; + a) + ARCH="$OPTARG" + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + usage + ;; + esac +done + +shift "$((OPTIND-1))" +REMAINING_ARGS="$@" + +echo " PYTHON: $PYTHON" +echo " branch: $BRANCH" +echo " origin: $ORIGIN" +echo " config: $CONFIG" +echo " env: $ENV" +echo " args: $REMAINING_ARGS" +# +# Fix problem with conda saying it is not "init properly" +# +CONDA_EXEC="$(which conda)" +CONDA_BASE=$(dirname $CONDA_EXEC) +source $CONDA_BASE/../etc/profile.d/conda.sh + +if [ -e $HOME/.credentials.env ]; then + source $HOME/.credentials.env +fi + +cd $SLURM_TMPDIR +# +# Create a new environment +# +if [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! -d "$CONDA_ENVS/$ENV" ]; then + conda create --prefix $ENV python=$PYTHON -y +fi +conda activate $ENV + +export HF_HOME=$BASE/cache +export HF_DATASETS_CACHE=$BASE/cache +export TORCH_HOME=$BASE/cache +export XDG_CACHE_HOME=$BASE/cache +export MILABENCH_GPU_ARCH=$ARCH + +export MILABENCH_DASH=no +export PYTHONUNBUFFERED=1 +export MILABENCH_BASE=$BASE +export MILABENCH_CONFIG=$CONFIG + +# +# Fetch the repo +# +git clone --single-branch --depth 1 -b $BRANCH $ORIGIN +python -m pip install -e ./milabench + +SYSTEM="$SLURM_TMPDIR/system.yaml" + +echo "" +echo "System" +echo "------" + +milabench slurm_system +milabench slurm_system > $SYSTEM + +module load gcc/9.3.0 +module load cuda/11.8 From a23bd123851402b63116293dc7634b231b7e21b4 Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Tue, 31 Oct 2023 13:04:11 -0400 Subject: [PATCH 06/10] Add tag --- config/base.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/config/base.yaml b/config/base.yaml index 2c69db738..4b4ff827e 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -30,6 +30,11 @@ _flops: install_group: torch plan: method: per_gpu + + tags: + - diagnostic + - flops + argv: --number: 30 --repeat: 30 @@ -146,7 +151,7 @@ resnet50: efficientnet_b4: inherits: _torchvision - tags: + tags: - vision - classification From be34e4cdec8b220c114796556817c01f1869cc29 Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Tue, 31 Oct 2023 14:53:03 -0400 Subject: [PATCH 07/10] Add an activator --- benchmarks/flops/benchfile.py | 11 ++++++++++- benchmarks/flops/main.py | 2 ++ milabench/_version.py | 6 +++--- milabench/executors.py | 11 +++++++++++ milabench/schedule.py | 2 ++ 5 files changed, 28 insertions(+), 4 deletions(-) mode change 100644 => 100755 benchmarks/flops/main.py diff --git a/benchmarks/flops/benchfile.py b/benchmarks/flops/benchfile.py index 16542b17a..823f28f27 100644 --- a/benchmarks/flops/benchfile.py +++ b/benchmarks/flops/benchfile.py @@ -5,6 +5,15 @@ class FlopsBenchmarch(Package): base_requirements = "requirements.in" prepare_script = "prepare.py" main_script = "main.py" - + + def build_run_plan(self) -> "execs.Executor": + import milabench.executors as execs + + main = self.dirs.code / self.main_script + pack = execs.PackExecutor(self, *self.argv, lazy=True) + # pack = execs.VoirExecutor(pack, cwd=main.parent) + pack = execs.ActivatorExecutor(pack) + return pack + __pack__ = FlopsBenchmarch diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py old mode 100644 new mode 100755 index 31bac438b..93b020a76 --- a/benchmarks/flops/main.py +++ b/benchmarks/flops/main.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + from argparse import ArgumentParser import json import time diff --git a/milabench/_version.py b/milabench/_version.py index 8fc180822..f6782f930 100644 --- a/milabench/_version.py +++ b/milabench/_version.py @@ -1,5 +1,5 @@ """This file is generated, do not modify""" -__tag__ = "v0.0.6-43-g89f56f6" -__commit__ = "89f56f670db0f22880d057262a320c935c217d77" -__date__ = "2023-10-19 19:29:36 -0400" +__tag__ = "v0.0.6-33-ga23bd12" +__commit__ = "a23bd123851402b63116293dc7634b231b7e21b4" +__date__ = "2023-10-31 13:04:11 -0400" diff --git a/milabench/executors.py b/milabench/executors.py index 41d308919..b109c896b 100644 --- a/milabench/executors.py +++ b/milabench/executors.py @@ -630,6 +630,17 @@ def __init__(self, executor: Executor, gpus: list = None, **kwargs) -> None: super().__init__(*executors, **kwargs) +class ActivatorExecutor(SingleCmdExecutor): + def __init__(self, pack: pack.BasePackage, **kwargs): + super().__init__(pack, **kwargs) + + def _argv(self, **_) -> List: + return [ + f"{self.pack.dirs.code / 'activator'}", + f"{self.pack.dirs.venv}" + ] + + # Accelerate class AccelerateLaunchExecutor(SingleCmdExecutor): """Execute a `BasePackage` with Accelerate diff --git a/milabench/schedule.py b/milabench/schedule.py index c572e0e38..c1b7a8009 100644 --- a/milabench/schedule.py +++ b/milabench/schedule.py @@ -130,6 +130,8 @@ def launch_milabench(args, sbatch_args=None, dry: bool = False, sync: bool = Fal sbatch_script = importlib_resources.files(__name__) / "scripts" / "milabench_run.bash" sbatch_script = str(sbatch_script) + # salloc --gres=gpu:rtx8000:1 --mem=64G --cpus-per-gpu=4 + if sbatch_args is None: sbatch_args = [ "--ntasks=1", From c1da61b1b5098b90270cf4d69bbcdf5f4323d658 Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Tue, 31 Oct 2023 16:08:23 -0400 Subject: [PATCH 08/10] Tweaks --- benchmarks/flops/activator | 7 +++ benchmarks/flops/main.py | 86 ++++++++++++++++++++++---------- benchmarks/flops/requirements.in | 1 + config/base.yaml | 4 +- 4 files changed, 69 insertions(+), 29 deletions(-) create mode 100755 benchmarks/flops/activator diff --git a/benchmarks/flops/activator b/benchmarks/flops/activator new file mode 100755 index 000000000..083c28cb1 --- /dev/null +++ b/benchmarks/flops/activator @@ -0,0 +1,7 @@ +#!/bin/bash + +venv="$1" +shift + +source "$venv"/bin/activate +exec "$@" diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py index 93b020a76..00aacc8e9 100755 --- a/benchmarks/flops/main.py +++ b/benchmarks/flops/main.py @@ -4,6 +4,7 @@ import json import time import sys +import multiprocessing import torch @@ -18,6 +19,32 @@ EXA = 1e18 +def _worker(state, queue, func, delay): + import time + + while state['running']: + queue.put(func()) + time.sleep(delay) + +class Monitor: + def __init__(self, delay, func): + self.manager = multiprocessing.Manager() + self.state = self.manager.dict() + self.state['running'] = True + self.results = multiprocessing.Queue() + self.process = multiprocessing.Process( + target=_worker, + args=(self.state, self.results, func, delay), + ) + + def start(self): + self.process.start() + + def stop(self): + self.state['running'] = False + self.process.join() + + def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, unit=TERA): # Not sure how much thop is correct in its computation # it says it return MAC but I feel its methods is wrong @@ -51,15 +78,14 @@ def modelflops(model: torch.nn.Module, shape, repeat=10, dtype=torch.float32, un -def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32): +def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32, log=None): torch.cuda.empty_cache() a = torch.eye(n, dtype=dtype, device="cuda:0") x = torch.randn((m, n), dtype=dtype, device="cuda:0") y = torch.zeros_like(x) F = N * (2 * m * n * n + 2 * m * n * n) - results = [0 for _ in range(R)] - + for i in range(R): torch.cuda.synchronize() ts = -time.time() @@ -72,17 +98,28 @@ def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32): torch.cuda.synchronize() ts += time.time() - results[i] = F / ts / unit - + if log is not None: + log({ + "task": "train", + "rate": F / ts / unit, + "units": "Tflops" + }) + torch.cuda.empty_cache() - return results def setupvoir(): - data_file = SmuggleWriter(sys.stdout) + # wtf this do + # data_file = SmuggleWriter(sys.stdout) + data_file = sys.stdout + def log(data): if data_file is not None: + data["t"] = time.time() print(json.dumps(data), file=data_file) + + while not monitor.results.empty(): + print(json.dumps(monitor.results.get()), file=data_file) def monitor_fn(): data = { @@ -96,9 +133,9 @@ def monitor_fn(): } for gpu in get_gpu_info()["gpus"].values() } - log({"task": "main", "gpudata": data}) + return {"task": "main", "gpudata": data, "t": time.time()} - monitor = Monitor(3, monitor_fn) + monitor = Monitor(0.5, monitor_fn) monitor.start() return log, monitor @@ -117,35 +154,30 @@ def main(): parser.add_argument('--m', type=int, default=256) parser.add_argument('--n', type=int, default=256) parser.add_argument('--dtype', type=str, default='fp32', choices=dtypes.keys()) - parser.add_argument('--unit', default='TERA') parser.add_argument('--tf32', action='store_true', default=False) args = parser.parse_args() - + torch.backends.cuda.matmul.allow_tf32 = False if args.tf32: torch.backends.cuda.matmul.allow_tf32 = True log, monitor = setupvoir() - for flops in f( - args.number, - args.repeat, - args.m, - args.n, - args.unit, - dtypes[args.dtype] - ): - - log({ - "task": "train", - "rate": flops, - "units": "Tflops" - }) - + f( + args.number, + args.repeat, + args.m, + args.n, + TERA, + dtypes[args.dtype], + log + ) + monitor.stop() - +if __name__ == "__main__": + main() diff --git a/benchmarks/flops/requirements.in b/benchmarks/flops/requirements.in index be3d29759..7d30d94e7 100644 --- a/benchmarks/flops/requirements.in +++ b/benchmarks/flops/requirements.in @@ -1,3 +1,4 @@ torch torchvision tqdm +voir diff --git a/config/base.yaml b/config/base.yaml index 4b4ff827e..9947b97b2 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -36,8 +36,8 @@ _flops: - flops argv: - --number: 30 - --repeat: 30 + --number: 10 + --repeat: 90 _hf: inherits: _defaults From 5131c2c3a302b99f7d2fe425f0748c98f2b7d625 Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Tue, 31 Oct 2023 18:27:21 -0400 Subject: [PATCH 09/10] Working --- benchmarks/flops/benchfile.py | 2 +- benchmarks/flops/main.py | 5 +++-- benchmarks/flops/requirements.cuda.txt | 2 ++ benchmarks/flops/requirements.rocm.txt | 2 ++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/benchmarks/flops/benchfile.py b/benchmarks/flops/benchfile.py index 823f28f27..b00415f0f 100644 --- a/benchmarks/flops/benchfile.py +++ b/benchmarks/flops/benchfile.py @@ -12,7 +12,7 @@ def build_run_plan(self) -> "execs.Executor": main = self.dirs.code / self.main_script pack = execs.PackExecutor(self, *self.argv, lazy=True) # pack = execs.VoirExecutor(pack, cwd=main.parent) - pack = execs.ActivatorExecutor(pack) + pack = execs.ActivatorExecutor(pack, use_stdout=True) return pack diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py index 00aacc8e9..d72bf7186 100755 --- a/benchmarks/flops/main.py +++ b/benchmarks/flops/main.py @@ -110,8 +110,8 @@ def f(N, R=30, m=5000000, n=256, unit=TERA, dtype=torch.float32, log=None): def setupvoir(): # wtf this do - # data_file = SmuggleWriter(sys.stdout) - data_file = sys.stdout + data_file = SmuggleWriter(sys.stdout) + # data_file = sys.stdout def log(data): if data_file is not None: @@ -130,6 +130,7 @@ def monitor_fn(): ], "load": gpu["utilization"]["compute"], "temperature": gpu["temperature"], + "power": gpu["power"] } for gpu in get_gpu_info()["gpus"].values() } diff --git a/benchmarks/flops/requirements.cuda.txt b/benchmarks/flops/requirements.cuda.txt index a836b4c7f..d88213bfe 100644 --- a/benchmarks/flops/requirements.cuda.txt +++ b/benchmarks/flops/requirements.cuda.txt @@ -79,3 +79,5 @@ urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests +voir @ git+https://github.com/breuleux/voir.git + # via -r benchmarks/accelerate_opt/requirements.in \ No newline at end of file diff --git a/benchmarks/flops/requirements.rocm.txt b/benchmarks/flops/requirements.rocm.txt index b680b543a..725210584 100644 --- a/benchmarks/flops/requirements.rocm.txt +++ b/benchmarks/flops/requirements.rocm.txt @@ -88,3 +88,5 @@ urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests +voir @ git+https://github.com/breuleux/voir.git + # via -r benchmarks/accelerate_opt/requirements.in \ No newline at end of file From 848c3c3a7987bf5e1b3b4de9888c4ea003d4b2a4 Mon Sep 17 00:00:00 2001 From: "pierre.delaunay" Date: Wed, 1 Nov 2023 13:08:51 -0400 Subject: [PATCH 10/10] Update voir --- .gitignore | 2 + .pin/constraints-cuda-torch.txt | 31 ++++--- .pin/constraints-rocm-torch.txt | 43 +++++----- .../accelerate_opt/requirements.cuda.txt | 14 ++-- .../accelerate_opt/requirements.rocm.txt | 16 ++-- benchmarks/dlrm/requirements.cuda.txt | 17 ++-- benchmarks/dlrm/requirements.rocm.txt | 19 ++--- benchmarks/flops/requirements.cuda.txt | 82 ++++++++++++++++-- benchmarks/flops/requirements.rocm.txt | 84 +++++++++++++++++-- benchmarks/huggingface/requirements.cuda.txt | 8 +- benchmarks/huggingface/requirements.rocm.txt | 10 +-- benchmarks/rwkv/requirements.cuda.txt | 8 +- benchmarks/rwkv/requirements.rocm.txt | 10 +-- benchmarks/stargan/requirements.cuda.txt | 8 +- benchmarks/stargan/requirements.rocm.txt | 10 +-- benchmarks/super-slomo/requirements.cuda.txt | 8 +- benchmarks/super-slomo/requirements.rocm.txt | 10 +-- benchmarks/timm/requirements.cuda.txt | 8 +- benchmarks/timm/requirements.rocm.txt | 10 +-- benchmarks/torchvision/requirements.cuda.txt | 8 +- benchmarks/torchvision/requirements.rocm.txt | 10 +-- config/base.yaml | 5 +- constraints/cuda.txt | 3 +- constraints/rocm.txt | 2 +- milabench/schedule.py | 8 +- 25 files changed, 288 insertions(+), 146 deletions(-) diff --git a/.gitignore b/.gitignore index 8e6de4a30..18dafb9c6 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ sqlite.db .no_report trash/ +workspace/ +slurm-* diff --git a/.pin/constraints-cuda-torch.txt b/.pin/constraints-cuda-torch.txt index b291d5f1f..f52d2aa9d 100644 --- a/.pin/constraints-cuda-torch.txt +++ b/.pin/constraints-cuda-torch.txt @@ -8,7 +8,7 @@ absl-py==2.0.0 # via tensorboard -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -28,7 +28,7 @@ cachetools==5.3.2 # via google-auth certifi==2023.7.22 # via requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # aiohttp # requests @@ -57,7 +57,7 @@ executing==1.2.0 # via varname fbgemm-gpu==0.5.0+cu118 # via torchrec -filelock==3.12.4 +filelock==3.13.1 # via # huggingface-hub # torch @@ -82,7 +82,7 @@ giving==0.4.2 # via # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # google-auth-oauthlib # tensorboard @@ -90,7 +90,7 @@ google-auth-oauthlib==1.1.0 # via tensorboard graphviz==0.20.1 # via torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via tensorboard hjson==3.1.0 # via deepspeed @@ -116,7 +116,7 @@ lightning-utilities==0.9.0 # via # pytorch-lightning # torchmetrics -markdown==3.5 +markdown==3.5.1 # via tensorboard markdown-it-py==3.0.0 # via rich @@ -138,7 +138,7 @@ multiprocess==0.70.15 # evaluate mypy-extensions==1.0.0 # via typing-inspect -networkx==3.2 +networkx==3.2.1 # via torch ninja==1.11.1.1 # via @@ -146,7 +146,7 @@ ninja==1.11.1.1 # deepspeed numpy==1.26.1 # via - # -r benchmarks/rwkv/requirements.in + # -r benchmarks/stargan/requirements.in # -r benchmarks/super-slomo/requirements.in # accelerate # datasets @@ -167,7 +167,7 @@ oauthlib==3.2.2 # via requests-oauthlib omegaconf==2.3.0 # via voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in opencv-python==4.8.1.78 # via -r benchmarks/super-slomo/requirements.in @@ -185,7 +185,7 @@ packaging==23.2 # pytorch-lightning # torchmetrics # transformers -pandas==2.1.1 +pandas==2.1.2 # via # datasets # evaluate @@ -203,7 +203,7 @@ ptera==1.4.1 # via voir py-cpuinfo==9.0.0 # via deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via datasets pyasn1==0.5.0 # via @@ -294,7 +294,7 @@ tokenizers==0.14.1 # via transformers torch==2.1.0+cu118 # via - # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/stargan/requirements.in # -r benchmarks/super-slomo/requirements.in # accelerate # deepspeed @@ -313,7 +313,7 @@ torchrec==0.5.0+cu118 # via -r benchmarks/dlrm/requirements.in torchvision==0.16.0+cu118 # via - # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/stargan/requirements.in # -r benchmarks/super-slomo/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in @@ -340,7 +340,6 @@ typing-extensions==4.8.0 # via # huggingface-hub # lightning-utilities - # onnx # pydantic # pyre-extensions # pytorch-lightning @@ -359,9 +358,9 @@ urllib3==1.26.18 # torchx varname==0.10.0 # via giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via - # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/stargan/requirements.in # -r benchmarks/super-slomo/requirements.in websocket-client==1.6.4 # via docker diff --git a/.pin/constraints-rocm-torch.txt b/.pin/constraints-rocm-torch.txt index 4857e582c..c50a448fe 100644 --- a/.pin/constraints-rocm-torch.txt +++ b/.pin/constraints-rocm-torch.txt @@ -8,7 +8,7 @@ absl-py==2.0.0 # via tensorboard -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -28,7 +28,7 @@ cachetools==5.3.2 # via google-auth certifi==2023.7.22 # via requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # aiohttp # requests @@ -59,7 +59,7 @@ executing==1.2.0 # via varname fbgemm-gpu==0.5.0 # via torchrec -filelock==3.12.4 +filelock==3.13.1 # via # huggingface-hub # pytorch-triton-rocm @@ -84,7 +84,7 @@ giving==0.4.2 # via # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # google-auth-oauthlib # tensorboard @@ -92,7 +92,7 @@ google-auth-oauthlib==1.1.0 # via tensorboard graphviz==0.20.1 # via torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via tensorboard hjson==3.1.0 # via deepspeed @@ -118,9 +118,9 @@ lightning-utilities==0.9.0 # via # pytorch-lightning # torchmetrics -lit==17.0.3 +lit==17.0.4 # via pytorch-triton-rocm -markdown==3.5 +markdown==3.5.1 # via tensorboard markdown-it-py==3.0.0 # via rich @@ -142,7 +142,7 @@ multiprocess==0.70.15 # evaluate mypy-extensions==1.0.0 # via typing-inspect -networkx==3.2 +networkx==3.2.1 # via torch ninja==1.11.1.1 # via @@ -150,8 +150,8 @@ ninja==1.11.1.1 # deepspeed numpy==1.26.1 # via - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/dlrm/requirements.in + # -r benchmarks/rwkv/requirements.in # accelerate # datasets # deepspeed @@ -172,7 +172,7 @@ oauthlib==3.2.2 # via requests-oauthlib omegaconf==2.3.0 # via voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in opencv-python==4.8.1.78 # via -r benchmarks/super-slomo/requirements.in @@ -190,7 +190,7 @@ packaging==23.2 # pytorch-lightning # torchmetrics # transformers -pandas==2.1.1 +pandas==2.1.2 # via # datasets # evaluate @@ -208,7 +208,7 @@ ptera==1.4.1 # via voir py-cpuinfo==9.0.0 # via deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via datasets pyasn1==0.5.0 # via @@ -301,8 +301,8 @@ tokenizers==0.14.1 # via transformers torch==2.1.0+rocm5.6 # via - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/flops/requirements.in # accelerate # deepspeed # pytorch-lightning @@ -321,16 +321,16 @@ torchrec==0.5.0 # via -r benchmarks/dlrm/requirements.in torchvision==0.16.0+rocm5.6 # via - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/flops/requirements.in torchviz==0.0.2 # via -r benchmarks/dlrm/requirements.in torchx==0.6.0 # via -r benchmarks/dlrm/requirements.in tqdm==4.66.1 # via + # -r benchmarks/dlrm/requirements.in # -r benchmarks/flops/requirements.in - # -r benchmarks/super-slomo/requirements.in # datasets # deepspeed # evaluate @@ -346,7 +346,6 @@ typing-extensions==4.8.0 # via # huggingface-hub # lightning-utilities - # onnx # pydantic # pyre-extensions # pytorch-lightning @@ -365,10 +364,10 @@ urllib3==1.26.18 # torchx varname==0.10.0 # via giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via - # -r benchmarks/stargan/requirements.in - # -r benchmarks/super-slomo/requirements.in + # -r benchmarks/accelerate_opt/requirements.in + # -r benchmarks/flops/requirements.in websocket-client==1.6.4 # via docker werkzeug==3.0.1 diff --git a/benchmarks/accelerate_opt/requirements.cuda.txt b/benchmarks/accelerate_opt/requirements.cuda.txt index 563d7a85b..552fcc115 100644 --- a/benchmarks/accelerate_opt/requirements.cuda.txt +++ b/benchmarks/accelerate_opt/requirements.cuda.txt @@ -6,7 +6,7 @@ # --extra-index-url https://download.pytorch.org/whl/cu118 -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -37,7 +37,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -64,7 +64,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -135,7 +135,7 @@ multiprocess==0.70.15 # -c .pin/../.pin/constraints-cuda-torch.txt # datasets # evaluate -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -171,7 +171,7 @@ packaging==23.2 # evaluate # huggingface-hub # transformers -pandas==2.1.1 +pandas==2.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets @@ -193,7 +193,7 @@ py-cpuinfo==9.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # datasets @@ -313,7 +313,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/accelerate_opt/requirements.in xxhash==3.4.1 # via diff --git a/benchmarks/accelerate_opt/requirements.rocm.txt b/benchmarks/accelerate_opt/requirements.rocm.txt index c6c6f501d..8fc0ca376 100644 --- a/benchmarks/accelerate_opt/requirements.rocm.txt +++ b/benchmarks/accelerate_opt/requirements.rocm.txt @@ -6,7 +6,7 @@ # --extra-index-url https://download.pytorch.org/whl/rocm5.6/ -accelerate==0.24.0 +accelerate==0.24.1 # via -r benchmarks/accelerate_opt/requirements.in aiohttp==3.8.6 # via @@ -37,7 +37,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp @@ -68,7 +68,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -113,7 +113,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -143,7 +143,7 @@ multiprocess==0.70.15 # -c .pin/../.pin/constraints-rocm-torch.txt # datasets # evaluate -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -179,7 +179,7 @@ packaging==23.2 # evaluate # huggingface-hub # transformers -pandas==2.1.1 +pandas==2.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets @@ -201,7 +201,7 @@ py-cpuinfo==9.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # deepspeed -pyarrow==13.0.0 +pyarrow==14.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # datasets @@ -322,7 +322,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/accelerate_opt/requirements.in xxhash==3.4.1 # via diff --git a/benchmarks/dlrm/requirements.cuda.txt b/benchmarks/dlrm/requirements.cuda.txt index 438da3a4d..8e1993d6a 100644 --- a/benchmarks/dlrm/requirements.cuda.txt +++ b/benchmarks/dlrm/requirements.cuda.txt @@ -26,7 +26,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -50,7 +50,7 @@ fbgemm-gpu==0.5.0+cu118 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchrec -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ giving==0.4.2 # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt # google-auth-oauthlib @@ -81,7 +81,7 @@ graphviz==0.20.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -105,7 +105,7 @@ lightning-utilities==0.9.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchmetrics -markdown==3.5 +markdown==3.5.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # tensorboard @@ -130,7 +130,7 @@ mypy-extensions==1.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # typing-inspect -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -150,7 +150,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in ovld==0.3.2 # via @@ -281,7 +281,6 @@ typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # lightning-utilities - # onnx # pyre-extensions # reactivex # torch @@ -300,7 +299,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/dlrm/requirements.in websocket-client==1.6.4 # via diff --git a/benchmarks/dlrm/requirements.rocm.txt b/benchmarks/dlrm/requirements.rocm.txt index 4ce758a16..fc2a93ad0 100644 --- a/benchmarks/dlrm/requirements.rocm.txt +++ b/benchmarks/dlrm/requirements.rocm.txt @@ -26,7 +26,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -54,7 +54,7 @@ fbgemm-gpu==0.5.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchrec -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -72,7 +72,7 @@ giving==0.4.2 # -c .pin/../.pin/constraints-rocm-torch.txt # ptera # voir -google-auth==2.23.3 +google-auth==2.23.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # google-auth-oauthlib @@ -85,7 +85,7 @@ graphviz==0.20.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchviz -grpcio==1.59.0 +grpcio==1.59.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard @@ -109,11 +109,11 @@ lightning-utilities==0.9.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchmetrics -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -markdown==3.5 +markdown==3.5.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # tensorboard @@ -138,7 +138,7 @@ mypy-extensions==1.0.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # typing-inspect -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -159,7 +159,7 @@ omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # voir -onnx==1.14.1 +onnx==1.15.0 # via -r benchmarks/dlrm/requirements.in ovld==0.3.2 # via @@ -291,7 +291,6 @@ typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # lightning-utilities - # onnx # pyre-extensions # reactivex # torch @@ -310,7 +309,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/dlrm/requirements.in websocket-client==1.6.4 # via diff --git a/benchmarks/flops/requirements.cuda.txt b/benchmarks/flops/requirements.cuda.txt index d88213bfe..65c830e7a 100644 --- a/benchmarks/flops/requirements.cuda.txt +++ b/benchmarks/flops/requirements.cuda.txt @@ -2,19 +2,35 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-torchvision.txt benchmarks/flops/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.cuda.txt --resolver=backtracking .pin/tmp-constraints-cuda-flops.txt benchmarks/flops/requirements.in # --extra-index-url https://download.pytorch.org/whl/cu118 +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -filelock==3.12.4 +codefind==0.1.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera +executing==1.2.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # varname +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -23,6 +39,11 @@ fsspec==2023.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch +giving==0.4.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # ptera + # voir idna==3.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -31,15 +52,23 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # markdown-it-py mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -47,14 +76,50 @@ numpy==1.26.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +ovld==0.3.2 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir pillow==10.1.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pygments==2.16.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # rich +pynvml==11.5.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +pyyaml==6.0.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving requests==2.31.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torchvision +rich==13.6.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # asttokens sympy==1.12 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -74,10 +139,15 @@ triton==2.1.0 typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt + # reactivex # torch urllib3==1.26.18 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -voir @ git+https://github.com/breuleux/voir.git - # via -r benchmarks/accelerate_opt/requirements.in \ No newline at end of file +varname==0.10.0 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # giving +voir==0.2.11 + # via -r benchmarks/flops/requirements.in diff --git a/benchmarks/flops/requirements.rocm.txt b/benchmarks/flops/requirements.rocm.txt index 725210584..86e259787 100644 --- a/benchmarks/flops/requirements.rocm.txt +++ b/benchmarks/flops/requirements.rocm.txt @@ -2,15 +2,23 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-torchvision.txt benchmarks/flops/requirements.in +# pip-compile --config=pyproject.toml --output-file=benchmarks/flops/requirements.rocm.txt --resolver=backtracking .pin/tmp-constraints-rocm-flops.txt benchmarks/flops/requirements.in # --extra-index-url https://download.pytorch.org/whl/rocm5.6/ +antlr4-python3-runtime==4.9.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # omegaconf +asttokens==2.4.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -18,7 +26,15 @@ cmake==3.27.7 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm -filelock==3.12.4 +codefind==0.1.3 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # ptera +executing==1.2.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # varname +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -27,6 +43,11 @@ fsspec==2023.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch +giving==0.4.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # ptera + # voir idna==3.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -35,19 +56,27 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm +markdown-it-py==3.0.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # rich markupsafe==2.1.3 # via # -c .pin/../.pin/constraints-rocm-torch.txt # jinja2 +mdurl==0.1.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # markdown-it-py mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -55,18 +84,54 @@ numpy==1.26.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision +omegaconf==2.3.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +ovld==0.3.2 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir pillow==10.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision +ptera==1.4.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +pygments==2.16.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # rich +pynvml==11.5.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir pytorch-triton-rocm==2.1.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch +pyyaml==6.0.1 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # omegaconf +reactivex==4.0.4 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving requests==2.31.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torchvision +rich==13.6.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # voir +six==1.16.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # asttokens sympy==1.12 # via # -c .pin/../.pin/constraints-rocm-torch.txt @@ -83,10 +148,15 @@ tqdm==4.66.1 typing-extensions==4.8.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt + # reactivex # torch urllib3==1.26.18 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -voir @ git+https://github.com/breuleux/voir.git - # via -r benchmarks/accelerate_opt/requirements.in \ No newline at end of file +varname==0.10.0 + # via + # -c .pin/../.pin/constraints-rocm-torch.txt + # giving +voir==0.2.11 + # via -r benchmarks/flops/requirements.in diff --git a/benchmarks/huggingface/requirements.cuda.txt b/benchmarks/huggingface/requirements.cuda.txt index ce100130c..d4c17b767 100644 --- a/benchmarks/huggingface/requirements.cuda.txt +++ b/benchmarks/huggingface/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -175,5 +175,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/huggingface/requirements.in diff --git a/benchmarks/huggingface/requirements.rocm.txt b/benchmarks/huggingface/requirements.rocm.txt index 8c1366c32..963defffa 100644 --- a/benchmarks/huggingface/requirements.rocm.txt +++ b/benchmarks/huggingface/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -64,7 +64,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -84,7 +84,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -185,5 +185,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/huggingface/requirements.in diff --git a/benchmarks/rwkv/requirements.cuda.txt b/benchmarks/rwkv/requirements.cuda.txt index 13e39de84..b32c5f2f4 100644 --- a/benchmarks/rwkv/requirements.cuda.txt +++ b/benchmarks/rwkv/requirements.cuda.txt @@ -34,7 +34,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp @@ -49,7 +49,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -108,7 +108,7 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-cuda-torch.txt # aiohttp # yarl -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -223,7 +223,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/rwkv/requirements.in yarl==1.9.2 # via diff --git a/benchmarks/rwkv/requirements.rocm.txt b/benchmarks/rwkv/requirements.rocm.txt index 1e7449465..bf060a8a4 100644 --- a/benchmarks/rwkv/requirements.rocm.txt +++ b/benchmarks/rwkv/requirements.rocm.txt @@ -34,7 +34,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp @@ -53,7 +53,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -91,7 +91,7 @@ lightning-utilities==0.9.0 # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-lightning # torchmetrics -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -116,7 +116,7 @@ multidict==6.0.4 # -c .pin/../.pin/constraints-rocm-torch.txt # aiohttp # yarl -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -232,7 +232,7 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/rwkv/requirements.in yarl==1.9.2 # via diff --git a/benchmarks/stargan/requirements.cuda.txt b/benchmarks/stargan/requirements.cuda.txt index 435795cc8..23572d865 100644 --- a/benchmarks/stargan/requirements.cuda.txt +++ b/benchmarks/stargan/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -147,5 +147,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/stargan/requirements.in diff --git a/benchmarks/stargan/requirements.rocm.txt b/benchmarks/stargan/requirements.rocm.txt index bbea36f98..1e0f5eccf 100644 --- a/benchmarks/stargan/requirements.rocm.txt +++ b/benchmarks/stargan/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -56,7 +56,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -156,5 +156,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/stargan/requirements.in diff --git a/benchmarks/super-slomo/requirements.cuda.txt b/benchmarks/super-slomo/requirements.cuda.txt index c4c93813e..657aa0053 100644 --- a/benchmarks/super-slomo/requirements.cuda.txt +++ b/benchmarks/super-slomo/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -152,5 +152,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/super-slomo/requirements.in diff --git a/benchmarks/super-slomo/requirements.rocm.txt b/benchmarks/super-slomo/requirements.rocm.txt index 30b2bd53c..230461051 100644 --- a/benchmarks/super-slomo/requirements.rocm.txt +++ b/benchmarks/super-slomo/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -56,7 +56,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -161,5 +161,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/super-slomo/requirements.in diff --git a/benchmarks/timm/requirements.cuda.txt b/benchmarks/timm/requirements.cuda.txt index 7a66f12e8..619a16aba 100644 --- a/benchmarks/timm/requirements.cuda.txt +++ b/benchmarks/timm/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # huggingface-hub @@ -72,7 +72,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -164,5 +164,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/timm/requirements.in diff --git a/benchmarks/timm/requirements.rocm.txt b/benchmarks/timm/requirements.rocm.txt index e53283a33..6c09c25b0 100644 --- a/benchmarks/timm/requirements.rocm.txt +++ b/benchmarks/timm/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # huggingface-hub @@ -60,7 +60,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -80,7 +80,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -173,5 +173,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/timm/requirements.in diff --git a/benchmarks/torchvision/requirements.cuda.txt b/benchmarks/torchvision/requirements.cuda.txt index bb149eec3..e9740262c 100644 --- a/benchmarks/torchvision/requirements.cuda.txt +++ b/benchmarks/torchvision/requirements.cuda.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # requests @@ -30,7 +30,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -68,7 +68,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch @@ -149,5 +149,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/torchvision/requirements.in diff --git a/benchmarks/torchvision/requirements.rocm.txt b/benchmarks/torchvision/requirements.rocm.txt index 58fbe4cd8..40a8ade9b 100644 --- a/benchmarks/torchvision/requirements.rocm.txt +++ b/benchmarks/torchvision/requirements.rocm.txt @@ -18,7 +18,7 @@ certifi==2023.7.22 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests -charset-normalizer==3.3.1 +charset-normalizer==3.3.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # requests @@ -34,7 +34,7 @@ executing==1.2.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # varname -filelock==3.12.4 +filelock==3.13.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -56,7 +56,7 @@ jinja2==3.1.2 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch -lit==17.0.3 +lit==17.0.4 # via # -c .pin/../.pin/constraints-rocm-torch.txt # pytorch-triton-rocm @@ -76,7 +76,7 @@ mpmath==1.3.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # sympy -networkx==3.2 +networkx==3.2.1 # via # -c .pin/../.pin/constraints-rocm-torch.txt # torch @@ -158,5 +158,5 @@ varname==0.10.0 # via # -c .pin/../.pin/constraints-rocm-torch.txt # giving -voir @ git+https://github.com/breuleux/voir.git +voir==0.2.11 # via -r benchmarks/torchvision/requirements.in diff --git a/config/base.yaml b/config/base.yaml index 9947b97b2..40595c674 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -105,12 +105,15 @@ _accelerate_opt: fp16: inherits: _flops - + argv: + --number: 30 + --repeat: 90 --m: 8192 --n: 8192 --dtype: fp16 + bf16: inherits: _flops diff --git a/constraints/cuda.txt b/constraints/cuda.txt index c21d70b5e..cb2bbd770 100644 --- a/constraints/cuda.txt +++ b/constraints/cuda.txt @@ -1,2 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -voir @ git+https://github.com/breuleux/voir.git +voir > 0.2.10 + diff --git a/constraints/rocm.txt b/constraints/rocm.txt index 07a8feac5..9b46f6813 100644 --- a/constraints/rocm.txt +++ b/constraints/rocm.txt @@ -1,2 +1,2 @@ --extra-index-url https://download.pytorch.org/whl/rocm5.6/ -voir @ git+https://github.com/breuleux/voir.git +voir > 0.2.10 \ No newline at end of file diff --git a/milabench/schedule.py b/milabench/schedule.py index c1b7a8009..21f4b25ed 100644 --- a/milabench/schedule.py +++ b/milabench/schedule.py @@ -23,10 +23,10 @@ def println(line): ) as process: def readoutput(): process.stdout.flush() - line = process.stdout.readline() + for line in process.stdout.readline(): - if callback: - callback(line) + if callback: + callback(line) try: while process.poll() is None: @@ -135,7 +135,7 @@ def launch_milabench(args, sbatch_args=None, dry: bool = False, sync: bool = Fal if sbatch_args is None: sbatch_args = [ "--ntasks=1", - "--gpus-per-task=rtx8000:1", + "--gpus-per-task=4g.40gb:1", "--cpus-per-task=4", "--time=01:30:00", "--ntasks-per-node=1",