diff --git a/.pin/constraints-cuda-torch.txt b/.pin/constraints-cuda-torch.txt index 8efdeccc2..15343ce73 100644 --- a/.pin/constraints-cuda-torch.txt +++ b/.pin/constraints-cuda-torch.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=.pin/constraints-cuda-torch.txt .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/brax/requirements.in benchmarks/diffusion/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/lightning/requirements.in benchmarks/llama/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in benchmarks/torchvision_ddp/requirements.in +# pip-compile --output-file=.pin/constraints-cuda-torch.txt .pin/tmp-constraints.txt benchmarks/accelerate_opt/requirements.in benchmarks/brax/requirements.in benchmarks/diffusion/requirements.in benchmarks/dinov2/requirements.in benchmarks/dlrm/requirements.in benchmarks/flops/requirements.in benchmarks/huggingface/requirements.in benchmarks/lightning/requirements.in benchmarks/llama/requirements.in benchmarks/stargan/requirements.in benchmarks/super-slomo/requirements.in benchmarks/timm/requirements.in benchmarks/torchvision/requirements.in benchmarks/torchvision_ddp/requirements.in # --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 @@ -58,7 +58,9 @@ chex==0.1.86 click==8.1.7 # via flask cloudpickle==3.0.0 - # via gym + # via + # gym + # submitit codefind==0.1.6 # via ptera contextlib2==21.6.0 @@ -137,6 +139,8 @@ fsspec[http]==2024.5.0 # torchx future==1.0.0 # via -r benchmarks/dlrm/requirements.in +fvcore==0.1.5.post20221221 + # via -r benchmarks/dinov2/requirements.in gdown==5.2.0 # via -r benchmarks/stargan/requirements.in giving==0.4.2 @@ -181,6 +185,10 @@ importlib-resources==6.4.0 # argklass # etils # torchcompat +iopath==0.1.10 + # via + # -r benchmarks/dinov2/requirements.in + # fvcore itsdangerous==2.2.0 # via flask jax[cuda12]==0.4.28 @@ -283,6 +291,7 @@ numpy==1.26.4 # fairscale # fbgemm-gpu # flax + # fvcore # gym # jax # jaxlib @@ -307,6 +316,7 @@ numpy==1.26.4 # torchvision # transformers # trimesh + # xformers nvidia-cublas-cu12==12.1.3.1 # via # jax @@ -358,7 +368,9 @@ nvidia-nvjitlink-cu12==12.5.82 nvidia-nvtx-cu12==12.1.105 # via torch omegaconf==2.3.0 - # via voir + # via + # -r benchmarks/dinov2/requirements.in + # voir onnx==1.16.1 # via -r benchmarks/dlrm/requirements.in opencv-python==4.10.0.84 @@ -397,7 +409,10 @@ pillow==10.4.0 # -r benchmarks/huggingface/requirements.in # brax # diffusers + # fvcore # torchvision +portalocker==2.10.1 + # via iopath protobuf==4.25.3 # via # onnx @@ -449,6 +464,7 @@ pyyaml==6.0.1 # accelerate # datasets # flax + # fvcore # huggingface-hub # lightning # ml-collections @@ -457,6 +473,7 @@ pyyaml==6.0.1 # pytorch-lightning # torchx # transformers + # yacs reactivex==4.0.4 # via giving regex==2024.5.15 @@ -487,6 +504,7 @@ scikit-learn==1.5.1 # via -r benchmarks/dlrm/requirements.in scipy==1.14.0 # via + # -r benchmarks/dinov2/requirements.in # brax # jax # jaxlib @@ -504,10 +522,14 @@ six==1.16.0 # tensorboard soupsieve==2.5 # via beautifulsoup4 +submitit==1.5.1 + # via -r benchmarks/dinov2/requirements.in sympy==1.13.1 # via torch tabulate==0.9.0 - # via torchx + # via + # fvcore + # torchx tensorboard==2.17.0 # via -r 
benchmarks/dlrm/requirements.in tensorboard-data-server==0.7.2 @@ -519,7 +541,9 @@ tensorstore==0.1.63 # flax # orbax-checkpoint termcolor==2.4.0 - # via fire + # via + # fire + # fvcore threadpoolctl==3.5.0 # via scikit-learn tokenizers==0.19.1 @@ -530,6 +554,7 @@ torch==2.3.1+cu121 # via # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/brax/requirements.in + # -r benchmarks/dinov2/requirements.in # -r benchmarks/dlrm/requirements.in # -r benchmarks/flops/requirements.in # -r benchmarks/huggingface/requirements.in @@ -550,6 +575,7 @@ torch==2.3.1+cu121 # torchmetrics # torchvision # torchviz + # xformers torchaudio==2.3.1+cu121 # via -r benchmarks/accelerate_opt/requirements.in torchcompat==1.1.4 @@ -561,6 +587,7 @@ torchcompat==1.1.4 # -r benchmarks/torchvision_ddp/requirements.in torchmetrics==1.0.3 # via + # -r benchmarks/dinov2/requirements.in # lightning # pytorch-lightning # torchrec @@ -570,6 +597,7 @@ torchvision==0.18.1+cu121 # via # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/diffusion/requirements.in + # -r benchmarks/dinov2/requirements.in # -r benchmarks/flops/requirements.in # -r benchmarks/lightning/requirements.in # -r benchmarks/stargan/requirements.in @@ -592,8 +620,10 @@ tqdm==4.66.4 # datasets # deepspeed # evaluate + # fvcore # gdown # huggingface-hub + # iopath # lightning # pytorch-lightning # torchrec @@ -617,6 +647,7 @@ typing-extensions==4.12.2 # etils # flax # huggingface-hub + # iopath # lightning # lightning-utilities # orbax-checkpoint @@ -625,6 +656,7 @@ typing-extensions==4.12.2 # pyre-extensions # pytorch-lightning # reactivex + # submitit # torch # typing-inspect typing-inspect==0.9.0 @@ -644,6 +676,7 @@ voir==0.2.17 # -r benchmarks/accelerate_opt/requirements.in # -r benchmarks/brax/requirements.in # -r benchmarks/diffusion/requirements.in + # -r benchmarks/dinov2/requirements.in # -r benchmarks/dlrm/requirements.in # -r benchmarks/flops/requirements.in # -r benchmarks/huggingface/requirements.in @@ -658,10 +691,14 @@ werkzeug==3.0.3 # via # flask # tensorboard +xformers==0.0.27 + # via -r benchmarks/dinov2/requirements.in xxhash==3.4.1 # via # datasets # evaluate +yacs==0.1.8 + # via fvcore yarl==1.9.4 # via aiohttp zipp==3.19.2 diff --git a/benchmarks/brax/requirements.cuda.txt b/benchmarks/dinov2/requirements.cuda.txt similarity index 53% rename from benchmarks/brax/requirements.cuda.txt rename to benchmarks/dinov2/requirements.cuda.txt index ea6216a23..a92790725 100644 --- a/benchmarks/brax/requirements.cuda.txt +++ b/benchmarks/dinov2/requirements.cuda.txt @@ -2,24 +2,13 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=benchmarks/brax/requirements.cuda.txt .pin/tmp-constraints-cuda-brax.txt benchmarks/brax/requirements.in +# pip-compile --output-file=benchmarks/dinov2/requirements.cuda.txt .pin/tmp-constraints-cuda-dinov2-giant-gpus.txt benchmarks/dinov2/requirements.in # --extra-index-url https://pypi.ngc.nvidia.com --extra-index-url https://download.pytorch.org/whl/cu121 --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html --trusted-host pypi.ngc.nvidia.com -absl-py==2.1.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # chex - # dm-env - # ml-collections - # mujoco - # mujoco-mjx - # optax - # orbax-checkpoint antlr4-python3-runtime==4.9.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -28,50 +17,14 @@ asttokens==2.4.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # giving 
-blinker==1.8.2 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # flask -brax==0.10.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # -r benchmarks/brax/requirements.in -chex==0.1.86 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # optax -click==8.1.7 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # flask cloudpickle==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # gym + # submitit codefind==0.1.6 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera -contextlib2==21.6.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # ml-collections -dm-env==1.6 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -dm-tree==0.1.8 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # dm-env -etils[epath,epy]==1.7.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # mujoco - # mujoco-mjx - # optax - # orbax-checkpoint executing==1.2.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -81,92 +34,32 @@ filelock==3.15.4 # -c .pin/../.pin/constraints-cuda-torch.txt # torch # triton -flask==3.0.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # flask-cors -flask-cors==4.0.1 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -flax==0.8.5 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax fsspec==2024.5.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # etils # torch +fvcore==0.1.5.post20221221 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/dinov2/requirements.in giving==0.4.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # ptera # voir -glfw==2.7.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # mujoco -grpcio==1.65.1 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -gym==0.26.2 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -gym-notices==0.0.8 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # gym -importlib-resources==6.4.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # etils -itsdangerous==2.2.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # flask -jax[cuda12]==0.4.28 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # -r benchmarks/brax/requirements.in - # brax - # chex - # flax - # jaxopt - # mujoco-mjx - # optax - # orbax-checkpoint -jax-cuda12-pjrt==0.4.28 +iopath==0.1.10 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax-cuda12-plugin -jax-cuda12-plugin==0.4.28 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # jax -jaxlib==0.4.28+cuda12.cudnn89 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # chex - # jax - # jaxopt - # mujoco-mjx - # optax - # orbax-checkpoint -jaxopt==0.8.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax + # -r benchmarks/dinov2/requirements.in + # fvcore jinja2==3.1.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # flask # torch +lightning-utilities==0.11.5 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # torchmetrics markdown-it-py==3.0.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -175,43 +68,14 @@ markupsafe==2.1.5 # via # -c .pin/../.pin/constraints-cuda-torch.txt # jinja2 - # werkzeug mdurl==0.1.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt # markdown-it-py -ml-collections==0.1.1 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -ml-dtypes==0.4.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # jax - # jaxlib - # tensorstore mpmath==1.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # sympy 
-msgpack==1.0.8 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # flax - # orbax-checkpoint -mujoco==3.2.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # mujoco-mjx -mujoco-mjx==3.2.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -nest-asyncio==1.6.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # orbax-checkpoint networkx==3.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -219,39 +83,21 @@ networkx==3.3 numpy==1.26.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # chex - # dm-env - # flax - # gym - # jax - # jaxlib - # jaxopt - # ml-dtypes - # mujoco - # opt-einsum - # optax - # orbax-checkpoint + # fvcore # scipy - # tensorboardx - # tensorstore - # trimesh + # torchmetrics + # torchvision + # xformers nvidia-cublas-cu12==12.1.3.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch nvidia-cuda-cupti-cu12==12.1.105 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # torch -nvidia-cuda-nvcc-cu12==12.5.82 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # jax nvidia-cuda-nvrtc-cu12==12.1.105 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -259,17 +105,14 @@ nvidia-cuda-nvrtc-cu12==12.1.105 nvidia-cuda-runtime-cu12==12.1.105 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # torch nvidia-cudnn-cu12==8.9.2.26 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # torch nvidia-cufft-cu12==11.0.2.54 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # torch nvidia-curand-cu12==10.3.2.106 # via @@ -278,23 +121,19 @@ nvidia-curand-cu12==10.3.2.106 nvidia-cusolver-cu12==11.4.5.107 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # torch nvidia-cusparse-cu12==12.1.0.106 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # nvidia-cusolver-cu12 # torch nvidia-nccl-cu12==2.20.5 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # torch nvidia-nvjitlink-cu12==12.5.82 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # jax # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 @@ -304,21 +143,8 @@ nvidia-nvtx-cu12==12.1.105 omegaconf==2.3.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/dinov2/requirements.in # voir -opt-einsum==3.3.0 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # jax -optax==0.2.3 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # flax -orbax-checkpoint==0.5.22 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # flax ovld==0.3.5 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -326,16 +152,17 @@ ovld==0.3.5 packaging==24.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # tensorboardx + # lightning-utilities + # torchmetrics pillow==10.4.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -protobuf==4.25.3 + # fvcore + # torchvision +portalocker==2.10.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # orbax-checkpoint - # tensorboardx + # iopath psutil==5.9.8 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -352,21 +179,12 @@ pynvml==11.5.3 # via # -c .pin/../.pin/constraints-cuda-torch.txt # voir -pyopengl==3.1.7 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # mujoco -pytinyrenderer==0.0.14 - # via - # -c .pin/../.pin/constraints-cuda-torch.txt - # brax pyyaml==6.0.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # flax - # ml-collections + # fvcore # omegaconf - # orbax-checkpoint + # yacs reactivex==4.0.4 # via # -c 
.pin/../.pin/constraints-cuda-torch.txt @@ -374,47 +192,51 @@ reactivex==4.0.4 rich==13.7.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # flax # voir scipy==1.14.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # jax - # jaxlib - # jaxopt - # mujoco-mjx + # -r benchmarks/dinov2/requirements.in six==1.16.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt # asttokens - # ml-collections +submitit==1.5.1 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/dinov2/requirements.in sympy==1.13.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt # torch -tensorboardx==2.6.2.2 +tabulate==0.9.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # brax -tensorstore==0.1.63 + # fvcore +termcolor==2.4.0 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # flax - # orbax-checkpoint -toolz==0.12.1 + # fvcore +torch==2.3.1+cu121 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # chex -torch==2.3.1+cu121 + # -r benchmarks/dinov2/requirements.in + # torchmetrics + # torchvision + # xformers +torchmetrics==1.0.3 + # via + # -c .pin/../.pin/constraints-cuda-torch.txt + # -r benchmarks/dinov2/requirements.in +torchvision==0.18.1+cu121 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # -r benchmarks/brax/requirements.in -trimesh==4.4.3 + # -r benchmarks/dinov2/requirements.in +tqdm==4.66.4 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # mujoco-mjx + # fvcore + # iopath triton==2.3.1 # via # -c .pin/../.pin/constraints-cuda-torch.txt @@ -422,12 +244,10 @@ triton==2.3.1 typing-extensions==4.12.2 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # brax - # chex - # etils - # flax - # orbax-checkpoint + # iopath + # lightning-utilities # reactivex + # submitit # torch varname==0.10.0 # via @@ -437,12 +257,15 @@ voir==0.2.17 # via # -c .pin/../.pin/constraints-cuda-torch.txt # -c .pin/../constraints/cuda.txt - # -r benchmarks/brax/requirements.in -werkzeug==3.0.3 + # -r benchmarks/dinov2/requirements.in +xformers==0.0.27 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # flask -zipp==3.19.2 + # -r benchmarks/dinov2/requirements.in +yacs==0.1.8 # via # -c .pin/../.pin/constraints-cuda-torch.txt - # etils + # fvcore + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/benchmarks/huggingface/prepare.py b/benchmarks/huggingface/prepare.py index d1bdaf280..1f5f80850 100755 --- a/benchmarks/huggingface/prepare.py +++ b/benchmarks/huggingface/prepare.py @@ -7,7 +7,7 @@ args = parser().parse_args() print(f"Preparing {args.model}") make_config = models[args.model] - make_config() + make_config(args) # bert dataset # t5 dataset diff --git a/benchmate/benchmate/datagen.py b/benchmate/benchmate/datagen.py index daf0ed075..a7a753099 100644 --- a/benchmate/benchmate/datagen.py +++ b/benchmate/benchmate/datagen.py @@ -97,6 +97,8 @@ def fakeimagenet_args(): parser.add_argument("--val", default=0.1, type=float, nargs="+") parser.add_argument("--test", default=0.1, type=float, nargs="+") args, _ = parser.parse_known_args() + return args + def generate_fakeimagenet(args=None): # config = json.loads(os.environ["MILABENCH_CONFIG"]) diff --git a/milabench/_version.py b/milabench/_version.py index eddbdfb72..d8ae9287b 100644 --- a/milabench/_version.py +++ b/milabench/_version.py @@ -1,5 +1,5 @@ """This file is generated, do not modify""" -__tag__ = "v0.1.0-23-gb9954e68" -__commit__ = "b9954e68e71a29fff2e7b16d8bcfaf7646629992" -__date__ = "2024-07-25 12:06:00 -0400" +__tag__ = "v0.1.0-30-g64aa548b" 
+__commit__ = "64aa548ba07d3c6bb298e435b8ac43c69eb75738" +__date__ = "2024-07-26 13:07:25 -0400" diff --git a/scripts/article/run_cuda_dev.sh b/scripts/article/run_cuda_dev.sh index 5e69f88cb..7651864e8 100644 --- a/scripts/article/run_cuda_dev.sh +++ b/scripts/article/run_cuda_dev.sh @@ -51,7 +51,7 @@ install_prepare() { . $MILABENCH_WORDIR/env/bin/activate pip install -e $MILABENCH_SOURCE - # milabench pin --variant cuda "$@" + milabench pin --variant cuda "$@" # # Install milabench's benchmarks in their venv @@ -69,7 +69,7 @@ install_prepare() { # pip install -e $MILABENCH_WORDIR/torchcompat # pip install torch torchvision torchaudio - pip install fvcore xFormers + # pip install fvcore xFormers # DALI stuff # pip install --extra-index-url https://pypi.nvidia.com --upgrade nvidia-dali-cuda120 diff --git a/tests/test_command_reg/test_command_reg_one_node.txt b/tests/test_command_reg/test_command_reg_one_node.txt index 35f198150..95d1d5f4c 100644 --- a/tests/test_command_reg/test_command_reg_one_node.txt +++ b/tests/test_command_reg/test_command_reg_one_node.txt @@ -15,8 +15,8 @@ export MILABENCH_DIR_DATA=$BASE/data export MILABENCH_DIR_RUNS=$BASE/runs export MILABENCH_DIR_EXTRA=$BASE/extra/llm export MILABENCH_DIR_CACHE=$BASE/cache +export OMP_NUM_THREADS=128 export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": "$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}' -export OMP_NUM_THREADS=8 echo "---" echo "llama" @@ -124,8 +124,8 @@ time ( ) echo "---" -echo "resnet152-ddp" -echo "=============" +echo "resnet152-ddp-gpus" +echo "==================" time ( $SRC/milabench/benchmarks/torchvision_ddp/activator $BASE/venv/torch $SRC/milabench/benchmarks/torchvision_ddp/main.py --epochs 10 --num-workers 8 --loader torch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 256 & wait @@ -327,10 +327,10 @@ time ( ) echo "---" -echo "resnet152-multi" -echo "===============" +echo "resnet152-gpus" +echo "==============" time ( - $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model resnet152 --batch-size 256 --output $BASE/extra/timm/dev/resnet152-multi.0 --checkpoint-hist 1 & + $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model 
resnet152 --batch-size 256 --output $BASE/extra/timm/dev/resnet152-gpus.0 --checkpoint-hist 1 & wait ) @@ -350,10 +350,10 @@ time ( ) echo "---" -echo "davit_large-multi" -echo "=================" +echo "davit_large-gpus" +echo "================" time ( - $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model davit_large --batch-size 128 --lr-base 0.01 --output $BASE/extra/timm/dev/davit_large-multi.0 --checkpoint-hist 1 & + $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model davit_large --batch-size 128 --lr-base 0.01 --output $BASE/extra/timm/dev/davit_large-gpus.0 --checkpoint-hist 1 & wait ) @@ -373,32 +373,32 @@ time ( ) echo "---" -echo "opt-1_3b" -echo "========" +echo "opt-1_3b-gpus" +echo "=============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/accelerate_opt/main.py --max_train_steps 100 --dataset_name wikitext --dataset_config_name wikitext-103-v1 --dataset_rev b08601e --validation_split_percentage 5 --per_gpu_batch_size 1 --cpus_per_gpu 8 --cache $BASE/cache --model_name facebook/opt-1.3b & wait ) echo "---" -echo "opt-1_3b-multinode" -echo "==================" +echo "opt-1_3b-nodes" +echo "==============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/accelerate_opt/main.py --max_train_steps 100 --dataset_name wikitext --dataset_config_name wikitext-103-v1 --dataset_rev b08601e --validation_split_percentage 5 --per_gpu_batch_size 1 --cpus_per_gpu 8 --cache $BASE/cache --model_name facebook/opt-1.3b & wait ) echo "---" -echo "opt-6_7b" -echo "========" +echo "opt-6_7b-gpus" +echo "=============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --use_deepspeed --deepspeed_multinode_launcher=standard --zero_stage=2 --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/accelerate_opt/main.py --max_train_steps 100 --dataset_name wikitext --dataset_config_name wikitext-103-v1 --dataset_rev b08601e --validation_split_percentage 5 --per_gpu_batch_size 1 --cpus_per_gpu 8 --cache $BASE/cache --model_name facebook/opt-6.7b & wait ) echo "---" -echo "opt-6_7b-multinode" -echo "==================" +echo "opt-6_7b-nodes" +echo "==============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --use_deepspeed --deepspeed_multinode_launcher=standard --zero_stage=2 --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 
--main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/accelerate_opt/main.py --max_train_steps 100 --dataset_name wikitext --dataset_config_name wikitext-103-v1 --dataset_rev b08601e --validation_split_percentage 5 --per_gpu_batch_size 1 --cpus_per_gpu 8 --cache $BASE/cache --model_name facebook/opt-6.7b & wait @@ -454,7 +454,7 @@ echo "---" echo "diffusion-gpus" echo "==============" time ( - $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=4 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/diffusion/main.py --train_batch_size 32 --num_epochs 5 & + $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=4 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/diffusion/main.py --num_epochs 5 --batch_size 32 --num_workers 8 & wait ) @@ -462,14 +462,14 @@ echo "---" echo "lightning" echo "=========" time ( - CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & + CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data 
$BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & wait ) @@ -477,7 +477,15 @@ echo "---" echo "lightning-gpus" echo "==============" time ( - $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + wait +) + +echo "---" +echo "dinov2-giant-gpus" +echo "=================" +time ( + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/dinov2/main.py --output-dir $BASE/extra/dinov2-giant-gpus/output --no-resume --config-file src/dinov2/configs/train/vitg14.yaml train.dataset_path=ImageNet:split=TRAIN:root=$BASE/data/FakeImageNet:extra=$BASE/data/FakeImageNet train.batch_size_per_gpu=32 train.saveckp_freq=100 train.num_workers=10 & wait ) diff --git a/tests/test_command_reg/test_command_reg_two_nodes.txt b/tests/test_command_reg/test_command_reg_two_nodes.txt index 2817f77f8..387d2d474 100644 --- a/tests/test_command_reg/test_command_reg_two_nodes.txt +++ b/tests/test_command_reg/test_command_reg_two_nodes.txt @@ -15,8 +15,8 @@ export MILABENCH_DIR_DATA=$BASE/data export MILABENCH_DIR_RUNS=$BASE/runs export MILABENCH_DIR_EXTRA=$BASE/extra/llm export MILABENCH_DIR_CACHE=$BASE/cache +export OMP_NUM_THREADS=128 export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}, {"ip": "192.168.0.11", "main": false, "name": "1", "port": 22, "user": "username", "hostname": "192.168.0.11", "aliaslist": [], "ipaddrlist": ["192.168.0.11"], "local": false}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": 
"$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}' -export OMP_NUM_THREADS=8 echo "---" echo "llama" @@ -124,8 +124,8 @@ time ( ) echo "---" -echo "resnet152-ddp" -echo "=============" +echo "resnet152-ddp-gpus" +echo "==================" time ( $SRC/milabench/benchmarks/torchvision_ddp/activator $BASE/venv/torch $SRC/milabench/benchmarks/torchvision_ddp/main.py --epochs 10 --num-workers 8 --loader torch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 256 & wait @@ -327,10 +327,10 @@ time ( ) echo "---" -echo "resnet152-multi" -echo "===============" +echo "resnet152-gpus" +echo "==============" time ( - $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model resnet152 --batch-size 256 --output $BASE/extra/timm/dev/resnet152-multi.0 --checkpoint-hist 1 & + $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model resnet152 --batch-size 256 --output $BASE/extra/timm/dev/resnet152-gpus.0 --checkpoint-hist 1 & wait ) @@ -350,10 +350,10 @@ time ( ) echo "---" -echo "davit_large-multi" -echo "=================" +echo "davit_large-gpus" +echo "================" time ( - $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model davit_large --batch-size 128 --lr-base 0.01 --output $BASE/extra/timm/dev/davit_large-multi.0 --checkpoint-hist 1 & + $BASE/venv/torch/bin/benchrun --nproc-per-node=8 -m python $SRC/milabench/benchmarks/timm/pytorch-image-models/train.py --amp --amp-dtype bfloat16 --device cuda --val-split --data-dir $BASE/data --dataset FakeImageNet --workers 8 --model davit_large --batch-size 128 --lr-base 0.01 --output $BASE/extra/timm/dev/davit_large-gpus.0 --checkpoint-hist 1 & wait ) @@ -373,16 +373,16 @@ time ( ) echo "---" -echo "opt-1_3b" -echo "========" +echo "opt-1_3b-gpus" +echo "=============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/accelerate_opt/main.py --max_train_steps 100 --dataset_name wikitext --dataset_config_name wikitext-103-v1 --dataset_rev b08601e --validation_split_percentage 5 --per_gpu_batch_size 1 --cpus_per_gpu 8 --cache $BASE/cache --model_name facebook/opt-1.3b & wait ) echo "---" -echo "opt-1_3b-multinode" -echo "==================" +echo "opt-1_3b-nodes" +echo "==============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=2 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=16 & ssh -oCheckHostIP=no -oStrictHostKeyChecking=no -oPasswordAuthentication=no 
-oPasswordAuthentication=no -p 22 username@192.168.0.11 $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=1 --num_machines=2 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=16 & @@ -390,16 +390,16 @@ time ( ) echo "---" -echo "opt-6_7b" -echo "========" +echo "opt-6_7b-gpus" +echo "=============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --use_deepspeed --deepspeed_multinode_launcher=standard --zero_stage=2 --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/accelerate_opt/main.py --max_train_steps 100 --dataset_name wikitext --dataset_config_name wikitext-103-v1 --dataset_rev b08601e --validation_split_percentage 5 --per_gpu_batch_size 1 --cpus_per_gpu 8 --cache $BASE/cache --model_name facebook/opt-6.7b & wait ) echo "---" -echo "opt-6_7b-multinode" -echo "==================" +echo "opt-6_7b-nodes" +echo "==============" time ( $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=2 --use_deepspeed --deepspeed_multinode_launcher=standard --zero_stage=2 --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=16 & ssh -oCheckHostIP=no -oStrictHostKeyChecking=no -oPasswordAuthentication=no -oPasswordAuthentication=no -p 22 username@192.168.0.11 $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=1 --num_machines=2 --use_deepspeed --deepspeed_multinode_launcher=standard --zero_stage=2 --gradient_accumulation_steps=1 --num_cpu_threads_per_process=8 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=16 & @@ -456,7 +456,7 @@ echo "---" echo "diffusion-gpus" echo "==============" time ( - $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=4 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/diffusion/main.py --train_batch_size 32 --num_epochs 5 & + $SRC/milabench/milabench/scripts/activator $BASE/venv/torch accelerate launch --mixed_precision=bf16 --dynamo_backend=no --machine_rank=0 --num_machines=1 --multi_gpu --gradient_accumulation_steps=1 --num_cpu_threads_per_process=4 --main_process_ip=127.0.0.1 --main_process_port=22 --num_processes=8 $SRC/milabench/benchmarks/diffusion/main.py --num_epochs 5 --batch_size 32 --num_workers 8 & wait ) @@ -464,14 +464,14 @@ echo "---" echo "lightning" echo "=========" time ( - CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model 
resnet152 & - CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & - CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & + CUDA_VISIBLE_DEVICES=0 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=1 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=2 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=3 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=4 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=5 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=6 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + CUDA_VISIBLE_DEVICES=7 python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & wait ) @@ -479,7 +479,15 @@ echo "---" echo "lightning-gpus" echo "==============" time ( - $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 & + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/lightning/main.py --epochs 10 --num-workers 8 --loader pytorch --data $BASE/data/FakeImageNet --model resnet152 --batch-size 16 & + wait +) + +echo "---" +echo "dinov2-giant-gpus" +echo "=================" +time ( + $BASE/venv/torch/bin/benchrun --nnodes=1 --rdzv-backend=c10d --rdzv-endpoint=127.0.0.1:29400 --master-addr=127.0.0.1 --master-port=29400 --nproc-per-node=8 --no-python -- python $SRC/milabench/benchmarks/dinov2/main.py --output-dir $BASE/extra/dinov2-giant-gpus/output --no-resume --config-file src/dinov2/configs/train/vitg14.yaml 
train.dataset_path=ImageNet:split=TRAIN:root=$BASE/data/FakeImageNet:extra=$BASE/data/FakeImageNet train.batch_size_per_gpu=32 train.saveckp_freq=100 train.num_workers=10 & wait ) diff --git a/tests/test_mock/test_milabench_bad_run.txt b/tests/test_mock/test_milabench_bad_run.txt index f30881ec1..e9b4ffa42 100644 --- a/tests/test_mock/test_milabench_bad_run.txt +++ b/tests/test_mock/test_milabench_bad_run.txt @@ -13,7 +13,7 @@ benchio.0 | Traceback (most recent call last): | File "$TMP/venv/benchio/bin/voir", line 8, in | sys.exit(main()) - | File "$TMP/venv/benchio/lib/python3.10/site-packages/voir/cli.py", line 124, in main + | File "$TMP/venv/benchio/lib/python3.10/site-packages/voir/cli.py", line 128, in main | ov(sys.argv[1:] if argv is None else argv) | File "$TMP/venv/benchio/lib/python3.10/site-packages/voir/phase.py", line 331, in __call__ | self._run(*args, **kwargs) @@ -35,7 +35,7 @@ benchio.1 | Traceback (most recent call last): | File "$TMP/venv/benchio/bin/voir", line 8, in | sys.exit(main()) - | File "$TMP/venv/benchio/lib/python3.10/site-packages/voir/cli.py", line 124, in main + | File "$TMP/venv/benchio/lib/python3.10/site-packages/voir/cli.py", line 128, in main | ov(sys.argv[1:] if argv is None else argv) | File "$TMP/venv/benchio/lib/python3.10/site-packages/voir/phase.py", line 331, in __call__ | self._run(*args, **kwargs)
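
Note on the benchmate/benchmate/datagen.py hunk: fakeimagenet_args() built an argument parser and parsed the options but never returned the result, so callers received None; the added `return args` is the whole fix. A minimal, self-contained sketch of the presumed call pattern follows (the real body of generate_fakeimagenet is not shown in this diff, so its use of the helper below is an assumption for illustration only):

import argparse

def fakeimagenet_args():
    # Mirrors the parser shown in the hunk above; without the newly
    # added `return args`, this function would implicitly return None.
    parser = argparse.ArgumentParser()
    parser.add_argument("--val", default=0.1, type=float, nargs="+")
    parser.add_argument("--test", default=0.1, type=float, nargs="+")
    args, _ = parser.parse_known_args()
    return args

def generate_fakeimagenet(args=None):
    # Assumed fallback: when no namespace is supplied, reuse the
    # defaults parsed by fakeimagenet_args().
    if args is None:
        args = fakeimagenet_args()
    print(args.val, args.test)

if __name__ == "__main__":
    generate_fakeimagenet()

The benchmarks/huggingface/prepare.py hunk is the caller-side counterpart of the same kind of fix: the parsed args are now forwarded into make_config(args) instead of being dropped.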