Latest batch size model
pierre.delaunay committed Sep 19, 2024
1 parent 71e45c7 commit 5f2263e
Showing 10 changed files with 85 additions and 43 deletions.
6 changes: 4 additions & 2 deletions .pin/constraints-cuda-torch.txt

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion benchmarks/dinov2/requirements.cuda.txt

(Generated file; diff not rendered.)

4 changes: 4 additions & 0 deletions benchmarks/llm/requirements.cuda.txt

(Generated file; diff not rendered.)

1 change: 1 addition & 0 deletions benchmarks/llm/requirements.in
@@ -4,6 +4,7 @@ torch
PyYAML
argklass
fairscale
torchao

# Prepare
accelerate
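The new torchao pin supports the llm benchmarks. As a rough illustration of what torchao provides, here is a minimal sketch of int8 weight-only quantization using torchao's quantize_ API; the torchao version and whether the benchmark exercises exactly this path are assumptions, not shown in this diff.

import torch
from torchao.quantization import quantize_, int8_weight_only

# Quantize the Linear layers of a toy model to int8 weights, in place.
model = torch.nn.Sequential(
    torch.nn.Linear(1024, 1024),
    torch.nn.ReLU(),
    torch.nn.Linear(1024, 1024),
)
quantize_(model, int8_weight_only())
out = model(torch.randn(8, 1024))  # forward pass runs with quantized weights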
2 changes: 1 addition & 1 deletion benchmarks/purejaxrl/requirements.cuda.txt

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion benchmarks/rlhf/requirements.cuda.txt

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion benchmarks/torchatari/requirements.cuda.txt

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion benchmarks/vjepa/requirements.cuda.txt

(Generated file; diff not rendered.)

101 changes: 68 additions & 33 deletions config/scaling.yaml
@@ -55,13 +55,13 @@ bert-tf32-fp16:
    112: 81140.75 MiB
  optimized: 128
bf16: {}
# brax:
#   arg: --batch-size
#   model:
#     1024: 4912.25 MiB
# cleanrljax:
#   arg: --num_steps
#   optimized: 128
brax:
  arg: --batch-size
  model:
    1024: 4912.25 MiB
cleanrljax:
  arg: --num_steps
  optimized: 128
convnext_large-fp16:
  arg: --batch-size
  model:
@@ -194,24 +194,28 @@ diffusion-single:
    4: 23478.75 MiB
    16: 33850.25 MiB
    32: 55354.25 MiB
pna:
  arg: --batch-size

dimenet:
  arg: --batch-size
  model:
    2: 452.6875 MiB
    4: 1604.25 MiB
    24: 4776.25 MiB
    56: 6330.25 MiB
    64: 12274.25 MiB
    112: 15294.25 MiB
    128: 13002.25 MiB
    240: 67506.25 MiB
    280: 56556.25 MiB
    488: 80406.25 MiB
dinov2-giant-gpus:
  arg: train.batch_size_per_gpu={batch_size}
  model:
    1: 32240.25 MiB
    2: 32252.25 MiB
    4: 32404.25 MiB
    16: 38350.25 MiB
    32: 69614 MiB
    24: 48856.25 MiB
    32: 72102.25 MiB
  optimized: 32
dinov2-giant-nodes:
  arg: train.batch_size_per_gpu={batch_size}
@@ -222,16 +226,17 @@ dinov2-giant-single:
    2: 20682.25 MiB
    4: 20682.25 MiB
    16: 52748.25 MiB
    24: 60792.25 MiB
    32: 74544.25 MiB
dlrm: {}
# dqn:
#   arg: --buffer_batch_size
#   model:
#     1024: 81.81005859375 MiB
#     2048: 83.40380859375 MiB
#     32768: 131.21630859375 MiB
#     65536: 182.21630859375 MiB
#   optimized: 128
dqn:
  arg: --buffer_batch_size
  model:
    1024: 81.81005859375 MiB
    2048: 83.40380859375 MiB
    32768: 131.21630859375 MiB
    65536: 182.21630859375 MiB
  optimized: 128
focalnet:
  arg: --batch-size
  model:
@@ -260,9 +265,15 @@ lightning:
    2: 1054.25 MiB
    4: 1856.25 MiB
    16: 4728.25 MiB
    24: 5482.25 MiB
    32: 6352.25 MiB
    56: 1054.25 MiB
    64: 1856.25 MiB
    120: 14522.25 MiB
    128: 14818.25 MiB
    240: 25480.25 MiB
    488: 49042.25 MiB
    664: 65914.25 MiB
lightning-gpus:
  arg: --batch-size
  model:
@@ -271,7 +282,12 @@ lightning-gpus:
    4: 1156.75 MiB
    8: 1260.75 MiB
    16: 4150.75 MiB
    48: 11056.25 MiB
    112: 16776.25 MiB
    128: 15858 MiB
    240: 28942.25 MiB
    504: 54100.25 MiB
    624: 65386.25 MiB
  optimized: 16
llama: {}
llava-gpus:
@@ -280,6 +296,7 @@ llava-gpus:
llava-single:
  arg: --batch_size
  model:
    1: 72614.25 MiB
    2: 15168.25 MiB
    4: 72362.25 MiB
  optimized: 1
@@ -341,18 +358,21 @@ opt-6_7b-multinode:
  model:
    1: 55380 MiB
  optimized: 1
# ppo:
#   arg: --num_steps
#   model:
#     8: 80.791748046875 MiB
#     16: 80.916748046875 MiB
#     32: 81.166748046875 MiB
#     64: 81.666748046875 MiB
#     128: 82.666748046875 MiB
#     1024: 96.666748046875 MiB
#     2048: 132.484619140625 MiB
#     4096: 205.328369140625 MiB
#   optimized: 32
pna:
  arg: --batch-size
ppo:
  arg: --num_steps
  model:
    8: 80.791748046875 MiB
    16: 80.916748046875 MiB
    32: 81.166748046875 MiB
    64: 81.666748046875 MiB
    128: 82.666748046875 MiB
    1024: 96.666748046875 MiB
    2048: 132.484619140625 MiB
    4096: 205.328369140625 MiB
    2517448: 62094.25 MiB
  optimized: 32
recursiongfn:
  arg: --batch_size
  model:
@@ -477,6 +497,11 @@ resnet50-noio:
    4: 1854.25 MiB
    16: 3052.25 MiB
    32: 4690.25 MiB
    56: 7114.25 MiB
    136: 15194.25 MiB
    288: 30632.25 MiB
    592: 64483.8125 MiB
    736: 76050.25 MiB
rlhf-gpus:
  arg: --per_device_train_batch_size
  model:
@@ -487,6 +512,9 @@ rlhf-gpus:
    32: 17918.25 MiB
    64: 24374.25 MiB
    128: 25830.25 MiB
    136: 29442.25 MiB
    392: 15372.25 MiB
    520: 15808.25 MiB
  optimized: 64
rlhf-single:
  arg: --per_device_train_batch_size
@@ -496,8 +524,12 @@ rlhf-single:
    4: 8822.25 MiB
    16: 9694.25 MiB
    32: 12952.25 MiB
    40: 14638.25 MiB
    64: 19422.25 MiB
    120: 31048.25 MiB
    128: 32442.25 MiB
    280: 63262.25 MiB
    352: 77536.25 MiB
  optimized: 64
rwkv:
  arg: --micro_bsz
@@ -553,19 +585,22 @@ torchatari:
vjepa-gpus:
  arg: --batch_size
  model:
    1: 27196.25 MiB
    2: 28896.25 MiB
    4: 30784.25 MiB
    16: 52722.25 MiB
    32: 76372.25 MiB
    32: 77124.25 MiB
  optimized: 24
vjepa-single:
  arg: --batch_size
  model:
    1: 6644.25 MiB
    2: 18984.25 MiB
    4: 11860.25 MiB
    8: 30764.25 MiB
    16: 45516.25 MiB
    32: 70586.25 MiB
    24: 57574.25 MiB
    32: 67122.25 MiB
  optimized: 24
whisper:
  arg: --batch-size
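Each scaling.yaml entry maps a benchmark to the CLI template that injects the batch size (arg), the peak GPU memory measured at each batch size (model), and a default (optimized). As a rough sketch of how such a table can drive batch-size selection, the hypothetical helper below (not milabench's actual API) picks the largest measured batch size whose recorded memory fits a device and renders the argument template.

import yaml

def max_fitting_batch(scaling_path: str, bench: str, capacity_mib: float) -> str:
    """Hypothetical helper: largest measured batch size that fits capacity_mib."""
    with open(scaling_path) as f:
        cfg = yaml.safe_load(f)[bench]
    # model entries look like {16: "38350.25 MiB"}; strip the unit to compare
    fits = [bs for bs, mem in cfg.get("model", {}).items()
            if float(str(mem).split()[0]) <= capacity_mib]
    if not fits:
        raise ValueError(f"no measured batch size fits in {capacity_mib} MiB")
    arg, best = cfg["arg"], max(fits)
    # some templates embed the value (train.batch_size_per_gpu={batch_size});
    # plain flags such as --batch-size get the value appended instead
    return arg.format(batch_size=best) if "{batch_size}" in arg else f"{arg} {best}"

# For example, on an 80 GiB device:
print(max_fitting_batch("config/scaling.yaml", "dinov2-giant-gpus", 81000))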
6 changes: 3 additions & 3 deletions scripts/article/run_cuda.sh
@@ -88,9 +88,9 @@ if [ "$MILABENCH_PREPARE" -eq 0 ]; then
    # milabench prepare --system $MILABENCH_WORDIR/system.yaml $ARGS

    # pip install torch
    # milabench pin --variant cuda --from-scratch $ARGS
    # milabench install --system $MILABENCH_WORDIR/system.yaml --force $ARGS
    # milabench prepare --system $MILABENCH_WORDIR/system.yaml $ARGS
    milabench pin --variant cuda --from-scratch $ARGS
    milabench install --system $MILABENCH_WORDIR/system.yaml --force $ARGS
    milabench prepare --system $MILABENCH_WORDIR/system.yaml $ARGS

    ARGS="--select resnet50-noio,brax,lightning,dinov2-giant-single,dinov2-giant-gpus,llm-lora-ddp-gpus,llm-lora-ddp-nodes,llm-lora-mp-gpus,llm-full-mp-gpus,llm-full-mp-nodes,dqn,ppo,dimenet,llava-single,rlhf-single,rlhf-gpus,vjepa-single,vjepa-gpus"

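This change re-enables the pin/install/prepare sequence, so every run refreshes dependency pins and the installed environment before benchmarking instead of reusing a stale setup. For illustration, a minimal Python rendering of the same three steps; the commands come from the diff, while the default working directory is an assumption.

import os
import subprocess

# Hypothetical standalone rendering of the setup steps (default path assumed).
wordir = os.environ.get("MILABENCH_WORDIR", os.path.expanduser("~/milabench"))
system = os.path.join(wordir, "system.yaml")

for cmd in (
    ["milabench", "pin", "--variant", "cuda", "--from-scratch"],
    ["milabench", "install", "--system", system, "--force"],
    ["milabench", "prepare", "--system", system],
):
    subprocess.run(cmd, check=True)  # stop if a setup step fails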
