Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
pierre.delaunay committed Jun 11, 2024
1 parent c151b98 commit 009ae82
Show file tree
Hide file tree
Showing 23 changed files with 130 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .pin/constraints-hpu-torch.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions .pin/constraints-xpu-torch.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.cuda.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.hpu.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.rocm.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.xpu.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 16 additions & 6 deletions benchmate/benchmate/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, task):
self.delayed = []

def append(self, *args, **kwargs):
self.delayed.append(args, kwargs)
self.delayed.append((args, kwargs))

def record(self, *args, **kwargs):
"""Record data for a future metric.
Expand Down Expand Up @@ -230,6 +230,7 @@ def __init__(
self.raise_stop_program = raise_stop_program # Does TimedIterator raise StopProgram
self.profile_instrumentation = False
self.overhead = []
self.previous_overhead = 0
self.loader_init_time = []
self.sub_overhead = 0

Expand Down Expand Up @@ -259,7 +260,8 @@ def wrapped(self, iterator):
# Time IO wait + batch compute
start = self.event_fn(enable_timing=True)
start.record()

self.previous_overhead = 0

for data in iterator:
yield data

Expand All @@ -268,7 +270,7 @@ def wrapped(self, iterator):
end.record()

bs = self.deduce_batch_size(data)
self.events.append((start, end, bs, self.overhead[-1]))
self.events.append((start, end, bs, self.previous_overhead))

# Log progress so it looks somewhat responsive
self.log_progress()
Expand All @@ -279,8 +281,15 @@ def wrapped(self, iterator):
break

start = end
self.overhead.append(ct.elapsed())


# Note: first step does not have overhead because end event is recorded
# before the overhead starts
# Note: it is unclear whether the CPU overhead impacts the device at all;
# since we avoid syncing, the device may still be working during
# the overhead section, so the effective overhead may end up being minimal
self.previous_overhead = ct.elapsed()
self.overhead.append(self.previous_overhead)

self._push()
self.earlystop()

Expand Down Expand Up @@ -323,7 +332,7 @@ def batch_size(self, bs):
def _push_time_steps(self):
for start, end, bs, overhead in self.events:
end.synchronize()
elapsed = (start.elapsed_time(end) - self.sub_overhead * overhead) / self.unit
elapsed = (start.elapsed_time(end)) / self.unit
rate = self.batch_size(bs) / elapsed
self.log_rate(rate)

Expand All @@ -337,6 +346,7 @@ def _push_profile_metrics(self):

for iterinit in self.loader_init_time:
self.message(__iter__=iterinit, units="s", task=self.task)
self.previous_overhead = 0
self.overhead = []
self.loader_init_time = []

Expand Down
2 changes: 1 addition & 1 deletion constraints/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

#
#
voir > 0.2.10
voir >= 0.2.15
torchcompat >= 1.0.0
2 changes: 1 addition & 1 deletion constraints/hpu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

#
#
voir > 0.2.10
voir >= 0.2.15
torchcompat >= 1.0.0

2 changes: 1 addition & 1 deletion constraints/rocm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

#
#
voir > 0.2.10
voir >= 0.2.15
torchcompat >= 1.0.0
3 changes: 2 additions & 1 deletion constraints/xpu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ torchaudio>=2.1.0a0
intel-extension-for-pytorch>=2.1.10+xpu
oneccl_bind_pt==2.1.100+xpu
intel-extension-for-pytorch-deepspeed>=2.1.30
intel-extension-for-openxla>=0.3.0

#
#
voir > 0.2.10
voir >= 0.2.15
torchcompat >= 1.0.0
6 changes: 3 additions & 3 deletions milabench/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""This file is generated, do not modify"""

__tag__ = "v0.0.6-140-g57343f1"
__commit__ = "57343f10ef2b4ce598011ee308ebd06b4c654495"
__date__ = "2024-06-10 11:52:37 -0400"
__tag__ = "v0.0.10-145-gc151b985"
__commit__ = "c151b98546f32d9c0671507f8526ed13598e3407"
__date__ = "2024-06-11 14:30:04 -0400"
3 changes: 3 additions & 0 deletions milabench/sizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,9 @@ def resolve_argv(pack, argv):
"cpu": "gloo"
}

if device_count <= 0:
device_count = 1

context["arch"] = arch
context["ccl"] = ccl.get(arch, "gloo")
context["cpu_count"] = multiprocessing.cpu_count()
Expand Down
1 change: 1 addition & 0 deletions scripts/article/run_hpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ install_prepare() {

#
# Generate/download datasets, download models etc...
#
milabench prepare
}

Expand Down
10 changes: 10 additions & 0 deletions scripts/article/run_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,26 @@ install_prepare() {
#
milabench install

#
# Override/add package to milabench venv here
#
which pip
# pip install ...

(
. $BENCHMARK_VENV/bin/activate

#
# Override/add package to the benchmark venv here
#
which pip
pip uninstall torch torchvision torchaudio
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
)

#
# Generate/download datasets, download models etc...
#
milabench prepare
}

Expand Down
1 change: 1 addition & 0 deletions scripts/article/run_xpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ install_prepare() {

#
# Generate/download datasets, download models etc...
#
milabench prepare
}

Expand Down
55 changes: 55 additions & 0 deletions tests/benchmate/test_timed_iterator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import time

import pytest

from benchmate.metrics import TimedIterator, StopProgram


class CPUEvent:
    """Host-side timing event used as ``event_fn`` for ``TimedIterator`` in tests.

    It exposes the methods the iterator calls on its events: ``record``,
    ``elapsed_time`` and ``synchronize``.
    """

    def __init__(self, **kwargs):
        # Keyword arguments (e.g. ``enable_timing=True``) are accepted and ignored.
        self.start = 0

    def record(self):
        """Stamp this event with the current clock reading."""
        # perf_counter is monotonic, so the interval between two events can
        # never be negative, unlike time.time() which follows wall-clock
        # adjustments.
        self.start = time.perf_counter()

    def elapsed_time(self, end):
        """Return the time from this event to ``end``, in milliseconds."""
        # should return ms
        return (end.start - self.start) * 1000

    def synchronize(self):
        """Do nothing: there is no asynchronous work to wait for on the CPU."""
        pass


def test_wrapper():
    """Check the rates pushed by ``TimedIterator`` when timed with ``CPUEvent``.

    The loader is iterated until it raises ``StopProgram``; the pushed
    messages must contain exactly 50 rates whose mean is close to
    ``len(batch) / process_time``.
    """
    batch = [1, 2]
    process_time = 0.1

    iterable = [(batch, 3) for _ in range(10)]
    messages = []

    def push(**kwargs):
        # Collect every pushed message so it can be inspected afterwards.
        messages.append(kwargs)

    loader = TimedIterator(
        iterable,
        event_fn=CPUEvent,
        earlystop=50,
        raise_stop_program=True,
        push=push,
    )

    with pytest.raises(StopProgram):
        for _epoch in range(200):
            for _item in loader:
                # Simulated per-batch processing time.
                time.sleep(process_time)

    assert len(messages) == 117

    # Keep only the messages that carry a truthy "rate" entry.
    rates = [msg["rate"] for msg in messages if msg.get("rate")]

    assert len(rates) == 50, "Program should stop once we reached the necessary count"
    assert (
        abs((sum(rates) / len(rates)) - len(batch) / process_time) < 0.5
    ), "Computed rate should be close to theorical rate"
13 changes: 13 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from pathlib import Path
import stat

import voir.instruments.gpu as voirgpu

Expand Down Expand Up @@ -88,6 +89,18 @@ def set_env():
os.environ["MILABENCH_DASH"] = "no"
os.environ["MILABENCH_GPU_ARCH"] = backend

#
# milabench expects voir to be installed in the bench venv
# we fake one to use the one we have in the current env
os.makedirs("output/venv/benchio/bin/", exist_ok=True)
voirexec = "output/venv/benchio/bin/voir"
with open(voirexec, "w") as fp:
fp.write("#!/bin/bash\n")
fp.write("python -m voir \"$@\"")

current_permissions = stat.S_IMODE(os.lstat(voirexec).st_mode)
os.chmod(voirexec, current_permissions | (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

if backend == "mock":
oldsmi = voirgpu.DEVICESMI
voirgpu.DEVICESMI = MockDeviceSMI()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def fake_config(n):

def test_capabilties_ok():
pack = BasePackage(fake_config(10))
assert sync_is_system_capable(pack)
assert sync_is_system_capable(pack) is True


def test_capabilties_not_ok():
Expand Down
4 changes: 2 additions & 2 deletions tests/test_executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def test_njobs_gpus_executor():
acc = 0
for r in proceed(njobs.execute()):
if r.event == "start":
assert r.data["command"][0] == "torchrun"
assert r.data["command"][0].endswith("torchrun")
acc += 1
print(r)

Expand All @@ -218,7 +218,7 @@ def test_njobs_gpu_executor():
print(r)

if r.event == "start":
assert r.data["command"][0] == "voir"
assert r.data["command"][0].endswith("voir")

acc += 1

Expand Down
14 changes: 11 additions & 3 deletions tests/test_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ def test_scaler_disabled(multipack):
assert pack.argv == []


def fakeexec(pack):
    """Return ``pack.argv`` passed through ``scale_argv`` and then ``resolve_argv``."""
    from milabench.sizer import resolve_argv, scale_argv

    return resolve_argv(pack, scale_argv(pack, pack.argv))


def test_scaler_enabled(multipack, config):
from milabench.config import system_global
import contextvars
Expand All @@ -83,12 +90,13 @@ def update_ctx():
)
sizer_global.set(sizer)
system = system_global.get()
system["gpu"]["capacity"] = "41920 MiB"
gpu = system.setdefault("gpu", dict())
gpu["capacity"] = "41920 MiB"

ctx.run(update_ctx)

for k, pack in multipack.packs.items():
assert ctx.run(lambda: pack.argv) == ["--batch_size", "232"]
assert ctx.run(lambda: fakeexec(pack)) == ["--batch_size", "232"]

# Sizer is only enabled inside the context
assert pack.argv == []
assert fakeexec(pack) == []
4 changes: 2 additions & 2 deletions tests/test_summary/test_report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Source: XXX
=================
Benchmark results
=================
fail n perf sem% std% peak_memory score weight
benchio 0 4 7979.82 2.9% 17.2% -1 7979.81831 2.00
bench | fail | n | perf | sem% | std% | peak_memory | score | weight
benchio | 0 | 4 | 7979.82 | 2.9% | 17.2% | -1 | 7979.82 | 2.00

Scores
------
Expand Down
4 changes: 2 additions & 2 deletions tests/test_summary/test_report_folder_does_average.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Source: XXX
=================
Benchmark results
=================
fail n perf sem% std% peak_memory score weight
benchio 0 6 7878.45 2.5% 18.0% 24456 7878.451302 2.00
bench | fail | n | perf | sem% | std% | peak_memory | score | weight
benchio | 0 | 6 | 7878.45 | 2.5% | 18.0% | 24456 | 7878.45 | 2.00

Scores
------
Expand Down

0 comments on commit 009ae82

Please sign in to comment.