
Fix tests
pierre.delaunay committed Jun 11, 2024
1 parent c151b98 commit f4ae462
Showing 23 changed files with 120 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .pin/constraints-hpu-torch.txt

4 changes: 0 additions & 4 deletions .pin/constraints-xpu-torch.txt

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.cuda.txt

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.hpu.txt

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.rocm.txt

2 changes: 1 addition & 1 deletion benchmarks/brax/requirements.xpu.txt

8 changes: 6 additions & 2 deletions benchmate/benchmate/metrics.py
@@ -230,6 +230,7 @@ def __init__(
         self.raise_stop_program = raise_stop_program  # Does TimedIterator raise StopProgram
         self.profile_instrumentation = False
         self.overhead = []
+        self.previous_overhead = 0
         self.loader_init_time = []
         self.sub_overhead = 0
 
@@ -268,7 +269,7 @@ def wrapped(self, iterator):
             end.record()
 
             bs = self.deduce_batch_size(data)
-            self.events.append((start, end, bs, self.overhead[-1]))
+            self.events.append((start, end, bs, self.previous_overhead))
 
             # Log progress so it looks somewhat responsive
             self.log_progress()
@@ -279,7 +280,9 @@ def wrapped(self, iterator):
                     break
 
             start = end
-            self.overhead.append(ct.elapsed())
+
+            self.previous_overhead = ct.elapsed()
+            self.overhead.append(self.previous_overhead)
 
             self._push()
         self.earlystop()
@@ -337,6 +340,7 @@ def _push_profile_metrics(self):
 
         for iterinit in self.loader_init_time:
             self.message(__iter__=iterinit, units="s", task=self.task)
+        self.previous_overhead = 0
        self.overhead = []
        self.loader_init_time = []
 
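Note on the metrics.py change: on the very first batch `self.overhead` is still empty, so the old `self.overhead[-1]` lookup raises an IndexError; carrying a `previous_overhead` that starts at 0 avoids the special case. A minimal sketch of the pattern (the `TimedLoop` class below is hypothetical, not the real benchmate API):

```python
import time


class TimedLoop:
    # Hypothetical stand-in for benchmate's TimedIterator, reduced to the
    # overhead bookkeeping that this commit fixes.
    def __init__(self, iterable):
        self.iterable = iterable
        self.overhead = []
        self.previous_overhead = 0  # stays 0 until one iteration completes
        self.events = []

    def log_progress(self):
        pass  # stand-in for the instrumentation work being timed

    def __iter__(self):
        start = time.perf_counter()
        for data in self.iterable:
            end = time.perf_counter()
            # On the first batch previous_overhead is simply 0; the pre-fix
            # self.overhead[-1] raised IndexError on the empty list.
            self.events.append((start, end, self.previous_overhead))
            yield data
            start = end
            t0 = time.perf_counter()
            self.log_progress()
            self.previous_overhead = time.perf_counter() - t0
            self.overhead.append(self.previous_overhead)


for _ in TimedLoop(range(3)):
    pass  # the first event carries overhead 0 instead of crashing
```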
2 changes: 1 addition & 1 deletion constraints/cuda.txt
@@ -2,5 +2,5 @@
 
 #
 #
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
2 changes: 1 addition & 1 deletion constraints/hpu.txt
@@ -3,6 +3,6 @@
 
 #
 #
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
 
2 changes: 1 addition & 1 deletion constraints/rocm.txt
@@ -2,5 +2,5 @@
 
 #
 #
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
3 changes: 2 additions & 1 deletion constraints/xpu.txt
@@ -10,8 +10,9 @@ torchaudio>=2.1.0a0
 intel-extension-for-pytorch>=2.1.10+xpu
 oneccl_bind_pt==2.1.100+xpu
 intel-extension-for-pytorch-deepspeed>=2.1.30
+intel-extension-for-openxla>=0.3.0
 
 #
 #
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
6 changes: 3 additions & 3 deletions milabench/_version.py
@@ -1,5 +1,5 @@
 """This file is generated, do not modify"""
 
-__tag__ = "v0.0.6-140-g57343f1"
-__commit__ = "57343f10ef2b4ce598011ee308ebd06b4c654495"
-__date__ = "2024-06-10 11:52:37 -0400"
+__tag__ = "v0.0.10-145-gc151b985"
+__commit__ = "c151b98546f32d9c0671507f8526ed13598e3407"
+__date__ = "2024-06-11 14:30:04 -0400"
3 changes: 3 additions & 0 deletions milabench/sizer.py
@@ -284,6 +284,9 @@ def resolve_argv(pack, argv):
         "cpu": "gloo"
     }
 
+    if device_count <= 0:
+        device_count = 1
+
     context["arch"] = arch
     context["ccl"] = ccl.get(arch, "gloo")
     context["cpu_count"] = multiprocessing.cpu_count()
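The sizer.py guard handles CPU-only machines, where the detected accelerator count is 0 and anything that launches one worker per device (or divides by `device_count`) would break. A small sketch of the idea, with a hypothetical `build_context` helper condensing what `resolve_argv` sets up:

```python
import multiprocessing


def build_context(arch: str, device_count: int) -> dict:
    ccl = {"hpu": "hccl", "cuda": "nccl", "rocm": "rccl", "xpu": "ccl", "cpu": "gloo"}

    # The fix: clamp a zero (or negative) device count to 1 so downstream
    # consumers always see at least one device.
    if device_count <= 0:
        device_count = 1

    return {
        "arch": arch,
        "ccl": ccl.get(arch, "gloo"),
        "cpu_count": multiprocessing.cpu_count(),
        "device_count": device_count,
    }


print(build_context("cpu", 0)["device_count"])  # 1
```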
1 change: 1 addition & 0 deletions scripts/article/run_hpu.sh
@@ -60,6 +60,7 @@ install_prepare() {
 
     #
     # Generate/download datasets, download models etc...
+    #
     milabench prepare
 }
 
10 changes: 10 additions & 0 deletions scripts/article/run_rocm.sh
@@ -27,16 +27,26 @@ install_prepare() {
     #
     milabench install
 
+    #
+    # Override/add package to milabench venv here
+    #
+    which pip
+    # pip install ...
+
     (
         . $BENCHMARK_VENV/bin/activate
 
+        #
+        # Override/add package to the benchmark venv here
+        #
+        which pip
         pip uninstall -y torch torchvision torchaudio
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
     )
 
     #
     # Generate/download datasets, download models etc...
     #
     milabench prepare
 }
 
1 change: 1 addition & 0 deletions scripts/article/run_xpu.sh
@@ -44,6 +44,7 @@ install_prepare() {
 
     #
     # Generate/download datasets, download models etc...
+    #
     milabench prepare
 }
 
55 changes: 55 additions & 0 deletions tests/benchmate/test_timed_iterator.py
@@ -0,0 +1,55 @@
+import time
+
+import pytest
+
+from benchmate.metrics import TimedIterator, StopProgram
+
+
+class CPUEvent:
+    def __init__(self, **kwargs):
+        self.start = 0
+
+    def record(self):
+        self.start = time.time()
+
+    def elapsed_time(self, end):
+        # should return milliseconds
+        return (end.start - self.start) * 1000
+
+    def synchronize(self):
+        pass
+
+
+def test_wrapper():
+    batch = [1, 2]
+    process_time = 0.1
+
+    iterable = [(batch, 3) for i in range(10)]
+    messages = []
+
+    def push(**kwargs):
+        nonlocal messages
+        messages.append(kwargs)
+
+    loader = TimedIterator(
+        iterable, event_fn=CPUEvent, earlystop=50, raise_stop_program=True, push=push
+    )
+
+    with pytest.raises(StopProgram):
+        for e in range(200):
+            for i in loader:
+                time.sleep(process_time)
+
+    assert len(messages) == 117
+
+    rate_acc = 0
+    rate_count = 0
+    for msg in messages:
+        if rate := msg.get("rate"):
+            rate_acc += rate
+            rate_count += 1
+
+    assert rate_count == 50, "Program should stop once it reaches the necessary count"
+    assert (
+        abs((rate_acc / rate_count) - len(batch) / process_time) < 0.5
+    ), "Computed rate should be close to the theoretical rate"
13 changes: 13 additions & 0 deletions tests/conftest.py
@@ -1,5 +1,6 @@
 import os
 from pathlib import Path
+import stat
 
 import voir.instruments.gpu as voirgpu
 
@@ -88,6 +89,18 @@ def set_env():
     os.environ["MILABENCH_DASH"] = "no"
     os.environ["MILABENCH_GPU_ARCH"] = backend
 
+    #
+    # milabench expects voir to be installed in the bench venv;
+    # we fake one that defers to the voir installed in the current env
+    os.makedirs("output/venv/benchio/bin/", exist_ok=True)
+    voirexec = "output/venv/benchio/bin/voir"
+    with open(voirexec, "w") as fp:
+        fp.write("#!/bin/bash\n")
+        fp.write("python -m voir \"$@\"")
+
+    current_permissions = stat.S_IMODE(os.lstat(voirexec).st_mode)
+    os.chmod(voirexec, current_permissions | (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))
+
     if backend == "mock":
         oldsmi = voirgpu.DEVICESMI
         voirgpu.DEVICESMI = MockDeviceSMI()
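Why the shim needs the chmod: files created through `open()` are not executable (typically mode 0644), and milabench invokes the fake `voir` directly, so the execute bits must be OR'd into the existing permissions. An illustration of the permission arithmetic:

```python
import stat

current = 0o644  # typical mode of a freshly written file
executable = current | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
print(oct(executable))  # 0o755: now executable by user, group, and others
```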
2 changes: 1 addition & 1 deletion tests/test_capabilities.py
@@ -21,7 +21,7 @@ def fake_config(n):
 
 def test_capabilties_ok():
     pack = BasePackage(fake_config(10))
-    assert sync_is_system_capable(pack)
+    assert sync_is_system_capable(pack) is True
 
 
 def test_capabilties_not_ok():
4 changes: 2 additions & 2 deletions tests/test_executors.py
@@ -197,7 +197,7 @@ def test_njobs_gpus_executor():
     acc = 0
     for r in proceed(njobs.execute()):
         if r.event == "start":
-            assert r.data["command"][0] == "torchrun"
+            assert r.data["command"][0].endswith("torchrun")
         acc += 1
         print(r)
 
@@ -218,7 +218,7 @@ def test_njobs_gpu_executor():
         print(r)
 
         if r.event == "start":
-            assert r.data["command"][0] == "voir"
+            assert r.data["command"][0].endswith("voir")
 
         acc += 1
 
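Both assertions above were loosened from exact matches to `endswith`, presumably because the executors now resolve `torchrun` and `voir` to absolute paths inside the per-benchmark venv. A tiny illustration (the path shown is hypothetical):

```python
# The command may now arrive as an absolute venv path rather than a bare name.
cmd = ["/tmp/output/venv/torch/bin/torchrun", "--nproc-per-node", "8"]
assert cmd[0].endswith("torchrun")  # passes for both bare and absolute forms
```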
14 changes: 11 additions & 3 deletions tests/test_scaler.py
@@ -66,6 +66,13 @@ def test_scaler_disabled(multipack):
     assert pack.argv == []
 
 
+def fakeexec(pack):
+    from milabench.sizer import resolve_argv, scale_argv
+    sized_args = scale_argv(pack, pack.argv)
+    final_args = resolve_argv(pack, sized_args)
+    return final_args
+
+
 def test_scaler_enabled(multipack, config):
     from milabench.config import system_global
     import contextvars
@@ -83,12 +90,13 @@ def update_ctx():
         )
         sizer_global.set(sizer)
         system = system_global.get()
-        system["gpu"]["capacity"] = "41920 MiB"
+        gpu = system.setdefault("gpu", dict())
+        gpu["capacity"] = "41920 MiB"
 
     ctx.run(update_ctx)
 
     for k, pack in multipack.packs.items():
-        assert ctx.run(lambda: pack.argv) == ["--batch_size", "232"]
+        assert ctx.run(lambda: fakeexec(pack)) == ["--batch_size", "232"]
 
     # Sizer is only enabled inside the context
-    assert pack.argv == []
+    assert fakeexec(pack) == []
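The switch from asserting on `pack.argv` to `fakeexec(pack)` exercises the full resolution path that milabench applies before execution: `scale_argv` rescales batch-size-like arguments to the configured GPU capacity, then `resolve_argv` fills in the template context. The `setdefault("gpu", dict())` change likewise keeps the test from failing on systems whose config carries no `gpu` section.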
4 changes: 2 additions & 2 deletions tests/test_summary/test_report.txt
@@ -2,8 +2,8 @@ Source: XXX
 =================
 Benchmark results
 =================
-fail n perf sem% std% peak_memory score weight
-benchio 0 4 7979.82 2.9% 17.2% -1 7979.81831 2.00
+bench | fail | n | perf | sem% | std% | peak_memory | score | weight
+benchio | 0 | 4 | 7979.82 | 2.9% | 17.2% | -1 | 7979.82 | 2.00
 
 Scores
 ------
4 changes: 2 additions & 2 deletions tests/test_summary/test_report_folder_does_average.txt
@@ -2,8 +2,8 @@ Source: XXX
 =================
 Benchmark results
 =================
-fail n perf sem% std% peak_memory score weight
-benchio 0 6 7878.45 2.5% 18.0% 24456 7878.451302 2.00
+bench | fail | n | perf | sem% | std% | peak_memory | score | weight
+benchio | 0 | 6 | 7878.45 | 2.5% | 18.0% | 24456 | 7878.45 | 2.00
 
 Scores
 ------
