diff --git a/.pin/constraints-hpu-torch.txt b/.pin/constraints-hpu-torch.txt
index 20f5f2672..de77f3bfd 100644
--- a/.pin/constraints-hpu-torch.txt
+++ b/.pin/constraints-hpu-torch.txt
@@ -587,7 +587,7 @@ urllib3==1.26.18
     #   torchx
 varname==0.10.0
     # via giving
-voir==0.2.14
+voir==0.2.15
     # via
     #   -c .pin/../constraints/hpu.txt
     #   -r benchmarks/accelerate_opt/requirements.in
diff --git a/.pin/constraints-xpu-torch.txt b/.pin/constraints-xpu-torch.txt
index 1d12ca32a..1ffcf44a1 100644
--- a/.pin/constraints-xpu-torch.txt
+++ b/.pin/constraints-xpu-torch.txt
@@ -592,11 +592,7 @@ urllib3==1.26.18
     #   torchx
 varname==0.10.0
     # via giving
-<<<<<<< HEAD
 voir==0.2.15
-=======
-voir==0.2.14
->>>>>>> baa6757f78c08eb64ed139ebec96250f9ef6f180
     # via
     #   -c .pin/../constraints/xpu.txt
     #   -r benchmarks/accelerate_opt/requirements.in
diff --git a/benchmarks/brax/requirements.cuda.txt b/benchmarks/brax/requirements.cuda.txt
index 5e7dc7c3d..38a54509a 100644
--- a/benchmarks/brax/requirements.cuda.txt
+++ b/benchmarks/brax/requirements.cuda.txt
@@ -432,7 +432,7 @@ varname==0.10.0
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
     #   giving
-voir==0.2.14
+voir==0.2.15
     # via
     #   -c .pin/../.pin/constraints-cuda-torch.txt
     #   -c .pin/../constraints/cuda.txt
diff --git a/benchmarks/brax/requirements.hpu.txt b/benchmarks/brax/requirements.hpu.txt
index ed3084061..d79e7242c 100644
--- a/benchmarks/brax/requirements.hpu.txt
+++ b/benchmarks/brax/requirements.hpu.txt
@@ -431,7 +431,7 @@ varname==0.10.0
     # via
     #   -c .pin/../.pin/constraints-hpu-torch.txt
     #   giving
-voir==0.2.14
+voir==0.2.15
     # via
     #   -c .pin/../.pin/constraints-hpu-torch.txt
     #   -c .pin/../constraints/hpu.txt
diff --git a/benchmarks/brax/requirements.rocm.txt b/benchmarks/brax/requirements.rocm.txt
index 6e1503248..c77018b5e 100644
--- a/benchmarks/brax/requirements.rocm.txt
+++ b/benchmarks/brax/requirements.rocm.txt
@@ -417,7 +417,7 @@ varname==0.10.0
     # via
     #   -c .pin/../.pin/constraints-rocm-torch.txt
     #   giving
-voir==0.2.14
+voir==0.2.15
     # via
     #   -c .pin/../.pin/constraints-rocm-torch.txt
     #   -c .pin/../constraints/rocm.txt
diff --git a/benchmarks/brax/requirements.xpu.txt b/benchmarks/brax/requirements.xpu.txt
index 41b63f8a5..75a03f5aa 100644
--- a/benchmarks/brax/requirements.xpu.txt
+++ b/benchmarks/brax/requirements.xpu.txt
@@ -433,7 +433,7 @@ varname==0.10.0
     # via
     #   -c .pin/../.pin/constraints-xpu-torch.txt
     #   giving
-voir==0.2.14
+voir==0.2.15
     # via
     #   -c .pin/../.pin/constraints-xpu-torch.txt
     #   -c .pin/../constraints/xpu.txt
diff --git a/benchmate/benchmate/metrics.py b/benchmate/benchmate/metrics.py
index 53bef1ad1..64f4712c2 100644
--- a/benchmate/benchmate/metrics.py
+++ b/benchmate/benchmate/metrics.py
@@ -230,6 +230,7 @@ def __init__(
         self.raise_stop_program = raise_stop_program  # Does TimedIterator raise StopProgram
         self.profile_instrumentation = False
         self.overhead = []
+        self.previous_overhead = 0
         self.loader_init_time = []
         self.sub_overhead = 0
 
@@ -268,7 +269,7 @@ def wrapped(self, iterator):
                 end.record()
 
                 bs = self.deduce_batch_size(data)
-                self.events.append((start, end, bs, self.overhead[-1]))
+                self.events.append((start, end, bs, self.previous_overhead))
 
                 # Log progress so it looks somewhat responsive
                 self.log_progress()
@@ -279,7 +280,9 @@ def wrapped(self, iterator):
                     break
 
                 start = end
-                self.overhead.append(ct.elapsed())
+
+                self.previous_overhead = ct.elapsed()
+                self.overhead.append(self.previous_overhead)
 
             self._push()
             self.earlystop()
@@ -337,6 +340,7 @@ def _push_profile_metrics(self):
         for iterinit in self.loader_init_time:
             self.message(__iter__=iterinit, units="s", task=self.task)
 
+        self.previous_overhead = 0
         self.overhead = []
         self.loader_init_time = []
 
diff --git a/constraints/cuda.txt b/constraints/cuda.txt
index 56109d809..41588f46a 100644
--- a/constraints/cuda.txt
+++ b/constraints/cuda.txt
@@ -2,5 +2,5 @@
 #
 #
 
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
\ No newline at end of file
diff --git a/constraints/hpu.txt b/constraints/hpu.txt
index 1dba3a1ee..3cc36920d 100644
--- a/constraints/hpu.txt
+++ b/constraints/hpu.txt
@@ -3,6 +3,6 @@
 #
 #
 
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
 
diff --git a/constraints/rocm.txt b/constraints/rocm.txt
index 1bf0919e8..cdde4e6a1 100644
--- a/constraints/rocm.txt
+++ b/constraints/rocm.txt
@@ -2,5 +2,5 @@
 #
 #
 
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
\ No newline at end of file
diff --git a/constraints/xpu.txt b/constraints/xpu.txt
index d0cf6bdac..8b8b39db7 100644
--- a/constraints/xpu.txt
+++ b/constraints/xpu.txt
@@ -10,8 +10,9 @@
 torchaudio>=2.1.0a0
 intel-extension-for-pytorch>=2.1.10+xpu
 oneccl_bind_pt==2.1.100+xpu
 intel-extension-for-pytorch-deepspeed>=2.1.30
+intel-extension-for-openxla>=0.3.0
 #
 #
-voir > 0.2.10
+voir >= 0.2.15
 torchcompat >= 1.0.0
\ No newline at end of file
diff --git a/milabench/_version.py b/milabench/_version.py
index 57c79d91e..ced2a5852 100644
--- a/milabench/_version.py
+++ b/milabench/_version.py
@@ -1,5 +1,5 @@
 """This file is generated, do not modify"""
 
-__tag__ = "v0.0.6-140-g57343f1"
-__commit__ = "57343f10ef2b4ce598011ee308ebd06b4c654495"
-__date__ = "2024-06-10 11:52:37 -0400"
+__tag__ = "v0.0.10-145-gc151b985"
+__commit__ = "c151b98546f32d9c0671507f8526ed13598e3407"
+__date__ = "2024-06-11 14:30:04 -0400"
diff --git a/milabench/sizer.py b/milabench/sizer.py
index 5c206b7a8..bd0bc82c6 100644
--- a/milabench/sizer.py
+++ b/milabench/sizer.py
@@ -284,6 +284,9 @@ def resolve_argv(pack, argv):
         "cpu": "gloo"
     }
 
+    if device_count <= 0:
+        device_count = 1
+
     context["arch"] = arch
     context["ccl"] = ccl.get(arch, "gloo")
     context["cpu_count"] = multiprocessing.cpu_count()
diff --git a/scripts/article/run_hpu.sh b/scripts/article/run_hpu.sh
index c732b09a4..f6add4850 100644
--- a/scripts/article/run_hpu.sh
+++ b/scripts/article/run_hpu.sh
@@ -60,6 +60,7 @@ install_prepare() {
 
     #
     # Generate/download datasets, download models etc...
+    #
     milabench prepare
 }
 
diff --git a/scripts/article/run_rocm.sh b/scripts/article/run_rocm.sh
index eaafd522f..819374e66 100644
--- a/scripts/article/run_rocm.sh
+++ b/scripts/article/run_rocm.sh
@@ -27,16 +27,26 @@ install_prepare() {
 
     milabench install
 
+    #
+    # Override/add package to milabench venv here
+    # which pip
+    # pip install ...
+
     (
         . $BENCHMARK_VENV/bin/activate
+
+        #
+        # Override/add package to the benchmark venv here
+        # which pip
+
         pip uninstall torch torchvision torchaudio
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
 
     )
 
 
    #
    # Generate/download datasets, download models etc...
+    #
     milabench prepare
 }
 
diff --git a/scripts/article/run_xpu.sh b/scripts/article/run_xpu.sh
index 9e51b0bc0..86c741107 100644
--- a/scripts/article/run_xpu.sh
+++ b/scripts/article/run_xpu.sh
@@ -44,6 +44,7 @@ install_prepare() {
 
     #
     # Generate/download datasets, download models etc...
+    #
     milabench prepare
 }
 
diff --git a/tests/benchmate/test_timed_iterator.py b/tests/benchmate/test_timed_iterator.py
new file mode 100644
index 000000000..0c581a704
--- /dev/null
+++ b/tests/benchmate/test_timed_iterator.py
@@ -0,0 +1,55 @@
+import time
+
+import pytest
+
+from benchmate.metrics import TimedIterator, StopProgram
+
+
+class CPUEvent:
+    def __init__(self, **kwargs):
+        self.start = 0
+
+    def record(self):
+        self.start = time.time()
+
+    def elapsed_time(self, end):
+        # should return ms
+        return (end.start - self.start) * 1000
+
+    def synchronize(self):
+        pass
+
+
+def test_wrapper():
+    batch = [1, 2]
+    process_time = 0.1
+
+    iterable = [(batch, 3) for i in range(10)]
+    messages = []
+
+    def push(**kwargs):
+        nonlocal messages
+        messages.append(kwargs)
+
+    loader = TimedIterator(
+        iterable, event_fn=CPUEvent, earlystop=50, raise_stop_program=True, push=push
+    )
+
+    with pytest.raises(StopProgram):
+        for e in range(200):
+            for i in loader:
+                time.sleep(process_time)
+
+    assert len(messages) == 117
+
+    rate_acc = 0
+    rate_count = 0
+    for msg in messages:
+        if rate := msg.get("rate"):
+            rate_acc += rate
+            rate_count += 1
+
+    assert rate_count == 50, "Program should stop once we reach the necessary count"
+    assert (
+        abs((rate_acc / rate_count) - len(batch) / process_time) < 0.5
+    ), "Computed rate should be close to theoretical rate"
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 2ff6efc34..10e2d28f6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,6 @@
 import os
 from pathlib import Path
+import stat
 
 import voir.instruments.gpu as voirgpu
 
@@ -88,6 +89,18 @@ def set_env():
     os.environ["MILABENCH_DASH"] = "no"
     os.environ["MILABENCH_GPU_ARCH"] = backend
 
+    #
+    # milabench expects voir to be installed in the bench venv
+    # we fake one to use the one we have in the current env
+    os.makedirs("output/venv/benchio/bin/", exist_ok=True)
+    voirexec = "output/venv/benchio/bin/voir"
+    with open(voirexec, "w") as fp:
+        fp.write("#!/bin/bash\n")
+        fp.write("python -m voir \"$@\"")
+
+    current_permissions = stat.S_IMODE(os.lstat(voirexec).st_mode)
+    os.chmod(voirexec, current_permissions | (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))
+
     if backend == "mock":
         oldsmi = voirgpu.DEVICESMI
         voirgpu.DEVICESMI = MockDeviceSMI()
diff --git a/tests/test_capabilities.py b/tests/test_capabilities.py
index 468ef0712..6fa7236f0 100644
--- a/tests/test_capabilities.py
+++ b/tests/test_capabilities.py
@@ -21,7 +21,7 @@ def fake_config(n):
 
 def test_capabilties_ok():
     pack = BasePackage(fake_config(10))
-    assert sync_is_system_capable(pack)
+    assert sync_is_system_capable(pack) is True
 
 
 def test_capabilties_not_ok():
diff --git a/tests/test_executors.py b/tests/test_executors.py
index 4a4bf598a..c6837b005 100644
--- a/tests/test_executors.py
+++ b/tests/test_executors.py
@@ -197,7 +197,7 @@ def test_njobs_gpus_executor():
     acc = 0
     for r in proceed(njobs.execute()):
         if r.event == "start":
-            assert r.data["command"][0] == "torchrun"
+            assert r.data["command"][0].endswith("torchrun")
             acc += 1
 
         print(r)
@@ -218,7 +218,7 @@ def test_njobs_gpu_executor():
         print(r)
 
         if r.event == "start":
-            assert r.data["command"][0] == "voir"
+            assert r.data["command"][0].endswith("voir")
             acc += 1
 
 
diff --git a/tests/test_scaler.py b/tests/test_scaler.py
index 283048c8b..5d8d561b4 100644
--- a/tests/test_scaler.py
+++ b/tests/test_scaler.py
@@ -66,6 +66,13 @@ def test_scaler_disabled(multipack):
         assert pack.argv == []
 
 
+def fakeexec(pack):
+    from milabench.sizer import resolve_argv, scale_argv
+    sized_args = scale_argv(pack, pack.argv)
+    final_args = resolve_argv(pack, sized_args)
+    return final_args
+
+
 def test_scaler_enabled(multipack, config):
     from milabench.config import system_global
     import contextvars
@@ -83,12 +90,13 @@ def update_ctx():
         )
         sizer_global.set(sizer)
         system = system_global.get()
-        system["gpu"]["capacity"] = "41920 MiB"
+        gpu = system.setdefault("gpu", dict())
+        gpu["capacity"] = "41920 MiB"
 
     ctx.run(update_ctx)
 
     for k, pack in multipack.packs.items():
-        assert ctx.run(lambda: pack.argv) == ["--batch_size", "232"]
+        assert ctx.run(lambda: fakeexec(pack)) == ["--batch_size", "232"]
 
     # Sizer is only enabled inside the context
-    assert pack.argv == []
+    assert fakeexec(pack) == []
diff --git a/tests/test_summary/test_report.txt b/tests/test_summary/test_report.txt
index 7d8474e01..937a59561 100644
--- a/tests/test_summary/test_report.txt
+++ b/tests/test_summary/test_report.txt
@@ -2,8 +2,8 @@ Source: XXX
 =================
 Benchmark results
 =================
-         fail  n     perf  sem%   std%  peak_memory       score  weight
-benchio     0  4  7979.82  2.9%  17.2%           -1  7979.81831    2.00
+bench   | fail | n |    perf | sem% |  std% | peak_memory |   score | weight
+benchio |    0 | 4 | 7979.82 | 2.9% | 17.2% |          -1 | 7979.82 |   2.00
 
 Scores
 ------
diff --git a/tests/test_summary/test_report_folder_does_average.txt b/tests/test_summary/test_report_folder_does_average.txt
index 3cc299dbf..5abe96e68 100644
--- a/tests/test_summary/test_report_folder_does_average.txt
+++ b/tests/test_summary/test_report_folder_does_average.txt
@@ -2,8 +2,8 @@ Source: XXX
 =================
 Benchmark results
 =================
-         fail  n     perf  sem%   std%  peak_memory        score  weight
-benchio     0  6  7878.45  2.5%  18.0%        24456  7878.451302    2.00
+bench   | fail | n |    perf | sem% |  std% | peak_memory |   score | weight
+benchio |    0 | 6 | 7878.45 | 2.5% | 18.0% |       24456 | 7878.45 |   2.00
 
 Scores
 ------