diff --git a/milabench/testing.py b/milabench/testing.py
index 9dcd455ee..06d47ad67 100644
--- a/milabench/testing.py
+++ b/milabench/testing.py
@@ -11,6 +11,46 @@
 from milabench.utils import multilogger, validation_layers
 
 
+# Directory containing this module (the ``milabench`` package); official
+# configuration files are located relative to it.
+here = Path(__file__).parent
+
+
+def official_config(name):
+    """Return the absolute path of an official configuration file.
+
+    Args:
+        name: Configuration name without the ``.yaml`` extension,
+            e.g. ``"standard"``.
+
+    Returns:
+        The resolved ``pathlib.Path`` of ``../config/<name>.yaml``.
+
+    Raises:
+        FileNotFoundError: If that file does not exist.
+    """
+    # Resolve first so the returned path and the error message are both
+    # free of ``..`` components.
+    path = (here / ".." / "config" / f"{name}.yaml").resolve()
+
+    if not path.exists():
+        raise FileNotFoundError(f"{path} does not exist")
+
+    return path
+
+
+def resolved_config(name):
+    """Return the official configuration ``name`` processed by ``build_config``.
+
+    Raises:
+        FileNotFoundError: If the configuration file does not exist.
+    """
+    # Imported at call time rather than at module import.
+    from .config import build_config
+
+    return build_config(official_config(name))
+
+
 class ReplayPackage(BasePackage):
     """Disable some folder creation for replay purposes"""
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 3d021c570..fb0391830 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,6 +2,8 @@
 import stat
 from pathlib import Path
 
 import pytest
 import voir.instruments.gpu as voirgpu
+
+from milabench.testing import official_config
 
@@ -21,20 +23,25 @@ def runs_folder():
 def config():
     def get_config(name):
         return here / "config" / f"{name}.yaml"
 
     return get_config
 
 
-
-@pytest.fixture
-def official_config():
-    def get_config(name):
-        return here / ".." / "config" / f"{name}.yaml"
-    return get_config
+@pytest.fixture(scope='module')
+def module_tmp_dir(tmp_path_factory):
+    """Base directory shared by every test of a module.
+
+    The directory is created through pytest's ``tmp_path_factory`` so that
+    pytest manages its location and retention instead of leaking a bare
+    ``tempfile.mkdtemp()`` directory after each run.  It is returned as a
+    ``str`` because the tests use it as a plain string path.
+    """
+    return str(tmp_path_factory.mktemp("milabench"))
 
 
-@pytest.fixture
-def standard_config(official_config):
+@pytest.fixture(scope='module')
+def standard_config():
+    """Path of the official ``standard`` configuration file."""
     return official_config("standard")
 
 
diff --git a/tests/test_command_reg/test_command_reg_one_node.txt b/tests/test_command_reg/test_command_reg_one_node.txt
index 7c4c59d27..1390e87e8 100644
--- a/tests/test_command_reg/test_command_reg_one_node.txt
+++ b/tests/test_command_reg/test_command_reg_one_node.txt
@@ -15,7 +15,7 @@ export MILABENCH_DIR_DATA=$BASE/data
 export MILABENCH_DIR_RUNS=$BASE/runs
 export MILABENCH_DIR_EXTRA=$BASE/extra/llm
 export MILABENCH_DIR_CACHE=$BASE/cache
-export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/tests/../config", "config_file": "$SRC/milabench/tests/../config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}'
+export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": "$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}'
 export OMP_NUM_THREADS=8
 
 echo "---"
diff --git a/tests/test_command_reg/test_command_reg_two_nodes.txt b/tests/test_command_reg/test_command_reg_two_nodes.txt
index 6eda9ea72..3ecda3b0f 100644
--- a/tests/test_command_reg/test_command_reg_two_nodes.txt
+++ b/tests/test_command_reg/test_command_reg_two_nodes.txt
@@ -15,7 +15,7 @@ export MILABENCH_DIR_DATA=$BASE/data
 export MILABENCH_DIR_RUNS=$BASE/runs
 export MILABENCH_DIR_EXTRA=$BASE/extra/llm
 export MILABENCH_DIR_CACHE=$BASE/cache
-export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}, {"ip": "192.168.0.11", "main": false, "name": "1", "port": 22, "user": "username", "hostname": "192.168.0.11", "aliaslist": [], "ipaddrlist": ["192.168.0.11"], "local": false}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/tests/../config", "config_file": "$SRC/milabench/tests/../config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}'
+export MILABENCH_CONFIG='{"system": {"arch": "cuda", "sshkey": null, "nodes": [{"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}, {"ip": "192.168.0.11", "main": false, "name": "1", "port": 22, "user": "username", "hostname": "192.168.0.11", "aliaslist": [], "ipaddrlist": ["192.168.0.11"], "local": false}], "self": {"ip": "127.0.0.1", "main": true, "name": "0", "port": 22, "user": "username", "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["127.0.0.1"], "local": true}}, "dirs": {"base": "$BASE", "venv": "$BASE/venv/torch", "data": "$BASE/data", "runs": "$BASE/runs", "extra": "$BASE/extra/llm", "cache": "$BASE/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "dev", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "$SRC/milabench/config", "config_file": "$SRC/milabench/config/standard.yaml", "definition": "$SRC/milabench/benchmarks/llama", "tags": ["inference", "llm", "nlp"], "plan": {"method": "per_gpu"}, "weight": 1.0, "name": "llama", "tag": ["llama"]}'
 export OMP_NUM_THREADS=8
 
 echo "---"
diff --git a/tests/test_mock.py b/tests/test_mock.py
index 69ab8e2ca..1b9cd7cd7 100644
--- a/tests/test_mock.py
+++ b/tests/test_mock.py
@@ -1,20 +1,11 @@
+from contextlib import contextmanager
+import os
 
+import milabench.alt_async
 import milabench.commands.executors
+from milabench.testing import resolved_config
 
-import traceback
-from pytest import fixture
-
-
-@fixture
-def args(standard_config, tmp_path):
-    return [
-        "--base", str(tmp_path),
-        "--config", str(standard_config)
-    ]
-
-
-async def mock_exec(command, phase="run", timeout=False, timeout_delay=600, **kwargs):
-    return [0]
+import pytest
 
 
 def run_cli(*args):
@@ -27,18 +18,103 @@ def run_cli(*args):
         assert not exc.code
 
 
-def test_milabench(monkeypatch, args):
-    monkeypatch.setenv("MILABENCH_GPU_ARCH", "cuda")
-    monkeypatch.setattr(milabench.commands, "execute_command", mock_exec)
+def benchlist(enabled=True):
+    """Yield the names of the benchmarks of the official standard config.
+
+    Entries whose name starts with ``_`` are skipped.
+
+    Args:
+        enabled: Select the benchmarks whose ``enabled`` flag (missing means
+            ``False``) matches this value.
+    """
+    standard = resolved_config("standard")
+
+    for key, value in standard.items():
+        # Test the name first so private entries are never inspected.
+        if key.startswith("_"):
+            continue
+
+        if bool(value.get("enabled", False)) == bool(enabled):
+            yield key
+
+
+# We want to reuse this fixture for each bench
+# so we do not run some steps multiple times
+@pytest.fixture(scope='module')
+def args(standard_config, module_tmp_dir):
+    """Command line arguments shared by every milabench invocation."""
+    return [
+        "--base", str(module_tmp_dir),
+        "--config", str(standard_config),
+    ]
 
-    run_cli("install", *args)
 
-    run_cli("prepare", *args)
+def mock_voir_run(argv, info, timeout=None, constructor=None, env=None, **options):
+    """Stand-in for ``milabench.alt_async.voir_run`` that runs ``sleep 1``.
+
+    ``argv`` is deliberately ignored: the benchmark command is replaced by a
+    short ``sleep`` while the remaining arguments are forwarded to the
+    ``Multiplexer`` unchanged.
+    """
+    from voir.proc import Multiplexer
+
+    mp = Multiplexer(timeout=timeout, constructor=constructor)
+    mp.start(["sleep", "1"], info=info, env=env, **options)
+    return mp
+
+
+def count_file_like(path, name):
+    """Count the entries of ``<path>/runs`` whose name starts with ``name``.
+
+    Returns 0 when the ``runs`` folder does not exist yet.
+    """
+    try:
+        entries = os.listdir(os.path.join(path, "runs"))
+    except FileNotFoundError:
+        return 0
+
+    return sum(1 for entry in entries if entry.startswith(name))
+
+
+@contextmanager
+def filecount_inc(path, name):
+    """Check that exactly one new ``name*`` entry was created while running.
+
+    The check is skipped if the wrapped block raises, so the original error
+    is reported instead of a misleading count mismatch.
+    """
+    old = count_file_like(path, name)
+    yield
+    new = count_file_like(path, name)
+
+    assert new == old + 1, (
+        f"expected one new '{name}*' entry in {path}/runs, "
+        f"found {new - old}"
+    )
+
+
+@pytest.mark.parametrize("bench", benchlist())
+def test_milabench(monkeypatch, args, bench, module_tmp_dir):
+    """Install, prepare and (mock) run a single benchmark."""
+    from milabench.cli.dry import assume_gpu
+
+    monkeypatch.setenv("MILABENCH_GPU_ARCH", "cuda")
+
+    with filecount_inc(module_tmp_dir, "install"):
+        run_cli("install", *args, "--select", bench)
+
+    with filecount_inc(module_tmp_dir, "prepare"):
+        run_cli("prepare", *args, "--select", bench)
 
     #
     # use Mock GPU-SMI
     #
-    monkeypatch.setenv("MILABENCH_GPU_ARCH", "mock")
-    from milabench.cli.dry import assume_gpu
-    with assume_gpu(8):
-        run_cli("run", *args, "--no-report")
+    with monkeypatch.context() as ctx:
+        ctx.setattr(milabench.alt_async, "voir_run", mock_voir_run)
+        ctx.setenv("MILABENCH_GPU_ARCH", "mock")
+
+        with filecount_inc(module_tmp_dir, bench), assume_gpu(8):
+            run_cli(
+                "run", *args, "--no-report",
+                "--select", bench, "--run-name", str(bench),
+            )