Skip to content

Commit

Permalink
Do a mock run where everything except run is executed
Browse files Browse the repository at this point in the history
  • Loading branch information
pierre.delaunay committed Jul 8, 2024
1 parent dd7f388 commit 3ce28f7
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 8 deletions.
24 changes: 19 additions & 5 deletions benchmate/benchmate/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from collections import defaultdict
from pathlib import Path

import torchcompat.core as acc
import torch
from tqdm import tqdm

Expand Down Expand Up @@ -79,26 +80,39 @@ def generate_sets(root, sets, shape):
json.dump(sets, fp)


def device_count():
    """Return the number of accelerator devices reported by ``acc``.

    ``acc`` is the accelerator compatibility module imported at the top of
    the file. If the query fails for any ordinary reason, fall back to a
    single device so callers still get a usable count.

    Returns:
        The device count from ``acc.device_count()``, or 1 on failure.
    """
    try:
        return acc.device_count()
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt and
        # SystemExit; only ordinary errors should trigger the fallback.
        return 1

def generate_fakeimagenet():
    """Generate the FakeImageNet dataset under ``$MILABENCH_DIR_DATA``.

    The amount of data is driven by command-line options (unknown options
    are ignored): the training set holds
    ``batch_size * batch_count * device_count`` images, and the validation
    and test sets are the ``--val`` / ``--test`` fractions of that total.

    Raises:
        KeyError: if ``MILABENCH_DIR_DATA`` is not set in the environment.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch-size", default=512, type=int)
    parser.add_argument("--batch-count", default=60, type=int)
    parser.add_argument("--device-count", default=device_count(), type=int)
    parser.add_argument("--image-size", default=[3, 384, 384], type=int, nargs="+")
    # These are single fractions. With nargs="+" a value given on the command
    # line arrived as a list, which broke the size arithmetic below.
    parser.add_argument("--val", default=0.1, type=float)
    parser.add_argument("--test", default=0.1, type=float)

    args, _ = parser.parse_known_args()

    data_directory = os.environ["MILABENCH_DIR_DATA"]
    dest = os.path.join(data_directory, "FakeImageNet")
    print(f"Generating fake data into {dest}...")

    # Scale the dataset with the number of devices.
    total_images = args.batch_size * args.batch_count * args.device_count
    size_spec = {
        "train": total_images,
        "val": int(total_images * args.val),
        "test": int(total_images * args.test),
    }

    generate_sets(dest, size_spec, args.image_size)
    print("Done!")

Expand Down
17 changes: 17 additions & 0 deletions benchmate/benchmate/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import os
from collections import defaultdict
import math

import torch
from torch.utils.data.distributed import DistributedSampler


def no_transform(args):
Expand Down Expand Up @@ -48,3 +52,16 @@ def __getitem__(self, item):

def __len__(self):
return len(self.clip)


class ExclusiveSetSampler(DistributedSampler):
    """A ``DistributedSampler`` addressed by set instead of by process rank.

    ``num_sets`` is forwarded as ``num_replicas`` and ``set_id`` as ``rank``;
    the remaining options are passed through unchanged.
    """

    def __init__(
        self,
        dataset,
        num_sets: int,
        set_id: int,
        shuffle: bool = True,
        seed: int = 0,
        drop_last: bool = False,
    ) -> None:
        sampler_options = {
            "num_replicas": num_sets,
            "rank": set_id,
            "shuffle": shuffle,
            "seed": seed,
            "drop_last": drop_last,
        }
        super().__init__(dataset, **sampler_options)

6 changes: 3 additions & 3 deletions milabench/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""This file is generated, do not modify"""

__tag__ = "v0.1.0-20-gf2cc75f8"
__commit__ = "f2cc75f8a4728dcac223c91153119b43fec05698"
__date__ = "2024-07-03 12:37:48 -0400"
__tag__ = "v0.1.0-24-gdd7f3888"
__commit__ = "dd7f3888ac0524b3b587e415d1de0e2019cd751f"
__date__ = "2024-07-03 16:07:47 -0400"
52 changes: 52 additions & 0 deletions milabench/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,55 @@ def enumerate_rank(nodes):
else:
yield rank, node
rank += 1


def get_available_ram(leeway=1024):
    """Return an estimate of the RAM this process can still allocate.

    Inside a SLURM job (``SLURM_JOB_ID`` is set) the job's cgroup files
    ``memory.max`` and ``memory.current`` are read and their difference is
    used. Outside SLURM, or when those files cannot be read or parsed, the
    value comes from ``psutil.virtual_memory().available``.

    Args:
        leeway: safety margin subtracted from the estimate.

    Returns:
        The available amount minus ``leeway``.
    """
    import os
    import psutil

    # Note: if slurm does not give us access to the entire RAM we wont be
    # able to do much
    if jobid := os.getenv("SLURM_JOB_ID", None):
        cgroup_dir = f"/sys/fs/cgroup/system.slice/slurmstepd.scope/job_{jobid}"
        try:
            with open(cgroup_dir + "/memory.max", "r") as fp:
                mem_max = int(fp.read())
            with open(cgroup_dir + "/memory.current", "r") as fp:
                mem_cur = int(fp.read())
        except (OSError, ValueError):
            # The cgroup files are missing/unreadable or do not hold an
            # integer: use the system-wide figure below instead.
            pass
        else:
            return mem_max - mem_cur - leeway

    # Explicit fallback, so the function never returns None outside SLURM.
    vm = psutil.virtual_memory()
    return vm.available - leeway


def fill_ram():
    """Allocate and write to an int8 buffer sized by ``get_available_ram()``.

    Note: this takes too long to be a viable option.
    """
    import numpy as np

    nbytes = get_available_ram()
    buffer = np.zeros((nbytes,), dtype=np.int8)
    # The memory is not really allocated until it is written to.
    buffer.fill(1)
    del buffer


def empty_cache():
    """Empty ram cache before a bench.

    Best effort: runs ``sync`` to finish in-flight writes, then
    ``sysctl vm.drop_caches=3`` both directly and through ``sudo``. Each
    command is attempted independently, so one that cannot be launched
    (for example a binary missing from PATH) does not prevent the others
    from running. Exit codes are not checked and failures are ignored.
    """
    import subprocess

    # make users able to manage cgroup
    #   $SUDO chmod 777 -R /sys/fs/cgroup/*

    # make sysctl executable by user, for cache cleaning
    #   $SUDO chmod u+s /sbin/sysctl

    commands = (
        # finish on the fly writes
        ["sync"],
        # Drop the cache
        ["sysctl", "vm.drop_caches=3"],
        ["sudo", "sysctl", "vm.drop_caches=3"],
    )

    for command in commands:
        try:
            subprocess.run(command)
        except OSError:
            # The executable is missing or cannot be started; this is a
            # best-effort cleanup, so move on to the next command.
            continue
44 changes: 44 additions & 0 deletions tests/test_mock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

import milabench.commands.executors

import traceback
from pytest import fixture


@fixture
def args(standard_config, tmp_path):
    """Common CLI arguments: ``--base`` at ``tmp_path``, ``--config`` at ``standard_config``."""
    base_dir = str(tmp_path)
    config_file = str(standard_config)
    return ["--base", base_dir, "--config", config_file]


async def mock_exec(command, phase="run", timeout=False, timeout_delay=600, **kwargs):
    """Stand-in executor that runs nothing and always returns ``[0]``.

    All arguments are accepted and ignored.
    """
    result = [0]
    return result


def run_cli(*args):
    """Call the milabench CLI ``main`` with ``args`` and check how it exits.

    The command line is printed first. If ``main`` raises ``SystemExit``,
    its exit code must be falsy (``0`` or ``None``).
    """
    from milabench.cli import main

    command_line = " ".join(args)
    print(command_line)

    try:
        main(args)
    except SystemExit as stop:
        assert not stop.code


def test_milabench(monkeypatch, args):
    """Drive install, prepare and run through the CLI with ``execute_command`` mocked."""
    monkeypatch.setenv("MILABENCH_GPU_ARCH", "cuda")
    monkeypatch.setattr(milabench.commands, "execute_command", mock_exec)

    # install first, then prepare, both with the same arguments
    for phase in ("install", "prepare"):
        run_cli(phase, *args)

    #
    # use Mock GPU-SMI
    #
    monkeypatch.setenv("MILABENCH_GPU_ARCH", "mock")
    from milabench.cli.dry import assume_gpu

    with assume_gpu(8):
        run_cli("run", *args, "--no-report")

0 comments on commit 3ce28f7

Please sign in to comment.