From 3d2718031897bcc5eac93e1304442ba528f5840c Mon Sep 17 00:00:00 2001
From: Pierre Delaunay <pierre@delaunay.io>
Date: Thu, 21 Nov 2024 13:04:41 -0500
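Subject: [PATCH] Revert bad commit

Undo the changes introduced by the reverted commit:

- remove benchmate/benchmate/models.py and the model_size / parameter-count
  prints added to the diffusion, purejaxrl, recursiongfn and torchatari
  benchmarks, plus benchmarks/geo_gnn/modelsize.py;
- remove the flops development setup (dev.yaml, simple.sh,
  requirements.cpu.txt, dev/extra/flops/mark_torch) and restore the
  previous build_run_plan in benchmarks/flops/benchfile.py;
- remove auto_push from benchmate/benchmate/monitor.py and go back to
  writing metrics through SmuggleWriter(sys.stdout);
- stop exporting MILABENCH_MANAGED and MILABENCH_USE_STDOUT;
- restore the previous milabench/_version.py and the previous column-width
  computation in milabench/report.py.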

---
 benchmarks/diffusion/main.py                |  5 --
 benchmarks/flops/benchfile.py               | 14 ++----
 benchmarks/flops/dev.yaml                   | 56 ---------------------
 benchmarks/flops/dev/extra/flops/mark_torch |  0
 benchmarks/flops/main.py                    |  3 +-
 benchmarks/flops/requirements.cpu.txt       |  5 --
 benchmarks/flops/simple.sh                  | 13 -----
 benchmarks/geo_gnn/modelsize.py             | 36 -------------
 benchmarks/purejaxrl/dqn.py                 |  5 --
 benchmarks/purejaxrl/ppo.py                 |  6 ---
 benchmarks/recursiongfn/main.py             |  4 +-
 benchmarks/torchatari/main.py               |  3 --
 benchmate/benchmate/models.py               | 36 -------------
 benchmate/benchmate/monitor.py              | 35 ++++---------
 milabench/_version.py                       |  6 +--
 milabench/commands/executors.py             |  3 --
 milabench/pack.py                           |  2 -
 milabench/report.py                         |  4 +-
 18 files changed, 21 insertions(+), 215 deletions(-)
 delete mode 100644 benchmarks/flops/dev.yaml
 delete mode 100644 benchmarks/flops/dev/extra/flops/mark_torch
 delete mode 100644 benchmarks/flops/requirements.cpu.txt
 delete mode 100644 benchmarks/flops/simple.sh
 delete mode 100644 benchmarks/geo_gnn/modelsize.py
 delete mode 100644 benchmate/benchmate/models.py

diff --git a/benchmarks/diffusion/main.py b/benchmarks/diffusion/main.py
index c5b7757ee..0bcb67d50 100755
--- a/benchmarks/diffusion/main.py
+++ b/benchmarks/diffusion/main.py
@@ -57,11 +57,6 @@ def models(accelerator, args: Arguments):
     unet = UNet2DConditionModel.from_pretrained(
         args.model, subfolder="unet", revision=args.revision, variant=args.variant
     )
-    
-    from benchmate.models import model_size
-    print(model_size(unet))
-    print(model_size(encoder))
-    print(model_size(vae))
 
     vae.requires_grad_(False)
     encoder.requires_grad_(False)
diff --git a/benchmarks/flops/benchfile.py b/benchmarks/flops/benchfile.py
index 3090a3c75..59c5c4a7f 100644
--- a/benchmarks/flops/benchfile.py
+++ b/benchmarks/flops/benchfile.py
@@ -8,15 +8,11 @@ class FlopsBenchmarch(Package):
 
     def build_run_plan(self) -> "Command":
         import milabench.commands as cmd
-        main = self.dirs.code / self.main_script
+
         pack = cmd.PackCommand(self, *self.argv, lazy=True)
-            
-        use_stdout = True
-        
-        if use_stdout:
-            return pack.use_stdout()
-        else:
-            pack = cmd.VoirCommand(pack, cwd=main.parent)
-            return pack
+        # pack = cmd.VoirCommand(pack, cwd=main.parent)
+        pack = cmd.ActivatorCommand(pack)
+        return pack.use_stdout()
+
 
 __pack__ = FlopsBenchmarch
diff --git a/benchmarks/flops/dev.yaml b/benchmarks/flops/dev.yaml
deleted file mode 100644
index 5106bf407..000000000
--- a/benchmarks/flops/dev.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-
-
-_flops:
-  inherits: _defaults
-  definition: .
-  group: flops
-  install-variant: unpinned
-  install_group: torch
-  plan:
-    method: per_gpu
-  
-  tags:
-    - diagnostic
-    - flops
-    - monogpu
-    - nobatch
-  
-  argv:
-    --number: 30
-    --repeat: 90
-
-
-fp16:
-  inherits: _flops
-
-  argv:
-    --number: 30
-    --repeat: 10
-    --m: 8192
-    --n: 8192
-    --dtype: fp16
-
-bf16:
-  inherits: _flops
- 
-  argv:
-    --m: 8192
-    --n: 8192
-    --dtype: bf16
-
-tf32:
-  inherits: _flops
- 
-  argv:
-    --m: 8192
-    --n: 8192
-    --dtype: fp32
-    --tf32: true
-
-fp32:
-  inherits: _flops
- 
-  argv:
-    --m: 256
-    --n: 256
-    --dtype: fp32  
diff --git a/benchmarks/flops/dev/extra/flops/mark_torch b/benchmarks/flops/dev/extra/flops/mark_torch
deleted file mode 100644
index e69de29bb..000000000
diff --git a/benchmarks/flops/main.py b/benchmarks/flops/main.py
index ba03518a6..e4f05c178 100755
--- a/benchmarks/flops/main.py
+++ b/benchmarks/flops/main.py
@@ -109,12 +109,11 @@ def main():
 
     log, monitor = setupvoir()
 
-    # FIXME
-    #with monitor:
     f(args.number, args.repeat, args.m, args.n, TERA, dtypes[args.dtype], log)
 
     monitor.stop()
 
+
 if __name__ == "__main__":
     main()
     print("done")
diff --git a/benchmarks/flops/requirements.cpu.txt b/benchmarks/flops/requirements.cpu.txt
deleted file mode 100644
index 88f8b61e5..000000000
--- a/benchmarks/flops/requirements.cpu.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-torch
-torchvision
-torchcompat
-tqdm
-voir
diff --git a/benchmarks/flops/simple.sh b/benchmarks/flops/simple.sh
deleted file mode 100644
index 3f54d4243..000000000
--- a/benchmarks/flops/simple.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-
-
-export MILABENCH_BASE="$(pwd)/dev"
-export MILABENCH_CONFIG="$(pwd)/dev.yaml"
-
-
-milabench install  --select fp32
-
-milabench prepare  --select fp32
-
-milabench run --select fp32
diff --git a/benchmarks/geo_gnn/modelsize.py b/benchmarks/geo_gnn/modelsize.py
deleted file mode 100644
index 0b65655a2..000000000
--- a/benchmarks/geo_gnn/modelsize.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from torch_geometric.nn.models import PNA as _PNA, DimeNet as _DimeNet
-
-import torch
-
-from benchmate.models import model_size
-
-
-print(model_size(_DimeNet(
-    hidden_channels=64,
-    out_channels=1,
-    num_blocks=6,
-    num_bilinear=8,
-    num_spherical=7,
-    num_radial=6,
-    cutoff=10.0,
-    envelope_exponent=5,
-    num_before_skip=1,
-    num_after_skip=2,
-    num_output_layers=3,
-)
-
-))
-
-print(model_size(
-_PNA(
-    # Basic GCNN setup
-    in_channels=1, 
-    out_channels=1,
-    hidden_channels=64,
-    num_layers=64,
-    # https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.PNAConv.html
-    aggregators=['mean', 'min', 'max', 'std'],
-    scalers=['identity', 'amplification', 'attenuation'],
-    # Histogram of in-degrees of nodes in the training set, used by scalers to normalize
-    deg=torch.tensor(4),
-)))
\ No newline at end of file
diff --git a/benchmarks/purejaxrl/dqn.py b/benchmarks/purejaxrl/dqn.py
index 85e9e8b5d..fc0a97b8d 100644
--- a/benchmarks/purejaxrl/dqn.py
+++ b/benchmarks/purejaxrl/dqn.py
@@ -98,11 +98,6 @@ def train(rng):
         init_x = jnp.zeros(env.observation_space(env_params).shape)
         network_params = network.init(_rng, init_x)
 
-
-        param_count = sum(x.size for x in jax.tree.leaves(network_params))
-        print("PARAM COUNT", param_count)
-        
-        
         def linear_schedule(count):
             frac = 1.0 - (count / config["NUM_UPDATES"])
             return config["LR"] * frac
diff --git a/benchmarks/purejaxrl/ppo.py b/benchmarks/purejaxrl/ppo.py
index a70e195ea..0cc8896cc 100644
--- a/benchmarks/purejaxrl/ppo.py
+++ b/benchmarks/purejaxrl/ppo.py
@@ -107,15 +107,9 @@ def train(rng):
         network = ActorCritic(
             env.action_space(env_params).shape[0], activation=config["ACTIVATION"]
         )
-        
-        
         rng, _rng = jax.random.split(rng)
         init_x = jnp.zeros(env.observation_space(env_params).shape)
         network_params = network.init(_rng, init_x)
-        
-        param_count = sum(x.size for x in jax.tree.leaves(network_params))
-        print("PARAM COUNT", param_count)
-        
         if config["ANNEAL_LR"]:
             tx = optax.chain(
                 optax.clip_by_global_norm(config["MAX_GRAD_NORM"]),
diff --git a/benchmarks/recursiongfn/main.py b/benchmarks/recursiongfn/main.py
index 7099247dc..81d08e8aa 100644
--- a/benchmarks/recursiongfn/main.py
+++ b/benchmarks/recursiongfn/main.py
@@ -92,13 +92,11 @@ def __init__(
         self.num_cond_dim = self.temperature_conditional.encoding_size()
 
     def _load_task_models(self):
-        xdg_cache = os.environ.get("XDG_CACHE_HOME")
+        xdg_cache = os.environ["XDG_CACHE_HOME"]
         model = bengio2021flow.load_original_model(
             cache=True,
             location=Path(os.path.join(xdg_cache, "bengio2021flow_proxy.pkl.gz")),
         )
-        from benchmate.models import model_size
-        print(model_size(model))
         model.to(get_worker_device())
         model = self._wrap_model(model)
         return {"seh": model}
diff --git a/benchmarks/torchatari/main.py b/benchmarks/torchatari/main.py
index 898a8cabe..bf5b7ef65 100644
--- a/benchmarks/torchatari/main.py
+++ b/benchmarks/torchatari/main.py
@@ -201,10 +201,7 @@ def main():
     envs = RecordEpisodeStatistics(envs)
     assert isinstance(envs.action_space, gym.spaces.Discrete), "only discrete action space is supported"
 
-
-    from benchmate.models import model_size
     agent = Agent(envs).to(device)
-    print(model_size(agent))
     optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)
 
     # ALGO Logic: Storage setup
diff --git a/benchmate/benchmate/models.py b/benchmate/benchmate/models.py
deleted file mode 100644
index efd13e2c6..000000000
--- a/benchmate/benchmate/models.py
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-def model_summary(model, input_shape):
-    try:
-        from torchsummary import summary
-        
-        summary(model, input_shape)
-    except:
-        print("Could not print summary")
-
-
-def model_size(model):
-    param_size = 0
-    param_count = 0
-    for param in model.parameters():
-        param_count += param.nelement()
-        param_size += param.nelement() * param.element_size()
-    
-    buffer_size = 0
-    buffer_count = 0
-    for buff in model.buffers():
-        buffer_count += buff.nelement()
-        buffer_size += buff.nelement() * buff.element_size()
-    
-    return {
-        "param": {
-            "count": param_count,
-            "size": param_size / 1024**2,
-            "unit": "MB"
-        },
-        "buffer": {
-            "count": buffer_count,
-            "size": buffer_size / 1024**2,
-            "unit": "MB"
-        }
-    }
diff --git a/benchmate/benchmate/monitor.py b/benchmate/benchmate/monitor.py
index 294d0c88f..0ad34a3d3 100644
--- a/benchmate/benchmate/monitor.py
+++ b/benchmate/benchmate/monitor.py
@@ -13,30 +13,7 @@
 from voir.instruments.io import io_monitor
 from voir.instruments.network import network_monitor
 from voir.instruments.monitor import monitor
-from voir.helpers import current_overseer
 
-from .metrics import sumggle_push, give_push, file_push
-
-
-def auto_push():
-    # use_stdout = int(os.getenv("MILABENCH_USE_STDOUT", 0))
-    mb_managed = int(os.getenv("MILABENCH_MANAGED", 0))
-
-    # Milabench managed: we need to push metrics to it
-    if mb_managed == 1:
-        # Using voir, DATA_FD is defined as well
-        ov = current_overseer.get()
-        if ov is not None:
-            return ov.give
-        
-        # Not using Voir, using structured stdout
-        if int(os.getenv("MILABENCH_USE_STDOUT", 0)) == 1:
-            return sumggle_push()
-
-        raise RuntimeError("Could not find something to push to")
-
-    # Not using milabench; using stdout
-    return file_push()
 
 
 @instrument_definition
@@ -64,10 +41,16 @@ def monitor_node(ov, poll_interval=1, arch=None):
 
 
 def _smuggle_monitor(poll_interval=10, worker_init=None, **monitors):
-    log = auto_push()
-    
+    data_file = SmuggleWriter(sys.stdout)
     def mblog(data):
-        log(**data)
+        nonlocal data_file
+
+        if data_file is not None:
+            try:
+                print(json.dumps(data), file=data_file)
+            except ValueError:
+                pass
+                # print("Is bench ending?, ignoring ValueError")
     
     def get():
         t = time.time()
diff --git a/milabench/_version.py b/milabench/_version.py
index 5f55a16da..cdd2418dd 100644
--- a/milabench/_version.py
+++ b/milabench/_version.py
@@ -1,5 +1,5 @@
 """This file is generated, do not modify"""
 
-__tag__ = "v0.1.0-129-ga60a3aa"
-__commit__ = "a60a3aae21e87e46bcce403620a3f56c12878554"
-__date__ = "2024-11-06 22:52:12 -0500"
+__tag__ = "v1.0.0_RC1-9-g6d1e1140"
+__commit__ = "6d1e114000cc4200ea307330032234db6696e40d"
+__date__ = "2024-09-30 14:39:43 -0400"
diff --git a/milabench/commands/executors.py b/milabench/commands/executors.py
index 807a261e2..f0402d29b 100644
--- a/milabench/commands/executors.py
+++ b/milabench/commands/executors.py
@@ -32,9 +32,6 @@ async def execute(pack, *args, cwd=None, env={}, external=False, use_stdout=Fals
     sized_args = scale_argv(pack, args)
     final_args = resolve_argv(pack, sized_args)
 
-    if use_stdout:
-        exec_env["MILABENCH_USE_STDOUT"] = "1"
-
     return await run(
         final_args,
         **kwargs,
diff --git a/milabench/pack.py b/milabench/pack.py
index 20feca39d..1cdde0939 100644
--- a/milabench/pack.py
+++ b/milabench/pack.py
@@ -335,8 +335,6 @@ def make_env(self):
             f"MILABENCH_DIR_{name.upper()}": path
             for name, path in self.config["dirs"].items()
         }
-        
-        env["MILABENCH_MANAGED"] = "1"
 
         env["OMP_NUM_THREADS"] = resolve_placeholder(self, "{cpu_per_gpu}")
 
diff --git a/milabench/report.py b/milabench/report.py
index bdc4999cc..c54ed8ddd 100644
--- a/milabench/report.py
+++ b/milabench/report.py
@@ -525,12 +525,12 @@ def pandas_to_string(df, formatters=_formatters):
     # Compute column size
     col_size = defaultdict(int)
     for index, row in df.iterrows():
-        col_size["bench"] = max(col_size["bench"], len(index), len("bench"))
+        col_size["bench"] = max(col_size["bench"], len(index))
         for col, val in zip(columns, row):
             fmt = formatters.get(col)
             if fmt is not None:
                 val = fmt(val)
-                col_size[col] = max(col_size[col], len(val), len(col))
+                col_size[col] = max(col_size[col], len(val))
 
     # Generate report
     sep = " | "