From deb8771c140f4c588a8878ee96a090e817713823 Mon Sep 17 00:00:00 2001 From: Pierre Delaunay Date: Thu, 1 Aug 2024 19:34:10 -0400 Subject: [PATCH] instrumentation concept --- benchmarks/torch_ppo_atari_envpool/dev.yaml | 1 + benchmarks/torch_ppo_atari_envpool/main.py | 17 ++++--- .../torch_ppo_atari_envpool/requirements.in | 2 + .../torch_ppo_atari_envpool/voirfile.py | 50 ++++++++++++++++--- 4 files changed, 56 insertions(+), 14 deletions(-) diff --git a/benchmarks/torch_ppo_atari_envpool/dev.yaml b/benchmarks/torch_ppo_atari_envpool/dev.yaml index c01211d98..c8668abb4 100644 --- a/benchmarks/torch_ppo_atari_envpool/dev.yaml +++ b/benchmarks/torch_ppo_atari_envpool/dev.yaml @@ -1,5 +1,6 @@ torch_ppo_atari_envpool: + max_duration: 60000 inherits: _defaults definition: . install-variant: unpinned diff --git a/benchmarks/torch_ppo_atari_envpool/main.py b/benchmarks/torch_ppo_atari_envpool/main.py index 7af2e7bbf..62c9b3a07 100644 --- a/benchmarks/torch_ppo_atari_envpool/main.py +++ b/benchmarks/torch_ppo_atari_envpool/main.py @@ -14,7 +14,7 @@ import tyro from torch.distributions.categorical import Categorical from torch.utils.tensorboard import SummaryWriter - +import torchcompat.core as acc @dataclass class Args: @@ -149,7 +149,7 @@ def get_action_and_value(self, x, action=None): return action, probs.log_prob(action), probs.entropy(), self.critic(hidden) -if __name__ == "__main__": +def main(): args = tyro.cli(Args) args.batch_size = int(args.num_envs * args.num_steps) args.minibatch_size = int(args.batch_size // args.num_minibatches) @@ -179,7 +179,7 @@ def get_action_and_value(self, x, action=None): torch.manual_seed(args.seed) torch.backends.cudnn.deterministic = args.torch_deterministic - device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") + device = acc.fetch_device(0) # env setup envs = envpool.make( @@ -213,8 +213,9 @@ def get_action_and_value(self, x, action=None): start_time = time.time() next_obs = torch.Tensor(envs.reset()).to(device) next_done = torch.zeros(args.num_envs).to(device) + iterations = range(1, args.num_iterations + 1) - for iteration in range(1, args.num_iterations + 1): + for iteration in iterations: # Annealing the rate if instructed to do so. if args.anneal_lr: frac = 1.0 - (iteration - 1.0) / args.num_iterations @@ -240,7 +241,7 @@ def get_action_and_value(self, x, action=None): for idx, d in enumerate(next_done): if d and info["lives"][idx] == 0: - print(f"global_step={global_step}, episodic_return={info['r'][idx]}") + # print(f"global_step={global_step}, episodic_return={info['r'][idx]}") avg_returns.append(info["r"][idx]) writer.add_scalar("charts/avg_episodic_return", np.average(avg_returns), global_step) writer.add_scalar("charts/episodic_return", info["r"][idx], global_step) @@ -341,4 +342,8 @@ def get_action_and_value(self, x, action=None): writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step) envs.close() - writer.close() \ No newline at end of file + writer.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/benchmarks/torch_ppo_atari_envpool/requirements.in b/benchmarks/torch_ppo_atari_envpool/requirements.in index dbd35ac19..c264f5563 100644 --- a/benchmarks/torch_ppo_atari_envpool/requirements.in +++ b/benchmarks/torch_ppo_atari_envpool/requirements.in @@ -5,3 +5,5 @@ torch tyro voir tensorboard +torchcompat +cantilever diff --git a/benchmarks/torch_ppo_atari_envpool/voirfile.py b/benchmarks/torch_ppo_atari_envpool/voirfile.py index d93f886cd..ba11707d0 100644 --- a/benchmarks/torch_ppo_atari_envpool/voirfile.py +++ b/benchmarks/torch_ppo_atari_envpool/voirfile.py @@ -1,8 +1,10 @@ from dataclasses import dataclass from voir import configurable -from voir.instruments import dash, early_stop, log, rate -from benchmate.monitor import monitor_monogpu +from voir.phase import StopProgram +from benchmate.observer import BenchObserver +from benchmate.monitor import voirfile_monitor + @dataclass class Config: @@ -28,11 +30,43 @@ class Config: def instrument_main(ov, options: Config): yield ov.phases.init - if options.dash: - ov.require(dash) + # GPU monitor, rate, loss etc... + voirfile_monitor(ov, options) + + yield ov.phases.load_script + + step_per_iteration = 0 + + def fetch_args(args): + nonlocal step_per_iteration + step_per_iteration = args.num_envs * args.num_steps + return args + + def batch_size(x): + return step_per_iteration - ov.require( - log("value", "progress", "rate", "units", "loss", "gpudata", context="task"), - early_stop(n=options.stop, key="rate", task="train"), - monitor_monogpu(poll_interval=options.gpu_poll), + observer = BenchObserver( + earlystop=options.stop + options.skip, + batch_size_fn=batch_size, ) + + probe = ov.probe("//main > args", overridable=True) + probe['args'].override(fetch_args) + + # measure the time it took to execute the body + probe = ov.probe("//main > iterations", overridable=True) + probe['iterations'].override(observer.loader) + + probe = ov.probe("//main > loss", overridable=True) + probe["loss"].override(observer.record_loss) + + probe = ov.probe("//main > optimizer", overridable=True) + probe['optimizer'].override(observer.optimizer) + + # + # Run the benchmark + # + try: + yield ov.phases.run_script + except StopProgram: + print("early stopped") \ No newline at end of file