From 85c446d859bd899ad2d2c00ab1ad791b0717da76 Mon Sep 17 00:00:00 2001
From: DennisSoemers
Date: Tue, 12 Dec 2023 21:39:21 +0100
Subject: [PATCH] updated java repo

---
 .gitmodules                                  |   6 +-
 .../gym-microrts-static-files/league.csv     |  28 +-
 experiments/ppo_gridnet_eval.py              | 410 +++++++++---------
 gym_microrts/microrts                        |   2 +-
 4 files changed, 223 insertions(+), 223 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 34854703..d1783a9d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
-[submodule "gym_microrts/microrts"]
-	path = gym_microrts/microrts
-	url = https://github.com/Farama-Foundation/MicroRTS.git
+[submodule "gym_microrts/microrts"]
+	path = gym_microrts/microrts
+	url = https://github.com/Farama-Foundation/MicroRTS.git
diff --git a/experiments/gym-microrts-static-files/league.csv b/experiments/gym-microrts-static-files/league.csv
index 7d95b129..c6b165a2 100644
--- a/experiments/gym-microrts-static-files/league.csv
+++ b/experiments/gym-microrts-static-files/league.csv
@@ -1,14 +1,14 @@
-name,mu,sigma,trueskill
-coacAI,37.01208300442514,1.2057837814702337,33.39473166001444
-workerRushAI,32.177999126535994,1.0158657259647073,29.13040194864187
-droplet,32.046013299709365,1.0151344815092278,29.000609855181683
-mixedBot,31.485630788984253,1.0538188458547628,28.324174251419965
-izanagi,30.251048792700562,1.048141521227313,27.10662422901862
-tiamat,27.8300726697548,1.0437640107384225,24.69878063753953
-lightRushAI,26.430335172946624,1.0158591694179326,23.382757664692825
-rojo,25.174781972400247,1.0053993662036687,22.15858387378924
-guidedRojoA3N,23.137975074471896,0.9950992581462096,20.152677300033268
-naiveMCTSAI,20.932758765019557,0.9990214832647916,17.93569431522518
-randomBiasedAI,16.41033348962392,1.1908998345256558,12.837633986046953
-passiveAI,6.315587903310498,2.2292417057662304,-0.3721372139881929
-randomAI,5.9383357094823985,2.1599970738074847,-0.5416555119400552
+name,mu,sigma,trueskill
+coacAI,37.01208300442514,1.2057837814702337,33.39473166001444
+workerRushAI,32.177999126535994,1.0158657259647073,29.13040194864187
+droplet,32.046013299709365,1.0151344815092278,29.000609855181683
+mixedBot,31.485630788984253,1.0538188458547628,28.324174251419965
+izanagi,30.251048792700562,1.048141521227313,27.10662422901862
+tiamat,27.8300726697548,1.0437640107384225,24.69878063753953
+lightRushAI,26.430335172946624,1.0158591694179326,23.382757664692825
+rojo,25.174781972400247,1.0053993662036687,22.15858387378924
+guidedRojoA3N,23.137975074471896,0.9950992581462096,20.152677300033268
+naiveMCTSAI,20.932758765019557,0.9990214832647916,17.93569431522518
+randomBiasedAI,16.41033348962392,1.1908998345256558,12.837633986046953
+passiveAI,6.315587903310498,2.2292417057662304,-0.3721372139881929
+randomAI,5.9383357094823985,2.1599970738074847,-0.5416555119400552
diff --git a/experiments/ppo_gridnet_eval.py b/experiments/ppo_gridnet_eval.py
index 8cc06382..1d9637fc 100644
--- a/experiments/ppo_gridnet_eval.py
+++ b/experiments/ppo_gridnet_eval.py
@@ -1,205 +1,205 @@
-# http://proceedings.mlr.press/v97/han19a/han19a.pdf
-
-import argparse
-import os
-import random
-import time
-from distutils.util import strtobool
-
-import numpy as np
-import torch
-from gym.spaces import MultiDiscrete
-from stable_baselines3.common.vec_env import VecMonitor, VecVideoRecorder
-from torch.utils.tensorboard import SummaryWriter
-
-from gym_microrts import microrts_ai  # noqa
-
-
-def parse_args():
-    # fmt: off
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--exp-name', type=str, default=os.path.basename(__file__).rstrip(".py"),
-        help='the name of this experiment')
-    parser.add_argument('--gym-id', type=str, default="MicroRTSGridModeVecEnv",
-        help='the id of the gym environment')
-    parser.add_argument('--learning-rate', type=float, default=2.5e-4,
-        help='the learning rate of the optimizer')
-    parser.add_argument('--seed', type=int, default=1,
-        help='seed of the experiment')
-    parser.add_argument('--total-timesteps', type=int, default=1000000,
-        help='total timesteps of the experiments')
-    parser.add_argument('--torch-deterministic', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help='if toggled, `torch.backends.cudnn.deterministic=False`')
-    parser.add_argument('--cuda', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help='if toggled, cuda will not be enabled by default')
-    parser.add_argument('--prod-mode', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='run the script in production mode and use wandb to log outputs')
-    parser.add_argument('--capture-video', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='whether to capture videos of the agent performances (check out `videos` folder)')
-    parser.add_argument('--wandb-project-name', type=str, default="cleanRL",
-        help="the wandb's project name")
-    parser.add_argument('--wandb-entity', type=str, default=None,
-        help="the entity (team) of wandb's project")
-
-    # Algorithm specific arguments
-    parser.add_argument('--partial-obs', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='if toggled, the game will have partial observability')
-    parser.add_argument('--num-steps', type=int, default=256,
-        help='the number of steps per game environment')
-    parser.add_argument("--agent-model-path", type=str, default="gym-microrts-static-files/agent_sota.pt",
-        help="the path to the agent's model")
-    parser.add_argument("--agent2-model-path", type=str, default="gym-microrts-static-files/agent_sota.pt",
-        help="the path to the agent's model")
-    parser.add_argument('--ai', type=str, default="",
-        help='the opponent AI to evaluate against')
-    parser.add_argument('--model-type', type=str, default=f"ppo_gridnet", choices=["ppo_gridnet_large", "ppo_gridnet"],
-        help='the output path of the leaderboard csv')
-    args = parser.parse_args()
-    if not args.seed:
-        args.seed = int(time.time())
-    if args.ai:
-        args.num_bot_envs, args.num_selfplay_envs = 1, 0
-    else:
-        args.num_bot_envs, args.num_selfplay_envs = 0, 2
-    args.num_envs = args.num_selfplay_envs + args.num_bot_envs
-    args.batch_size = int(args.num_envs * args.num_steps)
-    args.num_updates = args.total_timesteps // args.batch_size
-    # fmt: on
-    return args
-
-
-if __name__ == "__main__":
-    args = parse_args()
-
-    if args.model_type == "ppo_gridnet_large":
-        from ppo_gridnet_large import Agent, MicroRTSStatsRecorder
-
-        from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
-    else:
-        from ppo_gridnet import Agent, MicroRTSStatsRecorder
-
-        from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
-
-    # TRY NOT TO MODIFY: setup the environment
-    experiment_name = f"{args.gym_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
-    if args.prod_mode:
-        import wandb
-
-        run = wandb.init(
-            project=args.wandb_project_name,
-            entity=args.wandb_entity,
-            sync_tensorboard=True,
-            config=vars(args),
-            name=experiment_name,
-            monitor_gym=True,
-            save_code=True,
-        )
-        CHECKPOINT_FREQUENCY = 10
-    writer = SummaryWriter(f"runs/{experiment_name}")
-    writer.add_text(
-        "hyperparameters", "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()]))
-    )
-
-    # TRY NOT TO MODIFY: seeding
-    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
-    random.seed(args.seed)
-    np.random.seed(args.seed)
-    torch.manual_seed(args.seed)
-    torch.backends.cudnn.deterministic = args.torch_deterministic
-
-    ais = []
-    if args.ai:
-        ais = [eval(f"microrts_ai.{args.ai}")]
-    envs = MicroRTSGridModeVecEnv(
-        num_bot_envs=len(ais),
-        num_selfplay_envs=args.num_selfplay_envs,
-        partial_obs=args.partial_obs,
-        max_steps=5000,
-        render_theme=2,
-        ai2s=ais,
-        map_paths=["maps/16x16/basesWorkers16x16A.xml"],
-        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
-    )
-    envs = MicroRTSStatsRecorder(envs)
-    envs = VecMonitor(envs)
-    if args.capture_video:
-        envs = VecVideoRecorder(
-            envs, f"videos/{experiment_name}", record_video_trigger=lambda x: x % 100000 == 0, video_length=2000
-        )
-    assert isinstance(envs.action_space, MultiDiscrete), "only MultiDiscrete action space is supported"
-
-    agent = Agent(envs).to(device)
-    agent2 = Agent(envs).to(device)
-
-    # ALGO Logic: Storage for epoch data
-    mapsize = 16 * 16
-    invalid_action_shape = (mapsize, envs.action_plane_space.nvec.sum())
-
-    # TRY NOT TO MODIFY: start the game
-    global_step = 0
-    start_time = time.time()
-    # Note how `next_obs` and `next_done` are used; their usage is equivalent to
-    # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
-    next_obs = torch.Tensor(envs.reset()).to(device)
-    next_done = torch.zeros(args.num_envs).to(device)
-
-    ## CRASH AND RESUME LOGIC:
-    starting_update = 1
-    agent.load_state_dict(torch.load(args.agent_model_path, map_location=device))
-    agent.eval()
-    if not args.ai:
-        agent2.load_state_dict(torch.load(args.agent2_model_path, map_location=device))
-        agent2.eval()
-
-    print("Model's state_dict:")
-    for param_tensor in agent.state_dict():
-        print(param_tensor, "\t", agent.state_dict()[param_tensor].size())
-    total_params = sum([param.nelement() for param in agent.parameters()])
-    print("Model's total parameters:", total_params)
-
-    for update in range(starting_update, args.num_updates + 1):
-        # TRY NOT TO MODIFY: prepare the execution of the game.
-        for step in range(0, args.num_steps):
-            envs.render()
-            global_step += 1 * args.num_envs
-            # ALGO LOGIC: put action logic here
-            with torch.no_grad():
-                invalid_action_masks = torch.tensor(np.array(envs.get_action_mask())).to(device)
-
-                if args.ai:
-                    action, logproba, _, _, vs = agent.get_action_and_value(
-                        next_obs, envs=envs, invalid_action_masks=invalid_action_masks, device=device
-                    )
-                else:
-                    p1_obs = next_obs[::2]
-                    p2_obs = next_obs[1::2]
-                    p1_mask = invalid_action_masks[::2]
-                    p2_mask = invalid_action_masks[1::2]
-
-                    p1_action, _, _, _, _ = agent.get_action_and_value(
-                        p1_obs, envs=envs, invalid_action_masks=p1_mask, device=device
-                    )
-                    p2_action, _, _, _, _ = agent2.get_action_and_value(
-                        p2_obs, envs=envs, invalid_action_masks=p2_mask, device=device
-                    )
-                    action = torch.zeros((args.num_envs, p2_action.shape[1], p2_action.shape[2]))
-                    action[::2] = p1_action
-                    action[1::2] = p2_action
-
-            try:
-                next_obs, rs, ds, infos = envs.step(action.cpu().numpy().reshape(envs.num_envs, -1))
-                next_obs = torch.Tensor(next_obs).to(device)
-            except Exception as e:
-                e.printStackTrace()
-                raise
-
-            for idx, info in enumerate(infos):
-                if "episode" in info.keys():
-                    if args.ai:
-                        print("against", args.ai, info["microrts_stats"]["WinLossRewardFunction"])
-                    else:
-                        if idx % 2 == 0:
-                            print(f"player{idx % 2}", info["microrts_stats"]["WinLossRewardFunction"])
-
-    envs.close()
-    writer.close()
+# http://proceedings.mlr.press/v97/han19a/han19a.pdf
+
+import argparse
+import os
+import random
+import time
+from distutils.util import strtobool
+
+import numpy as np
+import torch
+from gym.spaces import MultiDiscrete
+from stable_baselines3.common.vec_env import VecMonitor, VecVideoRecorder
+from torch.utils.tensorboard import SummaryWriter
+
+from gym_microrts import microrts_ai  # noqa
+
+
+def parse_args():
+    # fmt: off
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--exp-name', type=str, default=os.path.splitext(os.path.basename(__file__))[0],
+        help='the name of this experiment')
+    parser.add_argument('--gym-id', type=str, default="MicroRTSGridModeVecEnv",
+        help='the id of the gym environment')
+    parser.add_argument('--learning-rate', type=float, default=2.5e-4,
+        help='the learning rate of the optimizer')
+    parser.add_argument('--seed', type=int, default=1,
+        help='seed of the experiment')
+    parser.add_argument('--total-timesteps', type=int, default=1000000,
+        help='total timesteps of the experiments')
+    parser.add_argument('--torch-deterministic', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help='if toggled, sets `torch.backends.cudnn.deterministic=True`')
+    parser.add_argument('--cuda', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help='if toggled, cuda will be enabled by default')
+    parser.add_argument('--prod-mode', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='run the script in production mode and use wandb to log outputs')
+    parser.add_argument('--capture-video', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='whether to capture videos of the agent performances (check out `videos` folder)')
+    parser.add_argument('--wandb-project-name', type=str, default="cleanRL",
+        help="the wandb's project name")
+    parser.add_argument('--wandb-entity', type=str, default=None,
+        help="the entity (team) of wandb's project")
+
+    # Algorithm specific arguments
+    parser.add_argument('--partial-obs', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='if toggled, the game will have partial observability')
+    parser.add_argument('--num-steps', type=int, default=256,
+        help='the number of steps per game environment')
+    parser.add_argument("--agent-model-path", type=str, default="gym-microrts-static-files/agent_sota.pt",
+        help="the path to the agent's model")
+    parser.add_argument("--agent2-model-path", type=str, default="gym-microrts-static-files/agent_sota.pt",
+        help="the path to the second agent's model")
+    parser.add_argument('--ai', type=str, default="",
+        help='the opponent AI to evaluate against')
+    parser.add_argument('--model-type', type=str, default="ppo_gridnet", choices=["ppo_gridnet_large", "ppo_gridnet"],
+        help='the type of the model to evaluate')
+    args = parser.parse_args()
+    if not args.seed:
+        args.seed = int(time.time())
+    if args.ai:
+        args.num_bot_envs, args.num_selfplay_envs = 1, 0
+    else:
+        args.num_bot_envs, args.num_selfplay_envs = 0, 2
+    args.num_envs = args.num_selfplay_envs + args.num_bot_envs
+    args.batch_size = int(args.num_envs * args.num_steps)
+    args.num_updates = args.total_timesteps // args.batch_size
+    # fmt: on
+    return args
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    if args.model_type == "ppo_gridnet_large":
+        from ppo_gridnet_large import Agent, MicroRTSStatsRecorder
+
+        from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
+    else:
+        from ppo_gridnet import Agent, MicroRTSStatsRecorder
+
+        from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
+
+    # TRY NOT TO MODIFY: setup the environment
+    experiment_name = f"{args.gym_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
+    if args.prod_mode:
+        import wandb
+
+        run = wandb.init(
+            project=args.wandb_project_name,
+            entity=args.wandb_entity,
+            sync_tensorboard=True,
+            config=vars(args),
+            name=experiment_name,
+            monitor_gym=True,
+            save_code=True,
+        )
+        CHECKPOINT_FREQUENCY = 10
+    writer = SummaryWriter(f"runs/{experiment_name}")
+    writer.add_text(
+        "hyperparameters", "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()]))
+    )
+
+    # TRY NOT TO MODIFY: seeding
+    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    torch.backends.cudnn.deterministic = args.torch_deterministic
+
+    ais = []
+    if args.ai:
+        ais = [getattr(microrts_ai, args.ai)]
+    envs = MicroRTSGridModeVecEnv(
+        num_bot_envs=len(ais),
+        num_selfplay_envs=args.num_selfplay_envs,
+        partial_obs=args.partial_obs,
+        max_steps=5000,
+        render_theme=2,
+        ai2s=ais,
+        map_paths=["maps/16x16/basesWorkers16x16A.xml"],
+        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
+    )
+    envs = MicroRTSStatsRecorder(envs)
+    envs = VecMonitor(envs)
+    if args.capture_video:
+        envs = VecVideoRecorder(
+            envs, f"videos/{experiment_name}", record_video_trigger=lambda x: x % 100000 == 0, video_length=2000
+        )
+    assert isinstance(envs.action_space, MultiDiscrete), "only MultiDiscrete action space is supported"
+
+    agent = Agent(envs).to(device)
+    agent2 = Agent(envs).to(device)
+
+    # ALGO Logic: Storage for epoch data
+    mapsize = 16 * 16
+    invalid_action_shape = (mapsize, envs.action_plane_space.nvec.sum())
+
+    # TRY NOT TO MODIFY: start the game
+    global_step = 0
+    start_time = time.time()
+    # Note how `next_obs` and `next_done` are used; their usage is equivalent to
+    # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
+    next_obs = torch.Tensor(envs.reset()).to(device)
+    next_done = torch.zeros(args.num_envs).to(device)
+
+    ## CRASH AND RESUME LOGIC:
+    starting_update = 1
+    agent.load_state_dict(torch.load(args.agent_model_path, map_location=device))
+    agent.eval()
+    if not args.ai:
+        agent2.load_state_dict(torch.load(args.agent2_model_path, map_location=device))
+        agent2.eval()
+
+    print("Model's state_dict:")
+    for param_tensor in agent.state_dict():
+        print(param_tensor, "\t", agent.state_dict()[param_tensor].size())
+    total_params = sum([param.nelement() for param in agent.parameters()])
+    print("Model's total parameters:", total_params)
+
+    for update in range(starting_update, args.num_updates + 1):
+        # TRY NOT TO MODIFY: prepare the execution of the game.
+        for step in range(0, args.num_steps):
+            envs.render()
+            global_step += 1 * args.num_envs
+            # ALGO LOGIC: put action logic here
+            with torch.no_grad():
+                invalid_action_masks = torch.tensor(np.array(envs.get_action_mask())).to(device)
+
+                if args.ai:
+                    action, logproba, _, _, vs = agent.get_action_and_value(
+                        next_obs, envs=envs, invalid_action_masks=invalid_action_masks, device=device
+                    )
+                else:
+                    p1_obs = next_obs[::2]  # even env indices hold player 1's observations
+                    p2_obs = next_obs[1::2]  # odd env indices hold player 2's observations
+                    p1_mask = invalid_action_masks[::2]
+                    p2_mask = invalid_action_masks[1::2]
+
+                    p1_action, _, _, _, _ = agent.get_action_and_value(
+                        p1_obs, envs=envs, invalid_action_masks=p1_mask, device=device
+                    )
+                    p2_action, _, _, _, _ = agent2.get_action_and_value(
+                        p2_obs, envs=envs, invalid_action_masks=p2_mask, device=device
+                    )
+                    action = torch.zeros((args.num_envs, p2_action.shape[1], p2_action.shape[2]))
+                    action[::2] = p1_action
+                    action[1::2] = p2_action
+
+            try:
+                next_obs, rs, ds, infos = envs.step(action.cpu().numpy().reshape(envs.num_envs, -1))
+                next_obs = torch.Tensor(next_obs).to(device)
+            except Exception as e:
+                print(e)  # log the error; the bare raise below preserves the traceback
+                raise
+
+            for idx, info in enumerate(infos):
+                if "episode" in info.keys():
+                    if args.ai:
+                        print("against", args.ai, info["microrts_stats"]["WinLossRewardFunction"])
+                    else:
+                        if idx % 2 == 0:
+                            print(f"player{idx % 2}", info["microrts_stats"]["WinLossRewardFunction"])
+
+    envs.close()
+    writer.close()
diff --git a/gym_microrts/microrts b/gym_microrts/microrts
index 12408d16..bf76c052 160000
--- a/gym_microrts/microrts
+++ b/gym_microrts/microrts
@@ -1 +1 @@
-Subproject commit 12408d16eed812921e8f5d42b680855577397832
+Subproject commit bf76c0528b97937d09fa055f8cfc51cf80155bf7
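
A note on the league.csv table in this patch: each bot carries a TrueSkill `mu` and `sigma`, and the stored `trueskill` column is consistent with the conservative estimate mu - 3*sigma that leaderboards commonly rank by (e.g. coacAI: 37.0121 - 3 * 1.2058 = 33.3947). The snippet below is a minimal sketch, assuming that is how the column was produced, which recomputes it from the csv:

# Minimal sketch: recompute the conservative TrueSkill estimate mu - 3*sigma
# for each bot in the league table and compare it against the stored column.
import csv

with open("experiments/gym-microrts-static-files/league.csv") as f:
    for row in csv.DictReader(f):
        mu, sigma = float(row["mu"]), float(row["sigma"])
        conservative = mu - 3 * sigma  # matches the csv's `trueskill` values
        print(f"{row['name']:>16}: {conservative:.4f} (stored: {float(row['trueskill']):.4f})")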
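
In the self-play branch of ppo_gridnet_eval.py, the vectorized environment interleaves the two players: even env indices belong to player 1, odd indices to player 2, and the per-player action batches are written back through the same slices before `envs.step`. Below is a self-contained toy sketch of that round trip; the shapes and variable names are illustrative, not the env's real ones:

# Toy sketch of the even/odd interleaving used in the self-play branch:
# slicing with [::2] / [1::2] splits a joint batch into per-player views,
# and writing back through the same slices restores the joint env order.
import torch

num_envs, h, w = 4, 16, 16  # 2 self-play games -> 4 env slots
joint_obs = torch.arange(num_envs).view(-1, 1, 1).expand(num_envs, h, w).float()

p1_obs = joint_obs[::2]   # env slots 0, 2 -> player 1
p2_obs = joint_obs[1::2]  # env slots 1, 3 -> player 2

# pretend each policy returned one action map per env slot it controls
p1_action = torch.zeros_like(p1_obs)
p2_action = torch.ones_like(p2_obs)

action = torch.empty_like(joint_obs)
action[::2] = p1_action
action[1::2] = p2_action
assert action[0].eq(0).all() and action[1].eq(1).all()  # slot 0 is p1, slot 1 is p2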
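
The script also feeds `envs.get_action_mask()` into `get_action_and_value` so that sampled per-cell actions are always legal. The sketch below shows the standard masking trick such agents rely on, replacing the logits of invalid actions with a large negative value before building the categorical distribution; `masked_sample` is an illustrative helper under that assumption, not gym-microrts's actual API:

# Hedged sketch of invalid-action masking: invalid logits are pushed to a large
# negative value so the softmax assigns them (near-)zero probability.
import torch
from torch.distributions import Categorical

def masked_sample(logits: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    masked_logits = torch.where(mask.bool(), logits, torch.tensor(-1e8))
    return Categorical(logits=masked_logits).sample()

logits = torch.randn(2, 7)  # e.g. 2 grid cells, 7 action types each
mask = torch.tensor([[1, 1, 0, 0, 0, 0, 0],
                     [1, 0, 1, 1, 0, 0, 0]])
print(masked_sample(logits, mask))  # samples only indices the mask allows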