
Commit

Update visualization scripts
jagdeepsb committed Jun 15, 2024
1 parent 032dc73 commit df4b8ac
Showing 4 changed files with 101 additions and 195 deletions.
88 changes: 22 additions & 66 deletions examples/make_gifs.py
@@ -1,20 +1,14 @@

import os
import numpy as np
import torch
import gym
from PIL import Image
import imageio
from pygifsicle import optimize

import os, sys
root_dir = os.path.dirname(os.path.abspath(__file__))
external_dir = os.path.join(root_dir, 'externals')
sys.path.insert(0, root_dir)
sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))

import evogym.envs
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from utils.algo_utils import *
from ppo.envs import make_vec_envs
from ppo.utils import get_vec_normalize
import utils.mp_group as mp

def get_generations(load_dir, exp_name):
@@ -37,54 +31,31 @@ def get_exp_gen_data(exp_name, load_dir, gen):
robot_data.append((int(line.split()[0]), float(line.split()[1])))
return robot_data

def dummy_callback(_):
pass

def save_robot_gif(out_path, env_name, body_path, ctrl_path):
def save_robot_gif(out_path, env_name, body_path, ctrl_path, seed=42):
global GIF_RESOLUTION

structure_data = np.load(body_path)
structure = []
for key, value in structure_data.items():
structure.append(value)
structure = tuple(structure)

env = make_vec_envs(env_name, structure, 1000, 1, None, None, device='cpu', allow_early_resets=False)
env.get_attr("default_viewer", indices=None)[0].set_resolution(GIF_RESOLUTION)

actor_critic, obs_rms = torch.load(ctrl_path, map_location='cpu')

vec_norm = get_vec_normalize(env)
if vec_norm is not None:
vec_norm.eval()
vec_norm.obs_rms = obs_rms

recurrent_hidden_states = torch.zeros(1, actor_critic.recurrent_hidden_state_size)
masks = torch.zeros(1, 1)

obs = env.reset()
img = env.render(mode='img')
reward = None

model = PPO.load(ctrl_path)

# Parallel environments
vec_env = make_vec_env(env_name, n_envs=1, seed=seed, env_kwargs={
'body': structure[0],
'connections': structure[1],
"render_mode": "img",
})

obs = vec_env.reset()
imgs = [vec_env.env_method('render')[0]] # VecEnv .render() doesn't work here; use env_method('render') to get the frame
done = False

imgs = []
# arrays = []
while not done:

with torch.no_grad():
value, action, _, recurrent_hidden_states = actor_critic.act(
obs, recurrent_hidden_states, masks, deterministic=True)

obs, reward, done, _ = env.step(action)
img = env.render(mode='img')
imgs.append(img)

masks.fill_(0.0 if (done) else 1.0)

if done == True:
env.reset()

env.close()
action, _states = model.predict(obs, deterministic=True)
obs, reward, done, info = vec_env.step(action)
imgs.append(vec_env.env_method('render')[0])

imageio.mimsave(f'{out_path}.gif', imgs, duration=(1/50.0))
try:
@@ -120,7 +91,6 @@ def __str__(self):
out += f'{comp}_'
return out[:-1]


class Job():
def __init__(
self,
@@ -212,7 +182,7 @@ def generate(self, load_dir, save_dir, depth=0):
for idx, reward in get_exp_gen_data(exp_name, load_dir, gen):
robots.append(Robot(
body_path = os.path.join(load_dir, exp_name, f"generation_{gen}", "structure", f"{idx}.npz"),
ctrl_path = os.path.join(load_dir, exp_name, f"generation_{gen}", "controller", f"robot_{idx}_controller.pt"),
ctrl_path = os.path.join(load_dir, exp_name, f"generation_{gen}", "controller", f"{idx}.zip"),
reward = reward,
env_name = env_name,
exp_name = exp_name if len(self.experiment_names) != 1 else None,
@@ -231,25 +201,11 @@ def generate(self, load_dir, save_dir, depth=0):
robot.body_path,
robot.ctrl_path
)

# multiprocessing is currently broken

# group = mp.Group()
# for i, robot in zip(ranks, robots):
# gif_args = (
# os.path.join(save_dir, f'{i}_{robot}'),
# robot.env_name,
# robot.body_path,
# robot.ctrl_path
# )
# group.add_job(save_robot_gif, gif_args, callback=dummy_callback)
# group.run_jobs(NUM_PROC)

GIF_RESOLUTION = (1280/5, 720/5)
# NUM_PROC = 8
if __name__ == '__main__':
exp_root = os.path.join('saved_data')
save_dir = os.path.join(root_dir, 'saved_data', 'all_media')
save_dir = os.path.join('saved_data', 'all_media')

my_job = Job(
name = 'test_ga',
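For context, a minimal sketch of how the updated save_robot_gif could be invoked against the per-robot SB3 controllers saved as .zip files. The experiment name, generation, robot index, and environment name below are illustrative placeholders, not values from this commit:

gen, idx = 0, 3
exp_dir = os.path.join('saved_data', 'test_ga', f'generation_{gen}')
save_robot_gif(
    out_path=os.path.join('saved_data', 'all_media', f'{idx}'),   # GIF written to <out_path>.gif
    env_name='Walker-v0',                                          # any EvoGym task id
    body_path=os.path.join(exp_dir, 'structure', f'{idx}.npz'),    # robot body + connections
    ctrl_path=os.path.join(exp_dir, 'controller', f'{idx}.zip'),   # SB3 PPO checkpoint
    seed=42,
)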
8 changes: 4 additions & 4 deletions examples/run_ppo.py
@@ -7,9 +7,9 @@
import evogym.envs
from evogym import WorldObject

from new_ppo.args import add_ppo_args
from new_ppo.run import run_ppo
from new_ppo.eval import eval_policy
from ppo.args import add_ppo_args
from ppo.run import run_ppo
from ppo.eval import eval_policy

if __name__ == "__main__":

@@ -22,7 +22,7 @@
"--save-dir", default="saved_data", type=str, help="Directory to save agent logs (default: saved_data)"
)
parser.add_argument(
"--exp-name", default="ppo_test", type=str, help="Name of the model to save (default: ppo_test)"
"--exp-name", default="test_ppo", type=str, help="Name of the model to save (default: test_ppo)"
)
parser.add_argument(
"--robot-path", default=os.path.join("world_data", "speed_bot.json"), type=str, help="Path to the robot json file (default: world_data/speed_bot.json)"
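With the defaults shown above, the training script could be launched roughly as follows (a sketch; any additional flags registered by add_ppo_args live in ppo/args.py and are not shown here):

python run_ppo.py --save-dir saved_data --exp-name test_ppo --robot-path world_data/speed_bot.json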
198 changes: 73 additions & 125 deletions examples/visualize.py
@@ -1,23 +1,43 @@
import os, sys
root_dir = os.path.dirname(os.path.abspath(__file__))
external_dir = os.path.join(root_dir, 'externals')
sys.path.insert(0, root_dir)
sys.path.insert(1, os.path.join(external_dir, 'PyTorch-NEAT'))
sys.path.insert(1, os.path.join(external_dir, 'pytorch_a2c_ppo_acktr_gail'))
import os

import json
import argparse
import sys
import numpy as np
import torch
import gym
from typing import Optional

from utils.algo_utils import *
from ppo.envs import make_vec_envs
from ppo.utils import get_vec_normalize

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import evogym.envs

def rollout(
env_name: str,
n_iters: int,
model: PPO,
body: np.ndarray,
connections: Optional[np.ndarray] = None,
seed: int = 42,
):
# Parallel environments
vec_env = make_vec_env(env_name, n_envs=1, seed=seed, env_kwargs={
'body': body,
'connections': connections,
"render_mode": "human",
})

# Rollout
reward_sum = 0
obs = vec_env.reset()
count = 0
while count < n_iters:
action, _states = model.predict(obs, deterministic=True)
obs, reward, done, info = vec_env.step(action)
reward_sum += reward[0]
count += 1
if done:
print(f'\nTotal reward: {reward_sum:.5f}\n')
vec_env.close()

def visualize_codesign(args, exp_name):
global EXPERIMENT_PARENT_DIR
gen_list = os.listdir(os.path.join(EXPERIMENT_PARENT_DIR, exp_name))
@@ -98,63 +118,10 @@ def visualize_codesign(args, exp_name):

if num_iters == 0:
continue

env = make_vec_envs(
args.env_name,
structure,
1000,
1,
None,
None,
device='cpu',
allow_early_resets=False)

# We need to use the same statistics for normalization as used in training
try:
save_path_controller = os.path.join(EXPERIMENT_PARENT_DIR, exp_name, "generation_" + str(gen_number), "controller", "robot_" + str(robot_index) + "_controller" + ".pt")
actor_critic, obs_rms = \
torch.load(save_path_controller,
map_location='cpu')
except:
print(f'\nCould not load robot controller data at {save_path_controller}.\n')
continue

vec_norm = get_vec_normalize(env)
if vec_norm is not None:
vec_norm.eval()
vec_norm.obs_rms = obs_rms

recurrent_hidden_states = torch.zeros(1,
actor_critic.recurrent_hidden_state_size)
masks = torch.zeros(1, 1)

obs = env.reset()
env.render('screen')

total_steps = 0
reward_sum = 0
while total_steps < num_iters:
with torch.no_grad():
value, action, _, recurrent_hidden_states = actor_critic.act(
obs, recurrent_hidden_states, masks, deterministic=args.det)


# Obser reward and next obs
obs, reward, done, _ = env.step(action)
masks.fill_(0.0 if (done) else 1.0)
reward_sum += reward

if done == True:
env.reset()
reward_sum = float(reward_sum.numpy().flatten()[0])
print(f'\ntotal reward: {round(reward_sum, 5)}\n')
reward_sum = 0

env.render('screen')

total_steps += 1

env.venv.close()
save_path_controller = os.path.join(EXPERIMENT_PARENT_DIR, exp_name, "generation_" + str(gen_number), "controller", f'{robot_index}.zip')
model = PPO.load(save_path_controller)
rollout(args.env_name, num_iters, model, structure[0], structure[1])

def visualize_group_ppo(args, exp_name):

@@ -218,65 +185,45 @@ def visualize_group_ppo(args, exp_name):
for key, value in structure_data.items():
structure.append(value)
structure = tuple(structure)

save_path_controller = os.path.join(exp_dir, job, "controller", f"{robot}_{env_name}.zip")
model = PPO.load(save_path_controller)
rollout(env_name, num_iters, model, structure[0], structure[1])

def visualize_ppo(args, exp_name):

env = make_vec_envs(
env_name,
structure,
1000,
1,
None,
None,
device='cpu',
allow_early_resets=False)

# We need to use the same statistics for normalization as used in training
try:
save_path_controller = os.path.join(exp_dir, job, "controller", f"robot_{robot}_{env_name}_controller.pt")
actor_critic, obs_rms = \
torch.load(save_path_controller,
map_location='cpu')
except:
print(f'\nCould not load robot controller data at {save_path_controller}.\n')
continue

vec_norm = get_vec_normalize(env)
if vec_norm is not None:
vec_norm.eval()
vec_norm.obs_rms = obs_rms

recurrent_hidden_states = torch.zeros(1,
actor_critic.recurrent_hidden_state_size)
masks = torch.zeros(1, 1)

obs = env.reset()
env.render('screen')

total_steps = 0
reward_sum = 0
while total_steps < num_iters:
with torch.no_grad():
value, action, _, recurrent_hidden_states = actor_critic.act(
obs, recurrent_hidden_states, masks, deterministic=args.det)


# Obser reward and next obs
obs, reward, done, _ = env.step(action)
masks.fill_(0.0 if (done) else 1.0)
reward_sum += reward
exp_dir = os.path.join(EXPERIMENT_PARENT_DIR, exp_name)
out_file = os.path.join(exp_dir, 'ppo_result.json')
out = {}
with open(out_file, 'r') as f:
out = json.load(f)

reward = out['best_reward']
env_name = out['env_name']

print(f'\nEnvironment: {env_name}\nReward: {reward}')

if done == True:
env.reset()
reward_sum = float(reward_sum.numpy().flatten()[0])
print(f'\ntotal reward: {round(reward_sum, 5)}\n')
reward_sum = 0
while True:
print()
print("Enter num iters: ", end="")
num_iters = int(input())
print()

env.render('screen')
if num_iters == 0:
continue

total_steps += 1
save_path_structure = os.path.join(exp_dir, "structure", f"{env_name}.npz")
structure_data = np.load(save_path_structure)
structure = []
for key, value in structure_data.items():
structure.append(value)
structure = tuple(structure)

env.venv.close()
save_path_controller = os.path.join(exp_dir, "controller", f"{env_name}.zip")
model = PPO.load(save_path_controller)
rollout(env_name, num_iters, model, structure[0], structure[1])

EXPERIMENT_PARENT_DIR = os.path.join(root_dir, 'saved_data')
EXPERIMENT_PARENT_DIR = os.path.join('saved_data')
if __name__ == "__main__":

parser = argparse.ArgumentParser(description='RL')
@@ -301,9 +248,10 @@ def visualize_group_ppo(args, exp_name):
exp_name = input()

files_in_exp_dir = os.listdir(os.path.join(EXPERIMENT_PARENT_DIR, exp_name))
# group ppo experiment
if 'output.json' in files_in_exp_dir:

if 'output.json' in files_in_exp_dir: # group ppo experiment
visualize_group_ppo(args, exp_name)
# codesign experiment
else:
elif 'ppo_result.json' in files_in_exp_dir: # ppo experiment
visualize_ppo(args, exp_name)
else: # codesign experiment
visualize_codesign(args, exp_name)
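For reference, the dispatch above implies the following experiment layouts under saved_data (only the marker files checked by the script come from this commit; the rest is a sketch):

saved_data/<exp_name>/output.json        -> group PPO experiment (visualize_group_ppo)
saved_data/<exp_name>/ppo_result.json    -> single PPO experiment (visualize_ppo)
saved_data/<exp_name>/generation_*/      -> co-design experiment (visualize_codesign)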
