diff --git a/readme.md b/README.md similarity index 77% rename from readme.md rename to README.md index 7010f727..01b1c053 100644 --- a/readme.md +++ b/README.md @@ -4,18 +4,24 @@ Apex is a small, modular library that contains some implementations of continuous reinforcement learning algorithms. Fully compatible with OpenAI gym. ## Running experiments ### Basics Any algorithm can be run from the apex.py entry point. -To run DDPG on Walker2d-v2, +To run PPO on a Cassie environment, + +```bash +python apex.py ppo --env_name Cassie-v0 --num_procs 12 --run_name experiment01 +``` + +To run TD3 on the gym environment Walker2d-v2, ```bash -python apex.py ddpg --env_name Walker2d-v2 --batch_size 64 +python apex.py td3_async --env_name Walker2d-v2 --num_procs 12 --run_name experiment02 ``` -### Logging details / Monitoring live training progress +## Logging details / Monitoring live training progress Tensorboard logging is enabled by default for all algorithms. The logger expects that you supply an argument named ```logdir```, containing the root directory you want to store your logfiles in, and an argument named ```seed```, which is used to seed the pseudorandom number generators. A basic command line script illustrating this is: @@ -27,9 +36,9 @@ python apex.py ars --logdir logs/ars --seed 1337 The resulting directory tree would look something like this: ``` trained_models/ # directory with all of the saved models and tensorboard logs -└── td3_async # algorithm name +└── ars # algorithm name └── Cassie-v0 # environment name - └── 8b8b12-seed1 # unique run name created with hash of hyperparameters + seed + └── 8b8b12-seed1 # unique run name created with hash of hyperparameters ├── actor.pt # actor network for algo ├── critic.pt # critic network for algo ├── events.out.tfevents # tensorboard binary file @@ -41,17 +50,16 @@ Using tensorboard makes it easy to compare experiments and resume training later To see live training progress -Run ```$ tensorboard --logdir logs/ --port=8097``` then navigate to ```http://localhost:8097/``` in your browser - -## Unit tests -You can run the unit tests using pytest.
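The run directory shown above pairs directly with the evaluation section of apex.py in this patch, which unpickles `experiment.pkl` and loads `actor.pt`. A minimal rollout sketch along those lines; the run directory name is hypothetical, the attribute names follow the argparse destinations in apex.py below, and the saved actor is assumed to be a feed-forward `torch.nn.Module` that maps a state tensor to an action:

```python
import pickle
import torch
from cassie import CassieEnv

run_dir = "./trained_models/ppo/Cassie-v0/8b8b12-seed1/"        # hypothetical run directory
run_args = pickle.load(open(run_dir + "experiment.pkl", "rb"))  # argparse namespace saved at train time

policy = torch.load(run_dir + "actor.pt")
policy.eval()

# Rebuild the environment from the saved arguments (names per apex.py below).
env = CassieEnv(command_profile=run_args.command_profile,
                input_profile=run_args.input_profile,
                dynamics_randomization=run_args.dyn_random,
                reward=run_args.reward)

state = env.reset()
for _ in range(400):                                 # 400 = default max_traj_len in apex.py
    with torch.no_grad():
        action = policy(torch.Tensor(state))         # assumed policy signature
    state, reward, done, _ = env.step(action.numpy())
    if done:
        break
```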
+Run ```$ tensorboard --logdir logs/``` then navigate to ```http://localhost:6006/``` in your browser -### To Do -- [ ] Sphinx documentation and github wiki -- [ ] Support loading pickle of hyperparameters for resuming training -- [ ] Improve/Tune implementations of TD3, add capability to resume training for off policy algorithms +## Cassie Environments: +* `Cassie-v0` : basic unified environment for walking/running policies +* `CassieTraj-v0` : unified environment with reference trajectories +* `CassiePlayground-v0` : environment for executing autonomous missions +* `CassieStanding-v0` : environment for training standing policies -## Features: +## Algorithms: +#### Currently implemented: * Parallelism with [Ray](https://github.com/ray-project/ray) * [GAE](https://arxiv.org/abs/1506.02438)/TD(lambda) estimators * [PPO](https://arxiv.org/abs/1707.06347), VPG with ratio objective and with log likelihood objective diff --git a/apex.py b/apex.py index 86b5adbb..9654a5e9 100644 --- a/apex.py +++ b/apex.py @@ -1,6 +1,8 @@ import torch import sys, pickle, argparse -from util import color, print_logo, env_factory, create_logger, EvalProcessClass, parse_previous, collect_data +from util.logo import print_logo +from util.log import parse_previous +from util.eval import EvalProcessClass if __name__ == "__main__": @@ -10,31 +12,31 @@ """ General arguments for configuring the environment """ + # command input, state input, env attributes + parser.add_argument("--command_profile", default="clock", type=str.lower, choices=["clock", "phase", "traj"]) + parser.add_argument("--input_profile", default="full", type=str.lower, choices=["full", "min"]) parser.add_argument("--simrate", default=50, type=int, help="simrate of environment") - parser.add_argument("--traj", default="walking", type=str, help="reference trajectory to use. options are 'aslip', 'walking', 'stepping'") - parser.add_argument("--phase_based", default=False, action='store_true', dest='phase_based') - parser.add_argument("--not_clock_based", default=True, action='store_false', dest='clock_based') - parser.add_argument("--not_state_est", default=True, action='store_false', dest='state_est') parser.add_argument("--not_dyn_random", default=True, action='store_false', dest='dyn_random') - parser.add_argument("--not_no_delta", default=True, action='store_false', dest='no_delta') - parser.add_argument("--not_mirror", default=True, action='store_false', dest='mirror') # mirror actions or not parser.add_argument("--learn_gains", default=False, action='store_true', dest='learn_gains') # learn PD gains or not + # attributes for trajectory based environments + parser.add_argument("--traj", default="walking", type=str, help="reference trajectory to use. options are 'aslip', 'walking', 'stepping'") + parser.add_argument("--not_no_delta", default=True, action='store_false', dest='no_delta') parser.add_argument("--ik_baseline", default=False, action='store_true', dest='ik_baseline') # use ik as baseline for aslip + delta policies? + # mirror loss and reward + parser.add_argument("--not_mirror", default=True, action='store_false', dest='mirror') # mirror actions or not parser.add_argument("--reward", default=None, type=str) # reward to use. 
this is a required argument - # parser.add_argument("--gainsDivide", default=1.0, type=float) """ General arguments for configuring the logger """ + parser.add_argument("--env_name", default="Cassie-v0") # environment name parser.add_argument("--run_name", default=None) # run name - """ - Arguments generally used for Curriculum Learning + General arguments for Curriculum Learning """ parser.add_argument("--exchange_reward", default=None) # Can only be used with previous (below) parser.add_argument("--previous", type=str, default=None) # path to directory of previous policies for resuming training - parser.add_argument("--fixed_speed", type=float, default=None) # Fixed speed to train/test at if len(sys.argv) < 2: print("Usage: python apex.py [option]", sys.argv) @@ -59,7 +61,6 @@ parser.add_argument("--recurrent", "-r", action='store_true') # whether to use a recurrent policy parser.add_argument("--logdir", default="./trained_models/ars/", type=str) parser.add_argument("--seed", "-s", default=0, type=int) - parser.add_argument("--env_name", "-e", default="Hopper-v3") parser.add_argument("--average_every", default=10, type=int) parser.add_argument("--save_model", "-m", default=None, type=str) # where to save the trained model to parser.add_argument("--redis", default=None) @@ -105,7 +106,6 @@ else: parser.add_argument("--logdir", default="./trained_models/ddpg/", type=str) parser.add_argument("--seed", "-s", default=0, type=int) - parser.add_argument("--env_name", "-e", default="Hopper-v3") args = parser.parse_args() @@ -126,7 +126,6 @@ # general args parser.add_argument("--logdir", default="./trained_models/syncTD3/", type=str) parser.add_argument("--previous", type=str, default=None) # path to directory of previous policies for resuming training - parser.add_argument("--env_name", default="Cassie-v0") # environment name parser.add_argument("--history", default=0, type=int) # number of previous states to use as input parser.add_argument("--redis_address", type=str, default=None) # address of redis server (for cluster setups) parser.add_argument("--seed", default=0, type=int) # Sets Gym, PyTorch and Numpy seeds @@ -168,7 +167,6 @@ from rl.algos.async_td3 import run_experiment # args common for actors and learners - parser.add_argument("--env_name", default="Cassie-v0") # environment name parser.add_argument("--hidden_size", default=256) # neurons in hidden layer parser.add_argument("--history", default=0, type=int) # number of previous states to use as input @@ -223,8 +221,6 @@ from rl.algos.ppo import run_experiment # general args - parser.add_argument("--algo_name", default="ppo") # algo name - parser.add_argument("--env_name", "-e", default="Cassie-v0") parser.add_argument("--logdir", type=str, default="./trained_models/ppo/") # Where to log diagnostics to parser.add_argument("--seed", default=0, type=int) # Sets Gym, PyTorch and Numpy seeds parser.add_argument("--history", default=0, type=int) # number of previous states to use as input @@ -252,13 +248,8 @@ parser.add_argument("--max_traj_len", type=int, default=400, help="Max episode horizon") parser.add_argument("--recurrent", action='store_true') parser.add_argument("--bounded", type=bool, default=False) - args = parser.parse_args() - # Argument setup checks. 
Ideally all arg settings are compatible with each other, but that's not convenient for fast development - if (args.ik_baseline and not args.traj == "aslip") or (args.learn_gains and args.mirror): - raise Exception("Incompatible environment config settings") - args = parse_previous(args) run_experiment(args) @@ -267,8 +258,7 @@ sys.argv.remove(sys.argv[1]) - parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/", help="path to folder containing policy and run details") - parser.add_argument("--env_name", default="Cassie-v0", type=str) + parser.add_argument("--path", type=str, default="./trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/", help="path to folder containing policy and run details") parser.add_argument("--traj_len", default=400, type=str) parser.add_argument("--history", default=0, type=int) # number of previous states to use as input parser.add_argument("--mission", default="default", type=str) # only used by playground environment @@ -276,25 +266,15 @@ parser.add_argument("--debug", default=False, action='store_true') parser.add_argument("--no_stats", dest="stats", default=True, action='store_false') parser.add_argument("--no_viz", default=False, action='store_true') - parser.add_argument("--collect_data", default=False, action='store_true') args = parser.parse_args() run_args = pickle.load(open(args.path + "experiment.pkl", "rb")) - if not hasattr(run_args, "simrate"): - run_args.simrate = 50 - print("manually choosing simrate as 50 (40 Hz)") - if not hasattr(run_args, "phase_based"): - run_args.phase_based = False - policy = torch.load(args.path + "actor.pt") policy.eval() - if args.collect_data: - collect_data(policy, args, run_args) - else: - # eval_policy(policy, args, run_args) - # eval_policy_input_viz(policy, args, run_args) - ev = EvalProcessClass(args, run_args) - ev.eval_policy(policy, args, run_args) + # eval_policy(policy, args, run_args) + # eval_policy_input_viz(policy, args, run_args) + ev = EvalProcessClass(args, run_args) + ev.eval_policy(policy, args, run_args) diff --git a/cassie/__init__.py b/cassie/__init__.py index bee375af..a63ec94f 100644 --- a/cassie/__init__.py +++ b/cassie/__init__.py @@ -1,8 +1,10 @@ +# Unified +from .cassie import CassieEnv +from .cassie_traj import CassieTrajEnv from .cassie_playground import CassiePlayground -from .cassie import CassieEnv_v2 as CassieEnv -from .cassie_min import CassieEnv_v3 as CassieMinEnv -from .cassie_standing_env import CassieStandingEnv -from .cassie import CassieEnv_v2 +from .cassie_standing_env import CassieStandingEnv # sorta old/unused + +# Proprietary from .cassie_noaccel_footdist_omniscient import CassieEnv_noaccel_footdist_omniscient from .cassie_footdist_env import CassieEnv_footdist from .cassie_noaccel_footdist_env import CassieEnv_noaccel_footdist @@ -10,7 +12,7 @@ from .cassie_novel_footdist_env import CassieEnv_novel_footdist from .cassie_mininput_env import CassieEnv_mininput - +# CassieMujocoSim from .cassiemujoco import * diff --git a/cassie/cassie.py b/cassie/cassie.py index 89612bda..978e27b9 100644 --- a/cassie/cassie.py +++ b/cassie/cassie.py @@ -2,14 +2,13 @@ from .cassiemujoco import pd_in_t, state_out_t, CassieSim, CassieVis -from .trajectory import * from cassie.quaternion_function import * from cassie.phase_function import * from .rewards import * from math import floor -import numpy as np +import numpy as np import os import random import copy @@ -25,57 +24,29 @@ def load_reward_clock_funcs(path): return clock_funcs -class 
CassieEnv_v2: - def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=True, state_est=True, dynamics_randomization=True, - no_delta=True, learn_gains=False, ik_baseline=False, reward="iros_paper", +class CassieEnv: + def __init__(self, traj='walking', simrate=50, command_profile="clock", input_profile="full", dynamics_randomization=True, + learn_gains=False, reward="iros_paper", config="./cassie/cassiemujoco/cassie.xml", history=0, **kwargs): dirname = os.path.dirname(__file__) - #xml_path = os.path.join(dirname, "cassiemujoco", "cassie.xml") - #self.sim = CassieSim(xml_path) self.config = config self.sim = CassieSim(self.config) # self.sim = CassieSim("./cassie/cassiemujoco/cassie_drop_step.xml") self.vis = None # Arguments for the simulation and state space - self.phase_based = phase_based - self.clock_based = clock_based - self.state_est = state_est - self.no_delta = no_delta + self.command_profile = command_profile + self.input_profile = input_profile + self.clock_based = True self.dynamics_randomization = dynamics_randomization - self.ik_baseline = ik_baseline - assert(not (phase_based and clock_based)) - - # Arguments generally used for curriculum training + # Arguments for reward function self.reward_func = reward self.early_term_cutoff = 0.3 - curriculum_defaults = { - "fixed_speed" : None - } - for (arg, default) in curriculum_defaults.items(): - setattr(self, arg, kwargs.get(arg, default)) - - # CONFIGURE REF TRAJECTORY to use - if traj == "aslip": - self.speeds = np.array([x / 10 for x in range(0, 21)]) - self.trajectories = getAllTrajectories(self.speeds) - self.num_speeds = len(self.trajectories) - self.traj_idx = 0 - self.speed = self.speeds[self.traj_idx] - self.trajectory = self.trajectories[self.traj_idx] - self.aslip_traj = True - else: - self.aslip_traj = False - if traj == "walking": - traj_path = os.path.join(dirname, "trajectory", "stepdata.bin") - elif traj == "stepping": - traj_path = os.path.join(dirname, "trajectory", "more-poses-trial.bin") - self.trajectory = CassieTrajectory(traj_path) - self.speed = 0 - self.observation_space, self.clock_inds, self.mirrored_obs = self.set_up_state_space() + # State space + self.observation_space, self.clock_inds, self.mirrored_obs = self.set_up_state_space(self.command_profile, self.input_profile) # Adds option for state history for FF nets self._obs = len(self.observation_space) @@ -88,46 +59,41 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr # learn gains means there is a delta on the default PD gains ***FOR EACH LEG*** self.learn_gains = learn_gains - if self.learn_gains == True: + if self.learn_gains: self.action_space = np.zeros(10 + 20) + self.mirrored_acts = [-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4, + -15, -16, 17, 18, 19, -10, -11, 12, 13, 14, + -25, -26, 27, 28, 29, -20, -21, 22, 23, 24] else: self.action_space = np.zeros(10) + self.mirrored_acts = [-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4] self.u = pd_in_t() # TODO: should probably initialize this to current state self.cassie_state = state_out_t() - - self.simrate = simrate # simulate X mujoco steps with same pd target - # 50 brings simulation from 2000Hz to exactly 40Hz - - self.simsteps = 0 # number of simulation steps - - self.time = 0 # number of time steps in current episode - self.phase = 0 # portion of the phase the robot is in - self.counter = 0 # number of phase cycles completed in episode + self.simrate = simrate # simulate X mujoco steps with same pd target. 
50 brings simulation from 2000Hz to exactly 40Hz + self.time = 0 # number of time steps in current episode + self.phase = 0 # portion of the phase the robot is in + self.counter = 0 # number of phase cycles completed in episode # NOTE: a reference trajectory represents ONE phase cycle - # phase function variables - self.swing_duration = 0.15 - self.stance_duration = 0.25 - self.stance_mode = "zero" - # constants - self.strict_relaxer = 0.1 - self.have_incentive = False if "no_incentive" in self.reward_func else True + # should be floor(len(traj) / simrate) - 1 + # should be VERY cautious here because wrapping around trajectory + # badly can cause assymetrical/bad gaits + self.phaselen = 32 + self.phase_add = 1 - if not self.phase_based: - # should be floor(len(traj) / simrate) - 1 - # should be VERY cautious here because wrapping around trajectory - # badly can cause assymetrical/bad gaits - self.phaselen = floor(len(self.trajectory) / self.simrate) - 1 if not self.aslip_traj else self.trajectory.length - 1 + # NOTE: phase_based modifies self.phaselen throughout training # Set up phase based / load in clock based reward func + self.have_incentive = False if "no_incentive" in self.reward_func else True + self.strict_relaxer = 0.1 self.early_reward = False - if self.phase_based: + if self.command_profile == "phase": self.set_up_phase_reward() - elif self.clock_based: + elif self.command_profile == "clock": self.set_up_clock_reward(dirname) # see include/cassiemujoco.h for meaning of these indices @@ -138,18 +104,23 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr self.vel_index = np.array([0,1,2,3,4,5,6,7,8,12,13,14,18,19,20,21,25,26,27,31]) # CONFIGURE OFFSET for No Delta Policies - if self.aslip_traj: - ref_pos, ref_vel = self.get_ref_state(self.phase) - self.offset = ref_pos[self.pos_idx] - else: - self.offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968]) + self.offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968]) - self.phase_add = 1 + self.max_orient_change = 0.2 + + self.max_simrate = self.simrate + 10 + self.min_simrate = self.simrate - 20 + + self.max_speed = 4.0 + self.min_speed = -0.3 + + self.max_side_speed = 0.3 + self.min_side_speed = -0.3 # global flat foot orientation, can be useful part of reward function: self.neutral_foot_orient = np.array([-0.24790886454547323, -0.24679713195445646, -0.6609396704367185, 0.663921021343526]) - # tracking various variables for reward funcs + # various tracking variables for reward funcs self.stepcount = 0 self.l_high = False # only true if foot is above 0.2m self.r_high = False @@ -174,16 +145,25 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr self.dynamics_randomization = dynamics_randomization self.slope_rand = dynamics_randomization self.joint_rand = dynamics_randomization + self.max_pitch_incline = 0.03 self.max_roll_incline = 0.03 + self.encoder_noise = 0.01 + self.damping_low = 0.3 self.damping_high = 5.0 + self.mass_low = 0.5 self.mass_high = 1.5 + self.fric_low = 0.4 self.fric_high = 1.1 + self.speed = 0 + self.side_speed = 0 + self.orient_add = 0 + # Record default dynamics parameters self.default_damping = self.sim.get_dof_damping() self.default_mass = self.sim.get_body_mass() @@ -191,34 +171,20 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr self.default_fric = self.sim.get_geom_friction() self.default_rgba = self.sim.get_geom_rgba() 
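A note on the mirror-index convention used by `mirrored_acts` above (and by `mirrored_obs` from `set_up_state_space`): the integer part of each entry selects the source index, `0.1` stands in for index 0 (which cannot carry a sign), and a negative sign marks components that flip when state and action are reflected left-to-right. A small illustrative helper, not code from the repository, showing how such a list can be applied:

```python
import numpy as np

def mirror_vector(vec, mirror_inds):
    """Apply a signed index list: floor(|entry|) picks the source index,
    the sign of the entry negates the copied value."""
    vec = np.asarray(vec, dtype=float)
    inds = np.floor(np.abs(mirror_inds)).astype(int)
    return np.sign(mirror_inds) * vec[inds]

# Mirroring a 10-dim action with the single-leg index list defined above.
mirrored_acts = [-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4]
action = np.arange(10, dtype=float)
print(mirror_vector(action, mirrored_acts))  # left/right blocks swapped; first two joints of each leg negated
```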
self.default_quat = self.sim.get_geom_quat() + self.motor_encoder_noise = np.zeros(10) self.joint_encoder_noise = np.zeros(6) - ### Trims ### - self.joint_offsets = np.zeros(16) - self.com_vel_offset = 0 - self.y_offset = 0 - - ### Random commands during training ### - self.speed_schedule = np.zeros(4) - self.orient_add = 0 - self.orient_time = 500 - # Keep track of actions, torques self.prev_action = None self.curr_action = None self.prev_torque = None - # for RNN policies - self.critic_state = None - self.debug = False # Set up phase reward for dynamic phase functions def set_up_phase_reward(self): - self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) - if "early" in self.reward_func: self.early_reward = True @@ -243,11 +209,6 @@ def set_up_clock_reward(self, dirname): self.left_clock = self.reward_clock_func["left"] self.right_clock = self.reward_clock_func["right"] self.reward_func = "load_clock" - elif "aslip" in self.reward_func: - self.reward_clock_func = load_reward_clock_funcs(os.path.join(dirname, "rewards", "reward_clock_funcs", "no_incentive_aslip_clock_strict0.3.pkl")) - self.left_clock = self.reward_clock_func["left"] - self.right_clock = self.reward_clock_func["right"] - self.reward_func = "aslip_clock" else: if "grounded" in self.reward_func: @@ -270,76 +231,67 @@ def set_up_clock_reward(self, dirname): # approach = "" self.reward_func = "clock" - self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) - - def set_up_state_space(self): - - mjstate_size = 40 - state_est_size = 46 - - speed_size = 1 + def set_up_state_space(self, command_profile, input_profile): - clock_size = 2 - # NOTE: phase_based includes clock also + full_state_est_size = 46 + min_state_est_size = 21 + speed_size = 2 # x speed, y speed + clock_size = 2 # sin, cos phase_size = 5 # swing duration, stance duration, one-hot encoding of stance mode - # Determine robot state portion of obs, mirr_obs - if self.state_est: + # input --> FULL + if input_profile == "full": base_mir_obs = np.array([0.1, 1, -2, 3, -4, -10, -11, 12, 13, 14, -5, -6, 7, 8, 9, 15, -16, 17, -18, 19, -20, -26, -27, 28, 29, 30, -21, -22, 23, 24, 25, 31, -32, 33, 37, 38, 39, 34, 35, 36, 43, 44, 45, 40, 41, 42]) - obs_size = state_est_size + obs_size = full_state_est_size + # input --> MIN + elif input_profile == "min": + base_mir_obs = np.array([ + 3, 4, 5, # L foot relative pos + 0.1, 1, 2, # R foot relative pos + 6, -7, 8, -9, # pelvis orient (quaternion) + -10, 11, -12, # pelvis rot Vel + 17, -18, 19, -20, # L foot orient + 13, -14, 15, -16 # R foot orient + ]) + obs_size = min_state_est_size else: - base_mir_obs = np.array([0.1, 1, 2, -3, 4, -5, -13, -14, 15, 16, 17, 18, 19, -6, -7, 8, 9, 10, 11, 12, 20, -21, 22, -23, 24, -25, -33, -34, 35, 36, 37, 38, 39, -26, -27, 28, 29, 30, 31, 32]) - obs_size = mjstate_size - - # CLOCK_BASED : clock, speed - if self.clock_based: + raise NotImplementedError + + # command --> CLOCK_BASED : clock, speed + if command_profile == "clock": append_obs = np.array([len(base_mir_obs) + i for i in range(clock_size+speed_size)]) mirrored_obs = np.concatenate([base_mir_obs, append_obs]) clock_inds = append_obs[0:clock_size].tolist() obs_size += clock_size + speed_size - # PHASE_BASED : clock, phase info, speed - elif 
self.phase_based: + # command --> PHASE_BASED : clock, phase info, speed + elif command_profile == "phase": append_obs = np.array([len(base_mir_obs) + i for i in range(clock_size+phase_size+speed_size)]) mirrored_obs = np.concatenate([base_mir_obs, append_obs]) clock_inds = append_obs[0:clock_size].tolist() obs_size += clock_size + phase_size + speed_size - # REF TRAJ INPUT : use traj indices determined earlier else: - # Find the mirrored trajectory - if self.aslip_traj: - ref_traj_size = 18 - mirrored_traj = np.array([6,7,8,9,10,11,0.1,1,2,3,4,5,12,13,14,15,16,17]) - else: - ref_traj_size = 40 - mirrored_traj = np.array([0.1, 1, 2, 3, 4, 5, -13, -14, 15, 16, 17, 18, 19, -6, -7, 8, 9, 10, 11, 12, - 20, 21, 22, 23, 24, 25, -33, -34, 35, 36, 37, 38, 39, -26, -27, 28, 29, 30, 31, 32]) - mirrored_traj_sign = np.multiply(np.sign(mirrored_traj), obs_size+np.floor(np.abs(mirrored_traj))) - mirrored_obs = np.concatenate([base_mir_obs, mirrored_traj_sign]) - clock_inds = None - obs_size += ref_traj_size + raise NotImplementedError observation_space = np.zeros(obs_size) mirrored_obs = mirrored_obs.tolist() - # check_arr = np.arange(obs_size, dtype=np.float64) - # check_arr[0] = 0.1 - # print("mir obs check: ", np.all(np.sort(np.abs(mirrored_obs)) == check_arr)) - # exit() - return observation_space, clock_inds, mirrored_obs - def step_simulation(self, action, learned_gains=None): + def rotate_to_orient(self, vec): + quaternion = euler2quat(z=self.orient_add, y=0, x=0) + iquaternion = inverse_quaternion(quaternion) - if not self.ik_baseline: - if self.aslip_traj and self.phase == self.phaselen - 1: - ref_pos, ref_vel = self.get_ref_state(0) - else: - ref_pos, ref_vel = self.get_ref_state(self.phase + self.phase_add) - else: - ref_pos = self.trajectory.ik_pos[self.simsteps] + if len(vec) == 3: + return rotate_by_quaternion(vec, iquaternion) + + elif len(vec) == 4: + new_orient = quaternion_product(iquaternion, vec) + if new_orient[0] < 0: + new_orient = -new_orient + return new_orient + + def step_simulation(self, action, learned_gains=None): - if not self.no_delta: - self.offset = ref_pos[self.pos_idx] target = action + self.offset if self.joint_rand: @@ -353,7 +305,7 @@ def step_simulation(self, action, learned_gains=None): # TODO: move setting gains out of the loop? # maybe write a wrapper for pd_in_t ? - if self.learn_gains == False: + if not self.learn_gains: self.u.leftLeg.motorPd.pGain[i] = self.P[i] self.u.rightLeg.motorPd.pGain[i] = self.P[i] self.u.leftLeg.motorPd.dGain[i] = self.D[i] @@ -401,16 +353,7 @@ def step_simulation(self, action, learned_gains=None): # Basic version of step_simulation, that only simulates forward in time, does not do any other # computation for reward, etc. 
Is faster and should be used for evaluation purposes def step_sim_basic(self, action, learned_gains=None): - if not self.ik_baseline: - if self.aslip_traj and self.phase == self.phaselen - 1: - ref_pos, ref_vel = self.get_ref_state(0) - else: - ref_pos, ref_vel = self.get_ref_state(self.phase + self.phase_add) - else: - ref_pos = self.trajectory.ik_pos[self.simsteps] - if not self.no_delta: - self.offset = ref_pos[self.pos_idx] target = action + self.offset if self.joint_rand: @@ -445,9 +388,8 @@ def step_sim_basic(self, action, learned_gains=None): def step(self, action, return_omniscient_state=False, f_term=0): - delay_rand = 7 if self.dynamics_randomization: - simrate = self.simrate + np.random.randint(-delay_rand, delay_rand+1) + simrate = np.random.uniform(self.max_simrate, self.min_simrate) else: simrate = self.simrate @@ -470,7 +412,6 @@ def step(self, action, return_omniscient_state=False, f_term=0): self.step_simulation(action, learned_gains) else: self.step_simulation(action) - self.simsteps += 1 qpos = np.copy(self.sim.qpos()) qvel = np.copy(self.sim.qvel()) # Foot Force Tracking @@ -506,9 +447,8 @@ def step(self, action, return_omniscient_state=False, f_term=0): self.time += 1 self.phase += self.phase_add - if (self.aslip_traj and self.phase >= self.phaselen) or self.phase > self.phaselen: + if self.phase > self.phaselen: self.last_pelvis_pos = self.sim.qpos()[0:3] - self.simsteps = 0 self.phase = 0 self.counter += 1 @@ -540,6 +480,16 @@ def step(self, action, return_omniscient_state=False, f_term=0): if reward < self.early_term_cutoff: done = True + if np.random.randint(300) == 0: # random changes to orientation + self.orient_add += np.random.uniform(-self.max_orient_change, self.max_orient_change) + + if np.random.randint(100) == 0: # random changes to speed + self.speed = np.random.uniform(self.min_speed, self.max_speed) + self.speed = np.clip(self.speed, self.min_speed, self.max_speed) + + if np.random.randint(300) == 0: # random changes to sidespeed + self.side_speed = np.random.uniform(self.min_side_speed, self.max_side_speed) + if return_omniscient_state: return self.get_full_state(), self.get_omniscient_state(), reward, done, {} else: @@ -556,14 +506,12 @@ def step_basic(self, action, return_omniscient_state=False): self.step_sim_basic(action, learned_gains) else: self.step_sim_basic(action) - self.simsteps += 1 self.time += 1 self.phase += self.phase_add - if (self.aslip_traj and self.phase >= self.phaselen) or self.phase > self.phaselen: + if self.phase > self.phaselen: self.last_pelvis_pos = self.sim.qpos()[0:3] - self.simsteps = 0 self.phase = 0 self.counter += 1 @@ -574,26 +522,11 @@ def step_basic(self, action, return_omniscient_state=False): def reset(self): - if self.aslip_traj: - self.traj_idx = random.randint(0, self.num_speeds-1) - self.speed = self.speeds[self.traj_idx] - # print("current speed: {}\tcurrent traj: {}".format(self.speed, random_speed_idx)) - self.trajectory = self.trajectories[self.traj_idx] # switch the current trajectory - self.phaselen = self.trajectory.length - 1 - else: - self.speed = (random.randint(0, 40)) / 10 - # # Make sure that if speed is above 2, freq is at least 1.2 - # if self.speed > 1.3:# or np.any(self.speed_schedule > 1.6): - # self.phase_add = 1.3 + 0.7*random.random() - # else: - # self.phase_add = 1 + random.random() - - if self.fixed_speed != None: - self.traj_idx = (np.abs(self.speeds - self.speed)).argmin() - self.speed = self.fixed_speed + self.speed = np.random.uniform(self.min_speed, self.max_speed) + 
self.side_speed = np.random.uniform(self.min_side_speed, self.max_side_speed) # Set up phase based - if self.phase_based: + if self.command_profile == "phase": if self.phase_input_mode == "library": # constrain speed a bit further self.speed = (random.randint(0, 30)) / 10 @@ -611,7 +544,7 @@ def reset(self): self.stance_mode = np.random.choice(["grounded", "aerial", "zero"]) self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) # ELSE load in clock based reward func - elif self.reward_func == "aslip_clock" or self.reward_func == "load_clock": + elif self.reward_func == "load_clock": pass # ELSE use simple relationship to define swing and stance duration else: @@ -620,13 +553,11 @@ def reset(self): self.stance_mode = "grounded" else: self.stance_mode = "aerial" - total_duration = (0.9 - 0.2 / 3.0 * self.speed) / 2 - self.swing_duration = (0.375 + ((0.625 - 0.375) / 3) * self.speed) * total_duration - self.stance_duration = (0.625 - ((0.625 - 0.375) / 3) * self.speed) * total_duration + total_duration = (0.9 - 0.25 / 3.0 * abs(self.speed)) / 2 + self.swing_duration = (0.30 + ((0.70 - 0.30) / 3) * abs(self.speed)) * total_duration + self.stance_duration = (0.70 - ((0.70 - 0.30) / 3) * abs(self.speed)) * total_duration self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) - self.simsteps = 0 - self.phase = random.randint(0, floor(self.phaselen)) self.time = 0 self.counter = 0 @@ -728,27 +659,15 @@ def reset(self): # apply dynamics self.sim.set_const() - qpos, qvel = self.get_ref_state(self.phase) - # orientation = random.randint(-10, 10) * np.pi / 25 - # quaternion = euler2quat(z=orientation, y=0, x=0) - # qpos[3:7] = quaternion - # self.y_offset = 0#random.uniform(-3.5, 3.5) - # qpos[1] = self.y_offset - - if self.aslip_traj: - qvel = np.zeros(qvel.shape) - - self.sim.set_qpos(qpos) - self.sim.set_qvel(qvel) - self.last_pelvis_pos = self.sim.qpos()[0:3] # Need to reset u? 
Or better way to reset cassie_state than taking step self.cassie_state = self.sim.step_pd(self.u) + # reset commands self.orient_add = 0 # random.randint(-10, 10) * np.pi / 25 - self.orient_time = 0 # random.randint(50, 200) - self.com_vel_offset = 0 # 0.1*np.random.uniform(-0.1, 0.1, 2) + self.speed = np.random.uniform(self.min_speed, self.max_speed) + self.side_speed = np.random.uniform(self.min_side_speed, self.max_side_speed) # reset mujoco tracking variables self.l_foot_frc = 0 @@ -761,28 +680,18 @@ def reset(self): return self.get_full_state() def reset_for_test(self, full_reset=False): - self.simsteps = 0 self.phase = 0 self.time = 0 self.counter = 0 self.orient_add = 0 - self.orient_time = np.inf - self.y_offset = 0 self.phase_add = 1 self.state_history = [np.zeros(self._obs) for _ in range(self.history+1)] - if self.aslip_traj: - self.traj_idx = 0 - self.speed = 0 - # print("current speed: {}".format(self.speed)) - self.trajectory = self.trajectories[self.traj_idx] # switch the current trajectory - self.phaselen = self.trajectory.length - 1 - else: - self.speed = 0 + self.speed = 0 # load in clock based reward func - if self.reward_func == "aslip_clock" or self.reward_func == "load_clock": + if self.reward_func == "load_clock": pass # ELSE use simple relationship to define swing and stance duration else: @@ -793,9 +702,6 @@ def reset_for_test(self, full_reset=False): self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) if not full_reset: - qpos, qvel = self.get_ref_state(self.phase) - self.sim.set_qpos(qpos) - self.sim.set_qvel(qvel) # reset mujoco tracking variables self.last_pelvis_pos = self.sim.qpos()[0:3] @@ -842,42 +748,27 @@ def reset_cassie_state(self): # Helper function for updating the speed, used in visualization tests # not needed in training cause we don't change speeds in middle of rollout, and # we randomize the starting phase of each rollout - def update_speed(self, new_speed): - if self.aslip_traj: - self.traj_idx = (np.abs(self.speeds - new_speed)).argmin() - self.speed = self.traj_idx / 10 - self.trajectory = self.trajectories[self.traj_idx] - old_phaselen = self.phaselen - self.phaselen = self.trajectory.length - 1 - # update phase - self.phase = int(self.phaselen * self.phase / old_phaselen) - # new offset - ref_pos, ref_vel = self.get_ref_state(self.phase) - self.offset = ref_pos[self.pos_idx] - else: - self.speed = new_speed + def update_speed(self, new_speed, new_side_speed=0.0): + + self.speed = np.clip(new_speed, self.min_speed, self.max_speed) + self.side_speed = np.clip(new_side_speed, self.min_side_speed, self.max_side_speed) - if self.phase_based: + if self.command_profile == "phase": self.swing_duration = max(0.01, self.swing_duration) self.stance_duration = max(0.01, self.stance_duration) old_phaselen = self.phaselen self.set_up_phase_reward() self.phase = int(self.phaselen * self.phase / old_phaselen) - elif self.reward_func == "aslip_clock": - pass else: - total_duration = (0.9 - 0.2 / 3.0 * self.speed) / 2 - self.swing_duration = (0.375 + ((0.625 - 0.375) / 3) * self.speed) * total_duration - self.stance_duration = (0.625 - ((0.625 - 0.375) / 3) * self.speed) * total_duration + total_duration = (0.9 - 0.25 / 3.0 * self.speed) / 2 + self.swing_duration = (0.30 + ((0.70 - 0.30) / 3) * self.speed) * total_duration + self.stance_duration = (0.70 - ((0.70 - 0.30) / 3) * self.speed) * total_duration 
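For reference, the swing/stance schedule set in `reset()` and `update_speed()` above shortens the total step time as the speed command grows and shifts the split from 30% swing at standstill toward 70% swing at 3 m/s. A worked example of the same arithmetic (illustrative only; the durations are presumably in seconds, matching the `FREQ=2000//simrate` clock passed to `create_phase_reward`):

```python
def gait_timing(speed):
    total = (0.9 - 0.25 / 3.0 * abs(speed)) / 2
    swing = (0.30 + (0.70 - 0.30) / 3 * abs(speed)) * total
    stance = (0.70 - (0.70 - 0.30) / 3 * abs(speed)) * total
    return swing, stance

for v in (0.0, 1.5, 3.0):
    swing, stance = gait_timing(v)
    print(f"speed {v:.1f} m/s -> swing {swing:.3f}, stance {stance:.3f}")
```

At a standstill this gives roughly 0.14 swing / 0.31 stance, the two are equal near 1.5 m/s, and at 3 m/s swing dominates at roughly 0.23 vs 0.10.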
old_phaselen = self.phaselen self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) self.phase = int(self.phaselen * self.phase / old_phaselen) def compute_reward(self, action): - qpos = np.copy(self.sim.qpos()) - qvel = np.copy(self.sim.qvel()) - ref_pos, ref_vel = self.get_ref_state(self.phase) if self.reward_func == "clock" or self.reward_func == "load_clock" or self.reward_func == "switch_clock": self.early_term_cutoff = -99. if self.early_reward: @@ -890,78 +781,13 @@ def compute_reward(self, action): elif self.reward_func == "max_vel_clock": self.early_term_cutoff = -99. return max_vel_clock_reward(self, action) - elif self.reward_func == "aslip_clock": - self.early_term_cutoff = -99. - return aslip_clock_reward(self, action) - elif self.reward_func == "aslip_old": - self.early_term_cutoff = 0.0 - return aslip_old_reward(self, action) - elif self.reward_func == "iros_paper": - return iros_paper_reward(self) - elif self.reward_func == "5k_speed_reward": - return old_speed_reward(self) - elif self.reward_func == "trajmatch_footorient_hiprollvelact_reward": - return trajmatch_footorient_hiprollvelact_reward(self) - elif self.reward_func == "speedmatch_footorient_hiprollvelact_reward": - return speedmatch_footorient_hiprollvelact_reward(self) else: raise NotImplementedError - # get the corresponding state from the reference trajectory for the current phase - def get_ref_state(self, phase=None): - if phase is None: - phase = self.phase - - if phase > self.phaselen: - phase = 0 - - # TODO: make this not so hackish - if phase > floor(len(self.trajectory) / self.simrate) - 1: - phase = floor((phase / self.phaselen) * len(self.trajectory) / self.simrate) - - desired_ind = phase * self.simrate if not self.aslip_traj else phase - # phase_diff = desired_ind - math.floor(desired_ind) - # if phase_diff != 0: # desired ind is an int - # pos_prev = np.copy(self.trajectory.qpos[math.floor(desired_ind)]) - # vel_prev = np.copy(self.trajectory.qvel[math.floor(desired_ind)]) - # pos_next = np.copy(self.trajectory.qpos[math.ceil(desired_ind)]) - # vel_next = np.copy(self.trajectory.qvel[math.ceil(desired_ind)]) - # pos = pos_prev + phase_diff * (pos_next - pos_prev) - # vel = vel_prev + phase_diff * (vel_next - vel_prev) - # else: - # print("desired ind: ", desired_ind) - pos = np.copy(self.trajectory.qpos[int(desired_ind)]) - vel = np.copy(self.trajectory.qvel[int(desired_ind)]) - - # this is just setting the x to where it "should" be given the number - # of cycles - # pos[0] += (self.trajectory.qpos[-1, 0] - self.trajectory.qpos[0, 0]) * self.counter - - # ^ should only matter for COM error calculation, - # gets dropped out of state variable for input reasons - - ### Setting variable speed - if not self.aslip_traj: - pos[0] *= self.speed - pos[0] += (self.trajectory.qpos[-1, 0] - self.trajectory.qpos[0, 0]) * self.counter * self.speed - else: - pos[0] += (self.trajectory.qpos[-1, 0] - self.trajectory.qpos[0, 0]) * self.counter - - # setting lateral distance target to 0? - # regardless of reference trajectory? 
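For context on the command inputs assembled in `get_full_state()` below: under the `clock` profile the policy sees a two-dimensional sin/cos phase clock plus the x/y speed commands. An illustrative trace of that four-dimensional command vector over one cycle, using the fixed `phaselen = 32` and `phase_add = 1` from `CassieEnv.__init__`:

```python
import numpy as np

phaselen, phase_add = 32, 1            # values set in CassieEnv.__init__
speed, side_speed = 1.0, 0.0           # example commands
phase = 0
for _ in range(phaselen + 1):
    clock = [np.sin(2 * np.pi * phase / phaselen),
             np.cos(2 * np.pi * phase / phaselen)]
    ext_state = np.concatenate((clock, [speed, side_speed]))
    phase = 0 if phase + phase_add > phaselen else phase + phase_add
print(ext_state.shape)                 # (4,): clock(2) + commanded speeds(2)
```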
- pos[1] = 0 - - if not self.aslip_traj: - vel[0] *= self.speed - - return pos, vel - def get_full_state(self): qpos = np.copy(self.sim.qpos()) qvel = np.copy(self.sim.qvel()) - ref_pos, ref_vel = self.get_ref_state(self.phase + self.phase_add) - # TODO: maybe convert to set subtraction for clarity # {i for i in range(35)} - # {0, 10, 11, 12, 13, 17, 18, 19, 24, 25, 26, 27, 31, 32, 33} @@ -975,69 +801,57 @@ def get_full_state(self): # trajectory despite being global coord. Y is only invariant to straight # line trajectories. - # CLOCK BASED (NO TRAJECTORY) - if self.phase_based: + # command --> PHASE_BASED : clock, phase info, speed + if self.command_profile == "phase": clock = [np.sin(2 * np.pi * self.phase / self.phaselen), np.cos(2 * np.pi * self.phase / self.phaselen)] - - ext_state = np.concatenate((clock, [self.swing_duration, self.stance_duration], encode_stance_mode(self.stance_mode), [self.speed])) - - elif self.clock_based: + ext_state = np.concatenate((clock, [self.swing_duration, self.stance_duration], encode_stance_mode(self.stance_mode), [self.speed, self.side_speed])) + # command --> CLOCK_BASED : clock, speed + elif self.command_profile == "clock": clock = [np.sin(2 * np.pi * self.phase / self.phaselen), np.cos(2 * np.pi * self.phase / self.phaselen)] - ext_state = np.concatenate((clock, [self.speed])) - - # ASLIP TRAJECTORY - elif self.aslip_traj and not self.clock_based: - if(self.phase == 0): - # ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.cassie_state, self.last_pelvis_pos, self.phaselen - 1, offset=self.vertOffset)) - ext_state = np.concatenate(get_ref_aslip_unaltered_state(self, self.phaselen - 1)) - else: - # ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.cassie_state, self.last_pelvis_pos, self.phaselen - 1, offset=self.vertOffset)) - ext_state = np.concatenate(get_ref_aslip_unaltered_state(self, self.phase)) - - # OTHER TRAJECTORY + ext_state = np.concatenate((clock, [self.speed, self.side_speed])) else: - ext_state = np.concatenate([ref_pos[self.pos_index], ref_vel[self.vel_index]]) + raise NotImplementedError # Update orientation - new_orient = self.cassie_state.pelvis.orientation[:] - new_translationalVelocity = self.cassie_state.pelvis.translationalVelocity[:] - new_translationalAcceleleration = self.cassie_state.pelvis.translationalAcceleration[:] - # new_translationalVelocity[0:2] += self.com_vel_offset - quaternion = euler2quat(z=self.orient_add, y=0, x=0) - iquaternion = inverse_quaternion(quaternion) - new_orient = quaternion_product(iquaternion, self.cassie_state.pelvis.orientation[:]) - if new_orient[0] < 0: - new_orient = -new_orient - new_translationalVelocity = rotate_by_quaternion(self.cassie_state.pelvis.translationalVelocity[:], iquaternion) - new_translationalAcceleleration = rotate_by_quaternion(self.cassie_state.pelvis.translationalAcceleration[:], iquaternion) - motor_pos = self.cassie_state.motor.position[:] - joint_pos = self.cassie_state.joint.position[:] + new_orient = self.rotate_to_orient(self.cassie_state.pelvis.orientation[:]) + new_translationalVelocity = self.rotate_to_orient(self.cassie_state.pelvis.translationalVelocity[:]) + new_translationalAcceleleration = self.rotate_to_orient(self.cassie_state.pelvis.translationalAcceleration[:]) + + # motor and joint poses if self.joint_rand: - motor_pos += self.joint_offsets[0:10] - joint_pos += self.joint_offsets[10:16] - - # Use state estimator - robot_state = np.concatenate([ - [self.cassie_state.pelvis.position[2] - self.cassie_state.terrain.height], # 
pelvis height - new_orient, # pelvis orientation - motor_pos, # actuated joint positions - - new_translationalVelocity, # pelvis translational velocity - self.cassie_state.pelvis.rotationalVelocity[:], # pelvis rotational velocity - self.cassie_state.motor.velocity[:], # actuated joint velocities - - new_translationalAcceleleration, # pelvis translational acceleration - - joint_pos, # unactuated joint positions - self.cassie_state.joint.velocity[:] # unactuated joint velocities - ]) - - if self.state_est: - state = np.concatenate([robot_state, ext_state]) + motor_pos = self.cassie_state.motor.position[:] + self.motor_encoder_noise + joint_pos = self.cassie_state.joint.position[:] + self.joint_encoder_noise + else: + motor_pos = self.cassie_state.motor.position[:] + joint_pos = self.cassie_state.joint.position[:] + + if self.input_profile == "min": + robot_state = np.concatenate([ + self.cassie_state.leftFoot.position[:], # left foot position + self.cassie_state.rightFoot.position[:], # right foot position + new_orient, # pelvis orientation + self.cassie_state.pelvis.rotationalVelocity[:], # pelvis rotational velocity + self.cassie_state.leftFoot.orientation[:], # left foot orientation + self.cassie_state.rightFoot.orientation[:] # right foot orientation + ]) + elif self.input_profile == "full": + robot_state = np.concatenate([ + [self.cassie_state.pelvis.position[2] - self.cassie_state.terrain.height], # pelvis height + new_orient, # pelvis orientation + motor_pos, # actuated joint positions + new_translationalVelocity, # pelvis translational velocity + self.cassie_state.pelvis.rotationalVelocity[:], # pelvis rotational velocity + self.cassie_state.motor.velocity[:], # actuated joint velocities + new_translationalAcceleleration, # pelvis translational acceleration + joint_pos, # unactuated joint positions + self.cassie_state.joint.velocity[:] # unactuated joint velocities + ]) else: - state = np.concatenate([qpos[self.pos_index], qvel[self.vel_index], ext_state]) + raise NotImplementedError + + state = np.concatenate([robot_state, ext_state]) self.state_history.insert(0, state) self.state_history = self.state_history[:self.history+1] @@ -1050,44 +864,6 @@ def render(self): return self.vis.draw(self.sim) - def get_state_info(self): - # state estimator info - pelvis_pos = self.cassie_state.pelvis.position[:] - pelvis_pos[2] = pelvis_pos[2] - self.cassie_state.terrain.height - lf_pos = self.cassie_state.leftFoot.position[:] - rf_pos = self.cassie_state.rightFoot.position[:] - lf_pos_global = [pelvis_pos[i] + lf_pos[i] for i in range(3)] - rf_pos_global = [pelvis_pos[i] + rf_pos[i] for i in range(3)] - # robot_state_info = np.array([pelvis_pos, lf_pos, rf_pos]) - robot_state_info = np.array([pelvis_pos, lf_pos_global, rf_pos_global]) - - # mujoco info - qpos = self.sim.qpos() - actual_compos = qpos[0:3] - actual_lf = self.l_foot_pos - actual_rf = self.r_foot_pos - # actual_lf = self.l_foot_pos - qpos[0:3] - # actual_rf = self.r_foot_pos - qpos[0:3] - actual_state_info = np.array([actual_compos, actual_lf, actual_rf]) - - return robot_state_info, actual_state_info - - # This is assumed to be called after env.step. 
hence, using self.phase - 1 - def get_traj_and_state_info(self): - # traj info used in rewards - traj_info = get_ref_aslip_global_state(self, self.phase-1) - # traj_info = get_ref_aslip_unaltered_state(self, self.phase) - traj_info = [traj_info[4], traj_info[2], traj_info[0]] - - # traj info going into the policy - # traj_cmd_info = get_ref_aslip_ext_state(self, self.cassie_state, self.last_pelvis_pos, self.phase, offset=self.vertOffset) - traj_cmd_info = get_ref_aslip_unaltered_state(self, self.phase-1) - traj_cmd_info = [traj_cmd_info[4], traj_cmd_info[2], traj_cmd_info[0]] - - robot_state_info, actual_state_info = self.get_state_info() - - return traj_info, traj_cmd_info, robot_state_info, actual_state_info - # Currently unused # def get_omniscient_state(self): diff --git a/cassie/cassie_min.py b/cassie/cassie_traj.py similarity index 84% rename from cassie/cassie_min.py rename to cassie/cassie_traj.py index 4c1dde27..428f6254 100644 --- a/cassie/cassie_min.py +++ b/cassie/cassie_traj.py @@ -9,7 +9,7 @@ from math import floor -import numpy as np +import numpy as np import os import random import copy @@ -25,39 +25,40 @@ def load_reward_clock_funcs(path): return clock_funcs -class CassieEnv_v3: - def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=True, state_est=True, dynamics_randomization=True, - no_delta=True, learn_gains=False, ik_baseline=False, reward="iros_paper", +class CassieTrajEnv: + def __init__(self, traj='walking', simrate=50, command_profile="clock", input_profile="full", dynamics_randomization=True, + learn_gains=False, reward="iros_paper", + no_delta=True, ik_baseline=False, config="./cassie/cassiemujoco/cassie.xml", history=0, **kwargs): dirname = os.path.dirname(__file__) - #xml_path = os.path.join(dirname, "cassiemujoco", "cassie.xml") - #self.sim = CassieSim(xml_path) self.config = config self.sim = CassieSim(self.config) # self.sim = CassieSim("./cassie/cassiemujoco/cassie_drop_step.xml") self.vis = None + # legacy env attributes clock_based and phase_based + if command_profile == "clock": + self.clock_based, self.phase_based = True, False + elif command_profile == "phase": + self.clock_based, self.phase_based = False, True + elif command_profile == "traj": + self.clock_based, self.phase_based = False, False + if input_profile == "full": + self.min_input = False + elif input_profile == "min": + self.min_input = True + # Arguments for the simulation and state space - self.phase_based = phase_based - self.clock_based = clock_based - self.state_est = state_est - self.no_delta = no_delta self.dynamics_randomization = dynamics_randomization + self.no_delta = no_delta self.ik_baseline = ik_baseline - assert(not (phase_based and clock_based)) - - # Arguments generally used for curriculum training + # Arguments for reward function self.reward_func = reward self.early_term_cutoff = 0.3 - curriculum_defaults = { - "fixed_speed" : None - } - for (arg, default) in curriculum_defaults.items(): - setattr(self, arg, kwargs.get(arg, default)) - # CONFIGURE REF TRAJECTORY to use + # CONFIGURE REF TRAJECTORY if traj == "aslip": self.speeds = np.array([x / 10 for x in range(0, 21)]) self.trajectories = getAllTrajectories(self.speeds) @@ -88,42 +89,38 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr # learn gains means there is a delta on the default PD gains ***FOR EACH LEG*** self.learn_gains = learn_gains - if self.learn_gains == True: + if self.learn_gains: self.action_space = np.zeros(10 + 20) + self.mirrored_acts = 
[-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4, + -15, -16, 17, 18, 19, -10, -11, 12, 13, 14, + -25, -26, 27, 28, 29, -20, -21, 22, 23, 24] else: self.action_space = np.zeros(10) + self.mirrored_acts = [-5, -6, 7, 8, 9, -0.1, -1, 2, 3, 4] self.u = pd_in_t() # TODO: should probably initialize this to current state self.cassie_state = state_out_t() - - self.simrate = simrate # simulate X mujoco steps with same pd target - # 50 brings simulation from 2000Hz to exactly 40Hz - - self.simsteps = 0 # number of simulation steps - - self.time = 0 # number of time steps in current episode - self.phase = 0 # portion of the phase the robot is in - self.counter = 0 # number of phase cycles completed in episode + self.simrate = simrate # simulate X mujoco steps with same pd target. 50 brings simulation from 2000Hz to exactly 40Hz + self.simsteps = 0 # tracks number of mujoco simulation steps + self.time = 0 # number of time steps in current episode + self.phase = 0 # portion of the phase the robot is in + self.counter = 0 # number of phase cycles completed in episode # NOTE: a reference trajectory represents ONE phase cycle - # phase function variables - self.swing_duration = 0.15 - self.stance_duration = 0.25 - self.stance_mode = "zero" - # constants - self.strict_relaxer = 0.1 - self.have_incentive = False if "no_incentive" in self.reward_func else True + # should be floor(len(traj) / simrate) - 1 + # should be VERY cautious here because wrapping around trajectory + # badly can cause assymetrical/bad gaits + self.phaselen = floor(len(self.trajectory) / self.simrate) - 1 if not self.aslip_traj else self.trajectory.length - 1 + self.phase_add = 1 - if not self.phase_based: - # should be floor(len(traj) / simrate) - 1 - # should be VERY cautious here because wrapping around trajectory - # badly can cause assymetrical/bad gaits - self.phaselen = floor(len(self.trajectory) / self.simrate) - 1 if not self.aslip_traj else self.trajectory.length - 1 + # NOTE: phase_based modifies self.phaselen throughout training # Set up phase based / load in clock based reward func + self.strict_relaxer = 0.1 + self.have_incentive = False if "no_incentive" in self.reward_func else True self.early_reward = False if self.phase_based: self.set_up_phase_reward() @@ -144,12 +141,21 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr else: self.offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968]) - self.phase_add = 1 + self.max_orient_change = 0.2 + + self.max_simrate = self.simrate + 10 + self.min_simrate = self.simrate - 20 + + self.max_speed = 4.0 + self.min_speed = -0.3 + + self.max_side_speed = 0.3 + self.min_side_speed = -0.3 # global flat foot orientation, can be useful part of reward function: self.neutral_foot_orient = np.array([-0.24790886454547323, -0.24679713195445646, -0.6609396704367185, 0.663921021343526]) - # tracking various variables for reward funcs + # various tracking variables for reward funcs self.stepcount = 0 self.l_high = False # only true if foot is above 0.2m self.r_high = False @@ -174,16 +180,25 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr self.dynamics_randomization = dynamics_randomization self.slope_rand = dynamics_randomization self.joint_rand = dynamics_randomization + self.max_pitch_incline = 0.03 self.max_roll_incline = 0.03 + self.encoder_noise = 0.01 + self.damping_low = 0.3 self.damping_high = 5.0 + self.mass_low = 0.5 self.mass_high = 1.5 + self.fric_low = 0.4 self.fric_high = 1.1 + 
self.speed = 0 + self.side_speed = 0 + self.orient_add = 0 + # Record default dynamics parameters self.default_damping = self.sim.get_dof_damping() self.default_mass = self.sim.get_body_mass() @@ -191,34 +206,20 @@ def __init__(self, traj='walking', simrate=50, phase_based=False, clock_based=Tr self.default_fric = self.sim.get_geom_friction() self.default_rgba = self.sim.get_geom_rgba() self.default_quat = self.sim.get_geom_quat() + self.motor_encoder_noise = np.zeros(10) self.joint_encoder_noise = np.zeros(6) - ### Trims ### - self.joint_offsets = np.zeros(16) - self.com_vel_offset = 0 - self.y_offset = 0 - - ### Random commands during training ### - self.speed_schedule = np.zeros(4) - self.orient_add = 0 - self.orient_time = 500 - # Keep track of actions, torques self.prev_action = None self.curr_action = None self.prev_torque = None - # for RNN policies - self.critic_state = None - self.debug = False # Set up phase reward for dynamic phase functions def set_up_phase_reward(self): - self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) - if "early" in self.reward_func: self.early_reward = True @@ -270,44 +271,46 @@ def set_up_clock_reward(self, dirname): # approach = "" self.reward_func = "clock" - self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) - def set_up_state_space(self): - mjstate_size = 40 - state_est_size = 21 - - speed_size = 1 - - clock_size = 2 - # NOTE: phase_based includes clock also + full_state_est_size = 46 + min_state_est_size = 21 + speed_size = 2 # x speed, y speed + clock_size = 2 # sin, cos phase_size = 5 # swing duration, stance duration, one-hot encoding of stance mode - # Determine robot state portion of obs, mirr_obs - if self.state_est: + # input --> FULL + if not self.min_input: + base_mir_obs = np.array([0.1, 1, -2, 3, -4, -10, -11, 12, 13, 14, -5, -6, 7, 8, 9, 15, -16, 17, -18, 19, -20, -26, -27, 28, 29, 30, -21, -22, 23, 24, 25, 31, -32, 33, 37, 38, 39, 34, 35, 36, 43, 44, 45, 40, 41, 42]) + obs_size = full_state_est_size + + # input --> MIN + else: base_mir_obs = np.array([ - 3, 4, 5, 0.1, 1, 2, 6, -7, 8, -9, # L and R foot, orient - -10, 11, -12, # Rot Vel - 17, -18, 19, -20, 13, -14, 15, -16 + 3, 4, 5, # L foot relative pos + 0.1, 1, 2, # R foot relative pos + 6, -7, 8, -9, # pelvis orient (quaternion) + -10, 11, -12, # pelvis rot Vel + 17, -18, 19, -20, # L foot orient + 13, -14, 15, -16 # R foot orient ]) - obs_size = state_est_size - else: - base_mir_obs = np.array([0.1, 1, 2, -3, 4, -5, -13, -14, 15, 16, 17, 18, 19, -6, -7, 8, 9, 10, 11, 12, 20, -21, 22, -23, 24, -25, -33, -34, 35, 36, 37, 38, 39, -26, -27, 28, 29, 30, 31, 32]) - obs_size = mjstate_size + obs_size = min_state_est_size - # CLOCK_BASED : clock, speed + # command --> CLOCK_BASED : clock, speed if self.clock_based: append_obs = np.array([len(base_mir_obs) + i for i in range(clock_size+speed_size)]) mirrored_obs = np.concatenate([base_mir_obs, append_obs]) clock_inds = append_obs[0:clock_size].tolist() obs_size += clock_size + speed_size - # PHASE_BASED : clock, phase info, speed + + # command --> PHASE_BASED : clock, phase info, speed elif self.phase_based: append_obs = np.array([len(base_mir_obs) + i for i in range(clock_size+phase_size+speed_size)]) mirrored_obs = np.concatenate([base_mir_obs, 
append_obs]) clock_inds = append_obs[0:clock_size].tolist() obs_size += clock_size + phase_size + speed_size - # REF TRAJ INPUT : use traj indices determined earlier + + # command --> REF_TRAJ_BASED : single-timestep trajectory else: # Find the mirrored trajectory if self.aslip_traj: @@ -325,13 +328,21 @@ def set_up_state_space(self): observation_space = np.zeros(obs_size) mirrored_obs = mirrored_obs.tolist() - # check_arr = np.arange(obs_size, dtype=np.float64) - # check_arr[0] = 0.1 - # print("mir obs check: ", np.all(np.sort(np.abs(mirrored_obs)) == check_arr)) - # exit() - return observation_space, clock_inds, mirrored_obs + def rotate_to_orient(self, vec): + quaternion = euler2quat(z=self.orient_add, y=0, x=0) + iquaternion = inverse_quaternion(quaternion) + + if len(vec) == 3: + return rotate_by_quaternion(vec, iquaternion) + + elif len(vec) == 4: + new_orient = quaternion_product(iquaternion, vec) + if new_orient[0] < 0: + new_orient = -new_orient + return new_orient + def step_simulation(self, action, learned_gains=None): if not self.ik_baseline: @@ -357,7 +368,7 @@ def step_simulation(self, action, learned_gains=None): # TODO: move setting gains out of the loop? # maybe write a wrapper for pd_in_t ? - if self.learn_gains == False: + if not self.learn_gains: self.u.leftLeg.motorPd.pGain[i] = self.P[i] self.u.rightLeg.motorPd.pGain[i] = self.P[i] self.u.leftLeg.motorPd.dGain[i] = self.D[i] @@ -449,9 +460,8 @@ def step_sim_basic(self, action, learned_gains=None): def step(self, action, return_omniscient_state=False, f_term=0): - delay_rand = 7 if self.dynamics_randomization: - simrate = self.simrate + np.random.randint(-delay_rand, delay_rand+1) + simrate = np.random.uniform(self.max_simrate, self.min_simrate) else: simrate = self.simrate @@ -544,6 +554,16 @@ def step(self, action, return_omniscient_state=False, f_term=0): if reward < self.early_term_cutoff: done = True + if np.random.randint(300) == 0: # random changes to orientation + self.orient_add += np.random.uniform(-self.max_orient_change, self.max_orient_change) + + if np.random.randint(100) == 0: # random changes to speed + self.speed = np.random.uniform(self.min_speed, self.max_speed) + self.speed = np.clip(self.speed, self.min_speed, self.max_speed) + + if np.random.randint(300) == 0: # random changes to sidespeed + self.side_speed = np.random.uniform(self.min_side_speed, self.max_side_speed) + if return_omniscient_state: return self.get_full_state(), self.get_omniscient_state(), reward, done, {} else: @@ -586,16 +606,13 @@ def reset(self): self.phaselen = self.trajectory.length - 1 else: self.speed = (random.randint(0, 40)) / 10 + self.side_speed = np.random.uniform(self.min_side_speed, self.max_side_speed) # # Make sure that if speed is above 2, freq is at least 1.2 # if self.speed > 1.3:# or np.any(self.speed_schedule > 1.6): # self.phase_add = 1.3 + 0.7*random.random() # else: # self.phase_add = 1 + random.random() - if self.fixed_speed != None: - self.traj_idx = (np.abs(self.speeds - self.speed)).argmin() - self.speed = self.fixed_speed - # Set up phase based if self.phase_based: if self.phase_input_mode == "library": @@ -624,9 +641,9 @@ def reset(self): self.stance_mode = "grounded" else: self.stance_mode = "aerial" - total_duration = (0.9 - 0.2 / 3.0 * self.speed) / 2 - self.swing_duration = (0.375 + ((0.625 - 0.375) / 3) * self.speed) * total_duration - self.stance_duration = (0.625 - ((0.625 - 0.375) / 3) * self.speed) * total_duration + total_duration = (0.9 - 0.25 / 3.0 * abs(self.speed)) / 2 + 
self.swing_duration = (0.30 + ((0.70 - 0.30) / 3) * abs(self.speed)) * total_duration + self.stance_duration = (0.70 - ((0.70 - 0.30) / 3) * abs(self.speed)) * total_duration self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) self.simsteps = 0 @@ -733,11 +750,6 @@ def reset(self): self.sim.set_const() qpos, qvel = self.get_ref_state(self.phase) - # orientation = random.randint(-10, 10) * np.pi / 25 - # quaternion = euler2quat(z=orientation, y=0, x=0) - # qpos[3:7] = quaternion - # self.y_offset = 0#random.uniform(-3.5, 3.5) - # qpos[1] = self.y_offset if self.aslip_traj: qvel = np.zeros(qvel.shape) @@ -750,9 +762,10 @@ def reset(self): # Need to reset u? Or better way to reset cassie_state than taking step self.cassie_state = self.sim.step_pd(self.u) + # reset commands self.orient_add = 0 # random.randint(-10, 10) * np.pi / 25 - self.orient_time = 0 # random.randint(50, 200) - self.com_vel_offset = 0 # 0.1*np.random.uniform(-0.1, 0.1, 2) + self.speed = np.random.uniform(self.min_speed, self.max_speed) + self.side_speed = np.random.uniform(self.min_side_speed, self.max_side_speed) # reset mujoco tracking variables self.l_foot_frc = 0 @@ -770,8 +783,6 @@ def reset_for_test(self, full_reset=False): self.time = 0 self.counter = 0 self.orient_add = 0 - self.orient_time = np.inf - self.y_offset = 0 self.phase_add = 1 self.state_history = [np.zeros(self._obs) for _ in range(self.history+1)] @@ -846,7 +857,8 @@ def reset_cassie_state(self): # Helper function for updating the speed, used in visualization tests # not needed in training cause we don't change speeds in middle of rollout, and # we randomize the starting phase of each rollout - def update_speed(self, new_speed): + def update_speed(self, new_speed, new_side_speed=0.0): + if self.aslip_traj: self.traj_idx = (np.abs(self.speeds - new_speed)).argmin() self.speed = self.traj_idx / 10 @@ -859,8 +871,10 @@ def update_speed(self, new_speed): ref_pos, ref_vel = self.get_ref_state(self.phase) self.offset = ref_pos[self.pos_idx] else: - self.speed = new_speed + self.speed = np.clip(new_speed, self.min_speed, self.max_speed) + self.side_speed = np.clip(new_side_speed, self.min_side_speed, self.max_side_speed) + if self.phase_based: self.swing_duration = max(0.01, self.swing_duration) self.stance_duration = max(0.01, self.stance_duration) @@ -870,18 +884,15 @@ def update_speed(self, new_speed): elif self.reward_func == "aslip_clock": pass else: - total_duration = (0.9 - 0.2 / 3.0 * self.speed) / 2 - self.swing_duration = (0.375 + ((0.625 - 0.375) / 3) * self.speed) * total_duration - self.stance_duration = (0.625 - ((0.625 - 0.375) / 3) * self.speed) * total_duration + total_duration = (0.9 - 0.25 / 3.0 * self.speed) / 2 + self.swing_duration = (0.30 + ((0.70 - 0.30) / 3) * self.speed) * total_duration + self.stance_duration = (0.70 - ((0.70 - 0.30) / 3) * self.speed) * total_duration old_phaselen = self.phaselen self.left_clock, self.right_clock, self.phaselen = create_phase_reward(self.swing_duration, self.stance_duration, self.strict_relaxer, self.stance_mode, self.have_incentive, FREQ=2000//self.simrate) self.phase = int(self.phaselen * self.phase / old_phaselen) def compute_reward(self, action): - qpos = np.copy(self.sim.qpos()) - qvel = np.copy(self.sim.qvel()) - ref_pos, ref_vel = self.get_ref_state(self.phase) if self.reward_func == "clock" or self.reward_func == "load_clock" or self.reward_func 
== "switch_clock": self.early_term_cutoff = -99. if self.early_reward: @@ -979,19 +990,17 @@ def get_full_state(self): # trajectory despite being global coord. Y is only invariant to straight # line trajectories. - # CLOCK BASED (NO TRAJECTORY) + # command --> PHASE_BASED : clock, phase info, speed if self.phase_based: clock = [np.sin(2 * np.pi * self.phase / self.phaselen), np.cos(2 * np.pi * self.phase / self.phaselen)] - - ext_state = np.concatenate((clock, [self.swing_duration, self.stance_duration], encode_stance_mode(self.stance_mode), [self.speed])) - + ext_state = np.concatenate((clock, [self.swing_duration, self.stance_duration], encode_stance_mode(self.stance_mode), [self.speed, self.side_speed])) + # command --> CLOCK_BASED : clock, speed elif self.clock_based: clock = [np.sin(2 * np.pi * self.phase / self.phaselen), np.cos(2 * np.pi * self.phase / self.phaselen)] - ext_state = np.concatenate((clock, [self.speed])) - - # ASLIP TRAJECTORY + ext_state = np.concatenate((clock, [self.speed, self.side_speed])) + # command --> REF_TRAJ_BASED : aslip trajectory elif self.aslip_traj and not self.clock_based: if(self.phase == 0): # ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.cassie_state, self.last_pelvis_pos, self.phaselen - 1, offset=self.vertOffset)) @@ -999,45 +1008,46 @@ def get_full_state(self): else: # ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.cassie_state, self.last_pelvis_pos, self.phaselen - 1, offset=self.vertOffset)) ext_state = np.concatenate(get_ref_aslip_unaltered_state(self, self.phase)) - - # OTHER TRAJECTORY + # command --> REF_TRAJ_BASED : agility trajectory else: ext_state = np.concatenate([ref_pos[self.pos_index], ref_vel[self.vel_index]]) # Update orientation - new_orient = self.cassie_state.pelvis.orientation[:] - new_translationalVelocity = self.cassie_state.pelvis.translationalVelocity[:] - new_translationalAcceleleration = self.cassie_state.pelvis.translationalAcceleration[:] - # new_translationalVelocity[0:2] += self.com_vel_offset - quaternion = euler2quat(z=self.orient_add, y=0, x=0) - iquaternion = inverse_quaternion(quaternion) - new_orient = quaternion_product(iquaternion, self.cassie_state.pelvis.orientation[:]) - if new_orient[0] < 0: - new_orient = -new_orient - new_translationalVelocity = rotate_by_quaternion(self.cassie_state.pelvis.translationalVelocity[:], iquaternion) - new_translationalAcceleleration = rotate_by_quaternion(self.cassie_state.pelvis.translationalAcceleration[:], iquaternion) - motor_pos = self.cassie_state.motor.position[:] - joint_pos = self.cassie_state.joint.position[:] - # if self.joint_rand: - # motor_pos += self.joint_offsets[0:10] - # joint_pos += self.joint_offsets[10:16] - - # Use state estimator - robot_state = np.concatenate([ - self.cassie_state.leftFoot.position[:], # left foot position - self.cassie_state.rightFoot.position[:], # right foot position - new_orient, # pelvis orientation - - self.cassie_state.pelvis.rotationalVelocity[:], # pelvis rotational velocity - - self.cassie_state.leftFoot.orientation[:], # unactuated joint positions - self.cassie_state.rightFoot.orientation[:] # unactuated joint velocities - ]) - - if self.state_est: - state = np.concatenate([robot_state, ext_state]) + new_orient = self.rotate_to_orient(self.cassie_state.pelvis.orientation[:]) + new_translationalVelocity = self.rotate_to_orient(self.cassie_state.pelvis.translationalVelocity[:]) + new_translationalAcceleleration = self.rotate_to_orient(self.cassie_state.pelvis.translationalAcceleration[:]) + + 
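The new `rotate_to_orient` helper above strips the commanded heading offset (`orient_add`) out of pelvis quantities before they reach the policy. Below is a minimal numpy sketch of that yaw correction, assuming Hamilton-convention `[w, x, y, z]` quaternions; the repo's own helpers from `cassie.quaternion_function` (`euler2quat`, `inverse_quaternion`, `rotate_by_quaternion`) are re-implemented here only for illustration.

```python
import numpy as np

def yaw_quat(yaw):
    # [w, x, y, z] quaternion for a rotation of `yaw` about the z axis
    return np.array([np.cos(yaw / 2), 0.0, 0.0, np.sin(yaw / 2)])

def quat_conjugate(q):
    return np.array([q[0], -q[1], -q[2], -q[3]])

def quat_mul(a, b):
    w1, x1, y1, z1 = a
    w2, x2, y2, z2 = b
    return np.array([
        w1*w2 - x1*x2 - y1*y2 - z1*z2,
        w1*x2 + x1*w2 + y1*z2 - z1*y2,
        w1*y2 - x1*z2 + y1*w2 + z1*x2,
        w1*z2 + x1*y2 - y1*x2 + z1*w2,
    ])

def rotate_vec(v, q):
    # rotate a 3-vector by a unit quaternion: q * (0, v) * q_conjugate
    qv = np.concatenate([[0.0], v])
    return quat_mul(quat_mul(q, qv), quat_conjugate(q))[1:]

orient_add = 0.3                       # commanded heading offset (rad), illustrative
vel_world = np.array([1.0, 0.2, 0.0])  # pelvis velocity as reported by the estimator

# 3-vector case: rotate by the inverse of the commanded yaw
vel_cmd_frame = rotate_vec(vel_world, quat_conjugate(yaw_quat(orient_add)))
```

For 4-vectors the helper instead left-multiplies the pelvis orientation by the inverse yaw quaternion and flips the sign when the scalar part goes negative, so the policy always sees the same hemisphere of the quaternion double cover.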
# motor and joint poses + if self.joint_rand: + motor_pos = self.cassie_state.motor.position[:] + self.motor_encoder_noise + joint_pos = self.cassie_state.joint.position[:] + self.joint_encoder_noise else: - state = np.concatenate([qpos[self.pos_index], qvel[self.vel_index], ext_state]) + motor_pos = self.cassie_state.motor.position[:] + joint_pos = self.cassie_state.joint.position[:] + + if self.min_input: + robot_state = np.concatenate([ + self.cassie_state.leftFoot.position[:], # left foot position + self.cassie_state.rightFoot.position[:], # right foot position + new_orient, # pelvis orientation + self.cassie_state.pelvis.rotationalVelocity[:], # pelvis rotational velocity + self.cassie_state.leftFoot.orientation[:], # left foot orientation + self.cassie_state.rightFoot.orientation[:] # right foot orientation + ]) + else: + robot_state = np.concatenate([ + [self.cassie_state.pelvis.position[2] - self.cassie_state.terrain.height], # pelvis height + new_orient, # pelvis orientation + motor_pos, # actuated joint positions + new_translationalVelocity, # pelvis translational velocity + self.cassie_state.pelvis.rotationalVelocity[:], # pelvis rotational velocity + self.cassie_state.motor.velocity[:], # actuated joint velocities + new_translationalAcceleleration, # pelvis translational acceleration + joint_pos, # unactuated joint positions + self.cassie_state.joint.velocity[:] # unactuated joint velocities + ]) + + state = np.concatenate([robot_state, ext_state]) self.state_history.insert(0, state) self.state_history = self.state_history[:self.history+1] diff --git a/deprecated/RL_controller_aslip.py b/deprecated/RL_controller_aslip.py deleted file mode 100644 index 33299b16..00000000 --- a/deprecated/RL_controller_aslip.py +++ /dev/null @@ -1,358 +0,0 @@ - -from cassie.cassiemujoco.cassieUDP import * -from cassie.cassiemujoco.cassiemujoco_ctypes import * -# from cassie.speed_env import CassieEnv_speed -# from cassie.speed_double_freq_env import CassieEnv_speed_dfreq -# from cassie.speed_no_delta_env import CassieEnv_speed_no_delta -# from cassie.speed_no_delta_neutral_foot_env import CassieEnv_speed_no_delta_neutral_foot - -import time -import numpy as np -import torch -import pickle -import platform -from cassie.quaternion_function import * - -#import signal -import atexit -import sys -import select -import tty -import termios - -""" -We need to include the trajectory library for the right offset information, as well as the right phaselen and speed -""" - -def getAllTrajectories(speeds): - trajectories = [] - - for i, speed in enumerate(speeds): - dirname = os.path.dirname(__file__) - traj_path = os.path.join(dirname, "cassie", "trajectory", "aslipTrajsSweep", "walkCycle_{}.pkl".format(speed)) - trajectories.append(CassieIKTrajectory(traj_path)) - - # print("Got all trajectories") - return trajectories - -class CassieIKTrajectory: - def __init__(self, filepath): - with open(filepath, "rb") as f: - trajectory = pickle.load(f) - - self.qpos = np.copy(trajectory["qpos"]) - self.length = self.qpos.shape[0] - self.qvel = np.copy(trajectory["qvel"]) - self.rfoot = np.copy(trajectory["rfoot"]) - self.lfoot = np.copy(trajectory["lfoot"]) - -# simrate used to be 60 -class TrajectoryInfo: - def __init__(self): - - self.freq_adjust = 1 - - self.speeds = [x / 10 for x in range(0, 21)] - self.trajectories = getAllTrajectories(self.speeds) - self.num_speeds = len(self.trajectories) - - self.time = 0 # number of time steps in current episode - self.phase = 0 # portion of the phase the robot is in - 
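For reference, the command block appended to the robot state in `get_full_state` above is just a two-dimensional phase clock plus the commanded planar speeds. A small sketch with made-up numbers (`phase`, `phaselen` and the speeds are illustrative, not values taken from the environment):

```python
import numpy as np

phase, phaselen = 12, 32        # illustrative phase counter and cycle length
speed, side_speed = 1.0, 0.1    # commanded x / y velocity

clock = [np.sin(2 * np.pi * phase / phaselen),
         np.cos(2 * np.pi * phase / phaselen)]

# clock-based command input: 2 clock entries + 2 speed commands
ext_state = np.concatenate((clock, [speed, side_speed]))
assert ext_state.shape == (4,)
```

The phase-based variant additionally appends the swing duration, stance duration and a one-hot stance-mode encoding, which is why `phase_size = 5` in `set_up_state_space`.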
self.counter = 0 # number of phase cycles completed in episode - - # NOTE: each trajectory in trajectories should have the same length - self.speed = self.speeds[0] - self.trajectory = self.trajectories[0] - - # NOTE: a reference trajectory represents ONE phase cycle - - # should be floor(len(traj) / simrate) - 1 - # should be VERY cautious here because wrapping around trajectory - # badly can cause assymetrical/bad gaits - # self.phaselen = floor(self.trajectory.length / self.simrate) - 1 - self.phaselen = self.trajectory.length - 1 - - # see include/cassiemujoco.h for meaning of these indices - self.pos_idx = [7, 8, 9, 14, 20, 21, 22, 23, 28, 34] - self.vel_idx = [6, 7, 8, 12, 18, 19, 20, 21, 25, 31] - - # maybe make ref traj only send relevant idxs? - ref_pos, ref_vel = self.get_ref_state(self.phase) - self.offset = ref_pos[self.pos_idx] - self.phase_add = 1 - - # get the corresponding state from the reference trajectory for the current phase - def get_ref_state(self, phase=None): - if phase is None: - phase = self.phase - - if phase > self.phaselen: - phase = 0 - - # pos = np.copy(self.trajectory.qpos[phase * self.simrate]) - pos = np.copy(self.trajectory.qpos[phase]) - - # this is just setting the x to where it "should" be given the number - # of cycles - #pos[0] += (self.trajectory.qpos[-1, 0] - self.trajectory.qpos[0, 0]) * self.counter - pos[0] += (self.trajectory.qpos[-1, 0] - self.trajectory.qpos[0, 0]) * self.counter - - # ^ should only matter for COM error calculation, - # gets dropped out of state variable for input reasons - - # setting lateral distance target to 0? - # regardless of reference trajectory? - pos[1] = 0 - - # vel = np.copy(self.trajectory.qvel[phase * self.simrate]) - vel = np.copy(self.trajectory.qvel[phase]) - - return pos, vel - - def update_info(self, new_speed): - - self.speed = new_speed - - # find closest speed in [0.0, 0.1, ... 3.0]. 
use this to find new trajectory - self.trajectory = self.trajectories[ (np.abs([speed_i - self.speed for speed_i in self.speeds])).argmin() ] - - # new offset - ref_pos, ref_vel = self.get_ref_state(self.phase) - self.offset = ref_pos[self.pos_idx] - - # phaselen - self.phaselen = self.trajectory.length - 1 - - return self.phaselen, self.offset - -time_log = [] # time stamp -input_log = [] # network inputs -output_log = [] # network outputs -state_log = [] # cassie state -target_log = [] # PD target log -traj_log = [] # reference trajectory log - -policy_name = "aslip_unified_0_v2" -filename = "logdata" -directory = "./hardware_logs/" + policy_name + "/" -if not os.path.exists(directory): - os.makedirs(directory) -filep = open("./hardware_logs/" + policy_name + "/" + filename + ".pkl", "wb") - -def log(): - data = {"time": time_log, "output": output_log, "input": input_log, "state": state_log, "target": target_log, "trajectory": traj_log} - pickle.dump(data, filep) - -def isData(): - return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], []) - -atexit.register(log) - -# Prevent latency issues by disabling multithreading in pytorch -torch.set_num_threads(1) - -# Prepare model -# env = CassieEnv_speed_no_delta_neutral_foot("walking", clock_based=True, state_est=True) -# env.reset_for_test() -traj = TrajectoryInfo() - -# policy = torch.load("./trained_models/old_aslip/final_v1/aslip_unified_freq_correction.pt") -policy = torch.load("./trained_models/" + policy_name + ".pt") -policy.eval() - -max_speed = 2.0 -min_speed = 0.0 -max_y_speed = 0.5 -min_y_speed = -0.5 - -# Initialize control structure with gains -P = np.array([100, 100, 88, 96, 50, 100, 100, 88, 96, 50]) -D = np.array([10.0, 10.0, 8.0, 9.6, 5.0, 10.0, 10.0, 8.0, 9.6, 5.0]) -u = pd_in_t() -for i in range(5): - u.leftLeg.motorPd.pGain[i] = P[i] - u.leftLeg.motorPd.dGain[i] = D[i] - u.rightLeg.motorPd.pGain[i] = P[i+5] - u.rightLeg.motorPd.dGain[i] = D[i+5] - -pos_index = np.array([2,3,4,5,6,7,8,9,14,15,16,20,21,22,23,28,29,30,34]) -vel_index = np.array([0,1,2,3,4,5,6,7,8,12,13,14,18,19,20,21,25,26,27,31]) -pos_mirror_index = np.array([2,3,4,5,6,21,22,23,28,29,30,34,7,8,9,14,15,16,20]) -vel_mirror_index = np.array([0,1,2,3,4,5,19,20,21,25,26,27,31,6,7,8,12,13,14,18]) -offset = traj.offset -# offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968]) - -# Determine whether running in simulation or on the robot -if platform.node() == 'cassie': - cassie = CassieUdp(remote_addr='10.10.10.3', remote_port='25010', - local_addr='10.10.10.100', local_port='25011') -else: - cassie = CassieUdp() # local testing - - -# Connect to the simulator or robot -print('Connecting...') -y = None -while y is None: - cassie.send_pd(pd_in_t()) - time.sleep(0.001) - y = cassie.recv_newest_pd() -received_data = True -print('Connected!\n') - -# Record time -t = time.monotonic() -t0 = t - -orient_add = 0 -old_settings = termios.tcgetattr(sys.stdin) -try: - tty.setcbreak(sys.stdin.fileno()) - while True: - # Wait until next cycle time - while time.monotonic() - t < 60/2000: - time.sleep(0.001) - t = time.monotonic() - tt = time.monotonic() - t0 - - # Get newest state - state = cassie.recv_newest_pd() - - if state is None: - print('Missed a cycle') - continue - - if platform.node() == 'cassie': - # Radio control - orient_add -= state.radio.channel[3] / 60.0 - print("orient add: ", orient_add) - traj.speed = max(min_speed, state.radio.channel[0] * max_speed) - traj.speed = min(max_speed, state.radio.channel[0] * 
max_speed) - # traj.phase_add = state.radio.channel[5] + 1 - # env.y_speed = max(min_y_speed, -state.radio.channel[1] * max_y_speed) - # env.y_speed = min(max_y_speed, -state.radio.channel[1] * max_y_speed) - else: - # Automatically change orientation and speed - tt = time.monotonic() - t0 - orient_add += 0#math.sin(t / 8) / 400 - if isData(): - c = sys.stdin.read(1) - if c == 'w': - traj.speed += .1 - print("Increasing speed to: ", traj.speed) - elif c == 's': - traj.speed -= .1 - print("Decreasing speed to: ", traj.speed) - # elif c == 'a': - # y_speed += .1 - # print("Increasing y speed to: ", y_speed) - # elif c == 'd': - # y_speed -= .1 - # print("Decreasing y speed to: ", y_speed) - elif c == 'j': - traj.freq_adjust += .1 - print("Increasing frequency to: ", traj.freq_adjust) - elif c == 'h': - traj.freq_adjust -= .1 - print("Decreasing frequency to: ", traj.freq_adjust) - elif c == 'l': - orient_add += .1 - print("Increasing orient_add to: ", orient_add) - elif c == 'k': - orient_add -= .1 - print("Decreasing orient_add to: ", orient_add) - elif c == 'p': - print("Applying force") - push = 100 - push_dir = 0 - force_arr = np.zeros(6) - force_arr[push_dir] = push - env.sim.apply_force(force_arr) - else: - pass - traj.speed = max(min_speed, traj.speed) - traj.speed = min(max_speed, traj.speed) - # y_speed = max(min_y_speed, y_speed) - # y_speed = min(max_y_speed, y_speed) - # print("y_speed: ", y_speed) - # print("frequency: ", traj.phase_add) - - traj.update_info(traj.speed) - - clock = [np.sin(2 * np.pi * traj.phase * traj.freq_adjust / traj.phaselen), np.cos(2 * np.pi * traj.phase * traj.freq_adjust / traj.phaselen)] - - # euler_orient = quaternion2euler(state.pelvis.orientation[:]) - # print("euler orient: ", euler_orient + np.array([orient_add, 0, 0])) - # new_orient = euler2quat(euler_orient + np.array([orient_add, 0, 0])) - quaternion = euler2quat(z=orient_add, y=0, x=0) - iquaternion = inverse_quaternion(quaternion) - new_orient = quaternion_product(iquaternion, state.pelvis.orientation[:]) - if new_orient[0] < 0: - new_orient = -new_orient - new_translationalVelocity = rotate_by_quaternion(state.pelvis.translationalVelocity[:], iquaternion) - print("orig orientation: ", state.pelvis.orientation[:]) - print('new_orientation: {}'.format(new_orient)) - - # ext_state = np.concatenate((clock, [speed, y_speed])) - ext_state = np.concatenate((clock, [traj.speed] )) - robot_state = np.concatenate([ - [state.pelvis.position[2] - state.terrain.height], # pelvis height - new_orient, - # state.pelvis.orientation[:], # pelvis orientation - state.motor.position[:], # actuated joint positions - - # state.pelvis.translationalVelocity[:], # pelvis translational velocity - new_translationalVelocity[:], - state.pelvis.rotationalVelocity[:], # pelvis rotational velocity - state.motor.velocity[:], # actuated joint velocities - - state.pelvis.translationalAcceleration[:], # pelvis translational acceleration - - state.joint.position[:], # unactuated joint positions - state.joint.velocity[:] # unactuated joint velocities - ]) - RL_state = np.concatenate([robot_state, ext_state]) - - actual_speed = state.pelvis.translationalVelocity[0] - print("target speed: {:.2f}\tactual speed: {:.2f}\tfreq: {}".format(traj.speed, actual_speed, traj.freq_adjust)) - - #pretending the height is always 1.0 - # RL_state[0] = 1.0 - - # Construct input vector - torch_state = torch.Tensor(RL_state) - # torch_state = shared_obs_stats.normalize(torch_state) - - # Get action - action = policy.act(torch_state, True) - 
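The deleted hardware script above polls the keyboard without blocking the control loop by putting the terminal into cbreak mode and checking `select` with a zero timeout. A stripped-down sketch of that pattern (Unix-only; the speed keys and the 30 ms sleep standing in for the policy step are illustrative):

```python
import sys, time, select, termios, tty

def key_pressed():
    # non-blocking check for a queued keypress (the isData() pattern)
    return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])

old_settings = termios.tcgetattr(sys.stdin)
try:
    tty.setcbreak(sys.stdin.fileno())   # keys arrive immediately, no Enter needed
    speed = 0.0
    while True:
        if key_pressed():
            c = sys.stdin.read(1)
            if c == 'w':
                speed += 0.1
            elif c == 's':
                speed -= 0.1
            elif c == 'q':
                break
        # ... run the policy and send PD targets here ...
        time.sleep(0.03)                # roughly one 60/2000 s policy step
finally:
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
```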
env_action = action.data.numpy() - target = env_action + traj.offset - - # print(state.pelvis.position[2] - state.terrain.height) - - # Send action - for i in range(5): - u.leftLeg.motorPd.pTarget[i] = target[i] - u.rightLeg.motorPd.pTarget[i] = target[i+5] - #time.sleep(0.005) - cassie.send_pd(u) - - # Measure delay - print('delay: {:6.1f} ms'.format((time.monotonic() - t) * 1000)) - - # Logging - time_log.append(time.time()) - state_log.append(state) - input_log.append(RL_state) - output_log.append(env_action) - target_log.append(target) - traj_log.append(traj.offset) - - # Track phase - traj.phase += traj.phase_add - if traj.phase >= traj.phaselen: - traj.phase = 0 - traj.counter += 1 - -finally: - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) \ No newline at end of file diff --git a/deprecated/figures/16itrensembleplot.png b/deprecated/figures/16itrensembleplot.png deleted file mode 100644 index bd0d0113..00000000 Binary files a/deprecated/figures/16itrensembleplot.png and /dev/null differ diff --git a/deprecated/figures/16itrpolicyplot.png b/deprecated/figures/16itrpolicyplot.png deleted file mode 100644 index 45f7fdcb..00000000 Binary files a/deprecated/figures/16itrpolicyplot.png and /dev/null differ diff --git a/deprecated/figures/stable_reward=23.7.png b/deprecated/figures/stable_reward=23.7.png deleted file mode 100644 index f233617a..00000000 Binary files a/deprecated/figures/stable_reward=23.7.png and /dev/null differ diff --git a/deprecated/figures/weird_reference_trajectory.png b/deprecated/figures/weird_reference_trajectory.png deleted file mode 100644 index 11533bf9..00000000 Binary files a/deprecated/figures/weird_reference_trajectory.png and /dev/null differ diff --git a/deprecated/logging.py b/deprecated/logging.py deleted file mode 100644 index f184d10f..00000000 --- a/deprecated/logging.py +++ /dev/null @@ -1,210 +0,0 @@ -""" -A simple live-updating logger for logging training progress. - -Based largely off Berkely's DRL course HW4, which is itself inspired by rllab. -https://github.com/berkeleydeeprlcourse/homework/blob/master/hw4/logz.py -""" -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -plt.style.use('ggplot') - -from functools import partial -import os.path as osp, shutil, time, atexit, os, subprocess, hashlib, sys -import configparser -from collections import OrderedDict -import numpy as np - -import ray - -matplotlib.rcParams.update({'font.size': 8}) - -#from scipy.signal import medfilt - -class Logger(): - def __init__(self, args, env_name, viz=True, viz_list=[]): - self.ansi = dict( - gray=30, - red=31, - green=32, - yellow=33, - blue=34, - magenta=35, - cyan=36, - white=37, - crimson=38 - ) - - self.name = args.name - - self.viz_list = ["all"] - - self.args = args - - if viz: - from visdom import Visdom - self.viz = Visdom(port=args.viz_port) - self.env = env_name - self.plots = {} - else: - self.viz = None - - self.output_dir = self._get_directory(args) - self.init = True - self.header = [] - self.current_row = {} - - def _get_directory(self, args): - """Use hyperparms to set a directory to output diagnostic files.""" - #get hyperparameters as dictionary - arg_dict = args.__dict__ - - assert "seed" in arg_dict, \ - "You must provide a 'seed' key in your command line arguments" - - assert "logdir" in arg_dict, \ - "You must provide a 'logdir' key in your command line arguments." 
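Stepping back to the deleted controller: the policy output is turned into motor commands by adding it to a motor-position offset and writing the sums into a `pd_in_t` struct before `send_pd`. A trimmed sketch of just that step; the offset values are the fixed neutral-pose numbers left commented out in the deleted script, and the zero action stands in for `policy.act`:

```python
import numpy as np
from cassie.cassiemujoco import pd_in_t  # same struct the deleted controller fills

# neutral motor positions (the commented-out alternative to traj.offset)
offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968,
                   0.0045, 0.0, 0.4973, -1.1997, -1.5968])

env_action = np.zeros(10)        # stand-in for the policy's 10-dim output
target = env_action + offset     # PD position targets = policy delta + offset

u = pd_in_t()
for i in range(5):
    u.leftLeg.motorPd.pTarget[i] = target[i]
    u.rightLeg.motorPd.pTarget[i] = target[i + 5]
# cassie.send_pd(u) then ships the targets over UDP once per policy step
```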
- - #sort the keys so the same hyperparameters will always have the same hash - arg_dict = OrderedDict(sorted(arg_dict.items(), key=lambda t: t[0])) - - #remove seed so it doesn't get hashed, store value for filename - # same for logging directory - seed = str(arg_dict.pop("seed")) - logdir = str(arg_dict.pop('logdir')) - - # get a unique hash for the hyperparameter settings, truncated at 10 chars - arg_hash = hashlib.md5(str(arg_dict).encode('ascii')).hexdigest()[0:10] - - output_dir = osp.join(logdir, arg_hash) - - # create a directory with the hyperparm hash as its name, if it doesn't - # already exist. - os.makedirs(output_dir, exist_ok=True) - - # create a file for this seed, this is where output will be logged - filename = "seed" + seed + ".log" - - # currently logged-to directories will be pre-pended with "ACTIVE_" - active_path = osp.join(output_dir, filename) - - # Create a file with all the hyperparam settings in plaintext - info_path = osp.join(output_dir, "experiment.info") - self._generate_info_file(open(info_path, 'w'), arg_dict) - - print(self._colorize("Logging data to %s" % active_path, - 'green', bold=True)) - - return active_path - - def record(self, key, val, x_val, title_name, x_var_name="Timesteps", split_name="train"): - """ - Log some diagnostic value in current iteration. - - Call this exactly once for each diagnostic, every iteration - """ - if self.init: - self.header.append(key) - # if self.viz is not None: - # self.wins.append(None) - else: - assert key in self.header, \ - "Key %s not in header. All keys must be set in first iteration" % key - - assert key not in self.current_row, \ - "You already set key %s this iteration. Did you forget to call dump()?" % key - - self.current_row[key] = val - - if self.viz is not None: - self.plot(key, x_var_name, split_name, title_name, x_val, val) - - def dump(self): - """Write all of the diagnostics from the current iteration""" - vals = [] - - sys.stdout.write("-" * 37 + "\n") - for key in self.header: - val = self.current_row.get(key, "") - if hasattr(val, "__float__"): - valstr = "%8.3g" % val - else: - valstr = val - - sys.stdout.write("| %15s | %15s |" % (key, valstr) + "\n") - vals.append(float(val)) - sys.stdout.write("-" * 37 + "\n") - sys.stdout.flush() - - output_file = None - if self.init: - output_file = open(self.output_dir, "w") - - output_file.write("\t".join(self.header)) - output_file.write("\n") - else: - output_file = open(self.output_dir, "a") - - output_file.write("\t".join(map(str, vals))) - output_file.write("\n") - output_file.flush() - output_file.close() - - self.current_row.clear() - - self.init = False - - # if self.viz is not None: - # self.plot() - - def config_monitor(self, config_path=None): - if config_path is None: - config_path = os.path.join(os.path.dirname(__file__), "../config/monitor.ini") - - config = configparser.ConfigParser() - config.read(config_path) - - return config["monitor"] - - def plot(self, var_name, x_var_name, split_name, title_name, x, y): - if var_name not in self.plots: - self.plots[var_name] = self.viz.line(X=np.array([x,x]), Y=np.array([y,y]), env=self.env, opts=dict( - legend=[split_name], - title=title_name, - xlabel=x_var_name, - ylabel=var_name - )) - else: - self.viz.line(X=np.array([x]), Y=np.array([y]), env=self.env, win=self.plots[var_name], name=split_name, update = 'append') - - def _load_data(self): - log_file = open(self.output_dir, 'r') - - header = log_file.readline().rstrip('\n').split('\t') - - data = [] - for line in log_file: - vals = 
line.rstrip('\n').split('\t') - vals = [float(val) for val in vals] - data.append(vals) - - data = np.array(data) - - log_file.close() - return data, header - - def _generate_info_file(self, file, arg_dict): - for key, val in arg_dict.items(): - file.write("%s: %s" % (key, val)) - file.write('\n') - - def _colorize(self, string, color, bold=False, highlight=False): - """Format string to print with color 'color' if printed to unix terminal.""" - attr = [] - num = self.ansi[color] - if highlight: - num += 10 - attr.append(str(num)) - if bold: - attr.append('1') - return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) \ No newline at end of file diff --git a/deprecated/old/dagger.py b/deprecated/old/dagger.py deleted file mode 100644 index fd4639e3..00000000 --- a/deprecated/old/dagger.py +++ /dev/null @@ -1,36 +0,0 @@ -from rl.envs import SlipEnv -from rl.policies import GaussianMLP -from rl.algos import DAgger -from rl.utils import policyplot - -import torch -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--dagger_itr", type=int, default=100, - help="number of iterations of DAgger") -parser.add_argument("--epochs", type=int, default=10, - help="number of optimization epochs") -parser.add_argument("--trj_len", type=int, default=10000, - help="maximum trajectory length") -parser.add_argument("--seed", type=int, default=1, - help="random seed for experiment") -args = parser.parse_args() - -if __name__ == "__main__": - torch.manual_seed(args.seed) - - env = SlipEnv(0.001) - obs_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] - - learner = GaussianMLP(obs_dim, action_dim, (64,)) - algo = DAgger(env, learner, None) - - algo.train( - dagger_itr=args.dagger_itr, - epochs=args.epochs, - trj_len=args.trj_len - ) - - policyplot(env, learner, args.trj_len) diff --git a/deprecated/old/experiment.sh b/deprecated/old/experiment.sh deleted file mode 100755 index 115f0161..00000000 --- a/deprecated/old/experiment.sh +++ /dev/null @@ -1,4 +0,0 @@ -for i in {1..15} -do - python examples/ppo.py --name "modelv$i" --seed $RANDOM & -done \ No newline at end of file diff --git a/deprecated/old/parallel_profiler.py b/deprecated/old/parallel_profiler.py deleted file mode 100644 index e69de29b..00000000 diff --git a/deprecated/old/vpg.py b/deprecated/old/vpg.py deleted file mode 100644 index 1fb61122..00000000 --- a/deprecated/old/vpg.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Python file for automatically running experiments from command line.""" -import argparse - -from rllab.envs.box2d.cartpole_env import CartpoleEnv -from rllab.envs.box2d.double_pendulum_env import DoublePendulumEnv -from rllab.envs.mujoco.walker2d_env import Walker2DEnv -from rllab.envs.mujoco.hopper_env import HopperEnv -from rllab.envs.mujoco.walker2d_env import Walker2DEnv -from rllab.envs.gym_env import GymEnv -from rllab.envs.normalized_env import normalize - -from rl.utils import run_experiment -from rl.policies import GaussianA2C -from rl.baselines import FeatureEncodingBaseline -from rl.algos import VPG, CPI - -import gym -import torch - - -parser = argparse.ArgumentParser() - -CPI.add_arguments(parser) - -parser.add_argument("--seed", type=int, default=1, - help="RNG seed") -parser.add_argument("--logdir", type=str, default="/tmp/rl/experiments/", - help="Where to log diagnostics to") - -args = parser.parse_args() - -if __name__ == "__main__": - #env = normalize(Walker2DEnv()) - - env = gym.make("Walker2d-v1") - - #env.seed(args.seed) - #torch.manual_seed(args.seed) - - obs_dim = 
env.observation_space.shape[0] - action_dim = env.action_space.shape[0] - - policy = GaussianA2C(obs_dim, action_dim, (32,)) - - algo = CPI( - env=env, - policy=policy, - lr=args.lr, - tau=args.tau - ) - - run_experiment( - algo=algo, - args=args, - log=True, - monitor=False, - render=True - ) diff --git a/deprecated/plotting.py b/deprecated/plotting.py deleted file mode 100644 index dfeb0e0f..00000000 --- a/deprecated/plotting.py +++ /dev/null @@ -1,62 +0,0 @@ -"""This screws up visualize.py""" -""" -import numpy as np -from matplotlib import pyplot as plt -from matplotlib.lines import Line2D - -from torch.autograd import Variable as Var -from torch import Tensor - - -class RealtimePlot(): - def __init__(self, style='ggplot'): - plt.style.use(style) - plt.ion() - self.fig, self.ax = plt.subplots() - self.xlim = 0 - self.yvals = [] - self.line = Line2D([], []) - self.ax.add_line(self.line) - - def config(self, ylabel, xlabel): - self.ax.set_ylabel(ylabel) - self.ax.set_xlabel(xlabel) - self.fig.tight_layout() - - def plot(self, y): - self.yvals.append(y) - self.line.set_data(np.arange(len(self.yvals)), self.yvals) - - self.ax.relim() - self.ax.autoscale_view() - self.ax.set_xlim(0, self.xlim) - self.xlim += 1 - - self.fig.canvas.flush_events() - - def done(self): - plt.ioff() - plt.show() - - -def policyplot(env, policy, trj_len): - obs_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] - - y = np.zeros((trj_len, action_dim)) - X = np.zeros((trj_len, obs_dim)) - - obs = env.reset() - for t in range(trj_len): - X[t, :] = obs - action = policy(Var(Tensor(obs[None, :]))).data.numpy()[0] - y[t, :] = action - obs = env.step(action)[0] - - fig, axes = plt.subplots(1, action_dim) - - for a in range(action_dim): - axes[a].plot(np.arange(trj_len), y[:, a]) - - plt.show() -""" \ No newline at end of file diff --git a/deprecated/renderpol.py b/deprecated/renderpol.py deleted file mode 100644 index 69f092ff..00000000 --- a/deprecated/renderpol.py +++ /dev/null @@ -1,29 +0,0 @@ -import sys -sys.path.append("..") # Adds higher directory to python modules path. 
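The deleted `logging.py` above builds its output directory from an md5 hash of the sorted hyperparameter dictionary, after popping `seed` and `logdir` out so that reruns with a different seed land in the same experiment folder. A condensed sketch with hypothetical arguments:

```python
import hashlib
import os.path as osp
from collections import OrderedDict

# hypothetical experiment arguments
args = {"env_name": "Cassie-v0", "lr": 1e-4, "seed": 1337, "logdir": "logs/"}

arg_dict = OrderedDict(sorted(args.items()))   # fixed key order -> stable hash
seed = str(arg_dict.pop("seed"))
logdir = str(arg_dict.pop("logdir"))

# md5 of the remaining settings, truncated to 10 characters
arg_hash = hashlib.md5(str(arg_dict).encode("ascii")).hexdigest()[:10]
log_path = osp.join(logdir, arg_hash, "seed" + seed + ".log")
# e.g. 'logs/<10-char hash>/seed1337.log'
```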
- -from rl.utils import renderpolicy, rendermultipolicy, renderpolicy_speedinput, rendermultipolicy_speedinput -from cassie import CassieEnv - -import torch - -import numpy as np -import os -import time - -import argparse -import pickle - -parser = argparse.ArgumentParser() -parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/", help="path to folder containing policy and run details") -args = parser.parse_args() -run_args = pickle.load(open(args.path + "experiment.pkl", "rb")) - -cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta) -policy = torch.load(args.path + "actor.pt") -policy.eval() - -# cassie_env = CassieEnv(traj="aslip", clock_based=False, state_est=True, dynamics_randomization=False, no_delta=False) -# policy = torch.load(args.path + "aslip_unified_task10_v7.pt") -# policy.eval() - -renderpolicy_speedinput(cassie_env, policy, deterministic=False, dt=0.05, speedup = 2) \ No newline at end of file diff --git a/deprecated/results/TD3_Cassie-mimic-v0_0.npy b/deprecated/results/TD3_Cassie-mimic-v0_0.npy deleted file mode 100644 index 2efc4409..00000000 Binary files a/deprecated/results/TD3_Cassie-mimic-v0_0.npy and /dev/null differ diff --git a/deprecated/setup.py b/deprecated/setup.py deleted file mode 100644 index f1d377a2..00000000 --- a/deprecated/setup.py +++ /dev/null @@ -1,13 +0,0 @@ -from setuptools import setup, find_packages - -#REQUIRED_PACKAGES = ["torch==0.4", "gym", "numpy", "visdom"] - -setup(name='apex', - version='1.0', - description='Deep reinforcement learning for continuous control', - author='Pedro Morais, Yesh Godse, Jonah Siekmann', - author_email='autranemorais@gmail.com, yesh.godse@gmail.com, siekmanj@oregonstate.edu', - license='MIT', - packages=find_packages(exclude=("tests")), - #install_requires=REQUIRED_PACKAGES -) diff --git a/deprecated/test_ppo.py b/deprecated/test_ppo.py deleted file mode 100644 index 95693684..00000000 --- a/deprecated/test_ppo.py +++ /dev/null @@ -1,524 +0,0 @@ -# TODO: organize this file -import argparse -import pickle -import torch - -import time - -import functools - -# NOTE: importing cassie for some reason breaks openai gym, BUG ? -from cassie import CassieEnv, CassieTSEnv, CassieIKEnv -from cassie.no_delta_env import CassieEnv_nodelta -from cassie.speed_env import CassieEnv_speed -from cassie.speed_double_freq_env import CassieEnv_speed_dfreq -from cassie.speed_no_delta_env import CassieEnv_speed_no_delta - -from rl.envs import Normalize, Vectorize -from rl.policies import GaussianMLP, EnsemblePolicy - -import matplotlib -from matplotlib import pyplot as plt -plt.style.use('ggplot') - -import numpy as np -np.set_printoptions(precision=2, suppress=True) - -from cassie.cassiemujoco import pd_in_t - -#TODO: convert all of this into ipynb - - -# TODO: add .dt to all environments. OpenAI should do the same... 
-def visualize(env, policy, trj_len, deterministic=True, dt=0.033, speedup=1): - R = [] - r_ep = 0 - done = False - - with torch.no_grad(): - state = torch.Tensor(env.reset()) - - - for t in range(trj_len): - - #start = time.time() - _, action = policy.act(state, deterministic) - #print("policy time: ", time.time() - start) - - #start = time.time() - state, reward, done, _ = env.step(action.data.numpy()) - #print("env time: ", time.time() - start) - - r_ep += reward - - if done: - state = env.reset() - R += [r_ep] - r_ep = 0 - - state = torch.Tensor(state) - - env.render() - - time.sleep(dt / speedup) - - if not done: - R += [r_ep] - - print("avg reward:", sum(R)/len(R)) - print("avg timesteps:", trj_len / len(R)) - - - -def cassie_policyplot(env, policy, trj_len, render=False, title=None): - cassie_action = ["hip roll", "hip yaw", "hip pitch", "knee", "foot"] - - obs_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] - - y_delta = np.zeros((trj_len, action_dim)) - y_ref = np.zeros((trj_len, action_dim)) - X = np.zeros((trj_len, action_dim)) # not real X - - with torch.no_grad(): - state = torch.Tensor(env.reset()) - - for t in range(trj_len): - _, action = policy.act(state, True) - - #X[t, :] = state.data.numpy() - y_delta[t, :] = action.data.numpy() # policy delta - - # oooof this is messy/hackish - ref_pos, _ = env.venv.envs[0].get_ref_state(env.venv.envs[0].phase) - y_ref[t, :] = ref_pos[env.venv.envs[0].pos_idx] # base PD target - - X[t, :] = np.copy(env.venv.envs[0].sim.qpos())[env.venv.envs[0].pos_idx] - - state, reward, done, _ = env.step(action.data.numpy()) - - state = torch.Tensor(state) - - if render: - env.render() - time.sleep(0.033) - - # one row for each leg - plot_rows = 2 - plot_cols = action_dim // 2 - - fig, axes = plt.subplots(plot_rows, plot_cols, figsize=(20, 10)) - - if title is not None: - fig.suptitle(title, fontsize=16) - - for r in range(plot_rows): # 2 legs - for c in range(plot_cols): # 5 actions - a = r * plot_cols + c - axes[r][c].plot(np.arange(trj_len), y_delta[:, a], "C0", label="delta") - axes[r][c].plot(np.arange(trj_len), y_ref[:, a], "C1", label="reference") - axes[r][c].plot(np.arange(trj_len), y_delta[:, a] + y_ref[:, a], "C2--", label="summed") - - axes[r][c].plot(np.arange(trj_len), X[:, a], "g--", label="result") - - axes[0][c].set_xlabel(cassie_action[c]) - axes[0][c].xaxis.set_label_position('top') - axes[r][0].set_ylabel(["left leg", "right leg"][r]) - - plt.tight_layout() - - if title is not None: - plt.subplots_adjust(top=0.93) - - - axes[0][0].legend(loc='upper left') - plt.show() - - -def policyplot(env, policy, trj_len): - obs_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] - - y = np.zeros((trj_len, action_dim)) - X = np.zeros((trj_len, obs_dim)) - - state = torch.Tensor(env.reset()) - for t in range(trj_len): - - _, action = policy.act(state, True) - - X[t, :] = state.data.numpy() - y[t, :] = action.data.numpy() - - state, _, _, _ = env.step(action.data.numpy()) - - fig, axes = plt.subplots(1, action_dim) - - - for a in range(action_dim): - axes[a].plot(np.arange(trj_len), y[:, a]) - - plt.show() - -# TODO: add no_grad to all of these - -def get_rewards(env, policy, num_trj, deterministic=False): - r_ep = 0 - R = [] - done = False - - state = torch.Tensor(env.reset()) - while len(R) < num_trj: - _, action = policy.act(state, deterministic) - state, reward, done, _ = env.step(action.data.numpy()) - - r_ep += reward - - if done: - state = env.reset() - R += [r_ep[0]] - r_ep = 0 - - 
- state = torch.Tensor(state) - - if not done: - R += [r_ep[0]] - - return R - -# TODO: add n_trials arg, pass in policy function instead - -def plot_reward_dist(env, policy, num_trj, deterministic=False): - r_ep = 0 - R = [] - done = False - - state = torch.Tensor(env.reset()) - while len(R) < num_trj: - _, action = policy.act(state, deterministic) - state, reward, done, _ = env.step(action.data.numpy()) - - r_ep += reward - - if done: - state = env.reset() - R += [r_ep[0]] - r_ep = 0 - - state = torch.Tensor(state) - - if not done: - R += [r_ep[0]] - - plt.xlabel("Return") - plt.ylabel("Frequency") - plt.hist(R, edgecolor='black', label=policy.__class__.__name__) # TODO: make labels less hacky - -def plot_action_dist(env, policy, num_trj, deterministic=False): - n = 0 - A = [] - done = False - - state = torch.Tensor(env.reset()) - while n < num_trj: - _, action = policy.act(state, deterministic) - state, reward, done, _ = env.step(action.data.numpy()) - - A += [action.mean().data.numpy()] - - if done: - state = env.reset() - n += 1 - - - state = torch.Tensor(state) - - print(np.std(A)) - - plt.xlabel("Mean Action") - plt.ylabel("Frequency") - plt.hist(A, edgecolor='black', label=policy.__class__.__name__) # TODO: make labels less hacky - -def print_semantic_state(s): - # only works for full state for now - assert s.numel() == 80 - - x = [None] * 20 - - # TODO: add mask variable to env to specifiy which states are used - - x [ 0] = "Pelvis y" - x [ 1] = "Pelvis z" - x [ 2] = "Pelvis qw" - x [ 3] = "Pelvis qx" - x [ 4] = "Pelvis qy" - x [ 5] = "Pelvis qz" - x [ 6] = "Left hip roll" #(Motor [0]) - x [ 7] = "Left hip yaw" #(Motor [1]) - x [ 8] = "Left hip pitch" #(Motor [2]) - x [ 9] = "Left knee" #(Motor [3]) - x [10] = "Left shin" #(Joint [0]) - x [11] = "Left tarsus" #(Joint [1]) - x [12] = "Left foot" #(Motor [4], Joint [2]) - x [13] = "Right hip roll" #(Motor [5]) - x [14] = "Right hip yaw" #(Motor [6]) - x [15] = "Right hip pitch" #(Motor [7]) - x [16] = "Right knee" #(Motor [8]) - x [17] = "Right shin" #(Joint [3]) - x [18] = "Right tarsus" #(Joint [4]) - x [19] = "Right foot" #(Motor [9], Joint [5]) - - x_dot = [None] * 20 - - x_dot[ 0] = "Pelvis x" - x_dot[ 1] = "Pelvis y" - x_dot[ 2] = "Pelvis z" - x_dot[ 3] = "Pelvis wx" - x_dot[ 4] = "Pelvis wy" - x_dot[ 5] = "Pelvis wz" - x_dot[ 6] = "Left hip roll" #(Motor [0]) - x_dot[ 7] = "Left hip yaw" #(Motor [1]) - x_dot[ 8] = "Left hip pitch" #(Motor [2]) - x_dot[ 9] = "Left knee" #(Motor [3]) - x_dot[10] = "Left shin" #(Joint [0]) - x_dot[11] = "Left tarsus" #(Joint [1]) - x_dot[12] = "Left foot" #(Motor [4], Joint [2]) - x_dot[13] = "Right hip roll" #(Motor [5]) - x_dot[14] = "Right hip yaw" #(Motor [6]) - x_dot[15] = "Right hip pitch" #(Motor [7]) - x_dot[16] = "Right knee" #(Motor [8]) - x_dot[17] = "Right shin" #(Joint [3]) - x_dot[18] = "Right tarsus" #(Joint [4]) - x_dot[19] = "Right foot" #(Motor [9], Joint [5]) - - s_obs = s[:, :40] - s_ref = s[:, 40:] - - print("\nObserved position") - for i in range(20): - if s_obs[:, i].item() > 1: - print("{0}: \r\t\t\t{1:.2f}".format(x[i], s_obs[:, i].item())) - - print("\nObserved velocity") - for i in range(20): - if s_obs[:, 20 + i].item() > 1: - print("{0}: \r\t\t\t{1:.2f}".format(x_dot[i], s_obs[:, 20 + i].item())) - - print("\nReference position") - for i in range(20): - if s_ref[:, i].item() > 1: - print("{0}: \r\t\t\t{1:.2f}".format(x[i], s_ref[:, i].item())) - - print("\nReference velocity") - for i in range(20): - if s_ref[:, 20 + i].item() > 1: - print("{0}: 
\r\t\t\t{1:.2f}".format(x_dot[i], s_ref[:, 20 + i].item())) - -# my idea for extending perturbation saliency to dynamical systems -def saliency(policy, state, naive=False): - scores = torch.zeros_like(state) - for i in range(state.size(1)): - score = 0 - - s_prime = state.clone() - # sample values from (e - 0.25, e + 0.25) - max_r = 0.25 - - # change in value/related to number of samples - dr = 0.05 - - # current change - r = -max_r # start at min r - if not naive: - while r <= max_r: - # TODO: visualize s_prime - # would require setting qpos and visualizing - # setting qpos would require knowing the mapping of indices - # from s to qpos - s_prime[:, i] = state[:, i] + r - - v, a = policy.act(state) - v_prime, a_prime = policy.act(s_prime) - - score += 0.5 * torch.norm(v - v_prime, 2) - - r += dr - else: - s_prime[:, i] = 0 - - v, a = policy.act(state) - v_prime, a_prime = policy.act(s_prime) - - score += 0.5 * torch.norm(v - v_prime, 2) - - - score /= 2 * max_r - scores[:, i] = score - scores = scores / torch.max(scores) * 10 - print_semantic_state(scores) - -# more plots: -# state visitation -# value function -# reward distribution - -def make_env_fn(state_est=False): - def _thunk(): - #return CassieEnv("walking", clock_based=True, state_est=state_est) - return CassieIKEnv("walking", clock_based=True) - return _thunk - - -parser = argparse.ArgumentParser(description="Run a model, including visualization and plotting.") -parser.add_argument("-p", "--model_path", type=str, default="./trained_models/demo2.pt", - help="File path for model to test") -parser.add_argument("-x", "--no-visualize", dest="visualize", default=True, action='store_false', - help="Don't render the policy.") -parser.add_argument("-g", "--graph", dest="plot", default=False, action='store_true', - help="Graph the output of the policy.") - -parser.add_argument("--glen", type=int, default=150, - help="Length of trajectory to graph.") -parser.add_argument("--vlen", type=int, default=75, - help="Length of trajectory to visualize") - -parser.add_argument("--noise", default=False, action="store_true", - help="Visualize policy with exploration.") - -parser.add_argument("--new", default=False, action="store_true", - help="Visualize new (untrained) policy") - -args = parser.parse_args() - - -# TODO: add command line arguments for normalization on/off, and for ensemble policy? 
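The deleted `saliency` helper above scores each observation dimension by sweeping it over a small interval and accumulating how far the critic's value estimate moves. A framework-free sketch of the same idea; `value_fn` is a hypothetical callable standing in for the value head of `policy.act`:

```python
import numpy as np

def perturbation_saliency(value_fn, state, radius=0.25, step=0.05):
    # score each input dim by how much value_fn moves when that dim is swept
    # over [x - radius, x + radius], then normalize to the largest score
    scores = np.zeros_like(state, dtype=float)
    v = value_fn(state)
    for i in range(state.shape[0]):
        s_prime = state.copy()
        for r in np.arange(-radius, radius + 1e-9, step):
            s_prime[i] = state[i] + r
            scores[i] += 0.5 * abs(v - value_fn(s_prime))
        scores[i] /= 2 * radius
    return scores / scores.max()

# toy check with a quadratic "value function"
print(perturbation_saliency(lambda s: float(np.sum(s ** 2)), np.ones(5)))
```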
- -if __name__ == "__main__": - torch.set_num_threads(1) # see: https://github.com/pytorch/pytorch/issues/13757 - - input() - - if args.new: - env_fn = make_env_fn(state_est=args.state_est) - - # env_fn = make_cassie_env("walking", clock_based=True) - # env_fn = functools.partial(CassieEnv_speed, "walking", clock_based=True, state_est=False) - # env_fn = functools.partial(CassieEnv_nodelta, "walking", clock_based=True, state_est=False) - # env_fn = functools.partial(CassieEnv_speed_dfreq, "walking", clock_based = True, state_est=args.state_est) - - env = Vectorize([env_fn]) - - obs_dim = env_fn().observation_space.shape[0] - action_dim = env_fn().action_space.shape[0] - - policy = GaussianMLP(obs_dim, action_dim, nonlinearity="relu", init_std=np.exp(-2), learn_std=False) - - # policy2 = ActorCriticNet(obs_dim, action_dim, [256, 256]) - - # #print(policy, sum(p.numel() for p in policy.parameters())) - # #print(policy2, sum(p.numel() for p in policy2.parameters())) - - # plot_action_dist(env, policy, 100) - # plot_action_dist(env, policy2, 100) - # plt.legend() - # plt.show() - - # R1 = [] - # R2 = [] - # R3 = [] - # for _ in range(5): - # R1 += get_rewards(env, GaussianMLP(obs_dim, action_dim, nonlinearity="relu", init_std=np.exp(-2), learn_std=False), 50) - # R2 += get_rewards(env, ActorCriticNet(obs_dim, action_dim, [256, 256]), 50) - # R3 += get_rewards(env, XieNet(obs_dim, action_dim, [256, 256]), 50) - - - # plt.xlabel("Return") - # plt.ylabel("Frequency") - # plt.hist(R1, edgecolor='black', label="GaussianMLP") - # plt.hist(R2, edgecolor='black', label="ActorCriticNet") - # plt.hist(R3, edgecolor='black', label="XieNet") - # plt.legend() - # plt.show() - - # plot_reward_dist(env, policy2, 500) - - if args.visualize: - visualize(env, policy, args.vlen, deterministic=not args.noise) - - if args.plot: - cassie_policyplot(env, policy, args.glen) - - else: - policy = torch.load(args.model_path) - policy.train(0) - - env_fn = make_env_fn() - - env = env_fn() - - # while True: - # state = torch.Tensor(env.reset()) - # print("phase: {0}".format(env.venv.envs[0].phase)) - - # saliency(policy, state, naive=False) - - # env.venv.envs[0].sim.step_pd(pd_in_t()) - - # env.render() - # input() - - if args.visualize: - visualize(env, policy, args.vlen, deterministic=not args.noise) - - if args.plot: - cassie_policyplot(env, policy, args.glen) - - exit() - - - # TODO: try averaging obs_norm? to seperate obs normalization for each - # averaging obs_norm probably wont work as all policies are expecting different normalization parameters - # ob normalization should therefore should either be part of the policy or somehow syncronized - # across experiments. The former is easier to implement - # policy could keep track of its own obs_rms and the env could take it in and update it? - # ^ might break OpenAI gym compatibility - - # other possibility: maybe norm parameters converge on their own over time? 
Although if they did - # an ensemble probably wouldn't change behavior - - # SOLUTION: give trained policies an ob_rms parameter to normalize their own observations, - # keep normalization calculation in environment for parallelization - - # NOTE: reward normalization affects stuff - - #### Ensemble policy - vpolicy = [] - - mmean = 0 - mvar = 0 - for i in range(1, 16): - policy = torch.load("trained_models/modelv{}.pt".format(i)) - - vpolicy += [policy] - - print("visualizing policy {}".format(i)) - # if i == 15: - visualize(env, policy, 1) - #cassie_policyplot(env, policy, 100, "policy {}".format(i)) - - mmean /= 10 - mvar /= 10 - - env.ob_rms.mean = mmean - env.ob_rms.var = mvar - - #for i in range(10): - - # NOTE: reference trajectory for ensemble policy looks weird? - # Because it's the NORMALIZED reference trajectory^^ - # but still, weird - policy = EnsemblePolicy(vpolicy) - - #visualize(env, policy, 100) - cassie_policyplot(env, policy, 75, render=True) diff --git a/deprecated/test_ray.py b/deprecated/test_ray.py deleted file mode 100644 index be2dc3bb..00000000 --- a/deprecated/test_ray.py +++ /dev/null @@ -1,98 +0,0 @@ -import ray -import time - -@ray.remote -class Actor(): - def __init__(self, memory_id, learner_id): - self.counter = 0 - self.memory_id = memory_id - self.learner_id = learner_id - - def collect_experience(self): - bar = False - while True: - #print("collecting") - # simulate load of stepping through environment - time.sleep(0.01) - # increment counter - self.counter += 1 - # send new data to memory server - self.memory_id.receive_experience.remote(self.counter) - - # periodically query learner - if self.counter % 5 == 0: - foo, bar = ray.get(self.learner_id.send_step_count.remote()) - print("got stuff from learner") - - if bar: - print("quitting collection loop") - return - -@ray.remote -class MemoryServer(): - def __init__(self): - self.counter = 0 - - def receive_experience(self, data): - self.counter += data - - def send_experience(self): - return self.counter - -@ray.remote -class Evaluator(): - def __init__(self): - self.counter = 0 - - def evaluate_loop(self, learner_id): - bar = False - while True: - time.sleep(0.5) - foo, bar = ray.get(learner_id.send_step_count.remote()) - print("Evaluating: {}".format(foo)) - if bar: - return - -@ray.remote -class Learner(): - def __init__(self, memory_id, eval_id): - self.steps = 0 - self.done = False - self.num_train_steps = 100 - self.memory_id = memory_id - self.eval_id = eval_id - - def train(self): - while self.steps < self.num_train_steps: - # collect experience from memory - memory_data = ray.get(self.memory_id.send_experience.remote()) - # simulate optimization - time.sleep(0.25) - # increment number of steps - self.steps += 1 - - print("exiting learner") - self.done = True - - def send_step_count(self): - return self.steps, self.done - - -if __name__ == "__main__": - ray.init() - - # create many Actors, one Memory Server, one Evaluator, one Learner - memory_id = MemoryServer.remote() - eval_id = Evaluator.remote() - learner_id = Learner.remote(memory_id, eval_id) - actor_ids = [Actor.remote(memory_id, learner_id) for _ in range(10)] - - # Start actor collection loop and learner train loop and evaluator loop - futures = [actor_id.collect_experience.remote() for actor_id in actor_ids] - futures.append(learner_id.train.remote()) - futures.append(eval_id.evaluate_loop.remote(learner_id)) - - ray.wait(futures, num_returns=len(futures)) - - print("All done.") - diff --git a/deprecated/test_recurrence.py 
b/deprecated/test_recurrence.py deleted file mode 100644 index f8d1c484..00000000 --- a/deprecated/test_recurrence.py +++ /dev/null @@ -1,22 +0,0 @@ -import torch -import torch.nn as nn -import numpy as np -from rl.policies.recurrent import RecurrentNet - -x = torch.ones(1, 4) -actor = RecurrentNet(4, 3) - -actions = [] -loss = nn.MSELoss() - -for i in range(5): - actions.append(actor.forward(x)) - print(actions) - -actions = torch.cat([action for action in actions]) -print(actions) -target = torch.randn(5, 3) - -grad = loss(actions, target) -grad.backward() -print("success!") diff --git a/deprecated/test_reference_traj.py b/deprecated/test_reference_traj.py deleted file mode 100644 index 0400c4e8..00000000 --- a/deprecated/test_reference_traj.py +++ /dev/null @@ -1,43 +0,0 @@ -import matplotlib.pyplot as plt - -import numpy as np - -from cassie import CassieEnv, CassieTSEnv, UnifiedCassieIKEnv, CassieEnv_nodelta, CassieEnv_rand_dyn, CassieEnv_speed_dfreq - -# env_fn = partial(CassieIKEnv, "walking", clock_based=True, state_est=state_est, speed=speed) -env = UnifiedCassieIKEnv("walking", clock_based=True, state_est=True) - - -env.reset() -# env.render() - -qposes = [] -qvels = [] - -# manually set the environment's speed - -env.speed = 0.0 - -print("\nTesting Stuff\n") - -print(env.phaselen) - -for i in range(env.phaselen): - - """ - output of get_ref_state for clock and state estimator - np.concatenate([qpos[pos_index], qvel[vel_index], np.concatenate((clock, [self.speed]))]]) - """ - - # get reference state - pos, vel = env.get_ref_state(i) - - # extract qpos, qvel - qposes.append(pos) - qvels.append(vel) - -# plot the qpos -fig = plt.figure(figsize=(10,10)) -ax = fig.add_subplot(111) -ax.plot(qposes) -plt.savefig('ref_qposes.png') \ No newline at end of file diff --git a/deprecated/tests/test_ppo.py b/deprecated/tests/test_ppo.py deleted file mode 100644 index 8216cf32..00000000 --- a/deprecated/tests/test_ppo.py +++ /dev/null @@ -1,187 +0,0 @@ -from collections import defaultdict - -import pytest -import numpy as np - -import torch - -from rl.policies import GaussianMLP -from rl.algos import PPO - -class SampleTesterEnv: - def __init__(self, obs_dim, action_dim, done_state=10, gamma=0.99): - """ - A simple environment that unit tests whether or the - experience buffer and trajectory sampling code are - producing the correct output. This is to test for things - like off-by-one errors in the experience buffers or - reward-to-go calculations. - - In other words: - - Makes sure the "experience table" is of the form: - - -------------------- - s_0 | a_0 | r_0 - -------------------- - . . . - . . . - . . . - -------------------- - s_T | a_T | r_T - -------------------- - s_T+1 | | - -------------------- - - where entries are defined by the MDP transitions: - - s_0 -> (s_1, a_0, r_0) -> ... -> (s_T+1, a_T, r_T) - - """ - - self.observation_space = np.zeros(obs_dim) - self.action_space = np.zeros(action_dim) - - self.state = 0 - self.obs_dim = obs_dim - - # TODO: add GAE support? 
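The `SampleTesterEnv` docstring above is mostly about catching off-by-one errors in the experience buffer and reward-to-go computation. The recursion the later assertions rebuild is simply `R_t = r_t + gamma * R_{t+1}`, restarted at episode boundaries; a standalone example with toy rewards:

```python
import numpy as np

gamma = 0.99
rewards = np.array([0.0, 1.0, 2.0, 3.0])   # r_0 .. r_T for one trajectory

returns, R = [], 0.0
for r in reversed(rewards):
    R = r + gamma * R            # reward-to-go: R_t = r_t + gamma * R_{t+1}
    returns.insert(0, R)

# returns[0] == 0 + 0.99*1 + 0.99**2 * 2 + 0.99**3 * 3
assert np.isclose(returns[0], 0.99 * 1 + 0.99**2 * 2 + 0.99**3 * 3)
```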
- self.gamma = gamma - - self.done = done_state - - self.actions = [] - - def step(self, action): - self.state += 1 - - output = np.ones(shape=(1, self.obs_dim)) * self.state - - done = (self.state % self.done) == 0 - - #print("real: ", done) - - # the first reward corresponds to the second state - reward = np.ones(shape=(1, 1)) * (self.state - 1) - - self.actions.append(action.squeeze(0)) # TODO - - return output, reward, done, None - - def reset(self): - self.state = 0 - - output = np.ones(shape=(1, self.obs_dim)) * self.state - return output - - -@pytest.mark.parametrize("num_steps, obs_dim, action_dim", [ - (5, 1, 1), - (10, 1, 1), - (25, 1, 1), - (10, 80, 10), - (30, 80, 10), - (35, 80, 10) -]) -def test_ppo_sample(num_steps, obs_dim, action_dim): - # useful for debugging - np.set_printoptions(threshold=10000) - torch.set_printoptions(threshold=10000) - - # TODO: test value est bootstrap for truncated trajectories - - gamma = 0.99 - - env = SampleTesterEnv(obs_dim=obs_dim, action_dim=action_dim, gamma=gamma) - policy = GaussianMLP(obs_dim, action_dim) - - # don't need to specify args that don't affect ppo.sample() - args = defaultdict(lambda: None, {'gamma': gamma}) - - algo = PPO(args) - - memory = algo.sample(env, policy, num_steps, 100) - - states, actions, rewards, returns = map(torch.Tensor, - (memory.states, memory.actions, memory.rewards, memory.returns) - ) - - num_steps = states.shape[0] - - assert states.shape == (num_steps, obs_dim) - assert actions.shape == (num_steps, action_dim) - assert rewards.shape == (num_steps, 1) - assert rewards.shape == (num_steps, 1) - - expected_states = np.array([(np.ones(shape=(obs_dim,)) * (s % env.done)) for s in range(num_steps)]) - assert np.allclose(states, expected_states) - - expected_rewards = np.array([(np.ones(shape=(1)) * (s % env.done)) for s in range(num_steps)]) - assert np.allclose(rewards, expected_rewards) - - expected_actions = np.array(env.actions) - assert np.allclose(actions, expected_actions) - - expected_returns, R = [], 0 - for r in reversed(expected_rewards): - R = R * gamma + r - - expected_returns.insert(0, R.copy()) - - if r == 0: # this only happens on initial state, so restart the return - R = 0 - - expected_returns = np.array(expected_returns) - assert np.allclose(returns, expected_returns) - - -@pytest.mark.parametrize("num_steps, obs_dim, action_dim", [ - (5, 1, 1), - (10, 1, 1), - (25, 1, 1), - (10, 80, 10), - (30, 80, 10), - (35, 80, 10) -]) -def test_ppo_sample_parallel(num_steps, obs_dim, action_dim): - # useful for debugging - np.set_printoptions(threshold=10000) - torch.set_printoptions(threshold=10000) - - # TODO: test value est bootstrap for truncated trajectories - - gamma = 0.99 - - from functools import partial - - env = SampleTesterEnv(obs_dim=obs_dim, action_dim=action_dim, gamma=gamma) - env_fn = partial( - SampleTesterEnv, - obs_dim=obs_dim, - action_dim=action_dim, - gamma=gamma - ) - - policy = GaussianMLP(obs_dim, action_dim) - - # don't need to specify args that don't affect ppo.sample() - args = defaultdict(lambda: None, {'gamma': gamma, 'num_procs': 4}) - - algo = PPO(args) - - memory = algo.sample_parallel(env_fn, policy, num_steps, 100) - - expected_memory = algo.sample(env, policy, 40, 100) - - #breakpoint() - - assert np.allclose(memory.states, expected_memory.states) - #assert np.allclose(memory.actions, expected_memory.actions) - assert np.allclose(memory.rewards, expected_memory.rewards) - #assert np.allclose(memory.returns, expected_memory.returns) - assert 
np.allclose(memory.returns, expected_memory.returns) - - assert np.allclose(memory.ep_returns, expected_memory.ep_returns) - assert np.allclose(memory.ep_lens, expected_memory.ep_lens) - -test_ppo_sample_parallel(5, 1, 1) \ No newline at end of file diff --git a/deprecated/validate.py b/deprecated/validate.py deleted file mode 100644 index ea0670e1..00000000 --- a/deprecated/validate.py +++ /dev/null @@ -1,212 +0,0 @@ -#validate the sensitivity of policy to environment parameters - -##########To do######### -#Formalize desired output -#Get to run with user input of env and policy without iteration -#Iterate through each parameter - -def iterativeValidation(policy, env, max_traj_len=1000, visualize=True, env_name=None, speed=0.0, state_est=True, clock_based=False): - # Follow apex.py and use an env_factory, or expect an env on input? - - #if env_name is None: - # env = env_factory(policy.env_name, speed=speed, state_est=state_est, clock_based=clock_based)() - #else: - # env = env_factory(env_name, speed=speed, state_est=state_est, clock_based=clock_based)() - - # This stuff was ripped out from Jonah's dynamics randomization. - # We are leaving it as is while building the infrastructure for testing. - # Future plan: Put all the damp/mass ranges into a tuple and iterate through, - # at each iteration running a test with the current parameter randomized, and all - # others default. Then log results and output in a sane and useful way. - - damp = env.default_damping - weak_factor = 0.5 - strong_factor = 1.5 - pelvis_damp_range = [[damp[0], damp[0]], - [damp[1], damp[1]], - [damp[2], damp[2]], - [damp[3], damp[3]], - [damp[4], damp[4]], - [damp[5], damp[5]]] # 0->5 - - hip_damp_range = [[damp[6]*weak_factor, damp[6]*strong_factor], - [damp[7]*weak_factor, damp[7]*strong_factor], - [damp[8]*weak_factor, damp[8]*strong_factor]] # 6->8 and 19->21 - - achilles_damp_range = [[damp[9]*weak_factor, damp[9]*strong_factor], - [damp[10]*weak_factor, damp[10]*strong_factor], - [damp[11]*weak_factor, damp[11]*strong_factor]] # 9->11 and 22->24 - - knee_damp_range = [[damp[12]*weak_factor, damp[12]*strong_factor]] # 12 and 25 - shin_damp_range = [[damp[13]*weak_factor, damp[13]*strong_factor]] # 13 and 26 - tarsus_damp_range = [[damp[14], damp[14]]] # 14 and 27 - heel_damp_range = [[damp[15], damp[15]]] # 15 and 28 - fcrank_damp_range = [[damp[16]*weak_factor, damp[16]*strong_factor]] # 16 and 29 - prod_damp_range = [[damp[17], damp[17]]] # 17 and 30 - foot_damp_range = [[damp[18]*weak_factor, damp[18]*strong_factor]] # 18 and 31 - - side_damp = hip_damp_range + achilles_damp_range + knee_damp_range + shin_damp_range + tarsus_damp_range + heel_damp_range + fcrank_damp_range + prod_damp_range + foot_damp_range - damp_range = pelvis_damp_range + side_damp + side_damp - damp_noise = [np.random.uniform(a, b) for a, b in damp_range] - - hi = 1.3 - lo = 0.7 - m = env.default_mass - pelvis_mass_range = [[lo*m[1], hi*m[1]]] # 1 - hip_mass_range = [[lo*m[2], hi*m[2]], # 2->4 and 14->16 - [lo*m[3], hi*m[3]], - [lo*m[4], hi*m[4]]] - - achilles_mass_range = [[lo*m[5], hi*m[5]]] # 5 and 17 - knee_mass_range = [[lo*m[6], hi*m[6]]] # 6 and 18 - knee_spring_mass_range = [[lo*m[7], hi*m[7]]] # 7 and 19 - shin_mass_range = [[lo*m[8], hi*m[8]]] # 8 and 20 - tarsus_mass_range = [[lo*m[9], hi*m[9]]] # 9 and 21 - heel_spring_mass_range = [[lo*m[10], hi*m[10]]] # 10 and 22 - fcrank_mass_range = [[lo*m[11], hi*m[11]]] # 11 and 23 - prod_mass_range = [[lo*m[12], hi*m[12]]] # 12 and 24 - foot_mass_range = [[lo*m[13], hi*m[13]]] # 13 
and 25 - - side_mass = hip_mass_range + achilles_mass_range \ - + knee_mass_range + knee_spring_mass_range \ - + shin_mass_range + tarsus_mass_range \ - + heel_spring_mass_range + fcrank_mass_range \ - + prod_mass_range + foot_mass_range - - mass_range = [[0, 0]] + pelvis_mass_range + side_mass + side_mass - mass_noise = [np.random.uniform(a, b) for a, b in mass_range] - - delta_y_min, delta_y_max = env.default_ipos[4] - 0.07, env.default_ipos[4] + 0.07 - delta_z_min, delta_z_max = env.default_ipos[5] - 0.04, env.default_ipos[5] + 0.04 - com_noise = [0, 0, 0] + [np.random.uniform(-0.25, 0.06)] + [np.random.uniform(delta_y_min, delta_y_max)] + [np.random.uniform(delta_z_min, delta_z_max)] + list(env.default_ipos[6:]) - - fric_noise = [np.random.uniform(0.4, 1.4)] + [np.random.uniform(3e-3, 8e-3)] + list(env.default_fric[2:]) - - env.sim.set_dof_damping(np.clip(damp_noise, 0, None)) - env.sim.set_body_mass(np.clip(mass_noise, 0, None)) - env.sim.set_body_ipos(com_noise) - env.sim.set_ground_friction(np.clip(fric_noise, 0, None)) - - # From policy_eval - while True: - state = env.reset() - done = False - timesteps = 0 - eval_reward = 0 - while not done and timesteps < max_traj_len: - if hasattr(env, 'simrate'): - start = time.time() - - action = policy.forward(torch.Tensor(state)).detach().numpy() - state, reward, done, _ = env.step(action) - if visualize: - env.render() - eval_reward += reward - timesteps += 1 - - if hasattr(env, 'simrate'): - # assume 30hz (hack) - end = time.time() - delaytime = max(0, 1000 / 30000 - (end-start)) - time.sleep(delaytime) - - print("Eval reward: ", eval_reward) - -# Testing to see if the above is even working - -from apex import env_factory -import torch -env = env_factory("CassieIK-v0") -#policy = torch.load("./trained_models/ddpg/ddpg_actor.pt") -policy = torch.load("./logs/ddpg/Hopper-v3/538e63-seed0/actor.pt") -iterativeValidation(policy, env) - -#nbody layout: -# 0: worldbody (zero) -# 1: pelvis - -# 2: left hip roll -# 3: left hip yaw -# 4: left hip pitch -# 5: left achilles rod -# 6: left knee -# 7: left knee spring -# 8: left shin -# 9: left tarsus -# 10: left heel spring -# 12: left foot crank -# 12: left plantar rod -# 13: left foot - -# 14: right hip roll -# 15: right hip yaw -# 16: right hip pitch -# 17: right achilles rod -# 18: right knee -# 19: right knee spring -# 20: right shin -# 21: right tarsus -# 22: right heel spring -# 23: right foot crank -# 24: right plantar rod -# 25: right foot - - -# qpos layout -# [ 0] Pelvis x -# [ 1] Pelvis y -# [ 2] Pelvis z -# [ 3] Pelvis orientation qw -# [ 4] Pelvis orientation qx -# [ 5] Pelvis orientation qy -# [ 6] Pelvis orientation qz -# [ 7] Left hip roll (Motor [0]) -# [ 8] Left hip yaw (Motor [1]) -# [ 9] Left hip pitch (Motor [2]) -# [10] Left achilles rod qw -# [11] Left achilles rod qx -# [12] Left achilles rod qy -# [13] Left achilles rod qz -# [14] Left knee (Motor [3]) -# [15] Left shin (Joint [0]) -# [16] Left tarsus (Joint [1]) -# [17] Left heel spring -# [18] Left foot crank -# [19] Left plantar rod -# [20] Left foot (Motor [4], Joint [2]) -# [21] Right hip roll (Motor [5]) -# [22] Right hip yaw (Motor [6]) -# [23] Right hip pitch (Motor [7]) -# [24] Right achilles rod qw -# [25] Right achilles rod qx -# [26] Right achilles rod qy -# [27] Right achilles rod qz -# [28] Right knee (Motor [8]) -# [29] Right shin (Joint [3]) -# [30] Right tarsus (Joint [4]) -# [31] Right heel spring -# [32] Right foot crank -# [33] Right plantar rod -# [34] Right foot (Motor [9], Joint [5]) - -# qvel layout -# 
[ 0] Pelvis x -# [ 1] Pelvis y -# [ 2] Pelvis z -# [ 3] Pelvis orientation wx -# [ 4] Pelvis orientation wy -# [ 5] Pelvis orientation wz -# [ 6] Left hip roll (Motor [0]) -# [ 7] Left hip yaw (Motor [1]) -# [ 8] Left hip pitch (Motor [2]) -# [ 9] Left knee (Motor [3]) -# [10] Left shin (Joint [0]) -# [11] Left tarsus (Joint [1]) -# [12] Left foot (Motor [4], Joint [2]) -# [13] Right hip roll (Motor [5]) -# [14] Right hip yaw (Motor [6]) -# [15] Right hip pitch (Motor [7]) -# [16] Right knee (Motor [8]) -# [17] Right shin (Joint [3]) -# [18] Right tarsus (Joint [4]) -# [19] Right foot (Motor [9], Joint [5]) diff --git a/img/output.gif b/img/output.gif new file mode 100644 index 00000000..ba560fc4 Binary files /dev/null and b/img/output.gif differ diff --git a/img/output2.gif b/img/output2.gif new file mode 100644 index 00000000..c8d09b97 Binary files /dev/null and b/img/output2.gif differ diff --git a/plot_policy.py b/plot_policy.py index 3d127666..ef82776b 100644 --- a/plot_policy.py +++ b/plot_policy.py @@ -4,11 +4,7 @@ from torch.autograd import Variable import time, os, sys -from util import env_factory -<<<<<<< HEAD -from rl.policies.actor import Gaussian_FF_Actor -======= ->>>>>>> ed7815cacac97f21f6b66cbbf8f22c610e5f1d19 +from util.env import env_factory import argparse import pickle @@ -28,30 +24,40 @@ print("pre steps: {}\tnum_steps: {}".format(args.pre_steps, args.num_steps)) print("pre speed: {}\tplot_speed: {}".format(args.pre_speed, args.plot_speed)) -# RUN_NAME = run_args.run_name if run_args.run_name != None else "plot" - -# RUN_NAME = "7b7e24-seed0" -# POLICY_PATH = "../trained_models/ppo/Cassie-v0/" + RUN_NAME + "/actor.pt" - -# Load environment and policy -if (not hasattr('run_args', 'simrate')): - run_args.simrate = 50 -env_fn = env_factory(run_args.env_name, traj=run_args.traj, simrate=run_args.simrate, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, - mirror=False, clock_based=run_args.clock_based, reward="iros_paper", history=run_args.history) +# Load environment +env_fn = env = env_factory( + run_args.env_name, + command_profile=run_args.command_profile, + input_profile=run_args.input_profile, + simrate=run_args.simrate, + dynamics_randomization=run_args.dyn_random, + mirror=run_args.mirror, + learn_gains=run_args.learn_gains, + reward=run_args.reward, + history=run_args.history, + no_delta=run_args.no_delta, + traj=run_args.traj, + ik_baseline=run_args.ik_baseline +) cassie_env = env_fn() + +# Load policy policy = torch.load(os.path.join(args.path, "actor.pt")) policy.eval() - if hasattr(policy, 'init_hidden_state'): policy.init_hidden_state() -obs_dim = cassie_env.observation_space.shape[0] # TODO: could make obs and ac space static properties +obs_dim = cassie_env.observation_space.shape[0] # TODO: could make obs and ac space static properties action_dim = cassie_env.action_space.shape[0] -no_delta = cassie_env.no_delta +if hasattr(cassie_env, "no_delta"): + no_delta = cassie_env.no_delta +else: + no_delta = True limittargs = False lininterp = False offset = cassie_env.offset +learn_gains = cassie_env.learn_gains num_steps = args.num_steps pre_steps = args.pre_steps @@ -89,6 +95,8 @@ cassie_env.update_speed(args.plot_speed) for i in range(num_steps): action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy() + if learn_gains: + action, gain_deltas = action[:10], action[10:] lin_steps = int(60 * 3/4) # Number of steps to interpolate over. 
Should be between 0 and self.simrate alpha = 1 / lin_steps for j in range(simrate): @@ -122,7 +130,10 @@ zero2zero_clock = 0.5*(np.cos(2*np.pi/(cassie_env.phaselen+1)*(cassie_env.phase-(cassie_env.phaselen+1)/2)) + 1) one2one_clock = 0.5*(np.cos(2*np.pi/(cassie_env.phaselen+1)*cassie_env.phase) + 1) - cassie_env.step_simulation(real_action) + if learn_gains: + cassie_env.step_simulation(real_action, learned_gains=gain_deltas) + else: + cassie_env.step_simulation(real_action) curr_qpos = cassie_env.sim.qpos() curr_qvel = cassie_env.sim.qvel() motor_pos[i*simrate+j, :] = np.array(curr_qpos)[cassie_env.pos_idx] diff --git a/rl/algos/__init__.py b/rl/algos/__init__.py index 9afe683a..8b137891 100644 --- a/rl/algos/__init__.py +++ b/rl/algos/__init__.py @@ -1,3 +1 @@ -from .ars import ARS -from .ppo import PPO -from .dpg import DPG + diff --git a/rl/algos/ars.py b/rl/algos/ars.py index 33485793..c371ffc0 100644 --- a/rl/algos/ars.py +++ b/rl/algos/ars.py @@ -3,7 +3,8 @@ import ray import time -from apex import env_factory, create_logger +from util.env import env_factory +from util.log import create_logger # This function adapted from https://github.com/modestyachts/ARS/blob/master/code/shared_noise.py # (Thanks to Horia Mania) diff --git a/rl/algos/async_td3.py b/rl/algos/async_td3.py index 7ae9956e..f59917f6 100644 --- a/rl/algos/async_td3.py +++ b/rl/algos/async_td3.py @@ -4,7 +4,7 @@ from rl.policies.critic import Dual_Q_Critic as Critic # Plot results -from apex import create_logger +from util.log import create_logger import time import os @@ -26,7 +26,8 @@ # TODO: create way to resume experiment by loading actor and critic pt files def run_experiment(args): torch.set_num_threads(1) - from apex import env_factory, create_logger + from util.env import env_factory + from util.log import create_logger # Start ray ray.init(num_gpus=0, include_webui=True, redis_address=args.redis_address) diff --git a/rl/algos/ppo.py b/rl/algos/ppo.py index 6b8b4f67..2b7e55ff 100644 --- a/rl/algos/ppo.py +++ b/rl/algos/ppo.py @@ -505,19 +505,11 @@ def train(self, self.save(policy, critic) def run_experiment(args): - from util import env_factory, create_logger - - # torch.set_num_threads(1) - - if args.ik_baseline and args.no_delta: - args.ik_baseline = False - - # TODO: remove this at some point once phase_based is stable - if args.phase_based: - args.clock_based = False + from util.env import env_factory + from util.log import create_logger # wrapper function for creating parallelized envs - env_fn = env_factory(args.env_name, traj=args.traj, simrate=args.simrate, phase_based=args.phase_based, clock_based=args.clock_based, state_est=args.state_est, no_delta=args.no_delta, learn_gains=args.learn_gains, ik_baseline=args.ik_baseline, dynamics_randomization=args.dyn_random, mirror=args.mirror, reward=args.reward, history=args.history) + env_fn = env_factory(args.env_name, simrate=args.simrate, command_profile=args.command_profile, input_profile=args.input_profile, learn_gains=args.learn_gains, dynamics_randomization=args.dyn_random, reward=args.reward, history=args.history, mirror=args.mirror, ik_baseline=args.ik_baseline, no_delta=args.no_delta, traj=args.traj) obs_dim = env_fn().observation_space.shape[0] action_dim = env_fn().action_space.shape[0] @@ -566,20 +558,6 @@ def run_experiment(args): algo = PPO(args=vars(args), save_path=logger.dir) - print() - print("Environment: {}".format(args.env_name)) - print(" ├ traj: {}".format(args.traj)) - print(" ├ phase_based: {}".format(args.phase_based)) - print(" ├ 
clock_based: {}".format(args.clock_based)) - print(" ├ state_est: {}".format(args.state_est)) - print(" ├ dyn_random: {}".format(args.dyn_random)) - print(" ├ no_delta: {}".format(args.no_delta)) - print(" ├ mirror: {}".format(args.mirror)) - print(" ├ ik baseline: {}".format(args.ik_baseline)) - print(" ├ learn gains: {}".format(args.learn_gains)) - print(" ├ reward: {}".format(env_fn().reward_func)) - print(" └ obs_dim: {}".format(obs_dim)) - print() print("Synchronous Distributed Proximal Policy Optimization:") print(" ├ recurrent: {}".format(args.recurrent)) diff --git a/test_policy.py b/test_policy.py index e2750a30..dbcb11ae 100644 --- a/test_policy.py +++ b/test_policy.py @@ -1,12 +1,11 @@ -from cassie import CassieEnv, CassiePlayground -from rl.policies.actor import GaussianMLP_Actor +from cassie import CassiePlayground from tools.test_commands import * from tools.eval_perturb import * from tools.eval_mission import * from tools.compare_pols import * from tools.eval_sensitivity import * from collections import OrderedDict -from util import env_factory +from util.env import env_factory import torch import pickle @@ -47,8 +46,20 @@ # env_fn = partial(CassieEnv, traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random) # Make mirror False so that env_factory returns a regular wrap env function and not a symmetric env function that can be called to return # a cassie environment (symmetric env cannot be called to make another env) -env_fn = env_factory(run_args.env_name, traj=run_args.traj, simrate=run_args.simrate, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, - mirror=False, clock_based=run_args.clock_based, reward=run_args.reward, history=run_args.history) +env_fn = env = env_factory( + run_args.env_name, + command_profile=run_args.command_profile, + input_profile=run_args.input_profile, + simrate=run_args.simrate, + dynamics_randomization=run_args.dyn_random, + mirror=run_args.mirror, + learn_gains=run_args.learn_gains, + reward=run_args.reward, + history=run_args.history, + no_delta=run_args.no_delta, + traj=run_args.traj, + ik_baseline=run_args.ik_baseline +) cassie_env = env_fn() policy = torch.load(os.path.join(args.path, "actor.pt")) if args.eval: @@ -56,25 +67,24 @@ if hasattr(policy, 'init_hidden_state'): policy.init_hidden_state() - -#TODO: make returning/save data in file inside function consist for all testing functions +# TODO: make returning/save data in file inside function consist for all testing functions def test_commands(cassie_env, policy, args): print("Testing speed and orient commands") if args.n_procs == 1: - save_data = eval_commands(cassie_env, policy, num_steps=args.n_steps, num_commands=args.n_commands, + save_data = eval_commands(cassie_env, policy, num_steps=args.n_steps, num_commands=args.n_commands, max_speed=args.max_speed, min_speed=args.min_speed, num_iters=args.n_iter) np.save(os.path.join(args.path, "eval_commands.npy"), save_data) else: - eval_commands_multi(env_fn, policy, num_steps=args.n_steps, num_commands=args.n_commands, max_speed=args.max_speed, + eval_commands_multi(env_fn, policy, num_steps=args.n_steps, num_commands=args.n_commands, max_speed=args.max_speed, min_speed=args.min_speed, num_iters=args.n_iter, num_procs=args.n_procs, filename=os.path.join(args.path, "eval_commands.npy")) def test_perturbs(cassie_env, policy, args): print("Testing perturbations") if args.n_procs == 1: - save_data = 
compute_perturbs(cassie_env, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, + save_data = compute_perturbs(cassie_env, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles) else: - save_data = compute_perturbs_multi(env_fn, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, + save_data = compute_perturbs_multi(env_fn, policy, wait_time=args.wait_time, perturb_duration=args.pert_dur, perturb_size=args.pert_size, perturb_incr=args.pert_incr, perturb_body=args.pert_body, num_angles=args.num_angles, num_procs=args.n_procs) np.save(os.path.join(args.path, "eval_perturbs.npy"), save_data) @@ -92,7 +102,7 @@ def test_perturbs(cassie_env, policy, args): if not args.viz: print("Testing missions") save_data = [] - + for mission in missions: print(mission + " mission:") cassie_env = CassiePlayground(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, mission=mission) diff --git a/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.info b/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.info index 5fa94265..5d3940b6 100644 --- a/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.info +++ b/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.info @@ -1,31 +1,13 @@ -algo_name: ppo -clip: 0.2 -clock_based: True +command_profile: clock dyn_random: False -entropy_coeff: 0.0 env_name: Cassie-v0 -epochs: 5 -eps: 1e-05 -gamma: 0.99 history: 0 -input_norm_steps: 100 -lam: 0.95 -lr: 0.0001 -max_grad_norm: 0.05 -max_traj_len: 300 -minibatch_size: 2048 +ik_baseline: None +input_profile: full +learn_gains: False mirror: True -n_itr: 20000 -name: model no_delta: True -num_procs: 64 -num_steps: 187 -previous: None recurrent: False -redis_address: None reward: 5k_speed_reward simrate: 60 -state_est: True -traj: walking -use_gae: False -viz_port: 8097 +traj: None diff --git a/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl b/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl index 74d2df1d..2fb18138 100644 Binary files a/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl and b/trained_models/nodelta_neutral_StateEst_symmetry_speed0-3_freq1-2/experiment.pkl differ diff --git a/util.py b/util.py deleted file mode 100644 index ccc8c817..00000000 --- a/util.py +++ /dev/null @@ -1,752 +0,0 @@ -import torch -import hashlib, os, pickle -from collections import OrderedDict -import sys, time -from cassie.quaternion_function import * - -from tkinter import * -import multiprocessing as mp -from cassie.phase_function import LivePlot -import matplotlib.pyplot as plt - -import tty -import termios -import select -import numpy as np -from cassie import CassieEnv, CassieMinEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_footdist, CassieEnv_noaccel_footdist, CassieEnv_novel_footdist, CassieEnv_mininput -from cassie.cassiemujoco import CassieSim - -class color: - BOLD = '\033[1m\033[48m' - END = '\033[0m' - ORANGE = '\033[38;5;202m' - BLACK = '\033[38;5;240m' - - -def print_logo(subtitle="", option=2): - print() - print(color.BOLD + color.ORANGE + " .8. 
" + color.BLACK + " 8 888888888o " + color.ORANGE + "8 8888888888 `8.`8888. ,8' ") - print(color.BOLD + color.ORANGE + " .888. " + color.BLACK + " 8 8888 `88. " + color.ORANGE + "8 8888 `8.`8888. ,8' ") - print(color.BOLD + color.ORANGE + " :88888. " + color.BLACK + " 8 8888 `88 " + color.ORANGE + "8 8888 `8.`8888. ,8' ") - print(color.BOLD + color.ORANGE + " . `88888. " + color.BLACK + " 8 8888 ,88 " + color.ORANGE + "8 8888 `8.`8888.,8' ") - print(color.BOLD + color.ORANGE + " .8. `88888. " + color.BLACK + " 8 8888. ,88' " + color.ORANGE + "8 888888888888 `8.`88888' ") - print(color.BOLD + color.ORANGE + " .8`8. `88888. " + color.BLACK + " 8 888888888P' " + color.ORANGE + "8 8888 .88.`8888. ") - print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8'`8.`8888. ") - print(color.BOLD + color.ORANGE + " .8' `8. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ") - print(color.BOLD + color.ORANGE + " .888888888. `88888. " + color.BLACK + " 8 8888 " + color.ORANGE + "8 8888 .8' `8.`8888. ") - print(color.BOLD + color.ORANGE + ".8' `8. `88888." + color.BLACK + " 8 8888 " + color.ORANGE + "8 888888888888 .8' `8.`8888. " + color.END) - print("\n") - print(subtitle) - print("\n") - -def env_factory(path, traj="walking", simrate=50, phase_based=False, clock_based=True, state_est=True, dynamics_randomization=True, mirror=False, no_delta=False, ik_baseline=False, learn_gains=False, reward=None, history=0, fixed_speed=None, **kwargs): - from functools import partial - - """ - Returns an *uninstantiated* environment constructor. - - Since environments containing cpointers (e.g. Mujoco envs) can't be serialized, - this allows us to pass their constructors to Ray remote functions instead - (since the gym registry isn't shared across ray subprocesses we can't simply - pass gym.make() either) - - Note: env.unwrapped.spec is never set, if that matters for some reason. 
- """ - - - # Custom Cassie Environment - if path in ['Cassie-v0', 'CassieMin-v0', 'CassiePlayground-v0', 'CassieStandingEnv-v0', 'CassieNoaccelFootDistOmniscient', 'CassieFootDist', 'CassieNoaccelFootDist', 'CassieNoaccelFootDistNojoint', 'CassieNovelFootDist', 'CassieMinInput']: - from cassie import CassieEnv, CassieMinEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_footdist, CassieEnv_noaccel_footdist, CassieEnv_noaccel_footdist_nojoint, CassieEnv_novel_footdist, CassieEnv_mininput - - if path == 'Cassie-v0': - # env_fn = partial(CassieEnv, traj=traj, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, reward=reward, history=history) - env_fn = partial(CassieEnv, traj=traj, simrate=simrate, phase_based=phase_based, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, learn_gains=learn_gains, ik_baseline=ik_baseline, reward=reward, history=history, fixed_speed=fixed_speed) - elif path == 'CassieMin-v0': - env_fn = partial(CassieMinEnv, traj=traj, simrate=simrate, phase_based=phase_based, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, learn_gains=learn_gains, ik_baseline=ik_baseline, reward=reward, history=history, fixed_speed=fixed_speed) - elif path == 'CassiePlayground-v0': - env_fn = partial(CassiePlayground, traj=traj, simrate=simrate, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, reward=reward, history=history) - elif path == 'CassieStandingEnv-v0': - env_fn = partial(CassieStandingEnv, simrate=simrate, state_est=state_est) - elif path == 'CassieNoaccelFootDistOmniscient': - env_fn = partial(CassieEnv_noaccel_footdist_omniscient, simrate=simrate, traj=traj, clock_based=clock_based, state_est=state_est, dynamics_randomization=True, no_delta=no_delta, reward=reward, history=history) - elif path == 'CassieFootDist': - env_fn = partial(CassieEnv_footdist, traj=traj, simrate=simrate, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, reward=reward, history=history) - elif path == 'CassieNoaccelFootDist': - env_fn = partial(CassieEnv_noaccel_footdist, traj=traj, simrate=simrate, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, reward=reward, history=history) - elif path == "CassieNoaccelFootDistNojoint": - env_fn = partial(CassieEnv_noaccel_footdist_nojoint, traj=traj, simrate=simrate, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, reward=reward, history=history) - elif path == "CassieNovelFootDist": - env_fn = partial(CassieEnv_novel_footdist, traj=traj, simrate=simrate, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, reward=reward, history=history) - elif path == "CassieMinInput": - env_fn = partial(CassieEnv_mininput, traj=traj, simrate=simrate, clock_based=clock_based, state_est=state_est, dynamics_randomization=dynamics_randomization, no_delta=no_delta, reward=reward, history=history) - - - # TODO for Yesh: make mirrored_obs an attribute of environment, configured based on setup parameters - if mirror: - from rl.envs.wrappers import SymmetricEnv - env_fn = partial(SymmetricEnv, env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=[-5, -6, 7, 8, 
9, -0.1, -1, 2, 3, 4]) - - return env_fn - - # OpenAI Gym environment - else: - import gym - spec = gym.envs.registry.spec(path) - _kwargs = spec._kwargs.copy() - _kwargs.update(kwargs) - - try: - if callable(spec._entry_point): - cls = spec._entry_point(**_kwargs) - else: - cls = gym.envs.registration.load(spec._entry_point) - except AttributeError: - if callable(spec.entry_point): - cls = spec.entry_point(**_kwargs) - else: - cls = gym.envs.registration.load(spec.entry_point) - - return partial(cls, **_kwargs) - -# Logger stores in trained_models by default -def create_logger(args): - from torch.utils.tensorboard import SummaryWriter - """Use hyperparms to set a directory to output diagnostic files.""" - - arg_dict = args.__dict__ - assert "seed" in arg_dict, \ - "You must provide a 'seed' key in your command line arguments" - assert "logdir" in arg_dict, \ - "You must provide a 'logdir' key in your command line arguments." - assert "env_name" in arg_dict, \ - "You must provide a 'env_name' key in your command line arguments." - - # sort the keys so the same hyperparameters will always have the same hash - arg_dict = OrderedDict(sorted(arg_dict.items(), key=lambda t: t[0])) - - # remove seed so it doesn't get hashed, store value for filename - # same for logging directory - run_name = arg_dict.pop('run_name') - seed = str(arg_dict.pop("seed")) - logdir = str(arg_dict.pop('logdir')) - env_name = str(arg_dict['env_name']) - - # see if this run has a unique name, if so then that is going to be the name of the folder, even if it overrirdes - if run_name is not None: - logdir = os.path.join(logdir, env_name) - output_dir = os.path.join(logdir, run_name) - else: - # see if we are resuming a previous run, if we are mark as continued - if args.previous is not None: - if args.exchange_reward is not None: - output_dir = args.previous[0:-1] + "_NEW-" + args.reward - else: - print(args.previous[0:-1]) - output_dir = args.previous[0:-1] + '-cont' - else: - # get a unique hash for the hyperparameter settings, truncated at 10 chars - arg_hash = hashlib.md5(str(arg_dict).encode('ascii')).hexdigest()[0:6] + '-seed' + seed - logdir = os.path.join(logdir, env_name) - output_dir = os.path.join(logdir, arg_hash) - - # create a directory with the hyperparm hash as its name, if it doesn't - # already exist. 
- os.makedirs(output_dir, exist_ok=True) - - # Create a file with all the hyperparam settings in human-readable plaintext, - # also pickle file for resuming training easily - info_path = os.path.join(output_dir, "experiment.info") - pkl_path = os.path.join(output_dir, "experiment.pkl") - with open(pkl_path, 'wb') as file: - pickle.dump(args, file) - with open(info_path, 'w') as file: - for key, val in arg_dict.items(): - file.write("%s: %s" % (key, val)) - file.write('\n') - - logger = SummaryWriter(output_dir, flush_secs=0.1) # flush_secs=0.1 actually slows down quite a bit, even on parallelized set ups - print("Logging to " + color.BOLD + color.ORANGE + str(output_dir) + color.END) - - logger.dir = output_dir - return logger - -# #TODO: Add pausing, and window quiting along with other render functionality -# def eval_policy(policy, args, run_args): - -# import tty -# import termios -# import select -# import numpy as np -# from cassie import CassieEnv, CassiePlayground, CassieStandingEnv, CassieEnv_noaccel_footdist_omniscient, CassieEnv_footdist, CassieEnv_noaccel_footdist -# from cassie.cassiemujoco import CassieSim - -# def isData(): -# return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], []) - -# max_traj_len = args.traj_len -# visualize = not args.no_viz -# print("env name: ", run_args.env_name) -# if run_args.env_name is None: -# env_name = args.env_name -# else: -# env_name = run_args.env_name -# if env_name == "Cassie-v0": -# env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, phase_based=run_args.phase_based, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) -# elif env_name == "CassiePlayground-v0": -# env = CassiePlayground(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history, mission=args.mission) -# elif env_name == "CassieNoaccelFootDistOmniscient": -# env = CassieEnv_noaccel_footdist_omniscient(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) -# elif env_name == "CassieFootDist": -# env = CassieEnv_footdist(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) -# elif env_name == "CassieNoaccelFootDist": -# env = CassieEnv_noaccel_footdist(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) -# else: -# env = CassieStandingEnv(state_est=run_args.state_est) - -# if args.debug: -# env.debug = True - -# if args.terrain is not None and ".npy" in args.terrain: -# env.sim = CassieSim("cassie_hfield.xml") -# hfield_data = np.load(os.path.join("./cassie/cassiemujoco/terrains/", args.terrain)) -# env.sim.set_hfield_data(hfield_data.flatten()) - -# print(env.reward_func) - -# if hasattr(policy, 'init_hidden_state'): -# policy.init_hidden_state() - -# old_settings = termios.tcgetattr(sys.stdin) - -# orient_add = 0 - -# slowmo = False - -# if visualize: -# env.render() -# render_state = True -# try: -# tty.setcbreak(sys.stdin.fileno()) - -# state = env.reset_for_test() -# 
done = False -# timesteps = 0 -# eval_reward = 0 -# speed = 0.0 - -# env.update_speed(speed) - -# while render_state: - -# if isData(): -# c = sys.stdin.read(1) -# if c == 'w': -# speed += 0.1 -# elif c == 's': -# speed -= 0.1 -# elif c == 'j': -# env.phase_add += .1 -# print("Increasing frequency to: {:.1f}".format(env.phase_add)) -# elif c == 'h': -# env.phase_add -= .1 -# print("Decreasing frequency to: {:.1f}".format(env.phase_add)) -# elif c == 'l': -# orient_add += .1 -# print("Increasing orient_add to: ", orient_add) -# elif c == 'k': -# orient_add -= .1 -# print("Decreasing orient_add to: ", orient_add) - -# elif c == 'x': -# env.swing_duration += .01 -# print("Increasing swing duration to: ", env.swing_duration) -# elif c == 'z': -# env.swing_duration -= .01 -# print("Decreasing swing duration to: ", env.swing_duration) -# elif c == 'v': -# env.stance_duration += .01 -# print("Increasing stance duration to: ", env.stance_duration) -# elif c == 'c': -# env.stance_duration -= .01 -# print("Decreasing stance duration to: ", env.stance_duration) - -# elif c == '1': -# env.stance_mode = "zero" -# print("Stance mode: ", env.stance_mode) -# elif c == '2': -# env.stance_mode = "grounded" -# print("Stance mode: ", env.stance_mode) -# elif c == '3': -# env.stance_mode = "aerial" -# print("Stance mode: ", env.stance_mode) -# elif c == 'r': -# state = env.reset() -# speed = env.speed -# print("Resetting environment via env.reset()") -# elif c == 'p': -# push = 100 -# push_dir = 2 -# force_arr = np.zeros(6) -# force_arr[push_dir] = push -# env.sim.apply_force(force_arr) -# elif c == 't': -# slowmo = not slowmo -# print("Slowmo : ", slowmo) - -# env.update_speed(speed) -# # print(speed) -# print("speed: ", env.speed) - -# if hasattr(env, 'simrate'): -# start = time.time() - -# if (not env.vis.ispaused()): -# # Update Orientation -# env.orient_add = orient_add -# # quaternion = euler2quat(z=orient_add, y=0, x=0) -# # iquaternion = inverse_quaternion(quaternion) - -# # # TODO: Should probably not assume these indices. Should make them not hard coded -# # if env.state_est: -# # curr_orient = state[1:5] -# # curr_transvel = state[15:18] -# # else: -# # curr_orient = state[2:6] -# # curr_transvel = state[20:23] - -# # new_orient = quaternion_product(iquaternion, curr_orient) - -# # if new_orient[0] < 0: -# # new_orient = -new_orient - -# # new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion) - -# # if env.state_est: -# # state[1:5] = torch.FloatTensor(new_orient) -# # state[15:18] = torch.FloatTensor(new_translationalVelocity) -# # # state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware. 
-# # else: -# # state[2:6] = torch.FloatTensor(new_orient) -# # state[20:23] = torch.FloatTensor(new_translationalVelocity) - -# action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy() - -# state, reward, done, _ = env.step(action) - -# # if env.lfoot_vel[2] < -0.6: -# # print("left foot z vel over 0.6: ", env.lfoot_vel[2]) -# # if env.rfoot_vel[2] < -0.6: -# # print("right foot z vel over 0.6: ", env.rfoot_vel[2]) - -# eval_reward += reward -# timesteps += 1 -# qvel = env.sim.qvel() -# # print("actual speed: ", np.linalg.norm(qvel[0:2])) -# # print("commanded speed: ", env.speed) - -# if args.no_viz: -# yaw = quaternion2euler(new_orient)[2] -# print("stp = {} yaw = {:.2f} spd = {} ep_r = {:.2f} stp_r = {:.2f}".format(timesteps, yaw, speed, eval_reward, reward)) - -# if visualize: -# render_state = env.render() -# if hasattr(env, 'simrate'): -# # assume 40hz -# end = time.time() -# delaytime = max(0, 1000 / 40000 - (end-start)) -# if slowmo: -# while(time.time() - end < delaytime*10): -# env.render() -# time.sleep(delaytime) -# else: -# time.sleep(delaytime) - -# print("Eval reward: ", eval_reward) - -# finally: -# termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - -class EvalProcessClass(): - def __init__(self, args, run_args): - - if run_args.phase_based and not args.no_viz: - - self.plot_pipe, plotter_pipe = mp.Pipe() - self.plotter = LivePlot() - self.plot_process = mp.Process(target=self.plotter, args=(plotter_pipe,), daemon=True) - self.plot_process.start() - - #TODO: Add pausing, and window quiting along with other render functionality - def eval_policy(self, policy, args, run_args): - - def print_input_update(e): - print(f"\n\nstance dur.: {e.stance_duration:.2f}\t swing dur.: {e.swing_duration:.2f}\t stance mode: {e.stance_mode}\n") - - if run_args.phase_based and not args.no_viz: - send = self.plot_pipe.send - - def isData(): - return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], []) - - if args.debug: - args.stats = False - - if args.reward is None: - args.reward = run_args.reward - - max_traj_len = args.traj_len - visualize = not args.no_viz - print("env name: ", run_args.env_name) - if run_args.env_name is None: - env_name = args.env_name - else: - env_name = run_args.env_name - if env_name == "Cassie-v0": - env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, phase_based=run_args.phase_based, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieMin-v0": - env = CassieMinEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, phase_based=run_args.phase_based, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassiePlayground-v0": - env = CassiePlayground(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history, mission=args.mission) - elif env_name == "CassieNoaccelFootDistOmniscient": - env = CassieEnv_noaccel_footdist_omniscient(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieFootDist": - env = CassieEnv_footdist(traj=run_args.traj, 
state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieNoaccelFootDist": - env = CassieEnv_noaccel_footdist(traj=run_args.traj, simrate=run_args.simrate, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieNoaccelFootDistNojoint": - env = CassieEnv_noaccel_footdist_nojoint(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieNovelFootDist": - env = CassieEnv_novel_footdist(traj=run_args.traj, simrate=run_args.simrate, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta, reward=args.reward, history=run_args.history) - elif env_name == "CassieMinInput": - env = CassieEnv_mininput(traj=run_args.traj, simrate=run_args.simrate, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta, reward=args.reward, history=run_args.history) - else: - env = CassieStandingEnv(state_est=run_args.state_est) - - if args.debug: - env.debug = True - - if args.terrain is not None and ".npy" in args.terrain: - env.sim = CassieSim("cassie_hfield.xml") - hfield_data = np.load(os.path.join("./cassie/cassiemujoco/terrains/", args.terrain)) - env.sim.set_hfield_data(hfield_data.flatten()) - - print(env.reward_func) - print() - - if hasattr(policy, 'init_hidden_state'): - policy.init_hidden_state() - - old_settings = termios.tcgetattr(sys.stdin) - - orient_add = 0 - - slowmo = False - - if visualize: - env.render() - render_state = True - try: - tty.setcbreak(sys.stdin.fileno()) - - state = env.reset_for_test() - done = False - timesteps = 0 - eval_reward = 0 - speed = 0.0 - - env.update_speed(speed) - - while render_state: - - if isData(): - c = sys.stdin.read(1) - if c == 'w': - speed += 0.1 - elif c == 's': - speed -= 0.1 - elif c == 'j': - env.phase_add += .1 - # print("Increasing frequency to: {:.1f}".format(env.phase_add)) - elif c == 'h': - env.phase_add -= .1 - # print("Decreasing frequency to: {:.1f}".format(env.phase_add)) - elif c == 'l': - orient_add -= .1 - # print("Increasing orient_add to: ", orient_add) - elif c == 'k': - orient_add += .1 - # print("Decreasing orient_add to: ", orient_add) - - elif c == 'x': - env.swing_duration += .01 - print_input_update(env) - elif c == 'z': - env.swing_duration -= .01 - print_input_update(env) - elif c == 'v': - env.stance_duration += .01 - print_input_update(env) - elif c == 'c': - env.stance_duration -= .01 - print_input_update(env) - - elif c == '1': - env.stance_mode = "zero" - print_input_update(env) - elif c == '2': - env.stance_mode = "grounded" - print_input_update(env) - elif c == '3': - env.stance_mode = "aerial" - - elif c == 'r': - state = env.reset() - speed = env.speed - if hasattr(policy, 'init_hidden_state'): - policy.init_hidden_state() - print("Resetting environment via env.reset()") - elif c == 'p': - push = 100 - push_dir = 2 - force_arr = np.zeros(6) - force_arr[push_dir] = push - env.sim.apply_force(force_arr) - elif c == 't': - slowmo = not slowmo - print("Slowmo : \n", slowmo) - - env.update_speed(speed) - # 
print(speed) - - if env.phase_based and visualize: - send((env.swing_duration, env.stance_duration, env.strict_relaxer, env.stance_mode, env.have_incentive)) - - if args.stats: - print(f"act spd: {env.sim.qvel()[0]:.2f} cmd speed: {env.speed:.2f} phase add: {env.phase_add:.2f} orient add: {orient_add:.2f}", end="\r") - # print(f"act spd: {env.sim.qvel()[0]:.2f}\t cmd speed: {env.speed:.2f}\t phase add: {env.phase_add:.2f}\t orient add: {orient_add}", end="\r") - - if hasattr(env, 'simrate'): - start = time.time() - - if (not env.vis.ispaused()): - # Update Orientation - env.orient_add = orient_add - # quaternion = euler2quat(z=orient_add, y=0, x=0) - # iquaternion = inverse_quaternion(quaternion) - - # # TODO: Should probably not assume these indices. Should make them not hard coded - # if env.state_est: - # curr_orient = state[1:5] - # curr_transvel = state[15:18] - # else: - # curr_orient = state[2:6] - # curr_transvel = state[20:23] - - # new_orient = quaternion_product(iquaternion, curr_orient) - - # if new_orient[0] < 0: - # new_orient = -new_orient - - # new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion) - - # if env.state_est: - # state[1:5] = torch.FloatTensor(new_orient) - # state[15:18] = torch.FloatTensor(new_translationalVelocity) - # # state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware. - # else: - # state[2:6] = torch.FloatTensor(new_orient) - # state[20:23] = torch.FloatTensor(new_translationalVelocity) - - action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy() - - state, reward, done, _ = env.step(action) - - # if env.lfoot_vel[2] < -0.6: - # print("left foot z vel over 0.6: ", env.lfoot_vel[2]) - # if env.rfoot_vel[2] < -0.6: - # print("right foot z vel over 0.6: ", env.rfoot_vel[2]) - - eval_reward += reward - timesteps += 1 - qvel = env.sim.qvel() - # print("actual speed: ", np.linalg.norm(qvel[0:2])) - # print("commanded speed: ", env.speed) - - if args.no_viz: - yaw = quaternion2euler(new_orient)[2] - print("stp = {} yaw = {:.2f} spd = {} ep_r = {:.2f} stp_r = {:.2f}".format(timesteps, yaw, speed, eval_reward, reward)) - - if visualize: - render_state = env.render() - if hasattr(env, 'simrate'): - # assume 40hz - end = time.time() - delaytime = max(0, 1000 / 40000 - (end-start)) - if slowmo: - while(time.time() - end < delaytime*10): - env.render() - time.sleep(delaytime) - else: - time.sleep(delaytime) - - print("Eval reward: ", eval_reward) - - finally: - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - -def collect_data(policy, args, run_args): - wait_steps = 0 - num_cycles = 35 - speed = 0.0 - - if args.reward is None: - args.reward = run_args.reward - - if run_args.env_name is None: - env_name = args.env_name - else: - env_name = run_args.env_name - if env_name == "Cassie-v0": - env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, phase_based=run_args.phase_based, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassiePlayground-v0": - env = CassiePlayground(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history, mission=args.mission) - elif env_name == "CassieNoaccelFootDistOmniscient": - env = CassieEnv_noaccel_footdist_omniscient(traj=run_args.traj, 
state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieFootDist": - env = CassieEnv_footdist(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieNoaccelFootDist": - env = CassieEnv_noaccel_footdist(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - elif env_name == "CassieNoaccelFootDistNojoint": - env = CassieEnv_noaccel_footdist_nojoint(traj=run_args.traj, state_est=run_args.state_est, no_delta=run_args.no_delta, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, reward=args.reward, history=run_args.history) - else: - env = CassieStandingEnv(state_est=run_args.state_est) - - if hasattr(policy, 'init_hidden_state'): - policy.init_hidden_state() - - state = torch.Tensor(env.reset_for_test()) - env.update_speed(speed) - env.render() - - time_log, speed_log, grf_log = [], [], [] - - # print("iros: ", iros_env.simrate, iros_env.phaselen) - # print("aslip: ", aslip_env.simrate, aslip_env.phaselen) - - with torch.no_grad(): - - # Run few cycles to stabilize (do separate incase two envs have diff phaselens) - for i in range(wait_steps): - action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy() - state, reward, done, _ = env.step(action) - env.render() - print(f"act spd: {env.sim.qvel()[0]:.2f}\t cmd speed: {env.speed:.2f}") - # curr_qpos = aslip_env.sim.qpos() - # print("curr height: ", curr_qpos[2]) - - # Collect actual data (avg foot force over simrate) - # start_time = time.time() - # for i in range(math.floor(num_cycles*env.phaselen)+1): - # action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy() - # state, reward, done, _ = env.step(action) - # speed_log.append([env.speed, env.sim.qvel()[0]]) - - # # grf_log.append(np.concatenate([[time.time()-start_time],env.sim.get_foot_forces()])) - # grf_log.append([time.time()-start_time, env.l_foot_frc, env.r_foot_frc]) - # env.render() - # print(f"act spd: {env.sim.qvel()[0]:.2f}\t cmd speed: {env.speed:.2f}") - # # curr_qpos = aslip_env.sim.qpos() - # # print("curr height: ", curr_qpos[2]) - - # Collect actual data (foot force each sim step) - print("Start actual data") - start_time = time.time() - for i in range(num_cycles): - for j in range(math.floor(env.phaselen)+1): - action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy() - for k in range(env.simrate): - env.step_simulation(action) - time_log.append(time.time()-start_time) - speed_log.append([env.speed, env.sim.qvel()[0]]) - grf_log.append(env.sim.get_foot_forces()) - - env.time += 1 - env.phase += env.phase_add - if env.phase > env.phaselen: - env.phase = 0 - env.counter += 1 - state = env.get_full_state() - env.speed = i * 0.1 - env.render() - - time_log, speed_log, grf_log = map(np.array, (time_log, speed_log, grf_log)) - print(speed_log.shape) - print(grf_log.shape) - - ### Process the data - - # average data and get std dev - mean_speed = np.mean(speed_log, axis=0) - stddev_speed = np.mean(speed_log, axis=0) - mean_grfs = np.mean(grf_log[:, 1:], axis=0) - stddev_grfs = np.std(grf_log[:, 1:], axis=0) - 
print(mean_speed) - print(stddev_speed) - print(mean_grfs) - print(stddev_grfs) - - ### Save the data - output = { - "time": time_log, - "speed": speed_log, - "grfs": grf_log - } - with open(os.path.join(args.path, "collect_data.pkl"), "wb") as f: - pickle.dump(output, f) - - with open(os.path.join(args.path, "collect_data.pkl"), "rb") as f: - data = pickle.load(f) - - time_data = data["time"] - speed_data = data["speed"] - grf_data = data["grfs"] - - ### Plot the data - fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True) - axs[0].plot(time_data, speed_data, label="commanded speed") - axs[0].plot(time_data, speed_data, label="actual speed") - axs[0].set_title("Speed") - axs[0].set_xlabel("Time (Seconds)") - axs[0].set_ylabel("m/s") - axs[1].plot(time_data, grf_log[:,0], label="sim left foot") - axs[1].plot(time_data, grf_log[:,1], label="sim right foot") - axs[1].set_title("GRFs") - axs[1].set_xlabel("Time (Seconds)") - axs[1].set_ylabel("Newtons") - plt.legend(loc="upper right") - plt.show() - -# Rule for curriculum learning is that env observation space should be the same (so attributes like env.clock_based or env.state_est shouldn't be different and are forced to be same here) -# deal with loading hyperparameters of previous run continuation -def parse_previous(args): - if args.previous is not None: - run_args = pickle.load(open(args.previous + "experiment.pkl", "rb")) - args.env_name = run_args.env_name - args.traj = run_args.traj - args.phase_based = run_args.phase_based - args.clock_based = run_args.clock_based - args.state_est = run_args.state_est - args.no_delta = run_args.no_delta - args.recurrent = run_args.recurrent - args.learn_gains = run_args.learn_gains - args.ik_baseline = run_args.ik_baseline, - if args.env_name == "CassiePlayground-v0": - args.reward = "command" - args.run_name = run_args.run_name + "-playground" - if args.exchange_reward != None: - args.reward = args.exchange_reward - args.run_name = run_args.run_name + "_NEW-" + args.reward - else: - args.reward = run_args.reward - args.run_name = run_args.run_name + "--cont" - return args diff --git a/util/env.py b/util/env.py new file mode 100644 index 00000000..32c0c053 --- /dev/null +++ b/util/env.py @@ -0,0 +1,72 @@ +import os +import time +import torch +import numpy as np + +from cassie import CassieEnv, CassieTrajEnv, CassiePlayground, CassieStandingEnv + +def env_factory(path, command_profile="clock", input_profile="full", simrate=50, dynamics_randomization=True, mirror=False, learn_gains=False, reward=None, history=0, no_delta=True, traj=None, ik_baseline=False, **kwargs): + from functools import partial + + """ + Returns an *uninstantiated* environment constructor. + + Since environments containing cpointers (e.g. Mujoco envs) can't be serialized, + this allows us to pass their constructors to Ray remote functions instead + (since the gym registry isn't shared across ray subprocesses we can't simply + pass gym.make() either) + + Note: env.unwrapped.spec is never set, if that matters for some reason. 
+ """ + + # Custom Cassie Environment + if path in ['Cassie-v0', 'CassieTraj-v0', 'CassiePlayground-v0', 'CassieStandingEnv-v0']: + + if path == 'Cassie-v0': + env_fn = partial(CassieEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) + elif path == 'CassieTraj-v0': + env_fn = partial(CassieTrajEnv, traj=traj, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, no_delta=no_delta, learn_gains=learn_gains, ik_baseline=ik_baseline, reward=reward, history=history) + elif path == 'CassiePlayground-v0': + env_fn = partial(CassiePlayground, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) + elif path == 'CassieStandingEnv-v0': + env_fn = partial(CassieStandingEnv, command_profile=command_profile, input_profile=input_profile, simrate=simrate, dynamics_randomization=dynamics_randomization, learn_gains=learn_gains, reward=reward, history=history) + + if mirror: + from rl.envs.wrappers import SymmetricEnv + env_fn = partial(SymmetricEnv, env_fn, mirrored_obs=env_fn().mirrored_obs, mirrored_act=env_fn().mirrored_acts) + + print() + print("Environment: {}".format(path)) + print(" ├ reward: {}".format(reward)) + print(" ├ input prof: {}".format(input_profile)) + print(" ├ cmd prof: {}".format(command_profile)) + print(" ├ learn gains: {}".format(learn_gains)) + print(" ├ dyn_random: {}".format(dynamics_randomization)) + print(" ├ mirror: {}".format(mirror)) + if path == "CassieTraj-v0": + print(" ├ traj: {}".format(traj)) + print(" ├ ik baseline: {}".format(ik_baseline)) + print(" ├ no_delta: {}".format(no_delta)) + print(" └ obs_dim: {}".format(env_fn().observation_space.shape[0])) + + return env_fn + + # OpenAI Gym environment + else: + import gym + spec = gym.envs.registry.spec(path) + _kwargs = spec._kwargs.copy() + _kwargs.update(kwargs) + + try: + if callable(spec._entry_point): + cls = spec._entry_point(**_kwargs) + else: + cls = gym.envs.registration.load(spec._entry_point) + except AttributeError: + if callable(spec.entry_point): + cls = spec.entry_point(**_kwargs) + else: + cls = gym.envs.registration.load(spec.entry_point) + + return partial(cls, **_kwargs) diff --git a/util/eval.py b/util/eval.py new file mode 100644 index 00000000..42f84aaf --- /dev/null +++ b/util/eval.py @@ -0,0 +1,206 @@ +import torch +import os +import sys, time + +from tkinter import * +import multiprocessing as mp +from cassie.phase_function import LivePlot +import matplotlib.pyplot as plt + +import tty +import termios +import select +import numpy as np + +from util.env import env_factory + +class EvalProcessClass(): + def __init__(self, args, run_args): + + if run_args.command_profile == "phase" and not args.no_viz: + self.live_plot = True + self.plot_pipe, plotter_pipe = mp.Pipe() + self.plotter = LivePlot() + self.plot_process = mp.Process(target=self.plotter, args=(plotter_pipe,), daemon=True) + self.plot_process.start() + else: + self.live_plot = False + + #TODO: Add pausing, and window quiting along with other render functionality + def eval_policy(self, policy, args, run_args): + + def print_input_update(e): + print(f"\n\nstance dur.: {e.stance_duration:.2f}\t swing dur.: {e.swing_duration:.2f}\t stance mode: {e.stance_mode}\n") + + if self.live_plot: + send = 
self.plot_pipe.send + + def isData(): + return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], []) + + if args.debug: + args.stats = False + + if args.reward is None: + args.reward = run_args.reward + + max_traj_len = args.traj_len + visualize = not args.no_viz + print("env name: ", run_args.env_name) + if run_args.env_name is None: + env_name = args.env_name + else: + env_name = run_args.env_name + + env = env_factory( + args.env_name, + command_profile=run_args.command_profile, + input_profile=run_args.input_profile, + simrate=run_args.simrate, + dynamics_randomization=run_args.dyn_random, + mirror=run_args.mirror, + learn_gains=run_args.learn_gains, + reward=run_args.reward, + history=run_args.history, + no_delta=run_args.no_delta, + traj=run_args.traj, + ik_baseline=run_args.ik_baseline + )() + + if args.debug: + env.debug = True + + if args.terrain is not None and ".npy" in args.terrain: + env.sim = CassieSim("cassie_hfield.xml") + hfield_data = np.load(os.path.join("./cassie/cassiemujoco/terrains/", args.terrain)) + env.sim.set_hfield_data(hfield_data.flatten()) + + print(env.reward_func) + print() + + if hasattr(policy, 'init_hidden_state'): + policy.init_hidden_state() + + old_settings = termios.tcgetattr(sys.stdin) + + orient_add = 0 + + slowmo = False + + if visualize: + env.render() + render_state = True + try: + tty.setcbreak(sys.stdin.fileno()) + + state = env.reset_for_test() + done = False + timesteps = 0 + eval_reward = 0 + speed = 0.0 + side_speed = 0.0 + + env.update_speed(speed, side_speed) + + while render_state: + + if isData(): + c = sys.stdin.read(1) + if c == 'w': + speed += 0.1 + elif c == 's': + speed -= 0.1 + elif c == 'd': + side_speed += 0.02 + elif c == 'a': + side_speed -= 0. + elif c == 'j': + env.phase_add += .1 + # print("Increasing frequency to: {:.1f}".format(env.phase_add)) + elif c == 'h': + env.phase_add -= .1 + # print("Decreasing frequency to: {:.1f}".format(env.phase_add)) + elif c == 'l': + orient_add -= .1 + # print("Increasing orient_add to: ", orient_add) + elif c == 'k': + orient_add += .1 + # print("Decreasing orient_add to: ", orient_add) + + elif c == 'x': + env.swing_duration += .01 + print_input_update(env) + elif c == 'z': + env.swing_duration -= .01 + print_input_update(env) + elif c == 'v': + env.stance_duration += .01 + print_input_update(env) + elif c == 'c': + env.stance_duration -= .01 + print_input_update(env) + + elif c == '1': + env.stance_mode = "zero" + print_input_update(env) + elif c == '2': + env.stance_mode = "grounded" + print_input_update(env) + elif c == '3': + env.stance_mode = "aerial" + + elif c == 'r': + state = env.reset() + speed = env.speed + if hasattr(policy, 'init_hidden_state'): + policy.init_hidden_state() + print("Resetting environment via env.reset()") + elif c == 'p': + push = 100 + push_dir = 2 + force_arr = np.zeros(6) + force_arr[push_dir] = push + env.sim.apply_force(force_arr) + elif c == 't': + slowmo = not slowmo + print("Slowmo : \n", slowmo) + + env.update_speed(speed, side_speed) + # print(speed) + + if self.live_plot: + send((env.swing_duration, env.stance_duration, env.strict_relaxer, env.stance_mode, env.have_incentive)) + + if args.stats: + print(f"act spd: {env.sim.qvel()[0]:+.2f} cmd speed: {env.speed:+.2f} cmd_sd_spd: {env.side_speed:+.2f} phase add: {env.phase_add:.2f} orient add: {orient_add:+.2f}", end="\r") + + if hasattr(env, 'simrate'): + start = time.time() + + if (not env.vis.ispaused()): + # Update Orientation + env.orient_add = orient_add + + action = 
diff --git a/util/log.py b/util/log.py
new file mode 100644
index 00000000..e4285e1e
--- /dev/null
+++ b/util/log.py
@@ -0,0 +1,91 @@
+from collections import OrderedDict
+import hashlib, os, pickle
+
+class color:
+    BOLD = '\033[1m\033[48m'
+    END = '\033[0m'
+    ORANGE = '\033[38;5;202m'
+    BLACK = '\033[38;5;240m'
+
+# Logger stores in trained_models by default
+def create_logger(args):
+    """Use hyperparams to set a directory to output diagnostic files."""
+    from torch.utils.tensorboard import SummaryWriter
+
+    arg_dict = args.__dict__
+    assert "seed" in arg_dict, \
+        "You must provide a 'seed' key in your command line arguments."
+    assert "logdir" in arg_dict, \
+        "You must provide a 'logdir' key in your command line arguments."
+    assert "env_name" in arg_dict, \
+        "You must provide an 'env_name' key in your command line arguments."
+
+    # sort the keys so the same hyperparameters will always have the same hash
+    arg_dict = OrderedDict(sorted(arg_dict.items(), key=lambda t: t[0]))
+
+    # remove seed so it doesn't get hashed, but store its value for the directory name;
+    # same for the logging directory
+    run_name = arg_dict.pop('run_name')
+    seed = str(arg_dict.pop("seed"))
+    logdir = str(arg_dict.pop('logdir'))
+    env_name = str(arg_dict['env_name'])
+
+    # if this run has a unique name, that name becomes the folder, even if it overrides an existing one
+    if run_name is not None:
+        logdir = os.path.join(logdir, env_name)
+        output_dir = os.path.join(logdir, run_name)
+    else:
+        # see if we are resuming a previous run; if so, mark it as continued
+        if args.previous is not None:
+            if args.exchange_reward is not None:
+                output_dir = args.previous[0:-1] + "_NEW-" + args.reward
+            else:
+                print(args.previous[0:-1])
+                output_dir = args.previous[0:-1] + '-cont'
+        else:
+            # get a unique hash for the hyperparameter settings, truncated at 6 chars
+            arg_hash = hashlib.md5(str(arg_dict).encode('ascii')).hexdigest()[0:6] + '-seed' + seed
+            logdir = os.path.join(logdir, env_name)
+            output_dir = os.path.join(logdir, arg_hash)
+
+    # create a directory with the hyperparam hash as its name, if it doesn't
+    # already exist
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Create a file with all the hyperparam settings in human-readable plaintext,
+    # and a pickle file for resuming training easily
+    info_path = os.path.join(output_dir, "experiment.info")
+    pkl_path = os.path.join(output_dir, "experiment.pkl")
+    with open(pkl_path, 'wb') as file:
+        pickle.dump(args, file)
+    with open(info_path, 'w') as file:
+        for key, val in arg_dict.items():
+            file.write("%s: %s" % (key, val))
+            file.write('\n')
+
+    logger = SummaryWriter(output_dir, flush_secs=0.1)  # note: such a small flush_secs slows things down noticeably, even on parallelized setups
+    print("Logging to " + color.BOLD + color.ORANGE + str(output_dir) + color.END)
+
+    logger.dir = output_dir
+    return logger
+
+# The rule for curriculum learning is that the env observation space must stay the same, so
+# attributes like env.clock_based or env.state_est cannot differ and are forced to match here.
+# Loads the hyperparameters of the previous run when continuing training.
+def parse_previous(args):
+    if args.previous is not None:
+        run_args = pickle.load(open(args.previous + "experiment.pkl", "rb"))
+        args.recurrent = run_args.recurrent
+        args.env_name = run_args.env_name
+        args.command_profile = run_args.command_profile
+        args.input_profile = run_args.input_profile
+        args.learn_gains = run_args.learn_gains
+        args.traj = run_args.traj
+        args.no_delta = run_args.no_delta
+        args.ik_baseline = run_args.ik_baseline
+        if args.exchange_reward is not None:
+            args.reward = args.exchange_reward
+            args.run_name = run_args.run_name + "_NEW-" + args.reward
+        else:
+            args.reward = run_args.reward
+            args.run_name = run_args.run_name + "--cont"
+    return args
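For clarity, a small illustration (not from the repo) of the run-directory naming scheme `create_logger` uses when no `run_name` is given: the sorted hyperparameters are hashed with the seed excluded, and the seed is appended to the truncated hash. The hyperparameter values below are placeholders, and `run_name`/`logdir` handling is omitted to keep the sketch short:

```python
# Illustrative only: reproduce create_logger's hash-based directory name.
import hashlib
from collections import OrderedDict

args = {"env_name": "Cassie-v0", "lr": 1e-4, "seed": 1337}     # placeholder hyperparameters
arg_dict = OrderedDict(sorted(args.items(), key=lambda t: t[0]))
seed = str(arg_dict.pop("seed"))                               # seed is excluded from the hash

arg_hash = hashlib.md5(str(arg_dict).encode('ascii')).hexdigest()[0:6] + '-seed' + seed
print(arg_hash)                                                # e.g. 'a1b2c3-seed1337'
```

This is why identical hyperparameters always land in the same directory while different seeds of the same configuration sit side by side.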
diff --git a/util/logo.py b/util/logo.py
new file mode 100644
index 00000000..a4ea363d
--- /dev/null
+++ b/util/logo.py
@@ -0,0 +1,22 @@
+class color:
+    BOLD = '\033[1m\033[48m'
+    END = '\033[0m'
+    ORANGE = '\033[38;5;202m'
+    BLACK = '\033[38;5;240m'
+
+
+def print_logo(subtitle="", option=2):
+    print()
+    print(color.BOLD + color.ORANGE + "         .8.         " + color.BLACK + " 8 888888888o   " + color.ORANGE + "8 8888888888   `8.`8888.      ,8' ")
+    print(color.BOLD + color.ORANGE + "        .888.        " + color.BLACK + " 8 8888    `88. " + color.ORANGE + "8 8888          `8.`8888.    ,8' ")
+    print(color.BOLD + color.ORANGE + "       :88888.       " + color.BLACK + " 8 8888     `88 " + color.ORANGE + "8 8888           `8.`8888.  ,8' ")
+    print(color.BOLD + color.ORANGE + "      . `88888.      " + color.BLACK + " 8 8888     ,88 " + color.ORANGE + "8 8888            `8.`8888.,8' ")
+    print(color.BOLD + color.ORANGE + "     .8. `88888.     " + color.BLACK + " 8 8888.   ,88' " + color.ORANGE + "8 888888888888     `8.`88888' ")
+    print(color.BOLD + color.ORANGE + "    .8`8. `88888.    " + color.BLACK + " 8 888888888P'  " + color.ORANGE + "8 8888             .88.`8888. ")
+    print(color.BOLD + color.ORANGE + "   .8' `8. `88888.   " + color.BLACK + " 8 8888         " + color.ORANGE + "8 8888            .8'`8.`8888. ")
+    print(color.BOLD + color.ORANGE + "  .8'   `8. `88888.  " + color.BLACK + " 8 8888         " + color.ORANGE + "8 8888           .8'  `8.`8888. ")
+    print(color.BOLD + color.ORANGE + " .888888888. `88888. " + color.BLACK + " 8 8888         " + color.ORANGE + "8 8888          .8'    `8.`8888. ")
+    print(color.BOLD + color.ORANGE + ".8'       `8. `88888." + color.BLACK + " 8 8888         " + color.ORANGE + "8 888888888888 .8'      `8.`8888. " + color.END)
+    print("\n")
+    print(subtitle)
+    print("\n")
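Finally, a hedged sketch (not part of this diff) of how `parse_previous` and `create_logger` fit together when resuming a run. The directory path and flag values are illustrative only, the sketch assumes an `experiment.pkl` already exists at that path, and the argument set is trimmed down from the full `apex.py` parser:

```python
# Illustrative only: resume training from a previous run directory.
import argparse
from util.log import parse_previous, create_logger

parser = argparse.ArgumentParser()
parser.add_argument("--previous", default="trained_models/ppo/Cassie-v0/8b8b12-seed1337/", type=str)  # made-up path
parser.add_argument("--exchange_reward", default=None, type=str)
parser.add_argument("--run_name", default=None, type=str)
parser.add_argument("--logdir", default="trained_models/", type=str)
parser.add_argument("--seed", default=1337, type=int)
parser.add_argument("--env_name", default="Cassie-v0", type=str)
args = parser.parse_args()

args = parse_previous(args)   # copies env/command/input settings out of the old experiment.pkl
logger = create_logger(args)  # picks an output dir for the continued run and re-saves experiment.info/.pkl
```

Note that `--previous` must end with a trailing slash, since `parse_previous` appends `"experiment.pkl"` directly to it.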