From c824a326f2d19bec53d66978ecfe618ec4cc911b Mon Sep 17 00:00:00 2001
From: pjfernandecabo
Date: Sat, 4 Mar 2023 17:22:07 -0500
Subject: [PATCH] fixed q-learn error in select_action()

---
 .../train_followlane_qlearn_carla.py          |  40 +++++
 rl_studio/agents/f1/loaders.py                |   7 -
 rl_studio/algorithms/qlearn.py                |  27 ++-
 ...nfig_training_followlane_qlearn_carla.yaml |   6 +-
 .../carla/followlane/followlane_qlearn.py     | 159 +++++-------------
 .../carla/utils/visualize_multiple_sensors.py |   2 +
 6 files changed, 105 insertions(+), 136 deletions(-)

diff --git a/rl_studio/agents/auto_carla/train_followlane_qlearn_carla.py b/rl_studio/agents/auto_carla/train_followlane_qlearn_carla.py
index 945c50b91..5713f764b 100755
--- a/rl_studio/agents/auto_carla/train_followlane_qlearn_carla.py
+++ b/rl_studio/agents/auto_carla/train_followlane_qlearn_carla.py
@@ -96,6 +96,12 @@ def main(self):
         log = LoggingHandler(self.log_file)
 
         env = gym.make(self.env_params.env_name, **self.environment.environment)
+        start_time = datetime.now()
+        best_epoch = 1
+        current_max_reward = 0
+        best_step = 0
+        best_epoch_training_time = 0
+
         epsilon = self.environment.environment["epsilon"]
         epsilon_decay = epsilon / (self.env_params.total_episodes)
         # epsilon = epsilon / 2
@@ -187,7 +193,41 @@
                         state=state,
                     )
                 env.display_manager.render()
+                # render params
+                """
+                render_params(
+                    action=action,
+                    episode=episode,
+                    step=step,
+                    v=self.global_params.actions_set[action][
+                        0
+                    ],  # this case for discrete
+                    w=self.global_params.actions_set[action][
+                        1
+                    ],  # this case for discrete
+                    epsilon=epsilon,
+                    observation=observation,
+                    reward_in_step=reward,
+                    cumulated_reward=cumulated_reward,
+                    current_max_reward=current_max_reward,
+                    done=done,
+                )
+                """
+                # End epoch
+                if step > self.env_params.estimated_steps:
+                    done = True
+                    np.save(
+                        f"{self.global_params.models_dir}/{time.strftime('%Y%m%d-%H%M%S')}_Circuit-{self.environment.environment['circuit_name']}_States-{self.environment.environment['states']}_Actions-{self.environment.environment['action_space']}_Rewards-{self.environment.environment['reward_function']}_epsilon-{round(epsilon,3)}_epoch-{episode}_step-{step}_reward-{int(cumulated_reward)}-qtable.npy",
+                        qlearn.q_table,
+                    )
+
+            # updating epsilon for exploration
+            if epsilon > self.environment.environment["epsilon_min"]:
+                # self.epsilon *= self.epsilon_discount
+                epsilon -= epsilon_decay
+                epsilon = qlearn.update_epsilon(
+                    max(self.environment.environment["epsilon_min"], epsilon)
+                )
 
             ## ------------ destroy actors
             env.destroy_all_actors()
diff --git a/rl_studio/agents/f1/loaders.py b/rl_studio/agents/f1/loaders.py
index fbecbc348..5596199bb 100644
--- a/rl_studio/agents/f1/loaders.py
+++ b/rl_studio/agents/f1/loaders.py
@@ -547,12 +547,6 @@ def __init__(self, config) -> None:
         self.environment["save_every_step"] = config[self.environment_set][self.env][
             "save_every_step"
         ]
-        self.environment["init_pose"] = config[self.environment_set][self.env][
-            "init_pose"
-        ]
-        self.environment["goal_pose"] = config[self.environment_set][self.env][
-            "goal_pose"
-        ]
         self.environment["filter"] = config[self.environment_set][self.env]["filter"]
         self.environment["generation"] = config[self.environment_set][self.env][
             "generation"
         ]
@@ -577,7 +571,6 @@
         self.environment["waypoints_road_id"] = config[self.environment_set][self.env][
             "waypoints_road_id"
         ]
-
         # --------- Image
         self.environment["height_image"] = config["agents"][self.agent][
             "camera_params"
         ]
diff --git a/rl_studio/algorithms/qlearn.py b/rl_studio/algorithms/qlearn.py
index f8284ffa0..bf05abc41 100644
--- a/rl_studio/algorithms/qlearn.py
+++ b/rl_studio/algorithms/qlearn.py
@@ -7,6 +7,10 @@
 
 
 class QLearnF1:
+    """table based
+    Gazebo
+    """
+
     def __init__(
         self, states_len, actions, actions_len, epsilon, alpha, gamma, num_regions
     ):
@@ -135,6 +139,10 @@ def save_model(
 
 
 class QLearn:
+    """original dict based
+    Gazebo
+    """
+
     def __init__(self, actions, epsilon=0.99, alpha=0.8, gamma=0.9):
         self.q = {}
         self.epsilon = epsilon  # exploration constant
@@ -300,6 +308,8 @@ def updateEpsilon(self, epsilon):
 
 
 class QLearnCarla:
+    """dict based"""
+
     def __init__(self, actions, epsilon, alpha, gamma):
         self.q_table = {}
         self.epsilon = epsilon  # exploration constant
@@ -312,17 +322,16 @@ def __init__(self, actions, epsilon, alpha, gamma):
 
     def select_action(self, state):
         q_list = [self.q_table.get((state, a), 0.0) for a in self.actions]
-        # print(f"\n{q_list = }")
+        print(f"\n{q_list = }")
         max_q = max(q_list)
-        # print(f"{max_q}")
+        print(f"{max_q = }")
         count_max = q_list.count(max_q)
-        # print(f"{count_max}")
-        if count_max > 1:
-            best = [i for i in range(len(q_list)) if q_list[i] == max_q]
-            max_q = random.choice(best)
-        # print(f"{len(self.actions)=}")
-        if np.random.random() > self.epsilon:
-            action = q_list.index(max_q)
+        print(f"{count_max= }")
+        best_index = [index for index, value in enumerate(q_list) if value == max_q]
+
+        if count_max > 1 or np.random.random() > self.epsilon:
+            # best = [i for i in range(len(q_list)) if q_list[i] == max_q]
+            action = random.choice(best_index)
         else:
             action = np.random.randint(0, len(self.actions))
 
diff --git a/rl_studio/config/config_training_followlane_qlearn_carla.yaml b/rl_studio/config/config_training_followlane_qlearn_carla.yaml
index 9ec99d912..6e0a9036d 100644
--- a/rl_studio/config/config_training_followlane_qlearn_carla.yaml
+++ b/rl_studio/config/config_training_followlane_qlearn_carla.yaml
@@ -36,7 +36,7 @@ inference:
 algorithm:
   qlearn:
     alpha: 0.7
-    epsilon: 0.95
+    epsilon: 0.3
    epsilon_min: 0.05
     gamma: 0.9
 
@@ -115,11 +115,9 @@ carla_environments:
     traffic_pedestrians: False
     city_lights: False
     car_lights: False
-    estimated_steps: 5
+    estimated_steps: 100
     save_episodes: 10
     save_every_step: 1000
-    init_pose:
-    goal_pose:
     filter: vehicle.*
     generation: "2"
     rolename: "hero" #name
diff --git a/rl_studio/envs/carla/followlane/followlane_qlearn.py b/rl_studio/envs/carla/followlane/followlane_qlearn.py
index 108ab1d4c..c1e16fa40 100644
--- a/rl_studio/envs/carla/followlane/followlane_qlearn.py
+++ b/rl_studio/envs/carla/followlane/followlane_qlearn.py
@@ -14,21 +14,18 @@
 
 from rl_studio.agents.utils import (
     print_messages,
+    render_params,
 )
 from rl_studio.envs.carla.followlane.followlane_env import FollowLaneEnv
 from rl_studio.envs.carla.followlane.settings import FollowLaneCarlaConfig
 from rl_studio.envs.carla.followlane.utils import AutoCarlaUtils
-from rl_studio.envs.carla.utils.bounding_boxes import BasicSynchronousClient
 from rl_studio.envs.carla.utils.visualize_multiple_sensors import (
     DisplayManager,
     SensorManager,
     CustomTimer,
 )
 import pygame
-from rl_studio.envs.carla.utils.global_route_planner import (
-    GlobalRoutePlanner,
-)
 
 
 class FollowLaneQlearnStaticWeatherNoTraffic(FollowLaneEnv):
@@ -62,11 +59,6 @@ def __init__(self, **config):
         self.traffic_manager.set_synchronous_mode(False)
         self.world.apply_settings(settings)
 
-        # self.camera = None
-        # self.vehicle = None
-        # self.display = None
-        # self.image = None
-
         ## -- display manager
         self.display_manager = DisplayManager(
             grid_size=[2, 3],
@@ -78,6 +70,9 @@
         self.perfect_distance_pixels = None
         self.perfect_distance_normalized = None
 
+        self._control = carla.VehicleControl()
+        self.params = {}
+
     def reset(self):
         self.collision_hist = []
 
@@ -126,92 +121,6 @@ def reset(self):
         ## --- Sensor collision
         self.setup_col_sensor()
 
-        ## --- Cameras
-        # self.camera_spectator = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "RGBCamera",
-        #     carla.Transform(carla.Location(x=-5, z=2.8), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[0, 0],
-        # )
-        # self.camera_spectator_segmentated = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "SemanticCamera",
-        #     carla.Transform(carla.Location(x=-5, z=2.8), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[1, 0],
-        # )
-        # self.sergio_camera_spectator = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "SemanticCameraSergio",
-        #     carla.Transform(carla.Location(x=-5, z=2.8), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[2, 0],
-        # )
-        # self.front_camera = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "RGBCamera",
-        #     carla.Transform(carla.Location(x=2, z=1), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[0, 1],
-        # )
-
-        # self.front_camera_segmentated = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "SemanticCamera",
-        #     carla.Transform(carla.Location(x=2, z=1), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[1, 1],
-        # )
-
-        # self.sergio_front_camera = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "SemanticCameraSergio",
-        #     carla.Transform(carla.Location(x=2, z=1), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[2, 1],
-        # )
-        # self.front_camera_mas_baja = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "RGBCamera",
-        #     carla.Transform(carla.Location(x=2, z=0.5), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[0, 2],
-        # )
-        # self.front_camera_mas_baja_segmentated = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "SemanticCamera",
-        #     carla.Transform(carla.Location(x=2, z=0.5), carla.Rotation(yaw=+00)),
-        #     self.car,
-        #     {},
-        #     display_pos=[1, 2],
-        # )
-
-        # self.sergio_front_camera_mas_baja = SensorManager(
-        #     self.world,
-        #     self.display_manager,
-        #     "SemanticCameraSergio",
-        #     carla.Transform(carla.Location(x=2, z=0.5), carla.Rotation(yaw=+0)),
-        #     self.car,
-        #     {},
-        #     display_pos=[2, 2],
-        # )
-
         self.front_camera_1_5_bev = SensorManager(
             self.world,
             self.display_manager,
@@ -434,7 +343,7 @@ def step(self, action):
         # print(f"=============== STEP ===================")
 
         ### -------- send action
-        params = self.control(action)
+        self.control(action)
         # print(f"params = {params}")
 
         # params["pos"] = 270
@@ -443,6 +352,16 @@
         # stados = [stados]
         # print(f"stados = {stados}")
 
+        render_params(
+            speed=self.params["speed"],
+            steering_angle=self.params["steering_angle"],
+            Steer=self.params["Steer"],
+            location=self.params["location"],
+            Throttle=self.params["Throttle"],
+            Brake=self.params["Brake"],
+            height=self.params["height"],
+        )
+
         ## -- states
         mask = self.preprocess_image(
             self.front_camera_1_5_red_mask.front_camera_red_mask
@@ -479,7 +398,7 @@
         states_16 = counter_states.get(16)
 
         ## -------- Rewards
-        reward = self.rewards_easy(error, params)
+        reward = self.rewards_easy(error, self.params)
 
         ## ----- hacemos la salida
         done = False
@@ -514,32 +433,40 @@ def control(self, action):
 
         steering_angle = 0
         if action == 0:
-            self.car.apply_control(carla.VehicleControl(throttle=1, steer=-0.2))
+            # self.car.apply_control(carla.VehicleControl(throttle=1, steer=-0.2))
+            self._control.throttle = min(self._control.throttle + 0.1, 1.00)
+            self._control.steer = -0.2
             steering_angle = -0.2
         elif action == 1:
-            self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.0))
+            # self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.0))
+            self._control.throttle = min(self._control.throttle + 0.1, 1.00)
+            self._control.steer = 0.0
             steering_angle = 0
         elif action == 2:
-            self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.2))
+            # self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.2))
+            self._control.throttle = min(self._control.throttle + 0.1, 1.00)
+            self._control.steer = 0.2
             steering_angle = 0.2
-        # elif action == 3:
-        #     self.vehicle.apply_control(carla.VehicleControl(throttle=0.2, steer=-0.4))
-        #     steering_angle = 0.4
-        # elif action == 4:
-        #     self.vehicle.apply_control(carla.VehicleControl(throttle=0.2, steer=0.4))
-        #     steering_angle = 0.4
-        else:
-            print("error in action")
-            pass
 
-        params = {}
-        v = self.car.get_velocity()
-        params["velocity"] = math.sqrt(v.x**2 + v.y**2 + v.z**2)
+        self.car.apply_control(self._control)
+        t = self.car.get_transform()
+        v = self.car.get_velocity()
+        c = self.car.get_control()
         w = self.car.get_angular_velocity()
-        params["steering_angle"] = steering_angle
+        self.params["speed"] = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))
+        self.params["steering_angle"] = w
+        print(f"{self.params['steering_angle'].x = }")
+        print(f"{self.params['steering_angle'].y = }")
+        print(f"{self.params['steering_angle'].z = }")
 
-        return params
+        self.params["location"] = (t.location.x, t.location.y)
+        self.params["Throttle"] = c.throttle
+        self.params["Steer"] = c.steer
+        self.params["Brake"] = c.brake
+        self.params["height"] = t.location.z
+
+        # return self.params
 
     def rewards_followlane_dist_v_angle(self, error, params):
         # rewards = []
@@ -572,8 +499,8 @@ def rewards_easy(self, error, params):
             rewards.append(-100)
 
         function_reward = sum(rewards) / len(rewards)
-        function_reward += params["velocity"] * 0.5
-        function_reward -= params["steering_angle"] * 1.02
+        function_reward += params["speed"] * 0.5
+        # function_reward -= params["steering_angle"] * 1.02
 
         return function_reward
 
diff --git a/rl_studio/envs/carla/utils/visualize_multiple_sensors.py b/rl_studio/envs/carla/utils/visualize_multiple_sensors.py
index 9ddabf54a..b726b87e7 100644
--- a/rl_studio/envs/carla/utils/visualize_multiple_sensors.py
+++ b/rl_studio/envs/carla/utils/visualize_multiple_sensors.py
@@ -46,6 +46,7 @@ def time(self):
 
 class DisplayManager:
     def __init__(self, grid_size, window_size):
+        pygame.init()
         pygame.font.init()
 
         self.display = pygame.display.set_mode(
@@ -56,6 +57,7 @@ def __init__(self, grid_size, window_size):
         self.window_size = window_size
         self.sensor_list = []
         # self.actor_list = []
+        # self.visualize = visualize
 
     def get_window_size(self):
         return [int(self.window_size[0]), int(self.window_size[1])]