fixed q-learn error in select_action() #167

Closed
wants to merge 1 commit into from
40 changes: 40 additions & 0 deletions rl_studio/agents/auto_carla/train_followlane_qlearn_carla.py
@@ -96,6 +96,12 @@ def main(self):
log = LoggingHandler(self.log_file)
env = gym.make(self.env_params.env_name, **self.environment.environment)

start_time = datetime.now()
best_epoch = 1
current_max_reward = 0
best_step = 0
best_epoch_training_time = 0

epsilon = self.environment.environment["epsilon"]
epsilon_decay = epsilon / (self.env_params.total_episodes)
# epsilon = epsilon / 2
@@ -187,7 +193,41 @@ def main(self):
state=state,
)
env.display_manager.render()
# render params
"""
render_params(
action=action,
episode=episode,
step=step,
v=self.global_params.actions_set[action][
0
], # this case for discrete
w=self.global_params.actions_set[action][
1
], # this case for discrete
epsilon=epsilon,
observation=observation,
reward_in_step=reward,
cumulated_reward=cumulated_reward,
current_max_reward=current_max_reward,
done=done,
)
"""
# End epoch
if step > self.env_params.estimated_steps:
done = True
np.save(
f"{self.global_params.models_dir}/{time.strftime('%Y%m%d-%H%M%S')}_Circuit-{self.environment.environment['circuit_name']}_States-{self.environment.environment['states']}_Actions-{self.environment.environment['action_space']}_Rewards-{self.environment.environment['reward_function']}_epsilon-{round(epsilon,3)}_epoch-{episode}_step-{step}_reward-{int(cumulated_reward)}-qtable.npy",
qlearn.q_table,
)

# updating epsilon for exploration
if epsilon > self.environment.environment["epsilon_min"]:
# self.epsilon *= self.epsilon_discount
epsilon -= epsilon_decay
epsilon = qlearn.update_epsilon(
max(self.environment.environment["epsilon_min"], epsilon)
)
## ------------ destroy actors

env.destroy_all_actors()
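Note: the epsilon handling added above decays epsilon linearly by epsilon / total_episodes after each episode and clamps it at epsilon_min through qlearn.update_epsilon(). A minimal standalone sketch of that schedule, assuming the starting value and floor from the updated YAML and a hypothetical episode count:

    # Minimal sketch of the linear epsilon-decay schedule (values assumed).
    epsilon = 0.3          # starting exploration rate, as set in the updated config
    epsilon_min = 0.05     # floor, as set in the updated config
    total_episodes = 500   # hypothetical; comes from env_params in the real script
    epsilon_decay = epsilon / total_episodes

    for episode in range(total_episodes):
        # ... run one training episode with the current epsilon ...
        if epsilon > epsilon_min:
            epsilon -= epsilon_decay
        epsilon = max(epsilon_min, epsilon)  # what update_epsilon() effectively stores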
7 changes: 0 additions & 7 deletions rl_studio/agents/f1/loaders.py
@@ -547,12 +547,6 @@ def __init__(self, config) -> None:
self.environment["save_every_step"] = config[self.environment_set][self.env][
"save_every_step"
]
self.environment["init_pose"] = config[self.environment_set][self.env][
"init_pose"
]
self.environment["goal_pose"] = config[self.environment_set][self.env][
"goal_pose"
]
self.environment["filter"] = config[self.environment_set][self.env]["filter"]
self.environment["generation"] = config[self.environment_set][self.env][
"generation"
@@ -577,7 +571,6 @@ def __init__(self, config) -> None:
self.environment["waypoints_road_id"] = config[self.environment_set][self.env][
"waypoints_road_id"
]

# --------- Image
self.environment["height_image"] = config["agents"][self.agent][
"camera_params"
27 changes: 18 additions & 9 deletions rl_studio/algorithms/qlearn.py
@@ -7,6 +7,10 @@


class QLearnF1:
"""table based
Gazebo
"""

def __init__(
self, states_len, actions, actions_len, epsilon, alpha, gamma, num_regions
):
@@ -135,6 +139,10 @@ def save_model(


class QLearn:
"""original dict based
Gazebo
"""

def __init__(self, actions, epsilon=0.99, alpha=0.8, gamma=0.9):
self.q = {}
self.epsilon = epsilon # exploration constant
@@ -300,6 +308,8 @@ def updateEpsilon(self, epsilon):


class QLearnCarla:
"""dict based"""

def __init__(self, actions, epsilon, alpha, gamma):
self.q_table = {}
self.epsilon = epsilon # exploration constant
@@ -312,17 +322,16 @@ def __init__(self, actions, epsilon, alpha, gamma):

def select_action(self, state):
q_list = [self.q_table.get((state, a), 0.0) for a in self.actions]
# print(f"\n{q_list = }")
print(f"\n{q_list = }")
max_q = max(q_list)
# print(f"{max_q}")
print(f"{max_q = }")
count_max = q_list.count(max_q)
# print(f"{count_max}")
if count_max > 1:
best = [i for i in range(len(q_list)) if q_list[i] == max_q]
max_q = random.choice(best)
# print(f"{len(self.actions)=}")
if np.random.random() > self.epsilon:
action = q_list.index(max_q)
print(f"{count_max= }")
best_index = [index for index, value in enumerate(q_list) if value == max_q]

if count_max > 1 or np.random.random() > self.epsilon:
# best = [i for i in range(len(q_list)) if q_list[i] == max_q]
action = random.choice(best_index)
else:
action = np.random.randint(0, len(self.actions))
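Note: for reference, the fixed selection logic as a standalone, hedged sketch. Q-values default to 0.0 for unseen (state, action) pairs, ties at the maximum are broken uniformly at random, and a uniformly random action is only drawn when a single maximum exists and the exploration draw is not above epsilon. The example table, state, and function signature are illustrative assumptions, not the repository API:

    import random
    import numpy as np

    def select_action(q_table, state, actions, epsilon):
        # Q-values for every action in this state; unseen pairs default to 0.0.
        q_list = [q_table.get((state, a), 0.0) for a in actions]
        max_q = max(q_list)
        best_index = [i for i, v in enumerate(q_list) if v == max_q]

        # Exploit (pick among the tied maxima) when several actions tie or the
        # draw exceeds epsilon; otherwise explore with a uniformly random index.
        if len(best_index) > 1 or np.random.random() > epsilon:
            return random.choice(best_index)   # returns an index into actions, as in the diff
        return np.random.randint(0, len(actions))

    q = {("s0", 0): 0.2, ("s0", 1): 0.2, ("s0", 2): -0.1}
    print(select_action(q, "s0", [0, 1, 2], epsilon=0.3))  # 0 or 1; tie broken at random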

6 changes: 2 additions & 4 deletions rl_studio/config/config_training_followlane_qlearn_carla.yaml
@@ -36,7 +36,7 @@ inference:
algorithm:
qlearn:
alpha: 0.7
epsilon: 0.95
epsilon: 0.3
epsilon_min: 0.05
gamma: 0.9

@@ -115,11 +115,9 @@ carla_environments:
traffic_pedestrians: False
city_lights: False
car_lights: False
estimated_steps: 5
estimated_steps: 100
save_episodes: 10
save_every_step: 1000
init_pose:
goal_pose:
filter: vehicle.*
generation: "2"
rolename: "hero" #name
159 changes: 43 additions & 116 deletions rl_studio/envs/carla/followlane/followlane_qlearn.py
@@ -14,21 +14,18 @@

from rl_studio.agents.utils import (
print_messages,
render_params,
)
from rl_studio.envs.carla.followlane.followlane_env import FollowLaneEnv
from rl_studio.envs.carla.followlane.settings import FollowLaneCarlaConfig
from rl_studio.envs.carla.followlane.utils import AutoCarlaUtils

from rl_studio.envs.carla.utils.bounding_boxes import BasicSynchronousClient
from rl_studio.envs.carla.utils.visualize_multiple_sensors import (
DisplayManager,
SensorManager,
CustomTimer,
)
import pygame
from rl_studio.envs.carla.utils.global_route_planner import (
GlobalRoutePlanner,
)


class FollowLaneQlearnStaticWeatherNoTraffic(FollowLaneEnv):
@@ -62,11 +59,6 @@ def __init__(self, **config):
self.traffic_manager.set_synchronous_mode(False)
self.world.apply_settings(settings)

# self.camera = None
# self.vehicle = None
# self.display = None
# self.image = None

## -- display manager
self.display_manager = DisplayManager(
grid_size=[2, 3],
@@ -78,6 +70,9 @@
self.perfect_distance_pixels = None
self.perfect_distance_normalized = None

self._control = carla.VehicleControl()
self.params = {}

def reset(self):

self.collision_hist = []
@@ -126,92 +121,6 @@ def reset(self):
## --- Sensor collision
self.setup_col_sensor()

## --- Cameras
# self.camera_spectator = SensorManager(
# self.world,
# self.display_manager,
# "RGBCamera",
# carla.Transform(carla.Location(x=-5, z=2.8), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[0, 0],
# )
# self.camera_spectator_segmentated = SensorManager(
# self.world,
# self.display_manager,
# "SemanticCamera",
# carla.Transform(carla.Location(x=-5, z=2.8), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[1, 0],
# )
# self.sergio_camera_spectator = SensorManager(
# self.world,
# self.display_manager,
# "SemanticCameraSergio",
# carla.Transform(carla.Location(x=-5, z=2.8), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[2, 0],
# )
# self.front_camera = SensorManager(
# self.world,
# self.display_manager,
# "RGBCamera",
# carla.Transform(carla.Location(x=2, z=1), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[0, 1],
# )

# self.front_camera_segmentated = SensorManager(
# self.world,
# self.display_manager,
# "SemanticCamera",
# carla.Transform(carla.Location(x=2, z=1), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[1, 1],
# )

# self.sergio_front_camera = SensorManager(
# self.world,
# self.display_manager,
# "SemanticCameraSergio",
# carla.Transform(carla.Location(x=2, z=1), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[2, 1],
# )
# self.front_camera_mas_baja = SensorManager(
# self.world,
# self.display_manager,
# "RGBCamera",
# carla.Transform(carla.Location(x=2, z=0.5), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[0, 2],
# )
# self.front_camera_mas_baja_segmentated = SensorManager(
# self.world,
# self.display_manager,
# "SemanticCamera",
# carla.Transform(carla.Location(x=2, z=0.5), carla.Rotation(yaw=+00)),
# self.car,
# {},
# display_pos=[1, 2],
# )

# self.sergio_front_camera_mas_baja = SensorManager(
# self.world,
# self.display_manager,
# "SemanticCameraSergio",
# carla.Transform(carla.Location(x=2, z=0.5), carla.Rotation(yaw=+0)),
# self.car,
# {},
# display_pos=[2, 2],
# )

self.front_camera_1_5_bev = SensorManager(
self.world,
self.display_manager,
@@ -434,7 +343,7 @@ def step(self, action):
# print(f"=============== STEP ===================")

### -------- send action
params = self.control(action)
self.control(action)
# print(f"params = {params}")

# params["pos"] = 270
@@ -443,6 +352,16 @@
# stados = [stados]
# print(f"stados = {stados}")

render_params(
speed=self.params["speed"],
steering_angle=self.params["steering_angle"],
Steer=self.params["Steer"],
location=self.params["location"],
Throttle=self.params["Throttle"],
Brake=self.params["Brake"],
height=self.params["height"],
)

## -- states
mask = self.preprocess_image(
self.front_camera_1_5_red_mask.front_camera_red_mask
@@ -479,7 +398,7 @@ def step(self, action):
states_16 = counter_states.get(16)

## -------- Rewards
reward = self.rewards_easy(error, params)
reward = self.rewards_easy(error, self.params)

## ----- hacemos la salida
done = False
@@ -514,32 +433,40 @@ def control(self, action):

steering_angle = 0
if action == 0:
self.car.apply_control(carla.VehicleControl(throttle=1, steer=-0.2))
# self.car.apply_control(carla.VehicleControl(throttle=1, steer=-0.2))
self._control.throttle = min(self._control.throttle + 0.1, 1.00)
self._control.steer = -0.2
steering_angle = -0.2
elif action == 1:
self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.0))
# self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.0))
self._control.throttle = min(self._control.throttle + 0.1, 1.00)
self._control.steer = 0.0
steering_angle = 0
elif action == 2:
self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.2))
# self.car.apply_control(carla.VehicleControl(throttle=1, steer=0.2))
self._control.throttle = min(self._control.throttle + 0.1, 1.00)
self._control.steer = 0.2
steering_angle = 0.2
# elif action == 3:
# self.vehicle.apply_control(carla.VehicleControl(throttle=0.2, steer=-0.4))
# steering_angle = 0.4
# elif action == 4:
# self.vehicle.apply_control(carla.VehicleControl(throttle=0.2, steer=0.4))
# steering_angle = 0.4
else:
print("error in action")
pass
params = {}

v = self.car.get_velocity()
params["velocity"] = math.sqrt(v.x**2 + v.y**2 + v.z**2)
self.car.apply_control(self._control)

t = self.car.get_transform()
v = self.car.get_velocity()
c = self.car.get_control()
w = self.car.get_angular_velocity()
params["steering_angle"] = steering_angle
self.params["speed"] = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))
self.params["steering_angle"] = w
print(f"{self.params['steering_angle'].x = }")
print(f"{self.params['steering_angle'].y = }")
print(f"{self.params['steering_angle'].z = }")

return params
self.params["location"] = (t.location.x, t.location.y)
self.params["Throttle"] = c.throttle
self.params["Steer"] = c.steer
self.params["Brake"] = c.brake
self.params["height"] = t.location.z

# return self.params
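Note: the rewritten control() keeps one persistent carla.VehicleControl, ramps throttle by 0.1 per step (capped at 1.0) instead of re-applying full throttle, and caches speed, location, and height in self.params for render_params() and the reward. A reduced sketch of that pattern, assuming the CARLA Python API is installed, a simulator is running, and a vehicle actor named car has already been spawned:

    import math
    import carla  # requires the CARLA Python API and a running simulator

    control = carla.VehicleControl()
    params = {}
    STEER_FOR_ACTION = {0: -0.2, 1: 0.0, 2: 0.2}  # discrete action set, as in the diff

    def apply_discrete_action(car, action):
        # Ramp throttle gradually instead of jumping straight to 1.0.
        control.throttle = min(control.throttle + 0.1, 1.0)
        control.steer = STEER_FOR_ACTION.get(action, 0.0)
        car.apply_control(control)

        v = car.get_velocity()   # m/s vector
        t = car.get_transform()
        params["speed"] = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))  # km/h
        params["location"] = (t.location.x, t.location.y)
        params["height"] = t.location.z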

def rewards_followlane_dist_v_angle(self, error, params):
# rewards = []
@@ -572,8 +499,8 @@ def rewards_easy(self, error, params):
rewards.append(-100)

function_reward = sum(rewards) / len(rewards)
function_reward += params["velocity"] * 0.5
function_reward -= params["steering_angle"] * 1.02
function_reward += params["speed"] * 0.5
# function_reward -= params["steering_angle"] * 1.02

return function_reward
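Note: the speed bonus now uses params["speed"], which control() stores in km/h, rather than params["velocity"] in m/s, so the bonus scales differently. A quick illustrative calculation with made-up per-point rewards:

    # Illustrative only: the per-point rewards and the speed are assumed values.
    rewards = [0.8, 0.6, 0.4]   # hypothetical lane-distance rewards for one step
    speed_kmh = 20              # self.params["speed"] is int(3.6 * |v|), i.e. km/h
    function_reward = sum(rewards) / len(rewards) + speed_kmh * 0.5
    print(function_reward)      # 0.6 + 10.0 = 10.6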
