# RL_Implementation.py
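# DQN-based traffic signal control for a single SUMO intersection ("J1"):
# the agent observes per-lane vehicle counts and waiting times, chooses one of
# four signal phases, and is rewarded with the negative total waiting time.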
import numpy as np
import tensorflow as tf
import random
import traci
from collections import deque
import csv
import matplotlib.pyplot as plt
import math
# Define the DQN Agent
class DQNAgent:
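    """A minimal Deep Q-Network agent: a small fully connected network maps the
    traffic state to one Q-value per signal phase, trained from a replay memory
    of past transitions with an epsilon-greedy exploration policy."""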
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995  # Defined but not used below; replay() applies an exponential schedule instead
        self.learning_rate = 0.001
        self.model = self._build_model()
    def _build_model(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(24, input_dim=self.state_size, activation='relu'),  # input layer sized to the state vector
            tf.keras.layers.Dense(24, activation='relu'),  # hidden layer
            tf.keras.layers.Dense(self.action_size, activation='linear')  # output layer: one Q-value per action
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
                      loss='mse')
        return model
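    # Epsilon-greedy action selection: with probability epsilon pick a random
    # phase (explore), otherwise pick the phase with the highest predicted
    # Q-value (exploit).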
    def act(self, state):  # epsilon-greedy policy
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])
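    # Experience replay: fit the network on a random minibatch of stored
    # transitions so consecutive (correlated) simulation steps are not
    # trained on in order.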
    def replay(self, batch_size):
        # Sample a minibatch of experiences from memory
        minibatch = random.sample(self.memory, batch_size)
        # Prepare batches of states and next states
        states = np.vstack([sample[0] for sample in minibatch])       # Extract states
        next_states = np.vstack([sample[3] for sample in minibatch])  # Extract next states
        # Predict Q-values for current states and next states in a batch
        target_f = self.model.predict(states, verbose=0)
        next_target = self.model.predict(next_states, verbose=0)
        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            # Compute the Q-learning target
            target = reward if done else reward + self.gamma * np.amax(next_target[i])
            target_f[i][action] = target  # Update the Q-value for the selected action
        # Train the model on the batch of states and updated Q-values
        self.model.fit(states, target_f, epochs=1, verbose=0, batch_size=batch_size)
        # Decay the exploration rate toward epsilon_min
        if self.epsilon > self.epsilon_min:
            self.epsilon *= math.exp(math.log(self.epsilon_min / self.epsilon) / 100)
# functions to interact with SUMO
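# The state vector contains three values per lane: vehicle count, accumulated
# waiting time, and the current phase of traffic light "J1" (the phase is
# repeated for every lane), so its length is 3 * number_of_lanes.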
def get_traffic_state():
    lanes = traci.lane.getIDList()  # Get all lanes in the simulation
    state = []
    for lane in lanes:
        num_vehicles = traci.lane.getLastStepVehicleNumber(lane)
        waiting_time = traci.lane.getWaitingTime(lane)
        traffic_light_phase = traci.trafficlight.getPhase("J1")  # Junction ID
        state.extend([num_vehicles, waiting_time, traffic_light_phase])
    return state
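# Each action index maps one-to-one onto a phase index of traffic light "J1".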
def apply_action(action):  # Apply the chosen action to the traffic light
    traffic_light_id = "J1"
    if action == 0:
        traci.trafficlight.setPhase(traffic_light_id, 0)
    elif action == 1:
        traci.trafficlight.setPhase(traffic_light_id, 1)
    elif action == 2:
        traci.trafficlight.setPhase(traffic_light_id, 2)
    elif action == 3:
        traci.trafficlight.setPhase(traffic_light_id, 3)
def compute_reward():
    total_waiting_time = sum(traci.lane.getWaitingTime(lane) for lane in traci.lane.getIDList())
    return -total_waiting_time  # Negative reward for waiting time at the intersection
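# Sums the CO2 emitted by vehicles that are currently standing still
# (speed == 0), in the units reported by SUMO's emission model.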
def track_co2_emissions():
    lanes = traci.lane.getIDList()  # Get all lanes in the simulation
    total_co2_stopped = 0  # Total CO2 emissions of stopped vehicles at this step
    for lane in lanes:
        # Get the IDs of all vehicles in the lane at this simulation step
        vehicle_ids = traci.lane.getLastStepVehicleIDs(lane)
        for vehicle_id in vehicle_ids:
            # Get the speed of the vehicle
            vehicle_speed = traci.vehicle.getSpeed(vehicle_id)
            # If the vehicle is stopped, add its CO2 emissions
            if vehicle_speed == 0:
                total_co2_stopped += traci.vehicle.getCO2Emission(vehicle_id)
    return total_co2_stopped
def save_results_to_csv(filename, data):
    try:
        with open(filename, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(data)
        print(f"Data saved to {filename}")
    except Exception as e:
        print(f"Error saving data: {e}")
def save_model(agent, filename="traffic_model.keras"):
    agent.model.save(filename)
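# A minimal sketch of how the saved model could be reloaded for greedy
# evaluation (assumes a TraCI connection is already open so get_traffic_state()
# and apply_action() are usable):
#
#     model = tf.keras.models.load_model("traffic_model.keras")
#     q_values = model.predict(np.array([get_traffic_state()]), verbose=0)
#     apply_action(int(np.argmax(q_values[0])))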
# Run the RL simulation
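# Training loop: 100 episodes of 100 simulation steps each. At every step the
# agent picks a signal phase (epsilon-greedy), stores the transition, and
# replays a minibatch of 32 once enough experience has accumulated; per-episode
# averages of waiting time and CO2 are appended to CSV files and both metrics
# are plotted across episodes at the end.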
def run_rl_simulation():
    sumo_binary = "sumo"  # "sumo-gui" for a GUI run, "sumo" for headless
    sumo_config = "/Users/yashraj/Library/CloudStorage/OneDrive-TechnischeHochschuleIngolstadt/THI/Academics/Sem 3/General Elective/Smart Mobility/My Paper/New/RL--based-Traffic-Intersection-Management/Main_Simulation.sumocfg"  # Replace with your SUMO config file path
    traci.start([sumo_binary, "-c", sumo_config, "--no-step-log", "true", "--log", "false"])
    avg_waiting_times_per_episode = []  # Average waiting time per episode
    avg_co2_emissions_per_episode = []  # Average CO2 emissions per episode
    state_size = 108  # Must match the length of the list returned by get_traffic_state()
    action_size = 4   # 4 actions (4 traffic light phases)
    agent = DQNAgent(state_size=state_size, action_size=action_size)
    episodes = 100  # Total episodes to run
    for episode in range(episodes):
        print(f"Starting episode {episode + 1}/{episodes}")
        traci.load(["-c", sumo_config])
        step = 0
        state = np.array([get_traffic_state()])
        episode_waiting_times = []
        episode_co2_emissions = []
        while step < 100:
            step += 1
            traci.simulationStep()
            print(f"Episode {episode + 1}/{episodes}, Step {step}/100")
            action = agent.act(state)
            apply_action(action)
            reward = compute_reward()
            next_state = np.array([get_traffic_state()])
            done = False
            agent.memory.append((state, action, reward, next_state, done))
            state = next_state
            if len(agent.memory) > 32:
                agent.replay(32)
            # Track waiting time and CO2 emissions for the current step
            total_waiting_time = sum(get_traffic_state()[1::3])  # Sum the per-lane waiting times from the state
            total_co2_stopped = track_co2_emissions()
            episode_waiting_times.append(total_waiting_time)
            episode_co2_emissions.append(total_co2_stopped)
        avg_waiting_time_rl = np.mean(episode_waiting_times)
        avg_co2_emissions_rl = np.mean(episode_co2_emissions)
        avg_waiting_times_per_episode.append(avg_waiting_time_rl)
        avg_co2_emissions_per_episode.append(avg_co2_emissions_rl)
        print(f"Episode {episode + 1} completed. Average waiting time: {avg_waiting_time_rl}, Average CO2 emissions: {avg_co2_emissions_rl}")
        save_results_to_csv("waiting_times_rl.csv", [avg_waiting_time_rl])
        save_results_to_csv("co2_emissions_rl.csv", [avg_co2_emissions_rl])
    # After all episodes, persist the trained model and close the simulation
    save_model(agent)
    traci.close()
    # Plot average waiting time per episode
    plt.figure()
    plt.plot(range(1, episodes + 1), avg_waiting_times_per_episode, marker='o', label="Waiting Time")
    plt.title('Average Waiting Time per Episode (RL)')
    plt.xlabel('Episode')
    plt.ylabel('Average Waiting Time')
    plt.grid()
    plt.legend()
    plt.savefig("average_waiting_time_rl.png", dpi=300)
    plt.show()
    # Plot average CO2 emissions per episode
    plt.figure()
    plt.plot(range(1, episodes + 1), avg_co2_emissions_per_episode, marker='o', color='orange', label="CO2 Emissions")
    plt.title('Average CO2 Emissions per Episode (RL)')
    plt.xlabel('Episode')
    plt.ylabel('Average CO2 Emissions')
    plt.grid()
    plt.legend()
    plt.savefig("average_co2_emissions_rl.png", dpi=300)
    plt.show()
# Run the RL simulation
if __name__ == "__main__":
    run_rl_simulation()