main_program.py
# all necessary import statements
from modules import ChannelAttention, SpatialAttention
import tensorflow as tf
import tensorflow_lattice as tfl
from tensorflow.keras import layers
from tf_agents.environments import suite_gym
from tf_agents.agents.reinforce import reinforce_agent
from tf_agents.drivers import py_driver
from tf_agents.environments import tf_py_environment
from tf_agents.networks import actor_distribution_network
from tf_agents.policies import py_tf_eager_policy
from tf_agents.replay_buffers import reverb_replay_buffer
from tf_agents.replay_buffers import reverb_utils
from tf_agents.specs import tensor_spec
from tf_agents.utils import common
import matplotlib.pyplot as plt
import reverb
# <---------------we define neural net parameters and models here----------------------------->
# ChannelAttention and SpatialAttention classes are imported from modules.py
# there is no sound in the simulation, so sound-processing functionality is omitted
# creating the sequential model of the network
model = tf.keras.models.Sequential()
# tensorflow layer for simulated camera data
image_layer = tf.keras.models.Sequential([
    tf.keras.applications.resnet50.ResNet50(
        include_top=False,
        weights="imagenet",
        input_tensor=None,
        input_shape=None,
        pooling=None),
    ChannelAttention(128, 8),
    SpatialAttention(7),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, 3, padding='same', activation='elu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, 3, padding='same', activation='elu'),
    ChannelAttention(128, 8),
    SpatialAttention(7),
    # global average pooling comes last so the branch emits a flat feature vector
    layers.GlobalAveragePooling2D(),
])
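# assumption about the attention blocks (defined in modules.py, not shown here): ChannelAttention(128, 8)
# is taken to mean (number of channels, reduction ratio) and SpatialAttention(7) a convolution kernel size,
# in the style of CBAM-type attention modules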
# tensorflow layer for simulated radar data
radar_layer = tf.keras.models.Sequential([
    tf.keras.applications.resnet50.ResNet50(
        include_top=False,
        weights=None,
        input_tensor=None,
        input_shape=None,
        pooling=None),
    ChannelAttention(128, 8),
    SpatialAttention(7),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, 3, padding='same', activation='elu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, 3, padding='same', activation='elu'),
    ChannelAttention(128, 8),
    SpatialAttention(7),
    # global average pooling comes last so the branch emits a flat feature vector
    layers.GlobalAveragePooling2D(),
])
# tensorflow layer for simulated lidar data
lidar_layer = tf.keras.models.Sequential([
    tf.keras.applications.resnet50.ResNet50(
        include_top=False,
        weights="imagenet",
        input_tensor=None,
        input_shape=None,
        pooling=None),
    ChannelAttention(128, 8),
    SpatialAttention(7),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, 3, padding='same', activation='elu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, 3, padding='same', activation='elu'),
    ChannelAttention(128, 8),
    SpatialAttention(7),
    # global average pooling comes last so the branch emits a flat feature vector
    layers.GlobalAveragePooling2D(),
])
# final fusion layer to which the outputs of the three preceding branches are fed
fusion_layer = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(25, activation='relu'),
    tf.keras.layers.Dense(10, activation=None),
])
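# the fusion MLP progressively compresses the combined branch features (100 -> 50 -> 25) down to a
# 10-dimensional linear output that serves as the fused feature vector for decision making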
# <--pre-training the sensor models before training the agent, to allow for high-efficiency training of the model in simulation---->
# this step will not be done in the federated learning module, as it only needs to be done once
# compiling the models
radar_layer.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)
lidar_layer.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)
"""
Statements to be executed after importing radar and lidar data from internet, will most probably
require cloud or gpu support(will probably prefer cloud due to ease of setup, personal gpu to setup and program from
start to finish will take a lot of time and effort to start and run)
radar_layer.fit(x_train_radar, y_train_radar, batch_size=64, epochs=10)
lidar_layer.fit(x_train_lidar,y_train_lidar,batch_size=64, epochs=10)
"""
# defining a parallel combination layer that runs the three sensor branches side by side
main_layer = tfl.layers.ParallelCombination()
main_layer.append(image_layer)
main_layer.append(radar_layer)
main_layer.append(lidar_layer)
# the TensorFlow model design is now complete
model.add(main_layer)
model.add(fusion_layer)
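# overall intended data flow: each simulated sensor stream passes through its own ResNet50 + attention branch,
# the ParallelCombination layer groups the three branch outputs together, and the fusion MLP reduces the
# combined features to a 10-dimensional vector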
# <---------------we define tensorflow agent here----------------------------------->
# defining agent hyperparameters
env_name = "Mathworks simulation"
num_iterations = 5000
collect_episodes_per_iteration = 5
replay_buffer_capacity = 2000
learning_rate = 1e-3
log_interval = 25
num_eval_episodes = 10
eval_interval = 50
# setting up two environments (one for training and one for evaluation)
train_py_env = suite_gym.load(env_name)
eval_py_env = suite_gym.load(env_name)
# converting the environments to tensor form to speed up training and evaluation
train_env = tf_py_environment.TFPyEnvironment(train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)
# creating the ego agent's actor distribution network
# its input/output specs come from the environment, and the sensor-fusion layers act as the preprocessing stage
actor_net = actor_distribution_network.ActorDistributionNetwork(
    train_env.observation_spec(),
    train_env.action_spec(),
    preprocessing_layers=main_layer,
    fc_layer_params=(200, 100),
    name='The king of all ego actors')
# initialising some agent training hyperparameters
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
train_step_counter = tf.Variable(0)
# building the training agent
tf_agent = reinforce_agent.ReinforceAgent(
train_env.time_step_spec(),
train_env.action_spec(),
actor_network=actor_net,
optimizer=optimizer,
normalize_returns=True,
train_step_counter=train_step_counter)
tf_agent.initialize()
# setting initial agent policies
eval_policy = tf_agent.policy
collect_policy = tf_agent.collect_policy
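# following the TF-Agents convention: tf_agent.policy is the main policy used for evaluation and deployment,
# while tf_agent.collect_policy is the exploratory policy used to gather training data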
# computes the average return of a policy over several episodes; used to evaluate the agent during training
def compute_avg_return(environment, policy, num_episodes=10):
    total_return = 0.0
    for _ in range(num_episodes):
        time_step = environment.reset()
        episode_return = 0.0
        while not time_step.is_last():
            action_step = policy.action(time_step)
            time_step = environment.step(action_step.action)
            episode_return += time_step.reward
        total_return += episode_return
    average_return = total_return / num_episodes
    return average_return.numpy()[0]
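# usage sketch (hypothetical call, not in the original): a quick sanity check of the untrained policy, e.g.
#   baseline_return = compute_avg_return(eval_env, eval_policy, num_episodes=2)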
# replay buffer that stores data from the simulation for use during training
# WARNING: THIS PORTION OF CODE WILL ONLY WORK ON LINUX; IT WILL NOT WORK ON WINDOWS OR MACOS
# (IT WILL SHOW A "COULDN'T FIND REFERENCE" ERROR). PLEASE COMMENT THIS PORTION OUT IF YOU ARE
# NOT USING LINUX OR A LINUX-BASED OS, OR IF YOU ARE ON A MOBILE DEVICE OR TABLET
table_name = 'uniform_table'
replay_buffer_signature = tensor_spec.from_spec(
tf_agent.collect_data_spec)
replay_buffer_signature = tensor_spec.add_outer_dim(
replay_buffer_signature)
table = reverb.Table(
table_name,
max_size=replay_buffer_capacity,
sampler=reverb.selectors.Uniform(),
remover=reverb.selectors.Fifo(),
rate_limiter=reverb.rate_limiters.MinSize(1),
signature=replay_buffer_signature)
reverb_server = reverb.Server([table])
replay_buffer = reverb_replay_buffer.ReverbReplayBuffer(
tf_agent.collect_data_spec,
table_name=table_name,
sequence_length=None,
local_server=reverb_server)
rb_observer = reverb_utils.ReverbAddEpisodeObserver(
replay_buffer.py_client,
table_name,
replay_buffer_capacity
)
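# summary of the pipeline above: the Reverb server hosts 'uniform_table' as out-of-process storage,
# ReverbReplayBuffer reads trajectories back from that table during training, and
# ReverbAddEpisodeObserver writes each collected episode into it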
# driver that collects episode data (observations, actions, rewards, etc., not the whole simulation) and writes it to the replay buffer
def collect_episode(environment, policy, num_episodes):
    driver = py_driver.PyDriver(
        environment,
        py_tf_eager_policy.PyTFEagerPolicy(
            policy, use_tf_function=True),
        [rb_observer],
        max_episodes=num_episodes)
    initial_time_step = environment.reset()
    driver.run(initial_time_step)
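# usage sketch: the training loop below calls this as
#   collect_episode(train_py_env, tf_agent.collect_policy, collect_episodes_per_iteration)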
# training the agent
# Optimize by wrapping some code in a graph using TF function.
tf_agent.train = common.function(tf_agent.train)
# Reset the train step
tf_agent.train_step_counter.assign(0)
# Evaluate the agent's policy once before training.
avg_return = compute_avg_return(eval_env, tf_agent.policy, num_eval_episodes)
returns = [avg_return]
for _ in range(num_iterations):
    # Collect a few episodes using collect_policy and save to the replay buffer.
    collect_episode(
        train_py_env, tf_agent.collect_policy, collect_episodes_per_iteration)
    # Use data from the buffer and update the agent's network.
    iterator = iter(replay_buffer.as_dataset(sample_batch_size=1))
    trajectories, _ = next(iterator)
    train_loss = tf_agent.train(experience=trajectories)
    replay_buffer.clear()
    step = tf_agent.train_step_counter.numpy()
    if step % log_interval == 0:
        print('step = {0}: loss = {1}'.format(step, train_loss.loss))
    if step % eval_interval == 0:
        avg_return = compute_avg_return(eval_env, tf_agent.policy, num_eval_episodes)
        print('step = {0}: Average Return = {1}'.format(step, avg_return))
        returns.append(avg_return)
# plotting the agent's average return (efficiency) over training steps
steps = range(0, num_iterations + 1, eval_interval)
plt.plot(steps, returns)
plt.ylabel('Average Return')
plt.xlabel('Step')
plt.ylim(top=5000)
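# display the training curve (assumes an interactive matplotlib backend; when running headless,
# plt.savefig('returns.png') could be used instead)
plt.show()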