Merge pull request #52 from RLBots/copy_trainer
Added Keras support to the model.
Also added some ways to visualize the models.
Fixed a ton of bugs.
dtracers authored Jan 23, 2018
2 parents 62b6a0d + afb2697 commit 07a523c
Showing 36 changed files with 1,209 additions and 533 deletions.
19 changes: 10 additions & 9 deletions TutorialBot/tutorial_bot_output.py
@@ -5,7 +5,7 @@
class TutorialBotOutput:
# Constants
distance_from_ball_to_go_fast = tf.constant(600.0)
distance_from_ball_to_boost = tf.constant(1500.0) # Minimum distance to ball for using boost
distance_from_ball_to_boost = tf.constant(2000.0) # Minimum distance to ball for using boost
unreal_to_degrees = tf.constant(
1.0 / 65536.0 * 360.0) # The numbers used to convert unreal rotation units to degrees
true = tf.constant(1.0)
@@ -23,7 +23,7 @@ def distance(self, x1, y1, x2, y2):
def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance_to_ball, is_on_ground):
full_turn_angle = 70.0
half_turn_angle = 30.0
powerslide_angle_constant = 710.0 # The angle (from the front of the bot to the ball) to start to powerslide.
powerslide_angle_constant = 80.0 # The angle (from the front of the bot to the ball) to start to powerslide.

angle_front_to_target = self.feature_creator.generate_angle_to_target(bot_position.X, bot_position.Y,
bot_rotation,
@@ -46,14 +46,15 @@ def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance

jump = tf.cast(should_jump, tf.float32)

powerslide_angle = full_turn_angle * tf.cast(tf.less(1000.0, distance_to_ball), tf.float32)
powerslide_angle = powerslide_angle_constant + powerslide_angle

ps = tf.greater(tf.abs(angle_front_to_target), powerslide_angle)
ps = tf.logical_and(tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle),
tf.less_equal(distance_to_ball, 2000.0))
# ps = tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle)
power_slide = tf.cast(ps, tf.float32)

should_not_dodge = tf.cast(tf.greater_equal(distance_to_ball, 500), tf.float32)

# if jump is 1 then we should not execute a turn
safe_steer = steer * (1.0 - jump)
safe_steer = steer * (1.0 - jump * should_not_dodge)
return (safe_steer, power_slide, jump)

def get_output_vector(self, values):
@@ -79,8 +80,8 @@ def get_output_vector(self, values):
xy_distance = self.distance(bot_pos.X, bot_pos.Y, ball_pos.X, ball_pos.Y)

# Boost when ball is far enough away
boost = tf.logical_and(tf.greater(xy_distance, self.distance_from_ball_to_boost),
tf.greater(car_boost, 34))
boost = tf.logical_and(tf.greater_equal(xy_distance, self.distance_from_ball_to_boost / car_boost),
tf.greater_equal(car_boost, 10))
full_throttle = 0.5 * tf.cast(tf.greater(xy_distance, self.distance_from_ball_to_go_fast), tf.float32)
throttle = full_throttle + tf.constant(0.5)

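To make the behaviour change easier to follow, here is a plain-Python sketch of the new powerslide and boost conditions (constants and thresholds are taken from the diff; the standalone helper functions are illustrative only, since the real code builds TensorFlow graph ops):

def should_powerslide(angle_front_to_target, distance_to_ball,
                      full_turn_angle=70.0, max_slide_distance=2000.0):
    # New rule: powerslide only when the target is far off to the side AND the
    # ball is close enough that a sharp turn is worth the speed loss.
    return abs(angle_front_to_target) >= full_turn_angle and distance_to_ball <= max_slide_distance

def should_boost(xy_distance, car_boost, distance_from_ball_to_boost=2000.0):
    # New rule: the required distance shrinks as the car holds more boost, and
    # at least 10 boost must be available. Checking car_boost first also avoids
    # a division by zero in this plain-Python version.
    return car_boost >= 10 and xy_distance >= distance_from_ball_to_boost / car_boost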
1 change: 1 addition & 0 deletions bot_manager.py
@@ -132,6 +132,7 @@ def run(self):
print('\n\n\n\n Match has ended so ending bot loop\n\n\n\n\n')
break

controller_input = None
# Run the Agent only if the gameInfo has updated.
tick_game_time = game_tick_packet.gameInfo.TimeSeconds
should_call_while_paused = datetime.now() - last_call_real_time >= MAX_AGENT_CALL_PERIOD
25 changes: 20 additions & 5 deletions conversions/input/input_formatter.py
@@ -118,11 +118,17 @@ def get_car_info(self, game_tick_packet, index):
player_team = game_tick_packet.gamecars[index].Team
player_boost = game_tick_packet.gamecars[index].Boost
last_touched_ball = self.get_last_touched_ball(game_tick_packet.gamecars[index], game_tick_packet.gameball.LatestTouch)
car_array = [player_x, player_y, player_z, player_pitch, player_yaw, player_roll,
player_speed_x, player_speed_y, player_speed_z, player_angular_speed_x,
player_angular_speed_y, player_angular_speed_z,
player_on_ground, player_supersonic, player_demolished, player_jumped,
player_double_jumped, player_team, player_boost, last_touched_ball]
car_array = [player_x, player_y, player_z,
player_pitch, player_yaw, player_roll,
player_speed_x, player_speed_y, player_speed_z,
player_angular_speed_x, player_angular_speed_y, player_angular_speed_z,
player_on_ground,
player_supersonic,
player_demolished,
player_jumped, player_double_jumped,
player_team,
player_boost,
last_touched_ball]
return car_array

def get_last_touched_ball(self, car, latest_touch):
@@ -190,6 +196,15 @@ def get_score_info(self, Score, diff_in_score):

return [score, goals, own_goals, assists, saves, shots, demolitions, diff_in_score]

def format_array(self, array):
"""
Formats the array to properly fit the model
:param array: A numpy array that is being rescaled
:return: A new array that has been properly formatted
"""
return np.array(array, dtype=np.float32)

def flattenArrays(self, array_of_array):
"""
Takes an array of arrays and flattens it into a single array
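The new base-class format_array is a thin float32 conversion; a minimal standalone illustration with made-up values:

import numpy as np

batch = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]  # two hypothetical input rows
arr = np.array(batch, dtype=np.float32)     # equivalent to InputFormatter.format_array(batch)
print(arr.shape, arr.dtype)                 # (2, 3) float32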
42 changes: 42 additions & 0 deletions conversions/input/simple_input_formatter.py
@@ -0,0 +1,42 @@
import numpy as np

from conversions.input.input_formatter import InputFormatter


class SimpleInputFormatter(InputFormatter):

def create_input_array(self, game_tick_packet, passed_time=0.0):
# posx, posy, posz, rotx, roty, rotz, vx, vy, vz, angvx, angvy, angvz, boost_amt, ballx, bally, ballz, ballvx, ballvy, ballvz
inputs = [game_tick_packet.gamecars[self.index].Location.X,
game_tick_packet.gamecars[self.index].Location.Y,
game_tick_packet.gamecars[self.index].Location.Z,
game_tick_packet.gamecars[self.index].Rotation.Pitch,
game_tick_packet.gamecars[self.index].Rotation.Yaw,
game_tick_packet.gamecars[self.index].Rotation.Roll,
game_tick_packet.gamecars[self.index].Velocity.X,
game_tick_packet.gamecars[self.index].Velocity.Y,
game_tick_packet.gamecars[self.index].Velocity.Z,
game_tick_packet.gamecars[self.index].AngularVelocity.X,
game_tick_packet.gamecars[self.index].AngularVelocity.Y,
game_tick_packet.gamecars[self.index].AngularVelocity.Z,
game_tick_packet.gamecars[self.index].Boost,
game_tick_packet.gameball.Location.X,
game_tick_packet.gameball.Location.Y,
game_tick_packet.gameball.Location.Z,
game_tick_packet.gameball.Velocity.X,
game_tick_packet.gameball.Velocity.Y,
game_tick_packet.gameball.Velocity.Z
]
return inputs

def get_state_dim(self):
return 19

def format_array(self, input_length, array):
"""
Formats the array to properly fit the model
:param input_length: The batch size of the array
:param array: A numpy array that is being rescaled
:return: A new array that has been properly formatted
"""
return array.reshape(input_length, self.get_state_dim())
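For context, SimpleInputFormatter encodes one frame as 19 scalars (car position, rotation, velocity, angular velocity and boost, then ball position and velocity), which is why get_state_dim() returns 19. A small sketch of the reshape that format_array is meant to perform (the frame count is made up):

import numpy as np

STATE_DIM = 19                                    # matches get_state_dim()
flat = np.zeros(5 * STATE_DIM, dtype=np.float32)  # 5 hypothetical recorded frames, flattened
batch = flat.reshape(5, STATE_DIM)                # what format_array(5, flat) should return
print(batch.shape)                                # (5, 19)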
16 changes: 8 additions & 8 deletions conversions/output_formatter.py
@@ -85,14 +85,14 @@ def get_car_info(array, index):
car_info.Rotation = create_3D_rotation(array, index + 3)
car_info.Velocity = create_3D_point(array, index + 6)
car_info.AngularVelocity = create_3D_point(array, index + 9)
car_info.bOnGround = array[12]
car_info.bSuperSonic = array[13]
car_info.bDemolished = array[14]
car_info.bJumped = array[15]
car_info.bDoubleJumped = array[16]
car_info.Team = array[17]
car_info.Boost = array[18]
car_info.bLastTouchedBall = array[19]
car_info.bOnGround = array[index + 12]
car_info.bSuperSonic = array[index + 13]
car_info.bDemolished = array[index + 14]
car_info.bJumped = array[index + 15]
car_info.bDoubleJumped = array[index + 16]
car_info.Team = array[index + 17]
car_info.Boost = array[index + 18]
car_info.bLastTouchedBall = array[index + 19]
return car_info


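The edit above fixes an offset bug: car fields were read from absolute positions 12-19, which is only correct for a car whose slice starts at offset 0. A hedged sketch of the relative indexing (the field order comes from input_formatter.get_car_info; the helper is illustrative):

# Field order within one car's slice:
# 0-2 position, 3-5 rotation, 6-8 velocity, 9-11 angular velocity,
# 12 on_ground, 13 supersonic, 14 demolished, 15 jumped, 16 double_jumped,
# 17 team, 18 boost, 19 last_touched_ball

def read_boost(array, index):
    # 'index' is where this car's slice begins, so every field is offset from it.
    return array[index + 18]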
8 changes: 8 additions & 0 deletions modelHelpers/actions/action_factory.py
@@ -30,6 +30,14 @@
[('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))],
[]]

regression_everything = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_SQUARE_MEAN),
('yaw', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('pitch', (-1, 1.5, .5), LOSS_SQUARE_MEAN),
('roll', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('jump', (0, 2, 1), LOSS_SQUARE_MEAN),
('boost', (0, 2, 1), LOSS_SQUARE_MEAN),
('handbrake', (0, 2, 1), LOSS_SQUARE_MEAN)],
[],
[]]

def get_handler(split_mode=True, control_scheme=default_scheme):
"""
Creates a handler based on the options given.
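Each control in a scheme is declared as (name, (start, stop, step)) plus an optional loss, and the handler expands the range with np.arange (see create_range_action in dynamic_action_handler.py below). A quick look at what the tuples above expand to:

import numpy as np

print(np.arange(-1, 1.5, .5))  # [-1.  -0.5  0.   0.5  1. ]  analog controls, 5 bins
print(np.arange(0, 2, 1))      # [0 1]                       binary controls (jump, boost, handbrake)

# In regression_everything every entry also carries LOSS_SQUARE_MEAN, so each
# control is trained as a regression target rather than a classified bin.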
15 changes: 11 additions & 4 deletions modelHelpers/actions/action_handler.py
@@ -165,22 +165,26 @@ def get_random_action(self):
pass

def get_random_option(self):
return [random.randrange(self.get_logit_size())]
return [random.randrange(self.get_action_sizes())]

def run_func_on_split_tensors(self, input_tensors, split_func):
def run_func_on_split_tensors(self, input_tensors, split_func, return_as_list=False):
"""
Optionally splits the tensor and runs a function on the split tensor
If the tensor should not be split it runs the function on the entire tensor
:param tf: tensorflow
:param input_tensors: needs to have shape of (?, num_actions)
:param split_func: a function that is called with a tensor or array the same rank as input_tensor.
It should return a tensor with the same rank as input_tensor
:return: a stacked tensor (see tf.stack) or the same tensor depending on if it is in split mode or not.
:param return_as_list: If true then the result will be a list of tensors instead of a single stacked tensor
:return: a single tensor or a tensor wrapped in a list
"""

if not isinstance(input_tensors, collections.Sequence):
input_tensors = [input_tensors]
return split_func(*input_tensors)
if return_as_list:
return [split_func(*input_tensors)]
return split_func(*input_tensors)


def optionally_split_numpy_arrays(self, numpy_array, split_func, is_already_split=False):
"""
@@ -256,3 +260,6 @@ def scale_layer(self, layer, index):

def get_loss_type(self, index):
return 'softmax'

def is_classification(self, index):
return True
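The new is_classification hook lets training code pick a loss per action index. A hedged sketch of how a caller might branch on it (the wrapper below and its squared-error fallback are illustrative, not part of this commit; get_action_loss_from_logits is the handler method shown further down in this diff):

import tensorflow as tf

def loss_for_action(handler, logits, labels, index):
    # Classification outputs (the base-class default) keep their softmax-style
    # loss; regression outputs fall back to a plain squared-error loss.
    if handler.is_classification(index):
        return handler.get_action_loss_from_logits(logits, labels, index)
    return tf.reduce_mean(tf.square(tf.cast(labels, tf.float32) - logits))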
66 changes: 39 additions & 27 deletions modelHelpers/actions/dynamic_action_handler.py
@@ -62,29 +62,7 @@ def create_range_action(self, item):
action_data = np.arange(*item[1])
return action_data

def create_actions(self):
self.reset()

for i, item in enumerate(self.control_names):
self.control_names_index_map[item] = i

ranges = self.control_scheme[0]
combo_scheme = self.control_scheme[1]
copies = self.control_scheme[2]

for item in ranges:
action = self.create_range_action(item)
self.action_sizes.append(len(action))
self.action_name_index_map[item[0]] = len(self.action_list_names)
if len(item) > 2:
self.action_loss_type_map[len(self.action_list_names)] = item[2]
else:
self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS
self.action_list_names.append(item[0])
self.actions.append(action)

self.ranged_actions = list(self.actions)

def create_combo_actions(self, combo_scheme):
for item in combo_scheme:
action = self.create_range_action(item)
self.combo_name_list.append(item[0])
@@ -101,6 +79,38 @@ def create_actions(self):
self.action_list_names.append(COMBO)
self.actions.append(self.button_combo)

def create_ranged_actions(self, ranges):
for item in ranges:
action = self.create_range_action(item)
self.action_sizes.append(len(action))
self.action_name_index_map[item[0]] = len(self.action_list_names)
if len(item) > 2:
self.action_loss_type_map[len(self.action_list_names)] = item[2]
else:
self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS
self.action_list_names.append(item[0])
self.actions.append(action)

self.ranged_actions = list(self.actions)

def create_actions(self):
self.reset()

for i, item in enumerate(self.control_names):
self.control_names_index_map[item] = i

ranges = self.control_scheme[0]
combo_scheme = self.control_scheme[1]
copies = self.control_scheme[2]

if len(ranges) > 0:
self.create_ranged_actions(ranges)

if len(combo_scheme) > 0:
self.create_combo_actions(combo_scheme)
else:
self.action_name_index_map[COMBO] = -1

for item in copies:
self.action_name_index_map[item[0]] = self.action_name_index_map[item[1]]
return self.actions
@@ -110,7 +120,7 @@ def create_action_map(self):

def create_controller_from_selection(self, action_selection):
if len(action_selection) != len(self.actions):
raise Exception('Invalid action selection size')
raise Exception('Invalid action selection size ' + str(len(action_selection)) + ' : ' + str(len(self.actions)))

combo_index = self.action_name_index_map[COMBO]
controller_output = []
@@ -171,6 +181,7 @@ def create_tensorflow_controller_from_selection(self, action_selection, batch_si
output = tf.gather_nd(ranged_action, tf.stack([indexer, tf.cast(selection, tf.int32)], axis=1))
controller_output.append(output)
else:
# selection = tf.Print(selection, [selection], control)
controller_output.append(selection)

# make sure everything is the same type
@@ -259,8 +270,9 @@ def create_action_indexes_graph(self, real_action, batch_size=None):
elif indexes[action_index] is None:
indexes[action_index] = tf.squeeze(real_control, axis=1)

combo_action = self._create_combo_index_graph(combo_list, real_action)
indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1)
if len(self.combo_list) > 0:
combo_action = self._create_combo_index_graph(combo_list, real_action)
indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1)

result = tf.stack(indexes, axis=1)
return result
@@ -283,7 +295,7 @@ def get_action_loss_from_logits(self, logits, labels, index):
def get_last_layer_activation_function(self, func, index):
if self.is_classification(index):
return func
return None
return tf.nn.tanh

def scale_layer(self, layer, index):
"""
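Returning tf.nn.tanh instead of None for non-classification outputs bounds each regressed control to (-1, 1), which matches the valid range of the analog controls. A tiny numeric illustration:

import numpy as np

raw = np.array([-3.2, 0.0, 5.7])  # unbounded model outputs (made-up values)
print(np.tanh(raw))               # approx. [-0.997  0.  1.] -> stays inside the controller range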
4 changes: 3 additions & 1 deletion modelHelpers/data_normalizer.py
@@ -170,7 +170,9 @@ def apply_normalization(self, input_array):
# error_prevention = tf.cast(tf.equal(diff, 0.0), tf.float32)
# diff = diff + error_prevention

result = (input_array - min) / diff

#result = (input_array - min) / diff
result = input_array / diff
#result = tf.Print(result, [min], 'min', summarize=16)
#result = tf.Print(result, [max], 'max', summarize=16)
#result = tf.Print(result, [input_array[0]], 'inp', summarize=30)
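The normalization change stops shifting by the minimum: features are now only rescaled by the observed (max - min) spread, so zeros stay zero and signs are preserved instead of everything being squashed into [0, 1]. A small numeric comparison with a made-up feature range:

import numpy as np

x = np.array([-1000.0, 0.0, 1000.0])  # hypothetical feature values
lo, hi = -2000.0, 2000.0              # hypothetical observed min / max
diff = hi - lo

print((x - lo) / diff)  # old: [0.25 0.5  0.75]   shifted into [0, 1]
print(x / diff)         # new: [-0.25  0.    0.25]  zero-centred, sign preserved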