Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files (browse the repository at this point in the history)
  • Loading branch information
masus04 committed Sep 14, 2018
2 parents e579151 + 98f57f0 commit b5026ea
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 13 deletions.
3 changes: 1 addition & 2 deletions Othello/experiments/trainBaselinePlayerVsBest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def reset(self):
return self

def run(self, lr, silent=False):
self.player1 = self.pretrained_player if self.pretrained_player else LargeFCBaselinePlayer(lr=lr)
self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

# Player 2 has the same start conditions as Player 1 but does not train
self.player2 = self.player1.copy(shared_weights=False)
Expand All @@ -47,7 +47,6 @@ def run(self, lr, silent=False):

results, losses = self.simulation.run_simulations(games_per_evaluation)
self.add_loss(np.mean(losses))
self.add_results(("Losses", np.mean(losses)))
self.add_results(("Best", np.mean(results)))

# evaluate
Expand Down
8 changes: 4 additions & 4 deletions Othello/experiments/trainBaselinePlayerVsSelf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def reset(self):
return self

def run(self, lr, silent=False):
self.player1 = self.pretrained_player if self.pretrained_player else LargeFCBaselinePlayer(lr=lr)
self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

games_per_evaluation = self.games // self.evaluations
start_time = datetime.now()
Expand All @@ -44,7 +44,7 @@ def run(self, lr, silent=False):

results, losses = self.simulation.run_simulations(games_per_evaluation)
self.add_loss(np.mean(losses))
self.add_results(("Losses", np.mean(losses)))
self.add_results(("Self", np.mean(results)))

# evaluate
if episode*games_per_evaluation % 1000 == 0:
Expand All @@ -54,8 +54,8 @@ def run(self, lr, silent=False):

if not silent and Printer.print_episode(episode*games_per_evaluation, self.games, datetime.now() - start_time):
self.plot_and_save(
"%s vs BEST" % (self.player1.__str__() + (" milestones" if MILESTONES else "")),
"Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
"%s vs SELF" % (self.player1.__str__() + (" milestones" if MILESTONES else "")),
"Train %s vs Self\nGames: %s Evaluations: %s\nTime: %s"
% (self.player1, episode*games_per_evaluation, self.evaluations, config.time_diff(start_time)))

# If x/5th of training is completed, save milestone
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def run(self, lr, silent=False):

results, losses = self.simulation.run_simulations(games_per_evaluation)
self.add_loss(np.mean(losses))
self.add_results(("Losses", np.mean(losses)))
self.add_results(("Training Results", np.mean(results)))

# evaluate
self.player1.strategy.train, self.player1.strategy.model.training = False, False # eval mode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def run(self, lr, weight_decay, silent=False):
self.simulation = TicTacToe([self.player1, self.player2])
results, losses = self.simulation.run_simulations(games_per_evaluation)
self.add_loss(np.mean(losses))
self.add_results(("Loss", np.mean(losses)))
self.add_results(("Best", np.mean(results)))

# evaluate
Expand Down
5 changes: 2 additions & 3 deletions TicTacToe/experiments/reinforced/trainBaselinePlayerVsSelf.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def run(self, lr, weight_decay, silent=False):

results, losses = self.simulation.run_simulations(games_per_evaluation)
self.add_loss(np.mean(losses))
self.add_results(("Loss", np.mean(losses)))
self.add_results(("Self", np.mean(results)))

# evaluate
Expand All @@ -50,8 +49,8 @@ def run(self, lr, weight_decay, silent=False):

if not silent and Printer.print_episode(episode*games_per_evaluation, self.games, datetime.now() - start_time):
self.plot_and_save(
"%s vs BEST" % (self.player1),
"Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
"%s vs SELF" % (self.player1),
"Train %s vs Self version of self\nGames: %s Evaluations: %s\nTime: %s"
% (self.player1, episode*games_per_evaluation, self.evaluations, config.time_diff(start_time)))

self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def run(self, lr, weight_decay, silent=False):

results, losses = self.simulation.run_simulations(games_per_evaluation)
self.add_loss(np.mean(losses))
self.add_results(("Loss", np.mean(losses)))
self.add_results(("Training Results", np.mean(results)))

# evaluate
self.player1.strategy.train, self.player1.strategy.model.training = False, False # eval mode
Expand Down
2 changes: 1 addition & 1 deletion abstractClasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def forward(self, input, legal_moves_map):
x = input
# set illegal moves to zero, softmax, set illegal moves to zero again
# x = input * legal_moves_map
x = self.softmax(x)
x = self.softmax(x).exp()
x = x * legal_moves_map

return x
Expand Down

0 comments on commit b5026ea

Please sign in to comment.