diff --git a/Othello/experiments/trainBaselinePlayerVsBest.py b/Othello/experiments/trainBaselinePlayerVsBest.py
index 80dcba3..0c4d9f4 100644
--- a/Othello/experiments/trainBaselinePlayerVsBest.py
+++ b/Othello/experiments/trainBaselinePlayerVsBest.py
@@ -25,7 +25,7 @@ def reset(self):
         return self
 
     def run(self, lr, silent=False):
-        self.player1 = self.pretrained_player if self.pretrained_player else LargeFCBaselinePlayer(lr=lr)
+        self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)
 
         # Player 2 has the same start conditions as Player 1 but does not train
         self.player2 = self.player1.copy(shared_weights=False)
@@ -47,7 +47,6 @@ def run(self, lr, silent=False):
 
             results, losses = self.simulation.run_simulations(games_per_evaluation)
             self.add_loss(np.mean(losses))
-            self.add_results(("Losses", np.mean(losses)))
            self.add_results(("Best", np.mean(results)))
 
             # evaluate
diff --git a/Othello/experiments/trainBaselinePlayerVsSelf.py b/Othello/experiments/trainBaselinePlayerVsSelf.py
index 2b09fd9..80d3ac3 100644
--- a/Othello/experiments/trainBaselinePlayerVsSelf.py
+++ b/Othello/experiments/trainBaselinePlayerVsSelf.py
@@ -25,7 +25,7 @@ def reset(self):
         return self
 
     def run(self, lr, silent=False):
-        self.player1 = self.pretrained_player if self.pretrained_player else LargeFCBaselinePlayer(lr=lr)
+        self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)
 
         games_per_evaluation = self.games // self.evaluations
         start_time = datetime.now()
@@ -44,7 +44,7 @@ def run(self, lr, silent=False):
 
             results, losses = self.simulation.run_simulations(games_per_evaluation)
             self.add_loss(np.mean(losses))
-            self.add_results(("Losses", np.mean(losses)))
+            self.add_results(("Self", np.mean(results)))
 
             # evaluate
             if episode*games_per_evaluation % 1000 == 0:
@@ -54,8 +54,8 @@ def run(self, lr, silent=False):
 
             if not silent and Printer.print_episode(episode*games_per_evaluation, self.games, datetime.now() - start_time):
                 self.plot_and_save(
-                    "%s vs BEST" % (self.player1.__str__() + (" milestones" if MILESTONES else "")),
-                    "Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
+                    "%s vs SELF" % (self.player1.__str__() + (" milestones" if MILESTONES else "")),
+                    "Train %s vs Self\nGames: %s Evaluations: %s\nTime: %s"
                     % (self.player1, episode*games_per_evaluation, self.evaluations, config.time_diff(start_time)))
 
             # If x/5th of training is completed, save milestone
diff --git a/Othello/experiments/trainBaselinePlayerVsTraditionalOpponent.py b/Othello/experiments/trainBaselinePlayerVsTraditionalOpponent.py
index d060d40..5f1665d 100644
--- a/Othello/experiments/trainBaselinePlayerVsTraditionalOpponent.py
+++ b/Othello/experiments/trainBaselinePlayerVsTraditionalOpponent.py
@@ -45,7 +45,7 @@ def run(self, lr, silent=False):
 
             results, losses = self.simulation.run_simulations(games_per_evaluation)
             self.add_loss(np.mean(losses))
-            self.add_results(("Losses", np.mean(losses)))
+            self.add_results(("Training Results", np.mean(results)))
 
             # evaluate
             self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
diff --git a/TicTacToe/experiments/reinforced/trainBaselinePlayerVsBest.py b/TicTacToe/experiments/reinforced/trainBaselinePlayerVsBest.py
index 564307b..5a65a2c 100644
--- a/TicTacToe/experiments/reinforced/trainBaselinePlayerVsBest.py
+++ b/TicTacToe/experiments/reinforced/trainBaselinePlayerVsBest.py
@@ -40,7 +40,6 @@ def run(self, lr, weight_decay, silent=False):
             self.simulation = TicTacToe([self.player1, self.player2])
             results, losses = self.simulation.run_simulations(games_per_evaluation)
             self.add_loss(np.mean(losses))
-            self.add_results(("Loss", np.mean(losses)))
             self.add_results(("Best", np.mean(results)))
 
             # evaluate
diff --git a/TicTacToe/experiments/reinforced/trainBaselinePlayerVsSelf.py b/TicTacToe/experiments/reinforced/trainBaselinePlayerVsSelf.py
index c141142..a07575b 100644
--- a/TicTacToe/experiments/reinforced/trainBaselinePlayerVsSelf.py
+++ b/TicTacToe/experiments/reinforced/trainBaselinePlayerVsSelf.py
@@ -39,7 +39,6 @@ def run(self, lr, weight_decay, silent=False):
 
             results, losses = self.simulation.run_simulations(games_per_evaluation)
             self.add_loss(np.mean(losses))
-            self.add_results(("Loss", np.mean(losses)))
             self.add_results(("Self", np.mean(results)))
 
             # evaluate
@@ -50,8 +49,8 @@ def run(self, lr, weight_decay, silent=False):
 
             if not silent and Printer.print_episode(episode*games_per_evaluation, self.games, datetime.now() - start_time):
                 self.plot_and_save(
-                    "%s vs BEST" % (self.player1),
-                    "Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
+                    "%s vs SELF" % (self.player1),
+                    "Train %s vs Self version of self\nGames: %s Evaluations: %s\nTime: %s"
                     % (self.player1, episode*games_per_evaluation, self.evaluations, config.time_diff(start_time)))
 
         self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
diff --git a/TicTacToe/experiments/reinforced/trainBaselinePlayerVsTraditionalOpponent.py b/TicTacToe/experiments/reinforced/trainBaselinePlayerVsTraditionalOpponent.py
index f458476..4abe11c 100644
--- a/TicTacToe/experiments/reinforced/trainBaselinePlayerVsTraditionalOpponent.py
+++ b/TicTacToe/experiments/reinforced/trainBaselinePlayerVsTraditionalOpponent.py
@@ -46,7 +46,7 @@ def run(self, lr, weight_decay, silent=False):
 
             results, losses = self.simulation.run_simulations(games_per_evaluation)
             self.add_loss(np.mean(losses))
-            self.add_results(("Loss", np.mean(losses)))
+            self.add_results(("Training Results", np.mean(results)))
 
             # evaluate
             self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
diff --git a/abstractClasses.py b/abstractClasses.py
index 0394196..09955f3 100644
--- a/abstractClasses.py
+++ b/abstractClasses.py
@@ -330,7 +330,7 @@ def forward(self, input, legal_moves_map):
         x = input
 
         # set illegal moves to zero, softmax, set illegal moves to zero again
        # x = input * legal_moves_map
-        x = self.softmax(x)
+        x = self.softmax(x).exp()
         x = x * legal_moves_map
         return x
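Note on the abstractClasses.py hunk: chaining .exp() onto the softmax output only makes sense if self.softmax is a log-softmax layer, so that exponentiating recovers probabilities before the legal-move mask zeroes out illegal entries. The standalone sketch below illustrates that reading; the LegalMoveSoftmax class, its names, and the use of torch.nn.LogSoftmax are assumptions for illustration, not the repository's actual module.

# Minimal sketch, assuming self.softmax is torch.nn.LogSoftmax (hypothetical names).
import torch
import torch.nn as nn


class LegalMoveSoftmax(nn.Module):
    """Illustrative layer: log-softmax over all moves, then zero out illegal ones."""

    def __init__(self, dim=-1):
        super().__init__()
        self.softmax = nn.LogSoftmax(dim=dim)

    def forward(self, input, legal_moves_map):
        x = self.softmax(input).exp()   # exp(log-probabilities) -> probabilities
        return x * legal_moves_map      # illegal moves get probability 0


if __name__ == "__main__":
    scores = torch.tensor([[1.0, 2.0, 0.5, -1.0]])
    legal = torch.tensor([[1.0, 0.0, 1.0, 1.0]])   # move 1 is illegal
    probs = LegalMoveSoftmax()(scores, legal)
    print(probs)  # illegal entry is exactly zero; remaining mass may need re-normalising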