# Train.py
# Importing files from this project
import ResNet
import MCTS
import Multiprocessing
import time
# from TicTacToe import Gamelogic
# from TicTacToe import Config
from FourInARow import Gamelogic
from FourInARow import Config
# from keras.optimizers import SGD
# from loss import softmax_cross_entropy_with_logits, softmax
import numpy as np

# Creating and returning a tree with properties specified from the input
def get_tree(config, agent, game, dirichlet_noise=True, seed=0):
    tree = MCTS.MCTS()  # (game, game.get_board(), None, config)
    tree.dirichlet_noise = dirichlet_noise
    tree.NN_input_dim = config.board_dims
    tree.policy_output_dim = config.policy_output_dim
    tree.NN_output_to_moves_func = config.NN_output_to_moves
    tree.move_to_number_func = config.move_to_number
    tree.number_to_move_func = config.number_to_move
    tree.set_evaluation(agent)
    tree.set_game(game)
    # print("setting seed", seed)
    tree.set_seed(seed)
    return tree


def get_game_object():
    return Gamelogic.FourInARow()


# Holding one self-play game together with its search tree, so that many games
# can be advanced in lock-step and their network evaluations batched
class GameGenerator:
    def __init__(self, config, agent, seed=0):
        self.game = get_game_object()
        self.tree = get_tree(config, agent, self.game, seed=seed)
        self.history = []
        self.policy_targets = []
        self.player_moved_list = []
        self.positions = []

    # Running a single search step; returns the position that needs a network
    # evaluation, or None if no evaluation is needed
    def run_part1(self):
        return self.tree.search()

    # Backpropagating the evaluation result through the tree
    def run_part2(self, result):
        self.tree.backpropagate(result)
        return self

    # Playing the temperature-selected move and recording the training targets
    def execute_best_move(self):
        state = self.game.get_state()
        temp_move = self.tree.get_temperature_move(state, len_hist=len(self.history))
        # print(self.tree.get_prior_probabilities(state))
        # print("temp move", temp_move, self.tree.seed)
        self.history.append(temp_move)
        self.policy_targets.append(np.array(self.tree.get_posterior_probabilities(state)))
        self.player_moved_list.append(self.game.get_turn())
        self.positions.append(np.array(self.game.get_board()))
        self.game.execute_move(temp_move)
        return self, self.game.is_final()

    def reset_tree(self):
        self.tree.reset_search()
        # self.tree.root.board_state = self.game.get_board()

    # Returning the move history, board positions, policy targets and value targets
    # for a finished game
    def get_results(self):
        game_outcome = self.game.get_outcome()
        value_targets = [game_outcome[x] for x in self.player_moved_list]
        return self.history, self.positions, self.policy_targets, value_targets
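

# Illustrative sketch (not part of the training pipeline): how a single
# GameGenerator could be driven on its own. generate_data() below interleaves
# many generators instead so that evaluations can be batched; `num_searches`
# is a made-up name used only in this example.
#
# generator = GameGenerator(Config, agent, seed=0)
# finished = False
# while not finished:
#     generator.reset_tree()
#     for _ in range(num_searches):
#         state_to_evaluate = generator.run_part1()
#         if state_to_evaluate is None:
#             generator.run_part2(None)  # terminal node, nothing to evaluate
#         else:
#             policy, value = agent.predict(np.array([state_to_evaluate]))
#             generator.run_part2(np.array([policy[0], value[0][0]]))
#     _, finished = generator.execute_best_move()
# history, positions, policy_targets, value_targets = generator.get_results()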


# Generating data by self-play
def generate_data(res_dict, config1, num_games_each_process, num_search, num_process, name_weights, seeds):
    # import os
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ["CUDA_VISIBLE_DEVICES"] = ""
    import tensorflow as tf
    import ResNet as ResNet_p
    from keras.backend.tensorflow_backend import set_session
    from keras.optimizers import SGD
    from loss import softmax_cross_entropy_with_logits, softmax

    # Limiting how much GPU memory each worker process may claim
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.015)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    set_session(sess)

    # Building the network and loading the current weights
    h, w, d = config1.board_dims[1:]
    agent = ResNet_p.ResNet.build(h, w, d, 128, config1.policy_output_dim, num_res_blocks=10)
    agent.load_weights(name_weights)

    game_generators = [GameGenerator(config1, agent, seed=seeds[i]) for i in range(num_games_each_process)]

    x = []
    y_policy = []
    y_value = []
    print("Ready to play", num_process)

    # Cache of network evaluations, keyed by the string representation of a position
    predicted = {}
    while game_generators:
        for game_generator in game_generators:
            game_generator.reset_tree()
        print("len_dict", len(predicted.keys()))

        for i in range(num_search):
            res = [game_generator.run_part1() for game_generator in game_generators]

            to_predict = []
            to_predict_generators = []
            no_predict_generators = []
            predicted_generators = []
            predicted_states = []
            for idx in range(len(res)):
                if res[idx] is not None:
                    if np.array_str(res[idx]) in predicted:
                        # Position already evaluated (or queued); reuse the cached result
                        predicted_generators.append(game_generators[idx])
                        predicted_states.append(res[idx])
                    else:
                        to_predict.append(res[idx])
                        to_predict_generators.append(game_generators[idx])
                        # Placeholder entry; filled with the network output below before it is read
                        predicted[np.array_str(res[idx])] = None
                else:
                    # The search hit a terminal node, so no network evaluation is needed
                    no_predict_generators.append(game_generators[idx])

            # Evaluating all new positions in a single batch
            if to_predict:
                batch = np.array(to_predict)
                results = agent.predict(batch)
                for j in range(len(to_predict)):
                    predicted[np.array_str(to_predict[j])] = [results[0][j], results[1][j][0]]

            # Backpropagating the evaluations through each tree
            for j in range(len(predicted_generators)):
                predicted_generators[j].run_part2(np.array(predicted[np.array_str(predicted_states[j])]))
            for j in range(len(to_predict_generators)):
                to_predict_generators[j].run_part2(np.array([results[0][j], results[1][j][0]]))
            for no_predict_generator in no_predict_generators:
                no_predict_generator.run_part2(None)

        print("LEN", len(game_generators))
        res = [game_generator.execute_best_move() for game_generator in game_generators]

        # Splitting the games into those that are finished and those that continue
        game_generators = []
        finished_games = []
        for game_generator, finished in res:
            if finished:
                finished_games.append(game_generator)
                continue
            game_generators.append(game_generator)

        # Collecting training data from the finished games
        game_results = [game_generator.get_results() for game_generator in finished_games]
        for moves, positions, policy_targets, value_targets in game_results:
            print("moves", moves)
            x.extend(positions)
            y_policy.extend(policy_targets)
            y_value.extend(value_targets)

    print("finished")
    res_dict[str(num_process)] = [x, y_policy, y_value]
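

# Illustrative sketch only: generate_data is written to be run in several worker
# processes, with res_dict acting as a shared result store. The actual launcher is
# presumably the Multiprocessing module imported above; num_processes,
# games_per_process, searches_per_move and "checkpoint.h5" are made-up names for
# this example.
#
# import multiprocessing as mp
#
# manager = mp.Manager()
# res_dict = manager.dict()
# workers = []
# for p in range(num_processes):
#     seeds = [p * games_per_process + g for g in range(games_per_process)]
#     worker = mp.Process(target=generate_data,
#                         args=(res_dict, Config, games_per_process, searches_per_move,
#                               p, "checkpoint.h5", seeds))
#     worker.start()
#     workers.append(worker)
# for worker in workers:
#     worker.join()
# # res_dict[str(p)] now holds [x, y_policy, y_value] for worker p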


# # Generating data by self-play
# def generate_data(game, agent, config, num_sim=100, games=1):
#     tree = get_tree(config, agent, game)
#
#     x = []
#     y_policy = []
#     y_value = []
#
#     for curr_game in range(games):
#
#         game.__init__()
#         history = []
#         policy_targets = []
#         player_moved_list = []
#         positions = []
#
#         while not game.is_final():
#             tree.reset_search()
#             print("num_sim", num_sim)
#             tree.search_series(num_sim)
#             state = game.get_state()
#             temp_move = tree.get_temperature_move(state)
#             print("move:", temp_move)
#             print("temp_probs:", tree.get_temperature_probabilities(state))
#             history.append(temp_move)
#             policy_targets.append(np.array(tree.get_posterior_probabilities(state)))
#             print("prior_probs:", tree.get_prior_probabilities(state))  # reshape(1,3,3,2)
#             print("pol_targets", policy_targets[-1])
#             player_moved_list.append(game.get_turn())
#             positions.append(np.array(game.get_board()))
#
#             game.execute_move(temp_move)
#
#         game_outcome = game.get_outcome()
#         value_targets = [game_outcome[x] for x in player_moved_list]
#         print("val_targets:", value_targets)
#
#         x = x + positions
#         y_policy = y_policy + policy_targets
#         y_value = y_value + value_targets
#
#         print("History:", history)
#
#     return np.array(x), np.array(y_policy), np.array(y_value)


# Training AlphaZero by generating data from self-play and fitting the network


# Recursively picking the legal move with the highest predicted probability,
# zeroing out illegal moves until a legal one (or nothing) is left
def choose_best_legal_move(legal_moves, y_pred):
    best_move = np.argmax(y_pred)
    print("Best move", best_move)
    if y_pred[best_move] == 0:
        return None
    if best_move in legal_moves:
        return best_move
    else:
        y_pred[best_move] = 0
        return choose_best_legal_move(legal_moves, y_pred)
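

# Illustrative example: the probabilities and legal-move list below are made up.
# np.copy is used because choose_best_legal_move modifies y_pred in place.
#
# y_pred = np.array([0.05, 0.40, 0.30, 0.10, 0.10, 0.05, 0.00])
# legal_moves = [0, 2, 3, 4, 5]  # column 1 is full in this example
# move = choose_best_legal_move(legal_moves, np.copy(y_pred))
# # argmax is column 1, which is illegal, so it is zeroed and column 2 (0.30) is returned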