From 6291b7c0b5dbbafcccb8c7ca157285bb66814151 Mon Sep 17 00:00:00 2001
From: oystkva
Date: Tue, 30 Apr 2024 00:07:08 +0200
Subject: [PATCH] feat: :art: Changed datatype for weights in "agent
 tournament" from a list of (int, agent) tuples to an np.array of weight
 vectors. Other functionality updated accordingly

---
 main.py                       |  27 ++----
 src/agents/agent.py           |  19 +---
 src/agents/geneticAlgAgent.py | 160 ++++++++++++++++++++++------------
 src/agents/heuristic.py       |  16 +---
 4 files changed, 117 insertions(+), 105 deletions(-)

diff --git a/main.py b/main.py
index fff6320..cc35e57 100644
--- a/main.py
+++ b/main.py
@@ -10,6 +10,8 @@ from src.agents.heuristic_trainer import train
 from src.agents.geneticAlgAgentJon import GeneticAlgAgentJM
 
+import numpy as np
+
 
 def test():
     # algAgent = GeneticAlgAgentJM()
@@ -38,15 +40,14 @@ def test():
     # print(utility(boards, 0, -1, 0, 0, 0))
     # boards.printBoard()
 
-<<<<<<< HEAD
-<<<<<<< HEAD
+
     # board = Tetris()
     # manager = TetrisGameManager(board)
     # agent = create_agent("heuristic")
 
     # # manager.startGame()
 
-<<<<<<< HEAD
+
     # # train()
 
 
@@ -54,25 +55,13 @@ def test():
     # algAgent.number_of_selection(2)
     # print(algAgent.getBestPop())
 
-    test()
+    # test()
 
-    # cProfile.run('main()', 'restats')
-=======
-    #train()
+    # # cProfile.run('main()', 'restats')
+    # # train()
 
->>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced)
-=======
-=======
->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
     board = Tetris()
     # manager = TetrisGameManager(board)
     # agent = create_agent("heuristic")
 
-    agents = train_genetic_algorithm(10)
-
-<<<<<<< HEAD
-    # manager.startDemo(agent)
->>>>>>> fa9eeb9 (Co-authored-by: Håvard Fossdal )
-=======
-    # manager.startDemo(agent)
->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
+    agents = train_genetic_algorithm(50)
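The commit message above describes replacing (fitness, agent) tuples with plain NumPy weight vectors plus a parallel fitness array. A minimal, self-contained sketch of that representation and of the descending-fitness ordering used throughout the patch (values and variable names here are illustrative, not code from the repository):

import numpy as np

population_size = 10
# One row per candidate: 5 heuristic weights drawn uniformly from [-2, 2], as in GeneticAgent.__init__.
weight_candidates = np.random.uniform(-2, 2, size=(population_size, 5))
# Fitness lives in a separate array, aligned with the rows above by index.
weight_fitnesses = np.random.rand(population_size)  # placeholder fitness values

# Sorting candidates and fitnesses with the same argsort keeps them aligned.
order = np.argsort(-weight_fitnesses)               # descending fitness
weight_candidates = weight_candidates[order]
weight_fitnesses = weight_fitnesses[order]
print(weight_candidates[0], weight_fitnesses[0])    # best candidate first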
""" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #count = 0 - while not board.isGameOver(): -======= pieces_dropped = 0 while not board.isGameOver() and pieces_dropped < max_pieces_dropped: ->>>>>>> b842fc6 (feat: :rocket: genetic agent class and it's training algorithm commenced) -======= - pieces_dropped = 0 - while not board.isGameOver() and pieces_dropped < max_pieces_dropped: ->>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced) -======= - pieces_dropped = 0 - while not board.isGameOver() and pieces_dropped < max_pieces_dropped: ->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29 # Get the result of the agent's action for _ in range(actions_per_drop): result = agent.result(board) @@ -82,9 +68,12 @@ def play_game(agent: Agent, board: Tetris, actions_per_drop: int = 1, max_pieces board.updateBoard() #board.printBoard() pieces_dropped += 1 + if pieces_dropped == max_pieces_dropped: + print("Max pieces dropped") return board + def playGameDemoStepByStep(agent: Agent, board: Tetris) -> Tetris: """ Plays a game of Tetris with the given agent where actions are slowed down for demonstration purposes. diff --git a/src/agents/geneticAlgAgent.py b/src/agents/geneticAlgAgent.py index 39953bf..88ee184 100644 --- a/src/agents/geneticAlgAgent.py +++ b/src/agents/geneticAlgAgent.py @@ -18,8 +18,11 @@ class GeneticAgent(Agent): NUMBER_OF_GAMES = 10 - def __init__(self): - self.weight_vector = [random.uniform(-2.00, 2.00) for _ in range(5)] + def __init__(self, weight_vector = None): + self.weight_vector = weight_vector + if weight_vector is None: + self.weight_vector = np.random.uniform(-2, 2, 5) + self._normalize_weights() def result(self, board: Tetris) -> list[Action]: possible_boards = board.getPossibleBoards() @@ -35,28 +38,23 @@ def result(self, board: Tetris) -> list[Action]: # Find the actions needed to transform the current board to the new board - actions = [] - try: - actions = transition_model(board, best_board) - return actions - except: - return actions - - def get_weight_vector(self) -> list[float]: + actions = transition_model(board, best_board) + return actions + + def get_weight_vector(self): return self.weight_vector def _fitness(self, board: Tetris) -> float: fitness = 0 for _ in range(self.NUMBER_OF_GAMES): - end_board = play_game(self, board, max_pieces_dropped=500) - fitness += end_board.rowsRemoved + end_board = play_game(self, board, max_pieces_dropped=1500) + fitness += end_board.rowsRemoved / self.NUMBER_OF_GAMES return fitness def _normalize_weights(self): - ## TODO: Fix this function + self.weight_vector /= np.linalg.norm(self.weight_vector) - self.weight_vector = [x/sqrt(sum([i**2 for i in self.weight_vector])) for x in self.weight_vector] def mutate_child(self): @@ -64,38 +62,56 @@ def mutate_child(self): if random.random() < 0.05: self.get_weight_vector()[i] += random.uniform(-0.20, 0.20) - def _crossover(self, parent1: tuple[float, 'GeneticAgent'], parent2: - tuple[float, 'GeneticAgent'] ) -> None: - for i in range(len(self.get_weight_vector())): - self.weight_vector[i] = ((parent1[0]*parent1[1].get_weight_vector()[i] + parent2[0]*parent2[1].get_weight_vector()[i])) - - + def _crossover(self, parent1, parent1_fitness : float, parent2, parent2_fitness : float): + if parent1_fitness == 0 and parent2_fitness == 0: + parent1_weight = 0.5 + parent2_weight = 0.5 + else: + parent1_weight = parent1_fitness / (parent1_fitness + parent2_fitness) + parent2_weight = parent2_fitness / (parent1_fitness + parent2_fitness) + self.weight_vector = 
diff --git a/src/agents/geneticAlgAgent.py b/src/agents/geneticAlgAgent.py
index 39953bf..88ee184 100644
--- a/src/agents/geneticAlgAgent.py
+++ b/src/agents/geneticAlgAgent.py
@@ -18,8 +18,11 @@ class GeneticAgent(Agent):
 
     NUMBER_OF_GAMES = 10
 
-    def __init__(self):
-        self.weight_vector = [random.uniform(-2.00, 2.00) for _ in range(5)]
+    def __init__(self, weight_vector = None):
+        self.weight_vector = weight_vector
+        if weight_vector is None:
+            self.weight_vector = np.random.uniform(-2, 2, 5)
+        self._normalize_weights()
 
     def result(self, board: Tetris) -> list[Action]:
         possible_boards = board.getPossibleBoards()
@@ -35,28 +38,23 @@ def result(self, board: Tetris) -> list[Action]:
 
         # Find the actions needed to transform the current board to the new board
-        actions = []
-        try:
-            actions = transition_model(board, best_board)
-            return actions
-        except:
-            return actions
-
-    def get_weight_vector(self) -> list[float]:
+        actions = transition_model(board, best_board)
+        return actions
+
+    def get_weight_vector(self):
         return self.weight_vector
 
     def _fitness(self, board: Tetris) -> float:
         fitness = 0
         for _ in range(self.NUMBER_OF_GAMES):
-            end_board = play_game(self, board, max_pieces_dropped=500)
-            fitness += end_board.rowsRemoved
+            end_board = play_game(self, board, max_pieces_dropped=1500)
+            fitness += end_board.rowsRemoved / self.NUMBER_OF_GAMES
         return fitness
 
     def _normalize_weights(self):
-        ## TODO: Fix this function
+        self.weight_vector /= np.linalg.norm(self.weight_vector)
 
-        self.weight_vector = [x/sqrt(sum([i**2 for i in self.weight_vector])) for x in self.weight_vector]
 
     def mutate_child(self):
@@ -64,38 +62,56 @@ def mutate_child(self):
             if random.random() < 0.05:
                 self.get_weight_vector()[i] += random.uniform(-0.20, 0.20)
 
-    def _crossover(self, parent1: tuple[float, 'GeneticAgent'], parent2: tuple[float, 'GeneticAgent'] ) -> None:
-        for i in range(len(self.get_weight_vector())):
-            self.weight_vector[i] = ((parent1[0]*parent1[1].get_weight_vector()[i] + parent2[0]*parent2[1].get_weight_vector()[i]))
-
-
+    def _crossover(self, parent1, parent1_fitness : float, parent2, parent2_fitness : float):
+        if parent1_fitness == 0 and parent2_fitness == 0:
+            parent1_weight = 0.5
+            parent2_weight = 0.5
+        else:
+            parent1_weight = parent1_fitness / (parent1_fitness + parent2_fitness)
+            parent2_weight = parent2_fitness / (parent1_fitness + parent2_fitness)
+        self.weight_vector = np.add(parent1_weight * parent1, parent2_weight * parent2)
+        self._normalize_weights()
+
+
+def indices_sorted_by_fitness(fitness):
+    return np.argsort(-fitness)
 
-def average_weight_values(agents: list[float, GeneticAgent]) -> float:
-    sum_of_weights = 0
-    for agent in agents:
-        sum_of_weights += sum(agent[1].get_weight_vector())/len(agent[1].get_weight_vector())
-    return sum_of_weights/len(agents)
-
+def norm_of_weights(weights) -> float:
+    norm = 0
+    transposed_weights = weights.transpose()
+    for weight_type in transposed_weights:
+        norm += (np.amax(weight_type) - np.amin(weight_type))**2
+    return sqrt(norm)
+
+
+def calculate_fitnesses(candidates):
+    fitness = np.array([])
+    for candidate in candidates:
+        agent = GeneticAgent(candidate)
+        board = Tetris()
+        fitness = np.append(fitness, agent._fitness(board))
+    print("Parent fitnesses: ", fitness[np.argsort(-fitness)])
+    return fitness
 
-def train_genetic_algorithm(init_population_size: int, tol = 1e-6) -> list[tuple[float, GeneticAgent]]:
-    candidates = [] # List of genetic agents of the form (fitness, agent)
-    candidate_fitness = np.array([])
+def train_genetic_algorithm(init_population_size: int, tol = 1e-6):
+    weight_candidates = np.array([np.random.uniform(-2, 2, 5) for _ in range(init_population_size)])
+    weight_fitnesses = np.array([])
     print("Starting genetic algorithm")
     for i in range(init_population_size):
         print("Creating candidate ", i)
-        candidate = GeneticAgent()
+        candidate = GeneticAgent(weight_candidates[i])
         board = Tetris()
         fitness = candidate._fitness(board)
-        candidates.append((fitness, candidate))
+        weight_fitnesses = np.append(weight_fitnesses, fitness)
 
     # Sort the candidates based on their fitness
     print("Initial population done")
-    child_candidates = []
-    tolerance = average_weight_values(candidates)
+    print("Fitnesses: ", weight_fitnesses[np.argsort(-weight_fitnesses)])
+    child_candidates = np.array([[]])
+    child_fitnesses = np.array([])
+    tolerance = norm_of_weights(weight_candidates)
     iterations = 0
     print("Starting iterations")
     while abs(tolerance) > tol:
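The new _crossover blends the two parent weight vectors in proportion to each parent's share of the combined fitness, falls back to an even split when both fitnesses are zero, and rescales the child to unit length via _normalize_weights. A standalone sketch of that arithmetic with made-up parent vectors and fitnesses:

import numpy as np

parent1 = np.array([-1.0, 0.5, 1.0, -0.5, 0.25])
parent2 = np.array([0.5, -1.5, 0.0, 1.0, -0.25])
parent1_fitness, parent2_fitness = 3.0, 6.0

total = parent1_fitness + parent2_fitness
w1 = parent1_fitness / total if total else 0.5   # 3/9
w2 = parent2_fitness / total if total else 0.5   # 6/9

child = w1 * parent1 + w2 * parent2   # same result as np.add(w1 * parent1, w2 * parent2)
child /= np.linalg.norm(child)        # rescale to unit Euclidean norm
print(child, np.linalg.norm(child))   # printed norm is 1.0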
@@ -104,30 +120,47 @@ def train_genetic_algorithm(init_population_size: int, tol = 1e-6) -> list[tuple
         print("Starting new generation")
         while len(child_candidates) < 0.3*init_population_size:
             random_indices = select_random_parents(init_population_size)
-            parent_candidates = []
+            print("Parents selected")
+            parent_candidates = np.array([[]])
             for i in random_indices:
-                parent_candidates.append((candidates[i][0], candidates[i][1]))
-            parent_candidates = sorted(parent_candidates, key=operator.itemgetter(0), reverse=True)
-            print(len(parent_candidates))
-            child_tuple = make_offspring(board, parent_candidates[0], parent_candidates[1])
-            child_candidates.append((child_tuple))
-            tolerance = average_weight_values(candidates)
-        candidates = sorted(candidates, key=operator.itemgetter(0), reverse=True)
-        candiates = candiates[:init_population_size*0.7+1]
-        for child in child_candidates:
-            candidates.append(child)
-        tolerance -= average_weight_values(candidates)
-        print("Generation done")
+                parent_candidates = np.append(parent_candidates, weight_candidates[i]).reshape(-1, 5)
+            parent_fitness = calculate_fitnesses(parent_candidates)
+            parent_candidates = parent_candidates[np.argsort(-parent_fitness)]
+            parent_fitness = parent_fitness[np.argsort(-parent_fitness)]
+            child, child_fitness = make_offspring(board, parent_candidates[0], parent_fitness[0], parent_candidates[1], parent_fitness[1])
+            child_candidates = np.append(child_candidates, child).reshape(-1, 5)
+            child_fitnesses = np.append(child_fitnesses, child_fitness)
+            print("Child ", len(child_candidates), " done")
+            tolerance = norm_of_weights(weight_candidates)
+        print("Children appended")
+
+        weight_fitnesses = calculate_fitnesses(weight_candidates)
+        weight_candidates = weight_candidates[np.argsort(-weight_fitnesses)]
+        weight_fitnesses = weight_fitnesses[np.argsort(-weight_fitnesses)]
+        weight_candidates = weight_candidates[:(int(np.floor(init_population_size*0.7))+1)]
+        weight_fitnesses = weight_fitnesses[:(int(np.floor(init_population_size*0.7))+1)]
+
+        for c_candidate in child_candidates:
+            weight_candidates = np.append(weight_candidates, c_candidate).reshape(-1, 5)
+        for c_fitness in child_fitnesses:
+            weight_fitnesses = np.append(weight_fitnesses, c_fitness)
+        print("Children added to population")
+        tolerance -= norm_of_weights(weight_candidates)
+        child_candidates = np.array([[]])
+        child_fitnesses = np.array([])
+        print("Generation of iteration ", iterations, " done")
         print("-------------------")
 
     print(iterations, " iterations done")
-    candidates = sorted(candidates, key=operator.itemgetter(0), reverse=True)
-    print("Best candidates weights: [", candidate[0].get_weight_vector()[0], ", ", candidate[0].get_weight_vector()[1], ", ", candidate[0].get_weight_vector()[2], ", ", candidate[0].get_weight_vector()[3], "]")
-
-    return candidates
+    weight_fitnesses = calculate_fitnesses(weight_candidates)
+    weight_candidates = weight_candidates[np.argsort(-weight_fitnesses)]
+    weight_fitnesses = weight_fitnesses[np.argsort(-weight_fitnesses)]
+    print("Best candidate weights: [", weight_candidates[0][0], ", ", weight_candidates[0][1], ", ", weight_candidates[0][2], ", ", weight_candidates[0][3], ", ", weight_candidates[0][4], "]")
+    return weight_candidates[0]
+    #print("Best candidates weights: [", candidate[0].get_weight_vector()[0], ", ", candidate[0].get_weight_vector()[1], ", ", candidate[0].get_weight_vector()[2], ", ", candidate[0].get_weight_vector()[3], "]")
 
 
-def select_random_parents(init_population_size: int) -> list[int]:
+def select_random_parents(init_population_size: int):
     """
     Selects 10% of the population randomly to be parents for the next generation.
 
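The generation update above sorts candidates and fitnesses with one argsort, keeps roughly the top 70 % of the population, and appends the freshly bred children. A condensed sketch of that selection step on dummy data, using vstack/concatenate in place of the patch's np.append loops:

import numpy as np

population = np.random.uniform(-2, 2, size=(10, 5))   # 10 candidate weight vectors
fitnesses = np.random.rand(10)                        # dummy fitness per candidate
children = np.random.uniform(-2, 2, size=(3, 5))      # offspring bred this generation
child_fitnesses = np.random.rand(3)

order = np.argsort(-fitnesses)                        # descending fitness
keep = int(np.floor(len(population) * 0.7)) + 1       # 8 of 10 survive here
survivors = population[order][:keep]
survivor_fitnesses = fitnesses[order][:keep]

population = np.vstack([survivors, children])                        # next generation
fitnesses = np.concatenate([survivor_fitnesses, child_fitnesses])
print(population.shape, fitnesses.shape)                             # (11, 5) (11,)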
""" random_selection = [] - while len(random_selection) < max(2, init_population_size/10): + while len(random_selection) < max(2, (init_population_size/10)): random_index = random.randint(0, init_population_size - 1) if random_index not in random_selection: random_selection.append(random_index) @@ -143,17 +176,32 @@ def select_random_parents(init_population_size: int) -> list[int]: -def make_offspring(board: Tetris, parent1: tuple[float, GeneticAgent], parent2: tuple[float, GeneticAgent]) -> tuple[float, GeneticAgent]: +def make_offspring(board: Tetris, parent1, parent1_fitness : float, parent2, parent2_fitness : float): child = GeneticAgent() - child.weight_vector = child._crossover(parent1, parent2) + child._crossover(parent1, parent1_fitness, parent2, parent2_fitness) child.mutate_child() child._normalize_weights() + board = Tetris() child_fitness = child._fitness(board) - - return (child_fitness, child) + return child.weight_vector, child_fitness # def mutate_child(child: geneticAgent) -> geneticAgent: # for i in range(len(child.get_weight_vector())): # if random.random() < 0.05: # child.get_weight_vector()[i] += random.uniform(-0.20, 0.20) # return child + + +""""""""" +weights = np.array([ + [0.0, 0.0, 0.0, 0.0, 1.0], + [0.1, 0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 1.0], + [0.1, 0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 1.0] + ]) + fitness = np.array([3, 4, 2, 6, 4]) + + weights = weights[np.argsort(-fitness)] + + print(weights)""" \ No newline at end of file diff --git a/src/agents/heuristic.py b/src/agents/heuristic.py index e95d69f..e61af2a 100644 --- a/src/agents/heuristic.py +++ b/src/agents/heuristic.py @@ -7,19 +7,12 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh lines_cleared_weight: float, bumpiness_weight: float, holes_weight: float) -> float: """Returns the utility of the given game state.""" sum = 0 -<<<<<<< HEAD aggregate, max_height, bumpiness = calculate_heights(gameState) sum += aggregate_heights_weight * aggregate sum += max_height_weight * max_height sum += lines_cleared_weight * lines_cleaned(gameState) sum += bumpiness_weight * bumpiness -======= - sum += aggregate_heights_weight * aggregate_heights(gameState) - sum += max_height_weight * max_height(gameState) - sum += lines_cleared_weight * lines_cleared(gameState) - sum += bumpiness_weight * bumpiness(gameState) ->>>>>>> b842fc6 (feat: :rocket: genetic agent class and it's training algorithm commenced) sum += holes_weight * find_holes(gameState) # print("--------------------") @@ -32,6 +25,7 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh return sum + def calculate_heights(gameState: Tetris) -> tuple[int, int, int]: """Calculates the sum and maximum height of the columns in the game state.""" #sum_heights = 0 @@ -81,16 +75,8 @@ def max_height(gameState: Tetris) -> int: return max(checkedList) -<<<<<<< HEAD -<<<<<<< HEAD # Does this work? row cleared in get_possible_boards?? def lines_cleaned(gameState: Tetris) -> int: -======= -def lines_cleared(gameState: Tetris) -> int: ->>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced) -======= -def lines_cleared(gameState: Tetris) -> int: ->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29 """Retrurns the number of lines cleared.""" sum = 0 for row in gameState.board: