From 6291b7c0b5dbbafcccb8c7ca157285bb66814151 Mon Sep 17 00:00:00 2001
From: oystkva
Date: Tue, 30 Apr 2024 00:07:08 +0200
Subject: [PATCH] feat: :art: Changed datatype for weights in "agent
 tournament" from a list of (int, agent) tuples to an np.array of weight
 vectors. Other functionality updated accordingly

---
 main.py                       |  27 ++----
 src/agents/agent.py           |  19 +---
 src/agents/geneticAlgAgent.py | 160 ++++++++++++++++++++++------------
 src/agents/heuristic.py       |  16 +---
 4 files changed, 117 insertions(+), 105 deletions(-)

diff --git a/main.py b/main.py
index fff6320..cc35e57 100644
--- a/main.py
+++ b/main.py
@@ -10,6 +10,8 @@ from src.agents.heuristic_trainer import train
 from src.agents.geneticAlgAgentJon import GeneticAlgAgentJM
 
+import numpy as np
+
 
 def test():
     # algAgent = GeneticAlgAgentJM()
@@ -38,15 +40,14 @@ def test():
     # print(utility(boards, 0, -1, 0, 0, 0))
     # boards.printBoard()
 
-<<<<<<< HEAD
-<<<<<<< HEAD
+
     # board = Tetris()
     # manager = TetrisGameManager(board)
     # agent = create_agent("heuristic")
 
     # # manager.startGame()
 
-<<<<<<< HEAD
+
     # # train()
 
 
@@ -54,25 +55,13 @@ def test():
     # algAgent.number_of_selection(2)
     # print(algAgent.getBestPop())
 
-    test()
+    # test()
 
-    # cProfile.run('main()', 'restats')
-=======
-    #train()
+    # # cProfile.run('main()', 'restats')
+    # # train()
 
->>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced)
-=======
-=======
->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
     board = Tetris()
     # manager = TetrisGameManager(board)
     # agent = create_agent("heuristic")
 
-    agents = train_genetic_algorithm(10)
-
-<<<<<<< HEAD
-    # manager.startDemo(agent)
->>>>>>> fa9eeb9 (Co-authored-by: Håvard Fossdal )
-=======
-    # manager.startDemo(agent)
->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
+    agents = train_genetic_algorithm(50)
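The commit message above describes replacing (fitness, agent) tuples with plain NumPy weight vectors plus a parallel fitness array. A minimal, self-contained sketch of that representation and of the descending-fitness ordering used throughout the patch (values and variable names here are illustrative, not code from the repository):

import numpy as np

population_size = 10
# One row per candidate: 5 heuristic weights drawn uniformly from [-2, 2], as in GeneticAgent.__init__.
weight_candidates = np.random.uniform(-2, 2, size=(population_size, 5))
# Fitness lives in a separate array, aligned with the rows above by index.
weight_fitnesses = np.random.rand(population_size)  # placeholder fitness values

# Sorting candidates and fitnesses with the same argsort keeps them aligned.
order = np.argsort(-weight_fitnesses)               # descending fitness
weight_candidates = weight_candidates[order]
weight_fitnesses = weight_fitnesses[order]
print(weight_candidates[0], weight_fitnesses[0])    # best candidate first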
""" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #count = 0 - while not board.isGameOver(): -======= pieces_dropped = 0 while not board.isGameOver() and pieces_dropped < max_pieces_dropped: ->>>>>>> b842fc6 (feat: :rocket: genetic agent class and it's training algorithm commenced) -======= - pieces_dropped = 0 - while not board.isGameOver() and pieces_dropped < max_pieces_dropped: ->>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced) -======= - pieces_dropped = 0 - while not board.isGameOver() and pieces_dropped < max_pieces_dropped: ->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29 # Get the result of the agent's action for _ in range(actions_per_drop): result = agent.result(board) @@ -82,9 +68,12 @@ def play_game(agent: Agent, board: Tetris, actions_per_drop: int = 1, max_pieces board.updateBoard() #board.printBoard() pieces_dropped += 1 + if pieces_dropped == max_pieces_dropped: + print("Max pieces dropped") return board + def playGameDemoStepByStep(agent: Agent, board: Tetris) -> Tetris: """ Plays a game of Tetris with the given agent where actions are slowed down for demonstration purposes. diff --git a/src/agents/geneticAlgAgent.py b/src/agents/geneticAlgAgent.py index 39953bf..88ee184 100644 --- a/src/agents/geneticAlgAgent.py +++ b/src/agents/geneticAlgAgent.py @@ -18,8 +18,11 @@ class GeneticAgent(Agent): NUMBER_OF_GAMES = 10 - def __init__(self): - self.weight_vector = [random.uniform(-2.00, 2.00) for _ in range(5)] + def __init__(self, weight_vector = None): + self.weight_vector = weight_vector + if weight_vector is None: + self.weight_vector = np.random.uniform(-2, 2, 5) + self._normalize_weights() def result(self, board: Tetris) -> list[Action]: possible_boards = board.getPossibleBoards() @@ -35,28 +38,23 @@ def result(self, board: Tetris) -> list[Action]: # Find the actions needed to transform the current board to the new board - actions = [] - try: - actions = transition_model(board, best_board) - return actions - except: - return actions - - def get_weight_vector(self) -> list[float]: + actions = transition_model(board, best_board) + return actions + + def get_weight_vector(self): return self.weight_vector def _fitness(self, board: Tetris) -> float: fitness = 0 for _ in range(self.NUMBER_OF_GAMES): - end_board = play_game(self, board, max_pieces_dropped=500) - fitness += end_board.rowsRemoved + end_board = play_game(self, board, max_pieces_dropped=1500) + fitness += end_board.rowsRemoved / self.NUMBER_OF_GAMES return fitness def _normalize_weights(self): - ## TODO: Fix this function + self.weight_vector /= np.linalg.norm(self.weight_vector) - self.weight_vector = [x/sqrt(sum([i**2 for i in self.weight_vector])) for x in self.weight_vector] def mutate_child(self): @@ -64,38 +62,56 @@ def mutate_child(self): if random.random() < 0.05: self.get_weight_vector()[i] += random.uniform(-0.20, 0.20) - def _crossover(self, parent1: tuple[float, 'GeneticAgent'], parent2: - tuple[float, 'GeneticAgent'] ) -> None: - for i in range(len(self.get_weight_vector())): - self.weight_vector[i] = ((parent1[0]*parent1[1].get_weight_vector()[i] + parent2[0]*parent2[1].get_weight_vector()[i])) - - + def _crossover(self, parent1, parent1_fitness : float, parent2, parent2_fitness : float): + if parent1_fitness == 0 and parent2_fitness == 0: + parent1_weight = 0.5 + parent2_weight = 0.5 + else: + parent1_weight = parent1_fitness / (parent1_fitness + parent2_fitness) + parent2_weight = parent2_fitness / (parent1_fitness + parent2_fitness) + self.weight_vector = 
diff --git a/src/agents/geneticAlgAgent.py b/src/agents/geneticAlgAgent.py
index 39953bf..88ee184 100644
--- a/src/agents/geneticAlgAgent.py
+++ b/src/agents/geneticAlgAgent.py
@@ -18,8 +18,11 @@ class GeneticAgent(Agent):
 
     NUMBER_OF_GAMES = 10
 
-    def __init__(self):
-        self.weight_vector = [random.uniform(-2.00, 2.00) for _ in range(5)]
+    def __init__(self, weight_vector = None):
+        self.weight_vector = weight_vector
+        if weight_vector is None:
+            self.weight_vector = np.random.uniform(-2, 2, 5)
+        self._normalize_weights()
 
     def result(self, board: Tetris) -> list[Action]:
         possible_boards = board.getPossibleBoards()
@@ -35,28 +38,23 @@ def result(self, board: Tetris) -> list[Action]:
 
         # Find the actions needed to transform the current board to the new board
-        actions = []
-        try:
-            actions = transition_model(board, best_board)
-            return actions
-        except:
-            return actions
-
-    def get_weight_vector(self) -> list[float]:
+        actions = transition_model(board, best_board)
+        return actions
+
+    def get_weight_vector(self):
         return self.weight_vector
 
     def _fitness(self, board: Tetris) -> float:
         fitness = 0
         for _ in range(self.NUMBER_OF_GAMES):
-            end_board = play_game(self, board, max_pieces_dropped=500)
-            fitness += end_board.rowsRemoved
+            end_board = play_game(self, board, max_pieces_dropped=1500)
+            fitness += end_board.rowsRemoved / self.NUMBER_OF_GAMES
         return fitness
 
     def _normalize_weights(self):
-        ## TODO: Fix this function
+        self.weight_vector /= np.linalg.norm(self.weight_vector)
 
-        self.weight_vector = [x/sqrt(sum([i**2 for i in self.weight_vector])) for x in self.weight_vector]
 
     def mutate_child(self):
@@ -64,38 +62,56 @@ def mutate_child(self):
             if random.random() < 0.05:
                 self.get_weight_vector()[i] += random.uniform(-0.20, 0.20)
 
-    def _crossover(self, parent1: tuple[float, 'GeneticAgent'], parent2: tuple[float, 'GeneticAgent'] ) -> None:
-        for i in range(len(self.get_weight_vector())):
-            self.weight_vector[i] = ((parent1[0]*parent1[1].get_weight_vector()[i] + parent2[0]*parent2[1].get_weight_vector()[i]))
-
-
+    def _crossover(self, parent1, parent1_fitness : float, parent2, parent2_fitness : float):
+        if parent1_fitness == 0 and parent2_fitness == 0:
+            parent1_weight = 0.5
+            parent2_weight = 0.5
+        else:
+            parent1_weight = parent1_fitness / (parent1_fitness + parent2_fitness)
+            parent2_weight = parent2_fitness / (parent1_fitness + parent2_fitness)
+        self.weight_vector = np.add(parent1_weight * parent1, parent2_weight * parent2)
+        self._normalize_weights()
+
+
+def indices_sorted_by_fitness(fitness):
+    return np.argsort(-fitness)
 
-def average_weight_values(agents: list[float, GeneticAgent]) -> float:
-    sum_of_weights = 0
-    for agent in agents:
-        sum_of_weights += sum(agent[1].get_weight_vector())/len(agent[1].get_weight_vector())
-    return sum_of_weights/len(agents)
-
+def norm_of_weights(weights) -> float:
+    norm = 0
+    transposed_weights = weights.transpose()
+    for weight_type in transposed_weights:
+        norm += (np.amax(weight_type) - np.amin(weight_type))**2
+    return sqrt(norm)
+
+
+def calculate_fitnesses(candidates):
+    fitness = np.array([])
+    for candidate in candidates:
+        agent = GeneticAgent(candidate)
+        board = Tetris()
+        fitness = np.append(fitness, agent._fitness(board))
+    print("Parent fitnesses: ", fitness[np.argsort(-fitness)])
+    return fitness
 
-def train_genetic_algorithm(init_population_size: int, tol = 1e-6) -> list[tuple[float, GeneticAgent]]:
-    candidates = [] # List of genetic agents of the form (fitness, agent)
-    candidate_fitness = np.array([])
+def train_genetic_algorithm(init_population_size: int, tol = 1e-6):
+    weight_candidates = np.array([np.random.uniform(-2, 2, 5) for _ in range(init_population_size)])
+    weight_fitnesses = np.array([])
     print("Starting genetic algorithm")
     for i in range(init_population_size):
         print("Creating candidate ", i)
-        candidate = GeneticAgent()
+        candidate = GeneticAgent(weight_candidates[i])
         board = Tetris()
         fitness = candidate._fitness(board)
-        candidates.append((fitness, candidate))
+        weight_fitnesses = np.append(weight_fitnesses, fitness)
 
     # Sort the candidates based on their fitness
     print("Initial population done")
-    child_candidates = []
-    tolerance = average_weight_values(candidates)
+    print("Fitnesses: ", weight_fitnesses[np.argsort(-weight_fitnesses)])
+    child_candidates = np.array([[]])
+    child_fitnesses = np.array([])
+    tolerance = norm_of_weights(weight_candidates)
     iterations = 0
     print("Starting iterations")
     while abs(tolerance) > tol:
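The new _crossover blends the two parent weight vectors in proportion to each parent's share of the combined fitness, falls back to an even split when both fitnesses are zero, and rescales the child to unit length via _normalize_weights. A standalone sketch of that arithmetic with made-up parent vectors and fitnesses:

import numpy as np

parent1 = np.array([-1.0, 0.5, 1.0, -0.5, 0.25])
parent2 = np.array([0.5, -1.5, 0.0, 1.0, -0.25])
parent1_fitness, parent2_fitness = 3.0, 6.0

total = parent1_fitness + parent2_fitness
w1 = parent1_fitness / total if total else 0.5   # 3/9
w2 = parent2_fitness / total if total else 0.5   # 6/9

child = w1 * parent1 + w2 * parent2   # same result as np.add(w1 * parent1, w2 * parent2)
child /= np.linalg.norm(child)        # rescale to unit Euclidean norm
print(child, np.linalg.norm(child))   # printed norm is 1.0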
@@ -104,30 +120,47 @@ def train_genetic_algorithm(init_population_size: int, tol = 1e-6) -> list[tuple
         print("Starting new generation")
         while len(child_candidates) < 0.3*init_population_size:
             random_indices = select_random_parents(init_population_size)
-            parent_candidates = []
+            print("Parents selected")
+            parent_candidates = np.array([[]])
             for i in random_indices:
-                parent_candidates.append((candidates[i][0], candidates[i][1]))
-            parent_candidates = sorted(parent_candidates, key=operator.itemgetter(0), reverse=True)
-            print(len(parent_candidates))
-            child_tuple = make_offspring(board, parent_candidates[0], parent_candidates[1])
-            child_candidates.append((child_tuple))
-            tolerance = average_weight_values(candidates)
-        candidates = sorted(candidates, key=operator.itemgetter(0), reverse=True)
-        candiates = candiates[:init_population_size*0.7+1]
-        for child in child_candidates:
-            candidates.append(child)
-        tolerance -= average_weight_values(candidates)
-        print("Generation done")
+                parent_candidates = np.append(parent_candidates, weight_candidates[i]).reshape(-1, 5)
+            parent_fitness = calculate_fitnesses(parent_candidates)
+            parent_candidates = parent_candidates[np.argsort(-parent_fitness)]
+            parent_fitness = parent_fitness[np.argsort(-parent_fitness)]
+            child, child_fitness = make_offspring(board, parent_candidates[0], parent_fitness[0], parent_candidates[1], parent_fitness[1])
+            child_candidates = np.append(child_candidates, child).reshape(-1, 5)
+            child_fitnesses = np.append(child_fitnesses, child_fitness)
+            print("Child ", len(child_candidates), " done")
+            tolerance = norm_of_weights(weight_candidates)
+        print("Children appended")
+
+        weight_fitnesses = calculate_fitnesses(weight_candidates)
+        weight_candidates = weight_candidates[np.argsort(-weight_fitnesses)]
+        weight_fitnesses = weight_fitnesses[np.argsort(-weight_fitnesses)]
+        weight_candidates = weight_candidates[:(int(np.floor(init_population_size*0.7))+1)]
+        weight_fitnesses = weight_fitnesses[:(int(np.floor(init_population_size*0.7))+1)]
+
+        for c_candidate in child_candidates:
+            weight_candidates = np.append(weight_candidates, c_candidate).reshape(-1, 5)
+        for c_fitness in child_fitnesses:
+            weight_fitnesses = np.append(weight_fitnesses, c_fitness)
+        print("Children added to population")
+        tolerance -= norm_of_weights(weight_candidates)
+        child_candidates = np.array([[]])
+        child_fitnesses = np.array([])
+        print("Generation of iteration ", iterations, " done")
         print("-------------------")
 
     print(iterations, " iterations done")
-    candidates = sorted(candidates, key=operator.itemgetter(0), reverse=True)
-    print("Best candidates weights: [", candidate[0].get_weight_vector()[0], ", ", candidate[0].get_weight_vector()[1], ", ", candidate[0].get_weight_vector()[2], ", ", candidate[0].get_weight_vector()[3], "]")
-
-    return candidates
+    weight_fitnesses = calculate_fitnesses(weight_candidates)
+    weight_candidates = weight_candidates[np.argsort(-weight_fitnesses)]
+    weight_fitnesses = weight_fitnesses[np.argsort(-weight_fitnesses)]
+    print("Best candidate weights: [", weight_candidates[0][0], ", ", weight_candidates[0][1], ", ", weight_candidates[0][2], ", ", weight_candidates[0][3], ", ", weight_candidates[0][4], "]")
+    return weight_candidates[0]
+    #print("Best candidates weights: [", candidate[0].get_weight_vector()[0], ", ", candidate[0].get_weight_vector()[1], ", ", candidate[0].get_weight_vector()[2], ", ", candidate[0].get_weight_vector()[3], "]")
 
 
-def select_random_parents(init_population_size: int) -> list[int]:
+def select_random_parents(init_population_size: int):
     """
     Selects 10% of the population randomly to be parents for the next generation.
 
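The generation update above sorts candidates and fitnesses with one argsort, keeps roughly the top 70 % of the population, and appends the freshly bred children. A condensed sketch of that selection step on dummy data, using vstack/concatenate in place of the patch's np.append loops:

import numpy as np

population = np.random.uniform(-2, 2, size=(10, 5))   # 10 candidate weight vectors
fitnesses = np.random.rand(10)                        # dummy fitness per candidate
children = np.random.uniform(-2, 2, size=(3, 5))      # offspring bred this generation
child_fitnesses = np.random.rand(3)

order = np.argsort(-fitnesses)                        # descending fitness
keep = int(np.floor(len(population) * 0.7)) + 1       # 8 of 10 survive here
survivors = population[order][:keep]
survivor_fitnesses = fitnesses[order][:keep]

population = np.vstack([survivors, children])                        # next generation
fitnesses = np.concatenate([survivor_fitnesses, child_fitnesses])
print(population.shape, fitnesses.shape)                             # (11, 5) (11,)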
""" random_selection = [] - while len(random_selection) < max(2, init_population_size/10): + while len(random_selection) < max(2, (init_population_size/10)): random_index = random.randint(0, init_population_size - 1) if random_index not in random_selection: random_selection.append(random_index) @@ -143,17 +176,32 @@ def select_random_parents(init_population_size: int) -> list[int]: -def make_offspring(board: Tetris, parent1: tuple[float, GeneticAgent], parent2: tuple[float, GeneticAgent]) -> tuple[float, GeneticAgent]: +def make_offspring(board: Tetris, parent1, parent1_fitness : float, parent2, parent2_fitness : float): child = GeneticAgent() - child.weight_vector = child._crossover(parent1, parent2) + child._crossover(parent1, parent1_fitness, parent2, parent2_fitness) child.mutate_child() child._normalize_weights() + board = Tetris() child_fitness = child._fitness(board) - - return (child_fitness, child) + return child.weight_vector, child_fitness # def mutate_child(child: geneticAgent) -> geneticAgent: # for i in range(len(child.get_weight_vector())): # if random.random() < 0.05: # child.get_weight_vector()[i] += random.uniform(-0.20, 0.20) # return child + + +""""""""" +weights = np.array([ + [0.0, 0.0, 0.0, 0.0, 1.0], + [0.1, 0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 1.0], + [0.1, 0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 1.0] + ]) + fitness = np.array([3, 4, 2, 6, 4]) + + weights = weights[np.argsort(-fitness)] + + print(weights)""" \ No newline at end of file diff --git a/src/agents/heuristic.py b/src/agents/heuristic.py index e95d69f..e61af2a 100644 --- a/src/agents/heuristic.py +++ b/src/agents/heuristic.py @@ -7,19 +7,12 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh lines_cleared_weight: float, bumpiness_weight: float, holes_weight: float) -> float: """Returns the utility of the given game state.""" sum = 0 -<<<<<<< HEAD aggregate, max_height, bumpiness = calculate_heights(gameState) sum += aggregate_heights_weight * aggregate sum += max_height_weight * max_height sum += lines_cleared_weight * lines_cleaned(gameState) sum += bumpiness_weight * bumpiness -======= - sum += aggregate_heights_weight * aggregate_heights(gameState) - sum += max_height_weight * max_height(gameState) - sum += lines_cleared_weight * lines_cleared(gameState) - sum += bumpiness_weight * bumpiness(gameState) ->>>>>>> b842fc6 (feat: :rocket: genetic agent class and it's training algorithm commenced) sum += holes_weight * find_holes(gameState) # print("--------------------") @@ -32,6 +25,7 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh return sum + def calculate_heights(gameState: Tetris) -> tuple[int, int, int]: """Calculates the sum and maximum height of the columns in the game state.""" #sum_heights = 0 @@ -81,16 +75,8 @@ def max_height(gameState: Tetris) -> int: return max(checkedList) -<<<<<<< HEAD -<<<<<<< HEAD # Does this work? row cleared in get_possible_boards?? def lines_cleaned(gameState: Tetris) -> int: -======= -def lines_cleared(gameState: Tetris) -> int: ->>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced) -======= -def lines_cleared(gameState: Tetris) -> int: ->>>>>>> fa9eeb924767729763e18a070d98dd0646936c29 """Retrurns the number of lines cleared.""" sum = 0 for row in gameState.board: