Merge branch '22-implement-geneticalgagent-jon-og-maia' into dev

CogitoNTNU · Apr 27, 2024 · bcb056e · bcb056e
2 parents fd2983f + 6eaeca7
commit bcb056e
Show file tree

Hide file tree

Showing 12 changed files with 292 additions and 147 deletions.
diff --git a/docs/guide/devContainer.md b/docs/guide/devContainer.md
diff --git a/docs/guide/venv.md b/docs/guide/venv.md
diff --git a/docs/planning/12.03.2024.md b/docs/planning/12.03.2024.md
diff --git a/main.py b/main.py
@@ -5,18 +5,20 @@
 from src.agents.heuristic import (
     utility
 )
+from src.agents.heuristic_trainer import train
+from src.agents.geneticAlgAgentJon import GeneticAlgAgentJM
 
 if __name__ == "__main__":
+
     # game = Tetris()
     # agent: Agent = create_agent("heuristic")
     # sum_rows_removed = 0
-    # for i in range(1):
+    # for i in range(10):
     #     end_board = play_game(agent, game, 7)
     #     end_board.printBoard()
     #     sum_rows_removed += end_board.rowsRemoved
-    #     print(f"Rows removed: {end_board.rowsRemoved}")
 
-    # print(f"Average rows removed: {sum_rows_removed}")
+    # print(f"Average rows removed: {sum_rows_removed / 10}")
 
     # possible_moves = game.getPossibleBoards()
     # for boards in possible_moves:
@@ -27,4 +29,12 @@
     manager = TetrisGameManager(board)
     agent = create_agent("heuristic")
 
-    manager.startDemo(agent)
+    # manager.startGame()
+
+    # train()
+
+
+    algAgent = GeneticAlgAgentJM()
+    algAgent.number_of_selection(2)
+    print(algAgent.getBestPop())
+
diff --git a/src/agents/agent.py b/src/agents/agent.py
@@ -47,17 +47,20 @@ def play_game(agent: Agent, board: Tetris, actions_per_drop: int = 1) -> Tetris:
     Returns:
         The final state of the board after the game is over.
     """
+    #count = 0
+
     while not board.isGameOver():
         # Get the result of the agent's action
-        result = agent.result(board)
-        # Perform the action(s) on the board
-        if isinstance(result, list):
-            for action in result:
-                board.doAction(action)
-                # board.printBoard()
-        else:
-            board.doAction(result)
-            # board.printBoard()
+        for _ in range(actions_per_drop):
+            result = agent.result(board)
+            # Perform the action(s) on the board
+            if isinstance(result, list):
+                for action in result:
+                    board.doAction(action)
+            else:
+                board.doAction(result)
+
+            #count += 1
         # Advance the game by one frame
         board.doAction(Action.SOFT_DROP)
         if board.blockHasLanded:

diff --git a/src/agents/geneticAlgAgent.py b/src/agents/geneticAlgAgent.py
@@ -4,4 +4,5 @@
 # a = -0.510066 b = 0.760666 c = -0.35663 d = -0.184483
 # TODO Read the part of the article about the genetic algorithm
 # TODO Create a fitness function
-# TODO Create a genetic algorithm based on the
+# TODO Create a genetic algorithm based on the
+
diff --git a/src/agents/geneticAlgAgentJon.py b/src/agents/geneticAlgAgentJon.py
@@ -0,0 +1,136 @@
+import random
+import numpy as np
+from src.game.tetris import *
+from src.agents.agent_factory import create_agent
+from src.agents.agent import Agent
+from src.agents.heuristic_with_parameters_agent import *
+# From paper: https://codemyroad.wordpress.com/2013/04/14/tetris-ai-the-near-perfect-player/
+# The weights the author got:
+#  a x (Aggregate Height) + b x (Complete Lines) + c x (Holes) + d x (Bumpiness)
+# a = -0.510066 b = 0.760666 c = -0.35663 d = -0.184483
+# TODO Read the part of the article about the genetic algorithm
+# TODO Create a fitness function
+
+# TODO Create a genetic algorithm based on the
+
+# List of vectors with boards attached:
+# [(List over parameters, board), ...]
+
+# TODO create init-method that creates agents with random vectors
+# TODO create run_games-method that goes through the agents, and play 100 games each, return average lines cleared
+# TODO create method for fetching a random 10%, and finds the two with highest lines cleared, and makes a child (with 5% chance of mutation)
+# TODO create method that makes 30% new agents from existing agents (last method), replace worst 30% with the new agents
+
+class GeneticAlgAgentJM:
+    """Genetic algorithm that evolves weight vectors for the heuristic Tetris agent.
+
+    Every population member ("pop") is a two-element list ``[weights, fitness]``.
+    ``weights`` holds the five heuristic weights in the order aggregate height,
+    max height, lines cleared, bumpiness, holes. ``fitness`` is the value
+    returned by ``play_game`` for those weights (0.0 for an unevaluated child).
+    """
+
+    # Population size used by initAgents when no size is given.
+    DEFAULT_POPULATION_SIZE = 20
+    # Probability that a newly created child gets one of its weights mutated.
+    MUTATION_PROBABILITY = 0.2
+
+    agents: list[list[list[float], float]]
+
+    def __init__(self) -> None:
+        # Per-instance population: a list defined on the class itself would be
+        # shared (and keep growing) across every instance.
+        self.agents = []
+
+    def number_of_selection(self, number_of_selections: int):
+        """Create an initial population and evolve it for a number of generations.
+
+        Args:
+            number_of_selections: How many replace-and-re-evaluate generations to run.
+        """
+        self.initAgents()
+        for _ in range(number_of_selections):
+            # Select new pops
+            print(len(self.agents))
+            self.agents = self.replace_30_percent(self.agents)
+
+            # Run new test: every pop (survivors and children) is evaluated again
+            for pop in self.agents:
+                pop[1] = self.play_game(*pop[0])
+
+            print(self.getBestPop())
+
+    def initAgents(self, number_of_agents: int = DEFAULT_POPULATION_SIZE) -> None:
+        """Append randomly weighted, already evaluated pops to ``self.agents``.
+
+        The lines-cleared weight is drawn from [0, 1); the other four weights
+        are drawn from [-1, 0).
+
+        Args:
+            number_of_agents: How many pops to create.
+        """
+        for index in range(number_of_agents):
+            agg_height = random.randrange(-1000, 0) / 1000
+            max_height = random.randrange(-1000, 0) / 1000
+            lines_cleared = random.randrange(0, 1000) / 1000
+            bumpiness = random.randrange(-1000, 0) / 1000
+            holes = random.randrange(-1000, 0) / 1000
+
+            weights = [agg_height, max_height, lines_cleared, bumpiness, holes]
+            average_cleared = self.play_game(*weights)
+            self.agents.append([weights, average_cleared])
+            print(index)
+
+    def play_game(self, agg_height, max_height, lines_cleared, bumpiness, holes):
+        """Play several bounded games with the given weights.
+
+        Returns:
+            The average of ``board.rowsRemoved`` over the games played.
+        """
+        agent: Agent = HeuristicWithParametersAgent(
+            [agg_height, max_height, lines_cleared, bumpiness, holes]
+        )
+        number_of_rounds = 20
+        max_moves = 20
+        actions_per_drop = 7
+
+        total_cleared = 0
+        for _ in range(number_of_rounds):
+            # Start every round on a fresh board. Reusing a single board made
+            # later rounds continue (or skip) the first game and summed its
+            # cumulative row count instead of averaging separate games.
+            board = Tetris()
+            move = 0
+
+            while not board.isGameOver() and move < max_moves:
+                # Get the result of the agent's action
+                for _ in range(actions_per_drop):
+                    result = agent.result(board)
+                    # Perform the action(s) on the board
+                    if isinstance(result, list):
+                        for action in result:
+                            board.doAction(action)
+                    else:
+                        board.doAction(result)
+
+                move += 1
+
+            # NOTE(review): this soft drop runs once per round, after the move
+            # loop, whereas play_game in src/agents/agent.py soft-drops inside
+            # its loop. Kept as written - confirm which one is intended.
+            board.doAction(Action.SOFT_DROP)
+
+            total_cleared += board.rowsRemoved
+
+        return total_cleared / number_of_rounds
+
+    def replace_30_percent(self, pop_list: list[list[list[float], float]]) -> list[list[float], float]:
+        """Return a new population where the worst 30% are replaced by children.
+
+        The children are bred from the full incoming population before the
+        lowest-fitness pops are dropped. The input list is not modified.
+        """
+        # Number of pops needed for 30% of total number
+        num_pops_needed = int(len(pop_list) * 0.3)
+
+        children = [self.paring_pop(pop_list) for _ in range(num_pops_needed)]
+
+        # Ascending sort puts the worst pops first; slice them away.
+        survivors = sorted(pop_list, key=lambda pop: pop[1])[num_pops_needed:]
+        survivors.extend(children)
+
+        return survivors
+
+    def paring_pop(self, pop_list: list[list[list[float], float]]) -> list[list[float], float]:
+        """Breed one child from the two fittest pops of a random sample.
+
+        A random 10% of the population (at least two pops) is sampled, the two
+        with the highest fitness are crossed, one weight of the child may be
+        mutated, and the weight vector is scaled to unit length.
+
+        Raises:
+            ValueError: If ``pop_list`` holds fewer than two pops.
+        """
+        # 10% of the population, but never fewer than the two parents needed
+        num_pops_to_select = max(2, int(len(pop_list) * 0.1))
+
+        # Get a sample of pops based on the previous number
+        random_pop_sample = random.sample(pop_list, num_pops_to_select)
+
+        # Gets the two pops with the highest lines cleared
+        parent_a, parent_b = sorted(random_pop_sample, key=lambda pop: pop[1], reverse=True)[:2]
+
+        # Gets the child pop of the two pops
+        new_pop = self.fitness_crossover(parent_a, parent_b)
+
+        # Mutate a MUTATION_PROBABILITY share of the children: one randomly
+        # chosen weight is multiplied by a factor from [-0.2, 0.2)
+        if random.randrange(0, 1000) / 1000 < self.MUTATION_PROBABILITY:
+            random_parameter = random.randrange(len(new_pop[0]))
+            new_pop[0][random_parameter] *= random.randrange(-200, 200) / 1000
+
+        # Scale to unit length; an all-zero vector is left untouched because
+        # dividing by a zero norm would turn every weight into NaN.
+        norm = np.linalg.norm(new_pop[0])
+        if norm > 0:
+            new_pop[0] = (np.array(new_pop[0]) / norm).tolist()
+
+        return new_pop
+
+    def fitness_crossover(self, pop1: list[list[float], float], pop2: list[list[float], float]) -> list[list[float], float]:
+        """Combine two weight vectors proportionally to their parents' fitness.
+
+        Returns:
+            A new pop ``[weights, 0.0]``; the fitness is a placeholder until
+            the child has been evaluated.
+        """
+        share1, share2 = pop1[1], pop2[1]
+        if share1 == 0 and share2 == 0:
+            # Neither parent cleared a line: weigh them equally instead of
+            # collapsing the child to an all-zero vector.
+            share1 = share2 = 1.0
+
+        child_pop = [h1 * share1 + h2 * share2 for h1, h2 in zip(pop1[0], pop2[0])]
+        return [child_pop, 0.0]
+
+    def getBestPop(self) -> list[list[float], float]:
+        """Return the pop with the highest fitness (the first one on ties).
+
+        Raises:
+            IndexError: If the population is empty.
+        """
+        if not self.agents:
+            raise IndexError("population is empty")
+        return max(self.agents, key=lambda pop: pop[1])
diff --git a/src/agents/heuristic.py b/src/agents/heuristic.py
@@ -7,10 +7,12 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh
             lines_cleared_weight: float, bumpiness_weight: float, holes_weight: float) -> float:
     """Returns the utility of the given game state."""
     sum = 0
-    sum += aggregate_heights_weight * aggregate_heights(gameState)
-    sum += max_height_weight * max_height(gameState)
+    aggregate, max_height, bumpiness = calculate_heights(gameState)
+
+    sum += aggregate_heights_weight * aggregate
+    sum += max_height_weight * max_height
     sum += lines_cleared_weight * lines_cleaned(gameState)
-    sum += bumpiness_weight * bumpiness(gameState)
+    sum += bumpiness_weight * bumpiness
     sum += holes_weight * find_holes(gameState)
 
     # print("--------------------")
@@ -23,6 +25,32 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh
 
     return sum
 
+def calculate_heights(gameState: Tetris) -> tuple[int, int, int]:
+    """Compute the column-height statistics of the game state in one pass.
+
+    A column's height is ``gameState.ROWS - row`` for its topmost non-zero
+    cell in ``gameState.prevBoard`` (0 for an empty column).
+
+    Returns:
+        A tuple ``(aggregate_height, max_height, bumpiness)``: the sum of all
+        column heights, the tallest column height, and the sum of absolute
+        height differences between adjacent columns.
+    """
+    # One entry per column, sized from the board itself so boards of any
+    # width work (a fixed 10-key map broke on wider boards).
+    column_heights = [0] * gameState.COLUMNS
+
+    # Scan bottom-up: the last write for a column is its topmost filled cell.
+    for row in range(gameState.ROWS - 1, -1, -1):
+        for column in range(gameState.COLUMNS):
+            if gameState.prevBoard[row][column] != 0:
+                column_heights[column] = gameState.ROWS - row
+
+    total_bumpiness = sum(
+        abs(left - right)
+        for left, right in zip(column_heights, column_heights[1:])
+    )
+
+    return sum(column_heights), max(column_heights, default=0), total_bumpiness
 
 def aggregate_heights(gameState: Tetris) -> int: 
     """Returns the sum of the heights of the columns in the game state."""
@@ -65,7 +93,7 @@ def bumpiness(gameState: Tetris) -> int:
         for row in range(gameState.SPAWN_ROWS, gameState.ROWS):
             if gameState.prevBoard[row][column] > 0:
                 if columnHeightMap[column] == 0:
-                    columnHeightMap[column] = max_height - row
+                    columnHeightMap[column] = gameState.ROWS - row
 
     for key in range(gameState.COLUMNS - 1):
         total_bumpiness += abs(columnHeightMap[key] - columnHeightMap[key + 1])

diff --git a/src/agents/heuristic_with_parameters_agent.py b/src/agents/heuristic_with_parameters_agent.py
@@ -0,0 +1,44 @@
+from src.agents.agent import Agent
+from src.game.tetris import Action, Tetris, transition_model, get_all_actions
+from src.agents.heuristic import (
+    utility
+)
+
+class HeuristicWithParametersAgent(Agent):
+    """Agent that moves towards the candidate board with the highest utility.
+
+    The utility is computed by ``utility`` from ``src.agents.heuristic`` using
+    the five weights supplied at construction time.
+    """
+
+    aggregate_heights_weight: float
+    max_height_weight: float
+    lines_cleared_weight: float
+    bumpiness_weight: float
+    holes_weight: float
+
+    def __init__(self, params: list[float]):
+        """Store the heuristic weights.
+
+        Args:
+            params: Weights in the order aggregate heights, max height,
+                lines cleared, bumpiness, holes.
+        """
+        self.aggregate_heights_weight = params[0]
+        self.max_height_weight = params[1]
+        self.lines_cleared_weight = params[2]
+        self.bumpiness_weight = params[3]
+        self.holes_weight = params[4]
+
+    def result(self, board: Tetris) -> list[Action]:
+        """Return the actions that lead from ``board`` to the best candidate board.
+
+        Returns:
+            The action list from ``transition_model``, or an empty list when
+            no candidate board beats negative infinity or the transition
+            cannot be computed.
+        """
+        best_board = None
+        best_utility = float("-inf")
+        # Check which board has the best outcome based on the heuristic
+        for candidate in board.getPossibleBoards():
+            current_utility = utility(
+                candidate,
+                self.aggregate_heights_weight,
+                self.max_height_weight,
+                self.lines_cleared_weight,
+                self.bumpiness_weight,
+                self.holes_weight,
+            )
+
+            if current_utility > best_utility:
+                best_board = candidate
+                best_utility = current_utility
+
+        # Nothing to move towards: same empty result as before, without
+        # relying on an unbound local being swallowed by the handler below.
+        if best_board is None:
+            return []
+
+        # Find the actions needed to transform the current board to the new board
+        try:
+            return transition_model(board, best_board)
+        except Exception:
+            # Best effort is kept, but KeyboardInterrupt/SystemExit now
+            # propagate so a long training run can still be interrupted.
+            return []