feat: 🎨 Changed datatype for weights in "agent tournament" from list of tuples of int and agent to np.array with weight vectors. Other functionality fixed accordingly
oystkva committed Apr 29, 2024
1 parent 58652b9 commit 6291b7c
Showing 4 changed files with 117 additions and 105 deletions.
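As a reading aid for the diffs below, here is a minimal sketch (not code from the repository) of the representation change the commit message describes: the population goes from a list of (fitness, agent) tuples to a NumPy array with one weight vector per row, kept alongside a parallel fitness array.

import numpy as np

# Old layout (assumed from the removed code): a Python list of tuples,
# e.g. population = [(fitness_0, agent_0), (fitness_1, agent_1), ...]

# New layout: one row per candidate, five heuristic weights per row.
weight_candidates = np.array([np.random.uniform(-2, 2, 5) for _ in range(10)])
weight_fitnesses = np.zeros(len(weight_candidates))

# Sorting both arrays with the same argsort keeps weights and fitnesses
# aligned, which is the pattern used throughout the new training loop.
order = np.argsort(-weight_fitnesses)
weight_candidates = weight_candidates[order]
weight_fitnesses = weight_fitnesses[order]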
27 changes: 8 additions & 19 deletions main.py
@@ -10,6 +10,8 @@
from src.agents.heuristic_trainer import train
from src.agents.geneticAlgAgentJon import GeneticAlgAgentJM

import numpy as np


def test():
# algAgent = GeneticAlgAgentJM()
@@ -38,41 +40,28 @@ def test():
# print(utility(boards, 0, -1, 0, 0, 0))
# boards.printBoard()

<<<<<<< HEAD
<<<<<<< HEAD

# board = Tetris()
# manager = TetrisGameManager(board)
# agent = create_agent("heuristic")

# # manager.startGame()

<<<<<<< HEAD

# # train()


# algAgent = GeneticAlgAgentJM()
# algAgent.number_of_selection(2)
# print(algAgent.getBestPop())

test()
# test()


# cProfile.run('main()', 'restats')
=======
#train()
# # cProfile.run('main()', 'restats')
# # train()

>>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced)
=======
=======
>>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
board = Tetris()
# manager = TetrisGameManager(board)
# agent = create_agent("heuristic")
agents = train_genetic_algorithm(10)

<<<<<<< HEAD
# manager.startDemo(agent)
>>>>>>> fa9eeb9 (Co-authored-by: Håvard Fossdal <HFossdal@users.noreply.github.com>)
=======
# manager.startDemo(agent)
>>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
agents = train_genetic_algorithm(50)
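For orientation only: with the conflict markers in this hunk resolved, the entry point appears to boil down to roughly the sketch below. The Tetris and train_genetic_algorithm calls and the population sizes (10 and 50 both appear above) come from this hunk; the import paths and the main() wrapper are assumptions, not repository code.

from src.game.tetris import Tetris                              # assumed path
from src.agents.geneticAlgAgent import train_genetic_algorithm  # assumed path

def main():
    board = Tetris()  # created in the hunk above, not passed to training
    # Train a population of 10 candidates; the new implementation returns
    # the best weight vector rather than (fitness, agent) tuples.
    best_weights = train_genetic_algorithm(10)
    print(best_weights)

if __name__ == "__main__":
    main()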
19 changes: 4 additions & 15 deletions src/agents/agent.py
@@ -35,7 +35,7 @@ def result(board: Tetris) -> Union[Action, list[Action]]:
pass


def play_game(agent: Agent, board: Tetris, actions_per_drop: int = 1, max_pieces_dropped: int = 1_000_000_000) -> Tetris:
def play_game(agent: Agent, board: Tetris, actions_per_drop: int = 1, max_pieces_dropped : int = 1_000_000_000) -> Tetris:
"""
Plays a game of Tetris with the given agent.
@@ -47,24 +47,10 @@ def play_game(agent: Agent, board: Tetris, actions_per_drop: int = 1, max_pieces
Returns:
The final state of the board after the game is over.
"""
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
#count = 0

while not board.isGameOver():
=======
pieces_dropped = 0
while not board.isGameOver() and pieces_dropped < max_pieces_dropped:
>>>>>>> b842fc6 (feat: :rocket: genetic agent class and it's training algorithm commenced)
=======
pieces_dropped = 0
while not board.isGameOver() and pieces_dropped < max_pieces_dropped:
>>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced)
=======
pieces_dropped = 0
while not board.isGameOver() and pieces_dropped < max_pieces_dropped:
>>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
# Get the result of the agent's action
for _ in range(actions_per_drop):
result = agent.result(board)
@@ -82,9 +68,12 @@ def play_game(agent: Agent, board: Tetris, actions_per_drop: int = 1, max_pieces
board.updateBoard()
#board.printBoard()
pieces_dropped += 1
if pieces_dropped == max_pieces_dropped:
print("Max pieces dropped")

return board


def playGameDemoStepByStep(agent: Agent, board: Tetris) -> Tetris:
"""
Plays a game of Tetris with the given agent where actions are slowed down for demonstration purposes.
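A short usage sketch of the capped loop added to play_game above, assuming import paths that are not shown in this diff; the 1_500-piece cap mirrors the value the genetic agent's fitness function now passes.

from src.game.tetris import Tetris                  # assumed path
from src.agents.agent_factory import create_agent   # assumed path; main.py calls create_agent("heuristic")
from src.agents.agent import play_game

board = Tetris()
agent = create_agent("heuristic")

# The game now stops either when the board reports game over or after
# max_pieces_dropped pieces, so one evaluation cannot run indefinitely.
final_board = play_game(agent, board, actions_per_drop=1, max_pieces_dropped=1_500)
print(final_board.rowsRemoved)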
160 changes: 104 additions & 56 deletions src/agents/geneticAlgAgent.py
@@ -18,8 +18,11 @@ class GeneticAgent(Agent):

NUMBER_OF_GAMES = 10

def __init__(self):
self.weight_vector = [random.uniform(-2.00, 2.00) for _ in range(5)]
def __init__(self, weight_vector = None):
self.weight_vector = weight_vector
if weight_vector is None:
self.weight_vector = np.random.uniform(-2, 2, 5)
self._normalize_weights()

def result(self, board: Tetris) -> list[Action]:
possible_boards = board.getPossibleBoards()
@@ -35,67 +38,80 @@ def result(self, board: Tetris) -> list[Action]:


# Find the actions needed to transform the current board to the new board
actions = []
try:
actions = transition_model(board, best_board)
return actions
except:
return actions

def get_weight_vector(self) -> list[float]:
actions = transition_model(board, best_board)
return actions

def get_weight_vector(self):
return self.weight_vector


def _fitness(self, board: Tetris) -> float:
fitness = 0
for _ in range(self.NUMBER_OF_GAMES):
end_board = play_game(self, board, max_pieces_dropped=500)
fitness += end_board.rowsRemoved
end_board = play_game(self, board, max_pieces_dropped=1500)
fitness += end_board.rowsRemoved / self.NUMBER_OF_GAMES
return fitness

def _normalize_weights(self):
## TODO: Fix this function
self.weight_vector /= np.linalg.norm(self.weight_vector)

self.weight_vector = [x/sqrt(sum([i**2 for i in self.weight_vector])) for x in self.weight_vector]


def mutate_child(self):
for i in range(5):
if random.random() < 0.05:
self.get_weight_vector()[i] += random.uniform(-0.20, 0.20)

def _crossover(self, parent1: tuple[float, 'GeneticAgent'], parent2:
tuple[float, 'GeneticAgent'] ) -> None:
for i in range(len(self.get_weight_vector())):
self.weight_vector[i] = ((parent1[0]*parent1[1].get_weight_vector()[i] + parent2[0]*parent2[1].get_weight_vector()[i]))



def _crossover(self, parent1, parent1_fitness : float, parent2, parent2_fitness : float):
if parent1_fitness == 0 and parent2_fitness == 0:
parent1_weight = 0.5
parent2_weight = 0.5
else:
parent1_weight = parent1_fitness / (parent1_fitness + parent2_fitness)
parent2_weight = parent2_fitness / (parent1_fitness + parent2_fitness)
self.weight_vector = np.add(parent1_weight * parent1, parent2_weight * parent2)
self._normalize_weights()

def indices_sorted_by_fitness(fitness):
return np.argsort(-fitness)


def average_weight_values(agents: list[float, GeneticAgent]) -> float:
sum_of_weights = 0
for agent in agents:
sum_of_weights += sum(agent[1].get_weight_vector())/len(agent[1].get_weight_vector())
return sum_of_weights/len(agents)

def norm_of_weights(weights) -> float:
norm = 0
transposed_weights = weights.transpose()
for weight_type in transposed_weights:
norm += (np.amax(weight_type) - np.amin(weight_type))**2
return sqrt(norm)


def calculate_fitnesses(candidates):
fitness = np.array([])
for candidate in candidates:
agent = GeneticAgent(candidate)
board = Tetris()
fitness = np.append(fitness, agent._fitness(board))
print("Parents fitnesses: ", fitness[np.argsort(-fitness)])
return fitness


def train_genetic_algorithm(init_population_size: int, tol = 1e-6) -> list[tuple[float, GeneticAgent]]:
candidates = [] # List of genetic agents on the form (fitness, agent)
candidate_fitness = np.array([])
def train_genetic_algorithm(init_population_size: int, tol = 1e-6):
weight_candidates = np.array([np.random.uniform(-2, 2, 5) for _ in range(init_population_size)])
weight_fitnesses = np.array([])

print("Starting genetic algorithm")
for i in range(init_population_size):
print("Creating candidate ", i)
candidate = GeneticAgent()
candidate = GeneticAgent(weight_candidates[i])
board = Tetris()
fitness = candidate._fitness(board)
candidates.append((fitness, candidate))
weight_fitnesses = np.append(weight_fitnesses, fitness)
# Sort the candidates based on their fitness
print("Initial population done")
child_candidates = []
tolerance = average_weight_values(candidates)
print("Fitnesses: ", weight_fitnesses[np.argsort(-weight_fitnesses)])
child_candidates = np.array([[]])
child_fitnesses = np.array([])
tolerance = norm_of_weights(weight_candidates)
iterations = 0
print("Starting iterations")
while abs(tolerance) > tol:
@@ -104,56 +120,88 @@ def train_genetic_algorithm(init_population_size: int, tol = 1e-6) -> list[tuple
print("Starting new generation")
while len(child_candidates) < 0.3*init_population_size:
random_indices = select_random_parents(init_population_size)
parent_candidates = []
print("Parents selected")
parent_candidates = np.array([[]])
for i in random_indices:
parent_candidates.append((candidates[i][0], candidates[i][1]))
parent_candidates = sorted(parent_candidates, key=operator.itemgetter(0), reverse=True)
print(len(parent_candidates))
child_tuple = make_offspring(board, parent_candidates[0], parent_candidates[1])
child_candidates.append((child_tuple))
tolerance = average_weight_values(candidates)
candidates = sorted(candidates, key=operator.itemgetter(0), reverse=True)
candiates = candiates[:init_population_size*0.7+1]
for child in child_candidates:
candidates.append(child)
tolerance -= average_weight_values(candidates)
print("Generation done")
parent_candidates = np.append(parent_candidates, weight_candidates[i]).reshape(-1, 5)
parent_fitness = calculate_fitnesses(parent_candidates)
parent_candidates = parent_candidates[np.argsort(-parent_fitness)]
parent_fitness = parent_fitness[np.argsort(-parent_fitness)]
child, child_fitness = make_offspring(board, parent_candidates[0], parent_fitness[0], parent_candidates[1], parent_fitness[1])
child_candidates = np.append(child_candidates, child).reshape(-1, 5)
child_fitnesses = np.append(child_fitnesses, child_fitness)
print("Child ", len(child_candidates), " done")
tolerance = norm_of_weights(weight_candidates)
print("Children appended")

weight_fitnesses = calculate_fitnesses(weight_candidates)
weight_candidates = weight_candidates[np.argsort(-weight_fitnesses)]
weight_fitnesses = weight_fitnesses[np.argsort(-weight_fitnesses)]
weight_candidates = weight_candidates[:(int(np.floor(init_population_size*0.7))+1)]
weight_fitnesses = weight_fitnesses[:(int(np.floor(init_population_size*0.7))+1)]

for c_candidate in child_candidates:
weight_candidates = np.append(weight_candidates, c_candidate).reshape(-1, 5)
for c_fitness in child_fitnesses:
weight_fitnesses = np.append(weight_fitnesses, c_fitness)
print("Children added to population")
tolerance -= norm_of_weights(weight_candidates)
child_candidates = np.array([[]])
child_fitnesses = np.array([])
print("Generation of iteration ", iterations, " done")
print("-------------------")
print(iterations, " iterations done")
candidates = sorted(candidates, key=operator.itemgetter(0), reverse=True)
print("Best candidates weights: [", candidate[0].get_weight_vector()[0], ", ", candidate[0].get_weight_vector()[1], ", ", candidate[0].get_weight_vector()[2], ", ", candidate[0].get_weight_vector()[3], "]")

return candidates
weight_fitnesses = calculate_fitnesses(weight_candidates)
weight_candidates = weight_candidates[np.argsort(-weight_fitnesses)]
weight_fitnesses = weight_fitnesses[np.argsort(-weight_fitnesses)]
print("Best candidate weights: [", weight_candidates[0][0], ", ", weight_candidates[0][1], ", ", weight_candidates[0][2], ", ", weight_candidates[0][3], ", ", weight_candidates[0][4], "]")
return weight_candidates[0]
#print("Best candidates weights: [", candidate[0].get_weight_vector()[0], ", ", candidate[0].get_weight_vector()[1], ", ", candidate[0].get_weight_vector()[2], ", ", candidate[0].get_weight_vector()[3], "]")



def select_random_parents(init_population_size: int) -> list[int]:
def select_random_parents(init_population_size: int):
"""
Selects 10% of the population randomly to be parents for the next generation.
Returns:
list of indices of unique selected agents.
"""
random_selection = []
while len(random_selection) < max(2, init_population_size/10):
while len(random_selection) < max(2, (init_population_size/10)):
random_index = random.randint(0, init_population_size - 1)
if random_index not in random_selection:
random_selection.append(random_index)
return random_selection



def make_offspring(board: Tetris, parent1: tuple[float, GeneticAgent], parent2: tuple[float, GeneticAgent]) -> tuple[float, GeneticAgent]:
def make_offspring(board: Tetris, parent1, parent1_fitness : float, parent2, parent2_fitness : float):
child = GeneticAgent()
child.weight_vector = child._crossover(parent1, parent2)
child._crossover(parent1, parent1_fitness, parent2, parent2_fitness)
child.mutate_child()
child._normalize_weights()
board = Tetris()
child_fitness = child._fitness(board)

return (child_fitness, child)
return child.weight_vector, child_fitness

# def mutate_child(child: geneticAgent) -> geneticAgent:
# for i in range(len(child.get_weight_vector())):
# if random.random() < 0.05:
# child.get_weight_vector()[i] += random.uniform(-0.20, 0.20)
# return child


"""""""""
weights = np.array([
[0.0, 0.0, 0.0, 0.0, 1.0],
[0.1, 0.0, 0.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 1.0, 1.0],
[0.1, 0.0, 1.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 0.0, 1.0]
])
fitness = np.array([3, 4, 2, 6, 4])
weights = weights[np.argsort(-fitness)]
print(weights)"""
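A small worked sketch of the fitness-weighted crossover and normalization used by the new code, with made-up parent vectors and fitnesses; only the arithmetic of _crossover and _normalize_weights is reproduced here.

import numpy as np

parent1 = np.array([ 1.0, -0.5,  0.2,  0.8, -1.0])   # hypothetical weights
parent2 = np.array([-0.4,  0.6,  1.2, -0.3,  0.5])
parent1_fitness, parent2_fitness = 30.0, 10.0

# Each parent contributes in proportion to its share of the combined fitness
# (with a 50/50 split if both fitnesses are zero).
total = parent1_fitness + parent2_fitness
w1 = parent1_fitness / total if total else 0.5   # 0.75
w2 = parent2_fitness / total if total else 0.5   # 0.25
child = w1 * parent1 + w2 * parent2

# _normalize_weights then scales the child to unit Euclidean length.
child /= np.linalg.norm(child)
print(child)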
16 changes: 1 addition & 15 deletions src/agents/heuristic.py
@@ -7,19 +7,12 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh
lines_cleared_weight: float, bumpiness_weight: float, holes_weight: float) -> float:
"""Returns the utility of the given game state."""
sum = 0
<<<<<<< HEAD
aggregate, max_height, bumpiness = calculate_heights(gameState)

sum += aggregate_heights_weight * aggregate
sum += max_height_weight * max_height
sum += lines_cleared_weight * lines_cleaned(gameState)
sum += bumpiness_weight * bumpiness
=======
sum += aggregate_heights_weight * aggregate_heights(gameState)
sum += max_height_weight * max_height(gameState)
sum += lines_cleared_weight * lines_cleared(gameState)
sum += bumpiness_weight * bumpiness(gameState)
>>>>>>> b842fc6 (feat: :rocket: genetic agent class and it's training algorithm commenced)
sum += holes_weight * find_holes(gameState)

# print("--------------------")
@@ -32,6 +25,7 @@ def utility(gameState: Tetris, aggregate_heights_weight: float, max_height_weigh

return sum


def calculate_heights(gameState: Tetris) -> tuple[int, int, int]:
"""Calculates the sum and maximum height of the columns in the game state."""
#sum_heights = 0
@@ -81,16 +75,8 @@ def max_height(gameState: Tetris) -> int:
return max(checkedList)


<<<<<<< HEAD
<<<<<<< HEAD
# Does this work? row cleared in get_possible_boards??
def lines_cleaned(gameState: Tetris) -> int:
=======
def lines_cleared(gameState: Tetris) -> int:
>>>>>>> c14418b (feat: :rocket: genetic agent class and it's training algorithm commenced)
=======
def lines_cleared(gameState: Tetris) -> int:
>>>>>>> fa9eeb924767729763e18a070d98dd0646936c29
"""Retrurns the number of lines cleared."""
sum = 0
for row in gameState.board:
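To connect the heuristic back to the new weight layout: utility() is a weighted sum of five board features, so with the genetic agent's five-element vector it is effectively a dot product. The feature values below are invented for illustration, and it is assumed the weights are applied in the parameter order shown above (aggregate height, max height, lines cleared, bumpiness, holes).

import numpy as np

# Hypothetical feature values for one board state, in the order used above.
features = np.array([42.0, 9.0, 1.0, 7.0, 3.0])

# A normalized weight vector of the kind produced by the genetic agent.
weights = np.array([-0.51, -0.18, 0.76, -0.23, -0.29])

# utility() computes this sum term by term; as vectors it is a dot product.
utility_value = float(np.dot(weights, features))
print(utility_value)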
