Skip to content

Commit

Permalink
Merge pull request #1 from emesterhazy/player-knowledge
Browse files Browse the repository at this point in the history
Player knowledge
  • Loading branch information
emesterhazy authored Oct 11, 2021
2 parents d7ff910 + af80d4b commit 31ddc40
Showing 1 changed file with 186 additions and 32 deletions.
218 changes: 186 additions & 32 deletions players/g4_player.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,80 +10,234 @@ def __init__(self, flavor_preference: List[int], rng: np.random.Generator, logge
"""Initialise the player with given preference.
Args:
flavor_preference (List[int]): flavor preference, most flavored flavor is first element in the list and last element is least preferred flavor
rng (np.random.Generator): numpy random number generator, use this for same player behvior across run
flavor_preference (List[int]): flavor preference, most flavored
flavor is first element in the list and last element is least
preferred flavor
rng (np.random.Generator): numpy random number
generator, use this for same player behavior across run
logger (logging.Logger): logger use this like logger.info("message")
"""
self.flavor_preference = flavor_preference
self.rng = rng
self.logger = logger
self.state = None
self.state = {
'current_served': None,
# Number of scoops served to ourselves in the current turn
'current_turn_served': 0
}

@staticmethod
def valid_scoop(curr_level, x, y):
    """Helper function: returns whether a scoop at an index x,y is valid or not.

    A scoop covers the 2x2 cells (x, y), (x+1, y), (x, y+1), (x+1, y+1) and is
    valid when none of the three neighbouring cells is higher than the anchor
    cell (x, y).

    Args:
        curr_level (np.ndarray): 2d array holding the current level of each cell
        x (int): row index of the anchor (top-left) cell of the scoop
        y (int): column index of the anchor (top-left) cell of the scoop
    Returns:
        bool: True if the scoop is valid. False if it is not, or if the 2x2
        scoop anchored at (x, y) would not fit inside the container.
    """
    rows, cols = curr_level.shape[0], curr_level.shape[1]
    # The 2x2 "spoon" must lie fully inside the container; report such
    # positions as invalid instead of raising IndexError (or silently
    # wrapping around on negative indices).
    if not (0 <= x < rows - 1 and 0 <= y < cols - 1):
        return False
    anchor_level = curr_level[x, y]
    # The anchor cell trivially satisfies the comparison, so checking the
    # whole 2x2 window is equivalent to checking its three neighbours.
    return bool((curr_level[x:x + 2, y:y + 2] <= anchor_level).all())

@staticmethod
def scoop_value(flavor_preference, top_layer, curr_level, x, y):
    """Helper function: returns the value the player gets for a scoop at index x,y.

    Only the cells of the 2x2 scoop that sit at the same level as the anchor
    cell (x, y) are counted. Each counted cell is worth
    ``len(flavor_preference) - rank + 1`` where ``rank`` is the 0-based position
    of its flavor in ``flavor_preference``.

    Args:
        flavor_preference (List[int]): flavors ordered from most to least preferred
        top_layer (np.ndarray): 2d array containing the flavor at each cell location
        curr_level (np.ndarray): 2d array containing the current level at each cell
        x (int): row index of the anchor (top-left) cell of the scoop
        y (int): column index of the anchor (top-left) cell of the scoop
    Returns:
        Tuple[int, Tuple[int, int], int]: ``(total value, (x, y), units scooped)``.
        ``(0, (x, y), 0)`` is returned when the anchor level is negative.
    Raises:
        ValueError: if a counted cell holds a flavor that is not in
        ``flavor_preference`` (propagated from ``list.index``).
    """
    anchor_level = curr_level[x, y]
    if anchor_level < 0:
        return (0, (x, y), 0)

    n_flavors = len(flavor_preference)
    flav_total = 0
    units = 0
    # Visit the anchor cell first, then its three neighbours in the 2x2 window.
    for dx, dy in ((0, 0), (1, 0), (0, 1), (1, 1)):
        cx, cy = x + dx, y + dy
        # Cells at a different level than the anchor are not counted.
        if curr_level[cx, cy] == anchor_level:
            flav_total += n_flavors - flavor_preference.index(top_layer[cx, cy]) + 1
            units += 1
    return (flav_total, (x, y), units)

@staticmethod
def score_available_scoops(flavor_preference, top_layer, curr_level):
    """Score every valid scoop position currently available in the container.

    :param flavor_preference: flavors ordered from most to least preferred.
    :param top_layer: 2d array of the flavor visible at each cell.
    :param curr_level: 2d array of the current level at each cell.
    :return: list of ``Player.scoop_value`` tuples, one per valid anchor
             position, sorted in ascending order (the highest-valued scoop
             is the last element).
    """
    # Subtract one from each dimension since the 2x2 "spoon" must remain in
    # the container.
    last_row = top_layer.shape[0] - 1
    last_col = top_layer.shape[1] - 1
    scored = [
        Player.scoop_value(flavor_preference, top_layer, curr_level, row, col)
        for row in range(last_row)
        for col in range(last_col)
        if Player.valid_scoop(curr_level, row, col)
    ]
    # TODO (etm): If we care, we can use an actual heap / priority queue
    return sorted(scored)

def serve(self, top_layer: np.ndarray, curr_level: np.ndarray, player_idx: int,
          get_flavors: Callable[[], List[int]],
          get_player_count: Callable[[], int],
          get_served: Callable[[], List[Dict[int, int]]],
          get_turns_received: Callable[[], List[int]]) -> Dict[str, Union[Tuple[int], int]]:
    """Request what to scoop or whom to pass in the given step of the turn.

    In each turn the simulator calls this serve function multiple times for
    each step for a single player, until the player has scooped 24 units of
    ice-cream or asked to pass to next player or made an invalid request.
    If you have scooped 24 units of ice-cream in a turn then you get one
    last step in that turn where you can specify to pass to a player.

    Args:
        top_layer (np.ndarray): Numpy 2d array of size (24, 15) containing
            flavor at each cell location
        curr_level (np.ndarray): Numpy 2d array of size (24, 15) containing
            current level at each cell location from 8 to 0, where 8 is highest
            level at start and 0 means no icecream left at this level
        player_idx (int): index of your player, 0-indexed
        get_flavors (Callable[[], List[int]]): method which returns a list of all
            possible flavors
        get_player_count (Callable[[], int]): method which returns number of total players
        get_served (Callable[[], List[Dict[int, int]]]): method which returns a list of
            dictionaries corresponding to each player, each dictionary at index i tells
            how many units of a flavor are present in the bowl of the player with index
            i. E.g. lets say the fourth element is {1: 0, 2: 8...} means the
            corresponding player has 0 units of flavor 1 and 8 units of flavor 2
        get_turns_received (Callable[[], List[int]]): method which returns a list of
            integers corresponding to each player, each element at index i tells how
            many turns a player with index i has played so far.
    Returns:
        Dict[str, Union[Tuple[int],int]]: Return a dictionary specifying
        what action to take in the next step.
        2 possible return values
        {"action": "scoop", "values" : (i,j)}
            stating to scoop the 4 cells with index (i,j), (i+1,j), (i,j+1), (i+1,j+1)
        {"action": "pass", "values" : i} pass to next player with index i
    """
    p_queue = self.score_available_scoops(self.flavor_preference, top_layer, curr_level)

    # If there is still more ice-cream to take this turn, make a scoop.
    # An empty queue means no valid scoop is left, so pass instead of
    # failing on pop() from an empty list.
    if self.state['current_turn_served'] < 24 and p_queue:
        action = "scoop"
        # The queue is sorted in ascending order, so the best scoop is last.
        _, (x, y), units = p_queue.pop()
        self.state['current_turn_served'] += units
        values = (x, y)
    else:
        # The turn is over: reset the per-turn counter and hand the container
        # to a uniformly random other player.
        self.state['current_turn_served'] = 0
        other_player_list = list(range(0, get_player_count()))
        other_player_list.remove(player_idx)
        next_player = other_player_list[self.rng.integers(0, len(other_player_list))]
        action = "pass"
        values = next_player

    # Remember what everybody has been served so far so that later steps can
    # be compared against it (e.g. with diff_served). This bookkeeping must
    # happen before returning, otherwise it is never executed. The bowls are
    # copied so the snapshot does not change if the caller's dictionaries are
    # mutated afterwards.
    self.state['current_served'] = [dict(bowl) for bowl in get_served()]

    return {"action": action, "values": values}

@staticmethod
def guess_player_pref_from_bowl(player, current_served, flavors):
    """
    Rank the flavors found in one player's bowl by the amount served.
    :param player: index of the target player.
    :param current_served: list of bowls (one dict of flavor -> units per player).
    :param flavors: all flavors (currently unused).
    :return: a list of the flavors present in the player's bowl, largest amount first.
             Flavors with zero units are left out.
    """
    # Keep only the flavors the player actually has in their bowl.
    present = [(flavor, amount)
               for flavor, amount in current_served[player].items()
               if amount > 0]
    # argsort is ascending; reverse it so the largest amount comes first.
    descending = np.argsort([amount for _, amount in present])[::-1]

    # TODO what if some flavor(s) didn't appear in the bowl? Do we pad the ranking list at the end?
    # pref_ranking += [-1] * (len(flavors) - len(pref_ranking))

    return [present[i][0] for i in descending]

def guess_player_dislikes_from_choice(self, player, added_flavors, top_layer_before_serving):
    """
    Guess which flavors a player probably dislikes: the flavors that were visible on the
    top layer before they served themselves but that they did not add to their bowl.
    Can only be used when we pass the container directly to player.
    :param player: the player to be guessed on.
    :param added_flavors: output of diff_served().
    :param top_layer_before_serving: visible flavors on the top layer when we pass the container to player.
    :return: a list of flavors (as the string keys produced by sum_top_layer()) that the
             player probably doesn't like, or None when there is no added-flavor info
             for the player.
    TODO: this function needs to be called one turn after we pass the container to player, which is impossible.
          Currently it's not being used but we may use the idea later.
    """
    chosen = added_flavors[player]
    if not chosen:
        print("WARNING: No added flavor info for this player!")
        return None

    visible = self.sum_top_layer(top_layer_before_serving)
    # Keys of the summary are strings, while added flavors are compared as ints.
    return [flavor for flavor in visible if int(flavor) not in chosen]

@staticmethod
def is_valid_next_player(turns_received, next_player):
    """
    Tell whether next_player may receive the container this turn.
    A player qualifies when nobody has received fewer turns than they have.
    :param turns_received: output of get_turns_received().
    :param next_player: index of the player in question.
    :return: True if next_player is a valid choice, False otherwise.
    """
    fewest_turns = np.amin(turns_received)
    return bool(turns_received[next_player] <= fewest_turns)

@staticmethod
def diff_served(new_served, current_served=None):
    """
    List, per player, the flavors whose amount grew between two served snapshots.
    :param new_served: latest list of bowls (one dict of flavor -> units per player).
    :param current_served: previous list of bowls. When it is None (or empty), every
                           flavor with a positive amount in new_served counts as added.
    :return: changed_flavors[player_i] contains a list of flavors that player_i has newly added to
             their bowl since current_served.
    TODO: Take the amount into consideration?
    """
    changed_flavors = []
    for p_id, bowl in enumerate(new_served):
        if current_served:
            # Compare against the previous snapshot of the same player.
            gained = [flavor for flavor, amount in bowl.items()
                      if amount > current_served[p_id][flavor]]
        else:
            # No baseline: anything present counts as newly added.
            gained = [flavor for flavor, amount in bowl.items() if amount > 0]
        changed_flavors.append(gained)
    return changed_flavors

@staticmethod
def sum_top_layer(top_layer):
    """
    Count how many cells of the top layer show each flavor.
    :param top_layer: 2d grid of visible flavors.
    :return: dict mapping str(flavor) to the number of cells currently showing that flavor.
    TODO: come up with other ways to convert the top layer flavor info into useful information.
    """
    dims = np.shape(top_layer)
    counts = {}
    for row in range(dims[0]):
        for col in range(dims[1]):
            # Keys are the string form of the flavor value.
            label = str(top_layer[row][col])
            counts[label] = counts.get(label, 0) + 1
    return counts

0 comments on commit 31ddc40

Please sign in to comment.