diff --git a/players/g4_player.py b/players/g4_player.py index ce8a2a9..1e62d12 100644 --- a/players/g4_player.py +++ b/players/g4_player.py @@ -10,80 +10,234 @@ def __init__(self, flavor_preference: List[int], rng: np.random.Generator, logge """Initialise the player with given preference. Args: - flavor_preference (List[int]): flavor preference, most flavored flavor is first element in the list and last element is least preferred flavor - rng (np.random.Generator): numpy random number generator, use this for same player behvior across run + flavor_preference (List[int]): flavor preference, most flavored + flavor is first element in the list and last element is least + preferred flavor + + rng (np.random.Generator): numpy random number + generator, use this for same player behavior across run + logger (logging.Logger): logger use this like logger.info("message") """ self.flavor_preference = flavor_preference self.rng = rng self.logger = logger - self.state = None + self.state = { + 'current_served': None, + # Number of scoops served to ourselves in the current turn + 'current_turn_served': 0 + } - def valid_scoop(self, curr_level, x, y): + @staticmethod + def valid_scoop(curr_level, x, y): """Helper function: returns whether a scoop at an index x,y is valid or not""" d = curr_level[x,y] if curr_level[x+1,y] <= d and curr_level[x,y+1] <= d and curr_level[x+1,y+1] <= d: return True return False - def scoop_value(self, top_layer, curr_level, x, y): + @staticmethod + def scoop_value(flavor_preference, top_layer, curr_level, x, y): """Helper function: returns the value the player gets for a scoop at index x,y""" d = curr_level[x, y] if d >= 0: units = 1 - flav_total = len(self.flavor_preference) - self.flavor_preference.index(top_layer[x,y]) + 1 + flav_total = len(flavor_preference) - flavor_preference.index(top_layer[x,y]) + 1 if curr_level[x+1, y] == d: - flav_total += len(self.flavor_preference) - self.flavor_preference.index(top_layer[x+1,y]) + 1 + flav_total += len(flavor_preference) - flavor_preference.index(top_layer[x+1,y]) + 1 units += 1 if curr_level[x, y+1] == d: - flav_total += len(self.flavor_preference) - self.flavor_preference.index(top_layer[x,y+1]) + 1 + flav_total += len(flavor_preference) - flavor_preference.index(top_layer[x,y+1]) + 1 units += 1 if curr_level[x+1, y+1] == d: - flav_total += len(self.flavor_preference) - self.flavor_preference.index(top_layer[x+1,y+1]) + 1 + flav_total += len(flavor_preference) - flavor_preference.index(top_layer[x+1,y+1]) + 1 units += 1 return (flav_total, (x, y), units) else: return (0, (x,y), 0) + @staticmethod + def score_available_scoops(flavor_preference, top_layer, curr_level): + p_queue = [] + # Subtract one from length since 2x2 "spoon" must remain in container + for x in range(0, top_layer.shape[0]-1): + for y in range(0, top_layer.shape[1]-1): + if Player.valid_scoop(curr_level, x, y): + p_queue.append(Player.scoop_value(flavor_preference, top_layer, curr_level, x, y)) + # TODO (etm): If we care, we can use an actual heap / priority queue + p_queue.sort() + return p_queue - def serve(self, top_layer: np.ndarray, curr_level: np.ndarray, player_idx: int, get_flavors: Callable[[], List[int]], get_player_count: Callable[[], int], get_served: Callable[[], List[Dict[int, int]]], get_turns_received: Callable[[], List[int]]) -> Dict[str, Union[Tuple[int], int]]: - """Request what to scoop or whom to pass in the given step of the turn. In each turn the simulator calls this serve function multiple times for each step for a single player, until the player has scooped 24 units of ice-cream or asked to pass to next player or made an invalid request. If you have scooped 24 units of ice-cream in a turn then you get one last step in that turn where you can specify to pass to a player. + def serve(self, top_layer: np.ndarray, curr_level: np.ndarray, player_idx: int, + get_flavors: Callable[[], List[int]], + get_player_count: Callable[[], int], get_served: Callable[[], List[Dict[int, int]]], + get_turns_received: Callable[[], List[int]]) -> Dict[str, Union[Tuple[int], int]]: + """Request what to scoop or whom to pass in the given step of the turn. + In each turn the simulator calls this serve function multiple times for + each step for a single player, until the player has scooped 24 units of + ice-cream or asked to pass to next player or made an invalid request. + If you have scooped 24 units of ice-cream in a turn then you get one + last step in that turn where you can specify to pass to a player. Args: - top_layer (np.ndarray): Numpy 2d array of size (24, 15) containing flavor at each cell location - curr_level (np.ndarray): Numpy 2d array of size (24, 15) containing current level at each cell location from 8 to 0, where 8 is highest level at start and 0 means no icecream left at this level + top_layer (np.ndarray): Numpy 2d array of size (24, 15) containing + flavor at each cell location + + curr_level (np.ndarray): Numpy 2d array of size (24, 15) containing + current level at each cell location from 8 to 0, where 8 is highest + level at start and 0 means no icecream left at this level + player_idx (int): index of your player, 0-indexed - get_flavors (Callable[[], List[int]]): method which returns a list of all possible flavors + + get_flavors (Callable[[], List[int]]): method which returns a list of all + possible flavors + get_player_count (Callable[[], int]): method which returns number of total players - get_served (Callable[[], List[Dict[int, int]]]): method which returns a list of dictionaries corresponding to each player, each dictionary at index i tells how units of a flavor are present in the bowl of the player with index i. E.g. lets say the fourth element is {1: 0, 2: 8...} means the corresponding player with index 4 has 0 units of flavor 1 and 8 units of flavor - get_turns_received (Callable[[], List[int]]): method which returns a list of integers corresponding to each player, each element at index i tells how many turns a player with index i has played so far. + + get_served (Callable[[], List[Dict[int, int]]]): method which returns a list of dictionaries + corresponding to each player, each dictionary at index i tells how + units of a flavor are present in the bowl of the player with index + i. E.g. lets say the fourth element is {1: 0, 2: 8...} means the + corresponding player with index 4 has 0 units of flavor 1 and 8 + units of flavor + + get_turns_received (Callable[[], List[int]]): + method which returns a list of integers corresponding to each + player, each element at index i tells how many turns a player with + index i has played so far. Returns: - Dict[str, Union[Tuple[int],int]]: Return a dictionary specifying what action to take in the next step. + Dict[str, Union[Tuple[int],int]]: Return a dictionary specifying + what action to take in the next step. + 2 possible return values - {"action": "scoop", "values" : (i,j)} stating to scoop the 4 cells with index (i,j), (i+1,j), (i,j+1), (i+1,j+1) - {"action": "pass", "values" : i} pass to next player with index i + {"action": "scoop", "values" : (i,j)} + stating to scoop the 4 cells with index (i,j), (i+1,j), (i,j+1), (i+1,j+1) + + {"action": "pass", "values" : i} pass to next player with index i """ - if self.state == None: - self.state = 0 # build priority queue - p_queue = [] - for x in range(0, top_layer.shape[0]-1): - for y in range(0, top_layer.shape[1]-1): - if self.valid_scoop(curr_level, x, y): - p_queue.append(self.scoop_value(top_layer, curr_level, x, y)) - p_queue.sort(reverse=True) + p_queue = self.score_available_scoops(self.flavor_preference, top_layer, curr_level) # if there is still more ice-cream to take, make a scoop - if self.state < 24: + if self.state['current_turn_served'] < 24: action = "scoop" - value, (x, y), units = p_queue.pop(0) - self.state += units + value, (x, y), units = p_queue.pop() + self.state['current_turn_served'] += units values = (x, y) else: - self.state = 0 + self.state['current_turn_served'] = 0 other_player_list = list(range(0, get_player_count())) other_player_list.remove(player_idx) next_player = other_player_list[self.rng.integers(0, len(other_player_list))] action = "pass" values = next_player - return {"action": action, "values": values} + + # get knowledge of other players + pref_ranking = self.guess_player_pref_from_bowl(0, get_served(), get_flavors()) # [0] - favorite + added_flavors = self.diff_served(get_served(), self.state['current_served']) + + # update current served + self.state['current_served'] = get_served() + + return {"action": action, "values": values} + + @staticmethod + def guess_player_pref_from_bowl(player, current_served, flavors): + """ + Sort the contents of player's current_served bowl and generate a preference ranking accordingly. + :param player: target player. + :param current_served: player's bowl. + :param flavors: all flavors. + :return: a list of flavor rankings. Preferred flavors are ranked toward the front. + """ + bowl = current_served[player] + flavors_in_bowl, amounts_in_bowl = [], [] + for key in bowl.keys(): + if bowl[key] > 0: + flavors_in_bowl.append(key) + amounts_in_bowl.append(bowl[key]) + amount_indices = np.argsort(amounts_in_bowl)[::-1] + pref_ranking = [flavors_in_bowl[i] for i in amount_indices] + + # TODO what if some flavor(s) didn't appear in the bowl? Do we pad the ranking list at the end? + # pref_ranking += [-1] * (len(flavors) - len(pref_ranking)) + + return pref_ranking + + def guess_player_dislikes_from_choice(self, player, added_flavors, top_layer_before_serving): + """ + Guess the flavors that player probably dislikes based on the top_layer situation before they serve themselves, + and what flavors they end up choosing. Can only be used when we pass the container directly to player. + :param player: the player to be guessed on. + :param added_flavors: output of diff_served(). + :param top_layer_before_serving: visible flavors on the top layer when we pass the container to player. + :return: a list of flavors that the player probably doesn't like. Currently the strategy is to choose flavors + that are on top_layer but the player doesn't end up choosing. + TODO: this function needs to be called one turn after we pass the container to player, which is impossible. + Currently it's not being used but we may use the idea later. + """ + if len(added_flavors[player]) == 0: + print("WARNING: No added flavor info for this player!") + return None + dislikes = [] + top_layer_summed = self.sum_top_layer(top_layer_before_serving) + top_layer_flavors = top_layer_summed.keys() + for flavor in top_layer_flavors: + if int(flavor) not in added_flavors[player]: + dislikes.append(flavor) + + return dislikes + + @staticmethod + def is_valid_next_player(turns_received, next_player): + """ + Checks if next_player is a valid choice for passing the container to for this turn. + :param turns_received: output of get_turns_received(). + :param next_player: player in question. + :return: True if next_player is a valid choice. + """ + if np.amin(turns_received) < turns_received[next_player]: + return False + return True + + @staticmethod + def diff_served(new_served, current_served=None): + """ + Compare new_served against current_served and output changes in the container. + :param new_served: new container situation. + :param current_served: current container situation. None if new_served is from the first turn. + :return: changed_flavors[player_i] contains a list of flavors that player_i has newly added to + their bowl since current_served. + TODO: Take the amount into consideration? + """ + changed_flavors = [] + + for p_id in range(len(new_served)): + changed_flavors.append([]) + for flavor in new_served[p_id].keys(): + if current_served: + if new_served[p_id][flavor] > current_served[p_id][flavor]: + changed_flavors[p_id].append(flavor) + else: + if new_served[p_id][flavor] > 0: + changed_flavors[p_id].append(flavor) + + return changed_flavors + + @staticmethod + def sum_top_layer(top_layer): + """ + Summarize top_layer information by flavor. + :param top_layer: visible flavors. + :return: top_layer_summed['flavor_i'] is the amount of flavor_i currently visible on top_layer. + TODO: come up with other ways to convert the top layer flavor info into useful information. + """ + top_layer_summed = {} + for i in range(np.shape(top_layer)[0]): + for j in range(np.shape(top_layer)[1]): + if str(top_layer[i][j]) not in top_layer_summed: + top_layer_summed[str(top_layer[i][j])] = 0 + top_layer_summed[str(top_layer[i][j])] += 1 + + return top_layer_summed +