From 923782a9255490880fcb76fabe53dc18c59cfeaa Mon Sep 17 00:00:00 2001 From: perib Date: Wed, 11 Sep 2024 13:25:50 -0700 Subject: [PATCH 1/2] added fixed bins, fixed bugs --- tpot2/selectors/map_elites_selection.py | 77 ++++++++++++++++--------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/tpot2/selectors/map_elites_selection.py b/tpot2/selectors/map_elites_selection.py index 27ac6156..c3589801 100644 --- a/tpot2/selectors/map_elites_selection.py +++ b/tpot2/selectors/map_elites_selection.py @@ -1,56 +1,63 @@ import numpy as np #TODO make these functions take in a predetermined set of bins rather than calculating a new set each time -def create_nd_matrix(matrix, k): +def create_nd_matrix(matrix, grid_steps=None, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") + # Extract scores and features - scores = [row[0] for row in matrix] - features = [row[1:] for row in matrix] + scores = matrix[:, 0] + features = matrix[:, 1:] # Determine the min and max of each feature min_vals = np.min(features, axis=0) max_vals = np.max(features, axis=0) # Create bins for each feature - bins = [np.linspace(min_vals[i], max_vals[i], k) for i in range(len(min_vals))] + if bins is None: + bins = [np.linspace(min_vals[i], max_vals[i], grid_steps) for i in range(len(min_vals))] # Initialize n-dimensional matrix with negative infinity - nd_matrix = np.full([k-1]*len(min_vals), {"score": -np.inf, "idx": None}) - + nd_matrix = np.full([len(b)+1 for b in bins], {"score": -np.inf, "idx": None}) # Fill in each cell with the highest score for that cell for idx, (score, feature) in enumerate(zip(scores, features)): - indices = [np.digitize(f, bin)-1 for f, bin in zip(feature, bins)] - - indices = [min(i, k-2) for i in indices] #the last bin is inclusive - + indices = [np.digitize(f, bin) for f, bin in zip(feature, bins)] cur_score = nd_matrix[tuple(indices)]["score"] if score > cur_score: nd_matrix[tuple(indices)] = {"score": score, "idx": idx} - return nd_matrix def manhattan(a, b): return sum(abs(val1-val2) for val1, val2 in zip(a,b)) +def map_elites_survival_selector(scores, k=None, rng=None, grid_steps= 10, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") -def map_elites_survival_selector(scores, k, rng=None, grid_steps= 10): rng = np.random.default_rng(rng) scores = np.array(scores) #create grid - matrix = create_nd_matrix(scores, grid_steps) + matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins) matrix = matrix.flatten() indexes = [cell["idx"] for cell in matrix if cell["idx"] is not None] return np.unique(indexes) -def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_distance = 2, n_parents=1,): +def map_elites_parent_selector(scores, k, rng=None, manhattan_distance = 2, n_parents=1, grid_steps= 10, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") + rng = np.random.default_rng(rng) scores = np.array(scores) #create grid - matrix = create_nd_matrix(scores, grid_steps) + matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins) #return true if cell is not empty f = np.vectorize(lambda x: x["idx"] is not None) @@ -60,8 +67,6 @@ def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_d idxes = [idx for idx in idx_to_coordinates.keys()] #all the indexes of best score per cell - - distance_matrix = np.zeros((len(idxes), len(idxes))) for i, idx1 in enumerate(idxes): @@ -87,17 +92,37 @@ def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_d candidates = candidates[candidates != dm_idx] manhattan_distance += 1 - if manhattan_distance > grid_steps*scores.shape[1]: + if manhattan_distance > np.max(distance_matrix): break if len(candidates) == 0: - parents.append([idx]) + parents.append([idx, idx]) #if no other parents are found, select the same parent twice. weird to crossover with itself though + else: + this_parents = [idx] + for p in range(n_parents-1): + idx2_cords = rng.choice(candidates) + this_parents.append(idxes[idx2_cords]) + parents.append(this_parents) - this_parents = [idx] - for p in range(n_parents-1): - idx2_cords = rng.choice(candidates) - this_parents.append(idxes[idx2_cords]) + return np.array(parents) - parents.append(this_parents) - - return np.array(parents) \ No newline at end of file + +def get_bins_quantiles(arr, k=None, q=None): + bins = [] + + if q is not None and k is not None: + raise ValueError("Only one of k or q can be specified") + + if q is not None: + final_q = q + elif k is not None: + final_q = np.linspace(0, 1, k) + + for i in range(arr.shape[1]): + bins.append(np.quantile(arr[:,i], final_q)) + return bins + +def get_bins(arr, k): + min_vals = np.min(arr, axis=0) + max_vals = np.max(arr, axis=0) + [np.linspace(min_vals[i], max_vals[i], k) for i in range(len(min_vals))] \ No newline at end of file From bcc890e5f827c0510e0963e3d6f78fce4bf92625 Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 12 Sep 2024 15:17:58 -0700 Subject: [PATCH 2/2] fix reproducibility bug --- tpot2/search_spaces/pipelines/choice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py index 25051aa0..af1a7a4d 100644 --- a/tpot2/search_spaces/pipelines/choice.py +++ b/tpot2/search_spaces/pipelines/choice.py @@ -12,7 +12,7 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) - super().__init__() self.search_spaces = search_spaces - self.node = np.random.default_rng(rng).choice(self.search_spaces).generate() + self.node = np.random.default_rng(rng).choice(self.search_spaces).generate(rng=rng) def mutate(self, rng=None): @@ -23,7 +23,7 @@ def mutate(self, rng=None): return self._mutate_node(rng) def _mutate_select_new_node(self, rng=None): - self.node = random.choice(self.search_spaces).generate() + self.node = random.choice(self.search_spaces).generate(rng=rng) return True def _mutate_node(self, rng=None):