From 923782a9255490880fcb76fabe53dc18c59cfeaa Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Wed, 11 Sep 2024 13:25:50 -0700
Subject: [PATCH 1/2] added fixed bins, fixed bugs

---
 tpot2/selectors/map_elites_selection.py | 77 ++++++++++++++++---------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/tpot2/selectors/map_elites_selection.py b/tpot2/selectors/map_elites_selection.py
index 27ac6156..c3589801 100644
--- a/tpot2/selectors/map_elites_selection.py
+++ b/tpot2/selectors/map_elites_selection.py
@@ -1,56 +1,63 @@
 import numpy as np
 #TODO make these functions take in a predetermined set of bins rather than calculating a new set each time
 
-def create_nd_matrix(matrix, k):
+def create_nd_matrix(matrix, grid_steps=None, bins=None):
+
+    if (grid_steps is None) == (bins is None):  # reject neither-given as well as both-given, as the message states
+        raise ValueError("Either grid_steps or bins must be provided but not both")
+
     # Extract scores and features
-    scores = [row[0] for row in matrix]
-    features = [row[1:] for row in matrix]
+    scores = matrix[:, 0]
+    features = matrix[:, 1:]
 
     # Determine the min and max of each feature
     min_vals = np.min(features, axis=0)
     max_vals = np.max(features, axis=0)
 
     # Create bins for each feature
-    bins = [np.linspace(min_vals[i], max_vals[i], k) for i in range(len(min_vals))]
+    if bins is None:
+        bins = [np.linspace(min_vals[i], max_vals[i], grid_steps) for i in range(len(min_vals))]
 
     # Initialize n-dimensional matrix with negative infinity
-    nd_matrix = np.full([k-1]*len(min_vals), {"score": -np.inf, "idx": None})
-
+    nd_matrix = np.full([len(b)+1 for b in bins], {"score": -np.inf, "idx": None})
     # Fill in each cell with the highest score for that cell
     for idx, (score, feature) in enumerate(zip(scores, features)):
-        indices = [np.digitize(f, bin)-1 for f, bin in zip(feature, bins)]
-        
-        indices = [min(i, k-2) for i in indices] #the last bin is inclusive
-        
+        indices = [np.digitize(f, bin) for f, bin in zip(feature, bins)]
         cur_score = nd_matrix[tuple(indices)]["score"]
         if score > cur_score:
             nd_matrix[tuple(indices)] = {"score": score, "idx": idx}
 
-
     return nd_matrix
 
 def manhattan(a, b):
     return sum(abs(val1-val2) for val1, val2 in zip(a,b))
 
+def map_elites_survival_selector(scores,  k=None, rng=None, grid_steps= 10, bins=None):
+
+    if grid_steps is not None and bins is not None:
+        raise ValueError("Either grid_steps or bins must be provided but not both")
 
-def map_elites_survival_selector(scores,  k, rng=None, grid_steps= 10):
     rng = np.random.default_rng(rng)
     scores = np.array(scores)
     #create grid
     
-    matrix = create_nd_matrix(scores, grid_steps)
+    matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins)
     matrix = matrix.flatten()
 
     indexes =  [cell["idx"] for cell in matrix if cell["idx"] is not None]
 
     return np.unique(indexes)
 
-def map_elites_parent_selector(scores,  k, rng=None, grid_steps= 10, manhattan_distance = 2, n_parents=1,):
+def map_elites_parent_selector(scores,  k, rng=None, manhattan_distance = 2, n_parents=1, grid_steps= 10, bins=None):
+    
+    if grid_steps is not None and bins is not None:
+        raise ValueError("Either grid_steps or bins must be provided but not both")
+    
     rng = np.random.default_rng(rng)
     scores = np.array(scores)
     #create grid
     
-    matrix = create_nd_matrix(scores, grid_steps)
+    matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins)
     
     #return true if cell is not empty
     f = np.vectorize(lambda x: x["idx"] is not None)
@@ -60,8 +67,6 @@ def map_elites_parent_selector(scores,  k, rng=None, grid_steps= 10, manhattan_d
 
     idxes = [idx for idx in idx_to_coordinates.keys()] #all the indexes of best score per cell
 
-    
-
     distance_matrix = np.zeros((len(idxes), len(idxes)))
 
     for i, idx1 in enumerate(idxes):
@@ -87,17 +92,37 @@ def map_elites_parent_selector(scores,  k, rng=None, grid_steps= 10, manhattan_d
             candidates = candidates[candidates != dm_idx]
             manhattan_distance += 1
 
-            if manhattan_distance > grid_steps*scores.shape[1]:
+            if manhattan_distance > np.max(distance_matrix):
                 break
         
         if len(candidates) == 0:
-            parents.append([idx])
+            parents.append([idx] * n_parents) #if no other parents are found, repeat the same parent so every row has n_parents entries. weird to crossover with itself though
+        else:
+            this_parents = [idx]
+            for p in range(n_parents-1):
+                idx2_cords = rng.choice(candidates)
+                this_parents.append(idxes[idx2_cords])
+            parents.append(this_parents)
         
-        this_parents = [idx]
-        for p in range(n_parents-1):
-            idx2_cords = rng.choice(candidates)
-            this_parents.append(idxes[idx2_cords])
+    return np.array(parents)
 
-        parents.append(this_parents)
-        
-    return np.array(parents)
\ No newline at end of file
+
+def get_bins_quantiles(arr, k=None, q=None):
+    """Return one array of quantile bin edges per column of the 2-D array `arr`.
+
+    Pass `q` (quantile levels in [0, 1]) or `k` (number of evenly spaced levels), not both.
+    Raises ValueError unless exactly one of `k` and `q` is given.
+    """
+    if (q is None) == (k is None):
+        # the neither-given case used to fall through and hit an unbound final_q
+        raise ValueError("Exactly one of k or q must be specified")
+
+    # k evenly spaced quantile levels from 0 to 1 when explicit levels are not supplied
+    final_q = q if q is not None else np.linspace(0, 1, k)
+
+    return [np.quantile(arr[:, i], final_q) for i in range(arr.shape[1])]
+
+def get_bins(arr, k):
+    """Return, per column of the 2-D array `arr`, k evenly spaced bin edges from its min to its max."""
+    lows, highs = np.min(arr, axis=0), np.max(arr, axis=0)
+    return [np.linspace(lo, hi, k) for lo, hi in zip(lows, highs)]
\ No newline at end of file

From bcc890e5f827c0510e0963e3d6f78fce4bf92625 Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Thu, 12 Sep 2024 15:17:58 -0700
Subject: [PATCH 2/2] fix reproducibility bug

---
 tpot2/search_spaces/pipelines/choice.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py
index 25051aa0..af1a7a4d 100644
--- a/tpot2/search_spaces/pipelines/choice.py
+++ b/tpot2/search_spaces/pipelines/choice.py
@@ -12,7 +12,7 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -
         super().__init__()
         
         self.search_spaces = search_spaces
-        self.node = np.random.default_rng(rng).choice(self.search_spaces).generate()
+        self.node = np.random.default_rng(rng).choice(self.search_spaces).generate(rng=rng)
         
 
     def mutate(self, rng=None):
@@ -23,7 +23,7 @@ def mutate(self, rng=None):
             return self._mutate_node(rng)
     
     def _mutate_select_new_node(self, rng=None):
-        self.node = random.choice(self.search_spaces).generate()
+        self.node = np.random.default_rng(rng).choice(self.search_spaces).generate(rng=rng)  # draw from the supplied rng, not the global random module, so runs are reproducible
         return True
     
     def _mutate_node(self, rng=None):