Fixed campaign.analysis and supporting plots

MSDLLCpapers · Aug 13, 2024 · bad05c9 · bad05c9
1 parent b628fc8
commit bad05c9
Show file tree

Hide file tree

Showing 4 changed files with 198 additions and 127 deletions.
diff --git a/obsidian/campaign/analysis.py b/obsidian/campaign/analysis.py
@@ -1,112 +1,24 @@
 "Analysis utility functions for examining metrics over the context of an optimization campaign"
 
 from obsidian.parameters import Param_Continuous
-
+from obsidian.optimizer import Optimizer
 import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
-
-
-def plot_ofat_ranges(optimizer, ofat_ranges):
-    """
-    Plots each parameter's 1D OFAT acceptable range
-
-    Args:
-        optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data
-            and can be used to make predictions.
-        ofat_ranges (pd.DataFrame): A DataFrame containing the acceptable range values for each parameter.
-
-    Returns:
-        fig (matplotlib.figure.Figure): The parameter OFAT acceptable-range plot
-    """
-
-    fig = plt.figure(figsize=(8, 4))
-    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
-    for i, (index, row) in enumerate(ofat_ranges.iloc[0:10, :].iterrows()):
-        color = colors[i]
-
-        plt.plot([index, index], [row['Min_LB'], row['Max_LB']],
-                 linewidth=6, linestyle='solid', color=color, label='High Confidence' if i == 0 else None)
-        if row['Min_LB'] > row['Min_Mu']:
-            plt.annotate(
-                f'{(row["Min_LB"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}',
-                xy=(i, row['Min_LB']), xytext=(i + 0.25, row['Min_LB']),
-                fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
-        if row['Max_LB'] < row['Max_Mu']:
-            plt.annotate(
-                f'{(row["Max_LB"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}',
-                xy=(i, row['Max_LB']), xytext=(i + 0.25, row['Max_LB']),
-                fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
-
-        plt.plot([index, index], [row['Min_Mu'], row['Max_Mu']], linewidth=3,
-                 linestyle='solid', color=color, label='Average' if i == 0 else None)
-        plt.annotate(
-            f'{(row["Min_Mu"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}',
-            xy=(i, row['Min_Mu']), xytext=(i + 0.25, row['Min_Mu']),
-            fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
-        plt.annotate(
-            f'{(row["Max_Mu"]*optimizer.X_space.X_range[index].iloc[0]+optimizer.X_space.X_min[index].iloc[0]):.2f}',
-            xy=(i, row['Max_Mu']), xytext=(i + 0.25, row['Max_Mu']),
-            fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
-
-        if row['Min_UB'] < row['Min_Mu']:
-            plt.plot([index, index], [row['Min_UB'], row['Min_Mu']],  linewidth=1, linestyle=':', color=color)
-        if row['Max_UB'] > row['Max_Mu']:
-            plt.plot([index, index], [row['Max_UB'], row['Max_Mu']],  linewidth=1, linestyle=':', color=color)
-        plt.plot([0], [0], linewidth=1, linestyle=':', color=color, label='Low Confidence' if i == 0 else None)
-
-    plt.xticks(rotation=90)
-    plt.ylabel('Parameter Value (Scaled)')
-    plt.ylim([-0.15, 1.15])
-    plt.xlim([-1, i+1])
-    plt.title(f'Univeriate Range (OFAT) Estimates from APO Model \n Ranges exceeding {row["Threshold"]} {row["Response"]}',
-              fontsize=10)
-    plt.legend(bbox_to_anchor=(1.1, 1.05))
-
-    return fig
-
-
-def plot_interactions(optimizer, cor, clamp=False):
-    """
-    Plots the parameter interaction matrix
 
-    Args:
-        optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data
-            and can be used to make predictions.
-        cor (np.ndarray): The correlation matrix representing the parameter interactions.
-        clamp (bool, optional): Whether to clamp the colorbar range to (0, 1). Defaults to ``False``.
 
-    Returns:
-        matplotlib.figure.Figure: The parameter interaction plot
-    """
-    fig = plt.figure(figsize=(5, 5))
-    ax = fig.gca()
-    cax = ax.matshow(cor)
-    if clamp:
-        cax.set_clim(0, 1)
-    axis = np.arange(len(optimizer.X_space.X_names))
-    names = optimizer.X_space.X_names
-    ax.set_xticks(axis)
-    ax.set_xticklabels(names, rotation=90)
-    ax.set_yticks(axis)
-    ax.set_yticklabels(names, rotation=0)
-    cbar = fig.colorbar(cax)
-    ax.set_title('Parameter Interactions')
-    cbar.ax.set_ylabel('Range Shrinkage')
-    for (i, j), z in np.ndenumerate(cor):
-        if z > 0.05:
-            ax.text(j, i, '{:0.2f}'.format(z), ha='center', va='center', fontsize=8)
-    return fig
-
-
-def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7,
-                     steps=100, response_id=0, calc_interacts=True):
+def calc_ofat_ranges(optimizer: Optimizer,
+                     threshold: float,
+                     X_ref: pd.DataFrame | pd.Series | None = None,
+                     PI_range: float = 0.95,
+                     steps: int = 100,
+                     response_id: int = 0,
+                     calc_interacts: bool = True):
     """
     Calculates an OFAT design space using confidence bounds around the optimizer prediction. Also
     includes a matrix of interaction scores.
 
     Args:
-        optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data
+        optimizer (Optimizer): The optimizer object which contains a surrogate that has been fit to data
             and can be used to make predictions.
         X_ref (pd.DataFrame): The reference data point from which the OFAT variations are calculated.
         threshold (float): The response value threshold (minimum value) which would be considered passing for OFAT variations.
@@ -125,19 +37,22 @@ def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7,
             in comparison to the corresponding two independent 1-factor variations. As such, diagonal elements are 0.
     """
 
-    threshold = 0.4
     ofat_ranges = []
     response_name = optimizer.target[response_id].name
 
+    if X_ref is None:
+        X_ref = optimizer.X_space.mean()
+    if isinstance(X_ref, pd.Series):
+        X_ref = X_ref.to_frame().T
+
+    # Calculate 1D OFAT ranges
     for p in optimizer.X_space:
         if isinstance(p, Param_Continuous):
-            X_min = p.min
-            X_max = p.max
-            X_range = p.range
-            X_span = np.linspace(X_min, X_max, steps)
-
+
+            X_span = np.linspace(0, 1, steps)
             X_sim = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns)
-            X_sim[p.name] = X_span
+            X_sim[p.name] = p.unit_demap(X_span)
+
             df_pred = optimizer.predict(X_sim, PI_range=PI_range)
             lb = df_pred[response_name + ' lb']
             ub = df_pred[response_name + ' ub']
@@ -147,47 +62,59 @@ def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7,
             labels = ['Mu', 'LB', 'UB']
 
             for label, y in zip(labels, [pred_mu, lb, ub]):
-                pass_ids = np.where(pred_mu > threshold)
+                pass_ids = np.where(y > threshold)
                 pass_vals = X_sim[p.name].iloc[pass_ids]
 
-                row['Min_'+label] = (pass_vals.min()-X_min)/X_range
-                row['Max_'+label] = (pass_vals.max()-X_min)/X_range
+                row['Min_'+label] = p.encode(pass_vals.min())
+                row['Max_'+label] = p.encode(pass_vals.max())
             ofat_ranges.append(row)
 
     ofat_ranges = pd.DataFrame(ofat_ranges).set_index('Name')
 
+    # Calculate the correlation matrix as 2-FI range / diagonal of 1-FI box
     if calc_interacts:
         cor = []
 
-        for i, pi in enumerate(optimizer.X_space.X_names):
+        # Calculate with a nested loop of parameters
+        for pi in optimizer.X_space:
             cor_j = []
 
-            Xi_pass_min = optimizer.X_space.X_min[pi] + optimizer.X_space.X_range[pi]*ofat_ranges['Min_Mu'][pi]
-            Xi_pass_max = optimizer.X_space.X_min[pi] + optimizer.X_space.X_range[pi]*ofat_ranges['Max_Mu'][pi]
-            Xi_pass_span = np.linspace(Xi_pass_min, Xi_pass_max, steps)
-
-            for j, pj in enumerate(optimizer.X_space.X_names):
-                Xj_pass_min = optimizer.X_space.X_min[pj] + optimizer.X_space.X_range[pj]*ofat_ranges['Min_Mu'][pj]
-                Xj_pass_max = optimizer.X_space.X_min[pj] + optimizer.X_space.X_range[pj]*ofat_ranges['Max_Mu'][pj]
-                Xj_pass_span = np.linspace(Xj_pass_min, Xj_pass_max, steps)
+            if np.isnan(ofat_ranges['Min_Mu'][pi.name]):
+                cor.append([np.nan]*len(optimizer.X_space))
+                continue
+
+            # Enumerate a grid over the passing range at the MEAN
+            Xi_pass_span = pi.unit_demap(np.linspace(ofat_ranges['Min_Mu'][pi.name],
+                                                     ofat_ranges['Max_Mu'][pi.name], steps))
 
-                X_sim_cor = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns)
+            for pj in optimizer.X_space:
 
-                X_sim_cor[pj] = Xj_pass_span
-                if not pi == pj:
-                    X_sim_cor[pi] = Xi_pass_span
+                if np.isnan(ofat_ranges['Min_Mu'][pj.name]):
+                    cor_j.append([np.nan]*len(optimizer.X_space))
+                    continue
+
+                Xj_pass_span = pi.unit_demap(np.linspace(ofat_ranges['Min_Mu'][pj.name],
+                                                         ofat_ranges['Max_Mu'][pj.name], steps))
+
+                # Set up a simulation dataframe where these parameters will co-vary
+                X_sim_cor = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns)
+                X_sim_cor[pj.name] = Xj_pass_span
+                X_sim_cor[pi.name] = Xi_pass_span
 
-                pred_mu_cor_all, _ = optimizer.predict(X_sim_cor)
+                # Predict the responses, and extract the target one
+                pred_mu_cor_all = optimizer.predict(X_sim_cor)
                 pred_mu_cor = pred_mu_cor_all.iloc[:, response_id]
                 cor_passing = np.where(pred_mu_cor > threshold)[0]
 
+                # Want to calculate the number of steps along the diagonal which pass
+                # A value of 0 for cor_j means that the two parameters are independent
                 if len(cor_passing) > 0:
                     start = cor_passing[0]
                     stop = cor_passing[-1]
-                    cor_ij = 1-(stop-start)/(steps-1)
-                    cor_j.append(cor_ij)
+                    pass_ij = (stop-start)/(steps-1)
                 else:
-                    cor_j.append(0)
+                    pass_ij = 0
+                cor_j.append(1 - pass_ij)
 
             cor.append(cor_j)
         cor = np.array(cor)

diff --git a/obsidian/plotting/mpl.py b/obsidian/plotting/mpl.py
@@ -1,8 +1,14 @@
+"""Matplotlib figure-generating functions"""
+
+from obsidian.campaign import Campaign
+from obsidian.optimizer import Optimizer
+
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
 from matplotlib.pyplot import Figure
-from obsidian.campaign import Campaign
+
 import numpy as np
+import pandas as pd
 
 
 def visualize_inputs(campaign: Campaign) -> Figure:
@@ -41,3 +47,126 @@ def visualize_inputs(campaign: Campaign) -> Figure:
     plt.title('Correlation Plot')
 
     return fig
+
+
+def plot_ofat_ranges(optimizer: Optimizer,
+                     ofat_ranges: pd.DataFrame) -> Figure:
+    """
+    Plots each parameter's 1D OFAT acceptable range
+
+    Args:
+        optimizer (Optimizer): The optimizer object which contains a surrogate
+            that has been fit to data and can be used to make predictions.
+        ofat_ranges (pd.DataFrame): A DataFrame containing the acceptable range
+            values for each parameter, at the low bound, average, and high bound.
+
+    Returns:
+        Figure: The parameter OFAT acceptable-range plot
+    """
+
+    fig = plt.figure(figsize=(2*len(ofat_ranges), 4))
+    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
+
+    # Iterate over the parameters
+    for i, (p_name, row) in enumerate(ofat_ranges.iterrows()):
+        color = colors[i]
+
+        # Plot as a bar chart; x-axis is the parameter name, y-axis is the scaled value
+        plt.plot([p_name, p_name], [row['Min_LB'], row['Max_LB']],
+                 linewidth=6, linestyle='solid', color=color, label='High Confidence' if i == 0 else None)
+
+        # If the edges of LB are too close to mean, only annotate LB (higher conf)
+        if row['Min_LB'] > row['Min_Mu']:
+            plt.annotate(
+                f'{(optimizer.X_space[i].unit_demap(row["Min_LB"])):.2f}',
+                xy=(i, row['Min_LB']), xytext=(i + 0.25, row['Min_LB']),
+                fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
+        if row['Max_LB'] < row['Max_Mu']:
+            plt.annotate(
+                f'{(optimizer.X_space[i].unit_demap(row["Max_LB"])):.2f}',
+                xy=(i, row['Max_LB']), xytext=(i + 0.25, row['Max_LB']),
+                fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
+
+        plt.plot([p_name, p_name], [row['Min_Mu'], row['Max_Mu']], linewidth=3,
+                 linestyle='solid', color=color, label='Average' if i == 0 else None)
+
+        # If the edges of the mean are too close to the UB, only annotate mean (higher conf)
+        plt.annotate(
+            f'{(optimizer.X_space[i].unit_demap(row["Min_Mu"])):.2f}',
+            xy=(i, row['Min_Mu']), xytext=(i + 0.25, row['Min_Mu']),
+            fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
+        plt.annotate(
+            f'{(optimizer.X_space[i].unit_demap(row["Max_Mu"])):.2f}',
+            xy=(i, row['Max_Mu']), xytext=(i + 0.25, row['Max_Mu']),
+            fontsize=8, ha='left', va='center', rotation=0, arrowprops=dict(arrowstyle='-', color=color, lw=1))
+
+        # Only plot UB if it isn't already encompassed by higher-confidence ranges
+        if row['Min_UB'] < row['Min_Mu']:
+            plt.plot([p_name, p_name], [row['Min_UB'], row['Min_Mu']],  linewidth=1, linestyle=':', color=color)
+        if row['Max_UB'] > row['Max_Mu']:
+            plt.plot([p_name, p_name], [row['Max_UB'], row['Max_Mu']],  linewidth=1, linestyle=':', color=color)
+        plt.plot([0], [0], linewidth=1, linestyle=':', color=color, label='Low Confidence' if i == 0 else None)
+
+        # Never annotate UB (low confidence)
+
+    alpha = ofat_ranges['PI Range'].mode().iloc[0]
+    LCL = (1 - alpha) / 2
+    UCL = 1 - LCL
+
+    plt.xticks(rotation=90)
+    plt.ylabel('Parameter Value (Scaled)')
+    plt.ylim([-0.15, 1.15])
+    plt.xlim([-1, len(ofat_ranges)])
+    plt.title('Univariate Range (OFAT) Estimates from APO Model \n'
+              + f'Ranges Exceeding {row["Response"]} > {row["Threshold"]} \n'
+              + f'Confidence Range: {LCL*100:.1f} - {UCL*100:.1f}%',
+              fontsize=10)
+    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
+    plt.close(fig)
+
+    return fig
+
+
+def plot_interactions(optimizer: Optimizer,
+                      cor: np.ndarray,
+                      clamp: bool = False):
+    """
+    Plots the parameter interaction matrix
+
+    Args:
+        optimizer (Optimizer): The optimizer object which contains a surrogate
+            that has been fit to data and can be used to make predictions.
+        cor (np.ndarray): The correlation matrix representing the parameter interactions.
+        clamp (bool, optional): Whether to clamp the colorbar range to (0, 1).
+            Defaults to ``False``.
+
+    Returns:
+        Figure: The parameter interaction plot
+    """
+
+    fig = plt.figure(figsize=(4, 4))
+    ax = fig.gca()
+
+    # Use matshow to plot the correlation matrix
+    cax = ax.matshow(cor)
+    if clamp:
+        cax.set_clim(0, 1)
+
+    # Set axis labels and ticks
+    axis = np.arange(len(optimizer.X_space.X_names))
+    names = optimizer.X_space.X_names
+    ax.set_xticks(axis)
+    ax.set_xticklabels(names, rotation=90)
+    ax.set_yticks(axis)
+    ax.set_yticklabels(names, rotation=0)
+    cbar = fig.colorbar(cax)
+    ax.set_title('Parameter Interactions')
+    cbar.ax.set_ylabel('Range Shrinkage')
+
+    # Add text annotations if correlation is greater than 0.05
+    for (i, j), z in np.ndenumerate(cor):
+        if z > 0.05:
+            ax.text(j, i, '{:0.2f}'.format(z), ha='center', va='center', fontsize=8)
+    plt.close(fig)
+
+    return fig