Skip to content

Commit

Permalink
Fixed campaign.analysis and supporting plots
Browse files Browse the repository at this point in the history
  • Loading branch information
kstone40 committed Aug 13, 2024
1 parent b628fc8 commit bad05c9
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 127 deletions.
175 changes: 51 additions & 124 deletions obsidian/campaign/analysis.py
Original file line number Diff line number Diff line change
@@ -1,112 +1,24 @@
"Analysis utility functions for examining metrics over the context of an optimization campaign"

from obsidian.parameters import Param_Continuous

from obsidian.optimizer import Optimizer
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def plot_ofat_ranges(optimizer, ofat_ranges):
    """
    Plots each parameter's 1D OFAT acceptable range

    Args:
        optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data
            and can be used to make predictions.
        ofat_ranges (pd.DataFrame): A DataFrame containing the acceptable range values for each parameter.
            Only the first 10 rows are plotted.

    Returns:
        fig (matplotlib.figure.Figure): The parameter OFAT acceptable-range plot

    Raises:
        ValueError: If ``ofat_ranges`` has no rows.
    """
    shown = ofat_ranges.iloc[0:10, :]
    if len(shown) == 0:
        # The title and x-limits below are derived from the plotted rows
        raise ValueError('ofat_ranges must contain at least one parameter to plot')

    fig = plt.figure(figsize=(8, 4))
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    def annotate_edge(name, x_pos, y_scaled, color):
        # Label a range edge with its value converted back from the scaled axis
        x_range = optimizer.X_space.X_range[name].iloc[0]
        x_min = optimizer.X_space.X_min[name].iloc[0]
        plt.annotate(
            f'{(y_scaled*x_range+x_min):.2f}',
            xy=(x_pos, y_scaled), xytext=(x_pos + 0.25, y_scaled),
            fontsize=8, ha='left', va='center', rotation=0,
            arrowprops=dict(arrowstyle='-', color=color, lw=1))

    for i, (index, row) in enumerate(shown.iterrows()):
        # Wrap around so a user-configured colour cycle shorter than 10 still works
        color = colors[i % len(colors)]

        plt.plot([index, index], [row['Min_LB'], row['Max_LB']],
                 linewidth=6, linestyle='solid', color=color, label='High Confidence' if i == 0 else None)
        # Only annotate the LB edges when they sit strictly inside the mean range
        if row['Min_LB'] > row['Min_Mu']:
            annotate_edge(index, i, row['Min_LB'], color)
        if row['Max_LB'] < row['Max_Mu']:
            annotate_edge(index, i, row['Max_LB'], color)

        plt.plot([index, index], [row['Min_Mu'], row['Max_Mu']], linewidth=3,
                 linestyle='solid', color=color, label='Average' if i == 0 else None)
        annotate_edge(index, i, row['Min_Mu'], color)
        annotate_edge(index, i, row['Max_Mu'], color)

        # Only draw the UB segments where they extend beyond the mean range
        if row['Min_UB'] < row['Min_Mu']:
            plt.plot([index, index], [row['Min_UB'], row['Min_Mu']], linewidth=1, linestyle=':', color=color)
        if row['Max_UB'] > row['Max_Mu']:
            plt.plot([index, index], [row['Max_UB'], row['Max_Mu']], linewidth=1, linestyle=':', color=color)
        # Proxy artist so the dotted style always gets a legend entry
        plt.plot([0], [0], linewidth=1, linestyle=':', color=color, label='Low Confidence' if i == 0 else None)

    # Threshold and response name are taken from the last plotted row
    last = shown.iloc[-1]

    plt.xticks(rotation=90)
    plt.ylabel('Parameter Value (Scaled)')
    plt.ylim([-0.15, 1.15])
    plt.xlim([-1, len(shown)])
    plt.title(f'Univariate Range (OFAT) Estimates from APO Model \n Ranges exceeding {last["Threshold"]} {last["Response"]}',
              fontsize=10)
    plt.legend(bbox_to_anchor=(1.1, 1.05))

    return fig


def plot_interactions(optimizer, cor, clamp=False):
    """
    Draws the parameter interaction matrix as an annotated heatmap

    Args:
        optimizer (BayesianOptimizer): The optimizer object; its ``X_space.X_names`` supply the tick labels.
        cor (np.ndarray): The correlation matrix representing the parameter interactions.
        clamp (bool, optional): Whether to clamp the colorbar range to (0, 1). Defaults to ``False``.

    Returns:
        matplotlib.figure.Figure: The parameter interaction plot
    """
    fig = plt.figure(figsize=(5, 5))
    ax = fig.gca()

    heatmap = ax.matshow(cor)
    if clamp:
        heatmap.set_clim(0, 1)

    # One tick per parameter on both axes, labelled with the parameter names
    labels = optimizer.X_space.X_names
    ticks = np.arange(len(optimizer.X_space.X_names))
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_yticks(ticks)
    ax.set_yticklabels(labels, rotation=0)

    colorbar = fig.colorbar(heatmap)
    ax.set_title('Parameter Interactions')
    colorbar.ax.set_ylabel('Range Shrinkage')

    # Write the value into each cell whose score exceeds 0.05
    for (r, c), score in np.ndenumerate(cor):
        if not score > 0.05:
            continue
        ax.text(c, r, f'{score:0.2f}', ha='center', va='center', fontsize=8)

    return fig


def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7,
steps=100, response_id=0, calc_interacts=True):
def calc_ofat_ranges(optimizer: Optimizer,
threshold: float,
X_ref: pd.DataFrame | pd.Series | None = None,
PI_range: float = 0.95,
steps: int = 100,
response_id: int = 0,
calc_interacts: bool = True):
"""
Calculates an OFAT design space using confidence bounds around the optimizer prediction. Also
includes a matrix of interaction scores.
Args:
optimizer (BayesianOptimizer): The optimizer object which contains a surrogate that has been fit to data
optimizer (Optimizer): The optimizer object which contains a surrogate that has been fit to data
and can be used to make predictions.
X_ref (pd.DataFrame): The reference data point from which the OFAT variations are calculated.
threshold (float): The response value threshold (minimum value) which would be considered passing for OFAT variations.
Expand All @@ -125,19 +37,22 @@ def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7,
in comparison to the corresponding two independent 1-factor variations. As such, diagonal elements are 0.
"""

threshold = 0.4
ofat_ranges = []
response_name = optimizer.target[response_id].name

if X_ref is None:
X_ref = optimizer.X_space.mean()
if isinstance(X_ref, pd.Series):
X_ref = X_ref.to_frame().T

# Calculate 1D OFAT ranges
for p in optimizer.X_space:
if isinstance(p, Param_Continuous):
X_min = p.min
X_max = p.max
X_range = p.range
X_span = np.linspace(X_min, X_max, steps)


X_span = np.linspace(0, 1, steps)
X_sim = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns)
X_sim[p.name] = X_span
X_sim[p.name] = p.unit_demap(X_span)

df_pred = optimizer.predict(X_sim, PI_range=PI_range)
lb = df_pred[response_name + ' lb']
ub = df_pred[response_name + ' ub']
Expand All @@ -147,47 +62,59 @@ def calc_ofat_ranges(optimizer, threshold, X_ref, PI_range=0.7,
labels = ['Mu', 'LB', 'UB']

for label, y in zip(labels, [pred_mu, lb, ub]):
pass_ids = np.where(pred_mu > threshold)
pass_ids = np.where(y > threshold)
pass_vals = X_sim[p.name].iloc[pass_ids]

row['Min_'+label] = (pass_vals.min()-X_min)/X_range
row['Max_'+label] = (pass_vals.max()-X_min)/X_range
row['Min_'+label] = p.encode(pass_vals.min())
row['Max_'+label] = p.encode(pass_vals.max())
ofat_ranges.append(row)

ofat_ranges = pd.DataFrame(ofat_ranges).set_index('Name')

# Calculate the correlation matrix as 2-FI range / diagional of 1-FI box
if calc_interacts:
cor = []

for i, pi in enumerate(optimizer.X_space.X_names):
# Calculate with a nested loop of parameters
for pi in optimizer.X_space:
cor_j = []

Xi_pass_min = optimizer.X_space.X_min[pi] + optimizer.X_space.X_range[pi]*ofat_ranges['Min_Mu'][pi]
Xi_pass_max = optimizer.X_space.X_min[pi] + optimizer.X_space.X_range[pi]*ofat_ranges['Max_Mu'][pi]
Xi_pass_span = np.linspace(Xi_pass_min, Xi_pass_max, steps)

for j, pj in enumerate(optimizer.X_space.X_names):
Xj_pass_min = optimizer.X_space.X_min[pj] + optimizer.X_space.X_range[pj]*ofat_ranges['Min_Mu'][pj]
Xj_pass_max = optimizer.X_space.X_min[pj] + optimizer.X_space.X_range[pj]*ofat_ranges['Max_Mu'][pj]
Xj_pass_span = np.linspace(Xj_pass_min, Xj_pass_max, steps)
if np.isnan(ofat_ranges['Min_Mu'][pi.name]):
cor.append([np.nan]*len(optimizer.X_space))
continue

# Enumerate a grid over the passing range at the MEAN
Xi_pass_span = pi.unit_demap(np.linspace(ofat_ranges['Min_Mu'][pi.name],
ofat_ranges['Max_Mu'][pi.name], steps))

X_sim_cor = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns)
for pj in optimizer.X_space:

X_sim_cor[pj] = Xj_pass_span
if not pi == pj:
X_sim_cor[pi] = Xi_pass_span
if np.isnan(ofat_ranges['Min_Mu'][pj.name]):
cor_j.append([np.nan]*len(optimizer.X_space))
continue

Xj_pass_span = pi.unit_demap(np.linspace(ofat_ranges['Min_Mu'][pj.name],
ofat_ranges['Max_Mu'][pj.name], steps))

# Set up a simulation dataframe where these parameters will co-vary
X_sim_cor = pd.DataFrame(np.repeat(X_ref.values, repeats=steps, axis=0), columns=X_ref.columns)
X_sim_cor[pj.name] = Xj_pass_span
X_sim_cor[pi.name] = Xi_pass_span

pred_mu_cor_all, _ = optimizer.predict(X_sim_cor)
# Predict the responses, and extract the target one
pred_mu_cor_all = optimizer.predict(X_sim_cor)
pred_mu_cor = pred_mu_cor_all.iloc[:, response_id]
cor_passing = np.where(pred_mu_cor > threshold)[0]

# Want to calculate the number of steps along the diagonal which pass
# A value of 0 for cor_j means that the two parameters are independent
if len(cor_passing) > 0:
start = cor_passing[0]
stop = cor_passing[-1]
cor_ij = 1-(stop-start)/(steps-1)
cor_j.append(cor_ij)
pass_ij = (stop-start)/(steps-1)
else:
cor_j.append(0)
pass_ij = 0
cor_j.append(1 - pass_ij)

cor.append(cor_j)
cor = np.array(cor)
Expand Down
131 changes: 130 additions & 1 deletion obsidian/plotting/mpl.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
"""Matplotlib figure-generating functions"""

from obsidian.campaign import Campaign
from obsidian.optimizer import Optimizer

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.pyplot import Figure
from obsidian.campaign import Campaign

import numpy as np
import pandas as pd


def visualize_inputs(campaign: Campaign) -> Figure:
Expand Down Expand Up @@ -41,3 +47,126 @@ def visualize_inputs(campaign: Campaign) -> Figure:
plt.title('Correlation Plot')

return fig


def plot_ofat_ranges(optimizer: Optimizer,
                     ofat_ranges: pd.DataFrame) -> Figure:
    """
    Plots each parameter's 1D OFAT acceptable range

    Args:
        optimizer (Optimizer): The optimizer object which contains a surrogate
            that has been fit to data and can be used to make predictions.
        ofat_ranges (pd.DataFrame): A DataFrame containing the acceptable range
            values for each parameter, at the low bound, average, and high bound.
            Indexed by parameter name.

    Returns:
        Figure: The parameter OFAT acceptable-range plot

    Raises:
        ValueError: If ``ofat_ranges`` has no rows.
    """
    if len(ofat_ranges) == 0:
        # Figure width, title and x-limits are all derived from the rows
        raise ValueError('ofat_ranges must contain at least one parameter to plot')

    fig = plt.figure(figsize=(2*len(ofat_ranges), 4))
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # ofat_ranges may hold fewer rows than X_space has parameters, so the row
    # position is not a reliable index into X_space; resolve parameters by name
    params_by_name = {p.name: p for p in optimizer.X_space}

    def annotate_edge(param, x_pos, y_scaled, color):
        # Label a range edge with its value mapped back through the parameter
        plt.annotate(
            f'{(param.unit_demap(y_scaled)):.2f}',
            xy=(x_pos, y_scaled), xytext=(x_pos + 0.25, y_scaled),
            fontsize=8, ha='left', va='center', rotation=0,
            arrowprops=dict(arrowstyle='-', color=color, lw=1))

    # Iterate over the parameters
    for i, (p_name, row) in enumerate(ofat_ranges.iterrows()):
        # Wrap around the colour cycle so any number of parameters can be drawn
        color = colors[i % len(colors)]
        # Fall back to positional lookup if the index is not a parameter name
        param = params_by_name[p_name] if p_name in params_by_name else optimizer.X_space[i]

        # Plot as a bar chart; x-axis is the parameter name, y-axis is the scaled value
        plt.plot([p_name, p_name], [row['Min_LB'], row['Max_LB']],
                 linewidth=6, linestyle='solid', color=color, label='High Confidence' if i == 0 else None)

        # Only annotate the LB edges when they sit strictly inside the mean range
        if row['Min_LB'] > row['Min_Mu']:
            annotate_edge(param, i, row['Min_LB'], color)
        if row['Max_LB'] < row['Max_Mu']:
            annotate_edge(param, i, row['Max_LB'], color)

        plt.plot([p_name, p_name], [row['Min_Mu'], row['Max_Mu']], linewidth=3,
                 linestyle='solid', color=color, label='Average' if i == 0 else None)

        # The mean edges are always annotated
        annotate_edge(param, i, row['Min_Mu'], color)
        annotate_edge(param, i, row['Max_Mu'], color)

        # Only plot UB if it isn't already encompassed by higher-confidence ranges
        if row['Min_UB'] < row['Min_Mu']:
            plt.plot([p_name, p_name], [row['Min_UB'], row['Min_Mu']], linewidth=1, linestyle=':', color=color)
        if row['Max_UB'] > row['Max_Mu']:
            plt.plot([p_name, p_name], [row['Max_UB'], row['Max_Mu']], linewidth=1, linestyle=':', color=color)
        # Proxy artist so the dotted style always gets a legend entry
        plt.plot([0], [0], linewidth=1, linestyle=':', color=color, label='Low Confidence' if i == 0 else None)

        # Never annotate UB (low confidence)

    # Response name and threshold are taken from the last row
    last = ofat_ranges.iloc[-1]

    # Convert the most common 'PI Range' value into symmetric percentile limits
    alpha = ofat_ranges['PI Range'].mode().iloc[0]
    LCL = (1 - alpha) / 2
    UCL = 1 - LCL

    plt.xticks(rotation=90)
    plt.ylabel('Parameter Value (Scaled)')
    plt.ylim([-0.15, 1.15])
    plt.xlim([-1, len(ofat_ranges)])
    plt.title('Univariate Range (OFAT) Estimates from APO Model \n'
              + f'Ranges Exceeding {last["Response"]} > {last["Threshold"]} \n'
              + f'Confidence Range: {LCL*100:.1f} - {UCL*100:.1f}%',
              fontsize=10)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    # Detach the figure from pyplot's registry; the caller owns the returned object
    plt.close(fig)

    return fig


def plot_interactions(optimizer: Optimizer,
                      cor: np.ndarray,
                      clamp: bool = False) -> Figure:
    """
    Plots the parameter interaction matrix

    Args:
        optimizer (Optimizer): The optimizer object which contains a surrogate
            that has been fit to data and can be used to make predictions.
        cor (np.ndarray): The correlation matrix representing the parameter interactions.
        clamp (bool, optional): Whether to clamp the colorbar range to (0, 1).
            Defaults to ``False``.

    Returns:
        Figure: The parameter interaction plot
    """

    fig = plt.figure(figsize=(4, 4))
    ax = fig.gca()

    # Use matshow to plot correlation matrix
    cax = ax.matshow(cor)
    if clamp:
        cax.set_clim(0, 1)

    # Set axis labels and ticks; one tick per parameter name
    names = optimizer.X_space.X_names
    axis = np.arange(len(names))
    ax.set_xticks(axis)
    ax.set_xticklabels(names, rotation=90)
    ax.set_yticks(axis)
    ax.set_yticklabels(names, rotation=0)
    cbar = fig.colorbar(cax)
    ax.set_title('Parameter Interactions')
    cbar.ax.set_ylabel('Range Shrinkage')

    # Add text annotations if correlation is greater than 0.05
    for (i, j), z in np.ndenumerate(cor):
        if z > 0.05:
            ax.text(j, i, '{:0.2f}'.format(z), ha='center', va='center', fontsize=8)
    # Detach the figure from pyplot's registry; the caller owns the returned object
    plt.close(fig)

    return fig
Loading

0 comments on commit bad05c9

Please sign in to comment.