diff --git a/CHANGELOG.md b/CHANGELOG.md
index 566bab6..6a11a12 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,13 @@
# Changelog
## [Untracked Changes]
+### Added
+- Improved methods for fitting PyTorch surrogates, including automatic early stopping based on the norm of parameter changes
+
+### Modified
+- Greatly reduced the default number of samples for the DNN posterior (16384 to 512), speeding up optimization
+- Stabilized the mean estimate of ensemble surrogates by avoiding resampling
+- Disabled root caching for ensemble surrogates during optimization
## [0.8.5]
### Added
diff --git a/obsidian/acquisition/custom.py b/obsidian/acquisition/custom.py
index 4f41b6a..efb2495 100644
--- a/obsidian/acquisition/custom.py
+++ b/obsidian/acquisition/custom.py
@@ -64,10 +64,11 @@ class qSpaceFill(MCAcquisitionFunction):
"""
def __init__(self,
model: Model,
+ X_baseline: Tensor,
sampler: MCSampler | None = None,
objective: MCAcquisitionObjective | None = None,
posterior_transform: PosteriorTransform | None = None,
- X_pending: Tensor | None = None):
+ X_pending: Tensor | None = None,):
if sampler is None:
sampler = SobolQMCNormalSampler(sample_shape=torch.Size([512]))
@@ -80,6 +81,8 @@ def __init__(self,
super().__init__(model=model, sampler=sampler, objective=objective,
posterior_transform=posterior_transform, X_pending=X_pending)
+
+ self.register_buffer('X_baseline', X_baseline)
@t_batch_mode_transform()
def forward(self,
@@ -88,7 +91,7 @@ def forward(self,
Evaluate the acquisition function on the candidate set x
"""
# x dimensions: b * q * d
- x_train = self.model.train_inputs[0][0] # train_inputs is a list of tuples
+ x_train = self.X_baseline
# For sequential mode, add pending data points to "train"
if self.X_pending is not None:
diff --git a/obsidian/optimizer/bayesian.py b/obsidian/optimizer/bayesian.py
index 468199c..dff4fc5 100644
--- a/obsidian/optimizer/bayesian.py
+++ b/obsidian/optimizer/bayesian.py
@@ -3,7 +3,7 @@
from .base import Optimizer
from obsidian.parameters import ParamSpace, Target, Task
-from obsidian.surrogates import SurrogateBoTorch, DNN
+from obsidian.surrogates import SurrogateBoTorch, EnsembleModel
from obsidian.acquisition import aq_class_dict, aq_defaults, aq_hp_defaults, valid_aqs
from obsidian.surrogates import model_class_dict
from obsidian.objectives import Index_Objective, Objective_Sequence
@@ -18,7 +18,7 @@
from botorch.sampling.index_sampler import IndexSampler
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.gpytorch import GPyTorchModel
-from botorch.models.model import ModelList
+from botorch.models.model import ModelList, Model
from botorch.utils.sampling import draw_sobol_samples
from botorch.utils.multi_objective.box_decompositions.non_dominated import NondominatedPartitioning
@@ -478,6 +478,7 @@ def _parse_aq_kwargs(self,
hps: dict,
m_batch: int,
target_locs: list[int],
+ model: Model,
X_t_pending: Tensor | None = None,
objective: MCAcquisitionObjective | None = None) -> dict:
"""
@@ -533,7 +534,9 @@ def _parse_aq_kwargs(self,
# Noisy aqs require X_train reference
if aq in ['NEI', 'NEHVI', 'NParEGO']:
aq_kwargs['X_baseline'] = X_baseline
-
+ if any(isinstance(m, EnsembleModel) for m in model.models):
+ aq_kwargs['cache_root'] = False
+
# Hypervolume requires reference point
if aq in ['EHVI', 'NEHVI']:
@@ -570,6 +573,9 @@ def _parse_aq_kwargs(self,
w = w/torch.sum(torch.abs(w))
aq_kwargs['scalarization_weights'] = w
+ if aq == 'SF':
+ aq_kwargs['X_baseline'] = X_baseline
+
return aq_kwargs
def suggest(self,
@@ -712,7 +718,7 @@ def suggest(self,
if not isinstance(model, ModelListGP):
samplers = []
for m in model.models:
- if isinstance(m, DNN):
+ if isinstance(m, EnsembleModel):
sampler_i = IndexSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
else:
sampler_i = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
@@ -757,7 +763,9 @@ def suggest(self,
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': sampler, 'X_pending': X_t_pending}
- aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch, target_locs, X_t_pending, objective))
+ aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch,
+ target_locs, model,
+ X_t_pending, objective))
# Raise errors related to certain constraints
if aq_str in ['UCB', 'Mean', 'TS', 'SF', 'SR', 'NIPV']:
@@ -812,7 +820,10 @@ def suggest(self,
# Hypervolume aqs fail with X_t_pending when optim_sequential=True
if aq_str in ['NEHVI', 'EHVI']:
- optim_sequential = False
+ if optim_sequential and X_t_pending is not None:
+ warnings.warn('Hypervolume aqs with X_pending require joint optimization. \
+ Setting optim_sequential to False', UserWarning)
+ optim_sequential = False
# If it's random search, no need to do optimization; Otherwise, initialize the aq function and optimize
if aq_str == 'RS':
@@ -978,7 +989,9 @@ def evaluate(self,
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': None, 'X_pending': X_t_pending}
- aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0], target_locs, X_t_pending, objective))
+ aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0],
+ target_locs, model,
+ X_t_pending, objective))
# If it's random search, no need to evaluate aq
if aq_str == 'RS':
diff --git a/obsidian/plotting/plotly.py b/obsidian/plotting/plotly.py
index e753252..02f88e7 100644
--- a/obsidian/plotting/plotly.py
+++ b/obsidian/plotting/plotly.py
@@ -10,7 +10,6 @@
import plotly.graph_objects as go
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots
-from sklearn.manifold import MDS
import pandas as pd
import numpy as np
@@ -52,18 +51,34 @@ def visualize_inputs(campaign: Campaign) -> Figure:
+ ['Correlation Matrix']
+ [X.columns[i] for i in range(cols, n_dim)]
)
+ # If campaign.optimizer has been fit, X_best_f_idx marks the best observed sample
+ if hasattr(campaign.optimizer, 'X_best_f_idx'):
+ marker_shapes = ['diamond' if rowInd == campaign.optimizer.X_best_f_idx else 'circle' for rowInd in range(campaign.X.shape[0])]
+ else:
+ marker_shapes = ['circle']*campaign.X.shape[0]
for i, param in enumerate(X.columns):
row_i = i // cols + 1
col_i = i % cols + 1
fig.add_trace(go.Scatter(x=X.index, y=X[param],
mode='markers', name=param,
- marker=dict(color=color_list[i]),
+ marker=dict(color=color_list[i], symbol=marker_shapes),
showlegend=False),
row=row_i, col=col_i)
fig.update_xaxes(tickvals=np.around(np.linspace(0, campaign.m_exp, 5)),
row=row_i, col=col_i)
+ # Add a note explaining the marker shapes
+ if hasattr(campaign.optimizer, 'X_best_f_idx'):
+ fig.add_annotation(
+ text="Note: The diamond markers denote samples that achieve the best sum of targets.",
+ showarrow=False,
+ xref="paper", yref="paper",
+ x=0,
+ y=-0.2,
+ font=dict(style="italic")
+ )
+
# Calculate the correlation matrix
X_u = campaign.X_space.unit_map(X)
corr_matrix = X_u.corr()
@@ -99,6 +114,12 @@ def MDS_plot(campaign: Campaign) -> Figure:
Returns:
fig (Figure): The MDS plot
"""
+ try:
+ from sklearn.manifold import MDS
+ except ImportError:
+ raise ImportError('The `sklearn` package (>1.0) is required for the MDS plot. \
+ Please install it using `pip install scikit-learn`')
+
mds = MDS(n_components=2)
X_mds = mds.fit_transform(campaign.X_space.encode(campaign.X))
@@ -320,8 +341,9 @@ def factor_plot(optimizer: Optimizer,
Y_mu_ref = Y_pred_ref[y_name+('_t (pred)' if f_transform else ' (pred)')].values
fig.add_trace(go.Scatter(x=X_ref.iloc[:, feature_id].values, y=Y_mu_ref,
mode='markers',
+ marker=dict(symbol='diamond'),
line={'color': obsidian_colors.teal},
- name='Ref'),
+ name='Reference'),
)
fig.update_xaxes(title_text=X_name)
fig.update_yaxes(title_text=y_name)
@@ -539,7 +561,19 @@ def optim_progress(campaign: Campaign,
marker=marker_dict,
customdata=campaign.data[X_names],
name='Data'))
-
+
+ # Highlight the best samples
+ if hasattr(campaign.optimizer, 'X_best_f_idx'):
+ fig.add_trace(go.Scatter(x=pd.Series(out_exp.iloc[campaign.optimizer.X_best_f_idx, 0]),
+ y=pd.Series(out_exp.iloc[campaign.optimizer.X_best_f_idx, 1]),
+ mode='markers',
+ marker=dict(symbol='diamond-open', size=14),
+ line={'color': 'black'},
+ legendgroup='marker_shape', showlegend=True,
+ name='Best')
+ )
+ fig.update_layout(showlegend=True)
+
template = [""+str(param.name)+": "+" %{customdata["+str(i)+"]"
+ (":.3G}"if isinstance(param, Param_Continuous) else "}") + "
"
for i, param in enumerate(campaign.X_space)]
diff --git a/obsidian/surrogates/__init__.py b/obsidian/surrogates/__init__.py
index 3ec1726..94d42ee 100644
--- a/obsidian/surrogates/__init__.py
+++ b/obsidian/surrogates/__init__.py
@@ -4,3 +4,4 @@
from .custom_GP import *
from .custom_torch import *
from .config import *
+from .utils import *
diff --git a/obsidian/surrogates/botorch.py b/obsidian/surrogates/botorch.py
index 25d668e..9eed9d7 100644
--- a/obsidian/surrogates/botorch.py
+++ b/obsidian/surrogates/botorch.py
@@ -2,6 +2,7 @@
from .base import SurrogateModel
from .config import model_class_dict
+from .utils import fit_pytorch
from obsidian.utils import tensordict_to_dict, dict_to_tensordict
from obsidian.exceptions import SurrogateFitError
@@ -10,11 +11,11 @@
from botorch.fit import fit_gpytorch_mll
from botorch.optim.fit import fit_gpytorch_mll_torch, fit_gpytorch_mll_scipy
from botorch.models.gpytorch import GPyTorchModel
+from botorch.models.ensemble import EnsembleModel
from gpytorch.mlls import ExactMarginalLogLikelihood
import torch
import torch.nn as nn
-import torch.optim as optim
import numpy as np
import pandas as pd
import warnings
@@ -156,20 +157,7 @@ def fit(self,
raise SurrogateFitError('BoTorch model failed to fit')
else:
self.loss_fcn = nn.MSELoss()
- self.optimizer = optim.Adam(self.torch_model.parameters(), lr=1e-2)
-
- self.torch_model.train()
- for epoch in range(200):
- self.optimizer.zero_grad()
- output = self.torch_model(X_p)
- loss = self.loss_fcn(output, y_p)
- loss.backward()
- self.optimizer.step()
-
- if (epoch % 50 == 0 and self.verbose):
- print(f'Epoch {epoch}: Loss {loss.item()}')
-
- self.torch_model.eval()
+ fit_pytorch(self.torch_model, X_p, y_p, loss_fcn=self.loss_fcn, verbose=self.verbose)
self.is_fit = True
@@ -229,7 +217,14 @@ def predict(self,
X_p = self._prepare(X)
pred_posterior = self.torch_model.posterior(X_p)
- mu = pred_posterior.mean.detach().cpu().squeeze(-1)
+
+ # Prefer a stable mean estimate for ensemble models:
+ # use the forward method directly rather than re-sampling the posterior
+ if isinstance(self.torch_model, EnsembleModel):
+ mu = self.torch_model.forward(X_p).detach()
+ else:
+ mu = pred_posterior.mean.detach().cpu().squeeze(-1)
+
if q is not None:
if (q < 0) or (q > 1):
raise ValueError('Quantile must be between 0 and 1')
diff --git a/obsidian/surrogates/custom_torch.py b/obsidian/surrogates/custom_torch.py
index 3cbdcd6..b1bb037 100644
--- a/obsidian/surrogates/custom_torch.py
+++ b/obsidian/surrogates/custom_torch.py
@@ -1,11 +1,21 @@
"""Custom implementations of PyTorch surrogate models using BoTorch API"""
-from botorch.models.model import Model
+from .utils import fit_pytorch
+
+from obsidian.config import TORCH_DTYPE
+
+from botorch.models.model import FantasizeMixin
+from botorch.models.ensemble import EnsembleModel, Model
from botorch.posteriors.ensemble import Posterior, EnsemblePosterior
+import torch
import torch.nn as nn
+from torch.nn import Module
from torch import Tensor
+from typing import TypeVar
+TFantasizeMixin = TypeVar("TFantasizeMixin", bound="FantasizeMixin")
+
class DNNPosterior(EnsemblePosterior):
@@ -17,7 +27,7 @@ def quantile(self, value: Tensor) -> Tensor:
return self.values.quantile(q=value.to(self.values), dim=-3, interpolation='linear')
-class DNN(Model):
+class DNN(EnsembleModel, FantasizeMixin):
def __init__(self,
train_X: Tensor,
train_Y: Tensor,
@@ -33,6 +43,13 @@ def __init__(self,
if p_dropout < 0 or p_dropout > 1:
raise ValueError("p_dropout must be in [0, 1]")
+ self.register_buffer('train_X', train_X)
+ self.register_buffer('train_Y', train_Y)
+ self.register_buffer('p_dropout', torch.tensor(p_dropout, dtype=TORCH_DTYPE))
+ self.register_buffer('h_width', torch.tensor(h_width, dtype=torch.int))
+ self.register_buffer('h_layers', torch.tensor(h_layers, dtype=torch.int))
+ self.register_buffer('num_outputs', torch.tensor(num_outputs, dtype=torch.int))
+
self.input_layer = nn.Sequential(
nn.Linear(train_X.shape[-1], h_width),
nn.PReLU(),
@@ -49,6 +66,7 @@ def __init__(self,
self.outer_layer = nn.Linear(h_width, num_outputs)
self._num_outputs = num_outputs
+ self.to(TORCH_DTYPE)
def forward(self,
x: Tensor) -> Tensor:
@@ -60,7 +78,7 @@ def forward(self,
def posterior(self,
X: Tensor,
- n_sample: int = 16384,
+ n_sample: int = 512,
output_indices: list[int] = None,
observation_noise: bool | Tensor = False) -> Posterior:
"""Calculates the posterior distribution of the model at X"""
@@ -86,3 +104,53 @@ def posterior(self,
def num_outputs(self) -> int:
"""Number of outputs of the model"""
return self._num_outputs
+
+ def transform_inputs(self,
+ X: Tensor,
+ input_transform: Module = None) -> Tensor:
+ """
+ Transform inputs.
+
+ Args:
+ X: A tensor of inputs
+ input_transform: A Module that performs the input transformation.
+
+ Returns:
+ A tensor of transformed inputs
+ """
+ if input_transform is not None:
+ input_transform.to(X)
+ return input_transform(X)
+ try:
+ return self.input_transform(X)
+ except AttributeError:
+ return X
+
+ def condition_on_observations(self,
+ X: Tensor,
+ Y: Tensor) -> TFantasizeMixin:
+ """
+ Condition the model to new observations, returning a fantasy model
+ """
+
+ X_c = torch.concat((self.train_X, X), axis=0)
+ Y_c = torch.concat((self.train_Y, Y), axis=0)
+
+ # Create a new model based on the current one
+ fantasy = self.__class__(train_X=X_c, train_Y=Y_c,
+ p_dropout=float(self.p_dropout),
+ h_width=int(self.h_width), h_layers=int(self.h_layers),
+ num_outputs=int(self.num_outputs))
+
+ # Fit to the new data
+ fit_pytorch(fantasy, X_c, Y_c)
+
+ return fantasy
+
+ def fantasize(self,
+ X: Tensor) -> Model:
+
+ Y_f = self.forward(X).detach()
+ fantasy = self.condition_on_observations(X, Y_f)
+
+ return fantasy
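
A rough usage sketch for the revised `DNN` ensemble surrogate (not part of the patch): the tensor shapes and keyword values are illustrative, and it assumes the constructor keywords match the buffers registered in `__init__` and that inputs share the model's `TORCH_DTYPE`.

```python
# Illustrative only: exercise the DNN ensemble surrogate directly
import torch
from obsidian.config import TORCH_DTYPE
from obsidian.surrogates.custom_torch import DNN
from obsidian.surrogates.utils import fit_pytorch

train_X = torch.rand(30, 4, dtype=TORCH_DTYPE)
train_Y = torch.rand(30, 1, dtype=TORCH_DTYPE)

# Keyword names follow the registered buffers; the values here are arbitrary
dnn = DNN(train_X=train_X, train_Y=train_Y,
          p_dropout=0.1, h_width=16, h_layers=2, num_outputs=1)
fit_pytorch(dnn, train_X, train_Y)

# MC-dropout posterior now defaults to 512 samples instead of 16384
post = dnn.posterior(torch.rand(5, 4, dtype=TORCH_DTYPE))
print(post.mean.shape)

# Fantasy model conditioned on the model's own predictions at new points
fantasy = dnn.fantasize(torch.rand(2, 4, dtype=TORCH_DTYPE))
```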
diff --git a/obsidian/surrogates/utils.py b/obsidian/surrogates/utils.py
new file mode 100644
index 0000000..246c96c
--- /dev/null
+++ b/obsidian/surrogates/utils.py
@@ -0,0 +1,86 @@
+"""Utility functions for surrogate model handling"""
+
+import torch
+from torch import Tensor
+import torch.nn as nn
+from torch.nn import Module, Parameter
+import torch.optim as optim
+
+
+def check_parameter_change(params: list[Parameter],
+ prev_params: list[Parameter]) -> Tensor:
+ """Compute the average norm of the change in model parameters between two snapshots."""
+
+ changes = []
+ for param, prev_param in zip(params, prev_params):
+ changes.append(torch.norm(param.data - prev_param.data))
+
+ total_change = sum(changes)
+ avg_change = total_change / len(params)
+
+ return avg_change
+
+
+def fit_pytorch(model: Module,
+ X: Tensor,
+ y: Tensor,
+ loss_fcn: Module | None = None,
+ verbose: bool = False,
+ max_iter: int = 5000) -> None:
+ """
+ Fits a PyTorch model to the given input data.
+
+ Args:
+ model (Module): The PyTorch model to be trained.
+ X (Tensor): The input data.
+ y (Tensor): The target data.
+ loss_fcn (Module, optional): The loss function to be used.
+ If not provided, the Mean Squared Error (MSE) loss function will
+ be used. Defaults to ``None``.
+ verbose (bool, optional): Whether to print the loss at each epoch.
+ Defaults to ``False``.
+ max_iter (int, optional): Maximum number of training epochs.
+ Defaults to ``5000``.
+ """
+
+ if loss_fcn is None:
+ loss_fcn = nn.MSELoss()
+
+ optimizer = optim.Adam(model.parameters(),
+ lr=1e-2,
+ weight_decay=1e-2)
+
+ # Set up early stopping
+ avg_change = 1e10
+ param_tol = 1e-2
+ converge_buffer = 5
+ converge_count = 0
+
+ model.train()
+ for epoch in range(max_iter):
+
+ prev_params = [param.clone().detach() for param in model.parameters()]
+ optimizer.zero_grad()
+ output = model(X)
+ loss = loss_fcn(output, y)
+ loss.backward()
+ optimizer.step()
+
+ # Check change
+ current_params = [param.clone().detach() for param in model.parameters()]
+ avg_change = check_parameter_change(current_params, prev_params)
+
+ if (epoch % 50 == 0 and verbose):
+ print(f'Epoch {epoch}: Loss {loss.item()}')
+
+ # If the parameter change is under tolerance, increment the counter
+ if avg_change < param_tol:
+ converge_count += 1
+ else:
+ converge_count = 0
+
+ # Declare convergence once the change has stayed under tolerance for converge_buffer consecutive epochs
+ if converge_count == converge_buffer:
+ if verbose:
+ print(f'Converged at epoch {epoch}')
+ break
+
+ model.eval()
+ return
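
A minimal usage sketch for the new helper (not part of the patch): the network and data are illustrative; it only assumes `fit_pytorch` as defined above.

```python
# Illustrative only: fit_pytorch on a small stand-alone regression network
import torch
import torch.nn as nn
from obsidian.surrogates.utils import fit_pytorch

X = torch.rand(50, 3, dtype=torch.float64)
y = (X ** 2).sum(dim=-1, keepdim=True)

net = nn.Sequential(nn.Linear(3, 16), nn.PReLU(), nn.Linear(16, 1)).to(torch.float64)

# Training stops once the average parameter change stays below tolerance
# for 5 consecutive epochs, or after max_iter epochs
fit_pytorch(net, X, y, verbose=True, max_iter=1000)
```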
diff --git a/obsidian/tests/test_optimizer_MOO.py b/obsidian/tests/test_optimizer_MOO.py
index 5740c32..167030c 100644
--- a/obsidian/tests/test_optimizer_MOO.py
+++ b/obsidian/tests/test_optimizer_MOO.py
@@ -7,6 +7,8 @@
from obsidian.optimizer import BayesianOptimizer
from obsidian.experiment.benchmark import two_leaves
from obsidian.tests.utils import equal_state_dicts
+
+from botorch.models.ensemble import EnsembleModel
import pandas as pd
import numpy as np
@@ -51,10 +53,17 @@ def test_optimizer_fit(X_space, surrogate, Z0, serial_test=True):
obj_dict2 = optimizer_2.save_state()
assert equal_state_dicts(obj_dict, obj_dict2)
optimizer_2.__repr__()
+
y_pred = optimizer.predict(optimizer.X_train)
y_pred_2 = optimizer_2.predict(optimizer.X_train)
- y_error = ((y_pred_2-y_pred)/y_pred.max(axis=0)).values
- assert abs(y_error).max() < tol, 'Prediction error in loading parameters of saved optimizer'
+ y_error = ((y_pred_2-y_pred)/y_pred.max(axis=0))
+
+ # Only check the mean for ensemble models, as the lb/ub are not deterministic
+ if isinstance(optimizer.surrogate[0].torch_model, EnsembleModel):
+ check_cols = [col for col in y_pred if '(pred)' in col]
+ y_error = y_error[check_cols]
+
+ assert abs(y_error.values).max() < tol, 'Prediction error in loading parameters of saved optimizer'
# Generate a baseline optimizer to use for future tests
diff --git a/obsidian/tests/test_optimizer_SOO.py b/obsidian/tests/test_optimizer_SOO.py
index 8477709..ca961b4 100644
--- a/obsidian/tests/test_optimizer_SOO.py
+++ b/obsidian/tests/test_optimizer_SOO.py
@@ -8,6 +8,8 @@
from obsidian.experiment.benchmark import shifted_parab
from obsidian.tests.utils import approx_equal, equal_state_dicts
+from botorch.models.ensemble import EnsembleModel
+
import pandas as pd
import numpy as np
import pytest
@@ -80,8 +82,14 @@ def test_optimizer_fit(X_space, surrogate, Z0, serial_test=True):
optimizer_2.__repr__()
y_pred = optimizer.predict(optimizer.X_train)
y_pred_2 = optimizer_2.predict(optimizer.X_train)
- y_error = ((y_pred_2-y_pred)/y_pred.max(axis=0)).values
- assert abs(y_error).max() < tol, 'Prediction error in loading parameters of saved optimizer'
+ y_error = ((y_pred_2-y_pred)/y_pred.max(axis=0))
+
+ # Only check the mean for ensemble models, as the lb/ub are not deterministic
+ if isinstance(optimizer.surrogate[0].torch_model, EnsembleModel):
+ check_cols = [col for col in y_pred if '(pred)' in col]
+ y_error = y_error[check_cols]
+
+ assert abs(y_error.values).max() < tol, 'Prediction error in loading parameters of saved optimizer'
# Generate a baseline optimizer to use for future tests
diff --git a/pyproject.toml b/pyproject.toml
index c974769..c114fb5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,12 +58,12 @@ sphinx = { version = "^7.3.7", optional = true}
myst-parser = { version = "^3.0.1", optional = true}
pydata-sphinx-theme = { version = "^0.15.4", optional = true}
linkify-it-py = { version = "^2.0.3", optional = true}
-scikit-learn = "^1.5.1"
+scikit-learn = { version = "^1.5.1", optional = true}
[tool.poetry.extras]
app = ["flask", "dash", "dash-daq", "dash-bootstrap-components"]
-dev = ["pytest", "xlrd", "ipykernel", "jupyterlab", "flake8", "pytest-cov"]
+dev = ["pytest", "xlrd", "ipykernel", "jupyterlab", "flake8", "pytest-cov", "scikit-learn"]
docs = ["sphinx", "myst-parser", "pydata-sphinx-theme", "linkify-it-py"]