Commit

kstone40 committed Sep 7, 2024
2 parents 6e99ac3 + 59c374a commit f4c2bc7
Showing 14 changed files with 230 additions and 63 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,18 @@
# Changelog

## [Untracked Changes]
### Added
- More optional outputs, controlled by verbose settings
- Parameters in ParamSpace can now be indexed by name
- Parameters now have a search_space property, for narrowing the optimizer search space relative to the full space
- Continuous parameters have search_min/search_max; discrete parameters have search_categories

### Modified
- Optimizer and Campaign X_space attributes are now assigned using a setter

### Removed
- Torch device references and options (GPU compatibility may be re-added)

## [0.8.4]
### Added
- Campaign X_best method
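The search-space additions above can be exercised roughly as follows; a minimal sketch, assuming obsidian's Param_Continuous/Param_Categorical constructors (only ParamSpace indexing by name, set_search, search_min/search_max, and search_categories are confirmed by this commit):

from obsidian.parameters import ParamSpace, Param_Continuous, Param_Categorical

# Full space (constructor signatures are assumptions)
X_space = ParamSpace([
    Param_Continuous('Temperature', 0, 100),
    Param_Categorical('Solvent', ['MeOH', 'EtOH', 'IPA']),
])

# Parameters can now be indexed by name
temp = X_space['Temperature']

# Narrow the optimizer search space relative to the full space
temp.set_search(search_min=20, search_max=80)
X_space['Solvent'].set_search(search_categories=['MeOH', 'EtOH'])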
17 changes: 14 additions & 3 deletions obsidian/campaign/campaign.py
@@ -47,7 +47,7 @@ def __init__(self,
objective: Objective | None = None,
seed: int | None = None):

self.X_space = X_space
self.set_X_space(X_space)
self.data = pd.DataFrame()

optimizer = BayesianOptimizer(X_space, seed=seed) if optimizer is None else optimizer
@@ -101,6 +101,15 @@ def clear_data(self):
self.data = pd.DataFrame()
self.iter = 0

@property
def X_space(self) -> ParamSpace:
"""Campaign ParamSpace"""
return self._X_space

def set_X_space(self, X_space: ParamSpace):
"""Sets the campaign ParamSpace"""
self._X_space = X_space

@property
def optimizer(self) -> Optimizer:
"""Campaign Optimizer"""
@@ -342,6 +351,8 @@ def suggest(self, **optim_kwargs):
"""
if self.optimizer.is_fit:
try:
# In case X_space has changed, re-set the optimizer X_space
self.optimizer.set_X_space(self.X_space)
X, eval = self.optimizer.suggest(objective=self.objective, **optim_kwargs)
return (X, eval)
except Exception:
@@ -371,11 +382,11 @@ def _profile_hv(self):
for i in iters:
iter_index = self.data.query(f'Iteration <= {i}').index
out_iter = self.out.loc[iter_index, :]
out_iter = torch.tensor(out_iter.values).to(self.optimizer.device)
out_iter = torch.tensor(out_iter.values)
hv[i] = self.optimizer.hypervolume(out_iter)

self.data['Hypervolume (iter)'] = self.data.apply(lambda x: hv[x['Iteration']], axis=1)
self.data['Pareto Front'] = self.optimizer.pareto(torch.tensor(self.out.values).to(self.optimizer.device))
self.data['Pareto Front'] = self.optimizer.pareto(torch.tensor(self.out.values))

return

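A hedged usage sketch of the setter-based flow: because Campaign.suggest() now re-syncs the optimizer's X_space, a search space narrowed mid-campaign takes effect on the next suggestion (the Campaign construction, add_data(), and fit() calls are assumptions about the surrounding API):

# Continuing the sketch above: narrow the space between rounds
campaign = Campaign(X_space, target, seed=0)  # illustrative construction
campaign.add_data(df_round_1)                 # assumed data-entry method
campaign.fit()
campaign.X_space['Temperature'].set_search(search_min=40, search_max=60)
X_next, eval_next = campaign.suggest(m_batch=4)  # uses the narrowed space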
3 changes: 1 addition & 2 deletions obsidian/experiment/design.py
@@ -90,8 +90,7 @@ def initialize(self,

if seed is not None:
torch.manual_seed(seed)
if not torch.cuda.is_available():
torch.use_deterministic_algorithms(True)
torch.use_deterministic_algorithms(True)

if sample_custom is not None:
if sample_custom.shape[1] != d:
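With the CUDA conditional removed, seeding now unconditionally enables deterministic algorithms; a sketch of the expected behavior (ExpDesigner and its initialize() signature are assumptions about obsidian's experiment module):

from obsidian.experiment import ExpDesigner

# Same seed, same design: determinism no longer depends on CUDA availability
X0_a = ExpDesigner(X_space, seed=7).initialize(m_initial=8, method='LHS')
X0_b = ExpDesigner(X_space, seed=7).initialize(m_initial=8, method='LHS')
assert X0_a.equals(X0_b)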
18 changes: 14 additions & 4 deletions obsidian/optimizer/base.py
@@ -44,15 +44,25 @@ def __init__(self,
self.seed = seed
if self.seed is not None:
torch.manual_seed(self.seed)
if not torch.cuda.is_available():
torch.use_deterministic_algorithms(True)
torch.use_deterministic_algorithms(True)
np.random.seed(self.seed)
random.seed(self.seed)

# Store the parameter space which contains useful reference properties
if not isinstance(X_space, ParamSpace):
raise TypeError('X_space must be an obsidian ParamSpace object')
self.X_space = X_space
self.set_X_space(X_space)

@property
def X_space(self):
"""
ParamSpace: The parameter space defining the search space for the optimization.
"""
return self._X_space

def set_X_space(self, X_space: ParamSpace):
"""Sets the optimizer ParamSpace"""
self._X_space = X_space
return

def _fixed_features(self,
fixed_var: dict | None = None) -> list:
@@ -101,7 +111,7 @@ def _fixed_features(self,
# First, get the cartesian product of all of the categorical/ordinal combos
for x in self.X_space.X_discrete:
if x.name not in fixed_var.keys(): # Fixed_var should take precedent and lock out other combinations
df_i = pd.DataFrame({x.name: x.categories})
df_i = pd.DataFrame({x.name: x.search_categories})
df_list.append(df_i)

# Merge by cross
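A sketch tying this file's two changes together: X_space is assigned through the new setter, and _fixed_features() now enumerates search_categories, so excluded discrete levels never enter the candidate combinations (the parameter setup is illustrative, continuing the sketches above):

opt = BayesianOptimizer(X_space, seed=0)
X_space['Solvent'].set_search(search_categories=['MeOH', 'EtOH'])
opt.set_X_space(X_space)  # re-assign through the new setter
# _fixed_features() now crosses only ['MeOH', 'EtOH'] for 'Solvent';
# 'IPA' is excluded from the enumerated discrete combinations.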
77 changes: 44 additions & 33 deletions obsidian/optimizer/bayesian.py
@@ -64,7 +64,6 @@ class BayesianOptimizer(Optimizer):
Attributes:
surrogate_type (list[str]): The shorthand name of each surrogate model.
surrogate_hps (list[dict]): The hyperparameters for each surrogate model.
device (str): The device to use for computations ('cuda' if available, 'cpu' otherwise).
is_fit (bool): Indicates whether the surrogate model has been fit to data.
Raises:
@@ -122,8 +121,6 @@ def _load_surrogate_dict(surrogate_dict):
if surrogate_str not in model_class_dict.keys():
raise KeyError(f'Surrogate model must be selected from one of: {model_class_dict.keys()}')

self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

return

@property
@@ -230,7 +227,7 @@ def fit(self,
for i in range(self.n_response):
self.surrogate.append(
SurrogateBoTorch(model_type=self.surrogate_type[i], seed=self.seed,
verbose=self.verbose, hps=self.surrogate_hps[i]))
verbose=self.verbose >= 2, hps=self.surrogate_hps[i]))

# Handle response NaN values on a response-by-response basis
f_train_i = self.f_train.iloc[:, i]
@@ -246,9 +243,11 @@
self.surrogate[i].fit(X_t_train_valid, f_train_i_valid,
cat_dims=self.X_space.X_t_cat_idx, task_feature=self.X_space.X_t_task_idx)

if self.verbose > 0:
print(f'{self.surrogate_type[i]} model has been fit \
to data with a train-score of: {self.surrogate[i].r2_score:.3g} for response: {self.y_names[i]}')
if self.verbose >= 1:
print(f'{self.surrogate_type[i]} model has been fit to data'
+ f' with an R2-train-score of: {self.surrogate[i].r2_score:.3g}'
+ (f' and a training-loss of: {self.surrogate[i].loss:.3g}' if self.verbose >= 2 else '')
+ f' for response: {self.y_names[i]}')
return

def save_state(self) -> dict:
@@ -381,6 +380,9 @@ def predict(self,
raise NameError('X for prediction does not contain all of the \
required predictors from the training set')

if self.verbose >= 3:
print(f'Predicting {X.shape[0]} experiments [...]')

X_names = list(self.X_space.X_names)
X_pred = X[X_names].dropna(subset=X_names) # Reinforce order and non-nan before proceeding
nan_indices = np.where(pd.isnull(X[X_names]).any(axis=1))[0].tolist()
@@ -516,7 +518,9 @@ def _parse_aq_kwargs(self,

# If using an objective, want to calculate EI/PI from here
o = f_t if not objective else objective(f_t.unsqueeze(0), X_baseline).squeeze(0)

if objective:
aq_kwargs['objective'] = objective

# Improvement aqs based on inflation or deflation of best point
if aq in ['EI', 'PI']:
o_max = o.max(dim=0).values * (1+hps['inflate'])
@@ -552,9 +556,12 @@
aq_kwargs['partitioning'] = NondominatedPartitioning(aq_kwargs['ref_point'], Y=o)

if aq == 'NIPV':
X_bounds = torch.tensor([[0.0, 1.0]]*self.X_space.n_tdim, dtype=TORCH_DTYPE).T.to(self.device)
X_bounds = torch.tensor([[0.0, 1.0]]*self.X_space.n_tdim, dtype=TORCH_DTYPE).T
qmc_samples = draw_sobol_samples(bounds=X_bounds, n=128, q=m_batch)
aq_kwargs['mc_points'] = qmc_samples.squeeze(-2)
aq_kwargs['sampler'] = None
if objective:
raise UnsupportedError('NIPV does not support objectives')

if aq == 'NParEGO':
w = hps['scalarization_weights']
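The new NIPV guard above surfaces as an immediate error when an objective is supplied, since NIPV targets model uncertainty rather than any transformed output; a hedged sketch (opt and some_objective are illustrative):

try:
    opt.suggest(acquisition=['NIPV'], objective=some_objective)
except UnsupportedError:
    pass  # raised in _parse_aq_kwargs: 'NIPV does not support objectives'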
@@ -660,10 +667,15 @@ def suggest(self,

if not self.is_fit:
raise UnfitError('Surrogate model must be fit before suggesting new experiments')


if self.verbose >= 2:
print(f'Optimizing {m_batch} experiments [...]')

# Use indexing to handle if suggestions are made for a subset of fit targets/surrogates
target = self._validate_target(target)
target_locs = [self.y_names.index(t.name) for t in target]

# Select the model(s) to use for optimization
model_list = [one_surrogate.torch_model for i, one_surrogate in enumerate(self.surrogate) if i in target_locs]
if all(isinstance(m, GPyTorchModel) for m in model_list):
model = ModelListGP(*model_list)
@@ -685,10 +697,11 @@

optim_type = 'single' if o_dim == 1 else 'multi'

# Default to noisy expected improvement if no aq method is provided
# Default if no aq method is provided
if not acquisition:
acquisition = [aq_defaults[optim_type]]

# Type check for acquisition
if not isinstance(acquisition, list):
raise TypeError('acquisition must be a list of strings or dictionaries')
if not all(isinstance(item, (str, dict)) for item in acquisition):
@@ -702,14 +715,16 @@
samplers = []
for m in model.models:
if isinstance(m, DNN):
sampler_i = IndexSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed).to(self.device)
sampler_i = IndexSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
else:
sampler_i = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed).to(self.device)
sampler_i = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)
samplers.append(sampler_i)
sampler = ListSampler(*samplers)
else:
sampler = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed).to(self.device)
X_bounds = torch.tensor([[0.0, 1.0]]*self.X_space.n_tdim, dtype=TORCH_DTYPE).T.to(self.device)
sampler = SobolQMCNormalSampler(sample_shape=torch.Size([optim_samples]), seed=self.seed)

# Calculate search bounds for optimization
X_bounds = torch.tensor(self.X_space.search_space.values, dtype=TORCH_DTYPE)

# Set up master lists to hold the candidates from multi-acquisition results
candidates_all = []
@@ -744,11 +759,8 @@
# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'sampler': sampler, 'X_pending': X_t_pending}

if aq_str != 'NIPV':
aq_kwargs['objective'] = objective
else:
aq_kwargs['sampler'] = None

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch, target_locs, X_t_pending, objective))

# Type check for constraints
for constraint_type in eq_constraints, ineq_constraints, nleq_constraints, out_constraints:
if constraint_type:
@@ -776,8 +788,6 @@
if fixed_features_list:
raise UnsupportedError('Nonlinear constraints are not supported with discrete features.')

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, m_batch, target_locs, X_t_pending, objective))

# Hypervolume aqs fail with X_t_pending when optim_sequential=True
if aq_str in ['NEHVI', 'EHVI']:
optim_sequential = False
@@ -804,6 +814,9 @@
options=optim_options,
**optim_kwargs)

if self.verbose >= 2:
print(f'Optimized {aq_str} acquisition function successfully')

candidates_i = self.X_space.decode(
pd.DataFrame(candidates.detach().cpu().numpy(),
columns=[col for col in self.X_t_train.columns if col not in self.X_space.X_task]))
@@ -857,16 +870,16 @@ def evaluate(self,
"""

if not self.is_fit:
raise UnfitError('Surrogate model must be fit before suggesting new experiments')
raise UnfitError('Surrogate model must be fit before evaluating new experiments')

# Use indexing to handle if suggestions are made for a subset of fit targets/surrogates
target = self._validate_target(target)
target_locs = [self.y_names.index(t.name) for t in target]

# Begin evaluation with y_predict with pred interval
eval_suggest = self.predict(X_suggest)
X_t = torch.tensor(self.X_space.encode(X_suggest).values, dtype=TORCH_DTYPE).to(self.device)
X_t_train = torch.tensor(self.X_space.encode(self.X_train).values, dtype=TORCH_DTYPE).to(self.device)
X_t = torch.tensor(self.X_space.encode(X_suggest).values, dtype=TORCH_DTYPE)
X_t_train = torch.tensor(self.X_space.encode(self.X_train).values, dtype=TORCH_DTYPE)

# Evaluate f_predict on new and pending points
f_all = []
@@ -924,27 +937,25 @@
optim_type = 'single' if o_dim == 1 else 'multi'

if eval_aq:
# Default to noisy expected improvement if no aq method is provided
# Default if no aq method is provided
if not acquisition:
acquisition = aq_defaults[optim_type]

if not isinstance(acquisition, (str, dict)):
raise TypeError('Acquisition must be either a string or a dictionary')

# Extract acq function names and custom hyperparameters from the 'acquisition' list in config
aq_str, aq_hps = self._validate_hypers(o_dim, acquisition)

model_list = [one_surrogate.torch_model for i, one_surrogate in enumerate(self.surrogate) if i in target_locs]
if all(isinstance(m, GPyTorchModel) for m in model_list):
model = ModelListGP(*model_list)
else:
model = ModelList(*model_list)

# Extract acq function names and custom hyperparameters from the 'acquisition' list in config
aq_str, aq_hps = self._validate_hypers(o_dim, acquisition)

# Use aq_kwargs so that extra unnecessary ones in hps get removed for certain aq funcs
aq_kwargs = {'model': model, 'X_pending': X_t_pending}
if aq_str != 'NIPV':
aq_kwargs['objective'] = objective

aq_kwargs = {'model': model, 'sampler': None, 'X_pending': X_t_pending}

aq_kwargs.update(self._parse_aq_kwargs(aq_str, aq_hps, X_suggest.shape[0], target_locs, X_t_pending, objective))

# If it's random search, no need to evaluate aq
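An end-to-end sketch of the revised flow in this file: verbosity tiers gate the new prints, and optimization bounds now come from X_space.search_space rather than the fixed unit cube (the fit() signature and the verbose keyword are assumptions; data variables are illustrative):

opt = BayesianOptimizer(X_space, seed=0, verbose=2)
opt.fit(Z_train, target=target)  # verbose>=1: R2 train-score; >=2: also loss
X_next, eval_next = opt.suggest(m_batch=2, acquisition=['NEHVI'])
# verbose>=2 also prints 'Optimizing 2 experiments' and a success message
df_eval = opt.evaluate(X_next)   # verbose>=3 would log predict() calls too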
5 changes: 5 additions & 0 deletions obsidian/parameters/base.py
@@ -22,6 +22,11 @@ def _validate_value(self,
"""Validate data inputs"""
pass # pragma: no cover

@abstractmethod
def set_search(self):
"""Set the search space for the parameter"""
pass # pragma: no cover

@abstractmethod
def encode(X):
"""Encode parameter to a format that can be used for training"""
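A hedged sketch of a concrete override of the new abstract set_search() for a continuous parameter (search_min/search_max come from the changelog; the fall-back-to-full-range behavior is an assumption):

class Param_Continuous(Parameter):
    def set_search(self, search_min: float | None = None,
                   search_max: float | None = None):
        """Restrict the optimizer search range within the full [min, max]"""
        # Assumed: omitted bounds fall back to the full parameter range
        self.search_min = self.min if search_min is None else search_min
        self.search_max = self.max if search_max is None else search_max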
[8 additional changed files not shown]