Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Major testing improvements and numerous bug fixes #22

Merged
merged 5 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ per-file-ignores =
__init__.py: F401, F403
obsidian/dash/*: F401, F403

# Often creating variables but not accessing them in testing
obsidian/tests/*: F841
# Often importing and creating unaccessed objects during testing
obsidian/tests/*: F401, F841

# No good way around comparing types for recursive state-dict comparison
obsidian/tests/utils.py: E721

exclude =
projects/
Expand Down
11 changes: 10 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
# Changelog

## [0.7.13]
## [0.8.0]
### Added
- Major improvements to testing and numerous small bug fixes to improve code robustness
- Code coverage > 90%
- New method for asserting equivalence of state_dicts during serialization

### Modified
- Objective PyTests separated
- Constraint PyTests separated

## [0.7.13]
### Added
- Campaign.Explainer now added to PyTests
- Docstrings and typing to Explainer methods
Expand Down
2 changes: 1 addition & 1 deletion obsidian/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""obsidian: Automated experiment design and black-box optimization"""
__version__ = '0.7.12'
__version__ = '0.8.0'

# Import key objects
from obsidian.campaign import Campaign
Expand Down
16 changes: 6 additions & 10 deletions obsidian/campaign/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def add_data(self, df: pd.DataFrame):

def clear_data(self):
"""Clears campaign data"""
self.data = None
self.data = pd.DataFrame()

@property
def optimizer(self) -> Optimizer:
Expand Down Expand Up @@ -189,12 +189,11 @@ def y(self) -> pd.Series | pd.DataFrame:
"""
Experimental response data

Raises:
ValueError: If no target(s) are specified.
"""
if not self.target:
raise ValueError('No target(s) specified')
return self.data[self.y_names]
if not self.data.empty:
return self.data[self.y_names]
else:
return None

@property
def response_max(self) -> float | pd.Series:
Expand Down Expand Up @@ -296,10 +295,7 @@ def load_state(cls,
new_campaign.data = pd.DataFrame(obj_dict['data'])
new_campaign.data.index = new_campaign.data.index.astype('int')

try:
new_campaign.iter = new_campaign.data['Iteration'].astype('int').max()
except KeyError:
new_campaign.iter = 0
new_campaign.iter = new_campaign.data['Iteration'].astype('int').max()

return new_campaign

Expand Down
11 changes: 6 additions & 5 deletions obsidian/campaign/explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from obsidian.parameters import Param_Continuous, ParamSpace
from obsidian.optimizer import Optimizer
from obsidian.exceptions import UnfitError

import shap
from shap import KernelExplainer, Explanation
Expand Down Expand Up @@ -36,7 +37,7 @@ def __init__(self,
X_space: ParamSpace | None = None) -> None:

if not optimizer.is_fit:
raise ValueError('Surrogate model in optimizer is not fit to data. ')
raise UnfitError('Surrogate model in optimizer is not fit to data. ')

self.set_optimizer(optimizer)
self.X_space = optimizer.X_space if X_space is None else X_space
Expand Down Expand Up @@ -117,7 +118,7 @@ def pred_func(X):
def shap_summary(self) -> Figure:
"""SHAP Summary Plot (Beeswarm)"""
if not self.shap:
raise ValueError('shap explainer is not fit.')
raise UnfitError('SHAP explainer is not fit.')

fig = plt.figure()
shap.summary_plot(self.shap['values'], self.shap['X_sample'],
Expand All @@ -129,7 +130,7 @@ def shap_summary(self) -> Figure:
def shap_summary_bar(self) -> Figure:
"""SHAP Summary Plot (Bar Plot / Importance)"""
if not self.shap:
raise ValueError('shap explainer is not fit.')
raise UnfitError('SHAP explainer is not fit.')

fig = plt.figure()
shap.plots.bar(self.shap['explanation'],
Expand Down Expand Up @@ -159,7 +160,7 @@ def shap_pdp_ice(self,

"""
if not self.shap:
raise ValueError('shap explainer is not fit.')
raise UnfitError('SHAP explainer is not fit.')

fig, ax = partial_dependence(
ind=ind,
Expand Down Expand Up @@ -193,7 +194,7 @@ def shap_single_point(self,

"""
if not self.shap:
raise ValueError('shap explainer is not fit.')
raise UnfitError('SHAP explainer is not fit.')

if isinstance(X_new, pd.Series):
X_new = X_new.copy().to_frame().T
Expand Down
3 changes: 2 additions & 1 deletion obsidian/parameters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ def __init__(self,
name: str):
self.name = name

@abstractmethod
def __repr__(self):
return f"{self.__class__.__name__}(name={self.name})"
pass # pragma: no cover

@abstractmethod
def _validate_value(self,
Expand Down
14 changes: 8 additions & 6 deletions obsidian/parameters/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __init__(self,
self.categories = categories.split(',')
else:
self.categories = categories
for c in categories:
for c in self.categories:
self._validate_value(c)

def __repr__(self):
Expand Down Expand Up @@ -183,12 +183,14 @@ def range(self):

def __init__(self,
name,
categories: int | float | list[int | float]):
if not isinstance(categories, (int, float, list)):
categories: list[int | float]):

if not isinstance(categories, list):
raise TypeError('Categories must be a number or list of numbers')
if isinstance(categories, list):
if not all(isinstance(x, (int, float)) for x in categories):
raise TypeError('Categories must be a list of numbers')

self.categories = categories
for c in self.categories:
self._validate_value(c)

self.name = name
self.categories = categories if isinstance(categories, list) else [categories]
Expand Down
23 changes: 14 additions & 9 deletions obsidian/parameters/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,26 +64,31 @@ def transform_f(self,
if not (isinstance(f, (pd.Series, pd.DataFrame, np.ndarray, list, float, int))
or torch.is_tensor(f)):
raise TypeError('f being transformed must be numeric or array-like')

if not fit:
if not hasattr(self, 'f_transform_func'):
raise UnfitError('Transform function is being called without being fit first.')


# Convert everything to numpy except Tensors
if isinstance(f, (float, int)):
f = [f]

f = np.array([f])
if isinstance(f, (list)):
f = np.array(f)
if isinstance(f, (pd.Series, pd.DataFrame)):
f = f.values

if not torch.is_tensor(f):
# Check that types are valid, then convert to Tensor
if not all(np.issubdtype(f_i.dtype, np.number) for f_i in f.flatten()):
raise TypeError('Each element of f being transformed must be numeric')
f = torch.tensor(f)

if not fit:
if not hasattr(self, 'f_transform_func'):
raise UnfitError('Transform function is being called without being fit first.')

if f.ndim == 1:
f = f.reshape(-1, 1)

if inverse:
f_obj = self.f_transform_func.inverse(f)
return pd.Series(f_obj.flatten(), name=self.name) * self.multiplier
f_obj = self.f_transform_func.inverse(f * self.multiplier)
return pd.Series(f_obj.flatten(), name=self.name)
else:
if fit:
self.f_transform_func = f_transform_dict[self.f_transform]()
Expand Down
2 changes: 1 addition & 1 deletion obsidian/parameters/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def forward(self,
X_s = self.params['scale']*(X - self.params['loc'])
valid_range = (X_s >= 0).all() and (X_s <= 1).all()
if not valid_range:
warnings.warn('Invalid range provided for logit scaler, proceeding with min-max fit')
warnings.warn('Invalid range provided for logit scaler, proceeding with min-max fit', UserWarning)
self._fit_minmax(X)
return self.forward(X)
else:
Expand Down
25 changes: 16 additions & 9 deletions obsidian/tests/param_configs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Preset parameter configurations for unit testing"""

from obsidian.parameters import Param_Continuous, Param_Ordinal, Param_Categorical, \
Param_Observational, Param_Discrete_Numeric, ParamSpace

# Set up a master list of parameter spaces for testing
params = [
Param_Continuous('Parameter 1', 0, 10),
Param_Continuous('Parameter 2', -20, 0),
Expand All @@ -15,14 +18,25 @@
Param_Ordinal('Parameter 11', ['N'])
]

# Subset some default selections
default = [params[i] for i in [0, 1, 2, 6]] # 2 continuous, 1 static, 1 categorical
X_sp_default = ParamSpace(params=default)

# Numeric
cont_small = [params[i] for i in [0, 1, 2]] # continuous including edge cases
numeric = [params[i] for i in [0, 1, 2, 3, 4, 5]] # numeric including edge cases
X_sp_cont_small = ParamSpace(params=cont_small)
X_sp_numeric = ParamSpace(params=numeric)

# Nominal
cat_small = [params[i] for i in [6, 7, 8]] # categorical including edge cases
disc_small = [params[i] for i in [6, 9]] # 1 categorical, 1 ordinal
disc_large = [params[i] for i in [6, 7, 8, 9, 10]] # discrete including edge cases
X_sp_cat_small = ParamSpace(params=cat_small)
X_sp_disc_small = ParamSpace(params=disc_small)
X_sp_disc_large = ParamSpace(params=disc_large)

# Set up a range of continuous parameters
params_cont_large = [
Param_Continuous('Parameter 1', 0, 10),
Param_Continuous('Parameter 2', 0, 10),
Expand All @@ -37,15 +51,8 @@
Param_Continuous('Parameter 11', 0, 10),
Param_Continuous('Parameter 12', 0, 10),
]

X_sp_default = ParamSpace(params=default)
X_sp_cont_small = ParamSpace(params=cont_small)
X_sp_cont_large = ParamSpace(params=params_cont_large)
X_sp_numeric = ParamSpace(params=numeric)
X_sp_cat_small = ParamSpace(params=cat_small)
X_sp_disc_small = ParamSpace(params=disc_small)
X_sp_disc_large = ParamSpace(params=disc_large)
X_sp_cont_ndims = [ParamSpace(params_cont_large[:i]) for i in range(len(params_cont_large))]

# Wrap everything for iteration during testing
test_X_space = [X_sp_default, X_sp_cont_small, X_sp_numeric, X_sp_cat_small, X_sp_disc_small, X_sp_disc_large]

X_sp_cont_ndims = [ParamSpace(params_cont_large[:i]) for i in range(len(params_cont_large))]
Loading
Loading