Skip to content

Commit

Permalink
Merge pull request #429 from pymc-labs/fix-failing-doctest
Browse files Browse the repository at this point in the history
Calculate goodness of fit (`r2_score`) based on posterior expectation rather than posterior prediction
  • Loading branch information
drbenvincent authored Nov 21, 2024
2 parents 142694f + cdf2ee5 commit e61bfd0
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions causalpy/pymc_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,18 @@ class PyMCModel(pm.Model):
... "chains": 2,
... "draws": 2000,
... "progressbar": False,
... "random_seed": rng,
... "random_seed": 42,
... }
... )
>>> model.fit(X, y)
Inference data...
>>> model.score(X, y) # doctest: +ELLIPSIS
r2 ...
r2_std ...
dtype: float64
>>> X_new = rng.normal(loc=0, scale=1, size=(20,2))
>>> model.predict(X_new)
Inference data...
>>> model.score(X, y)
r2 0.390344
r2_std 0.081135
dtype: float64
"""

def __init__(self, sample_kwargs: Optional[Dict[str, Any]] = None):
Expand Down Expand Up @@ -123,7 +123,6 @@ def predict(self, X):
# Ensure random_seed is used in sample_prior_predictive() and
# sample_posterior_predictive() if provided in sample_kwargs.
random_seed = self.sample_kwargs.get("random_seed", None)

self._data_setter(X)
with self: # sample with new input data
post_pred = pm.sample_posterior_predictive(
Expand All @@ -137,18 +136,19 @@ def predict(self, X):
def score(self, X, y) -> pd.Series:
"""Score the Bayesian :math:`R^2` given inputs ``X`` and outputs ``y``.
Note that the score is based on a comparison of the observed data ``y`` and the
model's expected value of the data, `mu`.
.. caution::
The Bayesian :math:`R^2` is not the same as the traditional coefficient of
determination, https://en.wikipedia.org/wiki/Coefficient_of_determination.
"""
yhat = self.predict(X)
yhat = az.extract(
yhat, group="posterior_predictive", var_names="y_hat"
).T.values
mu = self.predict(X)
mu = az.extract(mu, group="posterior_predictive", var_names="mu").T.values
# Note: First argument must be a 1D array
return r2_score(y.flatten(), yhat)
return r2_score(y.flatten(), mu)

def calculate_impact(self, y_true, y_pred):
pre_data = xr.DataArray(y_true, dims=["obs_ind"])
Expand Down

0 comments on commit e61bfd0

Please sign in to comment.