Merge pull request #429 from pymc-labs/fix-failing-doctest

Calculate goodness of fit (`r2_score`) based on posterior expectation rather than posterior prediction
pymc-labs · Nov 21, 2024 · e61bfd0 · e61bfd0
2 parents 142694f + cdf2ee5
commit e61bfd0
Showing 1 changed file with 11 additions and 11 deletions.
diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py
@@ -54,18 +54,18 @@ class PyMCModel(pm.Model):
     ...                 "chains": 2,
     ...                 "draws": 2000,
     ...                 "progressbar": False,
-    ...                 "random_seed": rng,
+    ...                 "random_seed": 42,
     ...             }
     ... )
     >>> model.fit(X, y)
     Inference data...
+    >>> model.score(X, y)  # doctest: +ELLIPSIS
+    r2        ...
+    r2_std    ...
+    dtype: float64
     >>> X_new = rng.normal(loc=0, scale=1, size=(20,2))
     >>> model.predict(X_new)
     Inference data...
-    >>> model.score(X, y)
-    r2        0.390344
-    r2_std    0.081135
-    dtype: float64
     """
 
     def __init__(self, sample_kwargs: Optional[Dict[str, Any]] = None):
@@ -123,7 +123,6 @@ def predict(self, X):
         # Ensure random_seed is used in sample_prior_predictive() and
         # sample_posterior_predictive() if provided in sample_kwargs.
         random_seed = self.sample_kwargs.get("random_seed", None)
-
         self._data_setter(X)
         with self:  # sample with new input data
             post_pred = pm.sample_posterior_predictive(
@@ -137,18 +136,19 @@ def predict(self, X):
     def score(self, X, y) -> pd.Series:
         """Score the Bayesian :math:`R^2` given inputs ``X`` and outputs ``y``.
 
+        Note that the score is based on a comparison of the observed data ``y`` and the
+        model's expected value of the data, `mu`.
+
         .. caution::
 
             The Bayesian :math:`R^2` is not the same as the traditional coefficient of
             determination, https://en.wikipedia.org/wiki/Coefficient_of_determination.
 
         """
-        yhat = self.predict(X)
-        yhat = az.extract(
-            yhat, group="posterior_predictive", var_names="y_hat"
-        ).T.values
+        mu = self.predict(X)
+        mu = az.extract(mu, group="posterior_predictive", var_names="mu").T.values
         # Note: First argument must be a 1D array
-        return r2_score(y.flatten(), yhat)
+        return r2_score(y.flatten(), mu)
 
     def calculate_impact(self, y_true, y_pred):
         pre_data = xr.DataArray(y_true, dims=["obs_ind"])