From f50b73ca4a953677605fe6e640b637763584d625 Mon Sep 17 00:00:00 2001
From: John Cherian
Date: Tue, 22 Oct 2024 21:15:56 -0700
Subject: [PATCH 1/7] changes to test epsilon sampling that account for
 limited signal in states with few units reporting + guaranteed overlap in
 aggregate PIs

---
 .../models/BootstrapElectionModel.py | 79 +++++++++++++------
 1 file changed, 55 insertions(+), 24 deletions(-)

diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index 003fe182..217212cf 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -6,6 +6,7 @@
 import pandas as pd
 from elexsolver.OLSRegressionSolver import OLSRegressionSolver
 from elexsolver.QuantileRegressionSolver import QuantileRegressionSolver
+from scipy.linalg import block_diag
 from scipy.special import expit
 
 from elexmodel.handlers.data.Featurizer import Featurizer
@@ -586,40 +587,66 @@ def _sample_test_epsilon(
         This function generates new test epsilons (contest level random effects)
         NOTE: for now we are sampling from a normal with mean zero and sample variance.
         """
-        # the contests that need a sampled contest level random effect are those that still have outstanding
-        # units (since for the others, the contest level random effect is a known fixed quantity) but no
-        # current estimate for the epsilon in that contest (so ones with NO training examples)
-
-        # this gives us indices of contests for which we have no samples in the training set
-        contests_not_in_reporting_units = np.where(np.all(aggregate_indicator_train == 0, axis=0))[0]
-        # gives us contests for which there is at least one county not reporting
-        contests_in_nonreporting_units = np.where(np.any(aggregate_indicator_test > 0, axis=0))[0]
-        # contests that need a random effect are:
-        # contests that we want to make predictions on (ie. there are still outstanding units)
-        # BUT no units in that contest are reporting
-        contests_that_need_random_effect = np.intersect1d(
-            contests_not_in_reporting_units, contests_in_nonreporting_units
-        )
-
-        # \epsilon ~ N(0, \Sigma)
-        mu_hat = [0, 0]
-
-        # the variance is the sample variance of epsilons for which we have an estimate
         non_zero_epsilon_indices = np.nonzero(epsilon_y_hat)[0]
         # if there is only one non zero contest OR if the contest is a state level election only
         # then just sample from nearly zero since no variance
        if non_zero_epsilon_indices.shape[0] == 1:
             return np.zeros((1, self.B)), np.zeros((1, self.B))
 
-        sigma_hat = np.cov(np.concatenate([epsilon_y_hat, epsilon_z_hat], axis=1)[np.nonzero(epsilon_y_hat)[0]].T)
+        aggregate_indicator = np.concatenate([aggregate_indicator_train, aggregate_indicator_test], axis=0)
+
+        # computes standard error of epsilon_hat estimate for each contest
+        # formula is given by square root of (1 - n/N) * (pop variance) / n
+        # where n is the number of observed units and N is the number of units in the contest
+        # pop variance is the variance of the residuals in the contest
+        def _get_epsilon_hat_std(residuals, epsilon):
+            var = np.var(aggregate_indicator_train * residuals, axis=0) # incorrect denominator for variance
+            var *= aggregate_indicator_train.shape[0] / (aggregate_indicator_train.sum(axis=0) - 1) # Bessel's correction
+            var *= (1 - aggregate_indicator_train.sum(axis=0) / aggregate_indicator.sum(axis=0)) / aggregate_indicator_train.sum(axis=0)
+
+            # where we have < 2 units in a contest, we set the variance to the variance of the observed epsilon_hat values
+            var[np.isnan(var) | np.isinf(var)] = np.var(
+                epsilon[np.nonzero(epsilon)[0]].T,
+                ddof=1
+            )
+            return np.sqrt(var)
+
+        std_y = _get_epsilon_hat_std(residuals_y, epsilon_y_hat)
+        std_z = _get_epsilon_hat_std(residuals_z, epsilon_z_hat)
+
+        std = np.zeros((std_y.shape[0] + std_z.shape[0],))
+        std[0::2] = std_y
+        std[1::2] = std_z
+
+        # high observed correlation between epsilon_y_hat and epsilon_z_hat in 2020, so this is important
+        corr = np.corrcoef(
+            np.concatenate([epsilon_y_hat, epsilon_z_hat], axis=1)[np.nonzero(epsilon_y_hat)[0]].T
+        )
+        # tile corr into a block diagonal matrix
+        corr_list = [corr] * aggregate_indicator.shape[1]
+        corr_hat = block_diag(*corr_list)
+
+        # TODO: may wish to add additional correlations to corr_hat
+        # e.g., correlation across the rust belt, sun belt, abortion referenda states, etc.
+
+        # \epsilon ~ N(0, \Sigma)
+        # Sigma is a block diagonal matrix with 2x2 blocks running down the diagonal and 0s elsewhere
+        # each block is the covariance matrix of epsilon_y and epsilon_z for a particular contest, 
+        # e.g., the first block is for the AK contest, the second block is for the AL contest, etc.
+        # we can sample from this distribution to get new epsilons
+        mu_hat = np.zeros(corr_hat.shape[0])
+        sigma_hat = np.diag(std) @ corr_hat @ np.diag(std)
 
-        # sample new test epsilons, but only for states that need tem
         test_epsilon = self.rng.multivariate_normal(
-            mu_hat, sigma_hat, size=(len(contests_that_need_random_effect), self.B)
+            mu_hat, sigma_hat, size=self.B
         )
 
-        test_epsilon_y = aggregate_indicator_test[:, contests_that_need_random_effect] @ test_epsilon[:, :, 0]
-        test_epsilon_z = aggregate_indicator_test[:, contests_that_need_random_effect] @ test_epsilon[:, :, 1]
+        test_epsilon_y = test_epsilon[:,0::2].T
+        test_epsilon_z = test_epsilon[:,1::2].T
+
+        test_epsilon_y = aggregate_indicator_test @ test_epsilon_y
+        test_epsilon_z = aggregate_indicator_test @ test_epsilon_z
 
         return test_epsilon_y, test_epsilon_z
 
     def _sample_test_errors(
@@ -1437,6 +1464,10 @@ def get_aggregate_prediction_intervals(
         interval_upper = interval_upper.reshape(-1, 1)
         interval_lower = interval_lower.reshape(-1, 1)
 
+        # guarantee overlap between the prediction interval and the point prediction
+        interval_lower = np.minimum(interval_lower, aggregate_perc_margin_total - 0.001)
+        interval_upper = np.maximum(interval_upper, aggregate_perc_margin_total + 0.001)
+
         return PredictionIntervals(interval_lower, interval_upper)
 
     def get_national_summary_estimates(self, nat_sum_data_dict: dict, base_to_add: int | float, alpha: float) -> list:

From 3ef8a35ad4700463f893aaddd65ac01d4dad646e Mon Sep 17 00:00:00 2001
From: John Cherian
Date: Tue, 22 Oct 2024 21:20:17 -0700
Subject: [PATCH 2/7] some reformatting

---
 .../models/BootstrapElectionModel.py | 29 +++++++++----------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index 217212cf..aada0189 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -600,15 +600,16 @@ def _sample_test_epsilon(
         # where n is the number of observed units and N is the number of units in the contest
         # pop variance is the variance of the residuals in the contest
         def _get_epsilon_hat_std(residuals, epsilon):
-            var = np.var(aggregate_indicator_train * residuals, axis=0) # incorrect denominator for variance
-            var *= aggregate_indicator_train.shape[0] / (aggregate_indicator_train.sum(axis=0) - 1) # Bessel's correction
-            var *= (1 - aggregate_indicator_train.sum(axis=0) / aggregate_indicator.sum(axis=0)) / aggregate_indicator_train.sum(axis=0)
+            var = np.var(aggregate_indicator_train * residuals, axis=0)  # incorrect denominator for variance
+            var *= aggregate_indicator_train.shape[0] / (
+                aggregate_indicator_train.sum(axis=0) - 1
+            )  # Bessel's correction
+            var *= (
+                1 - aggregate_indicator_train.sum(axis=0) / aggregate_indicator.sum(axis=0)
+            ) / aggregate_indicator_train.sum(axis=0)
 
             # where we have < 2 units in a contest, we set the variance to the variance of the observed epsilon_hat values
-            var[np.isnan(var) | np.isinf(var)] = np.var(
-                epsilon[np.nonzero(epsilon)[0]].T,
-                ddof=1
-            )
+            var[np.isnan(var) | np.isinf(var)] = np.var(epsilon[np.nonzero(epsilon)[0]].T, ddof=1)
             return np.sqrt(var)
 
         std_y = _get_epsilon_hat_std(residuals_y, epsilon_y_hat)
@@ -619,9 +620,7 @@ def _get_epsilon_hat_std(residuals, epsilon):
         std[1::2] = std_z
 
         # high observed correlation between epsilon_y_hat and epsilon_z_hat in 2020, so this is important
-        corr = np.corrcoef(
-            np.concatenate([epsilon_y_hat, epsilon_z_hat], axis=1)[np.nonzero(epsilon_y_hat)[0]].T
-        )
+        corr = np.corrcoef(np.concatenate([epsilon_y_hat, epsilon_z_hat], axis=1)[np.nonzero(epsilon_y_hat)[0]].T)
         # tile corr into a block diagonal matrix
         corr_list = [corr] * aggregate_indicator.shape[1]
         corr_hat = block_diag(*corr_list)
@@ -631,18 +630,16 @@ def _get_epsilon_hat_std(residuals, epsilon):
 
         # \epsilon ~ N(0, \Sigma)
         # Sigma is a block diagonal matrix with 2x2 blocks running down the diagonal and 0s elsewhere
-        # each block is the covariance matrix of epsilon_y and epsilon_z for a particular contest, 
+        # each block is the covariance matrix of epsilon_y and epsilon_z for a particular contest,
         # e.g., the first block is for the AK contest, the second block is for the AL contest, etc.
         # we can sample from this distribution to get new epsilons
         mu_hat = np.zeros(corr_hat.shape[0])
         sigma_hat = np.diag(std) @ corr_hat @ np.diag(std)
 
-        test_epsilon = self.rng.multivariate_normal(
-            mu_hat, sigma_hat, size=self.B
-        )
+        test_epsilon = self.rng.multivariate_normal(mu_hat, sigma_hat, size=self.B)
 
-        test_epsilon_y = test_epsilon[:,0::2].T
-        test_epsilon_z = test_epsilon[:,1::2].T
+        test_epsilon_y = test_epsilon[:, 0::2].T
+        test_epsilon_z = test_epsilon[:, 1::2].T
 
         test_epsilon_y = aggregate_indicator_test @ test_epsilon_y
         test_epsilon_z = aggregate_indicator_test @ test_epsilon_z

From 87a6987263b4f64b40ca947ee5225779d6f39cca Mon Sep 17 00:00:00 2001
From: lennybronner
Date: Wed, 23 Oct 2024 10:12:49 -0400
Subject: [PATCH 3/7] pass unit tests

---
 tests/models/test_bootstrap_election_model.py | 34 +++++++------------
 tests/test_client.py                          |  8 +++--
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/tests/models/test_bootstrap_election_model.py b/tests/models/test_bootstrap_election_model.py
index 39021b6f..e2fc758c 100644
--- a/tests/models/test_bootstrap_election_model.py
+++ b/tests/models/test_bootstrap_election_model.py
@@ -412,24 +412,14 @@ def test_sample_test_epsilon(bootstrap_election_model):
     assert epsilon_y.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B)
     assert epsilon_z.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B)
 
-    # aggregate 3 has no elements in the training set, and rows 2,3,5 in the test set
-    # are parts of aggregate 3
-    assert np.isclose(epsilon_z[0], 0).all()
-    assert np.isclose(epsilon_z[1], 0).all()
+    # test epsilons should never be zero
+    assert not np.isclose(epsilon_z[0], 0).all()
+    assert not np.isclose(epsilon_z[1], 0).all()
     assert not np.isclose(epsilon_z[2], 0).all()
     assert not np.isclose(epsilon_z[3], 0).all()
-    assert np.isclose(epsilon_z[4], 0).all()
+    assert not np.isclose(epsilon_z[4], 0).all()
     assert not np.isclose(epsilon_z[5], 0).all()
 
-    # testing that if there is only one element epsilon_hat that we return 0
-    epsilon_y, epsilon_z = bootstrap_election_model._sample_test_epsilon(
-        residuals, residuals, [[4]], [[4]], aggregate_indicator_train, aggregate_indicator_test
-    )
-    assert epsilon_y.shape == (1, bootstrap_election_model.B)
-    assert epsilon_z.shape == (1, bootstrap_election_model.B)
-
-    assert np.isclose(epsilon_z[0], 0).all()
-
 
 def test_sample_test_errors(bootstrap_election_model):
     residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1)
@@ -965,8 +955,8 @@ def test_get_aggregate_prediction_intervals(bootstrap_election_model, rng):
     assert lower.shape == (6, 1)
     assert upper.shape == (6, 1)
 
-    assert lower[2] == pytest.approx(upper[2])  # since c is fully reporting
-    assert lower[5] == pytest.approx(upper[5])  # since all f units are unexpected
+    assert lower[2] == pytest.approx(upper[2] - 0.002)  # since c is fully reporting
+    assert lower[5] == pytest.approx(upper[5] - 0.002)  # since all f units are unexpected
     assert all(lower <= upper)
 
     # test race calls
@@ -989,7 +979,7 @@ def test_get_aggregate_prediction_intervals(bootstrap_election_model, rng):
         None,
         lhs_called_contests=lhs_called_contests,
     )
-    assert (lower >= bootstrap_election_model.lhs_called_threshold).all()
+    assert (lower >= bootstrap_election_model.lhs_called_threshold - 0.001).all()
     assert (upper >= bootstrap_election_model.lhs_called_threshold).all()
     assert (bootstrap_election_model.divided_error_B_1 == bootstrap_election_model.lhs_called_threshold).all()
     assert (bootstrap_election_model.divided_error_B_2 == bootstrap_election_model.lhs_called_threshold).all()
@@ -1014,7 +1004,7 @@ def test_get_aggregate_prediction_intervals(bootstrap_election_model, rng):
         rhs_called_contests=rhs_called_contests,
     )
     assert (lower <= bootstrap_election_model.rhs_called_threshold).all()
-    assert (upper <= bootstrap_election_model.rhs_called_threshold).all()
+    assert (upper <= bootstrap_election_model.rhs_called_threshold + 0.001).all()
     assert (bootstrap_election_model.divided_error_B_1 == bootstrap_election_model.rhs_called_threshold).all()
     assert (bootstrap_election_model.divided_error_B_2 == bootstrap_election_model.rhs_called_threshold).all()
 
@@ -1045,10 +1035,10 @@ def test_get_aggregate_prediction_intervals(bootstrap_election_model, rng):
     assert lower.shape == (8, 1)
     assert upper.shape == (8, 1)
 
-    assert lower[0] == pytest.approx(upper[0])  # a-a is fully reporting
-    assert lower[3] == pytest.approx(upper[3])  # c-c is fully reporting
-    assert lower[7] == pytest.approx(upper[7])  # c-c is fully reporting
-    assert lower[7] == pytest.approx(upper[7])  # f-f is fully unexpected
+    assert lower[0] == pytest.approx(upper[0] - 0.002)  # a-a is fully reporting
+    assert lower[3] == pytest.approx(upper[3] - 0.002)  # c-c is fully reporting
+    assert lower[7] == pytest.approx(upper[7] - 0.002)  # c-c is fully reporting
+    assert lower[7] == pytest.approx(upper[7] - 0.002)  # f-f is fully unexpected
     assert all(lower <= upper)
 
 
diff --git a/tests/test_client.py b/tests/test_client.py
index 3b821d28..8ecb690b 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -876,5 +876,9 @@ def test_get_national_summary_votes_estimates(model_client, va_governor_county_d
 
     current = model_client.get_national_summary_votes_estimates(None, 0, [0.99])
 
-    pd.testing.assert_frame_equal(current, model_client.results_handler.final_results["nat_sum_data"])
-    pd.testing.assert_frame_equal(expected_df, model_client.results_handler.final_results["nat_sum_data"])
+    pd.testing.assert_frame_equal(
+        current, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False
+    )
+    pd.testing.assert_frame_equal(
+        expected_df, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False
+    )

From 93dec5f9e3033e83f6342c379bc11ef319f02770 Mon Sep 17 00:00:00 2001
From: John Cherian
Date: Thu, 24 Oct 2024 14:16:01 -0700
Subject: [PATCH 4/7] Added inter-contest correlations + improvement for house
 model correlation (i.e., state *and* contest-level effect)

---
 src/elexmodel/client.py              |  3 +-
 .../models/BootstrapElectionModel.py | 68 ++++++++++++++++---
 2 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/src/elexmodel/client.py b/src/elexmodel/client.py
index 554689ca..feb8cd65 100644
--- a/src/elexmodel/client.py
+++ b/src/elexmodel/client.py
@@ -232,7 +232,8 @@ def get_estimates(
         handle_unreporting = kwargs.get("handle_unreporting", "drop")
 
         district_election = False
-        if office in {"H", "Y", "Z"}:
+        # if office starts with one of these letters
+        if office.startswith("H") or office.startswith("Y") or office.startswith("Z"):
             district_election = True
 
         model_settings = {
diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index aada0189..2aa1fe54 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -1,6 +1,7 @@
 from __future__ import annotations  # pylint: disable=too-many-lines
 
 import logging
+from itertools import combinations
 
 import numpy as np
 import pandas as pd
@@ -91,6 +92,11 @@ def __init__(self, model_settings={}):
         self.lhs_called_threshold = 0.005
         self.rhs_called_threshold = -0.005
 
+        # this is the correlation structure we impose when we sample from the contest level random effects
+        self.contest_correlations = model_settings.get(
+            "contest_correlations", [(("PA", "MI", "WI", "MN"), 0.8), (("GA", "NC"), 0.8), (("AZ", "NV"), 0.8)]
+        )
+
         # Assume that we have a baseline normalized margin
         # (D^{Y'} - R^{Y'}) / (D^{Y'} + R^{Y'}) is one of the covariates
         if "baseline_normalized_margin" not in self.features:
@@ -157,11 +163,11 @@ def _estimate_epsilon(self, residuals: np.ndarray, aggregate_indicator: np.ndarr
         This function estimates the epsilon (contest level random effects)
         """
         # the estimate for epsilon is the average of the residuals in each contest
-        epsilon_hat = (aggregate_indicator.T @ residuals) / aggregate_indicator.sum(axis=0).reshape(-1, 1)
+        epsilon_hat, _, _, _ = np.linalg.lstsq(aggregate_indicator, residuals)
+
         # we can't estimate a contest level effect if only have 1 unit in that contest (since our residual can just)
         # be made equal to zero by setting the random effect to that value
         epsilon_hat[aggregate_indicator.sum(axis=0) < 2] = 0
-
         return epsilon_hat
 
     def _estimate_delta(
@@ -625,8 +631,31 @@ def _get_epsilon_hat_std(residuals, epsilon):
         corr_list = [corr] * aggregate_indicator.shape[1]
         corr_hat = block_diag(*corr_list)
 
-        # TODO: may wish to add additional correlations to corr_hat
-        # e.g., correlation across the rust belt, sun belt, abortion referenda states, etc.
+        # if we have additional inter-contest correlations to impose
+        # self.contest_correlations is a list of tuples
+        # e.g., [(("AK", "AL", "AR"), 0.5), (("CA", "CO", "CT"), 0.5)]
+        # this will impose a correlation of 0.5 among the sampled state-level swings
+        # for AK, AL, AR and CA, CO, CT
+        if len(self.contest_correlations) > 0:
+            for contests, correlation in self.contest_correlations:
+                for c_1, c_2 in combinations(contests, 2):
+                    i, j = 2 * self.aggregate_names[c_1], 2 * self.aggregate_names[c_2]
+                    corr_hat[i, j] = correlation  # epsilon_y_correlation
+                    corr_hat[i + 1, j + 1] = correlation  # epsilon_z_correlation
+                    corr_hat[j, i] = correlation
+                    corr_hat[j + 1, i + 1] = correlation
+                    corr_hat[i + 1, j] = min(corr_hat[i, i + 1], correlation)
+                    corr_hat[j, i + 1] = min(corr_hat[i, i + 1], correlation)
+                    corr_hat[i, j + 1] = min(corr_hat[i, i + 1], correlation)
+                    corr_hat[j + 1, i] = min(corr_hat[i, i + 1], correlation)
+
+            # project correlation matrix back to PSD cone
+            evals, evecs = np.linalg.eigh(corr_hat)
+            if (evals < -1e-5).any():
+                LOG.info("Epsilon correl. matrix is not PSD and requires projection.")
+
+            evals = np.clip(evals, 0, None)
+            corr_hat = evecs @ np.diag(evals) @ evecs.T
+            corr_hat /= np.sqrt(np.outer(np.diag(corr_hat), np.diag(corr_hat)))
 
         # \epsilon ~ N(0, \Sigma)
         # Sigma is a block diagonal matrix with 2x2 blocks running down the diagonal and 0s elsewhere
@@ -809,11 +838,34 @@ def compute_bootstrap_errors(
         # between unit and contest (ie. a 1 in i,j says that unit j belongs to contest i)
         # in case district election we need to create a variable that defines the state, district
         # which is what the contest is
-        if self.district_election:
+        if self.district_election:  # want to model aggregate effect at both district and state levels
             all_units["postal_code-district"] = all_units[["postal_code", "district"]].agg("_".join, axis=1)
-            aggregate_indicator = pd.get_dummies(all_units["postal_code-district"]).values
+
+            contest_indicator = pd.get_dummies(all_units["postal_code-district"])
+            postal_code_indicator = pd.get_dummies(all_units["postal_code"])
+
+            # drop districts that are at-large districts for a state
+            postal_code_filter = all_units.groupby("postal_code")["postal_code-district"].nunique() > 1
+            valid_postal_codes = postal_code_filter[postal_code_filter].index
+            valid_districts = all_units[all_units.postal_code.isin(valid_postal_codes)]["postal_code-district"].unique()
+            contest_indicator_filtered = contest_indicator.loc[:, valid_districts]
+
+            # drop contest indicators if there are fewer than 10 units in contest
+            contest_indicator_filtered = contest_indicator_filtered.loc[:, contest_indicator.sum(axis=0) > 10]
+
+            self.aggregate_names = {
+                c: i
+                for i, c in enumerate(
+                    postal_code_indicator.columns.tolist() + contest_indicator_filtered.columns.tolist()
+                )
+            }
+            aggregate_indicator = np.concatenate(
+                (postal_code_indicator.values, contest_indicator_filtered.values), axis=1
+            )
         else:
-            aggregate_indicator = pd.get_dummies(all_units["postal_code"]).values
+            contest_indicator = pd.get_dummies(all_units["postal_code"])
+            self.aggregate_names = {c: i for i, c in enumerate(contest_indicator.columns.tolist())}
+            aggregate_indicator = contest_indicator.values
 
         aggregate_indicator_expected = aggregate_indicator[: (n_train + n_test)]
         aggregate_indicator_train = aggregate_indicator_expected[:n_train]
@@ -1047,7 +1099,7 @@ def compute_bootstrap_errors(
         # and turn into turnout estimate
         self.weighted_z_test_pred = z_test_pred * weights_test
         self.ran_bootstrap = True
-        self.n_contests = aggregate_indicator.shape[1]
+        self.n_contests = contest_indicator.values.shape[1]
 
     def get_unit_predictions(
         self, reporting_units: pd.DataFrame, nonreporting_units: pd.DataFrame, estimand: str, **kwargs

From 2dd0b6672098dfa8586a61f86568da1b5041b08a Mon Sep 17 00:00:00 2001
From: John Cherian
Date: Thu, 24 Oct 2024 15:09:47 -0700
Subject: [PATCH 5/7] fixes for unit tests

---
 src/elexmodel/models/BootstrapElectionModel.py |  2 +-
 tests/models/test_bootstrap_election_model.py  | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index 2aa1fe54..fba0ff0c 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -94,7 +94,7 @@ def __init__(self, model_settings={}):
 
         # this is the correlation structure we impose when we sample from the contest level random effects
         self.contest_correlations = model_settings.get(
-            "contest_correlations", [(("PA", "MI", "WI", "MN"), 0.8), (("GA", "NC"), 0.8), (("AZ", "NV"), 0.8)]
+            "contest_correlations", []
         )
 
         # Assume that we have a baseline normalized margin
diff --git a/tests/models/test_bootstrap_election_model.py b/tests/models/test_bootstrap_election_model.py
index e2fc758c..bdf63e1a 100644
--- a/tests/models/test_bootstrap_election_model.py
+++ b/tests/models/test_bootstrap_election_model.py
@@ -39,9 +39,9 @@ def test_estimate_epsilon(bootstrap_election_model):
     residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1)
     aggregate_indicator = np.asarray([[1, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 1]]).T
     epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator)
-    assert epsilon_hat[0] == (residuals[0] + residuals[1]) / 2
-    assert epsilon_hat[1] == (residuals[2] + residuals[3]) / 2
-    assert epsilon_hat[2] == 0  # since the aggrgate indicator for that row only has one 1 which is less than 2
+    assert np.isclose(epsilon_hat[0], (residuals[0] + residuals[1]) / 2)
+    assert np.isclose(epsilon_hat[1], (residuals[2] + residuals[3]) / 2)
+    assert np.isclose(epsilon_hat[2], 0)  # since the aggregate indicator for that row only has one 1 which is less than 2
 
 
 def test_estimate_delta(bootstrap_election_model):
@@ -405,9 +405,9 @@ def test_sample_test_epsilon(bootstrap_election_model):
     # circumstance is here: https://arcpublishing.atlassian.net/browse/ELEX-3431
     epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator_train)
 
-        epsilon_y, epsilon_z = bootstrap_election_model._sample_test_epsilon(
-            residuals, residuals, epsilon_hat, epsilon_hat, aggregate_indicator_train, aggregate_indicator_test
-        )
+    epsilon_y, epsilon_z = bootstrap_election_model._sample_test_epsilon(
+        residuals, residuals, epsilon_hat, epsilon_hat, aggregate_indicator_train, aggregate_indicator_test
+    )
 
     assert epsilon_y.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B)
     assert epsilon_z.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B)

From 304c353e4c0006498ca7bdd0f36e0854da4134a6 Mon Sep 17 00:00:00 2001
From: John Cherian
Date: Thu, 24 Oct 2024 15:11:14 -0700
Subject: [PATCH 6/7] clean up precommit

---
 src/elexmodel/models/BootstrapElectionModel.py | 4 +---
 tests/models/test_bootstrap_election_model.py  | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index fba0ff0c..120109ba 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -93,9 +93,7 @@ def __init__(self, model_settings={}):
         self.rhs_called_threshold = -0.005
 
         # this is the correlation structure we impose when we sample from the contest level random effects
-        self.contest_correlations = model_settings.get(
-            "contest_correlations", []
-        )
+        self.contest_correlations = model_settings.get("contest_correlations", [])
 
         # Assume that we have a baseline normalized margin
         # (D^{Y'} - R^{Y'}) / (D^{Y'} + R^{Y'}) is one of the covariates
diff --git a/tests/models/test_bootstrap_election_model.py b/tests/models/test_bootstrap_election_model.py
index bdf63e1a..5bd0edc9 100644
--- a/tests/models/test_bootstrap_election_model.py
+++ b/tests/models/test_bootstrap_election_model.py
@@ -41,7 +41,9 @@ def test_estimate_epsilon(bootstrap_election_model):
     epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator)
     assert np.isclose(epsilon_hat[0], (residuals[0] + residuals[1]) / 2)
     assert np.isclose(epsilon_hat[1], (residuals[2] + residuals[3]) / 2)
-    assert np.isclose(epsilon_hat[2], 0)  # since the aggregate indicator for that row only has one 1 which is less than 2
+    assert np.isclose(
+        epsilon_hat[2], 0
+    )  # since the aggregate indicator for that row only has one 1 which is less than 2
 
 
 def test_estimate_delta(bootstrap_election_model):

From b1ea9eb1bc1a7f14841a46f1b6a74c12a0ab3e96 Mon Sep 17 00:00:00 2001
From: John Cherian
Date: Fri, 25 Oct 2024 09:44:23 -0700
Subject: [PATCH 7/7] updated comment for _sample_test_epsilon

---
 src/elexmodel/models/BootstrapElectionModel.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index 120109ba..d271c483 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -589,7 +589,13 @@ def _sample_test_epsilon(
     ) -> tuple[np.ndarray, np.ndarray]:
         """
         This function generates new test epsilons (contest level random effects)
-        NOTE: for now we are sampling from a normal with mean zero and sample variance.
+
+        For partially observed contests, we apply a normal approximation and sample
+        from the normal distribution implied by the remaining uncertainty of a
+        sampling without replacement process.
+
+        For unobserved contests, we sample from the normal distribution implied by the
+        sample covariance of the partially observed epsilons.
         """
         non_zero_epsilon_indices = np.nonzero(epsilon_y_hat)[0]
         # if there is only one non zero contest OR if the contest is a state level election only
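
The sketch below walks the PATCH 1/7 sampling machinery end to end outside the model. It is an illustration under invented inputs: the toy indicator matrices, residual draws, and contest counts are assumptions, and epsilon is estimated with the per-contest mean that PATCH 4/7 later replaces with np.linalg.lstsq. Names otherwise mirror the diff.

import numpy as np
from scipy.linalg import block_diag

rng = np.random.default_rng(0)
B = 1000  # bootstrap replicates

# toy setup: 7 reporting units and 4 outstanding units across 3 contests
aggregate_indicator_train = np.array(
    [[1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1]]
)
aggregate_indicator_test = np.array([[1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1]])
aggregate_indicator = np.concatenate([aggregate_indicator_train, aggregate_indicator_test], axis=0)

residuals_y = rng.normal(0.02, 0.05, size=(7, 1))
residuals_z = rng.normal(0.01, 0.05, size=(7, 1))
# per-contest mean residual as the epsilon estimate (the pre-PATCH-4 estimator)
epsilon_y_hat = (aggregate_indicator_train.T @ residuals_y) / aggregate_indicator_train.sum(axis=0).reshape(-1, 1)
epsilon_z_hat = (aggregate_indicator_train.T @ residuals_z) / aggregate_indicator_train.sum(axis=0).reshape(-1, 1)

def get_epsilon_hat_std(residuals, epsilon):
    # sqrt((1 - n/N) * sample variance / n): the standard error of a mean when
    # sampling n of N units without replacement, as in the patch's comment
    n = aggregate_indicator_train.sum(axis=0)  # observed units per contest
    N = aggregate_indicator.sum(axis=0)        # total units per contest
    var = np.var(aggregate_indicator_train * residuals, axis=0)  # over zero-padded columns
    var *= aggregate_indicator_train.shape[0] / (n - 1)  # rescale toward a per-contest sample variance
    var *= (1 - n / N) / n  # finite-population correction, divided by n
    # contests with < 2 observed units fall back to the spread of the observed epsilons
    var[np.isnan(var) | np.isinf(var)] = np.var(epsilon[np.nonzero(epsilon)[0]].T, ddof=1)
    return np.sqrt(var)

std = np.zeros(2 * epsilon_y_hat.shape[0])
std[0::2] = get_epsilon_hat_std(residuals_y, epsilon_y_hat)  # interleave y and z so each
std[1::2] = get_epsilon_hat_std(residuals_z, epsilon_z_hat)  # contest owns one (y, z) pair

# observed (epsilon_y, epsilon_z) correlation, tiled into one 2x2 block per contest
corr = np.corrcoef(np.concatenate([epsilon_y_hat, epsilon_z_hat], axis=1)[np.nonzero(epsilon_y_hat)[0]].T)
corr_hat = block_diag(*([corr] * aggregate_indicator.shape[1]))

sigma_hat = np.diag(std) @ corr_hat @ np.diag(std)
test_epsilon = rng.multivariate_normal(np.zeros(sigma_hat.shape[0]), sigma_hat, size=B)

# map contest-level draws onto the outstanding units: shape (test units, B)
test_epsilon_y = aggregate_indicator_test @ test_epsilon[:, 0::2].T
test_epsilon_z = aggregate_indicator_test @ test_epsilon[:, 1::2].T
assert test_epsilon_y.shape == (aggregate_indicator_test.shape[0], B)

Because every contest now draws a (possibly small) epsilon instead of only the contests with no reporting units, the tests in PATCH 3/7 flip their assertions from np.isclose(..., 0) to not np.isclose(..., 0).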
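
The inter-contest correlation step from PATCH 4/7 can likewise be sketched in isolation. The aggregate_names mapping, the 0.9 within-contest correlation, and the single 0.8 coupling below are illustrative assumptions; the pairwise loop and the eigenvalue clipping mirror the diff.

from itertools import combinations

import numpy as np
from scipy.linalg import block_diag

aggregate_names = {"MI": 0, "PA": 1, "WI": 2}  # contest -> index of its 2x2 block
corr_block = np.array([[1.0, 0.9], [0.9, 1.0]])  # within-contest (epsilon_y, epsilon_z) correlation
corr_hat = block_diag(*([corr_block] * len(aggregate_names)))

contest_correlations = [(("PA", "MI", "WI"), 0.8)]  # hypothetical coupling, like the PATCH 4/7 default

for contests, correlation in contest_correlations:
    for c_1, c_2 in combinations(contests, 2):
        i, j = 2 * aggregate_names[c_1], 2 * aggregate_names[c_2]
        corr_hat[i, j] = corr_hat[j, i] = correlation  # y-y correlation across contests
        corr_hat[i + 1, j + 1] = corr_hat[j + 1, i + 1] = correlation  # z-z correlation
        # cross terms (y of one contest vs. z of the other) are capped at the
        # within-contest y-z correlation so the entries stay mutually consistent
        cross = min(corr_hat[i, i + 1], correlation)
        corr_hat[i + 1, j] = corr_hat[j, i + 1] = cross
        corr_hat[i, j + 1] = corr_hat[j + 1, i] = cross

# hand-set entries need not form a valid correlation matrix, so project back to
# the PSD cone: clip negative eigenvalues, then restore the unit diagonal
evals, evecs = np.linalg.eigh(corr_hat)
evals = np.clip(evals, 0, None)
corr_hat = evecs @ np.diag(evals) @ evecs.T
corr_hat /= np.sqrt(np.outer(np.diag(corr_hat), np.diag(corr_hat)))

assert np.all(np.linalg.eigvalsh(corr_hat) >= -1e-12)

The projection matters because an indefinite sigma_hat has no valid Cholesky factor and numpy's multivariate_normal will flag it; clipping eigenvalues at zero and renormalizing the diagonal is a standard nearest-PSD repair, which is why PATCH 4/7 logs when it fires.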