From f08f0c4d36baed05b5bd999a2f2af83bb63114b0 Mon Sep 17 00:00:00 2001
From: lennybronner <lenny.bronner@gmail.com>
Date: Wed, 23 Oct 2024 17:22:54 -0400
Subject: [PATCH 1/6] add states_for_separate_model option for separate per-state model fits

---
 README.md                                     |  1 +
 src/elexmodel/handlers/data/Featurizer.py     | 25 ++++++++++++++++-
 .../models/BootstrapElectionModel.py          |  3 ++-
 tests/handlers/test_featurizer.py             | 27 +++++++++++++++++++
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 29b4a62d..2303c8cc 100644
--- a/README.md
+++ b/README.md
@@ -125,6 +125,7 @@ Some model types have specific model parameters that can be included.
 | percent_expected_vote_error_bound | numeric | error tolerance on expected vote | `bootstrap`     |
 | z_unobserved_lower_bound          | numeric | lower bound for turnout factor   | `bootstrap`     |
 | z_unobserved_upper_bound          | numeric | upper bound for turnout factor   | `bootstrap`     |
+| states_for_separate_model         | list    | states with separate model fit   | `bootstrap`     |
 
 This is the class and function that invokes the general function to generate estimates. You can install `elex-model` as a Python package and use this code snippet in other projects.
 
diff --git a/src/elexmodel/handlers/data/Featurizer.py b/src/elexmodel/handlers/data/Featurizer.py
index 6a08017c..b69c851d 100644
--- a/src/elexmodel/handlers/data/Featurizer.py
+++ b/src/elexmodel/handlers/data/Featurizer.py
@@ -7,7 +7,7 @@ class Featurizer:
     Featurizer. Normalizes features, add intercept, expands fixed effects
     """
 
-    def __init__(self, features: list, fixed_effects: list):
+    def __init__(self, features: list, fixed_effects: list, states_for_separate_model: list):
         self.features = features
         # fixed effects can be a list, in which case every value of a fixed effect gets its own column
         if isinstance(fixed_effects, list):
@@ -39,6 +39,8 @@ def __init__(self, features: list, fixed_effects: list):
         # active features are features + active fixed effects
         self.active_features = []
 
+        self.states_for_separate_model = states_for_separate_model
+
     def _expand_fixed_effects(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         Convert fixed effect columns into dummy variables.
@@ -80,6 +82,14 @@ def prepare_data(
             this can improve the convergence of optimization algorithms
         if add_intercept is true an intercept column is added to the features and one fixed effect value is dropped
         """
+        # if a state is in the states for separate model, then we add separate feature columns for that state
+        # and we zero out the original feature column for those states
+        for state in self.states_for_separate_model:
+            mask = df.postal_code == state
+            for feature in self.features:
+                df[ f"{feature}_{state}"] = df[feature].where(mask, 0)
+                df.loc[mask, feature] = 0
+    
         df = df.copy()  # create copy so we can do things to the values
         if center_features:
             df[self.features] -= df[self.features].mean()
@@ -91,6 +101,19 @@ def prepare_data(
             self.active_features += ["intercept"]
             df["intercept"] = 1
 
+            # if a state is in the states for separate model, then we add separate intercept columns for that state
+            # and we zero out the original intercept column for those states
+            for state in self.states_for_separate_model:
+                mask = df.postal_code == state
+                # if we have a postal code fixed effect then we do not want a separate intercept column for that state
+                # because the fixed effect takes the role of that column (ie. those would be linearly dependent)
+                # but we still want to zero out the original intercept for those states.
+                if not 'postal_code' in self.fixed_effect_cols:
+                    df[f'intercept_{state}'] = df['intercept'].where(mask, 0)
+                df.loc[mask, 'intercept'] = 0
+
+            # if fixed effects are on then we have redundant with the state specific intercepts
+
         if len(self.fixed_effect_cols) > 0:
             df = self._expand_fixed_effects(df)
 
diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index 003fe182..44dad314 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -80,7 +80,8 @@ def __init__(self, model_settings={}):
         self.z_unobserved_upper_bound = model_settings.get("z_unobserved_upper_bound", 1.5)
         self.z_unobserved_lower_bound = model_settings.get("z_unobserved_lower_bound", 0.5)
 
-        self.featurizer = Featurizer(self.features, self.fixed_effects)
+        self.states_for_separate_model = model_settings.get("states_for_separate_model", [])
+        self.featurizer = Featurizer(self.features, self.fixed_effects, self.states_for_separate_model)
         self.seed = model_settings.get("seed", 0)
         self.rng = np.random.default_rng(seed=self.seed)  # used for sampling
         self.ran_bootstrap = False
diff --git a/tests/handlers/test_featurizer.py b/tests/handlers/test_featurizer.py
index 027284b4..d6b68be3 100644
--- a/tests/handlers/test_featurizer.py
+++ b/tests/handlers/test_featurizer.py
@@ -540,3 +540,30 @@ def test_generate_fixed_effects_mixed_reporting(va_governor_precinct_data):
 
     assert "county_fips" in featurizer.fixed_effect_cols
     assert len(featurizer.expanded_fixed_effects) == 133 - 1
+
+def test_separate_state_model():
+    """
+    This function tests to make sure that the featurizer returns the right columns
+    """
+    features = ["a", "b", "c"]
+    fixed_effects = ["fe_a", "fe_b"]
+    states_for_separate_model = ['CC']
+
+    featurizer = Featurizer(features, fixed_effects, states_for_separate_model)
+
+    split_fitting_heldout = 5
+    df = pd.DataFrame(
+        {
+            "postal_code": ['AA', 'AA', 'BB', 'BB', 'CC', 'CC', 'CC', 'DD'],
+            "a": [5, 3, 1, 5, 2, 2, 2, 2],
+            "b": [2, 2, 2, 2, 3, 3, 3, 3],
+            "c": [3, 3, 3, 3, 1, 2, 3, 4],
+            "d": [1, 2, 3, 4, 5, 3, 1, 5],
+            "fe_a": ["a", "a", "b", "c", "a", "a", "b", "d"],
+            "fe_b": ["1", "x", "7", "y", "1", "z", "z", "w"],
+            "reporting": [1, 1, 1, 1, 1, 0, 0, 0],
+            "unit_category": ["expected"] * 8,
+        }
+    )
+    df_new = featurizer.prepare_data(df, center_features=False, scale_features=False, add_intercept=True)
+    import pdb; pdb.set_trace()

From 3a587ae5af0ec8d3bbb09e21707e5dcafca394f1 Mon Sep 17 00:00:00 2001
From: lennybronner <lenny.bronner@gmail.com>
Date: Wed, 23 Oct 2024 19:51:01 -0400
Subject: [PATCH 2/6] unit tests work now

---
 src/elexmodel/handlers/data/Featurizer.py     | 21 +++++--
 .../models/BootstrapElectionModel.py          |  5 +-
 tests/handlers/test_featurizer.py             | 58 ++++++++++++++++++-
 3 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/src/elexmodel/handlers/data/Featurizer.py b/src/elexmodel/handlers/data/Featurizer.py
index b69c851d..376270c9 100644
--- a/src/elexmodel/handlers/data/Featurizer.py
+++ b/src/elexmodel/handlers/data/Featurizer.py
@@ -7,7 +7,7 @@ class Featurizer:
     Featurizer. Normalizes features, add intercept, expands fixed effects
     """
 
-    def __init__(self, features: list, fixed_effects: list, states_for_separate_model: list):
+    def __init__(self, features: list, fixed_effects: list, states_for_separate_model: list = []):
         self.features = features
         # fixed effects can be a list, in which case every value of a fixed effect gets its own column
         if isinstance(fixed_effects, list):
@@ -82,13 +82,20 @@ def prepare_data(
             this can improve the convergence of optimization algorithms
         if add_intercept is true an intercept column is added to the features and one fixed effect value is dropped
         """
+        df = df.copy()  # create copy so we can do things to the values
+
         # if a state is in the states for separate model, then we add separate feature columns for that state
         # and we zero out the original feature column for those states
+        additional_state_features = []
+        # if 'BB' in self.states_for_separate_model and 'CC' in self.states_for_separate_model:
+            # import pdb; pdb.set_trace()
         for state in self.states_for_separate_model:
             mask = df.postal_code == state
             for feature in self.features:
-                df[ f"{feature}_{state}"] = df[feature].where(mask, 0)
+                state_feature = f"{feature}_{state}"
+                df[state_feature] = df[feature].where(mask, 0)
                 df.loc[mask, feature] = 0
+                additional_state_features.append(state_feature)
     
         df = df.copy()  # create copy so we can do things to the values
         if center_features:
@@ -108,8 +115,10 @@ def prepare_data(
                 # if we have a postal code fixed effect then we do not want a separate intercept column for that state
                 # because the fixed effect takes the role of that column (ie. those would be linearly dependent)
                 # but we still want to zero out the original intercept for those states.
-                if not 'postal_code' in self.fixed_effect_cols:
-                    df[f'intercept_{state}'] = df['intercept'].where(mask, 0)
+                if 'postal_code' not in self.fixed_effect_cols:
+                    state_intercept = f'intercept_{state}'
+                    df[state_intercept] = df['intercept'].where(mask, 0)
+                    self.complete_features.append(state_intercept)
                 df.loc[mask, 'intercept'] = 0
 
             # if fixed effects are on then we have redundant with the state specific intercepts
@@ -163,8 +172,8 @@ def prepare_data(
         # all features that the model will be fit on
         # these are all the features + the expanded fixed effects
         # (so all fixed effect values in the complete data excluding the ones dropped for multicolinearity)
-        self.complete_features += self.features + self.expanded_fixed_effects
-        self.active_features += self.features + self.active_fixed_effects
+        self.complete_features += self.features + additional_state_features + self.expanded_fixed_effects
+        self.active_features += self.features + additional_state_features + self.active_fixed_effects
         df = df[self.complete_features]
 
         return df
diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index 44dad314..1f56d958 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -81,7 +81,8 @@ def __init__(self, model_settings={}):
         self.z_unobserved_lower_bound = model_settings.get("z_unobserved_lower_bound", 0.5)
 
         self.states_for_separate_model = model_settings.get("states_for_separate_model", [])
-        self.featurizer = Featurizer(self.features, self.fixed_effects, self.states_for_separate_model)
+        self.featurizer = Featurizer(self.features, self.fixed_effects, states_for_separate_model=self.states_for_separate_model)
+        
         self.seed = model_settings.get("seed", 0)
         self.rng = np.random.default_rng(seed=self.seed)  # used for sampling
         self.ran_bootstrap = False
@@ -662,7 +663,7 @@ def _get_strata(
         # but like with fixed effects we drop one strata category and use the intercept instead so the
         # example would be
         # rural: 0, 0 urban: 1, 0 and rural: 0, 1
-        strata_featurizer = Featurizer([], self.strata)
+        strata_featurizer = Featurizer([], self.strata, states_for_separate_model=self.states_for_separate_model)
         all_units = pd.concat([reporting_units, nonreporting_units], axis=0)
 
         strata_all = strata_featurizer.prepare_data(
diff --git a/tests/handlers/test_featurizer.py b/tests/handlers/test_featurizer.py
index d6b68be3..6d5fa0a9 100644
--- a/tests/handlers/test_featurizer.py
+++ b/tests/handlers/test_featurizer.py
@@ -551,7 +551,6 @@ def test_separate_state_model():
 
     featurizer = Featurizer(features, fixed_effects, states_for_separate_model)
 
-    split_fitting_heldout = 5
     df = pd.DataFrame(
         {
             "postal_code": ['AA', 'AA', 'BB', 'BB', 'CC', 'CC', 'CC', 'DD'],
@@ -565,5 +564,60 @@ def test_separate_state_model():
             "unit_category": ["expected"] * 8,
         }
     )
+
+    df_new = featurizer.prepare_data(df, center_features=False, scale_features=False, add_intercept=True)
+    assert df_new.loc[df.postal_code != 'CC', 'intercept'].all() == 1
+    assert df_new.loc[df.postal_code == 'CC', 'intercept'].all() == 0
+    assert df_new.loc[df.postal_code != 'CC', 'intercept_CC'].all() == 0
+    assert df_new.loc[df.postal_code == 'CC', 'intercept_CC'].all() == 1
+
+    assert df_new.loc[df.postal_code != 'CC', 'a'].all() > 0
+    assert df_new.loc[df.postal_code == 'CC', 'a'].all() == 0
+    assert df_new.loc[df.postal_code != 'CC', 'a_CC'].all() == 0
+    assert df_new.loc[df.postal_code == 'CC', 'a_CC'].all() > 0
+
+    assert df_new.loc[df.postal_code != 'CC', 'b'].all() > 0
+    assert df_new.loc[df.postal_code == 'CC', 'b'].all() == 0
+    assert df_new.loc[df.postal_code != 'CC', 'b_CC'].all() == 0
+    assert df_new.loc[df.postal_code == 'CC', 'b_CC'].all() > 0
+
+    assert df_new.loc[df.postal_code != 'CC', 'c'].all() > 0
+    assert df_new.loc[df.postal_code == 'CC', 'c'].all() == 0
+    assert df_new.loc[df.postal_code != 'CC', 'c_CC'].all() == 0
+    assert df_new.loc[df.postal_code == 'CC', 'c_CC'].all() > 0
+
+    # slightly more complicated, with two states
+    states_for_separate_model = ['BB', 'CC']
+    featurizer = Featurizer(features, fixed_effects, states_for_separate_model)
     df_new = featurizer.prepare_data(df, center_features=False, scale_features=False, add_intercept=True)
-    import pdb; pdb.set_trace()
+
+    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept'].all() == 1
+    assert df_new.loc[df.postal_code == 'CC', 'intercept'].all() == 0
+    assert df_new.loc[df.postal_code == 'BB', 'intercept'].all() == 0
+    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept_CC'].all() == 0
+    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept_BB'].all() == 0
+    assert df_new.loc[df.postal_code == 'CC', 'intercept_CC'].all() == 1
+    assert df_new.loc[df.postal_code == 'BB', 'intercept_BB'].all() == 1
+    assert df_new.loc[df.postal_code == 'CC', 'intercept_BB'].all() == 0
+    assert df_new.loc[df.postal_code == 'BB', 'intercept_CC'].all() == 0
+
+    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'a'].all() > 0
+    assert df_new.loc[df.postal_code == 'CC', 'a'].all() == 0
+    assert df_new.loc[df.postal_code == 'BB', 'a'].all() == 0
+    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'a_CC'].all() == 0
+    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'a_BB'].all() == 0
+    assert df_new.loc[df.postal_code == 'CC', 'a_CC'].all() > 0
+    assert df_new.loc[df.postal_code == 'BB', 'a_BB'].all() > 0
+    assert df_new.loc[df.postal_code == 'CC', 'a_BB'].all() == 0
+    assert df_new.loc[df.postal_code == 'BB', 'a_CC'].all() == 0
+
+    # if postal code is in fixed effect, then don't add indivdual intercepts
+    fixed_effects = ["fe_a", "fe_b", 'postal_code']
+    featurizer = Featurizer(features, fixed_effects, states_for_separate_model)
+    df_new = featurizer.prepare_data(df, center_features=False, scale_features=False, add_intercept=True)
+
+    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept'].all() == 1
+    assert df_new.loc[df.postal_code == 'CC', 'intercept'].all() == 0
+    assert df_new.loc[df.postal_code == 'BB', 'intercept'].all() == 0
+    assert 'intercept_BB' not in df_new.columns
+    assert 'intercept_CC' not in df_new.columns

From 6630908200d7bd0466e15551fc4b82b2f3747b5b Mon Sep 17 00:00:00 2001
From: lennybronner <lenny.bronner@gmail.com>
Date: Wed, 23 Oct 2024 19:56:55 -0400
Subject: [PATCH 3/6] linter

---
 src/elexmodel/handlers/data/Featurizer.py     | 12 +--
 .../models/BootstrapElectionModel.py          |  6 +-
 tests/handlers/test_featurizer.py             | 95 ++++++++++---------
 3 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/src/elexmodel/handlers/data/Featurizer.py b/src/elexmodel/handlers/data/Featurizer.py
index 376270c9..f800fead 100644
--- a/src/elexmodel/handlers/data/Featurizer.py
+++ b/src/elexmodel/handlers/data/Featurizer.py
@@ -88,7 +88,7 @@ def prepare_data(
         # and we zero out the original feature column for those states
         additional_state_features = []
         # if 'BB' in self.states_for_separate_model and 'CC' in self.states_for_separate_model:
-            # import pdb; pdb.set_trace()
+        # import pdb; pdb.set_trace()
         for state in self.states_for_separate_model:
             mask = df.postal_code == state
             for feature in self.features:
@@ -96,7 +96,7 @@ def prepare_data(
                 df[state_feature] = df[feature].where(mask, 0)
                 df.loc[mask, feature] = 0
                 additional_state_features.append(state_feature)
-    
+
         df = df.copy()  # create copy so we can do things to the values
         if center_features:
             df[self.features] -= df[self.features].mean()
@@ -115,11 +115,11 @@ def prepare_data(
                 # if we have a postal code fixed effect then we do not want a separate intercept column for that state
                 # because the fixed effect takes the role of that column (ie. those would be linearly dependent)
                 # but we still want to zero out the original intercept for those states.
-                if 'postal_code' not in self.fixed_effect_cols:
-                    state_intercept = f'intercept_{state}'
-                    df[state_intercept] = df['intercept'].where(mask, 0)
+                if "postal_code" not in self.fixed_effect_cols:
+                    state_intercept = f"intercept_{state}"
+                    df[state_intercept] = df["intercept"].where(mask, 0)
                     self.complete_features.append(state_intercept)
-                df.loc[mask, 'intercept'] = 0
+                df.loc[mask, "intercept"] = 0
 
             # if fixed effects are on then we have redundant with the state specific intercepts
 
diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py
index 1f56d958..e5969064 100644
--- a/src/elexmodel/models/BootstrapElectionModel.py
+++ b/src/elexmodel/models/BootstrapElectionModel.py
@@ -81,8 +81,10 @@ def __init__(self, model_settings={}):
         self.z_unobserved_lower_bound = model_settings.get("z_unobserved_lower_bound", 0.5)
 
         self.states_for_separate_model = model_settings.get("states_for_separate_model", [])
-        self.featurizer = Featurizer(self.features, self.fixed_effects, states_for_separate_model=self.states_for_separate_model)
-        
+        self.featurizer = Featurizer(
+            self.features, self.fixed_effects, states_for_separate_model=self.states_for_separate_model
+        )
+
         self.seed = model_settings.get("seed", 0)
         self.rng = np.random.default_rng(seed=self.seed)  # used for sampling
         self.ran_bootstrap = False
diff --git a/tests/handlers/test_featurizer.py b/tests/handlers/test_featurizer.py
index 6d5fa0a9..448a72a8 100644
--- a/tests/handlers/test_featurizer.py
+++ b/tests/handlers/test_featurizer.py
@@ -541,19 +541,20 @@ def test_generate_fixed_effects_mixed_reporting(va_governor_precinct_data):
     assert "county_fips" in featurizer.fixed_effect_cols
     assert len(featurizer.expanded_fixed_effects) == 133 - 1
 
+
 def test_separate_state_model():
     """
     This function tests to make sure that the featurizer returns the right columns
     """
     features = ["a", "b", "c"]
     fixed_effects = ["fe_a", "fe_b"]
-    states_for_separate_model = ['CC']
+    states_for_separate_model = ["CC"]
 
     featurizer = Featurizer(features, fixed_effects, states_for_separate_model)
 
     df = pd.DataFrame(
         {
-            "postal_code": ['AA', 'AA', 'BB', 'BB', 'CC', 'CC', 'CC', 'DD'],
+            "postal_code": ["AA", "AA", "BB", "BB", "CC", "CC", "CC", "DD"],
             "a": [5, 3, 1, 5, 2, 2, 2, 2],
             "b": [2, 2, 2, 2, 3, 3, 3, 3],
             "c": [3, 3, 3, 3, 1, 2, 3, 4],
@@ -566,58 +567,58 @@ def test_separate_state_model():
     )
 
     df_new = featurizer.prepare_data(df, center_features=False, scale_features=False, add_intercept=True)
-    assert df_new.loc[df.postal_code != 'CC', 'intercept'].all() == 1
-    assert df_new.loc[df.postal_code == 'CC', 'intercept'].all() == 0
-    assert df_new.loc[df.postal_code != 'CC', 'intercept_CC'].all() == 0
-    assert df_new.loc[df.postal_code == 'CC', 'intercept_CC'].all() == 1
-
-    assert df_new.loc[df.postal_code != 'CC', 'a'].all() > 0
-    assert df_new.loc[df.postal_code == 'CC', 'a'].all() == 0
-    assert df_new.loc[df.postal_code != 'CC', 'a_CC'].all() == 0
-    assert df_new.loc[df.postal_code == 'CC', 'a_CC'].all() > 0
-
-    assert df_new.loc[df.postal_code != 'CC', 'b'].all() > 0
-    assert df_new.loc[df.postal_code == 'CC', 'b'].all() == 0
-    assert df_new.loc[df.postal_code != 'CC', 'b_CC'].all() == 0
-    assert df_new.loc[df.postal_code == 'CC', 'b_CC'].all() > 0
-
-    assert df_new.loc[df.postal_code != 'CC', 'c'].all() > 0
-    assert df_new.loc[df.postal_code == 'CC', 'c'].all() == 0
-    assert df_new.loc[df.postal_code != 'CC', 'c_CC'].all() == 0
-    assert df_new.loc[df.postal_code == 'CC', 'c_CC'].all() > 0
+    assert df_new.loc[df.postal_code != "CC", "intercept"].all() == 1
+    assert df_new.loc[df.postal_code == "CC", "intercept"].all() == 0
+    assert df_new.loc[df.postal_code != "CC", "intercept_CC"].all() == 0
+    assert df_new.loc[df.postal_code == "CC", "intercept_CC"].all() == 1
+
+    assert df_new.loc[df.postal_code != "CC", "a"].all() > 0
+    assert df_new.loc[df.postal_code == "CC", "a"].all() == 0
+    assert df_new.loc[df.postal_code != "CC", "a_CC"].all() == 0
+    assert df_new.loc[df.postal_code == "CC", "a_CC"].all() > 0
+
+    assert df_new.loc[df.postal_code != "CC", "b"].all() > 0
+    assert df_new.loc[df.postal_code == "CC", "b"].all() == 0
+    assert df_new.loc[df.postal_code != "CC", "b_CC"].all() == 0
+    assert df_new.loc[df.postal_code == "CC", "b_CC"].all() > 0
+
+    assert df_new.loc[df.postal_code != "CC", "c"].all() > 0
+    assert df_new.loc[df.postal_code == "CC", "c"].all() == 0
+    assert df_new.loc[df.postal_code != "CC", "c_CC"].all() == 0
+    assert df_new.loc[df.postal_code == "CC", "c_CC"].all() > 0
 
     # slightly more complicated, with two states
-    states_for_separate_model = ['BB', 'CC']
+    states_for_separate_model = ["BB", "CC"]
     featurizer = Featurizer(features, fixed_effects, states_for_separate_model)
     df_new = featurizer.prepare_data(df, center_features=False, scale_features=False, add_intercept=True)
 
-    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept'].all() == 1
-    assert df_new.loc[df.postal_code == 'CC', 'intercept'].all() == 0
-    assert df_new.loc[df.postal_code == 'BB', 'intercept'].all() == 0
-    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept_CC'].all() == 0
-    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept_BB'].all() == 0
-    assert df_new.loc[df.postal_code == 'CC', 'intercept_CC'].all() == 1
-    assert df_new.loc[df.postal_code == 'BB', 'intercept_BB'].all() == 1
-    assert df_new.loc[df.postal_code == 'CC', 'intercept_BB'].all() == 0
-    assert df_new.loc[df.postal_code == 'BB', 'intercept_CC'].all() == 0
-
-    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'a'].all() > 0
-    assert df_new.loc[df.postal_code == 'CC', 'a'].all() == 0
-    assert df_new.loc[df.postal_code == 'BB', 'a'].all() == 0
-    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'a_CC'].all() == 0
-    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'a_BB'].all() == 0
-    assert df_new.loc[df.postal_code == 'CC', 'a_CC'].all() > 0
-    assert df_new.loc[df.postal_code == 'BB', 'a_BB'].all() > 0
-    assert df_new.loc[df.postal_code == 'CC', 'a_BB'].all() == 0
-    assert df_new.loc[df.postal_code == 'BB', 'a_CC'].all() == 0
+    assert df_new.loc[(df.postal_code != "CC") & (df.postal_code != "BB"), "intercept"].all() == 1
+    assert df_new.loc[df.postal_code == "CC", "intercept"].all() == 0
+    assert df_new.loc[df.postal_code == "BB", "intercept"].all() == 0
+    assert df_new.loc[(df.postal_code != "CC") & (df.postal_code != "BB"), "intercept_CC"].all() == 0
+    assert df_new.loc[(df.postal_code != "CC") & (df.postal_code != "BB"), "intercept_BB"].all() == 0
+    assert df_new.loc[df.postal_code == "CC", "intercept_CC"].all() == 1
+    assert df_new.loc[df.postal_code == "BB", "intercept_BB"].all() == 1
+    assert df_new.loc[df.postal_code == "CC", "intercept_BB"].all() == 0
+    assert df_new.loc[df.postal_code == "BB", "intercept_CC"].all() == 0
+
+    assert df_new.loc[(df.postal_code != "CC") & (df.postal_code != "BB"), "a"].all() > 0
+    assert df_new.loc[df.postal_code == "CC", "a"].all() == 0
+    assert df_new.loc[df.postal_code == "BB", "a"].all() == 0
+    assert df_new.loc[(df.postal_code != "CC") & (df.postal_code != "BB"), "a_CC"].all() == 0
+    assert df_new.loc[(df.postal_code != "CC") & (df.postal_code != "BB"), "a_BB"].all() == 0
+    assert df_new.loc[df.postal_code == "CC", "a_CC"].all() > 0
+    assert df_new.loc[df.postal_code == "BB", "a_BB"].all() > 0
+    assert df_new.loc[df.postal_code == "CC", "a_BB"].all() == 0
+    assert df_new.loc[df.postal_code == "BB", "a_CC"].all() == 0
 
     # if postal code is in fixed effect, then don't add indivdual intercepts
-    fixed_effects = ["fe_a", "fe_b", 'postal_code']
+    fixed_effects = ["fe_a", "fe_b", "postal_code"]
     featurizer = Featurizer(features, fixed_effects, states_for_separate_model)
     df_new = featurizer.prepare_data(df, center_features=False, scale_features=False, add_intercept=True)
 
-    assert df_new.loc[(df.postal_code != 'CC') & (df.postal_code != 'BB'), 'intercept'].all() == 1
-    assert df_new.loc[df.postal_code == 'CC', 'intercept'].all() == 0
-    assert df_new.loc[df.postal_code == 'BB', 'intercept'].all() == 0
-    assert 'intercept_BB' not in df_new.columns
-    assert 'intercept_CC' not in df_new.columns
+    assert df_new.loc[(df.postal_code != "CC") & (df.postal_code != "BB"), "intercept"].all() == 1
+    assert df_new.loc[df.postal_code == "CC", "intercept"].all() == 0
+    assert df_new.loc[df.postal_code == "BB", "intercept"].all() == 0
+    assert "intercept_BB" not in df_new.columns
+    assert "intercept_CC" not in df_new.columns

From 1b15de2ba2da0b5df16167384299b5624aaf3b88 Mon Sep 17 00:00:00 2001
From: lennybronner <lenny.bronner@gmail.com>
Date: Wed, 23 Oct 2024 20:04:50 -0400
Subject: [PATCH 4/6] stopping dtype check

---
 tests/test_client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_client.py b/tests/test_client.py
index 3b821d28..80c6a479 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -876,5 +876,5 @@ def test_get_national_summary_votes_estimates(model_client, va_governor_county_d
 
     current = model_client.get_national_summary_votes_estimates(None, 0, [0.99])
 
-    pd.testing.assert_frame_equal(current, model_client.results_handler.final_results["nat_sum_data"])
-    pd.testing.assert_frame_equal(expected_df, model_client.results_handler.final_results["nat_sum_data"])
+    pd.testing.assert_frame_equal(current, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False)
+    pd.testing.assert_frame_equal(expected_df, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False)

From bec329d41726d4f12d56f6926cb2322f65229a31 Mon Sep 17 00:00:00 2001
From: lennybronner <lenny.bronner@gmail.com>
Date: Wed, 23 Oct 2024 20:05:11 -0400
Subject: [PATCH 5/6] linter

---
 tests/test_client.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_client.py b/tests/test_client.py
index 80c6a479..8ecb690b 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -876,5 +876,9 @@ def test_get_national_summary_votes_estimates(model_client, va_governor_county_d
 
     current = model_client.get_national_summary_votes_estimates(None, 0, [0.99])
 
-    pd.testing.assert_frame_equal(current, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False)
-    pd.testing.assert_frame_equal(expected_df, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False)
+    pd.testing.assert_frame_equal(
+        current, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False
+    )
+    pd.testing.assert_frame_equal(
+        expected_df, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False
+    )

From e1637b758822268fdd56ea0247e9925aeb390fa9 Mon Sep 17 00:00:00 2001
From: lennybronner <lenny.bronner@gmail.com>
Date: Thu, 24 Oct 2024 23:58:43 -0400
Subject: [PATCH 6/6] removed leftover pdb

---
 src/elexmodel/handlers/data/Featurizer.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/elexmodel/handlers/data/Featurizer.py b/src/elexmodel/handlers/data/Featurizer.py
index f800fead..9e661a61 100644
--- a/src/elexmodel/handlers/data/Featurizer.py
+++ b/src/elexmodel/handlers/data/Featurizer.py
@@ -87,8 +87,6 @@ def prepare_data(
         # if a state is in the states for separate model, then we add separate feature columns for that state
         # and we zero out the original feature column for those states
         additional_state_features = []
-        # if 'BB' in self.states_for_separate_model and 'CC' in self.states_for_separate_model:
-        # import pdb; pdb.set_trace()
         for state in self.states_for_separate_model:
             mask = df.postal_code == state
             for feature in self.features: