From ad24f915edb3366b7c03c9b2f9e8e251a89793e5 Mon Sep 17 00:00:00 2001
From: lbvienna <lenny.bronner@gmail.com>
Date: Thu, 14 Sep 2023 10:23:47 -0400
Subject: [PATCH 1/4] Estimandizer: pass column prefix to estimand functions (WIP)

---
 src/elexmodel/handlers/data/Estimandizer.py | 104 +++++++++++++-------
 1 file changed, 68 insertions(+), 36 deletions(-)

diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py
index 30e6a00f..fdce5f73 100644
--- a/src/elexmodel/handlers/data/Estimandizer.py
+++ b/src/elexmodel/handlers/data/Estimandizer.py
@@ -16,36 +16,47 @@ class Estimandizer:
 
     def check_and_create_estimands(self, data_df, estimands, historical, current_data=False):
         columns_to_return = []
-
         for estimand in estimands:
             results_col = f"{RESULTS_PREFIX}{estimand}"
-            baseline_col = f"{BASELINE_PREFIX}{estimand}"
-            target_col = results_col if current_data else baseline_col
 
-            if target_col not in data_df.columns:
-                if estimand in data_df.columns:
-                    data_df[target_col] = data_df[estimand].copy()
-                else:
+            if results_col not in data_df.columns:
+                data_df = globals()[estimand](data_df, RESULTS_PREFIX)
+            columns_to_return.append(results_col)
+        return data_df, columns_to_return
+
+
+
+        # columns_to_return = []
+
+        # for estimand in estimands:
+            # results_col = f"{RESULTS_PREFIX}{estimand}"
+            # baseline_col = f"{BASELINE_PREFIX}{estimand}"
+            # target_col = results_col if current_data else baseline_col
+
+            # if target_col not in data_df.columns:
+                # if estimand in data_df.columns:
+                    # data_df[target_col] = data_df[estimand].copy()
+                # else:
                     # will raise a KeyError if a function with the same name as `estimand` doesn't exist
-                    data_df = globals()[estimand](data_df)
-                if target_col == baseline_col:
-                    data_df[results_col] = data_df[baseline_col].copy()
+                    # data_df = globals()[estimand](data_df)
+                # if target_col == baseline_col:
+                    # data_df[results_col] = data_df[baseline_col].copy()
 
-            if historical:
-                data_df[results_col] = nan
-            else:
-                if results_col not in data_df.columns:
-                    raise EstimandException("This is missing results data for estimand: ", estimand)
+            # if historical:
+                # data_df[results_col] = nan
+            # else:
+                # if results_col not in data_df.columns:
+                    # raise EstimandException("This is missing results data for estimand: ", estimand)
 
-            columns_to_return.append(results_col)
+            # columns_to_return.append(results_col)
 
-        results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)]
+        # results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)]
         # If this is not a historical run, then this is a live election
         # so we are expecting that there will be actual results data
-        if not historical and len(results_column_names) == 0:
-            raise EstimandException("This is not a test election, it is missing results data")
+        # if not historical and len(results_column_names) == 0:
+            # raise EstimandException("This is not a test election, it is missing results data")
 
-        return (data_df, columns_to_return)
+        # return (data_df, columns_to_return)
 
     def add_estimand_baselines(self, data_df, estimand_baselines, historical):
         # if we are in a historical election we are only reading preprocessed data to get
@@ -54,35 +65,56 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical):
 
         for estimand, pointer in estimand_baselines.items():
             if pointer is None:
-                # should only happen when we're going to create a new estimand
+                # when we are creating a new estimand
                 pointer = estimand
 
             baseline_col = f"{BASELINE_PREFIX}{pointer}"
 
             if baseline_col not in data_df.columns:
-                # will raise a KeyError if a function with the same name as `pointer` doesn't exist
-                data_df = globals()[pointer](data_df)
-                results_col = f"{RESULTS_PREFIX}{estimand}"
-                data_df[results_col] = data_df[baseline_col].copy()
+                data_df = globals()[pointer](data_df, BASELINE_PREFIX)  # must create baseline_col
 
             if not historical:
-                # Adding one to prevent zero divison
                 data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1
 
         return data_df
+        # for estimand, pointer in estimand_baselines.items():
+        #     if pointer is None:
+        #         # should only happen when we're going to create a new estimand
+        #         pointer = estimand
+
+        #     baseline_col = f"{BASELINE_PREFIX}{pointer}"
+
+        #     if baseline_col not in data_df.columns:
+        #         # will raise a KeyError if a function with the same name as `pointer` doesn't exist
+        #         data_df = globals()[pointer](data_df)
+        #         results_col = f"{RESULTS_PREFIX}{estimand}"
+        #         data_df[results_col] = data_df[baseline_col].copy()
+
+        #     if not historical:
+        #         # Adding one to prevent zero divison
+        #         data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1
+
+        # return data_df
 
 
 # custom estimands
 
 
-def party_vote_share_dem(data_df):
-    # should only happen when we're replaying an election
-    if f"{BASELINE_PREFIX}dem" not in data_df.columns and f"{BASELINE_PREFIX}turnout" not in data_df.columns:
-        data_df[f"{RESULTS_PREFIX}party_vote_share_dem"] = (
-            data_df[f"{RESULTS_PREFIX}dem"] / data_df[f"{RESULTS_PREFIX}turnout"]
-        )
-    else:
-        data_df[f"{BASELINE_PREFIX}party_vote_share_dem"] = (
-            data_df[f"{BASELINE_PREFIX}dem"] / data_df[f"{BASELINE_PREFIX}turnout"]
-        )
+def party_vote_share_dem(data_df, col_prefix):
+
+    data_df[f"{col_prefix}party_vote_share_dem"] = (
+        data_df[f"{col_prefix}dem"] / data_df[f"{col_prefix}turnout"]
+    )
+
     return data_df
+
+    # should only happen when we're replaying an election
+    # if f"{BASELINE_PREFIX}dem" not in data_df.columns and f"{BASELINE_PREFIX}turnout" not in data_df.columns:
+        # data_df[f"{RESULTS_PREFIX}party_vote_share_dem"] = (
+            # data_df[f"{RESULTS_PREFIX}dem"] / data_df[f"{RESULTS_PREFIX}turnout"]
+        # )
+    # else:
+        # data_df[f"{BASELINE_PREFIX}party_vote_share_dem"] = (
+            # data_df[f"{BASELINE_PREFIX}dem"] / data_df[f"{BASELINE_PREFIX}turnout"]
+        # )
+    # return data_df

From bd39fab6face01644b2ef000415fcfe8ee06b7e2 Mon Sep 17 00:00:00 2001
From: lbvienna <lenny.bronner@gmail.com>
Date: Thu, 14 Sep 2023 12:54:26 -0400
Subject: [PATCH 2/4] made progress on historical and testbed integration

---
 src/elexmodel/client.py                       | 19 +++++++++++++++++++
 src/elexmodel/handlers/data/Estimandizer.py   |  6 +++++-
 .../handlers/data/PreprocessedData.py         |  5 ++++-
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/elexmodel/client.py b/src/elexmodel/client.py
index 6194b7f0..df1af7c6 100644
--- a/src/elexmodel/client.py
+++ b/src/elexmodel/client.py
@@ -9,6 +9,7 @@
 from elexmodel.handlers.data.CombinedData import CombinedDataHandler
 from elexmodel.handlers.data.ModelResults import ModelResultsHandler
 from elexmodel.handlers.data.PreprocessedData import PreprocessedDataHandler
+from elexmodel.handlers.data.LiveData import MockLiveDataHandler
 from elexmodel.logging import initialize_logging
 from elexmodel.models.ConformalElectionModel import ConformalElectionModel
 from elexmodel.models.GaussianElectionModel import GaussianElectionModel
@@ -391,8 +392,25 @@ def _format_historical_current_data(
         """
         Formats data for historical model run
         """
+
+        """
+        What does the historical model client do?
+            - If we are running the election in 2024 and 100 counties are reporting, we want to see what
+                our model error would have been in 2020 with these counties reporting
+            - To do that we need to merge the 2020 results onto the 2024 reporting counties
+        
+            - So for 2020 (cli) this means -> we have 2020 data and we pick 100 random counties reporting in the MockLiveDataHandler
+            - in this function we get the 2016 results and merge that to the 100 reporting counties in 2020
+
+
+        running election id: 2020-11-03_USA_G --historical
+            -> historical election id: 2016-11-08_USA_G, 2012, ...
+        
+        """
+
         formatted_data = current_data[["postal_code", "geographic_unit_fips", "percent_expected_vote"]]
         print(f"Getting data for historical election: {historical_election_id}")
+        # historical_live_data_handler = MockLiveDataHandler(historical_election_id, office, geographic_unit_type, estimands, s3_client=s3.S3CsvUtil(TARGET_BUCKET))
         preprocessed_data_handler = PreprocessedDataHandler(
             historical_election_id,
             office,
@@ -401,6 +419,7 @@ def _format_historical_current_data(
             estimand_baselines,
             s3_client=s3.S3CsvUtil(TARGET_BUCKET),
             historical=True,
+            include_results_estimand=True
         )
 
         results_to_return = [f"results_{estimand}" for estimand in estimands]
diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py
index fdce5f73..c988435a 100644
--- a/src/elexmodel/handlers/data/Estimandizer.py
+++ b/src/elexmodel/handlers/data/Estimandizer.py
@@ -20,6 +20,7 @@ def check_and_create_estimands(self, data_df, estimands, historical, current_dat
             results_col = f"{RESULTS_PREFIX}{estimand}"
 
             if results_col not in data_df.columns:
+                # will raise a KeyError if a function with the same name as `estimand` doesn't exist
                 data_df = globals()[estimand](data_df, RESULTS_PREFIX)
             columns_to_return.append(results_col)
         return data_df, columns_to_return
@@ -58,7 +59,7 @@ def check_and_create_estimands(self, data_df, estimands, historical, current_dat
 
         # return (data_df, columns_to_return)
 
-    def add_estimand_baselines(self, data_df, estimand_baselines, historical):
+    def add_estimand_baselines(self, data_df, estimand_baselines, historical, incl_results_estimand=False):
         # if we are in a historical election we are only reading preprocessed data to get
         # the historical election results of the currently reporting units.
         # so we don't care about the total voters or the baseline election.
@@ -76,6 +77,9 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical):
             if not historical:
                 data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1
 
+        if incl_results_estimand:
+            data_df, ___ = self.check_and_create_estimands(data_df, estimand_baselines.keys(), historical)
+
         return data_df
         # for estimand, pointer in estimand_baselines.items():
         #     if pointer is None:
diff --git a/src/elexmodel/handlers/data/PreprocessedData.py b/src/elexmodel/handlers/data/PreprocessedData.py
index a2ed3902..7cfecfe1 100644
--- a/src/elexmodel/handlers/data/PreprocessedData.py
+++ b/src/elexmodel/handlers/data/PreprocessedData.py
@@ -25,6 +25,7 @@ def __init__(
         s3_client=None,
         historical=False,
         data=None,
+        include_results_estimand=False
     ):
         """
         Initialize preprocessed data. If not present, download from s3.
@@ -36,6 +37,7 @@ def __init__(
         self.s3_client = s3_client
         self.estimand_baselines = estimand_baselines
         self.historical = historical
+        self.include_results_estimand = include_results_estimand
         self.estimandizer = Estimandizer()
 
         self.local_file_path = self.get_preprocessed_data_path()
@@ -83,8 +85,9 @@ def load_data(self, preprocessed_data):
         Load preprocessed csv data as df
         """
         LOG.info("Loading preprocessed data: %s, %s, %s", self.election_id, self.office, self.geographic_unit_type)
+        data = self.estimandizer.add_estimand_baselines(preprocessed_data, self.estimand_baselines, self.historical, incl_results_estimand=self.include_results_estimand)
 
-        return self.estimandizer.add_estimand_baselines(preprocessed_data, self.estimand_baselines, self.historical)
+        return data
 
     def save_data(self, preprocessed_data):
         if not Path(self.local_file_path).parent.exists():

From b6751cb65a3ed5fe9b769b380a6b303d406ad3b7 Mon Sep 17 00:00:00 2001
From: Diane Napolitano <diane.napolitano@washpost.com>
Date: Fri, 15 Sep 2023 10:43:32 -0400
Subject: [PATCH 3/4] Finalizing estimandizer updates and unit tests; testing
 with test bed and VA 2021 House of Delegates

---
 src/elexmodel/client.py                       | 11 ++-
 src/elexmodel/handlers/data/CombinedData.py   |  4 +-
 src/elexmodel/handlers/data/Estimandizer.py   | 87 +++++--------------
 .../handlers/data/PreprocessedData.py         |  9 +-
 tests/handlers/test_estimandizer.py           |  8 +-
 5 files changed, 39 insertions(+), 80 deletions(-)

diff --git a/src/elexmodel/client.py b/src/elexmodel/client.py
index df1af7c6..be11a3fb 100644
--- a/src/elexmodel/client.py
+++ b/src/elexmodel/client.py
@@ -9,7 +9,6 @@
 from elexmodel.handlers.data.CombinedData import CombinedDataHandler
 from elexmodel.handlers.data.ModelResults import ModelResultsHandler
 from elexmodel.handlers.data.PreprocessedData import PreprocessedDataHandler
-from elexmodel.handlers.data.LiveData import MockLiveDataHandler
 from elexmodel.logging import initialize_logging
 from elexmodel.models.ConformalElectionModel import ConformalElectionModel
 from elexmodel.models.GaussianElectionModel import GaussianElectionModel
@@ -398,19 +397,19 @@ def _format_historical_current_data(
             - If we are running the election in 2024 and 100 counties are reporting, we want to see what
                 our model error would have been in 2020 with these counties reporting
             - To do that we need to merge the 2020 results onto the 2024 reporting counties
-        
-            - So for 2020 (cli) this means -> we have 2020 data and we pick 100 random counties reporting in the MockLiveDataHandler
+
+            - So for 2020 (cli) this means -> we have 2020 data and we pick 100 random counties reporting
+              in the MockLiveDataHandler
             - in this function we get the 2016 results and merge that to the 100 reporting counties in 2020
 
 
         running election id: 2020-11-03_USA_G --historical
             -> historical election id: 2016-11-08_USA_G, 2012, ...
-        
+
         """
 
         formatted_data = current_data[["postal_code", "geographic_unit_fips", "percent_expected_vote"]]
         print(f"Getting data for historical election: {historical_election_id}")
-        # historical_live_data_handler = MockLiveDataHandler(historical_election_id, office, geographic_unit_type, estimands, s3_client=s3.S3CsvUtil(TARGET_BUCKET))
         preprocessed_data_handler = PreprocessedDataHandler(
             historical_election_id,
             office,
@@ -419,7 +418,7 @@ def _format_historical_current_data(
             estimand_baselines,
             s3_client=s3.S3CsvUtil(TARGET_BUCKET),
             historical=True,
-            include_results_estimand=True
+            include_results_estimand=True,
         )
 
         results_to_return = [f"results_{estimand}" for estimand in estimands]
diff --git a/src/elexmodel/handlers/data/CombinedData.py b/src/elexmodel/handlers/data/CombinedData.py
index 8da3c426..69e83180 100644
--- a/src/elexmodel/handlers/data/CombinedData.py
+++ b/src/elexmodel/handlers/data/CombinedData.py
@@ -19,9 +19,7 @@ def __init__(
         self.estimands = estimands
 
         estimandizer = Estimandizer()
-        (current_data, _) = estimandizer.check_and_create_estimands(
-            current_data.copy(), self.estimands, False, current_data=True
-        )
+        (current_data, _) = estimandizer.check_and_create_estimands(current_data.copy(), self.estimands, False)
 
         # if we're running this for a past election, drop results columns from preprocessed data
         # so we use results_{estimand} numbers from current_data
diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py
index c988435a..87b16d42 100644
--- a/src/elexmodel/handlers/data/Estimandizer.py
+++ b/src/elexmodel/handlers/data/Estimandizer.py
@@ -14,7 +14,7 @@ class Estimandizer:
     Estimandizer. Generate estimands explicitly.
     """
 
-    def check_and_create_estimands(self, data_df, estimands, historical, current_data=False):
+    def check_and_create_estimands(self, data_df, estimands, historical):
         columns_to_return = []
         for estimand in estimands:
             results_col = f"{RESULTS_PREFIX}{estimand}"
@@ -22,44 +22,24 @@ def check_and_create_estimands(self, data_df, estimands, historical, current_dat
             if results_col not in data_df.columns:
                 # will raise a KeyError if a function with the same name as `estimand` doesn't exist
                 data_df = globals()[estimand](data_df, RESULTS_PREFIX)
-            columns_to_return.append(results_col)
-        return data_df, columns_to_return
-
-
 
-        # columns_to_return = []
+            if historical:
+                data_df[results_col] = nan
+            else:
+                if results_col not in data_df.columns:
+                    raise EstimandException("This is missing results data for estimand: ", estimand)
 
-        # for estimand in estimands:
-            # results_col = f"{RESULTS_PREFIX}{estimand}"
-            # baseline_col = f"{BASELINE_PREFIX}{estimand}"
-            # target_col = results_col if current_data else baseline_col
-
-            # if target_col not in data_df.columns:
-                # if estimand in data_df.columns:
-                    # data_df[target_col] = data_df[estimand].copy()
-                # else:
-                    # will raise a KeyError if a function with the same name as `estimand` doesn't exist
-                    # data_df = globals()[estimand](data_df)
-                # if target_col == baseline_col:
-                    # data_df[results_col] = data_df[baseline_col].copy()
-
-            # if historical:
-                # data_df[results_col] = nan
-            # else:
-                # if results_col not in data_df.columns:
-                    # raise EstimandException("This is missing results data for estimand: ", estimand)
-
-            # columns_to_return.append(results_col)
+            columns_to_return.append(results_col)
 
-        # results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)]
+        results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)]
         # If this is not a historical run, then this is a live election
         # so we are expecting that there will be actual results data
-        # if not historical and len(results_column_names) == 0:
-            # raise EstimandException("This is not a test election, it is missing results data")
+        if not historical and len(results_column_names) == 0:
+            raise EstimandException("This is not a test election, it is missing results data")
 
-        # return (data_df, columns_to_return)
+        return data_df, columns_to_return
 
-    def add_estimand_baselines(self, data_df, estimand_baselines, historical, incl_results_estimand=False):
+    def add_estimand_baselines(self, data_df, estimand_baselines, historical, include_results_estimand=False):
         # if we are in a historical election we are only reading preprocessed data to get
         # the historical election results of the currently reporting units.
         # so we don't care about the total voters or the baseline election.
@@ -77,48 +57,25 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical, incl_r
             if not historical:
                 data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1
 
-        if incl_results_estimand:
+        if include_results_estimand:
             data_df, ___ = self.check_and_create_estimands(data_df, estimand_baselines.keys(), historical)
 
         return data_df
-        # for estimand, pointer in estimand_baselines.items():
-        #     if pointer is None:
-        #         # should only happen when we're going to create a new estimand
-        #         pointer = estimand
-
-        #     baseline_col = f"{BASELINE_PREFIX}{pointer}"
-
-        #     if baseline_col not in data_df.columns:
-        #         # will raise a KeyError if a function with the same name as `pointer` doesn't exist
-        #         data_df = globals()[pointer](data_df)
-        #         results_col = f"{RESULTS_PREFIX}{estimand}"
-        #         data_df[results_col] = data_df[baseline_col].copy()
-
-        #     if not historical:
-        #         # Adding one to prevent zero divison
-        #         data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1
-
-        # return data_df
 
 
 # custom estimands
 
 
 def party_vote_share_dem(data_df, col_prefix):
-
-    data_df[f"{col_prefix}party_vote_share_dem"] = (
-        data_df[f"{col_prefix}dem"] / data_df[f"{col_prefix}turnout"]
+    if f"{col_prefix}dem" in data_df.columns and f"{col_prefix}turnout" in data_df.columns:
+        numer = f"{col_prefix}dem"
+        denom = f"{col_prefix}turnout"
+    else:
+        numer = "dem"
+        denom = "total"
+
+    data_df[f"{col_prefix}party_vote_share_dem"] = data_df.apply(
+        lambda x: 0 if x[numer] == 0 or x[denom] == 0 else x[numer] / x[denom], axis=1
     )
 
     return data_df
-
-    # should only happen when we're replaying an election
-    # if f"{BASELINE_PREFIX}dem" not in data_df.columns and f"{BASELINE_PREFIX}turnout" not in data_df.columns:
-        # data_df[f"{RESULTS_PREFIX}party_vote_share_dem"] = (
-            # data_df[f"{RESULTS_PREFIX}dem"] / data_df[f"{RESULTS_PREFIX}turnout"]
-        # )
-    # else:
-        # data_df[f"{BASELINE_PREFIX}party_vote_share_dem"] = (
-            # data_df[f"{BASELINE_PREFIX}dem"] / data_df[f"{BASELINE_PREFIX}turnout"]
-        # )
-    # return data_df
diff --git a/src/elexmodel/handlers/data/PreprocessedData.py b/src/elexmodel/handlers/data/PreprocessedData.py
index 7cfecfe1..d57639c3 100644
--- a/src/elexmodel/handlers/data/PreprocessedData.py
+++ b/src/elexmodel/handlers/data/PreprocessedData.py
@@ -25,7 +25,7 @@ def __init__(
         s3_client=None,
         historical=False,
         data=None,
-        include_results_estimand=False
+        include_results_estimand=False,
     ):
         """
         Initialize preprocessed data. If not present, download from s3.
@@ -85,7 +85,12 @@ def load_data(self, preprocessed_data):
         Load preprocessed csv data as df
         """
         LOG.info("Loading preprocessed data: %s, %s, %s", self.election_id, self.office, self.geographic_unit_type)
-        data = self.estimandizer.add_estimand_baselines(preprocessed_data, self.estimand_baselines, self.historical, incl_results_estimand=self.include_results_estimand)
+        data = self.estimandizer.add_estimand_baselines(
+            preprocessed_data,
+            self.estimand_baselines,
+            self.historical,
+            include_results_estimand=self.include_results_estimand,
+        )
 
         return data
 
diff --git a/tests/handlers/test_estimandizer.py b/tests/handlers/test_estimandizer.py
index 84312e46..bc127969 100644
--- a/tests/handlers/test_estimandizer.py
+++ b/tests/handlers/test_estimandizer.py
@@ -12,7 +12,6 @@ def test_check_and_create_estimands_not_historical(va_governor_county_data):
     estimandizer = Estimandizer()
     (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, False)
 
-    assert "baseline_party_vote_share_dem" in output_df.columns
     assert "results_party_vote_share_dem" in output_df.columns
     assert result_columns == ["results_party_vote_share_dem"]
 
@@ -27,9 +26,9 @@ def test_check_and_create_estimands_historical(va_governor_county_data):
     estimandizer = Estimandizer()
     (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, True)
 
-    assert "baseline_party_vote_share_dem" in output_df.columns
     assert "results_party_vote_share_dem" in output_df.columns
     assert result_columns == ["results_party_vote_share_dem"]
+    assert output_df["results_party_vote_share_dem"].isnull().all()
 
 
 def test_add_estimand_baselines_not_historical(va_governor_county_data):
@@ -37,14 +36,15 @@ def test_add_estimand_baselines_not_historical(va_governor_county_data):
     estimandizer = Estimandizer()
     output_df = estimandizer.add_estimand_baselines(va_governor_county_data.copy(), estimand_baselines, False)
     assert "baseline_party_vote_share_dem" in output_df.columns
-    assert "results_party_vote_share_dem" in output_df.columns
     assert "last_election_results_party_vote_share_dem" in output_df.columns
 
 
 def test_add_estimand_baselines_historical(va_governor_county_data):
     estimand_baselines = {"turnout": "turnout", "party_vote_share_dem": "party_vote_share_dem"}
     estimandizer = Estimandizer()
-    output_df = estimandizer.add_estimand_baselines(va_governor_county_data.copy(), estimand_baselines, True)
+    output_df = estimandizer.add_estimand_baselines(
+        va_governor_county_data.copy(), estimand_baselines, True, include_results_estimand=True
+    )
     assert "baseline_party_vote_share_dem" in output_df.columns
     assert "results_party_vote_share_dem" in output_df.columns
     assert "last_election_results_party_vote_share_dem" not in output_df.columns

From 5111a179b7bfe39792124b8719ee5bf6e4d6adf2 Mon Sep 17 00:00:00 2001
From: Diane Napolitano <diane.napolitano@washpost.com>
Date: Fri, 15 Sep 2023 15:52:58 -0400
Subject: [PATCH 4/4] Renaming Estimandizer's check_and_create_estimands()
 method to add_estimand_results()

---
 src/elexmodel/handlers/data/CombinedData.py |  2 +-
 src/elexmodel/handlers/data/Estimandizer.py |  4 ++--
 src/elexmodel/handlers/data/LiveData.py     |  2 +-
 tests/handlers/test_estimandizer.py         | 12 ++++++------
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/elexmodel/handlers/data/CombinedData.py b/src/elexmodel/handlers/data/CombinedData.py
index 4f1a3154..76fc2672 100644
--- a/src/elexmodel/handlers/data/CombinedData.py
+++ b/src/elexmodel/handlers/data/CombinedData.py
@@ -19,7 +19,7 @@ def __init__(
         self.estimands = estimands
 
         estimandizer = Estimandizer()
-        (current_data, _) = estimandizer.check_and_create_estimands(current_data.copy(), self.estimands, False)
+        (current_data, _) = estimandizer.add_estimand_results(current_data.copy(), self.estimands, False)
 
         # if we're running this for a past election, drop results columns from preprocessed data
         # so we use results_{estimand} numbers from current_data
diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py
index 87b16d42..0724c53c 100644
--- a/src/elexmodel/handlers/data/Estimandizer.py
+++ b/src/elexmodel/handlers/data/Estimandizer.py
@@ -14,7 +14,7 @@ class Estimandizer:
     Estimandizer. Generate estimands explicitly.
     """
 
-    def check_and_create_estimands(self, data_df, estimands, historical):
+    def add_estimand_results(self, data_df, estimands, historical):
         columns_to_return = []
         for estimand in estimands:
             results_col = f"{RESULTS_PREFIX}{estimand}"
@@ -58,7 +58,7 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical, includ
                 data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1
 
         if include_results_estimand:
-            data_df, ___ = self.check_and_create_estimands(data_df, estimand_baselines.keys(), historical)
+            data_df, ___ = self.add_estimand_results(data_df, estimand_baselines.keys(), historical)
 
         return data_df
 
diff --git a/src/elexmodel/handlers/data/LiveData.py b/src/elexmodel/handlers/data/LiveData.py
index c9c598f3..940f9ca7 100644
--- a/src/elexmodel/handlers/data/LiveData.py
+++ b/src/elexmodel/handlers/data/LiveData.py
@@ -84,7 +84,7 @@ def get_live_data_file_path(self):
     def load_data(self, data):
         columns_to_return = ["postal_code", "geographic_unit_fips"]
 
-        (data, more_columns) = self.estimandizer.check_and_create_estimands(data, self.estimands, self.historical)
+        (data, more_columns) = self.estimandizer.add_estimand_results(data, self.estimands, self.historical)
         columns_to_return += more_columns
 
         self.shuffle_dataframe = data[self.shuffle_columns].copy()
diff --git a/tests/handlers/test_estimandizer.py b/tests/handlers/test_estimandizer.py
index bc127969..3d258715 100644
--- a/tests/handlers/test_estimandizer.py
+++ b/tests/handlers/test_estimandizer.py
@@ -1,30 +1,30 @@
 from elexmodel.handlers.data.Estimandizer import Estimandizer
 
 
-def test_check_and_create_estimands_not_historical(va_governor_county_data):
+def test_add_estimand_results_not_historical(va_governor_county_data):
     """
-    Tests the check_and_create_estimands() method.
+    Tests the add_estimand_results() method.
     """
 
     va_data_copy = va_governor_county_data.copy()
     estimands = ["party_vote_share_dem"]
 
     estimandizer = Estimandizer()
-    (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, False)
+    (output_df, result_columns) = estimandizer.add_estimand_results(va_data_copy, estimands, False)
 
     assert "results_party_vote_share_dem" in output_df.columns
     assert result_columns == ["results_party_vote_share_dem"]
 
 
-def test_check_and_create_estimands_historical(va_governor_county_data):
+def test_add_estimand_results_historical(va_governor_county_data):
     """
-    Tests the check_and_create_estimands() method with historical elections.
+    Tests the add_estimand_results() method with historical elections.
     """
     va_data_copy = va_governor_county_data.copy()
     estimands = ["party_vote_share_dem"]
 
     estimandizer = Estimandizer()
-    (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, True)
+    (output_df, result_columns) = estimandizer.add_estimand_results(va_data_copy, estimands, True)
 
     assert "results_party_vote_share_dem" in output_df.columns
     assert result_columns == ["results_party_vote_share_dem"]