From ad24f915edb3366b7c03c9b2f9e8e251a89793e5 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 14 Sep 2023 10:23:47 -0400 Subject: [PATCH 1/4] trying something new --- src/elexmodel/handlers/data/Estimandizer.py | 104 +++++++++++++------- 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py index 30e6a00f..fdce5f73 100644 --- a/src/elexmodel/handlers/data/Estimandizer.py +++ b/src/elexmodel/handlers/data/Estimandizer.py @@ -16,36 +16,47 @@ class Estimandizer: def check_and_create_estimands(self, data_df, estimands, historical, current_data=False): columns_to_return = [] - for estimand in estimands: results_col = f"{RESULTS_PREFIX}{estimand}" - baseline_col = f"{BASELINE_PREFIX}{estimand}" - target_col = results_col if current_data else baseline_col - if target_col not in data_df.columns: - if estimand in data_df.columns: - data_df[target_col] = data_df[estimand].copy() - else: + if results_col not in data_df.columns: + data_df = globals()[estimand](data_df, RESULTS_PREFIX) + columns_to_return.append(results_col) + return data_df, columns_to_return + + + + # columns_to_return = [] + + # for estimand in estimands: + # results_col = f"{RESULTS_PREFIX}{estimand}" + # baseline_col = f"{BASELINE_PREFIX}{estimand}" + # target_col = results_col if current_data else baseline_col + + # if target_col not in data_df.columns: + # if estimand in data_df.columns: + # data_df[target_col] = data_df[estimand].copy() + # else: # will raise a KeyError if a function with the same name as `estimand` doesn't exist - data_df = globals()[estimand](data_df) - if target_col == baseline_col: - data_df[results_col] = data_df[baseline_col].copy() + # data_df = globals()[estimand](data_df) + # if target_col == baseline_col: + # data_df[results_col] = data_df[baseline_col].copy() - if historical: - data_df[results_col] = nan - else: - if results_col not in data_df.columns: - raise EstimandException("This is missing results data for estimand: ", estimand) + # if historical: + # data_df[results_col] = nan + # else: + # if results_col not in data_df.columns: + # raise EstimandException("This is missing results data for estimand: ", estimand) - columns_to_return.append(results_col) + # columns_to_return.append(results_col) - results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)] + # results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)] # If this is not a historical run, then this is a live election # so we are expecting that there will be actual results data - if not historical and len(results_column_names) == 0: - raise EstimandException("This is not a test election, it is missing results data") + # if not historical and len(results_column_names) == 0: + # raise EstimandException("This is not a test election, it is missing results data") - return (data_df, columns_to_return) + # return (data_df, columns_to_return) def add_estimand_baselines(self, data_df, estimand_baselines, historical): # if we are in a historical election we are only reading preprocessed data to get @@ -54,35 +65,56 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical): for estimand, pointer in estimand_baselines.items(): if pointer is None: - # should only happen when we're going to create a new estimand + # when we are creating a new estimand pointer = estimand baseline_col = f"{BASELINE_PREFIX}{pointer}" if baseline_col not in data_df.columns: - # will raise a KeyError if a function with the same name as `pointer` doesn't exist - data_df = globals()[pointer](data_df) - results_col = f"{RESULTS_PREFIX}{estimand}" - data_df[results_col] = data_df[baseline_col].copy() + data_df = globals()[estimand](data_df, BASELINE_PREFIX) if not historical: - # Adding one to prevent zero divison data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1 return data_df + # for estimand, pointer in estimand_baselines.items(): + # if pointer is None: + # # should only happen when we're going to create a new estimand + # pointer = estimand + + # baseline_col = f"{BASELINE_PREFIX}{pointer}" + + # if baseline_col not in data_df.columns: + # # will raise a KeyError if a function with the same name as `pointer` doesn't exist + # data_df = globals()[pointer](data_df) + # results_col = f"{RESULTS_PREFIX}{estimand}" + # data_df[results_col] = data_df[baseline_col].copy() + + # if not historical: + # # Adding one to prevent zero divison + # data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1 + + # return data_df # custom estimands -def party_vote_share_dem(data_df): - # should only happen when we're replaying an election - if f"{BASELINE_PREFIX}dem" not in data_df.columns and f"{BASELINE_PREFIX}turnout" not in data_df.columns: - data_df[f"{RESULTS_PREFIX}party_vote_share_dem"] = ( - data_df[f"{RESULTS_PREFIX}dem"] / data_df[f"{RESULTS_PREFIX}turnout"] - ) - else: - data_df[f"{BASELINE_PREFIX}party_vote_share_dem"] = ( - data_df[f"{BASELINE_PREFIX}dem"] / data_df[f"{BASELINE_PREFIX}turnout"] - ) +def party_vote_share_dem(data_df, col_prefix): + + data_df[f"{col_prefix}party_vote_share_dem"] = ( + data_df[f"{col_prefix}dem"] / data_df[f"{col_prefix}turnout"] + ) + return data_df + + # should only happen when we're replaying an election + # if f"{BASELINE_PREFIX}dem" not in data_df.columns and f"{BASELINE_PREFIX}turnout" not in data_df.columns: + # data_df[f"{RESULTS_PREFIX}party_vote_share_dem"] = ( + # data_df[f"{RESULTS_PREFIX}dem"] / data_df[f"{RESULTS_PREFIX}turnout"] + # ) + # else: + # data_df[f"{BASELINE_PREFIX}party_vote_share_dem"] = ( + # data_df[f"{BASELINE_PREFIX}dem"] / data_df[f"{BASELINE_PREFIX}turnout"] + # ) + # return data_df From bd39fab6face01644b2ef000415fcfe8ee06b7e2 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Thu, 14 Sep 2023 12:54:26 -0400 Subject: [PATCH 2/4] made progress on historical and testbed integration --- src/elexmodel/client.py | 19 +++++++++++++++++++ src/elexmodel/handlers/data/Estimandizer.py | 6 +++++- .../handlers/data/PreprocessedData.py | 5 ++++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/elexmodel/client.py b/src/elexmodel/client.py index 6194b7f0..df1af7c6 100644 --- a/src/elexmodel/client.py +++ b/src/elexmodel/client.py @@ -9,6 +9,7 @@ from elexmodel.handlers.data.CombinedData import CombinedDataHandler from elexmodel.handlers.data.ModelResults import ModelResultsHandler from elexmodel.handlers.data.PreprocessedData import PreprocessedDataHandler +from elexmodel.handlers.data.LiveData import MockLiveDataHandler from elexmodel.logging import initialize_logging from elexmodel.models.ConformalElectionModel import ConformalElectionModel from elexmodel.models.GaussianElectionModel import GaussianElectionModel @@ -391,8 +392,25 @@ def _format_historical_current_data( """ Formats data for historical model run """ + + """ + What does the historical model client do? + - If we are running the election in 2024 and 100 counties are reporting, we want to see what + our model error would have been in 2020 with these counties reporting + - To do that we need to merge the 2020 results onto the 2024 reporting counties + + - So for 2020 (cli) this means -> we have 2020 data and we pick 100 random counties reporting in the MockLiveDataHandler + - in this function we get the 2016 results and merge that to the 100 reporting counties in 2020 + + + running election id: 2020-11-03_USA_G --historical + -> historical election id: 2016-11-08_USA_G, 2012, ... + + """ + formatted_data = current_data[["postal_code", "geographic_unit_fips", "percent_expected_vote"]] print(f"Getting data for historical election: {historical_election_id}") + # historical_live_data_handler = MockLiveDataHandler(historical_election_id, office, geographic_unit_type, estimands, s3_client=s3.S3CsvUtil(TARGET_BUCKET)) preprocessed_data_handler = PreprocessedDataHandler( historical_election_id, office, @@ -401,6 +419,7 @@ def _format_historical_current_data( estimand_baselines, s3_client=s3.S3CsvUtil(TARGET_BUCKET), historical=True, + include_results_estimand=True ) results_to_return = [f"results_{estimand}" for estimand in estimands] diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py index fdce5f73..c988435a 100644 --- a/src/elexmodel/handlers/data/Estimandizer.py +++ b/src/elexmodel/handlers/data/Estimandizer.py @@ -20,6 +20,7 @@ def check_and_create_estimands(self, data_df, estimands, historical, current_dat results_col = f"{RESULTS_PREFIX}{estimand}" if results_col not in data_df.columns: + # will raise a KeyError if a function with the same name as `estimand` doesn't exist data_df = globals()[estimand](data_df, RESULTS_PREFIX) columns_to_return.append(results_col) return data_df, columns_to_return @@ -58,7 +59,7 @@ def check_and_create_estimands(self, data_df, estimands, historical, current_dat # return (data_df, columns_to_return) - def add_estimand_baselines(self, data_df, estimand_baselines, historical): + def add_estimand_baselines(self, data_df, estimand_baselines, historical, incl_results_estimand=False): # if we are in a historical election we are only reading preprocessed data to get # the historical election results of the currently reporting units. # so we don't care about the total voters or the baseline election. @@ -76,6 +77,9 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical): if not historical: data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1 + if incl_results_estimand: + data_df, ___ = self.check_and_create_estimands(data_df, estimand_baselines.keys(), historical) + return data_df # for estimand, pointer in estimand_baselines.items(): # if pointer is None: diff --git a/src/elexmodel/handlers/data/PreprocessedData.py b/src/elexmodel/handlers/data/PreprocessedData.py index a2ed3902..7cfecfe1 100644 --- a/src/elexmodel/handlers/data/PreprocessedData.py +++ b/src/elexmodel/handlers/data/PreprocessedData.py @@ -25,6 +25,7 @@ def __init__( s3_client=None, historical=False, data=None, + include_results_estimand=False ): """ Initialize preprocessed data. If not present, download from s3. @@ -36,6 +37,7 @@ def __init__( self.s3_client = s3_client self.estimand_baselines = estimand_baselines self.historical = historical + self.include_results_estimand = include_results_estimand self.estimandizer = Estimandizer() self.local_file_path = self.get_preprocessed_data_path() @@ -83,8 +85,9 @@ def load_data(self, preprocessed_data): Load preprocessed csv data as df """ LOG.info("Loading preprocessed data: %s, %s, %s", self.election_id, self.office, self.geographic_unit_type) + data = self.estimandizer.add_estimand_baselines(preprocessed_data, self.estimand_baselines, self.historical, incl_results_estimand=self.include_results_estimand) - return self.estimandizer.add_estimand_baselines(preprocessed_data, self.estimand_baselines, self.historical) + return data def save_data(self, preprocessed_data): if not Path(self.local_file_path).parent.exists(): From b6751cb65a3ed5fe9b769b380a6b303d406ad3b7 Mon Sep 17 00:00:00 2001 From: Diane Napolitano Date: Fri, 15 Sep 2023 10:43:32 -0400 Subject: [PATCH 3/4] Finalizing estimandizer updates and unit tests; testing with test bed and VA 2021 House of Delegates --- src/elexmodel/client.py | 11 ++- src/elexmodel/handlers/data/CombinedData.py | 4 +- src/elexmodel/handlers/data/Estimandizer.py | 87 +++++-------------- .../handlers/data/PreprocessedData.py | 9 +- tests/handlers/test_estimandizer.py | 8 +- 5 files changed, 39 insertions(+), 80 deletions(-) diff --git a/src/elexmodel/client.py b/src/elexmodel/client.py index df1af7c6..be11a3fb 100644 --- a/src/elexmodel/client.py +++ b/src/elexmodel/client.py @@ -9,7 +9,6 @@ from elexmodel.handlers.data.CombinedData import CombinedDataHandler from elexmodel.handlers.data.ModelResults import ModelResultsHandler from elexmodel.handlers.data.PreprocessedData import PreprocessedDataHandler -from elexmodel.handlers.data.LiveData import MockLiveDataHandler from elexmodel.logging import initialize_logging from elexmodel.models.ConformalElectionModel import ConformalElectionModel from elexmodel.models.GaussianElectionModel import GaussianElectionModel @@ -398,19 +397,19 @@ def _format_historical_current_data( - If we are running the election in 2024 and 100 counties are reporting, we want to see what our model error would have been in 2020 with these counties reporting - To do that we need to merge the 2020 results onto the 2024 reporting counties - - - So for 2020 (cli) this means -> we have 2020 data and we pick 100 random counties reporting in the MockLiveDataHandler + + - So for 2020 (cli) this means -> we have 2020 data and we pick 100 random counties reporting + in the MockLiveDataHandler - in this function we get the 2016 results and merge that to the 100 reporting counties in 2020 running election id: 2020-11-03_USA_G --historical -> historical election id: 2016-11-08_USA_G, 2012, ... - + """ formatted_data = current_data[["postal_code", "geographic_unit_fips", "percent_expected_vote"]] print(f"Getting data for historical election: {historical_election_id}") - # historical_live_data_handler = MockLiveDataHandler(historical_election_id, office, geographic_unit_type, estimands, s3_client=s3.S3CsvUtil(TARGET_BUCKET)) preprocessed_data_handler = PreprocessedDataHandler( historical_election_id, office, @@ -419,7 +418,7 @@ def _format_historical_current_data( estimand_baselines, s3_client=s3.S3CsvUtil(TARGET_BUCKET), historical=True, - include_results_estimand=True + include_results_estimand=True, ) results_to_return = [f"results_{estimand}" for estimand in estimands] diff --git a/src/elexmodel/handlers/data/CombinedData.py b/src/elexmodel/handlers/data/CombinedData.py index 8da3c426..69e83180 100644 --- a/src/elexmodel/handlers/data/CombinedData.py +++ b/src/elexmodel/handlers/data/CombinedData.py @@ -19,9 +19,7 @@ def __init__( self.estimands = estimands estimandizer = Estimandizer() - (current_data, _) = estimandizer.check_and_create_estimands( - current_data.copy(), self.estimands, False, current_data=True - ) + (current_data, _) = estimandizer.check_and_create_estimands(current_data.copy(), self.estimands, False) # if we're running this for a past election, drop results columns from preprocessed data # so we use results_{estimand} numbers from current_data diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py index c988435a..87b16d42 100644 --- a/src/elexmodel/handlers/data/Estimandizer.py +++ b/src/elexmodel/handlers/data/Estimandizer.py @@ -14,7 +14,7 @@ class Estimandizer: Estimandizer. Generate estimands explicitly. """ - def check_and_create_estimands(self, data_df, estimands, historical, current_data=False): + def check_and_create_estimands(self, data_df, estimands, historical): columns_to_return = [] for estimand in estimands: results_col = f"{RESULTS_PREFIX}{estimand}" @@ -22,44 +22,24 @@ def check_and_create_estimands(self, data_df, estimands, historical, current_dat if results_col not in data_df.columns: # will raise a KeyError if a function with the same name as `estimand` doesn't exist data_df = globals()[estimand](data_df, RESULTS_PREFIX) - columns_to_return.append(results_col) - return data_df, columns_to_return - - - # columns_to_return = [] + if historical: + data_df[results_col] = nan + else: + if results_col not in data_df.columns: + raise EstimandException("This is missing results data for estimand: ", estimand) - # for estimand in estimands: - # results_col = f"{RESULTS_PREFIX}{estimand}" - # baseline_col = f"{BASELINE_PREFIX}{estimand}" - # target_col = results_col if current_data else baseline_col - - # if target_col not in data_df.columns: - # if estimand in data_df.columns: - # data_df[target_col] = data_df[estimand].copy() - # else: - # will raise a KeyError if a function with the same name as `estimand` doesn't exist - # data_df = globals()[estimand](data_df) - # if target_col == baseline_col: - # data_df[results_col] = data_df[baseline_col].copy() - - # if historical: - # data_df[results_col] = nan - # else: - # if results_col not in data_df.columns: - # raise EstimandException("This is missing results data for estimand: ", estimand) - - # columns_to_return.append(results_col) + columns_to_return.append(results_col) - # results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)] + results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)] # If this is not a historical run, then this is a live election # so we are expecting that there will be actual results data - # if not historical and len(results_column_names) == 0: - # raise EstimandException("This is not a test election, it is missing results data") + if not historical and len(results_column_names) == 0: + raise EstimandException("This is not a test election, it is missing results data") - # return (data_df, columns_to_return) + return data_df, columns_to_return - def add_estimand_baselines(self, data_df, estimand_baselines, historical, incl_results_estimand=False): + def add_estimand_baselines(self, data_df, estimand_baselines, historical, include_results_estimand=False): # if we are in a historical election we are only reading preprocessed data to get # the historical election results of the currently reporting units. # so we don't care about the total voters or the baseline election. @@ -77,48 +57,25 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical, incl_r if not historical: data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1 - if incl_results_estimand: + if include_results_estimand: data_df, ___ = self.check_and_create_estimands(data_df, estimand_baselines.keys(), historical) return data_df - # for estimand, pointer in estimand_baselines.items(): - # if pointer is None: - # # should only happen when we're going to create a new estimand - # pointer = estimand - - # baseline_col = f"{BASELINE_PREFIX}{pointer}" - - # if baseline_col not in data_df.columns: - # # will raise a KeyError if a function with the same name as `pointer` doesn't exist - # data_df = globals()[pointer](data_df) - # results_col = f"{RESULTS_PREFIX}{estimand}" - # data_df[results_col] = data_df[baseline_col].copy() - - # if not historical: - # # Adding one to prevent zero divison - # data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1 - - # return data_df # custom estimands def party_vote_share_dem(data_df, col_prefix): - - data_df[f"{col_prefix}party_vote_share_dem"] = ( - data_df[f"{col_prefix}dem"] / data_df[f"{col_prefix}turnout"] + if f"{col_prefix}dem" in data_df.columns and f"{col_prefix}turnout" in data_df.columns: + numer = f"{col_prefix}dem" + denom = f"{col_prefix}turnout" + else: + numer = "dem" + denom = "total" + + data_df[f"{col_prefix}party_vote_share_dem"] = data_df.apply( + lambda x: 0 if x[numer] == 0 or x[denom] == 0 else x[numer] / x[denom], axis=1 ) return data_df - - # should only happen when we're replaying an election - # if f"{BASELINE_PREFIX}dem" not in data_df.columns and f"{BASELINE_PREFIX}turnout" not in data_df.columns: - # data_df[f"{RESULTS_PREFIX}party_vote_share_dem"] = ( - # data_df[f"{RESULTS_PREFIX}dem"] / data_df[f"{RESULTS_PREFIX}turnout"] - # ) - # else: - # data_df[f"{BASELINE_PREFIX}party_vote_share_dem"] = ( - # data_df[f"{BASELINE_PREFIX}dem"] / data_df[f"{BASELINE_PREFIX}turnout"] - # ) - # return data_df diff --git a/src/elexmodel/handlers/data/PreprocessedData.py b/src/elexmodel/handlers/data/PreprocessedData.py index 7cfecfe1..d57639c3 100644 --- a/src/elexmodel/handlers/data/PreprocessedData.py +++ b/src/elexmodel/handlers/data/PreprocessedData.py @@ -25,7 +25,7 @@ def __init__( s3_client=None, historical=False, data=None, - include_results_estimand=False + include_results_estimand=False, ): """ Initialize preprocessed data. If not present, download from s3. @@ -85,7 +85,12 @@ def load_data(self, preprocessed_data): Load preprocessed csv data as df """ LOG.info("Loading preprocessed data: %s, %s, %s", self.election_id, self.office, self.geographic_unit_type) - data = self.estimandizer.add_estimand_baselines(preprocessed_data, self.estimand_baselines, self.historical, incl_results_estimand=self.include_results_estimand) + data = self.estimandizer.add_estimand_baselines( + preprocessed_data, + self.estimand_baselines, + self.historical, + include_results_estimand=self.include_results_estimand, + ) return data diff --git a/tests/handlers/test_estimandizer.py b/tests/handlers/test_estimandizer.py index 84312e46..bc127969 100644 --- a/tests/handlers/test_estimandizer.py +++ b/tests/handlers/test_estimandizer.py @@ -12,7 +12,6 @@ def test_check_and_create_estimands_not_historical(va_governor_county_data): estimandizer = Estimandizer() (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, False) - assert "baseline_party_vote_share_dem" in output_df.columns assert "results_party_vote_share_dem" in output_df.columns assert result_columns == ["results_party_vote_share_dem"] @@ -27,9 +26,9 @@ def test_check_and_create_estimands_historical(va_governor_county_data): estimandizer = Estimandizer() (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, True) - assert "baseline_party_vote_share_dem" in output_df.columns assert "results_party_vote_share_dem" in output_df.columns assert result_columns == ["results_party_vote_share_dem"] + assert output_df["results_party_vote_share_dem"].isnull().all() def test_add_estimand_baselines_not_historical(va_governor_county_data): @@ -37,14 +36,15 @@ def test_add_estimand_baselines_not_historical(va_governor_county_data): estimandizer = Estimandizer() output_df = estimandizer.add_estimand_baselines(va_governor_county_data.copy(), estimand_baselines, False) assert "baseline_party_vote_share_dem" in output_df.columns - assert "results_party_vote_share_dem" in output_df.columns assert "last_election_results_party_vote_share_dem" in output_df.columns def test_add_estimand_baselines_historical(va_governor_county_data): estimand_baselines = {"turnout": "turnout", "party_vote_share_dem": "party_vote_share_dem"} estimandizer = Estimandizer() - output_df = estimandizer.add_estimand_baselines(va_governor_county_data.copy(), estimand_baselines, True) + output_df = estimandizer.add_estimand_baselines( + va_governor_county_data.copy(), estimand_baselines, True, include_results_estimand=True + ) assert "baseline_party_vote_share_dem" in output_df.columns assert "results_party_vote_share_dem" in output_df.columns assert "last_election_results_party_vote_share_dem" not in output_df.columns From 5111a179b7bfe39792124b8719ee5bf6e4d6adf2 Mon Sep 17 00:00:00 2001 From: Diane Napolitano Date: Fri, 15 Sep 2023 15:52:58 -0400 Subject: [PATCH 4/4] Renaming Estimandizer's check_and_create_estimands() method to add_estimand_results() --- src/elexmodel/handlers/data/CombinedData.py | 2 +- src/elexmodel/handlers/data/Estimandizer.py | 4 ++-- src/elexmodel/handlers/data/LiveData.py | 4 +++- tests/handlers/test_estimandizer.py | 12 ++++++------ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/elexmodel/handlers/data/CombinedData.py b/src/elexmodel/handlers/data/CombinedData.py index 4f1a3154..76fc2672 100644 --- a/src/elexmodel/handlers/data/CombinedData.py +++ b/src/elexmodel/handlers/data/CombinedData.py @@ -19,7 +19,7 @@ def __init__( self.estimands = estimands estimandizer = Estimandizer() - (current_data, _) = estimandizer.check_and_create_estimands(current_data.copy(), self.estimands, False) + (current_data, _) = estimandizer.add_estimand_results(current_data.copy(), self.estimands, False) # if we're running this for a past election, drop results columns from preprocessed data # so we use results_{estimand} numbers from current_data diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py index 87b16d42..0724c53c 100644 --- a/src/elexmodel/handlers/data/Estimandizer.py +++ b/src/elexmodel/handlers/data/Estimandizer.py @@ -14,7 +14,7 @@ class Estimandizer: Estimandizer. Generate estimands explicitly. """ - def check_and_create_estimands(self, data_df, estimands, historical): + def add_estimand_results(self, data_df, estimands, historical): columns_to_return = [] for estimand in estimands: results_col = f"{RESULTS_PREFIX}{estimand}" @@ -58,7 +58,7 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical, includ data_df[f"last_election_results_{estimand}"] = data_df[baseline_col].copy() + 1 if include_results_estimand: - data_df, ___ = self.check_and_create_estimands(data_df, estimand_baselines.keys(), historical) + data_df, ___ = self.add_estimand_results(data_df, estimand_baselines.keys(), historical) return data_df diff --git a/src/elexmodel/handlers/data/LiveData.py b/src/elexmodel/handlers/data/LiveData.py index c9c598f3..940f9ca7 100644 --- a/src/elexmodel/handlers/data/LiveData.py +++ b/src/elexmodel/handlers/data/LiveData.py @@ -84,7 +84,9 @@ def get_live_data_file_path(self): def load_data(self, data): columns_to_return = ["postal_code", "geographic_unit_fips"] - (data, more_columns) = self.estimandizer.check_and_create_estimands(data, self.estimands, self.historical) + print(data) + print(data.columns) + (data, more_columns) = self.estimandizer.add_estimand_results(data, self.estimands, self.historical) columns_to_return += more_columns self.shuffle_dataframe = data[self.shuffle_columns].copy() diff --git a/tests/handlers/test_estimandizer.py b/tests/handlers/test_estimandizer.py index bc127969..3d258715 100644 --- a/tests/handlers/test_estimandizer.py +++ b/tests/handlers/test_estimandizer.py @@ -1,30 +1,30 @@ from elexmodel.handlers.data.Estimandizer import Estimandizer -def test_check_and_create_estimands_not_historical(va_governor_county_data): +def test_add_estimand_results_not_historical(va_governor_county_data): """ - Tests the check_and_create_estimands() method. + Tests the add_estimand_results() method. """ va_data_copy = va_governor_county_data.copy() estimands = ["party_vote_share_dem"] estimandizer = Estimandizer() - (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, False) + (output_df, result_columns) = estimandizer.add_estimand_results(va_data_copy, estimands, False) assert "results_party_vote_share_dem" in output_df.columns assert result_columns == ["results_party_vote_share_dem"] -def test_check_and_create_estimands_historical(va_governor_county_data): +def test_add_estimand_results_historical(va_governor_county_data): """ - Tests the check_and_create_estimands() method with historical elections. + Tests the add_estimand_results() method with historical elections. """ va_data_copy = va_governor_county_data.copy() estimands = ["party_vote_share_dem"] estimandizer = Estimandizer() - (output_df, result_columns) = estimandizer.check_and_create_estimands(va_data_copy, estimands, True) + (output_df, result_columns) = estimandizer.add_estimand_results(va_data_copy, estimands, True) assert "results_party_vote_share_dem" in output_df.columns assert result_columns == ["results_party_vote_share_dem"]