From 78ea922e663726a09efc02ee0d0667e592836f66 Mon Sep 17 00:00:00 2001 From: Diane Napolitano Date: Fri, 15 Sep 2023 17:44:58 -0400 Subject: [PATCH] Removing the setting of results_estimands to nan during historical runs and the single use of non-conventional column names during estimand creation that I could find so far --- src/elexmodel/handlers/data/Estimandizer.py | 19 ++----------------- tests/handlers/test_estimandizer.py | 1 - 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py index 0724c53c..84914a61 100644 --- a/src/elexmodel/handlers/data/Estimandizer.py +++ b/src/elexmodel/handlers/data/Estimandizer.py @@ -1,6 +1,3 @@ -from numpy import nan - - class EstimandException(Exception): pass @@ -18,17 +15,9 @@ def add_estimand_results(self, data_df, estimands, historical): columns_to_return = [] for estimand in estimands: results_col = f"{RESULTS_PREFIX}{estimand}" - if results_col not in data_df.columns: # will raise a KeyError if a function with the same name as `estimand` doesn't exist data_df = globals()[estimand](data_df, RESULTS_PREFIX) - - if historical: - data_df[results_col] = nan - else: - if results_col not in data_df.columns: - raise EstimandException("This is missing results data for estimand: ", estimand) - columns_to_return.append(results_col) results_column_names = [x for x in data_df.columns if x.startswith(RESULTS_PREFIX)] @@ -67,12 +56,8 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical, includ def party_vote_share_dem(data_df, col_prefix): - if f"{col_prefix}dem" in data_df.columns and f"{col_prefix}turnout" in data_df.columns: - numer = f"{col_prefix}dem" - denom = f"{col_prefix}turnout" - else: - numer = "dem" - denom = "total" + numer = f"{col_prefix}dem" + denom = f"{col_prefix}turnout" data_df[f"{col_prefix}party_vote_share_dem"] = data_df.apply( lambda x: 0 if x[numer] == 0 or x[denom] == 0 else x[numer] / x[denom], axis=1 diff --git a/tests/handlers/test_estimandizer.py b/tests/handlers/test_estimandizer.py index 3d258715..91be637a 100644 --- a/tests/handlers/test_estimandizer.py +++ b/tests/handlers/test_estimandizer.py @@ -28,7 +28,6 @@ def test_add_estimand_results_historical(va_governor_county_data): assert "results_party_vote_share_dem" in output_df.columns assert result_columns == ["results_party_vote_share_dem"] - assert output_df["results_party_vote_share_dem"].isnull().all() def test_add_estimand_baselines_not_historical(va_governor_county_data):