washingtonpost · dmnapolitano · Sep 13, 2023 · Jun 29, 2023 · Jun 29, 2023 · Jun 29, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -36,4 +36,4 @@ repos:
           # these are errors that will be ignored by flake8
           # definitions here
           # https://flake8.pycqa.org/en/latest/user/error-codes.html
-          - "--ignore=E266,E501,W503"
+          - "--ignore=E266,E501,W503,F811"
@@ -7,7 +7,7 @@ max-line-length = 120
 [pylint]
 max-line-length = 120
 good-names= on, x, df, NonparametricElectionModel, GaussianElectionModel, 
-            BaseElectionModel, qr, X, y, f, LiveData, n, Featurizer, fe, PreprocessedData, CombinedData, 
+            BaseElectionModel, qr, X, y, f, LiveData, n, Featurizer, Estimandizer, fe, PreprocessedData, CombinedData, 
             ModelResults, GaussianModel, MODEL_THRESHOLD, LOG, w, df_X, df_y, v, n, g, a, b
 disable=missing-function-docstring, missing-module-docstring, missing-class-docstring, #missing
         too-many-arguments, too-many-locals, too-many-branches, too-many-instance-attributes, too-many-statements, #structure: too-many

@@ -0,0 +1,153 @@
+import numpy as np
+
+
+class Estimandizer:
+    """
+    Estimandizer. Generate estimands explicitly.
+
+    Each supported estimand name maps to an ordered list of transformation
+    methods; every method writes its results into `data_handler.data`.
+    """
+
+    def __init__(self, data_handler, estimands):
+        """Store the handler whose `data` is mutated and the estimand names to generate."""
+        self.data_handler = data_handler
+        self.estimands = estimands
+        self.transformations = []
+        # estimand name -> steps to run in order (later steps read columns written by earlier ones)
+        self.transformation_map = {
+            "margin": [self.calculate_margin],
+            "voter_turnout_rate": [self.calculate_voter_turnout_rate],
+            "standardized_income": [self.standardize_median_household_income],
+            "age_groups": [self.create_age_groups],
+            "party_vote_share": [self.calculate_party_vote_share],
+            "education_impact": [self.calculate_party_vote_share, self.analyze_education_impact],
+            "gender_turnout_disparity": [self.investigate_gender_turnout_disparity],
+            "ethnicity_voting_patterns": [self.calculate_party_vote_share, self.examine_ethnicity_voting_patterns],
+            "income_impact": [
+                self.calculate_party_vote_share,
+                self.standardize_median_household_income,
+                self.explore_income_impact,
+            ],
+            "candidate": [self.candidate],
+        }
+
+    def check_estimand(self, estimand):
+        """
+        Ensure estimand isn't one of the pre-specified values that are already included
+        """
+        return estimand not in ("dem_votes", "gop_votes", "total_votes")
+
+    def check_input_columns(self, columns):
+        """
+        Check that input columns contain all necessary values for a calculation
+        """
+        available = self.data_handler.data.columns
+        return all(col in available for col in columns)
+
+    def predict_estimands(self, estimand):
+        """
+        Select, store on `self.transformations` and return the steps for an estimand.
+        Raises ValueError for an unsupported estimand; input columns are not validated here.
+        """
+        if estimand not in self.transformation_map:
+            raise ValueError(f"Estimand '{estimand}' is not supported.")
+        self.transformations = self.transformation_map[estimand]
+        return self.transformations
+
+    def create_estimand(self, estimand):
+        """
+        Create an estimand by running, in order, each of its steps that was selected
+        by `predict_estimands`; estimands missing from the map are ignored.
+        """
+        # Run every step, not just the first: multi-step estimands such as
+        # "education_impact" only produce their output in the later steps.
+        for transformation_func in self.transformation_map.get(estimand, []):
+            if transformation_func in self.transformations:
+                transformation_func()
+
+    def generate_estimands(self):
+        """
+        Main function to generate estimands; returns the (mutated) data handler
+        """
+        for estimand in self.estimands:
+            if self.check_estimand(estimand):
+                self.predict_estimands(estimand)
+                self.create_estimand(estimand)
+        return self.data_handler
+
+    # Transformation methods
+    def calculate_margin(self):
+        """Add `margin` = `results_dem` - `results_gop`."""
+        data = self.data_handler.data
+        data["margin"] = data["results_dem"] - data["results_gop"]
+
+    def calculate_voter_turnout_rate(self):
+        """Add `voter_turnout_rate` = `results_turnout` / `total_gen_voters`."""
+        data = self.data_handler.data
+        data["voter_turnout_rate"] = data["results_turnout"] / data["total_gen_voters"]
+
+    def standardize_median_household_income(self):
+        """Add `standardized_income` = (`median_household_income` - its mean) / its std."""
+        data = self.data_handler.data
+        income = data["median_household_income"]
+        data["standardized_income"] = (income - income.mean()) / income.std()
+
+    def create_age_groups(self):
+        """Add 0/1 `age_group_*` columns, 1 where the matching `age_*` column equals 1."""
+        data = self.data_handler.data
+        data["age_group_under_30"] = np.where(data["age_le_30"] == 1, 1, 0)
+        data["age_group_30_45"] = np.where(data["age_geq_30_le_45"] == 1, 1, 0)
+        data["age_group_45_65"] = np.where(data["age_geq_45_le_65"] == 1, 1, 0)
+        data["age_group_over_65"] = np.where(data["age_geq_65"] == 1, 1, 0)
+
+    def calculate_party_vote_share(self):
+        """Add `party_vote_share_dem` / `party_vote_share_gop` = party results / `results_turnout`."""
+        data = self.data_handler.data
+        data["party_vote_share_dem"] = data["results_dem"] / data["results_turnout"]
+        data["party_vote_share_gop"] = data["results_gop"] / data["results_turnout"]
+
+    def analyze_education_impact(self):
+        """Add `education_impact_*` = `percent_bachelor_or_higher` * `party_vote_share_*` (must exist)."""
+        data = self.data_handler.data
+        data["education_impact_dem"] = data["percent_bachelor_or_higher"] * data["party_vote_share_dem"]
+        data["education_impact_gop"] = data["percent_bachelor_or_higher"] * data["party_vote_share_gop"]
+
+    def investigate_gender_turnout_disparity(self):
+        """Add `gender_turnout_disparity` = `gender_f` - `gender_m`."""
+        data = self.data_handler.data
+        data["gender_turnout_disparity"] = data["gender_f"] - data["gender_m"]
+
+    def examine_ethnicity_voting_patterns(self):
+        """Add `vote_share_<ethnicity>` = `ethnicity_<ethnicity>` * `results_turnout` for each ethnicity."""
+        data = self.data_handler.data
+        ethnicities = [
+            "east_and_south_asian",
+            "european",
+            "hispanic_and_portuguese",
+            "likely_african_american",
+            "other",
+            "unknown",
+        ]
+        for ethnicity in ethnicities:
+            data[f"vote_share_{ethnicity}"] = data[f"ethnicity_{ethnicity}"] * data["results_turnout"]
+
+    def candidate(self):
+        """
+        Copy each non-"turnout" entry of `baseline_pointer` onto the first election entry
+
+        Expects `data_handler.data[election_id]` to be a sequence whose first
+        item is a mapping containing a "baseline_pointer" mapping.
+        """
+        election_data = self.data_handler.data[self.data_handler.election_id][0]
+        candidate_data = election_data["baseline_pointer"]
+        # Iterating `baseline_pointer` (not `election_data`) keeps the key insertions below safe.
+        for cand_name in candidate_data:
+            if cand_name != "turnout":
+                election_data[cand_name] = candidate_data[cand_name]
+
+    def explore_income_impact(self):
+        """Add `income_impact_*` = `standardized_income` * `party_vote_share_*` (both must exist)."""
+        data = self.data_handler.data
+        data["income_impact_dem"] = data["standardized_income"] * data["party_vote_share_dem"]
+        data["income_impact_gop"] = data["standardized_income"] * data["party_vote_share_gop"]
@@ -0,0 +1,121 @@
+from elexmodel.handlers.data.CombinedData import CombinedDataHandler
+from elexmodel.handlers.data.Estimandizer import Estimandizer
+from elexmodel.handlers.data.LiveData import MockLiveDataHandler
+from elexmodel.handlers.data.PreprocessedData import PreprocessedDataHandler
+
+
+def test_create_estimand_margin_preprocessed(va_governor_county_data):
+    """
+    Checks that requesting "margin" adds a margin entry to a preprocessed data handler
+    """
+    # Nothing is configured up front; the only estimand is the one requested
+    # from the Estimandizer below.
+    no_estimands, no_baseline = [], {}
+
+    handler = PreprocessedDataHandler(
+        "2017-11-07_VA_G",
+        "G",
+        "county",
+        no_estimands,
+        no_baseline,
+        data=va_governor_county_data.copy(),
+    )
+
+    estimandizer = Estimandizer(handler, ["margin"])
+    result_handler = estimandizer.generate_estimands()
+
+    assert "margin" in result_handler.data
+
+
+def test_create_estimand_voter_turnout_rate(va_governor_county_data):
+    """
+    Checks that requesting "voter_turnout_rate" adds that entry to preprocessed VA general data
+    """
+    requested = ["voter_turnout_rate"]
+    no_estimands, no_baseline = [], {}
+
+    # The handler is built on a copy of the fixture data.
+    handler = PreprocessedDataHandler(
+        "2017-11-07_VA_G",
+        "G",
+        "county",
+        no_estimands,
+        no_baseline,
+        data=va_governor_county_data.copy(),
+    )
+
+    estimandizer = Estimandizer(handler, requested)
+    result_handler = estimandizer.generate_estimands()
+
+    assert "voter_turnout_rate" in result_handler.data
+
+
+def test_create_estimand_age_combined(va_governor_county_data):
+    """
+    Checks that requesting "age_groups" adds age bracket entries when the
+    Estimandizer is given a combined data handler
+    """
+    frame = va_governor_county_data.copy()
+    election = "2017-11-07_VA_G"
+    office_code = "G"
+    unit_type = "county"
+    no_estimands = []
+    no_baseline = {}
+
+    preprocessed = PreprocessedDataHandler(
+        election, office_code, unit_type, no_estimands, no_baseline, data=frame
+    )
+    live = MockLiveDataHandler(election, office_code, unit_type, no_estimands, data=frame)
+
+    # Ask the mock live handler for as many fully reported units as the
+    # fixture has rows.
+    reported = live.get_n_fully_reported(n=frame.shape[0])
+
+    combined = CombinedDataHandler(
+        preprocessed.data,
+        reported,
+        no_estimands,
+        "county",
+        handle_unreporting="drop",
+    )
+
+    estimandizer = Estimandizer(combined, ["age_groups"])
+    result_handler = estimandizer.generate_estimands()
+
+    assert "age_group_30_45" in result_handler.data
+
+
+def test_candidate(tx_primary_governor_config):
+    """
+    Checks `{candidate_last_name}_{polID}` estimand generation on a preprocessed data handler (TX primaries)
+
+    {'2018-03-06_TX_R': [{
+        'office': 'G',
+        'states': ['TX'],
+        'geographic_unit_types': ['county'],
+        'baseline_results_year': 2014,
+        'historical_election': [],
+        'features': ['age_le_30', 'age_geq_30_le_45', 'age_geq_35_le_65', 'age_geq_65', 'ethnicity_east_and_south_asian', 'ethnicity_hispanic_and_portuguese', 'ethnicity_european', 'ethnicity_likely_african_american', 'ethnicity_other', 'ethnicity_unknown', 'median_household_income', 'percent_bachelor_or_higher'],
+        'aggregates': ['postal_code', 'county_classification'], 'fixed_effect': [],
+        'baseline_pointer': {'abbott_41404': 'abbott_41404', 'krueger_66077': 'abbott_41404', 'kilgore_57793': 'abbott_41404',
+        'turnout': 'turnout'}}]}
+
+    Each non-turnout baseline pointer (ex: abbott_41404) is copied onto the first entry under '2018-03-06_TX_R'
+    """
+    election = "2018-03-06_TX_R"
+    no_estimands, no_baseline = [], {}
+
+    handler = PreprocessedDataHandler(
+        election,
+        "G",
+        "county",
+        no_estimands,
+        no_baseline,
+        data=tx_primary_governor_config.copy(),
+    )
+
+    result_handler = Estimandizer(handler, ["candidate"]).generate_estimands()
+
+    # The pointer entries land on the first config item stored under the election id.
+    first_entry = result_handler.data[result_handler.election_id][0]
+    assert "abbott_41404" in first_entry