diff --git a/README.md b/README.md index c712cbb8..75ee47cf 100644 --- a/README.md +++ b/README.md @@ -100,12 +100,27 @@ elexmodel 2017-11-07_VA_G --estimands=turnout --estimands my_estimand --estimand Some model types have specific model parameters that can be included. -| Name | Type | Acceptable values | model | -|-----------|---------|-----------------------------|-----------------| -| lambda | numeric | 0-inf | all | -| robust | boolean | larger prediction intervals | `nonparametric` | -| beta | numeric | variance inflation | `gaussian` | -| winsorize | boolean | winsorize std estimate | `gaussian` | +| Name | Type | Acceptable values | model | +|-----------------------------------|---------|----------------------------------|-----------------| +| lambda | numeric | regularization constant | all | +| turnout_factor_lower | numeric | drop units with < turnout factor | all | +| turnout_factor_upper | numeric | drop units with > turnout factor | all | +| robust | boolean | larger prediction intervals | `nonparametric` | +| beta | numeric | variance inflation | `gaussian` | +| winsorize | boolean | winsorize std estimate | `gaussian` | +| B | numeric | bootstrap samples | `bootstrap` | +| T | numeric | temperature for aggregate | `bootstrap` | +| strata | list | groups to stratify bootstrap by | `bootstrap` | +| agg_model_hard_threshold | bool | hard threshold aggregate model | `bootstrap` | +| y_LB | numeric | lower bound norm. margin dist | `bootstrap` | +| y_UB | numeric | upper bound norm. margin dist | `bootstrap` | +| z_LB | numeric | lower bound turnout factor dist | `bootstrap` | +| z_UB | numeric | upper bound turnout factor dist | `bootstrap` | +| y_unobserved_lower_bound | numeric | lower bound for norm. margin | `bootstrap` | +| y_unobserved_upper_bound | numeric | upper bound for norm. margin | `bootstrap` | +| percent_expected_vote_error_bound | numeric | error tolerance on expected vote | `bootstrap` | +| z_unobserved_lower_bound | numeric | lower bound for turnout factor | `bootstrap` | +| z_unobserved_upper_bound | numeric | upper bound for turnout factor | `bootstrap` | This is the class and function that invokes the general function to generate estimates. You can install `elex-model` as a Python package and use this code snippet in other projects.
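For reference only (not part of the diff), the sketch below shows what a `model_parameters` dictionary for the new `bootstrap` method could look like. The keys and most values mirror the defaults visible in `BootstrapElectionModel.__init__` and `client.py` in this change; the `lambda_` value is an assumed placeholder, not a recommended setting.

```python
# Illustrative only: model_parameters for pi_method="bootstrap".
# Defaults mirror BootstrapElectionModel.__init__ and client.get_estimates below;
# lambda_ is a placeholder value.
model_parameters = {
    "lambda_": 0.1,                        # OLS regularization constant (non-negative)
    "turnout_factor_lower": 0.2,           # drop units whose turnout factor is below this
    "turnout_factor_upper": 2.5,           # drop units whose turnout factor is above this
    "B": 2000,                             # number of bootstrap samples
    "T": 5000,                             # temperature for the aggregate model
    "strata": ["county_classification"],   # columns to stratify the bootstrap by
    "agg_model_hard_threshold": False,     # hard threshold vs. sigmoid in the aggregate model
    "y_LB": -0.3,                          # normalized margin bounds for the strata distributions
    "y_UB": 0.3,
    "z_LB": -0.5,                          # turnout factor bounds for the strata distributions
    "z_UB": 0.5,
    "y_unobserved_lower_bound": -1.0,      # bounds on normalized margin for outstanding vote
    "y_unobserved_upper_bound": 1.0,
    "percent_expected_vote_error_bound": 0.5,  # error tolerance on expected vote
    "z_unobserved_lower_bound": 0.5,       # bounds on turnout factor for outstanding vote
    "z_unobserved_upper_bound": 1.5,
}
```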
diff --git a/src/elexmodel/cli.py b/src/elexmodel/cli.py index c7128ebe..387f24ed 100644 --- a/src/elexmodel/cli.py +++ b/src/elexmodel/cli.py @@ -35,12 +35,7 @@ def type_cast_value(self, ctx, value): "--pi_method", "pi_method", default="nonparametric", - type=click.Choice( - [ - "gaussian", - "nonparametric", - ] - ), + type=click.Choice(["gaussian", "nonparametric", "bootstrap"]), ) @click.option("--prediction_intervals", "prediction_intervals", default=[0.7, 0.9], multiple=True) @click.option("--percent_reporting_threshold", "percent_reporting_threshold", default=100) diff --git a/src/elexmodel/client.py b/src/elexmodel/client.py index db83feee..31f00f4b 100644 --- a/src/elexmodel/client.py +++ b/src/elexmodel/client.py @@ -10,6 +10,7 @@ from elexmodel.handlers.data.ModelResults import ModelResultsHandler from elexmodel.handlers.data.PreprocessedData import PreprocessedDataHandler from elexmodel.logging import initialize_logging +from elexmodel.models.BootstrapElectionModel import BootstrapElectionModel from elexmodel.models.ConformalElectionModel import ConformalElectionModel from elexmodel.models.GaussianElectionModel import GaussianElectionModel from elexmodel.models.NonparametricElectionModel import NonparametricElectionModel @@ -39,6 +40,7 @@ def __init__(self): super().__init__() self.all_conformalization_data_unit_dict = defaultdict(dict) self.all_conformalization_data_agg_dict = defaultdict(dict) + self.model = None def _check_input_parameters( self, @@ -88,8 +90,7 @@ def _check_input_parameters( ] if len(invalid_fixed_effects) > 0: raise ValueError(f"Fixed effect(s): {invalid_fixed_effects} not valid. Please check config") - - if pi_method not in {"gaussian", "nonparametric"}: + if pi_method not in {"gaussian", "nonparametric", "bootstrap"}: raise ValueError( f"Prediction interval method: {pi_method} is not valid. \ pi_method has to be either `gaussian` or `nonparametric`." @@ -102,6 +103,14 @@ def _check_input_parameters( not isinstance(model_parameters["lambda_"], (float, int)) or model_parameters["lambda_"] < 0 ): raise ValueError("lambda is not valid. It has to be numeric and greater than zero.") + if "turnout_factor_lower" in model_parameters and not isinstance( + model_parameters["turnout_factor_lower"], float + ): + raise ValueError("turnout_factor_lower is not valid. Has to be a float.") + if "turnout_factor_upper" in model_parameters and not isinstance( + model_parameters["turnout_factor_upper"], float + ): + raise ValueError("turnout_factor_upper is not valid. Has to be a float.") if pi_method == "gaussian": if "beta" in model_parameters and not isinstance(model_parameters["beta"], (int, float)): raise ValueError("beta is not valid. Has to be either an integer or a float.") @@ -110,7 +119,45 @@ def _check_input_parameters( elif pi_method == "nonparametric": if "robust" in model_parameters and not isinstance(model_parameters["robust"], bool): raise ValueError("robust is not valid. Has to be a boolean.") - + elif pi_method == "bootstrap": + if "B" in model_parameters and not isinstance(model_parameters["B"], int): + raise ValueError("B is not valid. Has to be either an integer.") + if "T" in model_parameters and not isinstance(model_parameters["T"], (int, float)): + raise ValueError("T is not valid. Has to be either an integer or a float.") + if "strata" in model_parameters and not isinstance(model_parameters["strata"], list): + raise ValueError("strata is not valid. 
Has to be a list.") + if "agg_model_hard_threshold" in model_parameters and not isinstance( + model_parameters["agg_model_hard_threshold"], bool + ): + raise ValueError("agg_model_hard_threshold is not valid. Has to be a boolean.") + if "y_LB" in model_parameters and not isinstance(model_parameters["y_LB"], float): + raise ValueError("y_LB is not valid. Has to be a float.") + if "y_UB" in model_parameters and not isinstance(model_parameters["y_UB"], float): + raise ValueError("y_UB is not valid. Has to be a float.") + if "z_LB" in model_parameters and not isinstance(model_parameters["z_LB"], float): + raise ValueError("z_LB is not valid. Has to be a float.") + if "z_UB" in model_parameters and not isinstance(model_parameters["z_UB"], float): + raise ValueError("z_UB is not valid. Has to be a float.") + if "y_unobserved_upper_bound" in model_parameters and not isinstance( + model_parameters["y_unobserved_upper_bound"], float + ): + raise ValueError("y_unobserved_upper_bound is not valid. Has to be a float.") + if "y_unobserved_lower_bound" in model_parameters and not isinstance( + model_parameters["y_unobserved_lower_bound"], float + ): + raise ValueError("y_unobserved_lower_bound is not valid. Has to be a float.") + if "percent_expected_vote_error_bound" in model_parameters and not isinstance( + model_parameters["percent_expected_vote_error_bound"], float + ): + raise ValueError("percent_expected_vote_error_bound is not valid. Has to be a float.") + if "z_unobserved_upper_bound" in model_parameters and not isinstance( + model_parameters["z_unobserved_upper_bound"], float + ): + raise ValueError("z_unobserved_upper_bound is not valid. Has to be a float.") + if "z_unobserved_lower_bound" in model_parameters and not isinstance( + model_parameters["z_unobserved_lower_bound"], float + ): + raise ValueError("z_unobserved_lower_bound is not valid. 
Has to be a float.") if handle_unreporting not in {"drop", "zero"}: raise ValueError("handle_unreporting must be either `drop` or `zero`") @@ -122,6 +169,9 @@ def get_aggregate_list(self, office, aggregate): raw_aggregate_list = base_aggregate + [aggregate] return sorted(list(set(raw_aggregate_list)), key=lambda x: AGGREGATE_ORDER.index(x)) + def get_national_summary_votes_estimates(self, nat_sum_data_dict=None, called_states={}, base_to_add=0, alpha=0.99): + return self.model.get_national_summary_estimates(nat_sum_data_dict, called_states, base_to_add, alpha=alpha) + def get_estimates( self, current_data, # list of lists @@ -158,10 +208,15 @@ def get_estimates( save_conformalization = "conformalization" in save_output handle_unreporting = kwargs.get("handle_unreporting", "drop") + district_election = False + if office in {"H", "Y", "Z"}: + district_election = True + model_settings = { "election_id": election_id, "office": office, "geographic_unit_type": geographic_unit_type, + "district_election": district_election, "features": features, "fixed_effects": fixed_effects, "save_conformalization": save_conformalization, @@ -172,6 +227,7 @@ def get_estimates( config_handler = ConfigHandler( election_id, config=raw_config, s3_client=s3.S3JsonUtil(TARGET_BUCKET), save=save_config ) + self._check_input_parameters( config_handler, office, @@ -184,6 +240,7 @@ def get_estimates( model_parameters, handle_unreporting, ) + states_with_election = config_handler.get_states(office) estimand_baselines = config_handler.get_estimand_baselines(office, estimands) @@ -213,13 +270,18 @@ def get_estimates( handle_unreporting=handle_unreporting, ) + turnout_factor_lower = model_parameters.get("turnout_factor_lower", 0.2) + turnout_factor_upper = model_parameters.get("turnout_factor_upper", 2.5) + reporting_units = data.get_reporting_units( - percent_reporting_threshold, features_to_normalize=features, add_intercept=True + percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper ) nonreporting_units = data.get_nonreporting_units( - percent_reporting_threshold, features_to_normalize=features, add_intercept=True + percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper + ) + unexpected_units = data.get_unexpected_units( + percent_reporting_threshold, aggregates, turnout_factor_lower, turnout_factor_upper ) - unexpected_units = data.get_unexpected_units(percent_reporting_threshold, aggregates) LOG.info( "Model parameters: \n prediction intervals: %s, percent reporting threshold: %s, \ @@ -232,13 +294,15 @@ def get_estimates( ) if pi_method == "nonparametric": - model = NonparametricElectionModel(model_settings=model_settings) + self.model = NonparametricElectionModel(model_settings=model_settings) elif pi_method == "gaussian": - model = GaussianElectionModel(model_settings=model_settings) + self.model = GaussianElectionModel(model_settings=model_settings) + elif pi_method == "bootstrap": + self.model = BootstrapElectionModel(model_settings=model_settings) minimum_reporting_units_max = 0 for alpha in prediction_intervals: - minimum_reporting_units = model.get_minimum_reporting_units(alpha) + minimum_reporting_units = self.model.get_minimum_reporting_units(alpha) if minimum_reporting_units > minimum_reporting_units_max: minimum_reporting_units_max = minimum_reporting_units @@ -270,24 +334,26 @@ def get_estimates( ) for estimand in estimands: - unit_predictions = model.get_unit_predictions(reporting_units, nonreporting_units, estimand) + unit_predictions = self.model.get_unit_predictions( + 
reporting_units, nonreporting_units, estimand, unexpected_units=unexpected_units + ) results_handler.add_unit_predictions(estimand, unit_predictions) # gets prediciton intervals for each alpha alpha_to_unit_prediction_intervals = {} for alpha in prediction_intervals: - alpha_to_unit_prediction_intervals[alpha] = model.get_unit_prediction_intervals( + alpha_to_unit_prediction_intervals[alpha] = self.model.get_unit_prediction_intervals( results_handler.reporting_units, results_handler.nonreporting_units, alpha, estimand ) - if isinstance(model, ConformalElectionModel): + if isinstance(self.model, ConformalElectionModel): self.all_conformalization_data_unit_dict[alpha][ estimand - ] = model.get_all_conformalization_data_unit() + ] = self.model.get_all_conformalization_data_unit() results_handler.add_unit_intervals(estimand, alpha_to_unit_prediction_intervals) for aggregate in results_handler.aggregates: aggregate_list = self.get_aggregate_list(office, aggregate) - estimates_df = model.get_aggregate_predictions( + estimates_df = self.model.get_aggregate_predictions( results_handler.reporting_units, results_handler.nonreporting_units, results_handler.unexpected_units, @@ -296,7 +362,7 @@ def get_estimates( ) alpha_to_agg_prediction_intervals = {} for alpha in prediction_intervals: - alpha_to_agg_prediction_intervals[alpha] = model.get_aggregate_prediction_intervals( + alpha_to_agg_prediction_intervals[alpha] = self.model.get_aggregate_prediction_intervals( results_handler.reporting_units, results_handler.nonreporting_units, results_handler.unexpected_units, @@ -305,10 +371,10 @@ def get_estimates( alpha_to_unit_prediction_intervals[alpha], estimand, ) - if isinstance(model, ConformalElectionModel): + if isinstance(self.model, ConformalElectionModel): self.all_conformalization_data_agg_dict[alpha][ estimand - ] = model.get_all_conformalization_data_agg() + ] = self.model.get_all_conformalization_data_agg() # get all of the prediction intervals here results_handler.add_agg_predictions( diff --git a/src/elexmodel/handlers/config.py b/src/elexmodel/handlers/config.py index f04874eb..b6e8fb39 100644 --- a/src/elexmodel/handlers/config.py +++ b/src/elexmodel/handlers/config.py @@ -69,11 +69,15 @@ def get_estimand_baselines(self, office, estimands): Return dict of baseline pointers for requested estimands """ baseline_pointers = {estimand: self.get_baseline_pointer(office).get(estimand) for estimand in estimands} + if "margin" in estimands: + baseline_pointers["margin"] = "margin" return baseline_pointers def get_estimands(self, office): baseline_pointer = self.get_baseline_pointer(office) estimands = list(baseline_pointer.keys()) + if self.election_id.endswith("G"): + estimands += ["margin"] # would otherwise need to add margin to every single config return estimands def get_states(self, office): @@ -92,7 +96,12 @@ def get_geographic_unit_types(self, office): return self._get_office_subconfig(office).get("geographic_unit_types") def get_features(self, office): - return self._get_office_subconfig(office).get("features", []) + features = self._get_office_subconfig(office).get("features", []) + if self.election_id.endswith("G"): + features += [ + "baseline_normalized_margin" + ] # would otherwise need to add baseline_margin to every single config + return features def get_aggregates(self, office): return self._get_office_subconfig(office).get("aggregates", []) diff --git a/src/elexmodel/handlers/data/CombinedData.py b/src/elexmodel/handlers/data/CombinedData.py index d21f64a3..eb4c0863 100644 --- 
a/src/elexmodel/handlers/data/CombinedData.py +++ b/src/elexmodel/handlers/data/CombinedData.py @@ -1,3 +1,6 @@ +import numpy as np +import pandas as pd + from elexmodel.handlers import s3 from elexmodel.handlers.data.Estimandizer import Estimandizer from elexmodel.utils.file_utils import S3_FILE_PATH, TARGET_BUCKET, convert_df_to_csv @@ -20,7 +23,6 @@ def __init__( estimandizer = Estimandizer() (current_data, _) = estimandizer.add_estimand_results(current_data.copy(), self.estimands, False) - # if we're running this for a past election, drop results columns from preprocessed data # so we use results_{estimand} numbers from current_data preprocessed_results_columns = list(filter(lambda col: col.startswith("results_"), preprocessed_data.columns)) @@ -46,23 +48,22 @@ def __init__( self.data = data - def get_reporting_units( - self, - percent_reporting_threshold, - turnout_factor_lower=0.5, - turnout_factor_upper=1.5, - features_to_normalize=[], - add_intercept=True, - ): + def get_reporting_units(self, percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper): """ Get reporting data. These are units where the expected vote is greater than the percent reporting threshold. """ + # units where the expected vote is greater than the percent reporting threshold reporting_units = self.data[self.data.percent_expected_vote >= percent_reporting_threshold].reset_index( drop=True ) - # if turnout factor less than 0.5 or greater than 1.5 assume AP made a mistake and don't treat those as reporting units - reporting_units = reporting_units[reporting_units.turnout_factor > turnout_factor_lower] - reporting_units = reporting_units[reporting_units.turnout_factor < turnout_factor_upper] + + # remove unexpected units + unexpected_units = self._get_unexpected_units( + percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper + ) + reporting_units = reporting_units[ + ~reporting_units.geographic_unit_fips.isin(unexpected_units.geographic_unit_fips) + ].reset_index(drop=True) # residualize + normalize for estimand in self.estimands: @@ -75,23 +76,23 @@ def get_reporting_units( return reporting_units - def get_nonreporting_units( - self, - percent_reporting_threshold, - turnout_factor_lower=0.5, - turnout_factor_upper=1.5, - features_to_normalize=[], - add_intercept=True, - ): + def get_nonreporting_units(self, percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper): """ Get nonreporting data. 
These are units where expected vote is less than the percent reporting threshold """ - # if turnout factor <= turnout_factor_lower or >= turnout_factor_upper assume the AP made a mistake and treat them as non-reporting units - nonreporting_units = self.data.query( - "(percent_expected_vote < @percent_reporting_threshold) | (turnout_factor <= @turnout_factor_lower) | (turnout_factor >= @turnout_factor_upper)" # - ).reset_index( # not checking if results.isnull() anymore across multiple estimands + # units where expected vote is less than the percent reporting threshold + nonreporting_units = self.data[self.data.percent_expected_vote < percent_reporting_threshold].reset_index( drop=True ) + + # remove unexpected units + unexpected_units = self._get_unexpected_units( + percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper + ) + nonreporting_units = nonreporting_units[ + ~nonreporting_units.geographic_unit_fips.isin(unexpected_units.geographic_unit_fips) + ].reset_index(drop=True) + nonreporting_units["reporting"] = int(0) nonreporting_units["expected"] = True @@ -104,6 +105,9 @@ def _get_expected_geographic_unit_fips(self): # data is only expected units since left join of preprocessed data in initialization return self.data.geographic_unit_fips + def _get_units_without_baseline(self): + return self.data[np.isclose(self.data.baseline_weights, 0)].geographic_unit_fips + def _get_county_fips_from_geographic_unit_fips(self, geographic_unit_fips): """ Get county fips for geographic unit fips @@ -123,17 +127,37 @@ def _get_district_from_geographic_unit_fips(self, geographic_unit_fips): components = geographic_unit_fips.split("_") return components[0] - def get_unexpected_units(self, percent_reporting_threshold, aggregates): - """ - Gets reporting but unexpected data. These are units that are may or may not be fully - reporting, but we have no preprocessed data for them. - """ + # TODO: rename unexpected units to be non-modeled units + def _get_unexpected_units(self, percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper): expected_geographic_units = self._get_expected_geographic_unit_fips().tolist() + no_baseline_units = self._get_units_without_baseline() # Note: this uses current_data because self.data drops unexpected units unexpected_units = self.current_data[ ~self.current_data["geographic_unit_fips"].isin(expected_geographic_units) + | self.current_data.geographic_unit_fips.isin(no_baseline_units) ].reset_index(drop=True) + units_with_strange_turnout_factor = self.data[ + (self.data.percent_expected_vote >= percent_reporting_threshold) + & ((self.data.turnout_factor <= turnout_factor_lower) | (self.data.turnout_factor >= turnout_factor_upper)) + ][self.current_data.columns] + + all_unexpected_units = pd.concat([unexpected_units, units_with_strange_turnout_factor]).reset_index(drop=True) + all_unexpected_units.drop_duplicates(subset="geographic_unit_fips", inplace=True) + return all_unexpected_units + + def get_unexpected_units(self, percent_reporting_threshold, aggregates, turnout_factor_lower, turnout_factor_upper): + """ + Gets units for which we will not be making predictions: + - unexpected units (ie. units for which we have no covariates prepared) + - units for which the baseline results is zero (ie. units that are tiny) + - units with strange turnout factors (ie. 
units that are likely precinct mismatches) + """ + + unexpected_units = self._get_unexpected_units( + percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper + ) + # since we were not expecting them, we have don't have their county or district # from preprocessed data. so we have to add that back in. if "county_fips" in aggregates: @@ -146,7 +170,7 @@ def get_unexpected_units(self, percent_reporting_threshold, aggregates): self._get_district_from_geographic_unit_fips ) - unexpected_units["reporting"] = int(1) + unexpected_units["reporting"] = int(0) unexpected_units["expected"] = False return unexpected_units diff --git a/src/elexmodel/handlers/data/Estimandizer.py b/src/elexmodel/handlers/data/Estimandizer.py index 038bd623..8f6e41a4 100644 --- a/src/elexmodel/handlers/data/Estimandizer.py +++ b/src/elexmodel/handlers/data/Estimandizer.py @@ -13,6 +13,12 @@ def add_estimand_results(self, data_df, estimands, historical): columns_to_return = [] turnout_col = f"{RESULTS_PREFIX}turnout" + # if historical is true then we are running this only to add turnout and results + # columns that can be empty (since this is run by the live results handler to get + # the reporting counties. So we don't actually need to add the weights yet. + if not historical and f"{RESULTS_PREFIX}weights" not in data_df: + data_df = self.add_weights(data_df, RESULTS_PREFIX) + for estimand in estimands: results_col = f"{RESULTS_PREFIX}{estimand}" additional_columns_added = [] @@ -44,14 +50,13 @@ def add_estimand_results(self, data_df, estimands, historical): if turnout_col not in columns_to_return: columns_to_return.append(turnout_col) - data_df = self.add_weights(data_df, RESULTS_PREFIX) - return data_df, columns_to_return def add_estimand_baselines(self, data_df, estimand_baselines, historical, include_results_estimand=False): # if we are in a historical election we are only reading preprocessed data to get # the historical election results of the currently reporting units. # so we don't care about the total voters or the baseline election. + data_df = self.add_weights(data_df, BASELINE_PREFIX) for estimand, pointer in estimand_baselines.items(): if pointer is None: @@ -74,8 +79,20 @@ def add_estimand_baselines(self, data_df, estimand_baselines, historical, includ # we need to add the results from the historical election as well. data_df, ___ = self.add_estimand_results(data_df, estimand_baselines.keys(), historical) - data_df = self.add_weights(data_df, BASELINE_PREFIX) + return data_df + def add_weights(self, data_df, col_prefix): + data_df[f"{col_prefix}weights"] = data_df[f"{col_prefix}turnout"] + return data_df + + def add_turnout_factor(self, data_df): + # posinf and neginf are also set to zero because dividing by zero can lead to nan/posinf/neginf depending + # on the type of the numeric in the numpy array. Assume that if baseline_weights is zero then turnout + # would be incredibly low in this election too (ie. 
this is effectively an empty precinct) and so setting + # the turnout factor to zero is fine + data_df["turnout_factor"] = np.nan_to_num( + data_df.results_weights / data_df.baseline_weights, nan=0, posinf=0, neginf=0 + ) return data_df def add_weights(self, data_df, col_prefix): @@ -100,4 +117,16 @@ def party_vote_share_dem(data_df, col_prefix): data_df[f"{col_prefix}party_vote_share_dem"] = np.nan_to_num( data_df[f"{col_prefix}dem"] / data_df[f"{col_prefix}turnout"] ) + return data_df, [] + + +def margin(data_df, col_prefix): + # in the margin case we are overwriting baseline_weights with two party turnout + generated_weights_column_name = f"{col_prefix}weights" + generated_margin_column_name = f"{col_prefix}margin" + generated_normalized_margin_column_name = f"{col_prefix}normalized_margin" + data_df[generated_weights_column_name] = data_df[f"{col_prefix}dem"] + data_df[f"{col_prefix}gop"] + data_df[generated_margin_column_name] = data_df[f"{col_prefix}dem"] - data_df[f"{col_prefix}gop"] + data_df[generated_normalized_margin_column_name] = data_df[f"{col_prefix}margin"] / data_df[f"{col_prefix}weights"] + return data_df, [generated_weights_column_name, generated_normalized_margin_column_name] diff --git a/src/elexmodel/models/BootstrapElectionModel.py b/src/elexmodel/models/BootstrapElectionModel.py new file mode 100644 index 00000000..d81e74a5 --- /dev/null +++ b/src/elexmodel/models/BootstrapElectionModel.py @@ -0,0 +1,1350 @@ +from __future__ import annotations + +import logging + +import numpy as np +import pandas as pd +from elexsolver.OLSRegressionSolver import OLSRegressionSolver +from elexsolver.QuantileRegressionSolver import QuantileRegressionSolver +from scipy.special import expit + +from elexmodel.handlers.data.Featurizer import Featurizer +from elexmodel.models.BaseElectionModel import BaseElectionModel, PredictionIntervals + +LOG = logging.getLogger(__name__) + + +class BootstrapElectionModelException(Exception): + pass + + +class BootstrapElectionModel(BaseElectionModel): + """ + The bootstrap election model. + + This model uses ordinary least squares regression for point predictions and the bootstrap to generate prediction intervals. + + In this setup, we are modeling normalized two party margin. But because we need to sum our estimand from the unit level to the aggregate level, we need + to be able to convert normalized margin to unnormalized margin (since normalized margins don't sum). This means on the unit level we will be modeling + unnormalized margin and on the aggregate level normalized margin. + + We have found that instead of modeling the unit margin directly, decomposing the estimand into two quantities works better because the linear + model assumption is more plausible for each individually. The two quantities are normalized margin (y) and a turnout factor (z) + y = (D^{Y} - R^{Y}) / (D^{Y} + R^{Y}) + z = (D^{Y} + R^{Y}) / (D^{Y'} + R^{Y'}) + where Y is the year we are modeling and Y' is the previous election year. + + If we define weights to be the total two party turnout in a previous election: + w = (D^{Y'} + R^{Y'}) -> w * y * z = (D^{Y} - R^{Y}) + so (w * y * z) is the unnormalized margin. + + We define our model as: + y_i = f_y(x) + epsilon_y(x) + z_i = f_z(x) + epsilon_z(x) + where f_y(x) and f_z(x) are ordinary least squares regressions and the epsilons are contest (state/district) level random effects. 
+ """ + + def __init__(self, model_settings={}): + super().__init__(model_settings) + self.B = model_settings.get("B", 2000) # number of bootstrap samples + self.strata = model_settings.get("strata", ["county_classification"]) # columns to stratify the data by + self.T = model_settings.get("T", 5000) # temperature for aggregate model + self.hard_threshold = model_settings.get( + "agg_model_hard_threshold", False + ) # use sigmoid or hard threshold when calculating agg model + self.district_election = model_settings.get("district_election", False) + + # upper and lower bounds for the quantile regression which define the strata distributions + # these make sure that we can control the worst cases for the distributions in case we + # haven't seen enough data yet + self.y_LB = model_settings.get("y_LB", -0.3) # normalized margin lower bound + self.y_UB = model_settings.get("y_UB", 0.3) # normalized margin upper bound + self.z_LB = model_settings.get("z_LB", -0.5) # turnout factor lower bound + self.z_UB = model_settings.get("z_UB", 0.5) # turnout factor upper bound + + # percentiles to compute the strata distributions for + self.taus_lower = np.arange(0.01, 0.5, 0.01) + self.taus_upper = np.arange(0.50, 1, 0.01) + self.taus = np.concatenate([self.taus_lower, self.taus_upper]) + + # upper and lower bounds for how far the "outstanding vote" in + # non-reporting units can push the normalized margin and turnout factor. Used to clip our predictions + self.y_unobserved_lower_bound = model_settings.get("y_unobserved_lower_bound", -1.0) + self.y_unobserved_upper_bound = model_settings.get("y_unobserved_upper_bound", 1.0) + self.percent_expected_vote_error_bound = model_settings.get("percent_expected_vote_error_bound", 0.5) + self.z_unobserved_upper_bound = model_settings.get("z_unobserved_upper_bound", 1.5) + self.z_unobserved_lower_bound = model_settings.get("z_unobserved_lower_bound", 0.5) + + self.featurizer = Featurizer(self.features, self.fixed_effects) + self.seed = model_settings.get("seed", 0) + self.rng = np.random.default_rng(seed=self.seed) # used for sampling + self.ran_bootstrap = False + + # We assume that the baseline normalized margin (D^{Y'} - R^{Y'}) / (D^{Y'} + R^{Y'}) is one of the covariates + if "baseline_normalized_margin" not in self.features: + raise BootstrapElectionModelException( + "baseline_normalized_margin not included as feature. This is necessary for the model to work." + ) + + def cv_lambda( + self, x: np.ndarray, y: np.ndarray, lambdas_: np.ndarray, weights: np.ndarray | None = None, k: int = 5 + ) -> float: + """ + This function does k-fold cross validation for an OLS regression model given x, y and a set of lambdas to try out + This function returns the lambda that minimizes the k-fold cross validation loss + """ + # if weights are none assume that all samples have equal weights + if weights is None: + weights = np.ones((y.shape[0], 1)) + # concatenate since we need to keep x, y, weight samples together + x_y_w = np.concatenate([x, y, weights], axis=1) + self.rng.shuffle(x_y_w) + # generate k chunks + chunks = np.array_split(x_y_w, k, axis=0) + errors = np.zeros((len(lambdas_),)) + # for each possible lambda value perform k-fold cross validation + # ie. 
train model on k-1 chunks and evaluate on one chunk (for all possible k combinations of heldout chunk) + for i, lambda_ in enumerate(lambdas_): + for test_chunk in range(k): + x_y_w_test = chunks[test_chunk] + # extract all chunks except for the current test chunk + x_y_w_train = np.concatenate(chunks[:test_chunk] + chunks[(test_chunk + 1) :], axis=0) # noqa: 203 + # undo the concatenation above + x_test = x_y_w_test[:, :-2] + y_test = x_y_w_test[:, -2] + w_test = x_y_w_test[:, -1] + x_train = x_y_w_train[:, :-2] + y_train = x_y_w_train[:, -2] + w_train = x_y_w_train[:, -1] + ols_lambda = OLSRegressionSolver() + ols_lambda.fit( + x_train, + y_train, + weights=w_train, + lambda_=lambda_, + fit_intercept=True, + regularize_intercept=False, + n_feat_ignore_reg=1, + ) + y_hat_lambda = ols_lambda.predict(x_test) + # error is the weighted sum of squares of the residual between the actual heldout y and the predicted y on the heldout set + errors[i] += np.sum( + w_test * ols_lambda.residuals(y_test, y_hat_lambda, loo=False, center=False) ** 2 + ) / np.sum(w_test) + # return lambda that minimizes the k-fold error + # np.argmin returns the first occurrence if there are multiple minimum values + return lambdas_[np.argmin(errors)] + + def get_minimum_reporting_units(self, alpha: float) -> int: + # arbitrary, just enough to fit coefficients + return 10 + + def _estimate_epsilon(self, residuals: np.ndarray, aggregate_indicator: np.ndarray) -> np.ndarray: + """ + This function estimates the epsilon (contest level random effects) + """ + # the estimate for epsilon is the average of the residuals in each contest + epsilon_hat = (aggregate_indicator.T @ residuals) / aggregate_indicator.sum(axis=0).reshape(-1, 1) + # we can't estimate a contest level effect if we only have 1 unit in that contest (since our residual can just + # be made equal to zero by setting the random effect to that value) + epsilon_hat[aggregate_indicator.sum(axis=0) < 2] = 0 + + return epsilon_hat + + def _estimate_delta( + self, residuals: np.ndarray, epsilon_hat: np.ndarray, aggregate_indicator: np.ndarray + ) -> np.ndarray: + """ + This function estimates delta (the final unit level residual of the model) + """ + # our estimate for delta is the difference between the residual and + # what can be explained by the contest level random effect + return (residuals - (aggregate_indicator @ epsilon_hat)).flatten() + + def _estimate_model_errors( + self, model: OLSRegressionSolver, x: np.ndarray, y: np.ndarray, aggregate_indicator: np.ndarray + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + This function estimates all components of the error in our bootstrap model + residual: the centered leave one out residual, ie the difference between our OLS prediction and our actual training values + this includes the component of the error that we want to explain using a contest level random effect + epsilon_hat: our estimate for the contest level effect in our model + (e.g. how much did each state contribute) + delta_hat: the unit level error (ie. 
the difference between the OLS residual and the contest level state effect) + """ + # get unit level predictions from OLS + y_pred = model.predict(x) + # compute residuals + residuals_y = model.residuals(y, y_pred, loo=True, center=True) + # estimate contest level effect (the average residual in units of a contest) + epsilon_y_hat = self._estimate_epsilon(residuals_y, aggregate_indicator) + # compute delta, which is the left over residual after removing the contest level effect + delta_y_hat = self._estimate_delta(residuals_y, epsilon_y_hat, aggregate_indicator) + return residuals_y, epsilon_y_hat, delta_y_hat + + def _estimate_strata_dist( + self, + x_train: np.ndarray, + x_train_strata: np.ndarray, + x_test: np.ndarray, + x_test_strata: np.ndarray, + delta_hat: np.ndarray, + lb: float, + ub: float, + ) -> tuple[dict, dict]: + """ + This function generates the distribution (ie. CDF/PPF) for the strata in which we want to exchange + bootstrap errors in this model. + """ + stratum_ppfs_delta = {} + stratum_cdfs_delta = {} + + def ppf_creator(betas: np.ndarray, taus: np.ndarray, lb: float, ub: float) -> float: + """ + Creates a probability point function (inverse of a cumulative distribution function -- CDF) + Given a percentile, provides the value of the CDF at that point + """ + # we interpolate, because we want to return smooth betas + return lambda p: np.interp(p, taus, betas, lb, ub) + + def cdf_creator(betas: np.ndarray, taus: np.ndarray) -> float: + """ + Creates a cumulative distribution function (CDF) + Provides the probability that a value is at most x + """ + # interpolates because we want to provide smooth probabilites + return lambda x: np.interp(x, betas, taus, right=1) + + # we need the unique strata that exist in both the training and in the holdout data + # since we want a distribution for all strata + x_strata = np.unique(np.concatenate([x_train_strata, x_test_strata], axis=0), axis=0).astype(int) + + # We compute the probability distribution for each stratum by fitting quantile regressions + # this works because the regression covariates are only dummy variables that define + # the strata. Therefore the i-th coefficient for a quantile regression at level tau is the + # tau-th delta for where dummy == 1 + # ie. if tau is 0.5 and there are two unique dummies (x = [[0, 1], [1, 0], ...]), then + # the first coefficient is the median of all deltas where x = [1, 0] and the second coefficient + # is the median of all deltas where x = [0, 1] + # since the covariates define the strata, we get the tau-th (e.g. median or 30th percentile) + # delta per strata as the beta for that regression, which defines a probability distribution. + + # for each stratum we want to add a worst case lower bound (for the taus between 0-0.49) + # and upper bound (for the taus between 0.5-1) so that if we sample a uniform from a different + # stratum that is larger/smaller than any one we have seen in this strata, we have a worst case + # value that is larger/smaller than what we saw in that strata. We do this by simply adding the + # lower/upper bound to the regression, one pair for each stratum. 
+ # This lower/upper bound is set manually, note that if we observe a value that is more extreme than + # the lower/upper bound we are fine, since the quantile regression will use that instead + for x_stratum in x_strata: + x_train_aug = np.concatenate([x_train_strata, x_stratum.reshape(1, -1)], axis=0) + + delta_aug_lb = np.concatenate([delta_hat, [lb]]) + delta_aug_ub = np.concatenate([delta_hat, [ub]]) + + # fit the regressions to create the probability distributions + # for a single regression beta[i] is the tau-th (e.g. median or 30th percentile) + # for where dummy variable position i is equal to 1 + # since we are fitting many quantile regressions at the same time, our beta is + # beta[tau, i] where tau stretches from 0.01 to 0.99 + qr_lower = QuantileRegressionSolver() + qr_lower.fit(x_train_aug, delta_aug_lb, self.taus_lower, fit_intercept=False) + betas_lower = qr_lower.coefficients + + qr_upper = QuantileRegressionSolver() + qr_upper.fit(x_train_aug, delta_aug_ub, self.taus_upper, fit_intercept=False) + betas_upper = qr_upper.coefficients + + betas = np.concatenate([betas_lower, betas_upper]) + + # for each strata, we take the betas that belong to that stratum + # ie. for stratum [0, 1, 0] we take the betas (there is one for each tau between 0.01, 0.99]) + # at position 1 (0-indexed) + + # get all the betas for where x_stratum has a 1 (ie [1, 0, 0] position 0, [0, 1, 0] position 1 etc.) + betas_stratum = betas[:, np.where(x_stratum == 1)[0]].sum(axis=1) + + # for this stratum value create ppf + # we want the lower bounds and upper bounds to be the actual lower and upper values taken from beta + stratum_ppfs_delta[tuple(x_stratum)] = ppf_creator( + betas_stratum, self.taus, np.min(betas_stratum), np.max(betas_stratum) + ) + + # for this stratum value create cdf + stratum_cdfs_delta[tuple(x_stratum)] = cdf_creator(betas_stratum, self.taus) + + return stratum_ppfs_delta, stratum_cdfs_delta + + # TODO: figure out how to better estimate margin_upper/lower_bound + def _generate_nonreporting_bounds( + self, nonreporting_units: pd.DataFrame, bootstrap_estimand: str, n_bins: int = 10 + ) -> tuple[np.ndarray, np.ndarray]: + """ + This function creates upper and lower bounds for y and z based on the expected vote + that we have for each unit. This is used to clip our predictions + """ + + # turn expected for nonreporting units into decimal (also clip at 100) + nonreporting_expected_vote_frac = nonreporting_units.percent_expected_vote.values.clip(max=100) / 100 + if bootstrap_estimand == "results_normalized_margin": + unobserved_upper_bound = self.y_unobserved_upper_bound + unobserved_lower_bound = self.y_unobserved_lower_bound + # the upper bound for LHS party if all the outstanding vote go in their favour + upper_bound = ( + nonreporting_expected_vote_frac * nonreporting_units[bootstrap_estimand] + + (1 - nonreporting_expected_vote_frac) * unobserved_upper_bound + ) + # the lower bound for the LHS party if all the outstanding vote go against them + lower_bound = ( + nonreporting_expected_vote_frac * nonreporting_units[bootstrap_estimand] + + (1 - nonreporting_expected_vote_frac) * unobserved_lower_bound + ) + elif bootstrap_estimand == "turnout_factor": + # our error bound for how much error we think our results provider has with expected vote + # e.g. 
0.7 of the vote is in for a county, if percent_expected_vote_error_bound is 0.1 + # then we are saying that we believe between 0.6 and 0.8 of the vote is in for that county + percent_expected_vote_error_bound = self.percent_expected_vote_error_bound + unobserved_upper_bound = self.z_unobserved_upper_bound + unobserved_lower_bound = self.z_unobserved_lower_bound + # inflate or deflate turnout factor appropriately + lower_bound = nonreporting_units[bootstrap_estimand] / ( + nonreporting_expected_vote_frac + percent_expected_vote_error_bound + ) + upper_bound = nonreporting_units[bootstrap_estimand] / ( + nonreporting_expected_vote_frac - percent_expected_vote_error_bound + ).clip(min=0.01) + # if 0 percent of the vote is in, the upper bound would be zero if we used the above + # code. So instead we set it to the naive bound + upper_bound[np.isclose(upper_bound, 0)] = unobserved_upper_bound + + # if percent reporting is 0 or 1, don't try to compute anything and revert to naive bounds + lower_bound[ + np.isclose(nonreporting_expected_vote_frac, 0) | np.isclose(nonreporting_expected_vote_frac, 1) + ] = unobserved_lower_bound + upper_bound[ + np.isclose(nonreporting_expected_vote_frac, 0) | np.isclose(nonreporting_expected_vote_frac, 1) + ] = unobserved_upper_bound + + return lower_bound.values.reshape(-1, 1), upper_bound.values.reshape(-1, 1) + + def _strata_pit( + self, + x_train_strata: pd.DataFrame, + x_train_strata_unique: np.ndarray, + delta_hat: np.ndarray, + stratum_cdfs_delta: dict, + ) -> np.ndarray: + """ + Apply the probability integral transform for each strata + """ + # We convert the deltas in the training data to their percentiles in uniform space + unifs = [] + # for each strata, apply the CDF to the residual to turn the residual into perecntil + for strata_dummies in x_train_strata_unique: + # grab all deltas that belong to strata defined by strata_dummies + delta_strata = delta_hat[np.where((strata_dummies == x_train_strata).all(axis=1))[0]] + + # plug the deltas into their CDF to get uniform random variables (probability integral transform) + # 1e-6 is added to solve with numerical issues + unifs_strata = stratum_cdfs_delta[tuple(strata_dummies)](delta_strata + 1e-6).reshape(-1, 1) + + # if the uniform is close to 1/0 set percentile to 0.01/0.99 also for numerical issues + unifs_strata[np.isclose(unifs_strata, 1)] = np.max(self.taus) + unifs_strata[np.isclose(unifs_strata, 0)] = np.min(self.taus) + + unifs.append(unifs_strata) + return np.concatenate(unifs).reshape(-1, 1) + + def _bootstrap_deltas( + self, + unifs: np.ndarray, + x_train_strata: pd.DataFrame, + x_train_strata_unique: np.ndarray, + stratum_ppfs_delta_y: dict, + stratum_ppfs_delta_z: dict, + ) -> tuple[np.ndarray, np.ndarray]: + """ + Bootstrap deltas (unit level errors of our entire model) this is done by re-sampling from uniform random variables + """ + n_train = unifs.shape[0] + + # re-sample uniform random variables + # we are re-sampling over all uniforms (not just per strata), which gives us more randomness + # but we can guarantee that the deltas per strata will be correct because when we convert + # the uniforms back, we use the distribution of the stratum that the training point came from. 
+ # also note unifs_B is of size: (n_train, self.B, 2) so we are re-sampling the deltas + # for y and z jointly + unifs_B = self.rng.choice(unifs, (n_train, self.B), replace=True) + + delta_y_B = np.zeros((n_train, self.B)) + delta_z_B = np.zeros((n_train, self.B)) + + # convert percentile (uniforms) back into residuals + # we need to do this separately for each stratum because there is a different + # inverse function per stratum + for strata_dummies in x_train_strata_unique: + # grab all training indices where units belong to the stratum defined by strata_dummies + strata_indices = np.where((strata_dummies == x_train_strata).all(axis=1))[0] + # get their corresponding uniform random variables + unifs_strata = unifs_B[strata_indices] + # convert back to deltas. unifs_strata's last dimension defines either y or z + delta_y_B[strata_indices] = stratum_ppfs_delta_y[tuple(strata_dummies)](unifs_strata[:, :, 0]) + delta_z_B[strata_indices] = stratum_ppfs_delta_z[tuple(strata_dummies)](unifs_strata[:, :, 1]) + return delta_y_B, delta_z_B + + # TODO: what if hat_epsilon_y, hat_epsilon_z are correlated? + def _bootstrap_epsilons( + self, + epsilon_y_hat: np.ndarray, + epsilon_z_hat: np.ndarray, + x_train_strata: pd.DataFrame, + x_train_strata_unique: np.ndarray, + stratum_ppfs_delta_y: dict, + stratum_ppfs_delta_z: dict, + aggregate_indicator_train: np.ndarray, + ) -> tuple[np.ndarray, np.ndarray]: + """ + Bootstrap epsilons (contest level random effects) using the parametric bootstrap + + In a random effects model we assume that the contest level random effects are drawn from a normal distribution with mean zero and variance Sigma + epsilon_y, epsilon_z ~ N(0, Sigma) + and for now we assume Sigma is diagonal (e.g. contest level random effects between normalized margin and turnout are uncorrelated) + """ + # We first fit the parameters of the multivariate normal distribution that we are assuming epsilon comes + # from and then we sample from it B times to generate bootstrapped contest level random effects. + + # The mean of our normal distribution is epsilon_hat (our best guess for contest level effects) + # we do this to maintain the sign of the contest level effect + + # We now need to estimate Sigma, the covariance matrix of our contest level effects. + # We assume that the contest level random effects between contests are uncorrelated so the off-diagonals of \Sigma are zero. + # To estimate the variance (diagonals) we use the sum of the variances of the units within a contest. + # This is because residuals = epsilons + delta, if we have observed units in a contest then + # the true epsilon is no longer a random quantity and so residual_{i, j} ~ N(epsilon_{i}, \sigma_{j}) (for i in contest and j in strata) + # which means that var(residuals) = var(delta). So that epsilon_hat = mean(residuals) + # so that var(epsilon_hat) = var(mean(residuals)) = mean(var(residuals)) = mean(var(delta)) + + # The variance of deltas is defined by their probability distribution (it's the same per stratum + # since we assume that deltas are iid within a stratum). So we use the distribution to compute + # the variance of the delta. 
We use interquartile-range (IQR) since this is more robust than + # sample standard deviation + + # we first compute the unit variance estimate for each stratum (since that defines the variance per unit) + iqr_y_strata = {} + iqr_z_strata = {} + for x_stratum in x_train_strata_unique: + x_stratum_delta_y_ppf = stratum_ppfs_delta_y[tuple(x_stratum)] + iqr_y = x_stratum_delta_y_ppf(0.75) - x_stratum_delta_y_ppf(0.25) + iqr_y_strata[tuple(x_stratum)] = iqr_y + + x_stratum_delta_z_ppf = stratum_ppfs_delta_z[tuple(x_stratum)] + iqr_z = x_stratum_delta_z_ppf(0.75) - x_stratum_delta_z_ppf(0.25) + iqr_z_strata[tuple(x_stratum)] = iqr_z + + # set variance per contest to be zero + var_epsilon_y = np.zeros((aggregate_indicator_train.shape[1],)) + var_epsilon_z = np.zeros((aggregate_indicator_train.shape[1],)) + + # we are now adding the unit variances to create the contest level variances + for strata_dummies in x_train_strata_unique: + # grab the indices of the units per state that within strata defined by strata_dummies + strata_indices = np.where((strata_dummies == x_train_strata).all(axis=1))[0] + # add variances + # we square because IQR approximates standard deviation + var_epsilon_y += ( + aggregate_indicator_train[strata_indices] * (iqr_y_strata[tuple(strata_dummies)] ** 2) + ).sum(axis=0) + var_epsilon_z += ( + aggregate_indicator_train[strata_indices] * (iqr_z_strata[tuple(strata_dummies)] ** 2) + ).sum(axis=0) + + # IQR constant for a normal random variable + iqr_scale = 1.349 + var_epsilon_y /= iqr_scale**2 + var_epsilon_z /= iqr_scale**2 + var_epsilon_y /= aggregate_indicator_train.sum(axis=0) + var_epsilon_z /= aggregate_indicator_train.sum(axis=0) + + # if we only have 1 unit in a contest we define the variance to be zero + var_epsilon_y[aggregate_indicator_train.sum(axis=0) < 2] = 0 + var_epsilon_z[aggregate_indicator_train.sum(axis=0) < 2] = 0 + + # sample B new epsilons + epsilon_y_B = self.rng.multivariate_normal( + mean=epsilon_y_hat.flatten(), cov=np.diag(var_epsilon_y), size=self.B + ).T + epsilon_z_B = self.rng.multivariate_normal( + mean=epsilon_z_hat.flatten(), cov=np.diag(var_epsilon_z), size=self.B + ).T + return epsilon_y_B, epsilon_z_B + + def _bootstrap_errors( + self, + epsilon_y_hat: np.ndarray, + epsilon_z_hat: np.ndarray, + delta_y_hat: np.ndarray, + delta_z_hat: np.ndarray, + x_train_strata: pd.DataFrame, + stratum_cdfs_y: dict, + stratum_cdfs_z: dict, + stratum_ppfs_delta_y: dict, + stratum_ppfs_delta_z: dict, + aggregate_indicator_train: np.ndarray, + ) -> tuple[tuple[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray]]: + """ + Bootstrap the errors of our model (epsilon and delta) + """ + # get unique strata that appea in the training data + x_train_strata_unique = np.unique(x_train_strata, axis=0).astype(int) + + # bootstrap the epsilons. 
Uses the parametric bootstrap + epsilon_y_B, epsilon_z_B = self._bootstrap_epsilons( + epsilon_y_hat, + epsilon_z_hat, + x_train_strata, + x_train_strata_unique, + stratum_ppfs_delta_y, + stratum_ppfs_delta_z, + aggregate_indicator_train, + ) + + # turn deltas into percentiles (uniforms) so that we can re-sample those + unifs_y = self._strata_pit(x_train_strata, x_train_strata_unique, delta_y_hat, stratum_cdfs_y) + unifs_z = self._strata_pit(x_train_strata, x_train_strata_unique, delta_z_hat, stratum_cdfs_z) + unifs = np.concatenate([unifs_y, unifs_z], axis=1) + + # re-sample uniforms and apply the inverse CDF (PPF) to convert back to re-sampled residuals + delta_y_B, delta_z_B = self._bootstrap_deltas( + unifs, x_train_strata, x_train_strata_unique, stratum_ppfs_delta_y, stratum_ppfs_delta_z + ) + + return (epsilon_y_B, epsilon_z_B), (delta_y_B, delta_z_B) + + def _sample_test_delta( + self, x_test_strata: pd.DataFrame, stratum_ppfs_delta_y: dict, stratum_ppfs_delta_z: dict + ) -> tuple[np.ndarray, np.ndarray]: + """ + This function generates new test deltas (unit level errors) + """ + # we previously estimated the distribution of the deltas per stratum + # we can now generate a new set of uniform random variables (percentiles) + # and then convert them to deltas using the deltas PPFs. We just need + # to make sure that we use the correct distribution defined by the strata + + n_test = x_test_strata.shape[0] + + # sample a new set of uniforms + test_unifs = self.rng.uniform(low=0, high=1, size=(n_test, self.B, 2)) + + test_delta_y = np.zeros((n_test, self.B)) + test_delta_z = np.zeros((n_test, self.B)) + + x_test_strata_unique = np.unique(x_test_strata, axis=0).astype(int) + for strata_dummies in x_test_strata_unique: + # get indices of the nonreporting data that are in strata strata_dummies + strata_indices = np.where((strata_dummies == x_test_strata).all(axis=1))[0] + # get their corresponding newly sampled uniforms + unifs_strata = test_unifs[strata_indices] + # use PPF to generate new deltas from the new uniforms + test_delta_y[strata_indices] = stratum_ppfs_delta_y[tuple(strata_dummies)](unifs_strata[:, :, 0]) + test_delta_z[strata_indices] = stratum_ppfs_delta_z[tuple(strata_dummies)](unifs_strata[:, :, 1]) + + return test_delta_y, test_delta_z + + def _sample_test_epsilon( + self, + residuals_y: np.ndarray, + residuals_z: np.ndarray, + epsilon_y_hat: np.ndarray, + epsilon_z_hat: np.ndarray, + aggregate_indicator_train: np.ndarray, + aggregate_indicator_test: np.ndarray, + ) -> tuple[np.ndarray, np.ndarray]: + """ + This function generates new test epsilons (contest level random effects) + NOTE: for now we are sampling from a normal with mean zero and sample variance. + """ + # the contests that need a sampled contest level random effect are those that still have outstanding + # units (since for the others, the contest level random effect is a known fixed quantity) but no + # current estimate for the epsilon in that contest (so ones with NO training examples) + + # this gives us indices of contests for which we have no samples in the training set + contests_not_in_reporting_units = np.where(np.all(aggregate_indicator_train == 0, axis=0))[0] + # gives us contests for which there is at least one county not reporting + contests_in_nonreporting_units = np.where(np.any(aggregate_indicator_test > 0, axis=0))[0] + # contests that need a random effect are: + # contests that we want to make predictions on (ie. 
there are still outstanding units) + # BUT no units in that contest are reporting + contests_that_need_random_effect = np.intersect1d( + contests_not_in_reporting_units, contests_in_nonreporting_units + ) + + # \epsilon ~ N(0, \Sigma) + mu_hat = [0, 0] + + # the variance is the sample variance of epsilons for which we have an estimate + non_zero_epsilon_indices = np.nonzero(epsilon_y_hat)[0] + # if there is only one non zero contest OR if the contest is a state level election only + # then just sample from nearly zero since no variance + if non_zero_epsilon_indices.shape[0] == 1: + return np.zeros((1, self.B)), np.zeros((1, self.B)) + else: + sigma_hat = np.cov(np.concatenate([epsilon_y_hat, epsilon_z_hat], axis=1)[np.nonzero(epsilon_y_hat)[0]].T) + + # sample new test epsilons, but only for states that need them + test_epsilon = self.rng.multivariate_normal( + mu_hat, sigma_hat, size=(len(contests_that_need_random_effect), self.B) + ) + + test_epsilon_y = aggregate_indicator_test[:, contests_that_need_random_effect] @ test_epsilon[:, :, 0] + test_epsilon_z = aggregate_indicator_test[:, contests_that_need_random_effect] @ test_epsilon[:, :, 1] + return test_epsilon_y, test_epsilon_z + + def _sample_test_errors( + self, + residuals_y: np.ndarray, + residuals_z: np.ndarray, + epsilon_y_hat: np.ndarray, + epsilon_z_hat: np.ndarray, + x_test_strata: pd.DataFrame, + stratum_ppfs_delta_y: dict, + stratum_ppfs_delta_z: dict, + aggregate_indicator_train: np.ndarray, + aggregate_indicator_test: np.ndarray, + ) -> tuple[np.ndarray, np.ndarray]: + """ + This function samples new test errors for our model (ie. new test residuals) + """ + # we generate new test residuals by generating new test epsilons and new test deltas and then adding them together + test_epsilon_y, test_epsilon_z = self._sample_test_epsilon( + residuals_y, residuals_z, epsilon_y_hat, epsilon_z_hat, aggregate_indicator_train, aggregate_indicator_test + ) + test_delta_y, test_delta_z = self._sample_test_delta(x_test_strata, stratum_ppfs_delta_y, stratum_ppfs_delta_z) + test_error_y = test_epsilon_y + test_delta_y + test_error_z = test_epsilon_z + test_delta_z + return test_error_y, test_error_z + + # TODO: potentially generalize binning features for strata + def _get_strata( + self, reporting_units: pd.DataFrame, nonreporting_units: pd.DataFrame + ) -> tuple[pd.DataFrame, pd.DataFrame]: + """ + Gets strata for stratified bootstrap sampling + """ + n_train = reporting_units.shape[0] + # we can use the featurizer, since strata are defined by dummy variables in the same way that + # fixed effects are (ie. rural could be (1, 0, 0) while urban could be (0, 1, 0) while suburban could be (0, 0, 1)) + # but like with fixed effects we drop one strata category and use the intercept instead so the example would be + # rural: 0, 0 urban: 1, 0 and suburban: 0, 1 + strata_featurizer = Featurizer([], self.strata) + all_units = pd.concat([reporting_units, nonreporting_units], axis=0) + + strata_all = strata_featurizer.prepare_data( + all_units, center_features=False, scale_features=False, add_intercept=self.add_intercept + ) + x_train_strata = strata_all[:n_train] + x_test_strata = strata_all[n_train:] + return x_train_strata, x_test_strata + + def compute_bootstrap_errors( + self, reporting_units: pd.DataFrame, nonreporting_units: pd.DataFrame, unexpected_units: pd.DataFrame + ): + """ + Computes unit level point predictions and runs the bootstrap to generate quantities needed for prediction intervals. 
+ + The bootstrap generally re-samples the observed data with replacement in order to generate synthetic "bootstrap" datasets, + which can be used to estimate the sampling distribution of a quantity that we are interested in. + Our implementation is the stratified residual bootstrap. The residual bootstrap samples residuals of a model (instead of the original dataset), + which is preferable in a regression setting because it removes the component of the observation that is not random. + We use the stratified bootstrap because our units are not independent and identically distributed, which means that we cannot assign + the error of any unit to any other unit (e.g. the residual for an urban unit would likely not fit for a rural unit). For now, this model + stratifies on county classification (rural/urban/suburban). + + Generally we are interested in predicting functions of: + w * y * z = weights * normalized_margin * turnout_factor = unnormalized_margin + + There are three cases: + 1) In the unit case we are interested in the unnormalized margin: + w_i * y_i * z_i + 2) In the aggregate (e.g. state aggregation) case we are interested in the normalized sum of the unnormalized margin of units + (sum_{i = 1}^N w_i * y_i * z_i) / (sum_{i = 1}^N w_i * z_i) + 3) In the national case we are interested in an interval over the sum of electoral votes generated by the predictions + sum_{s = 1}^{51} sigmoid{sum_{i = 1}^{N_s} w_i * y_i * z_i} * ev_s + + Our point prediction for each is: + 1) w_i * hat{y_i} * hat{z_i} + 2) (sum_{i = 1}^N w_i * hat{y_i} * hat{z_i}) / (sum_{i = 1}^N w_i hat{z_i}) + 3) sum_{s = 1}^{51} sigmoid{sum_{i=1}^{N_s} w_i * hat{y_i} * hat{z_i}} * ev_s + remember that our model for y_i, z_i is OLS plus a contest random effect: + y_i = f_y(x) + epsilon_y(x) + z_i = f_z(x) + epsilon_z(x) + this means that: + hat{y_i} = hat{f_y(x)} + hat{epsilon_y(x)} + hat{z_i} = hat{f_z(x)} + hat{epsilon_z(x)} + + As point predictions, this function only computes: + 1) w_i * hat{y_i} * hat{z_i} + 2) w_i * hat{z_i} + which are then used in the respective prediction functions + + We are also interested in generating prediction intervals for the quantities, we do that by bootstrapping the error in our predictions + and then taking the appropriate percentiles of those errors. The errors we are interested in are between the true quantity and + our prediction: + + There are three cases that mirror the cases above (error between prediction and true quantity) + 1) w_i * hat{y_i} * hat{z_i} - w_i * y_i * z_i + 2) (sum_{i = 1}^N w_i * hat{y_i} * hat{z_i}) / (sum_{i = 1}^n w_i hat{z_i}) - (sum_{i = 1}^N w_i * y_i * z_i) / (sum_{i = 1}^n w_i z_i) + 3) sum_{s = 1}^{51} sigmoid{sum_{i=1}^{N_s} w_i * hat{y_i} * hat{z_i}} * ev_s - sum_{s = 1}^{51} sigmoid{sum_{i=1}^{N_s} w_i * y_i * z_i} * ev_s + + In order to keep this model as flexible as possible for all potential cases, this function generates bootstrap estimates for + 1) w_i * hat{y_i} * hat{z_i} + 2) w_i * y_i * z_i + 3) w_i * hat{z_i} + 4) w_i * z_i + and stores those so that we can later compute prediction intervals for any function of these quantities in their respective functions. + + In a normal setting the bootstrap works by assuming that our fitted predictions (e.g. w_i * hat{y_i} * hat{z_i}) are now the true + values. Then using the bootstrap to generate synthetic samples (e.g. w_i hat{y_i}^b * hat{z_i}^b) and computing the error + between the two. This would give us a confidence interval (ie. 
the error between a quantity and its mean), but we are interested + in a prediction interval, which means we also need to take into account the additional uncertainty in sampling new y_i and z_i + + This means that our new "true" quantity (the equivalent of w_i * y_i * z_i) needs its own freshly sampled uncertainty, so we sample + new test errors + residuals_{y, i}^{b}, residuals_{z, i}^{b} + in order to compute: + hat{y_i} + residuals_{y, i}^{b} + hat{z_i} + residuals_{z, i}^{b} + so that: + w_i * y_i * z_i --> w_i * (hat{y_i} + residuals_{y, i}^{b}) * (hat{z_i} + residuals_{z, i}^{b}) + + We also need new "estimated" quantities (the equivalent of w_i * hat{y_i} * hat{z_i}); these are the outcome + of the stratified residual bootstrap: + w_i * hat{y_i} * hat{z_i} --> w_i * tilde{y_i}^{b} * tilde{z_i}^{b} + + For completeness, we also generate estimates for the other two quantities: + w_i * z_i -> w_i * (hat{z_i} + epsilon_{z, i}^{b}) + w_i * hat{z_i} -> w_i * tilde{z_i}^{b} + """ + # prepare data (generate fixed effects, add intercept etc.) + all_units = pd.concat([reporting_units, nonreporting_units, unexpected_units], axis=0) + x_all = self.featurizer.prepare_data( + all_units, center_features=False, scale_features=False, add_intercept=self.add_intercept + ) + n_train = reporting_units.shape[0] + n_test = nonreporting_units.shape[0] + + x_train_df = self.featurizer.filter_to_active_features(x_all[:n_train]) + x_train = x_train_df.values + y_train = reporting_units["results_normalized_margin"].values.reshape(-1, 1) + z_train = reporting_units["turnout_factor"].values.reshape(-1, 1) + weights_train = reporting_units["baseline_weights"].values.reshape(-1, 1) + + x_test_df = self.featurizer.generate_holdout_data(x_all[n_train : (n_train + n_test)]) # noqa: 203 + x_test = x_test_df.values + weights_test = nonreporting_units["baseline_weights"].values.reshape(-1, 1) + + # Create a matrix of size (n_total_units, n_contests) which acts as a crosswalk + # between unit and contest (ie. a 1 in position (i, j) says that unit i belongs to contest j) + # in the case of a district election we need to create a variable that defines the (state, district) pair, + # which is what the contest is + if self.district_election: + all_units["postal_code-district"] = all_units[["postal_code", "district"]].agg("_".join, axis=1) + aggregate_indicator = pd.get_dummies(all_units["postal_code-district"]).values + else: + aggregate_indicator = pd.get_dummies(all_units["postal_code"]).values + + aggregate_indicator_expected = aggregate_indicator[: (n_train + n_test)] + aggregate_indicator_train = aggregate_indicator_expected[:n_train] + aggregate_indicator_test = aggregate_indicator_expected[n_train:] + + # we compute bounds for normalized margin and turnout factor based on our results provider's current estimate for expected vote + # ie. if 95% of the votes of a unit are in, what is the max/min the normalized_margin and turnout factor could still reach?
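To make the bound logic concrete, here is a minimal sketch of the normalized-margin case under the default assumption that all outstanding vote could break toward either party (illustrative only; the `_generate_nonreporting_bounds` call just below also handles the turnout factor and the configurable bound parameters):

```python
def margin_bounds(observed_normalized_margin: float, percent_expected_vote: float) -> tuple[float, float]:
    """Possible range for the final normalized margin of a partially reporting unit (toy version)."""
    p = percent_expected_vote / 100.0
    # the reported vote contributes a share p of the final total; the outstanding share (1 - p)
    # can move the normalized margin by at most +/- (1 - p) if it all breaks one way
    return observed_normalized_margin * p - (1 - p), observed_normalized_margin * p + (1 - p)


# e.g. a unit at 75% reporting with a +0.1 normalized margin so far could still finish anywhere in
# margin_bounds(0.1, 75) == (-0.175, 0.325)
```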
+ y_partial_reporting_lower, y_partial_reporting_upper = self._generate_nonreporting_bounds( + nonreporting_units, "results_normalized_margin" + ) + z_partial_reporting_lower, z_partial_reporting_upper = self._generate_nonreporting_bounds( + nonreporting_units, "turnout_factor" + ) + + # we use k-fold cross validation to find the optimal lambda for our OLS regression + optimal_lambda_y = self.cv_lambda(x_train, y_train, np.logspace(-3, 2, 20), weights=weights_train) + optimal_lambda_z = self.cv_lambda(x_train, z_train, np.logspace(-3, 2, 20), weights=weights_train) + + # step 1) fit the initial model + # we don't want to regularize the intercept or the coefficient for baseline_normalized_margin + ols_y = OLSRegressionSolver() + ols_y.fit( + x_train, + y_train, + weights=weights_train, + lambda_=optimal_lambda_y, + fit_intercept=True, + regularize_intercept=False, + n_feat_ignore_reg=1, + ) + ols_z = OLSRegressionSolver() + ols_z.fit( + x_train, + z_train, + weights=weights_train, + lambda_=optimal_lambda_z, + fit_intercept=True, + regularize_intercept=False, + n_feat_ignore_reg=1, + ) + + # step 2) calculate the fitted values + y_train_pred = ols_y.predict(x_train) + z_train_pred = ols_z.predict(x_train) + + # step 3) calculate residuals + # residuals are the residuals from OLS (ie. the amount of error that our regression does not explain) + # they have been inflated by (1 - hat_matrix) to be leave-one-out residuals so they are an estimate of + # the test residuals + # epsilon is the contest level effect (it is estimated as the average error in OLS over all the units in a contest) + # delta is the unit level error that is unaccounted for by the contest level effect + residuals_y, epsilon_y_hat, delta_y_hat = self._estimate_model_errors( + ols_y, x_train, y_train, aggregate_indicator_train + ) + residuals_z, epsilon_z_hat, delta_z_hat = self._estimate_model_errors( + ols_z, x_train, z_train, aggregate_indicator_train + ) + + # As part of the residual bootstrap we now need to generate B synthetic versions of residuals_y and residuals_z + # residuals are broken into an epsilon (contest level error) and delta (unit level error component). This means + # in order to generate bootstrapped residuals we need to bootstrap samples for epsilon and for delta. + # We bootstrap epsilons using the parametric bootstrap. Our procedure for bootstrapping the deltas is more complicated. + + # The stratified bootstrap assumes that the final model residuals (delta) are independent and identically distributed per + # strata. So we now need to generate new bootstrapped deltas for each unit in each strata. + + # Instead of sampling the deltas directly with replacement we generate a probability distribution for each strata. + # We convert the deltas into uniform random variables using the distribution's CDF (probability integral transform), + # we then re-sample the uniforms with replacement and then convert the uniform random variables back into deltas + # using the PPF of the distribution (inverse CDF). + # We do this because we have two deltas to sample (delta_z and delta_y); by sampling the observed CDFs we can maintain the correlation + # between the y and z deltas + # E.g.
take a rural delta (delta_y, delta_z), evaluate CDF -> get percentile for each residual (0.75, 0.85) + # If you do this for every pair of points, you will notice that these two percentiles are correlated (a big y error and a big z error co-occur) + # This approach using uniform random variables allows us to smooth over the distribution in cases where we have only seen very few observations + # per strata. We can also impose a worst/best case scenario by adding an additional datapoint for each stratum when generating the distribution. + + # we only want to re-sample deltas in each strata (ie. rural counties should only receive errors from rural counties, same for suburban and urban) + # this means that we need to generate error distributions conditional on each strata value (ie. conditional on urban, rural and suburban) + # to do this, we first need to get the strata + # x_train_strata is an array where each row defines the strata for that training unit (e.g. 00, 01, 10) + # x_test_strata is equivalent + x_train_strata, x_test_strata = self._get_strata(reporting_units, nonreporting_units) + + # we then compute the probability distribution (CDF/PPF) for the deltas given each strata; this will allow us to move from the residual space + # to the percentile space [0, 1] and back again after re-sampling + stratum_ppfs_delta_y, stratum_cdfs_delta_y = self._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_y_hat, self.y_LB, self.y_UB + ) + stratum_ppfs_delta_z, stratum_cdfs_delta_z = self._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_z_hat, self.z_LB, self.z_UB + ) + + # step 4) bootstrap resampling + # step 4a) we resample B new epsilons and deltas + epsilon_B, delta_B = self._bootstrap_errors( + epsilon_y_hat, + epsilon_z_hat, + delta_y_hat, + delta_z_hat, + x_train_strata, + stratum_cdfs_delta_y, + stratum_cdfs_delta_z, + stratum_ppfs_delta_y, + stratum_ppfs_delta_z, + aggregate_indicator_train, + ) + epsilon_y_B, epsilon_z_B = epsilon_B + delta_y_B, delta_z_B = delta_B + + # step 4b) create our bootstrapped dataset by adding the bootstrapped errors (epsilon + delta) to our fitted values + y_train_B = y_train_pred + (aggregate_indicator_train @ epsilon_y_B) + delta_y_B + z_train_B = z_train_pred + (aggregate_indicator_train @ epsilon_z_B) + delta_z_B + + # step 5) refit the model + # we need to generate bootstrapped test predictions and to do that we need to fit a bootstrap model + # we are using the normal equations from the original model since x_train has stayed the same and the normal + # equations are only dependent on x_train. This saves compute. + ols_y_B = OLSRegressionSolver() + ols_y_B.fit( + x_train, + y_train_B, + weights_train, + normal_eqs=ols_y.normal_eqs, + fit_intercept=True, + regularize_intercept=False, + n_feat_ignore_reg=1, + ) + ols_z_B = OLSRegressionSolver() + ols_z_B.fit( + x_train, + z_train_B, + weights_train, + normal_eqs=ols_z.normal_eqs, + fit_intercept=True, + regularize_intercept=False, + n_feat_ignore_reg=1, + ) + + LOG.info("orig. ols coefficients, normalized margin: \n %s", ols_y.coefficients.flatten()) + LOG.info("boot.
ols coefficients, normalized margin: \n %s", ols_y_B.coefficients.mean(axis=-1)) + + # we cannot just apply ols_y_B/old_z_B to the test units because that would be missing our contest level random effect + # so we need to compute an bootstrapped estimate of the contest level random effect (epsilon) + # to do that we first compute the bootstrapped leave-one-out training residuals and then use that to + # estimate bootstrapped epsilons + y_train_pred_B = ols_y_B.predict(x_train) + z_train_pred_B = ols_z_B.predict(x_train) + residuals_y_B = ols_y_B.residuals(y_train_B, y_train_pred_B, loo=True, center=True) + residuals_z_B = ols_z_B.residuals(z_train_B, z_train_pred_B, loo=True, center=True) + epsilon_y_hat_B = self._estimate_epsilon(residuals_y_B, aggregate_indicator_train) + epsilon_z_hat_B = self._estimate_epsilon(residuals_z_B, aggregate_indicator_train) + + # We can then use our bootstrapped ols_y_B/ols_z_B and our bootstrapped contest level effect (epsilon) to make bootstrapped predictions + # on our non-reporting units + # This is \tilde{y_i}^{b} and \tilde{z_i}^{b} + y_test_pred_B = (ols_y_B.predict(x_test) + (aggregate_indicator_test @ epsilon_y_hat_B)).clip( + min=y_partial_reporting_lower, max=y_partial_reporting_upper + ) + z_test_pred_B = (ols_z_B.predict(x_test) + (aggregate_indicator_test @ epsilon_z_hat_B)).clip( + min=z_partial_reporting_lower, max=z_partial_reporting_upper + ) + + # \tilde{y_i}^{b} * \tilde{z_i}^{b} + yz_test_pred_B = y_test_pred_B * z_test_pred_B + + # In order to generate our point prediction, we also need to apply our non-bootstrapped model to the testset + # this is \hat{y_i} and \hat{z_i} + y_test_pred = (ols_y.predict(x_test) + (aggregate_indicator_test @ epsilon_y_hat)).clip( + min=y_partial_reporting_lower, max=y_partial_reporting_upper + ) + z_test_pred = (ols_z.predict(x_test) + (aggregate_indicator_test @ epsilon_z_hat)).clip( + min=z_partial_reporting_lower, max=z_partial_reporting_upper + ) + yz_test_pred = y_test_pred * z_test_pred + + # we now need to generate our bootstrapped "true" quantities (in order to subtract the bootstrapped estimates from these quantities to get an estimate for our error) + # In a normal bootstrap setting we would replace y and z with \hat{y} and \hat{z} + # however, we want to produce prediction intervals (rather than just confidence intervals) so we need to take into account the extra + # uncertainty that comes with prediction (ie. estimating the mean has some variance, but sampling from our mean estimate introduces an additional error) + # To do that we need an estimate for our prediction error (ie. an estimate of the residuals = epsilon + delta) + # we cannot use our original residuals_y, residuals_z because they would cancel out our error used in the bootstrap + test_residuals_y, test_residuals_z = self._sample_test_errors( + residuals_y, + residuals_z, + epsilon_y_hat, + epsilon_z_hat, + x_test_strata, + stratum_ppfs_delta_y, + stratum_ppfs_delta_z, + aggregate_indicator_train, + aggregate_indicator_test, + ) + + # multiply by weights to turn into unnormalized margin + # w_i * \hat{y_i} * \hat{z_i} + self.errors_B_1 = yz_test_pred_B * weights_test + + # This is (\hat{y_i} + \residuals_{y, i}^{b}) * (\hat{z_i} + \residuals_{z, i}^{b}) + # we clip them based on bounds we generated earlier. 
These are defined by the estimates amount of + # outstanding vote from our election results provider + errors_B_2 = (y_test_pred + test_residuals_y).clip(min=y_partial_reporting_lower, max=y_partial_reporting_upper) + errors_B_2 *= (z_test_pred + test_residuals_z).clip( + min=z_partial_reporting_lower, max=z_partial_reporting_upper + ) + + # multiply by weights turn into unnormalized margin + # this is w_i * (\hat{y_i} + \residuals_{y, i}^{b}) * (\hat{z_i} + \residuals_{z, i}^{b}) + self.errors_B_2 = errors_B_2 * weights_test + + # we also need errors for the denominator of the aggregate + # this is \tilde{z_i}^{b} + self.errors_B_3 = z_test_pred_B * weights_test # has already been clipped above + # this is (\hat{z_i} + \residuals_{z, i}^{b}) + self.errors_B_4 = (z_test_pred + test_residuals_z).clip( + min=z_partial_reporting_lower, max=z_partial_reporting_upper + ) * weights_test + + # this is for the unit point prediction. turn into unnormalized margin + self.weighted_yz_test_pred = yz_test_pred * weights_test + # and turn into turnout estimate + self.weighted_z_test_pred = z_test_pred * weights_test + self.ran_bootstrap = True + + def get_unit_predictions( + self, reporting_units: pd.DataFrame, nonreporting_units: pd.DataFrame, estimand: str, **kwargs + ) -> np.ndarray: + """ + Returns the unit predictions, if necessary also generates them + The unit predictions are the *unnormalized margin* + w_i * hat{y_i} * hat{z_i} + """ + # if bootstrap hasn't been run yet, run it + if not self.ran_bootstrap: + unexpected_units = kwargs["unexpected_units"] + self.compute_bootstrap_errors(reporting_units, nonreporting_units, unexpected_units) + return self.weighted_yz_test_pred + + def _is_top_level_aggregate(self, aggregate: list) -> bool: + """ + Function to figure out whether we are at the top level aggregation (ie. postal code for state level model or postal code, district for district model) + """ + # case 1: + # top level aggregate is postal code (ie. we are generating up to a state level -> ECV or Senate). We know this is the case + # because aggregate length is just 1 and postal code is the only aggregate + # case 2: + # top level aggregate is postal code and district (ie. we are generating up to a district level -> House or State Senate). + # We know this is the case because aggregate length is 2 and postal code and district are the two aggregates. 
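To make the two cases concrete (hypothetical aggregate lists; `model` stands for a fitted BootstrapElectionModel, and the method simply evaluates the boolean expression in the return statement just below):

```python
model._is_top_level_aggregate(["postal_code"])                 # True: case 1, state-level top aggregate
model._is_top_level_aggregate(["postal_code", "district"])     # True: case 2, district-level top aggregate
model._is_top_level_aggregate(["postal_code", "county_fips"])  # False: not a top-level aggregation
```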
+ return (len(aggregate) == 1 and "postal_code" in aggregate) or ( + len(aggregate) == 2 and "postal_code" in aggregate and "district" in aggregate + ) + + def get_aggregate_predictions( + self, + reporting_units: pd.DataFrame, + nonreporting_units: pd.DataFrame, + unexpected_units: pd.DataFrame, + aggregate: list, + estimand: str, + ) -> pd.DataFrame: + """ + Generates and returns the normalized margin for arbitrary aggregates + sum_{i = 1}^N (w_i * hat{y_i} * hat{z_i}) / sum_{i = 1}^N (w_i * hat{z_i}) + """ + n_train = reporting_units.shape[0] + n_test = nonreporting_units.shape[0] + + all_units = pd.concat([reporting_units, nonreporting_units, unexpected_units], axis=0) + + # if we want to aggregate to something that isn't postal_code we need to generate a temporary + # column so that we create a dummary variable for each level of the aggregate + # aggreagate_1 * aggregate_2 rather than aggregate_1 + aggregate_2 which is what would happen otherwise + if len(aggregate) > 1: + aggregate_temp_column_name = "-".join(aggregate) + all_units[aggregate_temp_column_name] = all_units[aggregate].agg("_".join, axis=1) + aggregate_indicator = pd.get_dummies(all_units[aggregate_temp_column_name]).values + else: + aggregate_indicator = pd.get_dummies(all_units[aggregate]).values + aggregate_temp_column_name = aggregate + + # the unit level predictions that come in through reporting_units and nonreporting_units + # are unnormalized. Since we want the normalized margin for the aggregate predictions + # we need to divide the sum of unnormalized aggregates by the total turnout predictions + # so we first compute the total turnout predictions + + aggregate_indicator_expected = aggregate_indicator[: (n_train + n_test)] # noqa: 203 + aggregate_indicator_unexpected = aggregate_indicator[(n_train + n_test) :] # noqa: 203 + + # two party turnout + # results weights is unexpected_units["results_dem"] + unexpected_units["results_gop"] + turnout_unexpected = (unexpected_units["results_weights"]).values.reshape(-1, 1) + + aggregate_indicator_train = aggregate_indicator_expected[:n_train] + aggregate_indicator_test = aggregate_indicator_expected[n_train:] + weights_train = reporting_units["baseline_weights"].values.reshape(-1, 1) + z_train = reporting_units["turnout_factor"].values.reshape(-1, 1) + + # get turnout for aggregate (w_i * z_i) + aggregate_z_train = aggregate_indicator_train.T @ (weights_train * z_train) + aggregate_z_unexpected = aggregate_indicator_unexpected.T @ turnout_unexpected + + # total turnout predictions + aggregate_z_total = ( + aggregate_z_unexpected + aggregate_z_train + aggregate_indicator_test.T @ self.weighted_z_test_pred + ) + + # use get_aggregate_predictions from BaseElectionModel to sum unnormalized margin of all the units + raw_margin_df = super().get_aggregate_predictions( + reporting_units, nonreporting_units, unexpected_units, aggregate, estimand + ) + + # divide the unnormalized margin and results by the total turnout predictions to get the normalized margin for the aggregate + # turnot prediction could be zero, in which case predicted margin is also zero, so replace NaNs with zero in that case + raw_margin_df["pred_margin"] = np.nan_to_num(raw_margin_df.pred_margin / aggregate_z_total.flatten()) + raw_margin_df["results_margin"] = np.nan_to_num(raw_margin_df.results_margin / aggregate_z_total.flatten()) + # if we are in the top level prediction, then save the aggregated baseline margin, which we will need for the national + # summary (e.g. 
ecv) model + if self._is_top_level_aggregate(aggregate): + aggregate_sum = all_units.groupby(aggregate_temp_column_name).sum() + self.aggregate_baseline_margin = ( + (aggregate_sum.baseline_dem - aggregate_sum.baseline_gop) / (aggregate_sum.baseline_turnout + 1) + ).values + return raw_margin_df + + def _get_quantiles(self, alpha): + """ + Given a confidence level for the prediction interval, calculates the quantiles + necessary to be computed + """ + lower_alpha = (1 - alpha) / 2 + upper_alpha = 1 - lower_alpha + + # adjust percentiles to account for bootstrap + lower_q = np.floor(lower_alpha * (self.B + 1)) / self.B + upper_q = np.ceil(upper_alpha * (self.B - 1)) / self.B + + return lower_q, upper_q + + def get_unit_prediction_intervals( + self, reporting_units: pd.DataFrame, nonreporting_units: pd.DataFrame, alpha: float, estimand: str + ) -> PredictionIntervals: + """ + Generate and return unit level prediction intervals + + In the unit case, the error in our prediciton is: + w_i * hat{y_i} * hat{z_i} - w_i * y_i * z_i + In the bootstrap setting this has been estimated as: + w_i * tilde{y_i}^{b} * tilde{z_i}^{b} - w_i * (hat{y_i} + residual_{y, i}^{b}) * (hat{z_i} + residual_{z, i}^{b}) + + The alpha% prediction interval is the (1 - alpha) / 2 and (1 + alpha) / 2 percentiles over the bootstrap samples of this quantity + """ + # error_B_1: w_i * \tilde{y_i}^{b} * \tilde{z_i}^{b} + # error_B_2: w_i * (\hat{y_i} + \residual_{y, i}^{b}) * (\hat{z_i} + \residual_{z, i}^{b}) + errors_B = self.errors_B_1 - self.errors_B_2 + + lower_q, upper_q = self._get_quantiles(alpha) + + # sum in the prediction to our lower and upper esimate of the error in our prediction + interval_upper, interval_lower = ( + self.weighted_yz_test_pred - np.quantile(errors_B, q=[lower_q, upper_q], axis=-1).T + ).T + + interval_upper = interval_upper.reshape(-1, 1) + interval_lower = interval_lower.reshape(-1, 1) + + return PredictionIntervals(interval_lower.round(decimals=0), interval_upper.round(decimals=0)) + + def get_aggregate_prediction_intervals( + self, + reporting_units: pd.DataFrame, + nonreporting_units: pd.DataFrame, + unexpected_units: pd.DataFrame, + aggregate: list, + alpha: float, + unit_prediction_intervals: PredictionIntervals, + estimand: str, + ) -> PredictionIntervals: + """ + Generate and return aggregate prediction intervals for arbitrary aggregates + + In the aggregate case, the error in our prediction is: + (sum_{i = 1}^N w_i * hat{y_i} * hat{z_i}) / (sum_{i = 1}^n w_i hat{z_i}) - (sum_{i = 1}^N w_i * y_i * z_i) / (sum_{i = 1}^n w_i z_i) + In the bootstrap setting this has been estimated as: + (sum_{i = 1}^N w_i * tilde_{y_i}^b * tilde_{z_i}^b) / (sum_{i = 1}^N w_i * tilde_{z_i}^b) - (sum_{i = 1}^N w_i * hat_{y_i} + residual_{y, i}^b) * (hat{z_i} + residual_{z, i}^b)) / (sum_{i = 1}^N w_i * (hat{z_i} + residual_{z, i}^b)) + + The alpha% prediction interval is the (1 - alpha) / 2 and (1 + alpha) / 2 percentiles over the bootstrap samples of this quantity + """ + n_train = reporting_units.shape[0] + n_test = nonreporting_units.shape[0] + + all_units = pd.concat([reporting_units, nonreporting_units, unexpected_units], axis=0) + + if len(aggregate) > 1: + aggregate_temp_column_name = "-".join(aggregate) + all_units[aggregate_temp_column_name] = all_units[aggregate].agg("_".join, axis=1) + aggregate_indicator = pd.get_dummies(all_units[aggregate_temp_column_name]).values + else: + aggregate_indicator = pd.get_dummies(all_units[aggregate]).values + aggregate_indicator_expected = aggregate_indicator[: 
(n_train + n_test)] + + # first compute turnout and unnormalized margin for unexpected units. + # this is a known quantity + aggregate_indicator_unexpected = aggregate_indicator[(n_train + n_test) :] # noqa: 1185 + margin_unexpected = unexpected_units["results_margin"].values.reshape(-1, 1) + # results weights is unexpected_units["results_dem"] + unexpected_units["results_gop"] + turnout_unexpected = (unexpected_units["results_weights"]).values.reshape(-1, 1) + aggregate_z_unexpected = aggregate_indicator_unexpected.T @ turnout_unexpected + aggregate_yz_unexpected = aggregate_indicator_unexpected.T @ margin_unexpected + + aggregate_indicator_train = aggregate_indicator_expected[:n_train] + aggregate_indicator_test = aggregate_indicator_expected[n_train:] + weights_train = reporting_units["baseline_weights"].values.reshape(-1, 1) + + # compute turnout and unnormalized margin for reporting units. + # this is also a known quantity with no uncertainty + y_train = reporting_units["results_normalized_margin"].values.reshape(-1, 1) + z_train = reporting_units["turnout_factor"].values.reshape(-1, 1) + yz_train = y_train * z_train + aggregate_z_train = aggregate_indicator_train.T @ (weights_train * z_train) + aggregate_yz_train = aggregate_indicator_train.T @ (weights_train * yz_train) + + # (sum_{i = 1}^N w_i * \tilde_{y_i}^b * \tilde_{z_i}^b) + aggregate_yz_test_B = aggregate_indicator_test.T @ self.errors_B_1 + + # (\sum_{i = 1}^N w_i * (\hat_{y_i} + \residual_{y, i}^b) * (\hat{z_i} + \residual_{z, i}^b)) + aggregate_yz_test_pred = aggregate_indicator_test.T @ self.errors_B_2 + + # (\sum_{i = 1}^N w_i * \tilde_{z_i}^b) + aggregate_z_test_B = aggregate_indicator_test.T @ self.errors_B_3 + + # (\sum_{i = 1}^N w_i * (\hat{z_i} + \residual_{z, i}^b)) + aggregate_z_test_pred = aggregate_indicator_test.T @ self.errors_B_4 + + # sum the aggregate error components with the known quantities from reporting and unexpected units + aggregate_yz_total_B = aggregate_yz_train + aggregate_yz_test_B + aggregate_yz_unexpected + aggregate_yz_total_pred = aggregate_yz_train + aggregate_yz_test_pred + aggregate_yz_unexpected + aggregate_z_total_B = aggregate_z_train + aggregate_z_test_B + aggregate_z_unexpected + aggregate_z_total_pred = aggregate_z_train + aggregate_z_test_pred + aggregate_z_unexpected + + aggregate_error_B_1 = aggregate_yz_total_B + aggregate_error_B_2 = aggregate_yz_total_pred + aggregate_error_B_3 = aggregate_z_total_B + aggregate_error_B_4 = aggregate_z_total_pred + + # (sum_{i = 1}^N w_i * \tilde_{y_i}^b * \tilde_{z_i}^b) / (\sum_{i = 1}^N w_i * \tilde_{z_i}^b) + divided_error_B_1 = np.nan_to_num(aggregate_error_B_1 / aggregate_error_B_3) + + # (\sum_{i = 1}^N w_i * (\hat_{y_i} + \residual_{y, i}^b) * (\hat{z_i} + \residual_{z, i}^b)) / (\sum_{i = 1}^N w_i * (\hat{z_i} + \residual_{z, i}^b)) + divided_error_B_2 = np.nan_to_num(aggregate_error_B_2 / aggregate_error_B_4) + + # subtract to get bootstrap error for estimate in our predictions + aggregate_error_B = divided_error_B_1 - divided_error_B_2 + + lower_q, upper_q = self._get_quantiles(alpha) + + # we also need to re-compute our aggregate prediction to add to our error to get the prediction interval + # first the turnout component + aggregate_z_total = ( + aggregate_z_unexpected + aggregate_z_train + aggregate_indicator_test.T @ self.weighted_z_test_pred + ) + # then the unnormalied margin component + aggregate_yz_total = ( + aggregate_yz_unexpected + aggregate_yz_train + aggregate_indicator_test.T @ self.weighted_yz_test_pred + ) + # 
calculate normalized margin in the aggregate prediction + # turnout prediction could be zero, so convert NaN -> 0 + aggregate_perc_margin_total = np.nan_to_num(aggregate_yz_total / aggregate_z_total) + + # saves the aggregate errors in case we want to generate some form of national predictions (like ecv) + if self._is_top_level_aggregate(aggregate): + self.aggregate_error_B_1 = aggregate_error_B_1 + self.aggregate_error_B_2 = aggregate_error_B_2 + self.aggregate_error_B_3 = aggregate_error_B_3 + self.aggregate_error_B_4 = aggregate_error_B_4 + self.aggregate_perc_margin_total = aggregate_perc_margin_total + + interval_upper, interval_lower = ( + aggregate_perc_margin_total - np.quantile(aggregate_error_B, q=[lower_q, upper_q], axis=-1).T + ).T + interval_upper = interval_upper.reshape(-1, 1) + interval_lower = interval_lower.reshape(-1, 1) + + return PredictionIntervals(interval_lower, interval_upper) + + def get_national_summary_estimates( + self, nat_sum_data_dict: dict, called_states: dict, base_to_add: int | float, alpha: float + ) -> list: + """ + Generates and returns a national summary estimate (ie. electoral votes or total number of senate seats) + This function does both the point prediction and the lower and upper estimates + + First element in the list is the prediction, second is the lower end of the interval and third is the upper end of the interval + + The point prediction and prediction intervals are very similar to get_aggregate_prediction / get_aggregate_prediction_intervals + except that we pass our bootstrapped predictions (and our stand-in for the "true" value) through a sigmoid (or a threshold) and assign + weights. This gives us a bootstrapped national summary estimate (e.g. electoral votes), which we can use to generate + a prediction interval + """ + # if nat_sum_data_dict is None then we assign 1 for every contest (ie. Senate or House) + if nat_sum_data_dict is None: + # the order does not matter since all contests have the same weight, so we can use anything as the key when sorting + nat_sum_data_dict = {i: 1 for i in range(self.aggregate_error_B_1.shape[0])} + + # if we didn't pass the right number of national summary weights (ie. the number of contests) then raise an exception + if len(nat_sum_data_dict) != self.aggregate_error_B_1.shape[0]: + raise BootstrapElectionModelException( + f"nat_sum_data_dict is of length {len(nat_sum_data_dict)} but there are {self.aggregate_error_B_1.shape[0]} contests" + ) + + # called states is a dictionary where 1 means that the LHS party has won, 0 means that the RHS party has won + # and -1 means that the state is not called. If called_states is None, assume that all states are not called.
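As a minimal sketch of how win probabilities, race calls, and contest weights combine (made-up margins and weights; the code below applies the same pattern to the bootstrapped aggregate quantities):

```python
import numpy as np
from scipy.special import expit

margins = np.array([0.08, -0.02, 0.01])   # normalized margin per contest (illustrative)
weights = np.array([10, 16, 11])          # national summary weights, e.g. electoral votes
called = np.array([1.0, np.nan, np.nan])  # 1 = called for LHS party, 0 = called for RHS, NaN = uncalled

T = 50                                    # temperature; larger values approach a hard threshold
win_prob = expit(T * margins)             # soft threshold on the margin
# called contests are forced to exactly 1 or 0, while fmax/fmin ignore NaN so
# uncalled contests keep their computed probability
win_prob_called = np.fmin(np.fmax(win_prob, called), called)
expected_lhs_total = np.sum(weights * win_prob_called)
```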
+ if called_states is None: + called_states = {i: -1 for i in range(self.aggregate_error_B_1.shape[0])} + + if len(called_states) != self.aggregate_error_B_1.shape[0]: + raise BootstrapElectionModelException( + f"called_states is of length {len(called_states)} but there are {self.aggregate_error_B_1.shape[0]} contests" + ) + + # sort in order to get in the same order as the contests, which have been sorted when getting dummies for aggregate indicators + # in get_aggregate_prediction_intervals + nat_sum_data_dict_sorted = sorted(nat_sum_data_dict.items()) + nat_sum_data_dict_sorted_vals = np.asarray([x[1] for x in nat_sum_data_dict_sorted]).reshape(-1, 1) + + called_states_sorted = sorted(called_states.items()) + called_states_sorted_vals = ( + np.asarray([x[1] for x in called_states_sorted]).reshape(-1, 1) * 1.0 + ) # multiplying by 1.0 to turn into floats + # since we max/min the state called values with contest win probabilities, we don't want the uncalled states to have a number to max/min + # in order for those states to keep their original computed win probability + called_states_sorted_vals[called_states_sorted_vals == -1] = np.nan + + # technically we do not need to do this division, since the margin (ie. aggregate_error_B_1 and aggregate_error_B_2) + # are enough to know who has won a contest (we don't need the normalized margin) + # but we normalize so that the temperature we use to set aggressiveness of sigmoid is in the right scale + + # divided_error_B_1 = np.nan_to_num(self.aggregate_error_B_1 / self.aggregate_baseline_margin.reshape(-1, 1)) + divided_error_B_1 = np.nan_to_num(self.aggregate_error_B_1 / self.aggregate_error_B_3) + # divided_error_B_2 = np.nan_to_num(self.aggregate_error_B_2 / self.aggregate_baseline_margin.reshape(-1, 1)) + divided_error_B_2 = np.nan_to_num(self.aggregate_error_B_2 / self.aggregate_error_B_4) + + if self.hard_threshold: + aggregate_dem_prob_B_1 = divided_error_B_1 > 0.5 + aggregate_dem_prob_B_2 = divided_error_B_2 > 0.5 + else: + aggregate_dem_prob_B_1 = expit(self.T * divided_error_B_1) + aggregate_dem_prob_B_2 = expit(self.T * divided_error_B_2) + + # since called_states_sorted_vals has value 1 if the state is called for the LHS party, taking the max with the probabilities + # gives a probability of 1 for the LHS party + # and called_states_sorted_vals has value 0 if the state is called for the RHS party, so taking the min with the probabilities + # gives a probability of 0 for the LHS party + # and called_states_sorted_vals has value np.nan if the state is uncalled; since we use fmax/fmin, the actual number + # and not NaN gets propagated, so we maintain the probability + aggregate_dem_prob_B_1_called = np.fmin( + np.fmax(aggregate_dem_prob_B_1, called_states_sorted_vals), called_states_sorted_vals + ) + aggregate_dem_prob_B_2_called = np.fmin( + np.fmax(aggregate_dem_prob_B_2, called_states_sorted_vals), called_states_sorted_vals + ) + + # multiply by weights of each contest + aggregate_dem_vals_B_1 = nat_sum_data_dict_sorted_vals * aggregate_dem_prob_B_1_called + aggregate_dem_vals_B_2 = nat_sum_data_dict_sorted_vals * aggregate_dem_prob_B_2_called + + # calculate the error in our national aggregate prediction + aggregate_dem_vals_B = np.sum(aggregate_dem_vals_B_1, axis=0) - np.sum(aggregate_dem_vals_B_2, axis=0) + + # we also need a national aggregate point prediction + if self.hard_threshold: + aggregate_dem_probs_total = self.aggregate_perc_margin_total > 0.5 + else: + aggregate_dem_probs_total = expit(self.T * self.aggregate_perc_margin_total) + + # same
as for the intervals + aggregate_dem_probs_total_called = np.fmin( + np.fmax(aggregate_dem_probs_total, called_states_sorted_vals), called_states_sorted_vals + ) + aggregate_dem_vals_pred = np.sum(nat_sum_data_dict_sorted_vals * aggregate_dem_probs_total_called) + + lower_q, upper_q = self._get_quantiles(alpha) + + interval_upper, interval_lower = ( + aggregate_dem_vals_pred - np.quantile(aggregate_dem_vals_B, q=[lower_q, upper_q], axis=-1).T + ).T + national_summary_estimates = { + "margin": [ + aggregate_dem_vals_pred + base_to_add, + interval_lower + base_to_add, + interval_upper + base_to_add, + ] + } + + print(national_summary_estimates) + + return national_summary_estimates diff --git a/tests/conftest.py b/tests/conftest.py index 871879e4..7d0cfad6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,11 +3,12 @@ import os import sys +import numpy as np import pandas as pd import pytest from elexmodel.client import HistoricalModelClient, ModelClient -from elexmodel.models import BaseElectionModel, ConformalElectionModel +from elexmodel.models import BaseElectionModel, BootstrapElectionModel, ConformalElectionModel _TEST_FOLDER = os.path.dirname(__file__) FIXTURE_DIR = os.path.join(_TEST_FOLDER, "fixtures") @@ -66,6 +67,18 @@ def conformal_election_model(): return ConformalElectionModel.ConformalElectionModel(model_settings) +@pytest.fixture(scope="function") +def bootstrap_election_model(): + model_settings = {"features": ["baseline_normalized_margin"]} + return BootstrapElectionModel.BootstrapElectionModel(model_settings) + + +@pytest.fixture(scope="session") +def rng(): + seed = 1941 + return np.random.default_rng(seed=seed) + + @pytest.fixture(scope="function") def va_config(get_fixture): path = os.path.join("config", "2017-11-07_VA_G.json") diff --git a/tests/handlers/test_combined_data.py b/tests/handlers/test_combined_data.py index d1cd53aa..8b5f1ac9 100644 --- a/tests/handlers/test_combined_data.py +++ b/tests/handlers/test_combined_data.py @@ -174,7 +174,9 @@ def test_get_reporting_data(va_governor_county_data): # no fixed effects combined_data_handler = CombinedDataHandler(va_governor_county_data, current_data, estimands, geographic_unit_type) - observed_data = combined_data_handler.get_reporting_units(100) + turnout_factor_lower = 0.5 + turnout_factor_upper = 1.5 + observed_data = combined_data_handler.get_reporting_units(100, turnout_factor_lower, turnout_factor_upper) assert observed_data.shape[0] == 20 assert observed_data.reporting.iloc[0] == 1 assert observed_data.reporting.sum() == 20 @@ -206,58 +208,15 @@ def test_get_reporting_data_dropping_with_turnout_factor(va_governor_county_data & (combined_data_handler.data.turnout_factor < turnout_factor_lower) ].shape[0] - observed_data = combined_data_handler.get_reporting_units( - 100, turnout_factor_lower=turnout_factor_lower, turnout_factor_upper=turnout_factor_upper - ) + observed_data = combined_data_handler.get_reporting_units(100, turnout_factor_lower, turnout_factor_upper) - # 20 units should be reporting, but the additional ones are dropped to nonreporting because they are above/below threshold + # 20 units should be reporting, but the additional ones are dropped to unexpected because they are above/below threshold # and so are subtracted from the reporting ones assert observed_data.shape[0] == 20 - ( reporting_units_above_turnout_factor_threshold + reporting_units_below_turnout_factor_threshold ) -def test_get_nonreporting_adding_with_turnout_factor(va_governor_county_data): - election_id = 
"2017-11-07_VA_G" - office = "G" - geographic_unit_type = "county" - estimands = ["turnout"] - - live_data_handler = MockLiveDataHandler( - election_id, office, geographic_unit_type, estimands, data=va_governor_county_data - ) - n = 20 - current_data = live_data_handler.get_n_fully_reported(n=n) - - va_governor_county_data["baseline_weights"] = va_governor_county_data.baseline_turnout - va_governor_county_data["last_election_results_turnout"] = va_governor_county_data.baseline_turnout + 1 - - combined_data_handler = CombinedDataHandler(va_governor_county_data, current_data, estimands, geographic_unit_type) - - turnout_factor_lower = 0.95 - turnout_factor_upper = 1.2 - - reporting_units_above_turnout_factor_threshold = combined_data_handler.data[ - combined_data_handler.data.turnout_factor > turnout_factor_upper - ].shape[0] - reporting_units_below_turnout_factor_threshold = combined_data_handler.data[ - (combined_data_handler.data.percent_expected_vote == 100) - & (combined_data_handler.data.turnout_factor < turnout_factor_lower) - ].shape[0] - - nonreporting_data = combined_data_handler.get_nonreporting_units( - 100, turnout_factor_lower=turnout_factor_lower, turnout_factor_upper=turnout_factor_upper - ) - - assert ( - nonreporting_data.shape[0] - == va_governor_county_data.shape[0] - - n - + reporting_units_above_turnout_factor_threshold - + reporting_units_below_turnout_factor_threshold - ) - - def test_get_unexpected_units_county_district(va_assembly_county_data): election_id = "2017-11-07_VA_G" office = "Y" @@ -279,7 +238,11 @@ def test_get_unexpected_units_county_district(va_assembly_county_data): va_assembly_county_data["last_election_results_turnout"] = va_assembly_county_data.baseline_turnout + 1 combined_data_handler = CombinedDataHandler(va_assembly_county_data, current_data, estimands, geographic_unit_type) - unexpected_data = combined_data_handler.get_unexpected_units(100, ["county_fips", "district"]) + turnout_factor_lower = 0 # set to extreme values so as not to add any more "unexpected" + turnout_factor_upper = 100 + unexpected_data = combined_data_handler.get_unexpected_units( + 100, ["county_fips", "district"], turnout_factor_lower, turnout_factor_upper + ) assert unexpected_data.shape[0] == unexpected_units assert unexpected_data[unexpected_data.county_fips == ""].shape[0] == 0 assert unexpected_data["county_fips"].map(lambda x: len(x) == 6).all() @@ -313,9 +276,69 @@ def test_get_unexpected_units_county(va_governor_county_data): va_governor_county_data["last_election_results_turnout"] = va_governor_county_data.baseline_turnout + 1 combined_data_handler = CombinedDataHandler(va_governor_county_data, current_data, estimands, geographic_unit_type) - unexpected_data = combined_data_handler.get_unexpected_units(100, ["county_fips"]) + turnout_factor_lower = 0.5 + turnout_factor_upper = 1.5 + unexpected_data = combined_data_handler.get_unexpected_units( + 100, ["county_fips"], turnout_factor_lower, turnout_factor_upper + ) assert unexpected_data.shape[0] == reporting_unexpected_units + 1 assert unexpected_data[unexpected_data.county_fips == ""].shape[0] == 0 assert unexpected_data["county_fips"].map(lambda x: len(x) == 6).all() # test that nonreporting unexpected unit is captured here assert unexpected_data[unexpected_data.percent_expected_vote == 50].shape[0] == 1 + + +def test_zero_baseline_turnout_as_unexpected(va_governor_county_data): + election_id = "2017-11-07_VA_G" + office = "G" + geographic_unit_type = "county" + estimands = ["turnout"] + + live_data_handler = 
MockLiveDataHandler( + election_id, office, geographic_unit_type, estimands, data=va_governor_county_data + ) + current_data = live_data_handler.get_n_fully_reported(n=20) + # Add an additional row of a nonreporting unexpected unit that we test below + va_governor_county_data.loc[0, "baseline_turnout"] = 0 + va_governor_county_data["baseline_weights"] = va_governor_county_data.baseline_turnout + va_governor_county_data["last_election_results_turnout"] = va_governor_county_data.baseline_turnout + 1 + combined_data_handler = CombinedDataHandler(va_governor_county_data, current_data, estimands, geographic_unit_type) + turnout_factor_lower = 0.5 + turnout_factor_upper = 1.5 + unexpected_data = combined_data_handler.get_unexpected_units( + 100, ["county_fips"], turnout_factor_lower, turnout_factor_upper + ) + assert va_governor_county_data.loc[0].geographic_unit_fips in unexpected_data.geographic_unit_fips.tolist() + assert len(unexpected_data) == 1 + + reporting_units = combined_data_handler.get_reporting_units(100, turnout_factor_lower, turnout_factor_upper) + assert len(reporting_units) == 20 - 1 + assert va_governor_county_data.loc[0].geographic_unit_fips not in reporting_units.geographic_unit_fips.tolist() + + nonreporting_units = combined_data_handler.get_nonreporting_units(100, turnout_factor_lower, turnout_factor_upper) + assert va_governor_county_data.loc[0].geographic_unit_fips not in nonreporting_units.geographic_unit_fips.tolist() + + +def test_turnout_factor_as_unexpected(va_governor_county_data): + election_id = "2017-11-07_VA_G" + office = "G" + geographic_unit_type = "county" + estimands = ["turnout"] + + live_data_handler = MockLiveDataHandler( + election_id, office, geographic_unit_type, estimands, data=va_governor_county_data + ) + current_data = live_data_handler.get_n_fully_reported(n=133) + # Add an additional row of a nonreporting unexpected unit that we test below + va_governor_county_data.loc[0, "baseline_turnout"] = 0 + va_governor_county_data["baseline_weights"] = va_governor_county_data.baseline_turnout + va_governor_county_data["last_election_results_turnout"] = va_governor_county_data.baseline_turnout + 1 + combined_data_handler = CombinedDataHandler(va_governor_county_data, current_data, estimands, geographic_unit_type) + turnout_factor_lower = 0.95 + turnout_factor_upper = 1.2 + unexpected_data = combined_data_handler.get_unexpected_units( + 100, ["county_fips"], turnout_factor_lower, turnout_factor_upper + ) + over = combined_data_handler.data[combined_data_handler.data.turnout_factor >= turnout_factor_upper].shape[0] + under = combined_data_handler.data[combined_data_handler.data.turnout_factor < turnout_factor_lower].shape[0] + unexpected_data.shape[0] == over + under diff --git a/tests/handlers/test_config.py b/tests/handlers/test_config.py index 2567b535..0aed2ef3 100644 --- a/tests/handlers/test_config.py +++ b/tests/handlers/test_config.py @@ -77,7 +77,7 @@ def test_get_estimands_general(va_config): office = "G" estimands = config_handler.get_estimands(office) - expected = ["dem", "gop", "turnout"] + expected = ["dem", "gop", "turnout", "margin"] assert expected == estimands @@ -120,9 +120,10 @@ def test_get_features(va_config): office = "G" features = config_handler.get_features(office) - assert len(features) == 13 + assert len(features) == 14 assert features[0] == "age_le_30" - assert features[-1] == "percent_bachelor_or_higher" + assert features[-2] == "percent_bachelor_or_higher" + assert features[-1] == "baseline_normalized_margin" def 
test_get_aggregates(va_config): diff --git a/tests/handlers/test_estimandizer.py b/tests/handlers/test_estimandizer.py index d2321fcc..69f1fd1e 100644 --- a/tests/handlers/test_estimandizer.py +++ b/tests/handlers/test_estimandizer.py @@ -30,7 +30,6 @@ def test_add_estimand_results_historical(va_governor_county_data): (output_df, result_columns) = estimandizer.add_estimand_results(va_data_copy, estimands, True) assert "results_party_vote_share_dem" in output_df.columns - assert "results_weights" in output_df.columns assert result_columns == ["results_party_vote_share_dem", "results_turnout"] diff --git a/tests/handlers/test_featurizer.py b/tests/handlers/test_featurizer.py index 64e1fe16..75a86f74 100644 --- a/tests/handlers/test_featurizer.py +++ b/tests/handlers/test_featurizer.py @@ -347,8 +347,8 @@ def test_generate_fixed_effects(va_governor_county_data): handle_unreporting="drop", ) - reporting_data = combined_data_handler.get_reporting_units(99) - nonreporting_data = combined_data_handler.get_nonreporting_units(99) + reporting_data = combined_data_handler.get_reporting_units(99, 0.5, 1.5) + nonreporting_data = combined_data_handler.get_nonreporting_units(99, 0.5, 1.5) featurizer = Featurizer([], {"county_classification": "all"}) @@ -382,8 +382,8 @@ def test_generate_fixed_effects(va_governor_county_data): featurizer = Featurizer([], {"county_classification": ["all"], "county_fips": ["all"]}) - reporting_data = combined_data_handler.get_reporting_units(99) - nonreporting_data = combined_data_handler.get_nonreporting_units(99) + reporting_data = combined_data_handler.get_reporting_units(99, 0.5, 1.5) + nonreporting_data = combined_data_handler.get_nonreporting_units(99, 0.5, 1.5) n_train = reporting_data.shape[0] all_units = pd.concat([reporting_data, nonreporting_data], axis=0) @@ -438,8 +438,8 @@ def test_generate_fixed_effects_not_all_reporting(va_governor_county_data): handle_unreporting="drop", ) - reporting_data = combined_data_handler.get_reporting_units(99) - nonreporting_data = combined_data_handler.get_nonreporting_units(99) + reporting_data = combined_data_handler.get_reporting_units(99, 0.5, 1.5) + nonreporting_data = combined_data_handler.get_nonreporting_units(99, 0.5, 1.5) featurizer = Featurizer([], {"county_fips": ["all"]}) n_train = reporting_data.shape[0] @@ -504,8 +504,8 @@ def test_generate_fixed_effects_mixed_reporting(va_governor_precinct_data): handle_unreporting="drop", ) - reporting_data = combined_data_handler.get_reporting_units(99) - nonreporting_data = combined_data_handler.get_nonreporting_units(99) + reporting_data = combined_data_handler.get_reporting_units(99, 0.5, 1.5) + nonreporting_data = combined_data_handler.get_nonreporting_units(99, 0.5, 1.5) featurizer = Featurizer([], ["county_fips"]) diff --git a/tests/integration_test.sh b/tests/integration_test.sh index 33a3e4e6..c25be886 100644 --- a/tests/integration_test.sh +++ b/tests/integration_test.sh @@ -28,3 +28,6 @@ elexmodel 2021-11-02_VA_G --estimands=dem --office_id=Y --geographic_unit_type=p echo "Running AZ 2021 Republican Senate primary precinct model" elexmodel 2020-08-04_AZ_R --estimands mcsally_61631 --office_id=S --geographic_unit_type=precinct --percent_reporting 20 --aggregates=postal_code + +echo "Running VA Governor 2017 precinct model using the bootstrap model with model parameters " +elexmodel 2017-11-07_VA_G --estimands=margin --office_id=G --geographic_unit_type=precinct --pi_method=bootstrap --percent_reporting=10 --aggregates=postal_code --fixed_effects=county_classification 
--features=baseline_normalized_margin --model_parameters='{"B": 10}' \ No newline at end of file diff --git a/tests/models/test_bootstrap_election_model.py b/tests/models/test_bootstrap_election_model.py new file mode 100644 index 00000000..1b608802 --- /dev/null +++ b/tests/models/test_bootstrap_election_model.py @@ -0,0 +1,1045 @@ +import numpy as np +import pandas as pd +import pytest +import scipy as sp + +from elexmodel.handlers.data.CombinedData import CombinedDataHandler +from elexmodel.handlers.data.LiveData import MockLiveDataHandler +from elexmodel.handlers.data.PreprocessedData import PreprocessedDataHandler +from elexmodel.models.BootstrapElectionModel import BootstrapElectionModelException, OLSRegressionSolver + +TOL = 1e-5 + + +def test_cross_validation(bootstrap_election_model, rng): + """ + Tests cross validation for finding the optimal lambda. + """ + x = rng.normal(loc=2, scale=5, size=(100, 75)) + x[:, 0] = 1 + beta = rng.normal(size=(75, 1)) + y = x @ beta + rng.normal(loc=0, scale=1, size=(100, 1)) + lambdas = [-0.1, 0, 0.01, 0.1, 1, 100] + res = bootstrap_election_model.cv_lambda(x, y, lambdas) + assert res == 0.01 + + beta = rng.normal(scale=0.001, size=(75, 1)) + y = x @ beta + rng.normal(loc=0, scale=1, size=(100, 1)) + lambdas = [-0.1, 0, 0.01, 0.1, 1, 100] + res = bootstrap_election_model.cv_lambda(x, y, lambdas) + assert res == 100 + + +def test_estimate_epsilon(bootstrap_election_model): + """ + Testing estimating the contest level effect. + """ + # the contest level effect (epsilon) is estiamted as the average of the residuals in that contest + # if a contest has less then 2 units reporting then the contest level effect is set to zero + residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1) + aggregate_indicator = np.asarray([[1, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 1]]).T + epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator) + assert epsilon_hat[0] == (residuals[0] + residuals[1]) / 2 + assert epsilon_hat[1] == (residuals[2] + residuals[3]) / 2 + assert epsilon_hat[2] == 0 # since the aggrgate indicator for that row only has one 1 which is less than 2 + + +def test_estimate_delta(bootstrap_election_model): + """ + Testing estimating the unit level error + """ + # the unit level error is the difference between the residu and the state level effect + residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1) + aggregate_indicator = np.asarray([[1, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 1]]).T + epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator) # [0.5, 0.55, 0] + delta_hat = bootstrap_election_model._estimate_delta(residuals, epsilon_hat, aggregate_indicator) + desired = np.asarray([0.0, 0.0, -0.25, 0.25, 0.5]) + np.testing.assert_array_almost_equal(desired, delta_hat) + + +def test_estimate_model_error(bootstrap_election_model, rng): + x = rng.normal(loc=2, scale=5, size=(100, 5)) + x[:, 0] = 1 + beta = rng.integers(low=-100, high=100, size=(5, 1)) + y = x @ beta + ols_regression = OLSRegressionSolver() + ols_regression.fit(x, y) + aggregate_indicator = rng.multivariate_hypergeometric([1] * 5, 1, size=100) + residuals, epsilon_hat, delta_hat = bootstrap_election_model._estimate_model_errors( + ols_regression, x, y, aggregate_indicator + ) + y_hat = ols_regression.predict(x) + np.testing.assert_array_almost_equal(ols_regression.residuals(y, y_hat, loo=True, center=True), residuals) + # epsilon_hat and delta_hat are tested above + + +def 
test_get_strata(bootstrap_election_model): + reporting_units = pd.DataFrame( + [["a", True, True], ["b", True, True], ["c", True, True]], + columns=["county_classification", "reporting", "expected"], + ) + nonreporting_units = pd.DataFrame( + [["c", False, True], ["d", False, True]], columns=["county_classification", "reporting", "expected"] + ) + x_train_strata, x_test_strata = bootstrap_election_model._get_strata(reporting_units, nonreporting_units) + + assert "intercept" in x_train_strata.columns + # a has been dropped + assert "county_classification_b" in x_train_strata.columns + assert "county_classification_c" in x_train_strata.columns + assert "county_classification_d" in x_train_strata.columns + np.testing.assert_array_almost_equal(x_train_strata.values, np.asarray([[1, 0, 0, 0], [1, 1, 0, 0], [1, 0, 1, 0]])) + + assert "intercept" in x_test_strata.columns + assert "county_classification_b" in x_test_strata.columns + assert "county_classification_c" in x_test_strata.columns + assert "county_classification_d" in x_test_strata.columns + + np.testing.assert_array_almost_equal(x_test_strata.values, np.asarray([[1, 0, 1, 0], [1, 0, 0, 1]])) + + +def test_estimate_strata_dist(bootstrap_election_model, rng): + x_train, x_test = None, None + x_train_strata = pd.DataFrame([[1, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x_test_strata = pd.DataFrame([[1, 1, 0], [1, 0, 1], [1, 0, 1]]) + delta_hat = np.asarray([-0.3, 0.5, 0.2, 0.25, 0.28]) + lb = 0.1 + ub = 0.3 + ppf, cdf = bootstrap_election_model._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_hat, lb, ub + ) + assert ppf[(1, 0, 0)](0.01) == pytest.approx(delta_hat[:2].min()) + assert ppf[(1, 0, 0)](0.34) == pytest.approx(lb) + assert ppf[(1, 0, 0)](0.66) == pytest.approx(ub) + assert ppf[(1, 0, 0)](0.99) == pytest.approx(delta_hat[:2].max()) + + # since all elements of the second strata are > 0.1 and < 0.3 we use the lb and ub for the first quantiles + assert ppf[(1, 1, 0)](0.01) == pytest.approx(lb) + assert ppf[(1, 1, 0)](0.25) == pytest.approx(lb) + assert ppf[(1, 1, 0)](0.26) == pytest.approx(delta_hat[2:].min()) + assert ppf[(1, 1, 0)](0.50) == pytest.approx(np.median(delta_hat[2:])) + assert ppf[(1, 1, 0)](0.51) == pytest.approx(delta_hat[2:].max()) + assert ppf[(1, 1, 0)](0.74) == pytest.approx(delta_hat[2:].max()) + assert ppf[(1, 1, 0)](0.75) == pytest.approx(ub) + assert ppf[(1, 1, 0)](0.99) == pytest.approx(ub) + + assert ppf[(1, 0, 1)](0.49) == pytest.approx(lb) + assert ppf[(1, 0, 1)](0.50) == pytest.approx(ub) + + # anything less than -0.3 returns 0.01 + assert cdf[(1, 0, 0)](-0.4) == pytest.approx(0.01) + # lower QR sees three datapoints: -0.3, 0.1 and 0.5, so -0.3 is at the right boundary of 0.01 and 0.33 + assert cdf[(1, 0, 0)](-0.3) == pytest.approx(0.33) + # upper QR sees three datapoints: -0.3, 0.3 and 0.5 and so 0.3 is that right boundary of 0.33 and 0.66 + assert cdf[(1, 0, 0)](0.3) == pytest.approx(0.66) + # upper QR sees three datapoints: -0.3, 0.3 and 0.5 and so 0.5 is on the right boundary of 0.66 and 0.99 + assert cdf[(1, 0, 0)](0.5) == pytest.approx(0.99) + # since interp keyword righ=1 + assert cdf[(1, 0, 0)](0.51) == pytest.approx(1) + + assert cdf[(1, 1, 0)](0.1) == pytest.approx(0.01) + assert cdf[(1, 1, 0)](0.2) == pytest.approx(0.49) + assert cdf[(1, 1, 0)](0.28) == pytest.approx(0.74) + assert cdf[(1, 1, 0)](0.3) == pytest.approx(0.99) + + assert cdf[(1, 0, 1)](0.1) == pytest.approx(0.01) + assert cdf[(1, 0, 1)](0.3) == pytest.approx(0.99) + + +def 
test_generate_nonreporting_bounds(bootstrap_election_model, rng): + nonreporting_units = pd.DataFrame( + [[0.1, 1.2, 75], [0.8, 0.8, 24], [0.1, 0.01, 0], [-0.2, 0.8, 99], [-0.3, 0.9, 100]], + columns=["results_normalized_margin", "turnout_factor", "percent_expected_vote"], + ) + + # assumes that all outstanding vote will go to one of the two parties + lower, upper = bootstrap_election_model._generate_nonreporting_bounds( + nonreporting_units, "results_normalized_margin" + ) + + # hand checked in excel + assert lower[0] == pytest.approx(-0.175) + assert upper[0] == pytest.approx(0.325) + + assert lower[1] == pytest.approx(-0.568) + assert upper[1] == pytest.approx(0.952) + + # if expected vote is close to 0 or 1 we set the bounds to be the extreme case + assert lower[2] == bootstrap_election_model.y_unobserved_lower_bound + assert upper[2] == bootstrap_election_model.y_unobserved_upper_bound + + assert lower[3] == pytest.approx(-0.208) + assert upper[3] == pytest.approx(-0.188) + + assert lower[4] == bootstrap_election_model.y_unobserved_lower_bound + assert upper[4] == bootstrap_election_model.y_unobserved_upper_bound + + lower, upper = bootstrap_election_model._generate_nonreporting_bounds(nonreporting_units, "turnout_factor") + + assert lower[0] == pytest.approx(0.96) + assert upper[0] == pytest.approx(4.8) + + assert lower[1] == pytest.approx(1.081081081) + assert upper[1] == pytest.approx(80) # this is 80 since we divide 0.8 / 0.01 (it's clipped) + + assert lower[2] == bootstrap_election_model.z_unobserved_lower_bound + assert upper[2] == bootstrap_election_model.z_unobserved_upper_bound + + assert lower[3] == pytest.approx(0.536912752) + assert upper[3] == pytest.approx(1.632653061) + + assert lower[4] == bootstrap_election_model.z_unobserved_lower_bound + assert upper[4] == bootstrap_election_model.z_unobserved_upper_bound + + # changing parameters + bootstrap_election_model.y_unobserved_lower_bound = -0.8 + bootstrap_election_model.y_unobserved_upper_bound = 0.8 + + lower, upper = bootstrap_election_model._generate_nonreporting_bounds( + nonreporting_units, "results_normalized_margin" + ) + assert lower[0] == pytest.approx(-0.125) + assert upper[0] == pytest.approx(0.275) + + assert lower[1] == pytest.approx(-0.416) + assert upper[1] == pytest.approx(0.8) + + assert lower[2] == bootstrap_election_model.y_unobserved_lower_bound + assert upper[2] == bootstrap_election_model.y_unobserved_upper_bound + + assert lower[3] == pytest.approx(-0.206) + assert upper[3] == pytest.approx(-0.19) + + assert lower[4] == bootstrap_election_model.y_unobserved_lower_bound + assert upper[4] == bootstrap_election_model.y_unobserved_upper_bound + + bootstrap_election_model.y_unobserved_lower_bound = 0.8 + bootstrap_election_model.y_unobserved_upper_bound = 1.2 + bootstrap_election_model.percent_expected_vote_error_bound = 0.1 + + lower, upper = bootstrap_election_model._generate_nonreporting_bounds(nonreporting_units, "turnout_factor") + assert lower[0] == pytest.approx(1.411764706) + assert upper[0] == pytest.approx(1.846153846) + + assert lower[1] == pytest.approx(2.352941176) + assert upper[1] == pytest.approx(5.714285714) + + assert lower[2] == bootstrap_election_model.z_unobserved_lower_bound + assert upper[2] == bootstrap_election_model.z_unobserved_upper_bound + + assert lower[3] == pytest.approx(0.733944954) + assert upper[3] == pytest.approx(0.898876404) + + assert lower[4] == bootstrap_election_model.z_unobserved_lower_bound + assert upper[4] == bootstrap_election_model.z_unobserved_upper_bound 
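The strata tests below exercise the CDF/PPF round trip (probability integral transform) described in the model's comments; here is a toy version of that resampling idea, assuming a single stratum with made-up residuals, not the library's `_estimate_strata_dist` or `_strata_pit`:

```python
import numpy as np

rng = np.random.default_rng(0)
deltas = np.sort(np.array([-0.3, 0.2, 0.25, 0.28, 0.5]))  # residuals observed in one stratum
percentiles = np.linspace(0.01, 0.99, deltas.shape[0])    # percentile assigned to each sorted residual


def cdf(d):
    # residual -> approximate percentile
    return np.interp(d, deltas, percentiles)


def ppf(u):
    # percentile -> residual
    return np.interp(u, percentiles, deltas)


uniforms = cdf(deltas)                                          # move residuals into [0, 1]
resampled = ppf(rng.choice(uniforms, size=1000, replace=True))  # resample uniforms, map back to residuals
```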
+ + +def test_strata_pit(bootstrap_election_model, rng): + x_train, x_test = None, None + x_train_strata = pd.DataFrame([[1, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x_test_strata = pd.DataFrame([[1, 1, 0], [1, 0, 1], [1, 0, 1]]) + delta_hat = np.asarray([-0.3, 0.5, 0.2, 0.25, 0.28]) + lb = 0.1 + ub = 0.3 + ppf, cdf = bootstrap_election_model._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_hat, lb, ub + ) + + x_train_strata_unique = np.unique(x_train_strata, axis=0).astype(int) + uniforms = bootstrap_election_model._strata_pit(x_train_strata, x_train_strata_unique, delta_hat, cdf) + + assert uniforms[0] == pytest.approx( + 0.33 + ) # since -0.3 is the lowest of the three elements [-0.3, 0.5, 0.3] where 0.3 is the UB (LB is -0.3) + assert uniforms[1] == pytest.approx(0.99) # since 0.5 is the largest + assert uniforms[2] == pytest.approx(0.49) # [-0.3, 0.2, 0.25, 0.28, 0.3] + assert uniforms[3] == pytest.approx(0.5) + assert uniforms[4] == pytest.approx(0.74) + + +def test_bootstrap_deltas(bootstrap_election_model): + x_train, x_test = None, None + x_train_strata = pd.DataFrame([[1, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x_test_strata = pd.DataFrame([[1, 1, 0], [1, 0, 1], [1, 0, 1]]) + delta_hat = np.asarray([-0.3, 0.5, 0.2, 0.25, 0.28]) + lb = 0.1 + ub = 0.3 + ppf, cdf = bootstrap_election_model._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_hat, lb, ub + ) + x_train_strata_unique = np.unique(x_train_strata, axis=0).astype(int) + uniforms = bootstrap_election_model._strata_pit(x_train_strata, x_train_strata_unique, delta_hat, cdf) + unifs = np.concatenate([uniforms, uniforms], axis=1) + x_train_strata_unique = np.unique(x_train_strata, axis=0).astype(int) + bootstrap_deltas_y, bootstrap_deltas_z = bootstrap_election_model._bootstrap_deltas( + unifs, x_train_strata, x_train_strata_unique, ppf, ppf + ) + assert bootstrap_deltas_y.shape == (delta_hat.shape[0], bootstrap_election_model.B) + assert bootstrap_deltas_z.shape == (delta_hat.shape[0], bootstrap_election_model.B) + # testing that all elements of bootstrap delta is one of delta hat or lb or ub + assert np.isclose( + bootstrap_deltas_z.flatten().reshape(1, -1), np.concatenate([delta_hat, [0.1, 0.3]]).reshape(-1, 1), rtol=0.001 + ).any(0).mean() == pytest.approx(1) + assert np.isclose( + bootstrap_deltas_y.flatten().reshape(1, -1), np.concatenate([delta_hat, [0.1, 0.3]]).reshape(-1, 1), rtol=0.001 + ).any(0).mean() == pytest.approx(1) + + +def test_bootstrap_epsilons(bootstrap_election_model): + residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1) + aggregate_indicator = np.asarray([[1, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 1]]).T + epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator) + delta_hat = bootstrap_election_model._estimate_delta(residuals, epsilon_hat, aggregate_indicator) + + x_train, x_test = None, None + x_train_strata = pd.DataFrame([[1, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x_test_strata = pd.DataFrame([[1, 1, 0], [1, 0, 1], [1, 0, 1]]) + x_train_strata_unique = np.unique(x_train_strata, axis=0).astype(int) + lb = 0.1 + ub = 0.3 + ppf, cdf = bootstrap_election_model._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_hat, lb, ub + ) + + bootstrap_epsilons_y, bootstrap_epsilons_z = bootstrap_election_model._bootstrap_epsilons( + epsilon_hat, epsilon_hat, x_train_strata, x_train_strata_unique, ppf, ppf, aggregate_indicator + ) + assert 
bootstrap_epsilons_y.shape == (epsilon_hat.shape[0], bootstrap_election_model.B) + assert bootstrap_epsilons_z.shape == (epsilon_hat.shape[0], bootstrap_election_model.B) + + # the last aggregate only has one element, so its epsilon_hat is zero and the bootstrapped versions should be zero as well + assert np.isclose(bootstrap_epsilons_y[-1], 0).mean() == pytest.approx(1) + # the others have the correct mean + assert bootstrap_epsilons_y[0].mean() == pytest.approx(epsilon_hat[0], rel=0.1) + assert bootstrap_epsilons_y[1].mean() == pytest.approx(epsilon_hat[1], rel=0.1) + + +def test_bootstrap_errors(bootstrap_election_model): + residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1) + aggregate_indicator = np.asarray([[1, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 1]]).T + epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator) + delta_hat = bootstrap_election_model._estimate_delta(residuals, epsilon_hat, aggregate_indicator) + + x_train, x_test = None, None + x_train_strata = pd.DataFrame([[1, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x_test_strata = pd.DataFrame([[1, 1, 0], [1, 0, 1], [1, 0, 1]]) + lb = 0.1 + ub = 0.3 + ppf, cdf = bootstrap_election_model._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_hat, lb, ub + ) + epsilon_B, delta_B = bootstrap_election_model._bootstrap_errors( + epsilon_hat, epsilon_hat, delta_hat, delta_hat, x_train_strata, cdf, cdf, ppf, ppf, aggregate_indicator + ) + epsilon_y_B, epsilon_z_B = epsilon_B + delta_y_B, delta_z_B = delta_B + + assert epsilon_y_B.shape == (aggregate_indicator.shape[1], bootstrap_election_model.B) + assert epsilon_z_B.shape == (aggregate_indicator.shape[1], bootstrap_election_model.B) + assert delta_y_B.shape == (residuals.shape[0], bootstrap_election_model.B) + assert delta_z_B.shape == (residuals.shape[0], bootstrap_election_model.B) + # the values of bootstrap epsilon and delta are checked above + + +def test_sample_test_delta(bootstrap_election_model): + residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1) + aggregate_indicator = np.asarray([[1, 1, 0, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 0, 1]]).T + epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator) + delta_hat = bootstrap_election_model._estimate_delta(residuals, epsilon_hat, aggregate_indicator) + + x_train, x_test = None, None + x_train_strata = pd.DataFrame([[1, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x_test_strata = pd.DataFrame([[1, 1, 0], [1, 0, 1], [1, 0, 1], [1, 0, 0]]) + lb = 0.1 + ub = 0.3 + ppf, cdf = bootstrap_election_model._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_hat, lb, ub + ) + + delta_y, delta_z = bootstrap_election_model._sample_test_delta(x_test_strata, ppf, ppf) + + assert delta_y.shape == (x_test_strata.shape[0], bootstrap_election_model.B) + assert delta_z.shape == (x_test_strata.shape[0], bootstrap_election_model.B) + # delta_y should be in delta_hat or equal to lb or ub for all sampled uniforms, EXCEPT when we sample exactly at a break (i.e. between the percentiles of two samples) + # this should only happen ~1% of the time per breakpoint; for this stratum there are 5 such breakpoints (this stratum is (1, 1, 0)) + assert ( + np.isclose(delta_y[0].reshape(1, -1), np.concatenate([delta_hat, [0.1, 0.3]]).reshape(-1, 1), rtol=0.01) + .any(0) + .mean() + >= 0.94 + ) + # for this stratum there are 5 such breakpoints: this stratum is (1, 0, 0) + assert ( + np.isclose(delta_y[3].reshape(1, -1),
np.concatenate([delta_hat, [0.1, 0.3]]).reshape(-1, 1), rtol=0.01) + .any(0) + .mean() + >= 0.95 + ) + # one breakpoint at 0.1 and 0.3 for stratum (1, 0, 1) + assert ( + np.isclose(delta_y[1].reshape(1, -1), np.concatenate([delta_hat, [0.1, 0.3]]).reshape(-1, 1), rtol=0.01) + .any(0) + .mean() + >= 0.98 + ) + assert ( + np.isclose(delta_y[2].reshape(1, -1), np.concatenate([delta_hat, [0.1, 0.3]]).reshape(-1, 1), rtol=0.01) + .any(0) + .mean() + >= 0.98 + ) + + +def test_sample_test_epsilon(bootstrap_election_model): + residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5, 0.2]).reshape(-1, 1) + aggregate_indicator_train = np.asarray( + [[1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0]] + ) + aggregate_indicator_test = np.asarray( + [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0]] + ) + epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator_train) + + epsilon_y, epsilon_z = bootstrap_election_model._sample_test_epsilon( + residuals, residuals, epsilon_hat, epsilon_hat, aggregate_indicator_train, aggregate_indicator_test + ) + + assert epsilon_y.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B) + assert epsilon_z.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B) + + # aggregate 3 has no elements in the training set, and rows 2,3,5 in the test set + # are part of aggregate 3 + assert np.isclose(epsilon_z[0], 0).all() + assert np.isclose(epsilon_z[1], 0).all() + assert not np.isclose(epsilon_z[2], 0).all() + assert not np.isclose(epsilon_z[3], 0).all() + assert np.isclose(epsilon_z[4], 0).all() + assert not np.isclose(epsilon_z[5], 0).all() + + # testing that if epsilon_hat has only one element we return 0 + epsilon_y, epsilon_z = bootstrap_election_model._sample_test_epsilon( + residuals, residuals, [[4]], [[4]], aggregate_indicator_train, aggregate_indicator_test + ) + assert epsilon_y.shape == (1, bootstrap_election_model.B) + assert epsilon_z.shape == (1, bootstrap_election_model.B) + + assert np.isclose(epsilon_z[0], 0).all() + + +def test_sample_test_errors(bootstrap_election_model): + residuals = np.asarray([0.5, 0.5, 0.3, 0.8, 0.5]).reshape(-1, 1) + aggregate_indicator_train = np.asarray([[1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]]) + aggregate_indicator_test = np.asarray( + [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0]] + ) + epsilon_hat = bootstrap_election_model._estimate_epsilon(residuals, aggregate_indicator_train) + delta_hat = bootstrap_election_model._estimate_delta(residuals, epsilon_hat, aggregate_indicator_train) + + x_train, x_test = None, None + x_train_strata = pd.DataFrame([[1, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 0], [1, 1, 0]]) + x_test_strata = pd.DataFrame([[1, 1, 0], [1, 0, 1], [1, 0, 1], [1, 1, 0], [1, 0, 1], [1, 0, 0]]) + lb = 0.1 + ub = 0.3 + ppf, cdf = bootstrap_election_model._estimate_strata_dist( + x_train, x_train_strata, x_test, x_test_strata, delta_hat, lb, ub + ) + test_error_y, test_error_z = bootstrap_election_model._sample_test_errors( + residuals, + residuals, + epsilon_hat, + epsilon_hat, + x_test_strata, + ppf, + ppf, + aggregate_indicator_train, + aggregate_indicator_test, + ) + assert test_error_y.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B) + assert test_error_z.shape == (aggregate_indicator_test.shape[0], bootstrap_election_model.B) + # the values of the sampled epsilons and deltas are checked above + + +def
test_compute_bootstrap_errors(bootstrap_election_model, va_governor_county_data): + election_id = "2017-11-07_VA_G" + office_id = "G" + geographic_unit_type = "county" + estimands = ["margin"] + percent_reporting_threshold = 100 + + data_handler = MockLiveDataHandler( + election_id, office_id, geographic_unit_type, estimands, data=va_governor_county_data + ) + + data_handler.shuffle() + current_data = data_handler.get_percent_fully_reported(50) + + preprocessed_data_handler = PreprocessedDataHandler( + election_id, office_id, geographic_unit_type, estimands, {"margin": "margin"}, data=va_governor_county_data + ) + + combined_data_handler = CombinedDataHandler( + preprocessed_data_handler.data, current_data, estimands, geographic_unit_type, handle_unreporting="drop" + ) + + turnout_factor_lower = 0.5 + turnout_factor_upper = 2.0 + reporting_units = combined_data_handler.get_reporting_units( + percent_reporting_threshold, + turnout_factor_lower=turnout_factor_lower, + turnout_factor_upper=turnout_factor_upper, + ) + nonreporting_units = combined_data_handler.get_nonreporting_units( + percent_reporting_threshold, + turnout_factor_lower=turnout_factor_lower, + turnout_factor_upper=turnout_factor_upper, + ) + unexpected_units = combined_data_handler.get_unexpected_units( + percent_reporting_threshold, + ["postal_code"], + turnout_factor_lower=turnout_factor_lower, + turnout_factor_upper=turnout_factor_upper, + ) + + assert not bootstrap_election_model.ran_bootstrap + bootstrap_election_model.B = 10 + bootstrap_election_model.compute_bootstrap_errors(reporting_units, nonreporting_units, unexpected_units) + assert bootstrap_election_model.ran_bootstrap + assert bootstrap_election_model.weighted_yz_test_pred.shape == (nonreporting_units.shape[0], 1) + assert bootstrap_election_model.weighted_yz_test_pred.shape == bootstrap_election_model.weighted_z_test_pred.shape + assert bootstrap_election_model.errors_B_1.shape == (nonreporting_units.shape[0], bootstrap_election_model.B) + assert bootstrap_election_model.errors_B_2.shape == (nonreporting_units.shape[0], bootstrap_election_model.B) + assert bootstrap_election_model.errors_B_3.shape == (nonreporting_units.shape[0], bootstrap_election_model.B) + assert bootstrap_election_model.errors_B_4.shape == (nonreporting_units.shape[0], bootstrap_election_model.B) + + +def test_get_unit_predictions(bootstrap_election_model, va_governor_county_data): + election_id = "2017-11-07_VA_G" + office_id = "G" + geographic_unit_type = "county" + estimands = ["margin"] + percent_reporting_threshold = 100 + + data_handler = MockLiveDataHandler( + election_id, office_id, geographic_unit_type, estimands, data=va_governor_county_data + ) + + data_handler.shuffle() + current_data = data_handler.get_percent_fully_reported(50) + + preprocessed_data_handler = PreprocessedDataHandler( + election_id, office_id, geographic_unit_type, estimands, {"margin": "margin"}, data=va_governor_county_data + ) + + combined_data_handler = CombinedDataHandler( + preprocessed_data_handler.data, current_data, estimands, geographic_unit_type, handle_unreporting="drop" + ) + + turnout_factor_lower = 0.5 + turnout_factor_upper = 2.0 + reporting_units = combined_data_handler.get_reporting_units( + percent_reporting_threshold, + turnout_factor_lower=turnout_factor_lower, + turnout_factor_upper=turnout_factor_upper, + ) + nonreporting_units = combined_data_handler.get_nonreporting_units( + percent_reporting_threshold, + turnout_factor_lower=turnout_factor_lower, + 
turnout_factor_upper=turnout_factor_upper, + ) + unexpected_units = combined_data_handler.get_unexpected_units( + percent_reporting_threshold, ["postal_code"], turnout_factor_lower, turnout_factor_upper + ) + + bootstrap_election_model.B = 10 + assert not bootstrap_election_model.ran_bootstrap + unit_predictions = bootstrap_election_model.get_unit_predictions( + reporting_units, nonreporting_units, estimand="margin", unexpected_units=unexpected_units + ) + assert bootstrap_election_model.ran_bootstrap + assert unit_predictions.shape == (nonreporting_units.shape[0], 1) + + +def test_is_top_level_aggregate(bootstrap_election_model): + assert bootstrap_election_model._is_top_level_aggregate(["postal_code"]) + assert bootstrap_election_model._is_top_level_aggregate(["postal_code", "district"]) + + assert not bootstrap_election_model._is_top_level_aggregate(["postal_code", "county_fips"]) + assert not bootstrap_election_model._is_top_level_aggregate(["postal_code", "district", "county_classification"]) + assert not bootstrap_election_model._is_top_level_aggregate(["county_fips"]) + assert not bootstrap_election_model._is_top_level_aggregate([]) + + +def test_aggregate_predictions(bootstrap_election_model): + reporting_units = pd.DataFrame( + [ + ["a", -3, 0.2, 1, 1, 1, 1, 3, 5, 8, "c"], + ["a", 1, 0, 1, 1, 1, 1, 2, 1, 3, "a"], + ["b", 5, -0.1, 1, 1, 1, 1, 8, 3, 11, "a"], + ["c", 3, -0.2, 1, 1, 1, 1, 9, 1, 9, "c"], + ["c", 3, 0.8, 1, 1, 1, 1, 2, 4, 6, "c"], + ], + columns=[ + "postal_code", + "pred_margin", + "results_margin", + "results_weights", + "baseline_weights", + "turnout_factor", + "reporting", + "baseline_dem", + "baseline_gop", + "baseline_turnout", + "district", + ], + ) + nonreporting_units = pd.DataFrame( + [ + ["a", -3, 0.2, 1, 1, 1, 0, 3, 5, 8, "c"], + ["b", 1, 0, 1, 1, 1, 0, 2, 1, 3, "a"], + ["d", 5, -0.1, 1, 1, 1, 0, 8, 3, 11, "c"], + ["d", 3, 0.8, 1, 1, 1, 0, 2, 4, 6, "c"], + ["e", 4, 0.1, 1, 1, 1, 0, 5, 1, 9, "e"], + ["e", 4, 0.1, 1, 1, 1, 0, 5, 1, 9, "a"], + ], + columns=[ + "postal_code", + "pred_margin", + "results_margin", + "results_weights", + "baseline_weights", + "turnout_factor", + "reporting", + "baseline_dem", + "baseline_gop", + "baseline_turnout", + "district", + ], + ) + unexpected_units = pd.DataFrame( + [ + ["a", -3, 0.2, 1, 1, 1, 0, np.nan, np.nan, np.nan, "c"], + ["d", 1, 0, 1, 1, 1, 0, np.nan, np.nan, np.nan, "c"], + ["f", 5, -0.1, 1, 1, 1, 0, np.nan, np.nan, np.nan, "f"], + ["f", 3, 0.8, 1, 1, 1, 0, np.nan, np.nan, np.nan, "f"], + ], + columns=[ + "postal_code", + "pred_margin", + "results_margin", + "results_weights", + "baseline_weights", + "turnout_factor", + "reporting", + "baseline_dem", + "baseline_gop", + "baseline_turnout", + "district", + ], + ) + + bootstrap_election_model.weighted_z_test_pred = np.asarray([1, 1, 1, 1, 1, 1]).reshape(-1, 1) + + # test that is top level aggregate is working + aggregate_predictions = bootstrap_election_model.get_aggregate_predictions( + reporting_units, nonreporting_units, unexpected_units, ["district"], "margin" + ) + with pytest.raises(AttributeError): + bootstrap_election_model.aggregate_baseline_margin + + aggregate_predictions = bootstrap_election_model.get_aggregate_predictions( + reporting_units, nonreporting_units, unexpected_units, ["postal_code"], "margin" + ) + + assert bootstrap_election_model.aggregate_baseline_margin is not None + + assert aggregate_predictions[aggregate_predictions.postal_code == "a"].pred_margin[0] == pytest.approx( + -2.6 / 4 + ) # (-3 (pred) + 0.2 + 0 (reporting margin) + 0.2 
(unexpected margin))/ 4 + assert aggregate_predictions[aggregate_predictions.postal_code == "b"].pred_margin[1] == pytest.approx( + 0.9 / 2 + ) # (-0.1 + 1) / 2 + assert aggregate_predictions[aggregate_predictions.postal_code == "c"].pred_margin[2] == pytest.approx( + 0.3 + ) # (-0.2 + 0.8) / 2, both c units are reporting + assert aggregate_predictions[aggregate_predictions.postal_code == "d"].pred_margin[3] == pytest.approx( + 8 / 3 + ) # (5 + 3 + 0) / 3 + assert aggregate_predictions[aggregate_predictions.postal_code == "e"].pred_margin[4] == pytest.approx( + 4 + ) # (4 + 4) / 2 + assert aggregate_predictions[aggregate_predictions.postal_code == "f"].pred_margin[5] == pytest.approx(0.7 / 2) + + assert aggregate_predictions[aggregate_predictions.postal_code == "a"].results_margin[0] == pytest.approx(0.6 / 4) + assert aggregate_predictions[aggregate_predictions.postal_code == "b"].results_margin[1] == pytest.approx(-0.1 / 2) + assert aggregate_predictions[aggregate_predictions.postal_code == "c"].results_margin[2] == pytest.approx(0.3) + assert aggregate_predictions[aggregate_predictions.postal_code == "d"].results_margin[3] == pytest.approx(0.7 / 3) + assert aggregate_predictions[aggregate_predictions.postal_code == "e"].results_margin[4] == pytest.approx(0.1) + assert aggregate_predictions[aggregate_predictions.postal_code == "f"].results_margin[5] == pytest.approx(0.7 / 2) + + assert aggregate_predictions[aggregate_predictions.postal_code == "a"].reporting[0] == pytest.approx(2) + assert aggregate_predictions[aggregate_predictions.postal_code == "b"].reporting[1] == pytest.approx(1) + assert aggregate_predictions[aggregate_predictions.postal_code == "c"].reporting[2] == pytest.approx(2) + assert aggregate_predictions[aggregate_predictions.postal_code == "d"].reporting[3] == pytest.approx(0) + assert aggregate_predictions[aggregate_predictions.postal_code == "e"].reporting[4] == pytest.approx(0) + assert aggregate_predictions[aggregate_predictions.postal_code == "f"].reporting[5] == pytest.approx(0) + + # test more complicated z predictions + bootstrap_election_model.weighted_z_test_pred = np.asarray([2, 3, 1, 1, 1, 1]).reshape(-1, 1) + + aggregate_predictions = bootstrap_election_model.get_aggregate_predictions( + reporting_units, nonreporting_units, unexpected_units, ["postal_code"], "margin" + ) + + assert aggregate_predictions[aggregate_predictions.postal_code == "a"].pred_margin[0] == pytest.approx( + -2.6 / 5 + ) # now divided by 5 since the a unit in nonreporting has weight 2 + assert aggregate_predictions[aggregate_predictions.postal_code == "b"].pred_margin[1] == pytest.approx(0.9 / 4) + + # test more complicated aggregate (postal code-district) + bootstrap_election_model.weighted_z_test_pred = np.asarray([1, 1, 1, 1, 1, 1]).reshape(-1, 1) + aggregate_predictions = bootstrap_election_model.get_aggregate_predictions( + reporting_units, nonreporting_units, unexpected_units, ["postal_code", "district"], "margin" + ) + + assert aggregate_predictions.shape[0] == 8 + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "a") & (aggregate_predictions.district == "a") + ].pred_margin[0] == pytest.approx(0) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "a") & (aggregate_predictions.district == "c") + ].pred_margin[1] == pytest.approx(-2.6 / 3) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "b") & (aggregate_predictions.district == "a") + ].pred_margin[2] == pytest.approx(0.9 / 2) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "c") &
(aggregate_predictions.district == "c") + ].pred_margin[3] == pytest.approx(0.6 / 2) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "d") & (aggregate_predictions.district == "c") + ].pred_margin[4] == pytest.approx(8 / 3) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "e") & (aggregate_predictions.district == "a") + ].pred_margin[5] == pytest.approx(4) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "e") & (aggregate_predictions.district == "e") + ].pred_margin[6] == pytest.approx(4) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "f") & (aggregate_predictions.district == "f") + ].pred_margin[7] == pytest.approx(0.7 / 2) + + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "a") & (aggregate_predictions.district == "a") + ].reporting[0] == pytest.approx(1) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "a") & (aggregate_predictions.district == "c") + ].reporting[1] == pytest.approx(1) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "b") & (aggregate_predictions.district == "a") + ].reporting[2] == pytest.approx(1) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "c") & (aggregate_predictions.district == "c") + ].reporting[3] == pytest.approx(2) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "d") & (aggregate_predictions.district == "c") + ].reporting[4] == pytest.approx(0) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "e") & (aggregate_predictions.district == "a") + ].reporting[5] == pytest.approx(0) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "e") & (aggregate_predictions.district == "e") + ].reporting[6] == pytest.approx(0) + assert aggregate_predictions[ + (aggregate_predictions.postal_code == "f") & (aggregate_predictions.district == "f") + ].reporting[7] == pytest.approx(0) + + +def test_get_quantile(bootstrap_election_model): + bootstrap_election_model.B = 1000 + alpha = 0.95 + lower_q, upper_q = bootstrap_election_model._get_quantiles(alpha) + assert lower_q == pytest.approx(0.025) + assert upper_q == pytest.approx(0.975) + + +def test_get_unit_prediction_intervals(bootstrap_election_model, rng): + reporting_units, nonreporting_units = None, None + n = 10 + B = 10000 + alpha = 0.95 + + s = 2 + bootstrap_election_model.weighted_yz_test_pred = rng.normal(scale=1, size=(n, 1)) + bootstrap_election_model.errors_B_1 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.errors_B_2 = rng.normal(scale=s, size=(n, B)) + lower, upper = bootstrap_election_model.get_unit_prediction_intervals( + reporting_units, nonreporting_units, alpha, "margin" + ) + + lower_alpha = (1 - alpha) / 2 + upper_alpha = 1 - lower_alpha + + lower_q = np.floor(lower_alpha * (B + 1)) / B + upper_q = np.ceil(upper_alpha * (B - 1)) / B + + assert lower.shape == (n, 1) + assert upper.shape == (n, 1) + assert all(upper > lower) + assert all(upper > bootstrap_election_model.weighted_yz_test_pred) + assert all(bootstrap_election_model.weighted_yz_test_pred > lower) + + # sqrt of variance of two errors above + normal_lower_q = sp.stats.norm.ppf(lower_q, scale=np.sqrt(s**2 + s**2)) + normal_upper_q = sp.stats.norm.ppf(upper_q, scale=np.sqrt(s**2 + s**2)) + # arbitrarily one can be off because of sampling variability and rounding + assert ((bootstrap_election_model.weighted_yz_test_pred - normal_upper_q).round() == lower).mean() >= 0.9 + assert 
((bootstrap_election_model.weighted_yz_test_pred - normal_lower_q).round() == upper).mean() >= 0.9 + + +def test_get_aggregate_prediction_intervals(bootstrap_election_model, rng): + reporting_units = pd.DataFrame( + [ + ["a", -3, 0.2, 1, 1, 1, 1, 3, 5, 8, "c"], + ["a", 1, 0, 1, 1, 1, 1, 2, 1, 3, "a"], + ["b", 5, -0.1, 1, 1, 1, 1, 8, 3, 11, "a"], + ["c", 3, -0.2, 1, 1, 1, 1, 9, 1, 9, "c"], + ["c", 3, 0.8, 1, 1, 1, 1, 2, 4, 6, "c"], + ], + columns=[ + "postal_code", + "pred_margin", + "results_margin", + "results_weights", + "baseline_weights", + "turnout_factor", + "reporting", + "baseline_dem", + "baseline_gop", + "baseline_turnout", + "district", + ], + ) + reporting_units["results_normalized_margin"] = reporting_units.results_margin / reporting_units.results_weights + nonreporting_units = pd.DataFrame( + [ + ["a", -3, 0.2, 1, 1, 1, 0, 3, 5, 8, "c"], + ["b", 1, 0, 1, 1, 1, 0, 2, 1, 3, "a"], + ["d", 5, -0.1, 1, 1, 1, 0, 8, 3, 11, "c"], + ["d", 3, 0.8, 1, 1, 1, 0, 2, 4, 6, "c"], + ["e", 4, 0.1, 1, 1, 1, 0, 5, 1, 9, "e"], + ["e", 4, 0.1, 1, 1, 1, 0, 5, 1, 9, "a"], + ], + columns=[ + "postal_code", + "pred_margin", + "results_margin", + "results_weights", + "baseline_weights", + "turnout_factor", + "reporting", + "baseline_dem", + "baseline_gop", + "baseline_turnout", + "district", + ], + ) + nonreporting_units["results_normalized_margin"] = ( + nonreporting_units.results_margin / nonreporting_units.results_weights + ) + unexpected_units = pd.DataFrame( + [ + ["a", -3, 0.2, 1, 1, 1, 0, np.nan, np.nan, np.nan, "c"], + ["d", 1, 0, 1, 1, 1, 0, np.nan, np.nan, np.nan, "c"], + ["f", 5, -0.1, 1, 1, 1, 0, np.nan, np.nan, np.nan, "f"], + ["f", 3, 0.8, 1, 1, 1, 0, np.nan, np.nan, np.nan, "f"], + ], + columns=[ + "postal_code", + "pred_margin", + "results_margin", + "results_weights", + "baseline_weights", + "turnout_factor", + "reporting", + "baseline_dem", + "baseline_gop", + "baseline_turnout", + "district", + ], + ) + unexpected_units["results_normalized_margin"] = unexpected_units.results_margin / unexpected_units.results_weights + + n = nonreporting_units.shape[0] + B = 10 + s = 1.0 + bootstrap_election_model.B = B + bootstrap_election_model.errors_B_1 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.errors_B_2 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.errors_B_3 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.errors_B_4 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.weighted_z_test_pred = rng.normal(scale=s, size=(n, 1)) + bootstrap_election_model.weighted_yz_test_pred = rng.normal(scale=s, size=(n, 1)) + + lower, upper = bootstrap_election_model.get_aggregate_prediction_intervals( + reporting_units, nonreporting_units, unexpected_units, ["postal_code"], 0.95, None, None + ) + + assert lower.shape == (6, 1) + assert upper.shape == (6, 1) + + assert lower[2] == pytest.approx(upper[2]) # since c is fully reporting + assert lower[5] == pytest.approx(upper[5]) # since all f units are unexpected + assert all(lower <= upper) + + # test with more complicated aggregate + lower, upper = bootstrap_election_model.get_aggregate_prediction_intervals( + reporting_units, nonreporting_units, unexpected_units, ["postal_code", "district"], 0.95, None, None + ) + + assert lower.shape == (8, 1) + assert upper.shape == (8, 1) + + assert lower[0] == pytest.approx(upper[0]) # a-a is fully reporting + assert lower[3] == pytest.approx(upper[3]) # c-c is fully reporting + assert lower[7] == pytest.approx(upper[7]) # c-c is fully reporting + assert lower[7] == 
pytest.approx(upper[7]) # f-f is fully unexpected + assert all(lower <= upper) + + +def test_get_national_summary_estimates(bootstrap_election_model, rng): + n = 10 + s = 2.0 + B = 20 + bootstrap_election_model.B = B + bootstrap_election_model.aggregate_error_B_1 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.aggregate_error_B_2 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.aggregate_error_B_3 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.aggregate_error_B_4 = rng.normal(scale=s, size=(n, B)) + bootstrap_election_model.aggregate_perc_margin_total = rng.normal(scale=s, size=(n, 1)) + + nat_sum_estimates = bootstrap_election_model.get_national_summary_estimates(None, None, 0, 0.95) + assert "margin" in nat_sum_estimates + assert len(nat_sum_estimates["margin"]) == 3 + assert nat_sum_estimates["margin"][0] >= nat_sum_estimates["margin"][1] + assert nat_sum_estimates["margin"][0] <= nat_sum_estimates["margin"][2] + + # testing adding to base + base_to_add = rng.random() + nat_sum_estimates_w_base = bootstrap_election_model.get_national_summary_estimates(None, None, base_to_add, 0.95) + assert nat_sum_estimates_w_base["margin"][0] == pytest.approx(nat_sum_estimates["margin"][0] + base_to_add) + assert nat_sum_estimates_w_base["margin"][1] == pytest.approx(nat_sum_estimates["margin"][1] + base_to_add) + assert nat_sum_estimates_w_base["margin"][2] == pytest.approx(nat_sum_estimates["margin"][2] + base_to_add) + + # test calling races + states_called = {i: 1 for i in range(n)} + nat_sum_data_dict = {i: 3 for i in range(n)} + nat_sum_data_dict[1] = 7 + nat_sum_estimates = bootstrap_election_model.get_national_summary_estimates( + nat_sum_data_dict, states_called, 0, 0.95 + ) + assert nat_sum_estimates["margin"][0] == pytest.approx(34) + assert nat_sum_estimates["margin"][1] == pytest.approx(34) + assert nat_sum_estimates["margin"][2] == pytest.approx(34) + + states_called = {i: 0 for i in range(n)} + nat_sum_estimates = bootstrap_election_model.get_national_summary_estimates( + nat_sum_data_dict, states_called, 0, 0.95 + ) + assert nat_sum_estimates["margin"][0] == pytest.approx(0) + assert nat_sum_estimates["margin"][1] == pytest.approx(0) + assert nat_sum_estimates["margin"][2] == pytest.approx(0) + + states_called = {i: 0 for i in range(n)} + states_called[1] = 1 + nat_sum_estimates = bootstrap_election_model.get_national_summary_estimates( + nat_sum_data_dict, states_called, 0, 0.95 + ) + assert nat_sum_estimates["margin"][0] == pytest.approx(7) + assert nat_sum_estimates["margin"][1] == pytest.approx(7) + assert nat_sum_estimates["margin"][2] == pytest.approx(7) + + # test exceptions + states_called = {i: 0 for i in range(n - 1)} + with pytest.raises(BootstrapElectionModelException): + nat_sum_estimates = bootstrap_election_model.get_national_summary_estimates( + nat_sum_data_dict, states_called, 0, 0.95 + ) + + nat_sum_data_dict = {i: 3 for i in range(n - 1)} + with pytest.raises(BootstrapElectionModelException): + nat_sum_estimates = bootstrap_election_model.get_national_summary_estimates( + nat_sum_data_dict, states_called, 0, 0.95 + ) + + +# TODO: write unit test for combined aggregation (e.g. prediction, intervals, aggregate etc.) 
+# also where an unexpected or non-reporting unit starts ahead of a reporting unit +def test_total_aggregation(bootstrap_election_model, va_assembly_precinct_data): + election_id = "2017-11-07_VA_G" + office_id = "Y" + geographic_unit_type = "precinct-district" + estimands = ["margin"] + estimands_baseline = {"margin": "margin"} + unexpected_units = 50 + alpha = 0.9 + percent_reporting_threshold = 100 + + live_data_handler = MockLiveDataHandler( + election_id, + office_id, + geographic_unit_type, + estimands, + unexpected_units=unexpected_units, + data=va_assembly_precinct_data, + ) + + live_data_handler.shuffle() + current_data = live_data_handler.get_percent_fully_reported(400) + + preprocessed_data_handler = PreprocessedDataHandler( + election_id, office_id, geographic_unit_type, estimands, estimands_baseline, data=va_assembly_precinct_data + ) + + combined_data_handler = CombinedDataHandler( + preprocessed_data_handler.data, current_data, estimands, geographic_unit_type + ) + + reporting_units = combined_data_handler.get_reporting_units(percent_reporting_threshold, 0.5, 1.5) + nonreporting_units = combined_data_handler.get_nonreporting_units(percent_reporting_threshold, 0.5, 1.5) + unexpected_units = combined_data_handler.get_unexpected_units( + percent_reporting_threshold, + aggregates=["postal_code", "district"], + turnout_factor_lower=0.5, + turnout_factor_upper=1.5, + ) + + bootstrap_election_model.B = 300 + + unit_predictions = bootstrap_election_model.get_unit_predictions( + reporting_units, nonreporting_units, "margin", unexpected_units=unexpected_units + ) + unit_lower, unit_upper = bootstrap_election_model.get_unit_prediction_intervals( + reporting_units, nonreporting_units, alpha, "margin" + ) + + assert unit_predictions.shape[0] == unit_lower.shape[0] + assert unit_predictions.shape[0] == unit_upper.shape[0] + assert all(unit_lower.flatten() <= unit_predictions.flatten()) + assert all(unit_predictions.flatten() <= unit_upper.flatten()) + + reporting_units["pred_margin"] = reporting_units.results_margin + nonreporting_units["pred_margin"] = unit_predictions + unexpected_units["pred_margin"] = unexpected_units.results_margin + + district_predictions = bootstrap_election_model.get_aggregate_predictions( + reporting_units, nonreporting_units, unexpected_units, ["postal_code", "district"], "margin" + ) + district_lower, district_upper = bootstrap_election_model.get_aggregate_prediction_intervals( + reporting_units, nonreporting_units, unexpected_units, ["postal_code", "district"], alpha, None, "margin" + ) + assert district_predictions.shape[0] == len(preprocessed_data_handler.data.district.unique()) + + assert district_predictions.shape[0] == district_lower.shape[0] + assert district_predictions.shape[0] == district_upper.shape[0] + + assert all(district_lower.flatten() <= district_predictions.pred_margin + TOL) + assert all(district_predictions.pred_margin <= district_upper.flatten() + TOL) diff --git a/tests/test_client.py b/tests/test_client.py index 802d20b4..93402322 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -618,7 +618,6 @@ def test_get_estimates_some_reporting(model_client, va_governor_county_data, va_ "upper_0.9_turnout", "results_turnout", ] - assert result["state_data"]["postal_code"][0] == "VA" assert result["state_data"]["pred_turnout"][0] == 2587563.0 assert result["state_data"]["results_turnout"][0] == 1570077.0