diff --git a/CHANGELOG.md b/CHANGELOG.md index 18915c73..f5d7f2f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +# 2.2.2 (11/5/2024) +- fix: missing `est_correction` column in `VersionedResults` `DataFrame` in the event of bad data [#131](https://github.com/washingtonpost/elex-live-model/pull/131) + # 2.2.1 (11/1/2024) - chore: downgrade botocore and s3transfer as per live team dependency [#128](https://github.com/washingtonpost/elex-live-model/pull/128) diff --git a/src/elexmodel/handlers/data/VersionedData.py b/src/elexmodel/handlers/data/VersionedData.py index 77a4a7e3..231bfc29 100644 --- a/src/elexmodel/handlers/data/VersionedData.py +++ b/src/elexmodel/handlers/data/VersionedData.py @@ -127,6 +127,8 @@ def compute_estimated_margin(df): "percent_expected_vote": np.arange(101), "nearest_observed_vote": np.nan * np.ones(101), "est_margin": np.nan * np.ones(101), + "est_correction": np.nan * np.ones(101), + "error_type": "non-monotone percent expected vote", } ) @@ -152,6 +154,8 @@ def compute_estimated_margin(df): "percent_expected_vote": np.arange(101), "nearest_observed_vote": np.nan * np.ones(101), "est_margin": np.nan * np.ones(101), + "est_correction": np.nan * np.ones(101), + "error_type": "batch_margin", } ) @@ -196,10 +200,17 @@ def compute_estimated_margin(df): "nearest_observed_vote": percent_vote[np.clip(obs_indices + 1, 0, len(percent_vote) - 1)], "est_margin": est_margins, "est_correction": norm_margin[-1] - est_margins, + "error_type": "none", } ) results = results.groupby("geographic_unit_fips").apply(compute_estimated_margin).reset_index() + + for error_type in sorted(set(results["error_type"])): + if error_type == "none": + continue + category_error_type = results[results["error_type"] == error_type].geographic_unit_fips.unique() + LOG.info(f"# of versioned units with {error_type} error: {len(category_error_type)}") return results def get_versioned_predictions(self, filepath=None):