From a8d0dce23417a784367c304afa5db74f8226d37c Mon Sep 17 00:00:00 2001
From: Becca McBrayer
Date: Thu, 20 Jan 2022 11:24:23 -0500
Subject: [PATCH] Ensure prediction explanations are json-serializable (#3262)

* Add extra encoding to prediction explanations
---
 docs/source/release_notes.rst |  1 +
 .../_user_interface.py        |  5 ++
 .../test_explainers.py        | 46 ++++++++++++++++---
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 4b2dd64812..00fe962505 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -13,6 +13,7 @@
         * Removed empty cell in text_input.ipynb :pr:`3234`
         * Removed potential prediction explanations failure when pipelines predicted a class with probability 1 :pr:`3221`
         * Dropped NaNs before partial dependence grid generation :pr:`3235`
+        * Allowed prediction explanations to be json-serializable :pr:`3262`
         * Fixed bug where ``InvalidTargetDataCheck`` would not check time series regression targets :pr:`3251`
         * Fixed bug in ``are_datasets_separated_by_gap_time_index`` :pr:`3256`
     * Changes
diff --git a/evalml/model_understanding/prediction_explanations/_user_interface.py b/evalml/model_understanding/prediction_explanations/_user_interface.py
index 6abf8540c7..0fa25b7a02 100644
--- a/evalml/model_understanding/prediction_explanations/_user_interface.py
+++ b/evalml/model_understanding/prediction_explanations/_user_interface.py
@@ -68,6 +68,9 @@ def _make_rows(
                 feature_value = "{:.2f}".format(feature_value)
             else:
                 feature_value = str(feature_value)
+
+        feature_value = _make_json_serializable(feature_value)
+
         row = [feature_name, feature_value, display_text]
         if include_explainer_values:
             explainer_value = explainer_values[feature_name][0]
@@ -117,6 +120,8 @@ def _make_json_serializable(value):
             value = int(value)
         else:
             value = float(value)
+    elif isinstance(value, pd.Timestamp):
+        value = str(value)
     return value


diff --git a/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_explainers.py b/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_explainers.py
index 68851bc8ba..b084fa771a 100644
--- a/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_explainers.py
+++ b/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_explainers.py
@@ -1303,7 +1303,7 @@ def test_categories_aggregated_text(
         "CUC",
         "Mastercard",
         24900,
-        pd.Timestamp("2019-01-01 00:12:26"),
+        str(pd.Timestamp("2019-01-01 00:12:26")),
     }
     assert explanation["drill_down"].keys() == {"currency", "provider", "datetime"}
     assert (
@@ -1368,7 +1368,7 @@ def test_categories_aggregated_date_ohe(
         "datetime",
     }
     assert set(explanation["feature_values"]) == {
-        pd.Timestamp("2019-01-01 00:12:26"),
+        str(pd.Timestamp("2019-01-01 00:12:26")),
         "Mastercard",
         "CUC",
         24900,
@@ -1442,7 +1442,11 @@ def test_categories_aggregated_pca_dag(
     assert all(
         [
             f in explanation["feature_values"]
-            for f in [pd.Timestamp("2019-01-01 00:12:26"), "Mastercard", "CUC"]
+            for f in [
+                str(pd.Timestamp("2019-01-01 00:12:26")),
+                "Mastercard",
+                "CUC",
+            ]
         ]
     )
     assert explanation["drill_down"].keys() == {"currency", "provider", "datetime"}
@@ -1567,7 +1571,7 @@ def test_categories_aggregated_when_some_are_dropped(
         "CUC",
         "Mastercard",
         24900,
-        pd.Timestamp("2019-01-01 00:12:26"),
+        str(pd.Timestamp("2019-01-01 00:12:26")),
     }
     assert explanation["drill_down"].keys() == {"currency", "provider", "datetime"}
     assert (
@@ -2043,7 +2047,10 @@ def test_explain_predictions_report_shows_original_value_if_possible(
         top_k_features=20,
         algorithm=algorithm,
     )
-    expected_feature_values = set(X.ww.iloc[0, :].tolist())
+    X_dt = X.copy()
+    X_dt.ww.init()
+    X_dt["datetime"] = X_dt["datetime"].astype(str)
+    expected_feature_values = set(X_dt.ww.iloc[0, :].tolist())
     for explanation in report["explanations"][0]["explanations"]:
         assert set(explanation["feature_names"]) == set(X.columns)
         assert set(explanation["feature_values"]) == expected_feature_values
@@ -2106,11 +2113,14 @@ def test_explain_predictions_best_worst_report_shows_original_value_if_possible(
         algorithm=algorithm,
     )

+    X_dt = X.copy()
+    X_dt.ww.init()
+    X_dt["datetime"] = X_dt["datetime"].astype(str)
     for index, explanation in enumerate(report["explanations"]):
         for exp in explanation["explanations"]:
             assert set(exp["feature_names"]) == set(X.columns)
             assert set(exp["feature_values"]) == set(
-                X.ww.iloc[explanation["predicted_values"]["index_id"], :]
+                X_dt.ww.iloc[explanation["predicted_values"]["index_id"], :]
             )

     X_null = X.ww.copy()
@@ -2136,6 +2146,30 @@ def test_explain_predictions_best_worst_report_shows_original_value_if_possible(
             assert np.isnan(feature_value)


+@pytest.mark.parametrize("algorithm", algorithms)
+def test_explain_predictions_best_worst_json(
+    algorithm, fraud_100, has_minimal_dependencies
+):
+    if has_minimal_dependencies and algorithm == "lime":
+        pytest.skip("Skipping because lime is a non-core dependency")
+    pipeline = BinaryClassificationPipeline(
+        [
+            "Natural Language Featurizer",
+            "DateTime Featurizer",
+            "One Hot Encoder",
+            "Logistic Regression Classifier",
+        ]
+    )
+    X, y = fraud_100
+    pipeline.fit(X, y)
+
+    report = explain_predictions_best_worst(
+        pipeline, X, y, algorithm=algorithm, output_format="dict"
+    )
+    json_output = json.dumps(report)
+    assert isinstance(json_output, str)
+
+
 def test_explain_predictions_invalid_algorithm():
     pipeline = MagicMock()
     input_features = pd.DataFrame({"a": [5, 6, 1, 2, 3, 4, 5, 6, 7, 4]})
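A minimal standalone sketch of the behavior this patch enables, mirroring the new test above rather than adding anything beyond it. It assumes evalml's `load_fraud` demo loader (not part of this diff) and imports `explain_predictions_best_worst` from the package touched by the patch; the default explainer algorithm is used.

import json

from evalml.demos import load_fraud
from evalml.model_understanding.prediction_explanations import (
    explain_predictions_best_worst,
)
from evalml.pipelines import BinaryClassificationPipeline

# Small fraud sample with datetime, categorical, and text columns
# (assumed stand-in for the fraud_100 fixture used in the test).
X, y = load_fraud(n_rows=100)

pipeline = BinaryClassificationPipeline(
    [
        "Natural Language Featurizer",
        "DateTime Featurizer",
        "One Hot Encoder",
        "Logistic Regression Classifier",
    ]
)
pipeline.fit(X, y)

# With pd.Timestamp handled in _make_json_serializable, the dict report
# serializes cleanly; before this patch json.dumps raised a TypeError
# when a datetime feature value appeared in the explanations.
report = explain_predictions_best_worst(pipeline, X, y, output_format="dict")
json_output = json.dumps(report)
assert isinstance(json_output, str)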