Skip to content

Commit

Permalink
Fixed multiseries prediction interval labels (#4377)
Browse files Browse the repository at this point in the history
* Initial commit

* Updated test

* Update release notes

* Added coverage for column labels
  • Loading branch information
christopherbunn authored Jan 25, 2024
1 parent 8349680 commit c7843cd
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Release Notes
* Enhancements
* Fixes
* Fixed bug in `_downcast_nullable_y` causing woodwork initialization issues :pr:`4369`
* Fixed multiseries prediction interval labels :pr:`4377`
* Changes
* Pinned scipy version to under 1.12.0 :pr:`4380`
* Documentation Changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def transform(
# Convert the list to a DataFrame
# For multiseries, return tuple[pd.DataFrame, pd.DataFrame] where each column is a series_id
detrending_df = pd.DataFrame(detrending_list).T
detrending_df.columns = y.columns
return X, detrending_df

def inverse_transform(
Expand Down
7 changes: 1 addition & 6 deletions evalml/pipelines/time_series_regression_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ def _get_series_intervals(intervals, residuals, trend_pred_intervals, y):

if self.problem_type == ProblemTypes.MULTISERIES_TIME_SERIES_REGRESSION:
from evalml.pipelines.utils import (
MULTISERIES_SEPARATOR_SYMBOL,
stack_data,
unstack_multiseries,
)
Expand Down Expand Up @@ -271,11 +270,7 @@ def _get_series_intervals(intervals, residuals, trend_pred_intervals, y):

# `pred_intervals` are in {series_id: {coverage_label: bound_value}} form
for series_id, series_intervals in pred_intervals.items():
series_id_target_name = (
self.input_target_name
+ MULTISERIES_SEPARATOR_SYMBOL
+ str(series_id)
)
series_id_target_name = str(series_id)
series_id_prediction_intervals = _get_series_intervals(
series_intervals,
residuals[series_id],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,15 @@ def test_stl_fit_transform_in_sample(

stl = STLDecomposer(period=period)

series_id_columns = ["series_1", "series_2"]
if variateness == "multivariate":
y.columns = series_id_columns

X_t, y_t = stl.fit_transform(X, y)

if variateness == "multivariate":
assert all(y_t.columns == series_id_columns)

# If y_t is a pd.Series, give it columns
if isinstance(y_t, pd.Series):
y_t = y_t.to_frame()
Expand Down Expand Up @@ -179,7 +186,11 @@ def test_stl_fit_transform_in_sample(
# Check the trend to make sure STL worked properly
pd.testing.assert_series_equal(
pd.Series(expected_trend),
pd.Series(stl.trends[0]),
pd.Series(
stl.trends["series_1"]
if variateness == "multivariate"
else stl.trends[0],
),
check_exact=False,
check_index=False,
check_names=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def test_time_series_pipeline_get_prediction_intervals(
)
X_train, y_train = X[:65], y[:65]
X_validation, y_validation = X[65:], y[65:]
mock_X, _ = unstack_multiseries(
mock_X, mock_y = unstack_multiseries(
X_train,
y_train,
series_id="series_id",
Expand All @@ -356,7 +356,7 @@ def test_time_series_pipeline_get_prediction_intervals(
)
mock_transform_return_value = (
mock_X,
pd.DataFrame(np.random.rand(13, 5)),
mock_y,
)
with patch(
"evalml.pipelines.components.transformers.preprocessing.stl_decomposer.STLDecomposer.transform",
Expand Down

0 comments on commit c7843cd

Please sign in to comment.