From 3e104b5ba02ab4f900c133ab361ceec149d6f0bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Thu, 27 Jun 2024 18:42:26 -0600 Subject: [PATCH] support multiple seasonalities in mstl_decomposition (#861) --- dev/environment.yml | 4 ++-- dev/local_environment.yml | 2 +- nbs/src/core/models.ipynb | 9 +++++--- nbs/src/feature_engineering.ipynb | 32 +++++++++++++++++++++++++--- statsforecast/_modidx.py | 2 ++ statsforecast/feature_engineering.py | 16 ++++++++------ statsforecast/models.py | 10 ++++++--- 7 files changed, 56 insertions(+), 19 deletions(-) diff --git a/dev/environment.yml b/dev/environment.yml index 6b7d08234..a2e98f658 100644 --- a/dev/environment.yml +++ b/dev/environment.yml @@ -7,7 +7,7 @@ dependencies: - matplotlib - numba>=0.55.0 - numpy>=1.21.6 - - pandas>=1.3.5,<2.2 + - pandas>=1.3.5 - pyspark>=3.3 - pip - prophet @@ -23,7 +23,7 @@ dependencies: - fugue[dask,ray] - nbdev - plotly-resampler - - polars + - polars[numpy]>=0.0.0rc0 - supersmoother - tqdm - utilsforecast>=0.1.4 diff --git a/dev/local_environment.yml b/dev/local_environment.yml index 474d5ef27..474bbb76d 100644 --- a/dev/local_environment.yml +++ b/dev/local_environment.yml @@ -21,7 +21,7 @@ dependencies: - datasetsforecast - nbdev - plotly-resampler - - polars + - polars[numpy]>=0.0.0rc0 - supersmoother - tqdm - utilsforecast>=0.1.4 diff --git a/nbs/src/core/models.ipynb b/nbs/src/core/models.ipynb index 00fa60e08..e38c26432 100644 --- a/nbs/src/core/models.ipynb +++ b/nbs/src/core/models.ipynb @@ -9119,7 +9119,7 @@ "outputs": [], "source": [ "#| exporti\n", - "def _predict_mstl_seas(mstl_ob, h, season_length):\n", + "def _predict_mstl_components(mstl_ob, h, season_length):\n", " seasoncolumns = mstl_ob.filter(regex='seasonal*').columns\n", " nseasons = len(seasoncolumns)\n", " seascomp = np.full((h, nseasons), np.nan)\n", @@ -9128,8 +9128,11 @@ " mp = seasonal_periods[i]\n", " colname = seasoncolumns[i]\n", " seascomp[:, i] = np.tile(mstl_ob[colname].values[-mp:], trunc(1 + (h-1)/mp))[:h]\n", - " lastseas = seascomp.sum(axis=1)\n", - " return lastseas" + " return seascomp\n", + "\n", + "def _predict_mstl_seas(mstl_ob, h, season_length):\n", + " seascomp = _predict_mstl_components(mstl_ob, h, season_length)\n", + " return seascomp.sum(axis=1)" ] }, { diff --git a/nbs/src/feature_engineering.ipynb b/nbs/src/feature_engineering.ipynb index 7b33c0ae1..8b5be9e51 100644 --- a/nbs/src/feature_engineering.ipynb +++ b/nbs/src/feature_engineering.ipynb @@ -1,5 +1,17 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "eb55820a-551f-45e6-b6a8-c8f2d868aa32", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, @@ -41,7 +53,7 @@ "\n", "from statsforecast import StatsForecast\n", "from statsforecast.core import _id_as_idx\n", - "from statsforecast.models import MSTL, _predict_mstl_seas" + "from statsforecast.models import MSTL, _predict_mstl_components" ] }, { @@ -92,11 +104,13 @@ " train_features = []\n", " future_features = []\n", " df_constructor = type(df)\n", + " seas_cols = [c for c in sf.fitted_[0, 0].model_.columns if c.startswith('seasonal')]\n", " for fitted_model in sf.fitted_[:, 0]:\n", - " train_features.append(fitted_model.model_[['trend', 'seasonal']])\n", + " train_features.append(fitted_model.model_[['trend'] + seas_cols])\n", + " seas_comp = _predict_mstl_components(fitted_model.model_, h, model.season_length)\n", " future_df = df_constructor({\n", " 'trend': fitted_model.trend_forecaster.predict(h)['mean'],\n", - " 'seasonal': _predict_mstl_seas(fitted_model.model_, h, model.season_length)\n", + " **dict(zip(seas_cols, seas_comp.T)),\n", " })\n", " future_features.append(future_df)\n", " train_features = vertical_concat(train_features, match_categories=False)\n", @@ -185,6 +199,18 @@ "with_estimate = train_df_pl.with_columns(estimate=pl.col('trend') + pl.col('seasonal'))\n", "assert smape(with_estimate, models=['estimate'])['estimate'].mean() < 0.1" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb35b3f1-9c67-4422-a4db-f5b51a246dac", + "metadata": {}, + "outputs": [], + "source": [ + "model = MSTL(season_length=[7, 28])\n", + "train_df, X_df = mstl_decomposition(series, model, 'D', horizon)\n", + "assert train_df.columns.intersection(X_df.columns).tolist() == ['unique_id', 'ds', 'trend', 'seasonal7', 'seasonal28']" + ] } ], "metadata": { diff --git a/statsforecast/_modidx.py b/statsforecast/_modidx.py index c96b4cbfe..fbefefec3 100644 --- a/statsforecast/_modidx.py +++ b/statsforecast/_modidx.py @@ -728,6 +728,8 @@ 'statsforecast.models._intervals': ('src/core/models.html#_intervals', 'statsforecast/models.py'), 'statsforecast.models._optimized_ses_forecast': ( 'src/core/models.html#_optimized_ses_forecast', 'statsforecast/models.py'), + 'statsforecast.models._predict_mstl_components': ( 'src/core/models.html#_predict_mstl_components', + 'statsforecast/models.py'), 'statsforecast.models._predict_mstl_seas': ( 'src/core/models.html#_predict_mstl_seas', 'statsforecast/models.py'), 'statsforecast.models._probability': ('src/core/models.html#_probability', 'statsforecast/models.py'), diff --git a/statsforecast/feature_engineering.py b/statsforecast/feature_engineering.py index 4f23f82ea..af753d1db 100644 --- a/statsforecast/feature_engineering.py +++ b/statsforecast/feature_engineering.py @@ -3,7 +3,7 @@ # %% auto 0 __all__ = ['mstl_decomposition'] -# %% ../nbs/src/feature_engineering.ipynb 2 +# %% ../nbs/src/feature_engineering.ipynb 3 from typing import Tuple import pandas as pd @@ -18,9 +18,9 @@ from . import StatsForecast from .core import _id_as_idx -from .models import MSTL, _predict_mstl_seas +from .models import MSTL, _predict_mstl_components -# %% ../nbs/src/feature_engineering.ipynb 3 +# %% ../nbs/src/feature_engineering.ipynb 4 def mstl_decomposition( df: DataFrame, model: MSTL, @@ -61,14 +61,16 @@ def mstl_decomposition( train_features = [] future_features = [] df_constructor = type(df) + seas_cols = [c for c in sf.fitted_[0, 0].model_.columns if c.startswith("seasonal")] for fitted_model in sf.fitted_[:, 0]: - train_features.append(fitted_model.model_[["trend", "seasonal"]]) + train_features.append(fitted_model.model_[["trend"] + seas_cols]) + seas_comp = _predict_mstl_components( + fitted_model.model_, h, model.season_length + ) future_df = df_constructor( { "trend": fitted_model.trend_forecaster.predict(h)["mean"], - "seasonal": _predict_mstl_seas( - fitted_model.model_, h, model.season_length - ), + **dict(zip(seas_cols, seas_comp.T)), } ) future_features.append(future_df) diff --git a/statsforecast/models.py b/statsforecast/models.py index 0e6f36757..f1623f299 100644 --- a/statsforecast/models.py +++ b/statsforecast/models.py @@ -4962,7 +4962,7 @@ def forecast( return res # %% ../nbs/src/core/models.ipynb 363 -def _predict_mstl_seas(mstl_ob, h, season_length): +def _predict_mstl_components(mstl_ob, h, season_length): seasoncolumns = mstl_ob.filter(regex="seasonal*").columns nseasons = len(seasoncolumns) seascomp = np.full((h, nseasons), np.nan) @@ -4975,8 +4975,12 @@ def _predict_mstl_seas(mstl_ob, h, season_length): seascomp[:, i] = np.tile( mstl_ob[colname].values[-mp:], trunc(1 + (h - 1) / mp) )[:h] - lastseas = seascomp.sum(axis=1) - return lastseas + return seascomp + + +def _predict_mstl_seas(mstl_ob, h, season_length): + seascomp = _predict_mstl_components(mstl_ob, h, season_length) + return seascomp.sum(axis=1) # %% ../nbs/src/core/models.ipynb 364 class MSTL(_TS):