support different column names for ids, times and targets

Nixtla · Dec 5, 2023 · 223d554
1 parent 3e0e507
commit 223d554
Show file tree

Hide file tree

Showing 9 changed files with 878 additions and 302 deletions.
diff --git a/action_files/test_dask.py b/action_files/test_dask.py
@@ -13,7 +13,11 @@ def sample_data(local_data):
     return to_distributed(series), to_distributed(X_df)
 
 def test_dask_flow(horizon, sample_data, n_series):
-    pipeline(*sample_data, n_series, horizon)
+    renamer = {'unique_id': 'id', 'ds': 'time', 'y': 'value'}
+    series, X_df = sample_data
+    series = series.rename(columns=renamer)
+    X_df = X_df.rename(columns=renamer)
+    pipeline(series, X_df, n_series, horizon, id_col='id', time_col='time', target_col='value')
 
 def test_dask_flow_with_level(horizon, sample_data, n_series):
     pipeline_with_level(*sample_data, n_series, horizon)
diff --git a/action_files/utils.py b/action_files/utils.py
@@ -21,7 +21,7 @@
 )
 
 
-def pipeline(series, X_df, n_series, horizon):
+def pipeline(series, X_df, n_series, horizon, id_col='unique_id', time_col='ds', target_col='y'):
     models = [
 		ADIDA(),
         AutoARIMA(season_length=7),
@@ -45,14 +45,18 @@ def pipeline(series, X_df, n_series, horizon):
         models=models,
         freq='D',
     )
-    forecast = fa.as_pandas(sf.forecast(df=series, h=horizon, X_df=X_df))
+    forecast = fa.as_pandas(
+        sf.forecast(df=series, h=horizon, X_df=X_df, id_col=id_col, time_col=time_col, target_col=target_col)
+    )
     print(forecast)
     assert forecast.shape == (n_series * horizon, len(models) + 2)
 
     n_windows = 2
-    cv = fa.as_pandas(sf.cross_validation(df=series, n_windows=n_windows, h=horizon))
+    cv = fa.as_pandas(
+        sf.cross_validation(df=series, n_windows=n_windows, h=horizon, id_col=id_col, time_col=time_col, target_col=target_col)
+    )
     assert cv.shape[0] == n_series * n_windows * horizon
-    assert cv.columns.tolist() == ['unique_id', 'ds', 'cutoff', 'y'] + [m.alias for m in models]
+    assert cv.columns.tolist() == [id_col, time_col, 'cutoff', target_col] + [m.alias for m in models]
 
 def pipeline_with_level(series, X_df, n_series, horizon):
     models = [

diff --git a/dev/environment.yml b/dev/environment.yml
@@ -23,4 +23,4 @@ dependencies:
     - polars
     - supersmoother
     - tqdm
-    - utilsforecast>=0.0.19
+    - utilsforecast>=0.0.20
diff --git a/dev/local_environment.yml b/dev/local_environment.yml
@@ -21,4 +21,4 @@ dependencies:
     - polars
     - supersmoother
     - tqdm
-    - utilsforecast>=0.0.19
+    - utilsforecast>=0.0.20
diff --git a/nbs/src/core/core.ipynb b/nbs/src/core/core.ipynb