Skip to content

Commit

Permalink
support different column names for ids, times and targets
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez committed Dec 5, 2023
1 parent 3e0e507 commit 223d554
Show file tree
Hide file tree
Showing 9 changed files with 878 additions and 302 deletions.
6 changes: 5 additions & 1 deletion action_files/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ def sample_data(local_data):
return to_distributed(series), to_distributed(X_df)

def test_dask_flow(horizon, sample_data, n_series):
- pipeline(*sample_data, n_series, horizon)
+ renamer = {'unique_id': 'id', 'ds': 'time', 'y': 'value'}
+ series, X_df = sample_data
+ series = series.rename(columns=renamer)
+ X_df = X_df.rename(columns=renamer)
+ pipeline(series, X_df, n_series, horizon, id_col='id', time_col='time', target_col='value')

def test_dask_flow_with_level(horizon, sample_data, n_series):
pipeline_with_level(*sample_data, n_series, horizon)
12 changes: 8 additions & 4 deletions action_files/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)


- def pipeline(series, X_df, n_series, horizon):
+ def pipeline(series, X_df, n_series, horizon, id_col='unique_id', time_col='ds', target_col='y'):
models = [
ADIDA(),
AutoARIMA(season_length=7),
Expand All @@ -45,14 +45,18 @@ def pipeline(series, X_df, n_series, horizon):
models=models,
freq='D',
)
- forecast = fa.as_pandas(sf.forecast(df=series, h=horizon, X_df=X_df))
+ forecast = fa.as_pandas(
+ sf.forecast(df=series, h=horizon, X_df=X_df, id_col=id_col, time_col=time_col, target_col=target_col)
+ )
print(forecast)
assert forecast.shape == (n_series * horizon, len(models) + 2)

n_windows = 2
- cv = fa.as_pandas(sf.cross_validation(df=series, n_windows=n_windows, h=horizon))
+ cv = fa.as_pandas(
+ sf.cross_validation(df=series, n_windows=n_windows, h=horizon, id_col=id_col, time_col=time_col, target_col=target_col)
+ )
assert cv.shape[0] == n_series * n_windows * horizon
- assert cv.columns.tolist() == ['unique_id', 'ds', 'cutoff', 'y'] + [m.alias for m in models]
+ assert cv.columns.tolist() == [id_col, time_col, 'cutoff', target_col] + [m.alias for m in models]

def pipeline_with_level(series, X_df, n_series, horizon):
models = [
Expand Down
2 changes: 1 addition & 1 deletion dev/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ dependencies:
  - polars
  - supersmoother
  - tqdm
- - utilsforecast>=0.0.19
+ - utilsforecast>=0.0.20
2 changes: 1 addition & 1 deletion dev/local_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ dependencies:
  - polars
  - supersmoother
  - tqdm
- - utilsforecast>=0.0.19
+ - utilsforecast>=0.0.20
343 changes: 248 additions & 95 deletions nbs/src/core/core.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 223d554

Please sign in to comment.