Merge branch 'mlflow_docs' of github.com:Nixtla/statsforecast into mlflow_docs
kvnkho committed Oct 21, 2023
2 parents 3e05112 + 9357acf commit 5b4969a
Showing 88 changed files with 608,439 additions and 55,011 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/build-docs.yaml
@@ -0,0 +1,52 @@
name: "build-docs"
on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
workflow_dispatch:

defaults:
run:
shell: bash

jobs:
build-docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: 'recursive'
- uses: actions/setup-python@v4
with:
cache: "pip"
python-version: '3.10'
cache-dependency-path: settings.ini
- name: Build docs
run: |
set -ux
python -m pip install --upgrade pip
pip install -Uq nbdev
pip install -e ".[dev]"
mkdir nbs/_extensions
cp -r docs-scripts/mintlify/ nbs/_extensions/
python docs-scripts/update-quarto.py
echo "procs = nbdev_plotly.plotly:PlotlyProc" >> settings.ini
nbdev_docs
- name: Apply final formats
run: bash ./docs-scripts/docs-final-formatting.bash
- name: Copy over necessary assets
run: |
cp nbs/mint.json _docs/mint.json
cp docs-scripts/imgs/* _docs/
- name: Deploy to Mintlify Docs
if: github.event_name == 'push'
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_branch: docs
publish_dir: ./_docs
# The following lines assign commit authorship to the official GH-Actions bot for deploys to `docs` branch.
# You can swap them out with your own user credentials.
user_name: github-actions[bot]
user_email: 41898282+github-actions[bot]@users.noreply.github.com
6 changes: 5 additions & 1 deletion .github/workflows/ci.yaml
@@ -14,6 +14,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true

+env:
+  NIXTLA_NUMBA_CACHE: '1'
+
 jobs:
   nb-sync:
     runs-on: ubuntu-latest
@@ -62,6 +65,7 @@ jobs:

   run-tests:
     runs-on: ${{ matrix.os }}
+    timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix:
@@ -87,7 +91,7 @@ jobs:
       - name: Run integration tests
         run: |
           pip install ".[dev]" pytest
-          pytest action_files
+          pytest --durations=0 action_files

   test-m3-performance:
     runs-on: ubuntu-latest
5 changes: 3 additions & 2 deletions .gitignore
@@ -9,17 +9,18 @@ dist
.vscode
.idea
*.gif
*.icloud
*.csv
*/data/*
*.parquet
tmp
_docs/
_proc/
sidebar.yml
.DS_Store
.gitattributes
.gitconfig
nbs/.last_checked
.venv
.idea
mlruns/
mlruns/
.luarc.json
4 changes: 4 additions & 0 deletions .gitmodules
@@ -0,0 +1,4 @@
[submodule "docs-scripts"]
path = docs-scripts
url = git@github.com:Nixtla/docs.git
branch = scripts
2 changes: 1 addition & 1 deletion README.md
@@ -259,4 +259,4 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d

 <!-- ALL-CONTRIBUTORS-LIST:END -->

-This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
+This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
40 changes: 40 additions & 0 deletions action_files/conftest.py
@@ -0,0 +1,40 @@
import numpy as np
import pandas as pd
import pytest

from statsforecast.utils import generate_series


@pytest.fixture
def n_series():
return 2

@pytest.fixture
def horizon():
return 7

@pytest.fixture()
def local_data(n_series, horizon):
n_static = 2
series = generate_series(n_series, n_static_features=n_static)
static_features = []
for i in range(n_static):
name = f'static_{i}'
series[name] = series[name].astype(int)
static_features.append(name)
series['unique_id'] = series['unique_id'].astype(str)
uids = series['unique_id'].unique()
static_values = series.groupby('unique_id')[static_features].head(1)
static_values['unique_id'] = uids
last_train_dates = series.groupby('unique_id')['ds'].max()
pred_start = last_train_dates + pd.offsets.Day()
pred_end = last_train_dates + horizon * pd.offsets.Day()
pred_dates = np.hstack([pd.date_range(start, end) for start, end in zip(pred_start, pred_end)])
X_df = pd.DataFrame(
{
'unique_id': np.repeat(uids, horizon),
'ds': pred_dates,
}
)
X_df = X_df.merge(static_values, on='unique_id')
return series, X_df
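
For orientation, a minimal sketch (not part of the commit) of how the (series, X_df) pair built by local_data is consumed: X_df carries the future values of the static exogenous columns, and the updated pipeline helpers later in this diff pass it straight to StatsForecast.forecast. This assumes forecast() accepts an X_df argument, which is exactly what the action_files/utils.py changes below rely on.

from statsforecast import StatsForecast
from statsforecast.models import Naive

# series: training frame; X_df: one row per (unique_id, ds) over the
# 7-day horizon with the static exogenous columns filled in.
sf = StatsForecast(models=[Naive()], freq='D')
forecast = sf.forecast(df=series, h=7, X_df=X_df)
assert len(forecast) == series['unique_id'].nunique() * 7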
25 changes: 10 additions & 15 deletions action_files/test_dask.py
@@ -1,24 +1,19 @@
 import dask.dataframe as dd
 import pytest

-from statsforecast.utils import generate_series
 from .utils import pipeline, pipeline_with_level

-@pytest.fixture()
-def n_series():
-    return 2

+def to_distributed(df):
+    return dd.from_pandas(df, npartitions=2)
+
 @pytest.fixture()
-def sample_data(n_series):
-    series = generate_series(n_series).reset_index()
-    series['unique_id'] = series['unique_id'].astype(str)
-    series = dd.from_pandas(series, npartitions=2)
-    return series
+def sample_data(local_data):
+    series, X_df = local_data
+    return to_distributed(series), to_distributed(X_df)

-def test_dask_flow(sample_data, n_series):
-    horizon = 7
-    pipeline(sample_data, n_series, horizon)
+def test_dask_flow(horizon, sample_data, n_series):
+    pipeline(*sample_data, n_series, horizon)

-def test_dask_flow_with_level(sample_data, n_series):
-    horizon = 7
-    pipeline_with_level(sample_data, n_series, horizon)
+def test_dask_flow_with_level(horizon, sample_data, n_series):
+    pipeline_with_level(*sample_data, n_series, horizon)
29 changes: 14 additions & 15 deletions action_files/test_ray.py
@@ -1,24 +1,23 @@
+import sys
+
 import pytest
 import ray

-from statsforecast.utils import generate_series
 from .utils import pipeline, pipeline_with_level

-@pytest.fixture()
-def n_series():
-    return 2

+def to_distributed(df):
+    return ray.data.from_pandas(df).repartition(2)
+
 @pytest.fixture()
-def sample_data(n_series):
-    series = generate_series(n_series).reset_index()
-    series['unique_id'] = series['unique_id'].astype(str)
-    series = ray.data.from_pandas(series).repartition(2)
-    return series
+def sample_data(local_data):
+    series, X_df = local_data
+    return to_distributed(series), to_distributed(X_df)

-def test_ray_flow(sample_data, n_series):
-    horizon = 7
-    pipeline(sample_data, n_series, horizon)
+@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python >= 3.8")
+def test_ray_flow(horizon, sample_data, n_series):
+    pipeline(*sample_data, n_series, horizon)

-def test_ray_flow_with_level(sample_data, n_series):
-    horizon = 7
-    pipeline_with_level(sample_data, n_series, horizon)
+@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python >= 3.8")
+def test_ray_flow_with_level(horizon, sample_data, n_series):
+    pipeline_with_level(*sample_data, n_series, horizon)
39 changes: 20 additions & 19 deletions action_files/test_spark.py
@@ -1,26 +1,27 @@
+import sys
+
 import pytest
 from pyspark.sql import SparkSession

-from statsforecast.utils import generate_series
 from .utils import pipeline, pipeline_with_level

-@pytest.fixture()
-def n_series():
-    return 2

+@pytest.fixture
+def spark():
+    return SparkSession.builder.getOrCreate()
+
+def to_distributed(spark, df):
+    return spark.createDataFrame(df).repartition(2, 'unique_id')
+
 @pytest.fixture()
-def sample_data(n_series):
-    n_series = 2
-    series = generate_series(n_series).reset_index()
-    series['unique_id'] = series['unique_id'].astype(str)
-    spark = SparkSession.builder.getOrCreate()
-    series = spark.createDataFrame(series).repartition(2, 'unique_id')
-    return series
-
-def test_spark_flow(sample_data, n_series):
-    horizon = 7
-    pipeline(sample_data, n_series, horizon)
-
-def test_spark_flow_with_level(sample_data, n_series):
-    horizon = 7
-    pipeline_with_level(sample_data, n_series, horizon)
+def sample_data(spark, local_data):
+    series, X_df = local_data
+    return to_distributed(spark, series), to_distributed(spark, X_df)
+
+@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python >= 3.8")
+def test_spark_flow(horizon, sample_data, n_series):
+    pipeline(*sample_data, n_series, horizon)
+
+@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python >= 3.8")
+def test_spark_flow_with_level(horizon, sample_data, n_series):
+    pipeline_with_level(*sample_data, n_series, horizon)
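
All three backend tests can reuse the same pipeline helpers because StatsForecast dispatches on the type of df (pandas, Dask, Ray or Spark) through fugue. A hedged sketch of that behavior with Spark, not part of this commit and assuming pyspark plus the distributed extras are installed:

from pyspark.sql import SparkSession
from statsforecast import StatsForecast
from statsforecast.models import Naive
from statsforecast.utils import generate_series

spark = SparkSession.builder.getOrCreate()
series = generate_series(2)
series['unique_id'] = series['unique_id'].astype(str)  # Spark can't ingest categoricals
sdf = spark.createDataFrame(series).repartition(2, 'unique_id')

sf = StatsForecast(models=[Naive()], freq='D')
# With a Spark input the forecast comes back as a Spark DataFrame, which is
# why the helpers in utils.py collect with fa.as_pandas() before asserting.
sf.forecast(df=sdf, h=7).show()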
38 changes: 26 additions & 12 deletions action_files/utils.py
@@ -21,15 +21,19 @@
 )


-def pipeline(series, n_series, horizon):
+def pipeline(series, X_df, n_series, horizon):
     models = [
-        ADIDA(), AutoARIMA(season_length=7),
-        ARIMA(season_length=7, order=(0, 1, 2)),
-        CrostonClassic(), CrostonOptimized(),
-        CrostonSBA(), AutoETS(season_length=7),
-        HistoricAverage(),
-        IMAPA(), Naive(),
-        RandomWalkWithDrift(),
+        ADIDA(),
+        AutoARIMA(season_length=7),
+        ARIMA(season_length=7, order=(0, 1, 2)),
+        CrostonClassic(),
+        CrostonOptimized(),
+        CrostonSBA(),
+        AutoETS(season_length=7),
+        HistoricAverage(),
+        IMAPA(),
+        Naive(),
+        RandomWalkWithDrift(),
         SeasonalExponentialSmoothing(season_length=7, alpha=0.1),
         SeasonalNaive(season_length=7),
         SeasonalWindowAverage(season_length=7, window_size=4),
@@ -41,19 +45,29 @@ def pipeline(series, n_series, horizon):
         models=models,
         freq='D',
     )
-    forecast = fa.as_pandas(sf.forecast(df=series, h=horizon))
+    forecast = fa.as_pandas(sf.forecast(df=series, h=horizon, X_df=X_df))
     print(forecast)
     assert forecast.shape == (n_series * horizon, len(models) + 2)

-def pipeline_with_level(series, n_series, horizon):
+    n_windows = 2
+    cv = fa.as_pandas(sf.cross_validation(df=series, n_windows=n_windows, h=horizon))
+    assert cv.shape[0] == n_series * n_windows * horizon
+    assert cv.columns.tolist() == ['unique_id', 'ds', 'cutoff', 'y'] + [m.alias for m in models]
+
+def pipeline_with_level(series, X_df, n_series, horizon):
     models = [
         AutoARIMA(season_length=7),
     ]
     sf = StatsForecast(
         models=models,
         freq='D',
     )
-    forecast = fa.as_pandas(sf.forecast(df=series, h=horizon, level=[80, 90]))
+    forecast = fa.as_pandas(sf.forecast(df=series, h=horizon, X_df=X_df, level=[80, 90]))
     print(forecast.columns)
     expected = ["unique_id","ds","AutoARIMA","AutoARIMA-lo-90","AutoARIMA-hi-90", "AutoARIMA-lo-80","AutoARIMA-hi-80"]
-    assert forecast.shape == (n_series * horizon, len(expected))
+    assert forecast.shape == (n_series * horizon, len(expected))
+
+    n_windows = 2
+    cv = fa.as_pandas(sf.cross_validation(df=series, n_windows=n_windows, h=horizon, level=[80]))
+    assert cv.shape[0] == n_series * n_windows * horizon
+    assert cv.columns.tolist() == ['unique_id', 'ds', 'cutoff', 'y', 'AutoARIMA', 'AutoARIMA-lo-80', 'AutoARIMA-hi-80']
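
A note on the fa alias above: it is presumably fugue.api, whose as_pandas collects any fugue-supported DataFrame (pandas, Dask, Ray, Spark) into a local pandas frame so the shape and column assertions can run driver-side. A minimal round-trip sketch under that assumption:

import dask.dataframe as dd
import fugue.api as fa
import pandas as pd

pdf = pd.DataFrame({'unique_id': ['a', 'a', 'b'], 'y': [1.0, 2.0, 3.0]})
ddf = dd.from_pandas(pdf, npartitions=2)
# Collecting may reorder rows across partitions, hence the sorted comparison.
assert sorted(fa.as_pandas(ddf)['y'].tolist()) == [1.0, 2.0, 3.0]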
1 change: 1 addition & 0 deletions dev/environment.yml
@@ -16,6 +16,7 @@ dependencies:
   - statsmodels>=0.13.2
   - tabulate
   - plotly
+  - utilsforecast>=0.0.5
   - pip:
     - fugue[dask,ray]
     - nbdev
1 change: 1 addition & 0 deletions dev/local_environment.yml
@@ -15,6 +15,7 @@ dependencies:
   - statsmodels>=0.13.2
   - tabulate
   - plotly
+  - utilsforecast>=0.0.5
   - pip:
     - nbdev
     - plotly-resampler
10 changes: 7 additions & 3 deletions dev/requirements.txt
@@ -1,17 +1,21 @@
holidays<0.21
holidays<0.21
jupyterlab
matplotlib
numba>=0.55.0
numpy>=1.21.6
pandas>=1.3.5
pyspark>=3.3
pip
prophet
pyarrow
scipy>=1.7.3
statsmodels>=0.13.2
tabulate
plotly
utilsforecast>=0.0.5
fugue[dask,ray]
nbdev
tqdm
plotly-resampler
polars
supersmoother
supersmoother
tqdm
1 change: 1 addition & 0 deletions docs-scripts
Submodule docs-scripts added at d63d02
4 changes: 2 additions & 2 deletions experiments/ces/src/ces.py
@@ -2,8 +2,8 @@
 import time
 from functools import partial
 from multiprocessing import cpu_count
-os.environ['NUMBA_RELEASE_GIL'] = 'True'
-#os.environ['NUMBA_CACHE'] = 'True'
+os.environ['NIXTLA_NUMBA_RELEASE_GIL'] = '1'
+os.environ['NIXTLA_NUMBA_CACHE'] = '1'

 import fire
 import numpy as np
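
The renamed NIXTLA_* flags gate numba behavior at import time, which is why ces.py sets them before its other imports and why ci.yaml exports NIXTLA_NUMBA_CACHE for the whole workflow. A hedged sketch of how such a flag is commonly consumed (illustrative only; statsforecast's actual wiring may differ):

import os

import numpy as np
from numba import njit

# Read the flag before defining jitted functions; '1' enables on-disk caching
# of compiled kernels so repeat runs (e.g. in CI) skip JIT compilation.
CACHE = os.environ.get('NIXTLA_NUMBA_CACHE', '0') == '1'

@njit(cache=CACHE)
def moving_average(x, window):
    out = np.empty(x.size - window + 1)
    for i in range(out.size):
        out[i] = x[i:i + window].mean()
    return out

print(moving_average(np.arange(10.0), 3))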