Skip to content

Commit

Permalink
Merge branch 'main' into python_3_11
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez authored Aug 15, 2023
2 parents 023f28d + 4c2733b commit 007480c
Show file tree
Hide file tree
Showing 33 changed files with 57,857 additions and 40,258 deletions.
30 changes: 29 additions & 1 deletion .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@
"avatar_url": "https://avatars.githubusercontent.com/u/16029092?v=4",
"profile": "https://github.com/shagn",
"contributions": [
"bug"
"bug",
"doc"
]
},
{
Expand Down Expand Up @@ -258,6 +259,33 @@
"contributions": [
"code"
]
},
{
"login": "yibenhuang",
"name": "Yiben Huang",
"avatar_url": "https://avatars.githubusercontent.com/u/62163340?v=4",
"profile": "https://github.com/yibenhuang",
"contributions": [
"doc"
]
},
{
"login": "andrewgross",
"name": "Andrew Gross",
"avatar_url": "https://avatars.githubusercontent.com/u/370118?v=4",
"profile": "https://github.com/andrewgross",
"contributions": [
"doc"
]
},
{
"login": "taniishkaaa",
"name": "taniishkaaa",
"avatar_url": "https://avatars.githubusercontent.com/u/109246904?v=4",
"profile": "https://github.com/taniishkaaa",
"contributions": [
"doc"
]
}
],
"contributorsPerLine": 7,
Expand Down
1 change: 1 addition & 0 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ categories:
- title: 'Enhancement'
label: 'enhancement'
change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
commitish: main
template: |
## Changes
$CHANGES
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ docs/_site
build
dist
.vscode
.idea
*.gif
*.csv
*/data/*
Expand Down
91 changes: 48 additions & 43 deletions README.md

Large diffs are not rendered by default.

22 changes: 17 additions & 5 deletions action_files/test_dask.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
import dask.dataframe as dd
import pytest

from statsforecast.utils import generate_series
from .utils import pipeline
from .utils import pipeline, pipeline_with_level

def test_dask_flow():
n_series = 2
horizon = 7
@pytest.fixture()
def n_series():
return 2

@pytest.fixture()
def sample_data(n_series):
series = generate_series(n_series).reset_index()
series['unique_id'] = series['unique_id'].astype(str)
series = dd.from_pandas(series, npartitions=2)
pipeline(series, n_series, horizon)
return series

def test_dask_flow(sample_data, n_series):
horizon = 7
pipeline(sample_data, n_series, horizon)

def test_dask_flow_with_level(sample_data, n_series):
horizon = 7
pipeline_with_level(sample_data, n_series, horizon)
24 changes: 17 additions & 7 deletions action_files/test_ray.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
import pytest
import ray

from statsforecast.utils import generate_series
from .utils import pipeline
from .utils import pipeline, pipeline_with_level

def test_ray_flow():
n_series = 2
horizon = 7
@pytest.fixture()
def n_series():
return 2

@pytest.fixture()
def sample_data(n_series):
series = generate_series(n_series).reset_index()
series['unique_id'] = series['unique_id'].astype(str)
ctx = ray.data.context.DatasetContext.get_current()
ctx.use_streaming_executor = False
series = ray.data.from_pandas(series).repartition(2)
pipeline(series, n_series, horizon)
return series

def test_ray_flow(sample_data, n_series):
horizon = 7
pipeline(sample_data, n_series, horizon)

def test_ray_flow_with_level(sample_data, n_series):
horizon = 7
pipeline_with_level(sample_data, n_series, horizon)
21 changes: 17 additions & 4 deletions action_files/test_spark.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
import pytest
from pyspark.sql import SparkSession

from statsforecast.utils import generate_series
from .utils import pipeline
from .utils import pipeline, pipeline_with_level

def test_spark_flow():
@pytest.fixture()
def n_series():
return 2

@pytest.fixture()
def sample_data(n_series):
n_series = 2
horizon = 7
series = generate_series(n_series).reset_index()
series['unique_id'] = series['unique_id'].astype(str)
spark = SparkSession.builder.getOrCreate()
series = spark.createDataFrame(series).repartition(2, 'unique_id')
pipeline(series, n_series, horizon)
return series

def test_spark_flow(sample_data, n_series):
horizon = 7
pipeline(sample_data, n_series, horizon)

def test_spark_flow_with_level(sample_data, n_series):
horizon = 7
pipeline_with_level(sample_data, n_series, horizon)
14 changes: 13 additions & 1 deletion action_files/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,19 @@ def pipeline(series, n_series, horizon):
models=models,
freq='D',
)
forecast = fa.as_pandas(sf.forecast(df=series, h=horizon, level=[80, 90]))
forecast = fa.as_pandas(sf.forecast(df=series, h=horizon))
print(forecast)
assert forecast.shape == (n_series * horizon, len(models) + 2)

def pipeline_with_level(series, n_series, horizon):
models = [
AutoARIMA(season_length=7),
]
sf = StatsForecast(
models=models,
freq='D',
)
forecast = fa.as_pandas(sf.forecast(df=series, h=horizon, level=[80, 90]))
print(forecast.columns)
expected = ["unique_id","ds","AutoARIMA","AutoARIMA-lo-90","AutoARIMA-hi-90", "AutoARIMA-lo-80","AutoARIMA-hi-80"]
assert forecast.shape == (n_series * horizon, len(expected))
2 changes: 2 additions & 0 deletions nbs/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
/.quarto/

lightning_logs/
2 changes: 1 addition & 1 deletion nbs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ website:
collapse-below: lg
left:
- text: "Get Started"
href: docs/getting-started/Getting_Started_short.ipynb
href: docs/getting-started/getting_started_short.html
- text: "NixtlaVerse"
menu:
- text: "StatsForecast ⚡️"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "14f5686c-449b-4376-8c58-fc8141f4b0f8",
"metadata": {},
Expand All @@ -11,6 +12,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "0f1d1483-6da7-4372-8390-84c9c280109e",
"metadata": {},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -10,13 +11,14 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"`StatsForecast` follows the sklearn model API. For this minimal example, you will create an instance of the StatsForecast class and then call its `fit` and `predict` methods. We recommend this option if speed is not paramount and you want to explore the fitted values and parameters. \n",
"\n",
":::{.callout-tip}\n",
"If you want to forecast many series, we recommend using the `forecast` method. Check this [Getting Started with multiple time series](./Getting_Started_multiple.ipynb) guide. \n",
"If you want to forecast many series, we recommend using the `forecast` method. Check this [Getting Started with multiple time series](./2_Getting_Started_complete.ipynb) guide. \n",
":::\n",
"\n",
"The input to StatsForecast is always a data frame in [long format](https://www.theanalysisfactor.com/wide-and-long-data/) with three columns: `unique_id`, `ds` and `y`:\n",
Expand All @@ -30,7 +32,7 @@
"\n",
"As an example, let’s look at the US Air Passengers dataset. This time series consists of monthly totals of a US airline passengers from 1949 to 1960. The CSV is available [here](https://www.kaggle.com/datasets/chirag19/air-passengers).\n",
"\n",
"We assume you have StatsForecast already installed. Check this guide for instructions on [how to install StatsForecast](./Installation.ipynb).\n",
"We assume you have StatsForecast already installed. Check this guide for instructions on [how to install StatsForecast](./0_Installation.ipynb).\n",
"\n",
"First, we’ll import the data:"
]
Expand All @@ -39,18 +41,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"UsageError: unrecognized arguments: hide output\n"
]
}
],
"outputs": [],
"source": [
"%%capture #To hide output \n",
"! pip install StatsForecast"
"#| hide\n",
"! pip install statsforecast"
]
},
{
Expand Down Expand Up @@ -148,14 +142,15 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"We fit the model by instantiating a new `StatsForecast` object with its two required parameters:\n",
"https://nixtla.github.io/statsforecast/src/core/models.html\n",
"* `models`: a list of models. Select the models you want from [models](../../src/core/models.ipynb) and import them. For this example, we will use a `AutoARIMA` model. We set `season_length` to 12 because we expect seasonal effects every 12 months. (See: [Seasonal periods](https://robjhyndman.com/hyndsight/seasonal-periods/))\n",
"\n",
"* `models`: a list of models. Select the models you want from [models](../models.ipynb) and import them. For this example, we will use a `AutoARIMA` model. We set `season_length` to 12 because we expect seasonal effects every 12 months. (See: [Seasonal periods](https://robjhyndman.com/hyndsight/seasonal-periods/))\n",
"\n",
"* `freq`: a string indicating the frequency of the data. (See [panda's available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).)\n",
"* `freq`: a string indicating the frequency of the data. (See [pandas available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).)\n",
"\n",
"Any settings are passed into the constructor. Then you call its fit method and pass in the historical data frame.\n",
"\n",
Expand Down Expand Up @@ -184,6 +179,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -297,6 +293,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -1741,15 +1738,16 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
":::{.callout-tip}\n",
"## Next Steps\n",
"\n",
"* Build and end to end forecasting pipeline following best practices in [End to End Walkthrough](./Getting_Started_complete.ipynb)\n",
"* [Forecast millions of series](./ForecastingAtScale.ipynb) in a scalable cluster in the cloud using Spark and Nixtla\n",
"* [Detect anomalies](./AnomalyDetection.ipynb) in your past observations\n",
"* Build and end-to-end forecasting pipeline following best practices in [End to End Walkthrough](./2_Getting_Started_complete.ipynb)\n",
"* [Forecast millions of series](../how-to-guides/Prophet_spark_m5.ipynb) in a scalable cluster in the cloud using Spark and Nixtla\n",
"* [Detect anomalies](../tutorials/AnomalyDetection.ipynb) in your past observations\n",
":::"
]
}
Expand Down
Loading

0 comments on commit 007480c

Please sign in to comment.