Merge branch 'main' into feature/anomaly_detection_v2
marcopeix committed Dec 9, 2024
2 parents e0fcce8 + 164ea51 commit d293408
Showing 9 changed files with 228 additions and 71 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/ci.yaml
@@ -83,3 +83,26 @@ jobs:

- name: Run tests
run: nbdev_test --timing --do_print --n_workers 0 --skip_file_re "computing_at_scale|distributed"

run-minimal-tests:
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
os: [macos-13, macos-14, ubuntu-latest, windows-latest]
python-version: ["3.9", "3.13"]
steps:
- name: Clone repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Set up python
      uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}

- name: Install pip requirements
run: pip install uv && uv pip install --system . matplotlib nbdev python-dotenv

- name: Run tests
run: nbdev_test --n_workers 0 --path nbs/docs/getting-started/2_quickstart.ipynb
2 changes: 1 addition & 1 deletion nbs/docs/getting-started/1_introduction.ipynb
@@ -52,7 +52,7 @@
"\n",
"Self-attention, the revolutionary concept introduced by the paper [Attention is all you need](https://arxiv.org/abs/1706.03762), is the basis of this foundation model. TimeGPT model is not based on any existing large language model(LLM). Instead, it is independently trained on a vast amount of time series data, and the large transformer model is designed to minimize the forecasting error.\n",
"\n",
"<img src=\"../../img/timegpt_archi.png\" />\n",
"<img src=\"https://github.com/Nixtla/nixtla/blob/main/nbs/img/timegpt_archi.png?raw=true\" />\n",
"\n",
"The architecture consists of an encoder-decoder structure with multiple layers, each with residual connections and layer normalization. Finally, a linear layer maps the decoder’s output to the forecasting window dimension. The general intuition is that attention-based mechanisms are able to capture the diversity of past events and correctly extrapolate potential future distributions."
]
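The encoder-decoder description in the cell above maps onto a standard transformer stack. Below is a minimal sketch, assuming PyTorch; TimeGPT's real architecture, layer sizes, and weights are not public, so every hyperparameter and module name here is a hypothetical stand-in, not the actual model:

```python
# Illustrative sketch only: all sizes are hypothetical, not TimeGPT's.
import torch
import torch.nn as nn

class EncoderDecoderForecaster(nn.Module):
    def __init__(self, d_model: int = 128, n_heads: int = 8,
                 n_layers: int = 4, horizon: int = 12):
        super().__init__()
        self.embed = nn.Linear(1, d_model)  # project scalar observations into the model dimension
        # norm_first=True gives each layer residual connections with layer normalization
        self.transformer = nn.Transformer(
            d_model=d_model, nhead=n_heads,
            num_encoder_layers=n_layers, num_decoder_layers=n_layers,
            norm_first=True, batch_first=True,
        )
        self.head = nn.Linear(d_model, 1)  # linear map from decoder output to the forecast window

    def forward(self, history: torch.Tensor, decoder_input: torch.Tensor) -> torch.Tensor:
        # history: (batch, lookback, 1); decoder_input: (batch, horizon, 1)
        src = self.embed(history)
        tgt = self.embed(decoder_input)
        out = self.transformer(src, tgt)
        return self.head(out).squeeze(-1)  # (batch, horizon), trained to minimize forecasting error
```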
2 changes: 1 addition & 1 deletion nbs/docs/getting-started/7_why_timegpt.ipynb
@@ -761,7 +761,7 @@
"#### Benchmark Results\n",
"For a more comprehensive dive into model accuracy and performance, explore our [Time Series Model Arena](https://github.com/Nixtla/nixtla/tree/main/experiments/foundation-time-series-arena)! TimeGPT continues to lead the pack with exceptional performance across benchmarks! 🌟\n",
"\n",
"<img src=\"../../assets/timeseries_model_arena.png\" />"
"<img src=\"https://github.com/Nixtla/nixtla/blob/main/nbs/img/timeseries_model_arena.png?raw=true\" alt=\"Benchmark\" />"
]
},
{
File renamed without changes
110 changes: 105 additions & 5 deletions nbs/src/nixtla_client.ipynb
@@ -58,6 +58,7 @@
"import orjson\n",
"import pandas as pd\n",
"import utilsforecast.processing as ufp\n",
"import zstandard as zstd\n",
"from tenacity import (\n",
" RetryCallState,\n",
" retry,\n",
@@ -731,7 +732,13 @@
" else:\n",
" self.supported_models = ['timegpt-1', 'timegpt-1-long-horizon']\n",
"\n",
" def _make_request(self, client: httpx.Client, endpoint: str, payload: dict[str, Any]) -> dict[str, Any]:\n",
" def _make_request(\n",
" self,\n",
" client: httpx.Client,\n",
" endpoint: str,\n",
" payload: dict[str, Any],\n",
" multithreaded_compress: bool,\n",
" ) -> dict[str, Any]:\n",
" def ensure_contiguous_arrays(d: dict[str, Any]) -> None:\n",
" for k, v in d.items():\n",
" if isinstance(v, np.ndarray):\n",
@@ -751,10 +758,15 @@
"\n",
" ensure_contiguous_arrays(payload)\n",
" content = orjson.dumps(payload, option=orjson.OPT_SERIALIZE_NUMPY)\n",
" content_size_mb = len(content) / (1024*1024)\n",
" content_size_mb = len(content) / 2**20\n",
" if content_size_mb > 200:\n",
" raise ValueError(f'The payload is too large. Set num_partitions={math.ceil(content_size_mb / 200)}')\n",
" resp = client.post(url=endpoint, content=content)\n",
" headers = {}\n",
" if content_size_mb > 1:\n",
" threads = -1 if multithreaded_compress else 0\n",
" content = zstd.ZstdCompressor(level=1, threads=threads).compress(content)\n",
" headers['content-encoding'] = 'zstd'\n",
" resp = client.post(url=endpoint, content=content, headers=headers)\n",
" try:\n",
" resp_body = orjson.loads(resp.content)\n",
" except orjson.JSONDecodeError:\n",
@@ -773,11 +785,13 @@
" client: httpx.Client,\n",
" endpoint: str,\n",
" payload: dict[str, Any],\n",
" multithreaded_compress: bool = True,\n",
" ) -> dict[str, Any]:\n",
" return self._retry_strategy(self._make_request)(\n",
" client=client,\n",
" endpoint=endpoint,\n",
" payload=payload,\n",
" multithreaded_compress=multithreaded_compress,\n",
" )\n",
"\n",
" def _make_partitioned_requests(\n",
@@ -794,7 +808,11 @@
" with ThreadPoolExecutor(max_workers) as executor:\n",
" future2pos = {\n",
" executor.submit(\n",
" self._make_request_with_retries, client, endpoint, payload\n",
" self._make_request_with_retries,\n",
" client=client,\n",
" endpoint=endpoint,\n",
" payload=payload,\n",
" multithreaded_compress=False,\n",
" ): i\n",
" for i, payload in enumerate(payloads)\n",
" }\n",
@@ -1862,6 +1880,7 @@
" finetune_steps: _NonNegativeInt,\n",
" finetune_depth: _Finetune_Depth,\n",
" finetune_loss: _Loss,\n",
" refit: bool,\n",
" clean_ex_first: bool,\n",
" hist_exog_list: Optional[list[str]],\n",
" date_features: Union[bool, Sequence[Union[str, Callable]]],\n",
@@ -1900,6 +1919,7 @@
" finetune_steps=finetune_steps,\n",
" finetune_depth=finetune_depth,\n",
" finetune_loss=finetune_loss,\n",
" refit=refit,\n",
" clean_ex_first=clean_ex_first,\n",
" hist_exog_list=hist_exog_list,\n",
" date_features=date_features,\n",
@@ -1928,6 +1948,7 @@
" finetune_steps: _NonNegativeInt = 0,\n",
" finetune_depth: _Finetune_Depth = 1,\n",
" finetune_loss: _Loss = 'default',\n",
" refit: bool = True,\n",
" clean_ex_first: bool = True,\n",
" hist_exog_list: Optional[list[str]] = None,\n",
" date_features: Union[bool, list[str]] = False,\n",
@@ -1980,11 +2001,14 @@
" finetune_steps : int (default=0)\n",
" Number of steps used to finetune TimeGPT in the\n",
" new data.\n",
" finetune_depth: int (default=1)\n",
" finetune_depth : int (default=1)\n",
" The depth of the finetuning. Uses a scale from 1 to 5, where 1 means little finetuning,\n",
" and 5 means that the entire model is finetuned.\n",
" finetune_loss : str (default='default')\n",
" Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.\n",
" refit : bool (default=True)\n",
" Fine-tune the model in each window. If `False`, only fine-tunes on the first window.\n",
" Only used if `finetune_steps` > 0.\n",
" clean_ex_first : bool (default=True)\n",
" Clean exogenous signal before making forecasts using TimeGPT.\n",
" hist_exog_list : list of str, optional (default=None)\n",
@@ -2029,6 +2053,7 @@
" finetune_steps=finetune_steps,\n",
" finetune_depth=finetune_depth,\n",
" finetune_loss=finetune_loss,\n",
" refit=refit,\n",
" clean_ex_first=clean_ex_first,\n",
" hist_exog_list=hist_exog_list,\n",
" date_features=date_features,\n",
@@ -2136,6 +2161,7 @@
" 'finetune_steps': finetune_steps,\n",
" 'finetune_depth': finetune_depth,\n",
" 'finetune_loss': finetune_loss,\n",
" 'refit': refit,\n",
" }\n",
" with httpx.Client(**self._client_kwargs) as client:\n",
" if num_partitions is None:\n",
@@ -2432,6 +2458,7 @@
" finetune_steps: _NonNegativeInt,\n",
" finetune_depth: _Finetune_Depth,\n",
" finetune_loss: _Loss,\n",
" refit: bool,\n",
" clean_ex_first: bool,\n",
" hist_exog_list: Optional[list[str]],\n",
" date_features: Union[bool, list[str]],\n",
@@ -2454,6 +2481,7 @@
" finetune_steps=finetune_steps,\n",
" finetune_depth=finetune_depth,\n",
" finetune_loss=finetune_loss,\n",
" refit=refit,\n",
" clean_ex_first=clean_ex_first,\n",
" hist_exog_list=hist_exog_list,\n",
" date_features=date_features,\n",
@@ -2578,6 +2606,48 @@
"nixtla_client.validate_api_key()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"# test compression\n",
"captured_request = None\n",
"\n",
"class CapturingClient(httpx.Client): \n",
" def post(self, *args, **kwargs):\n",
" request = self.build_request('POST', *args, **kwargs)\n",
" global captured_request\n",
" captured_request = {\n",
" 'headers': dict(request.headers),\n",
" 'content': request.content,\n",
" 'method': request.method,\n",
" 'url': str(request.url)\n",
" }\n",
" return super().post(*args, **kwargs)\n",
"\n",
"@contextmanager\n",
"def capture_request():\n",
" original_client = httpx.Client\n",
" httpx.Client = CapturingClient\n",
" try:\n",
" yield\n",
" finally:\n",
" httpx.Client = original_client\n",
"\n",
"# this produces a 1MB payload\n",
"series = generate_series(250, n_static_features=2)\n",
"with capture_request():\n",
" nixtla_client.forecast(df=series, freq='D', h=1, hist_exog_list=['static_0', 'static_1'])\n",
"\n",
"assert captured_request['headers']['content-encoding'] == 'zstd'\n",
"content = captured_request['content']\n",
"assert len(content) < 2**20\n",
"assert len(zstd.ZstdDecompressor().decompress(content)) > 2**20"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -2803,6 +2873,36 @@
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"# cv refit\n",
"cv_kwargs = dict(\n",
" df=df,\n",
" n_windows=2,\n",
" h=12,\n",
" freq='MS',\n",
" time_col='timestamp',\n",
" target_col='value',\n",
" finetune_steps=2,\n",
")\n",
"res_refit = nixtla_client.cross_validation(refit=True, **cv_kwargs)\n",
"res_no_refit = nixtla_client.cross_validation(refit=False, **cv_kwargs)\n",
"np.testing.assert_allclose(res_refit['value'], res_no_refit['value'])\n",
"np.testing.assert_raises(\n",
" AssertionError,\n",
" np.testing.assert_allclose,\n",
" res_refit['TimeGPT'],\n",
" res_no_refit['TimeGPT'],\n",
" atol=1e-4,\n",
" rtol=1e-3,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
3 changes: 1 addition & 2 deletions nixtla/__init__.py
@@ -1,4 +1,3 @@
__all__ = ["NixtlaClient"]
__version__ = "0.6.4"

__all__ = ["NixtlaClient"]
from .nixtla_client import NixtlaClient