Skip to content

Commit

Permalink
use frequency validation from utilsforecast (#717)
Browse files: browse the repository at this point in the history
  • Loading branch information
jmoralez authored Dec 5, 2023
1 parent 1fc40f9 commit 3e0e507
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 53 deletions.
2 changes: 1 addition & 1 deletion dev/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ dependencies:
- polars
- supersmoother
- tqdm
- utilsforecast>=0.0.17
- utilsforecast>=0.0.19
2 changes: 1 addition & 1 deletion dev/local_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ dependencies:
- polars
- supersmoother
- tqdm
- utilsforecast>=0.0.17
- utilsforecast>=0.0.19
28 changes: 3 additions & 25 deletions nbs/src/core/core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"from triad import conditional_dispatcher\n",
"from utilsforecast.compat import DataFrame, pl_DataFrame, pl_Series\n",
"from utilsforecast.grouped_array import GroupedArray as BaseGroupedArray\n",
"from utilsforecast.validation import ensure_time_dtype\n",
"from utilsforecast.validation import ensure_time_dtype, validate_freq\n",
"\n",
"import statsforecast.config as sf_config\n",
"from statsforecast.utils import ConformalIntervals"
Expand Down Expand Up @@ -945,30 +945,8 @@
" _warn_df_constructor()\n",
" return\n",
" df = ensure_time_dtype(df, 'ds')\n",
" time_dtype = df['ds'].head(1).to_numpy().dtype\n",
" time_is_int = np.issubdtype(time_dtype, np.integer)\n",
" if time_is_int and not isinstance(self.freq, int):\n",
" raise ValueError(\n",
" 'Time column contains integer but the specified frequency is not an integer. '\n",
" 'Please provide a valid integer, like `freq=1`'\n",
" )\n",
" elif not time_is_int and not isinstance(self.freq, str):\n",
" # the ensure_time_dtype function makes sure that ds is either int or timestamp\n",
" raise ValueError(\n",
" 'Time column contains timestamps but the specified frequency is an integer. '\n",
" 'Please provide a valid pandas or polars offset.'\n",
" )\n",
" # try to catch pandas frequency in polars dataframe\n",
" if isinstance(df, pl_DataFrame) and isinstance(self.freq, str):\n",
" missing_n = re.search(r'\\d+', self.freq) is None\n",
" uppercase = re.sub('\\d+', '', self.freq).isupper()\n",
" if missing_n or uppercase:\n",
" raise ValueError(\n",
" 'You must specify a valid polars offset when using polars dataframes. '\n",
" 'You can find the available offsets in '\n",
" 'https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.offset_by.html'\n",
" )\n",
" elif isinstance(df, pd.DataFrame) and df.index.name == 'unique_id':\n",
" validate_freq(df['ds'], self.freq)\n",
" if isinstance(df, pd.DataFrame) and df.index.name == 'unique_id':\n",
" warnings.warn(\n",
" \"Passing unique_id as the index is deprecated. \"\n",
" \"Please provide it as a column instead.\",\n",
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ language = English
custom_sidebar = True
license = apache2
status = 2
requirements = numba>=0.55.0 numpy>=1.21.6 pandas>=1.3.5 scipy>=1.7.3 statsmodels>=0.13.2 tqdm fugue>=0.8.1 utilsforecast>=0.0.17
requirements = numba>=0.55.0 numpy>=1.21.6 pandas>=1.3.5 scipy>=1.7.3 statsmodels>=0.13.2 tqdm fugue>=0.8.1 utilsforecast>=0.0.19
polars_requirements = polars
ray_requirements = fugue[ray]>=0.8.1 protobuf>=3.15.3,<4.0.0
dask_requirements = fugue[dask]>=0.8.1
Expand Down
28 changes: 3 additions & 25 deletions statsforecast/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from triad import conditional_dispatcher
from utilsforecast.compat import DataFrame, pl_DataFrame, pl_Series
from utilsforecast.grouped_array import GroupedArray as BaseGroupedArray
from utilsforecast.validation import ensure_time_dtype
from utilsforecast.validation import ensure_time_dtype, validate_freq

import statsforecast.config as sf_config
from .utils import ConformalIntervals
Expand Down Expand Up @@ -516,30 +516,8 @@ def _prepare_fit(
_warn_df_constructor()
return
df = ensure_time_dtype(df, "ds")
time_dtype = df["ds"].head(1).to_numpy().dtype
time_is_int = np.issubdtype(time_dtype, np.integer)
if time_is_int and not isinstance(self.freq, int):
raise ValueError(
"Time column contains integer but the specified frequency is not an integer. "
"Please provide a valid integer, like `freq=1`"
)
elif not time_is_int and not isinstance(self.freq, str):
# the ensure_time_dtype function makes sure that ds is either int or timestamp
raise ValueError(
"Time column contains timestamps but the specified frequency is an integer. "
"Please provide a valid pandas or polars offset."
)
# try to catch pandas frequency in polars dataframe
if isinstance(df, pl_DataFrame) and isinstance(self.freq, str):
missing_n = re.search(r"\d+", self.freq) is None
uppercase = re.sub("\d+", "", self.freq).isupper()
if missing_n or uppercase:
raise ValueError(
"You must specify a valid polars offset when using polars dataframes. "
"You can find the available offsets in "
"https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.offset_by.html"
)
elif isinstance(df, pd.DataFrame) and df.index.name == "unique_id":
validate_freq(df["ds"], self.freq)
if isinstance(df, pd.DataFrame) and df.index.name == "unique_id":
warnings.warn(
"Passing unique_id as the index is deprecated. "
"Please provide it as a column instead.",
Expand Down

0 comments on commit 3e0e507

Please sign in to comment.