Commit

Merge pull request #48 from Techtonique/v0222
V0222
thierrymoudiki authored Jun 30, 2024
2 parents a8d6bfe + 0700a12 commit 6fa9386
Showing 12 changed files with 261 additions and 21 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
@@ -1,7 +1,8 @@
# version 0.22.0
# version 0.22.2

- Implement new types of predictive simulation intervals (`type_pi`): independent bootstrap, block bootstrap, and two variants of split conformal prediction in class `MTS` (see the updated docs)
- Implement the Winkler score in `LazyMTS` and `LazyDeepMTS` for probabilistic forecasts
- Use conformalized `Estimator`s in `MTS` (see `examples/mts_conformal_not_sims.py`)

# version 0.20.6

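For readers scanning the changelog entries above, here is a minimal sketch of the conformalized-estimator path in `MTS`. It simply mirrors `examples/mts_conformal_not_sims.py` further down in this diff (same dataset, same arguments) and is not the only way to call it:

```python
import nnetsauce as ns
import pandas as pd
from sklearn.linear_model import Ridge

# same toy dataset as in examples/mts_conformal_not_sims.py below
url = "https://github.com/ritvikmath/Time-Series-Analysis/raw/master/ice_cream_vs_heater.csv"
df = pd.read_csv(url)
df.set_index('Month', inplace=True)
df_train = df.iloc[0:158, ]

# conformalized estimator: split conformal intervals around a Ridge base learner
regr = ns.PredictionInterval(obj=Ridge(),
                             method="splitconformal",
                             type_split="sequential",
                             level=95,
                             seed=312)

# plug the conformalized estimator into MTS and forecast with prediction intervals
obj_MTS = ns.MTS(regr, lags=25, n_hidden_features=10)
obj_MTS.fit(df_train)
print(obj_MTS.predict(h=10, return_pi=True))
```

The new bootstrap and split-conformal `type_pi` options are exercised in `examples/lazy_deepmts.py` (e.g. `type_pi="scp2-kde"`).
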
43 changes: 43 additions & 0 deletions examples/conformal_not_sims.py
@@ -0,0 +1,43 @@
import os
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
from time import time

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

print(f"\n ----- fetch_california_housing ----- \n")

data = fetch_california_housing()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=213)
print(f"X_train.shape: {X_train.shape}")
print(f"X_test.shape: {X_test.shape}")

# split conformal prediction intervals (type_split left at its default)
regr = ns.PredictionInterval(obj=ExtraTreesRegressor(),
                             method="splitconformal",
                             level=95,
                             seed=312)
start = time()
regr.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")
preds = regr.predict(X_test, return_pi=True)
print(preds)
print(f"coverage_rate: {np.mean((preds.lower<=y_test)*(preds.upper>=y_test))}")

# same split conformal intervals, but with a sequential train/calibration split
regr3 = ns.PredictionInterval(obj=ExtraTreesRegressor(),
                              method="splitconformal",
                              type_split="sequential",
                              level=95,
                              seed=312)
start = time()
regr3.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")
preds = regr3.predict(X_test, return_pi=True)
print(preds)
print(f"coverage_rate: {np.mean((preds.lower<=y_test)*(preds.upper>=y_test))}")
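
Both runs above target 95% nominal coverage; alongside the coverage rate it can help to compare average interval width (at equal coverage, narrower is better). A small follow-up sketch using the `preds` object returned above, whose `.lower`/`.upper` attributes are the same ones used in the coverage computation:

```python
# follow-up check: mean width of the 95% prediction intervals from the last run
print(f"mean interval width: {np.mean(preds.upper - preds.lower)}")
```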

17 changes: 17 additions & 0 deletions examples/lazy_deepmts.py
@@ -18,6 +18,7 @@
# ice cream vs heater (I don't own the copyright)
df.set_index('Month', inplace=True)
df.index.rename('date')
df2 = df.diff().dropna()  # first-differenced series
# gnp dataset
#df.set_index('date', inplace=True)

@@ -27,6 +28,11 @@
df_train = df.iloc[0:idx_train,]
df_test = df.iloc[idx_train:idx_end,]

idx_train = int(df2.shape[0]*0.8)
idx_end = df2.shape[0]
df2_train = df2.iloc[0:idx_train,]
df2_test = df2.iloc[idx_train:idx_end,]

print(f"----- df_train: {df_train} -----")
print(f"----- df_train.dtypes: {df_train.dtypes} -----")

@@ -65,4 +71,15 @@
print(models["WINKLERSCORE"])


# 'scp2-kde' is one of the new split-conformal type_pi options (cf. CHANGES.md)
regr_mts3 = ns.LazyDeepMTS(verbose=0, ignore_warnings=False, custom_metric=None,
                           lags=15, n_hidden_features=7, n_clusters=2,
                           replications=100, kernel="gaussian",
                           type_pi="scp2-kde",
                           show_progress=False, preprocess=False)
models, predictions = regr_mts3.fit(df2_train, df2_test)
model_dictionary = regr_mts3.provide_models(df2_train, df2_test)
print(models)
print(models["WINKLERSCORE"])
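
Since `LazyDeepMTS` now reports both `WINKLERSCORE` and `COVERAGE` when `replications` is set (see the `lazydeepMTS.py` diff further down), one way to inspect them for the run above might be the snippet below. It assumes the leaderboard `models` is a pandas DataFrame, which is consistent with the column indexing already used in this example:

```python
# hypothetical inspection of the leaderboard returned by regr_mts3.fit();
# the WINKLERSCORE and COVERAGE column names come from the lazydeepMTS.py diff below
print(models[["WINKLERSCORE", "COVERAGE"]].sort_values("WINKLERSCORE").head())
```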



83 changes: 83 additions & 0 deletions examples/mts_conformal_not_sims.py
@@ -0,0 +1,83 @@
import os
import pandas as pd
import nnetsauce as ns
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
from time import time

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

url = "https://github.com/ritvikmath/Time-Series-Analysis/raw/master/ice_cream_vs_heater.csv"

df = pd.read_csv(url)

#df.set_index('date', inplace=True)
df.set_index('Month', inplace=True)
df.index = df.index.rename('date')  # rename returns a new Index, so assign it back

print(df.shape)
print(198*0.8)
# df_train = df.iloc[0:97,]
# df_test = df.iloc[97:123,]
df_train = df.iloc[0:158,]
df_test = df.iloc[158:198,]

regr = ns.PredictionInterval(obj=Ridge(),
                             method="splitconformal",
                             type_split="sequential",
                             level=95,
                             seed=312)

print(df_test)
obj_MTS = ns.MTS(regr, lags=25, n_hidden_features=10, verbose=1)
obj_MTS.fit(df_train)
print("\n")
print(obj_MTS.predict(h=10, return_pi=True))


from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.utils import all_estimators

removed_regressors = [
"TheilSenRegressor",
"ARDRegression",
"CCA",
"GaussianProcessRegressor",
"GradientBoostingRegressor",
"HistGradientBoostingRegressor",
"IsotonicRegression",
"MultiOutputRegressor",
"MultiTaskElasticNet",
"MultiTaskElasticNetCV",
"MultiTaskLasso",
"MultiTaskLassoCV",
"OrthogonalMatchingPursuit",
"OrthogonalMatchingPursuitCV",
"PLSCanonical",
"PLSRegression",
"RadiusNeighborsRegressor",
"RegressorChain",
"StackingRegressor",
"VotingRegressor",
]

for est in all_estimators():
    if (
        issubclass(est[1], RegressorMixin)
        and (est[0] not in removed_regressors)
    ):
        try:
            print(f"Estimator: {est[0]}")
            obj0 = ns.PredictionInterval(obj=est[1](),
                                         method="splitconformal",
                                         type_split="sequential",
                                         level=95,
                                         seed=312)

            regr = ns.MTS(obj=obj0,
                          lags=25)
            regr.fit(df_train)
            print(regr.predict(h=10, return_pi=True))
        except Exception:
            # some estimators need extra arguments or fail on this data; skip them
            pass
6 changes: 6 additions & 0 deletions nnetsauce/custom/customRegressor.py
Expand Up @@ -80,6 +80,10 @@ class CustomRegressor(Custom, RegressorMixin):
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of data splitting for split conformal prediction: None (default),
"random" (random split of the data) or "sequential" (sequential split of the data)
col_sample: float
percentage of covariates randomly chosen for training
@@ -120,6 +124,7 @@ def __init__(
type_pi=None,
replications=None,
kernel=None,
type_split=None,
col_sample=1,
row_sample=1,
seed=123,
@@ -148,6 +153,7 @@ def __init__(
self.type_pi = type_pi
self.replications = replications
self.kernel = kernel
self.type_split = type_split

def fit(self, X, y, sample_weight=None, **kwargs):
"""Fit custom model to training data (X, y).
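A minimal sketch of the new `type_split` argument on `CustomRegressor`: only the constructor parameters and `fit(X, y)` are visible in this diff, so everything beyond them (in particular how conformal intervals are then requested at prediction time) is deliberately left out:

```python
# sketch only: the constructor arguments used here appear in the diff above;
# predict-time options are not shown in this diff and are therefore omitted
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=213)

regr = ns.CustomRegressor(obj=Ridge(),
                          type_split="sequential",  # new in this PR: sequential data split for conformal prediction
                          seed=123)
regr.fit(X_train, y_train)
```
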
10 changes: 9 additions & 1 deletion nnetsauce/lazypredict/lazyMTS.py
@@ -17,7 +17,7 @@
)
from .config import REGRESSORSMTS
from ..mts import MTS
from ..utils import convert_df_to_numeric, winkler_score
from ..utils import convert_df_to_numeric, coverage, winkler_score

import warnings

@@ -235,6 +235,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
MPE = []
MAPE = []
WINKLERSCORE = []
COVERAGE = []

# WIN = []
names = []
@@ -373,6 +374,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
mae = mean_absolute_error(X_test, X_pred.mean)
mpl = mean_pinball_loss(X_test, X_pred.mean)
winklerscore = winkler_score(X_pred, X_test, level=95)
coveragecalc = coverage(X_pred, X_test, level=95)
else:
rmse = mean_squared_error(X_test, X_pred, squared=False)
mae = mean_absolute_error(X_test, X_pred)
@@ -384,6 +386,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
MPL.append(mpl)
if self.replications is not None:
WINKLERSCORE.append(winklerscore)
COVERAGE.append(coveragecalc)
TIME.append(time.time() - start)

if self.custom_metric:
@@ -402,6 +405,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
# "MPE": mpe,
# "MAPE": mape,
"WINKLERSCORE": winklerscore,
"COVERAGE": coveragecalc,
"Time taken": time.time() - start,
}
else:
@@ -500,6 +504,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
mae = mean_absolute_error(X_test, X_pred.mean)
mpl = mean_pinball_loss(X_test, X_pred.mean)
winklerscore = winkler_score(X_pred, X_test, level=95)
coveragecalc = coverage(X_pred, X_test, level=95)
else:
rmse = mean_squared_error(X_test, X_pred, squared=False)
mae = mean_absolute_error(X_test, X_pred)
@@ -511,6 +516,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
MPL.append(mpl)
if self.replications is not None:
WINKLERSCORE.append(winklerscore)
COVERAGE.append(coveragecalc)
TIME.append(time.time() - start)

if self.custom_metric:
@@ -529,6 +535,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
# "MPE": mpe,
# "MAPE": mape,
"WINKLERSCORE": winklerscore,
"COVERAGE": coveragecalc,
"Time taken": time.time() - start,
}
else:
@@ -568,6 +575,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
# "MPE": MPE,
# "MAPE": MAPE,
"WINKLERSCORE": WINKLERSCORE,
"COVERAGE": COVERAGE,
"Time Taken": TIME,
}
else:
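The new `COVERAGE` column records the empirical coverage of the 95% prediction intervals. The actual `nnetsauce.utils.coverage` helper is not part of this diff; below is a rough sketch of the kind of computation involved, in the spirit of the `coverage_rate` check in `examples/conformal_not_sims.py` above:

```python
import numpy as np

def empirical_coverage(lower, upper, y_true):
    """Share of observations that fall inside their [lower, upper] interval."""
    lower, upper, y_true = map(np.asarray, (lower, upper, y_true))
    return float(np.mean((lower <= y_true) & (y_true <= upper)))
```
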
10 changes: 9 additions & 1 deletion nnetsauce/lazypredict/lazydeepMTS.py
@@ -18,7 +18,7 @@
from .config import DEEPREGRESSORSMTS
from ..mts import MTS
from ..deep import DeepMTS
from ..utils import convert_df_to_numeric, winkler_score
from ..utils import convert_df_to_numeric, coverage, winkler_score

import warnings

@@ -239,6 +239,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
MPE = []
MAPE = []
WINKLERSCORE = []
COVERAGE = []

# WIN = []
names = []
@@ -378,6 +379,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
mae = mean_absolute_error(X_test, X_pred.mean)
mpl = mean_pinball_loss(X_test, X_pred.mean)
winklerscore = winkler_score(X_pred, X_test, level=95)
coveragecalc = coverage(X_pred, X_test, level=95)
else:
rmse = mean_squared_error(X_test, X_pred, squared=False)
mae = mean_absolute_error(X_test, X_pred)
@@ -389,6 +391,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
MPL.append(mpl)
if self.replications is not None:
WINKLERSCORE.append(winklerscore)
COVERAGE.append(coveragecalc)
TIME.append(time.time() - start)

if self.custom_metric:
@@ -407,6 +410,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
# "MPE": mpe,
# "MAPE": mape,
"WINKLERSCORE": winklerscore,
"COVERAGE": coveragecalc,
"Time taken": time.time() - start,
}
else:
@@ -504,6 +508,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
mae = mean_absolute_error(X_test, X_pred.mean)
mpl = mean_pinball_loss(X_test, X_pred.mean)
winklerscore = winkler_score(X_pred, X_test, level=95)
coveragecalc = coverage(X_pred, X_test, level=95)
else:
rmse = mean_squared_error(X_test, X_pred, squared=False)
mae = mean_absolute_error(X_test, X_pred)
@@ -515,6 +520,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
MPL.append(mpl)
if self.replications is not None:
WINKLERSCORE.append(winklerscore)
COVERAGE.append(coveragecalc)
TIME.append(time.time() - start)

if self.custom_metric:
@@ -533,6 +539,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
# "MPE": mpe,
# "MAPE": mape,
"WINKLERSCORE": winklerscore,
"COVERAGE": coveragecalc,
"Time taken": time.time() - start,
}
else:
@@ -572,6 +579,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
# "MPE": MPE,
# "MAPE": MAPE,
"WINKLERSCORE": WINKLERSCORE,
"COVERAGE": COVERAGE,
"Time Taken": TIME,
}
else:
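For reference, the Winkler (interval) score reported in the `WINKLERSCORE` column has a standard textbook definition: interval width, plus a penalty of 2/α times the amount by which an observation falls outside the interval. The sketch below implements that textbook formula and is not necessarily identical to `nnetsauce.utils.winkler_score`:

```python
import numpy as np

def winkler_score_sketch(lower, upper, y_true, level=95):
    """Textbook Winkler/interval score at the given level, averaged over observations."""
    alpha = 1 - level / 100
    lower, upper, y_true = map(np.asarray, (lower, upper, y_true))
    width = upper - lower
    below = (2.0 / alpha) * (lower - y_true) * (y_true < lower)  # penalty when y falls below the interval
    above = (2.0 / alpha) * (y_true - upper) * (y_true > upper)  # penalty when y falls above the interval
    return float(np.mean(width + below + above))
```
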
(5 more changed files not shown)
