Skip to content

Commit

Permalink
Merge pull request #24 from Techtonique/prediction-interval
Browse files Browse the repository at this point in the history
Prediction intervals for Split Conformal for LSBoost Regression
  • Loading branch information
thierrymoudiki authored Apr 14, 2024
2 parents 919c5e1 + 6c27326 commit 01ae85e
Show file tree
Hide file tree
Showing 18 changed files with 2,952 additions and 3 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# version 0.12.0

- add prediction intervals to `LSBoostRegressor` (split conformal prediction,
split conformal prediction with KDE, and split conformal prediction bootstrap)

# version 0.9.0

- download data from R-universe
Expand Down
217 changes: 217 additions & 0 deletions examples/lsboost_regressor_pi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import subprocess
import sys

# Ensure matplotlib is available when the example is run standalone.
subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib"])

import mlsauce as ms
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from time import time
from os import chdir
from sklearn import metrics

# NOTE(review): plt, GridSearchCV, cross_val_score, chdir and metrics are
# imported but never used below; kept for parity with the original example.


def _train_test_split(loader, seed=15029):
    """Load a dataset via `loader` and return an 80/20 train/test split.

    Re-seeding NumPy immediately before each split reproduces the
    original example's deterministic partitioning for every section.
    """
    dataset = loader()
    np.random.seed(seed)
    return train_test_split(dataset.data, dataset.target, test_size=0.2)


def _fit_predict_report(obj, X_train, X_test, y_train, y_test, label):
    """Fit `obj`, predict split-conformal intervals, and print results.

    Prints the estimator's params, the fit and predict wall-clock times,
    and the empirical coverage: the fraction of test targets that fall
    inside [preds.lower, preds.upper].
    """
    print(obj.get_params())
    start = time()
    obj.fit(X_train, y_train)
    print(time() - start)
    start = time()
    preds = obj.predict(X_test, return_pi=True, method="splitconformal")
    print(time() - start)
    coverage = np.mean((preds.upper >= y_test) * (preds.lower <= y_test))
    print(f"{label}: {coverage}")


# ridge ---------------------------------------------------------------------

print("\n")
print("ridge -----")
print("\n")

# Same three ridge configurations on each dataset; the index keeps the
# printed labels identical to the original example (coverage 1 and 2).
for idx, loader in ((1, fetch_california_housing), (2, load_diabetes)):
    # split data into training set and test set
    X_train, X_test, y_train, y_test = _train_test_split(loader)

    _fit_predict_report(
        ms.LSBoostRegressor(col_sample=0.9, row_sample=0.9),
        X_train, X_test, y_train, y_test,
        f"splitconformal coverage {idx}",
    )
    _fit_predict_report(
        ms.LSBoostRegressor(col_sample=0.9, row_sample=0.9,
                            replications=50,
                            type_pi="bootstrap"),
        X_train, X_test, y_train, y_test,
        f"splitconformal bootstrap coverage {idx}",
    )
    _fit_predict_report(
        ms.LSBoostRegressor(col_sample=0.9, row_sample=0.9,
                            replications=50,
                            type_pi="kde"),
        X_train, X_test, y_train, y_test,
        f"splitconformal kde coverage {idx}",
    )


# lasso ---------------------------------------------------------------------

print("\n")
print("lasso -----")
print("\n")

# California housing (coverage 3): identical lasso settings per method.
X_train, X_test, y_train, y_test = _train_test_split(fetch_california_housing)

_fit_predict_report(
    ms.LSBoostRegressor(n_estimators=50, solver="lasso",
                        col_sample=0.9, row_sample=0.9),
    X_train, X_test, y_train, y_test,
    "splitconformal coverage 3",
)
_fit_predict_report(
    ms.LSBoostRegressor(n_estimators=50, solver="lasso",
                        col_sample=0.9, row_sample=0.9,
                        replications=50,
                        type_pi="bootstrap"),
    X_train, X_test, y_train, y_test,
    "splitconformal bootstrap coverage 3",
)
_fit_predict_report(
    ms.LSBoostRegressor(n_estimators=50, solver="lasso",
                        col_sample=0.9, row_sample=0.9,
                        replications=50,
                        type_pi="kde"),
    X_train, X_test, y_train, y_test,
    "splitconformal kde coverage 3",
)

# Diabetes (coverage 4): each method uses distinct regularization /
# dropout settings in the original, so the configs are spelled out.
X_train, X_test, y_train, y_test = _train_test_split(load_diabetes)

_fit_predict_report(
    ms.LSBoostRegressor(n_estimators=50, solver="lasso", reg_lambda=0.002,
                        col_sample=0.9, row_sample=0.9),
    X_train, X_test, y_train, y_test,
    "splitconformal coverage 4",
)
_fit_predict_report(
    ms.LSBoostRegressor(n_estimators=10, solver="lasso",
                        col_sample=0.9, row_sample=0.9,
                        replications=50, reg_lambda=0.003, dropout=0.4,
                        type_pi="bootstrap"),
    X_train, X_test, y_train, y_test,
    "splitconformal bootstrap coverage 4",
)
_fit_predict_report(
    ms.LSBoostRegressor(n_estimators=10, solver="lasso",
                        col_sample=0.9, row_sample=0.9,
                        replications=50, reg_lambda=0.001, dropout=0.4,
                        type_pi="kde"),
    X_train, X_test, y_train, y_test,
    "splitconformal kde coverage 4",
)

11 changes: 11 additions & 0 deletions mlsauce.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ mlsauce/lasso/__init__.py
mlsauce/lasso/_lasso.py
mlsauce/lasso/_lassoc.c
mlsauce/lasso/setup.py
mlsauce/nonconformist/__init__.py
mlsauce/nonconformist/acp.py
mlsauce/nonconformist/base.py
mlsauce/nonconformist/cp.py
mlsauce/nonconformist/evaluation.py
mlsauce/nonconformist/icp.py
mlsauce/nonconformist/nc.py
mlsauce/nonconformist/util.py
mlsauce/predictioninterval/__init__.py
mlsauce/predictioninterval/predictioninterval.py
mlsauce/ridge/__init__.py
mlsauce/ridge/_ridge.py
mlsauce/ridge/_ridgec.c
Expand All @@ -39,6 +49,7 @@ mlsauce/stump/setup.py
mlsauce/tests/__init__.py
mlsauce/tests/test_adaopt.py
mlsauce/utils/__init__.py
mlsauce/utils/progress_bar.py
mlsauce/utils/memoryuse/__init__.py
mlsauce/utils/memoryuse/mem_usage.py
mlsauce/utils/misc/__init__.py
Expand Down
50 changes: 49 additions & 1 deletion mlsauce/booster/_booster_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from . import _boosterc as boosterc
from ..predictioninterval import PredictionInterval

class LSBoostRegressor(BaseEstimator, RegressorMixin):
"""LSBoost regressor.
Expand Down Expand Up @@ -53,6 +54,17 @@ class LSBoostRegressor(BaseEstimator, RegressorMixin):
activation: str
activation function: currently 'relu', 'relu6', 'sigmoid', 'tanh'
type_pi: str.
type of prediction interval; currently "kde" (default) or "bootstrap".
Used only in `self.predict`, for `self.replications` > 0 and `self.kernel`
in ('gaussian', 'tophat'). Default is `None`.
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
"""

Expand All @@ -72,6 +84,9 @@ def __init__(
backend="cpu",
solver="ridge",
activation="relu",
type_pi=None,
replications=None,
kernel=None
):
assert backend in (
"cpu",
Expand Down Expand Up @@ -107,6 +122,9 @@ def __init__(
self.obj = None
self.solver = solver
self.activation = activation
self.type_pi=type_pi
self.replications=replications
self.kernel=kernel

def fit(self, X, y, **kwargs):
"""Fit Booster (regressor) to training data (X, y)
Expand Down Expand Up @@ -148,16 +166,30 @@ def fit(self, X, y, **kwargs):

self.n_estimators = self.obj["n_estimators"]

self.X_ = X

self.y_ = y

return self

def predict(self, X, **kwargs):
def predict(self, X,
level=95,
method=None,
**kwargs):
"""Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
`None`, or 'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters to be passed to
self.cook_test_set
Expand All @@ -167,6 +199,22 @@ def predict(self, X, **kwargs):
probability estimates for test data: {array-like}
"""

if "return_pi" in kwargs:
assert method in ('splitconformal', 'localconformal'), \
"method must be in ('splitconformal', 'localconformal')"
self.pi = PredictionInterval(obj = self,
method=method,
level=level,
type_pi=self.type_pi,
replications=self.replications,
kernel=self.kernel,
)
self.pi.fit(self.X_, self.y_)
self.X_ = None
self.y_ = None
preds = self.pi.predict(X, return_pi=True)
return preds

return boosterc.predict_booster_regressor(
self.obj, np.asarray(X, order="C")
)
25 changes: 25 additions & 0 deletions mlsauce/nonconformist/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
The MIT License (MIT)

nonconformist package:
Copyright (c) 2015 Henrik Linusson

Other extensions:
Copyright (c) 2019 Yaniv Romano

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
17 changes: 17 additions & 0 deletions mlsauce/nonconformist/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python

"""
Vendored subset of the `nonconformist` conformal-prediction package.

Re-exports the regression-related classes used by mlsauce's
prediction-interval code: error functions and nonconformity scorers
from `.nc`, the inductive conformal regressor `IcpRegressor` from
`.cp`, and `RegressorAdapter` from `.base`.
"""

# Authors: Henrik Linusson
#          Yaniv Romano modified the nc.py file to include CQR
#          T. Moudiki modified __init__.py to import classes

#__version__ = '2.1.0'

from .nc import AbsErrorErrFunc, QuantileRegErrFunc, RegressorNc, RegressorNormalizer
from .cp import IcpRegressor
from .base import RegressorAdapter

__all__ = ["AbsErrorErrFunc", "QuantileRegErrFunc", "RegressorAdapter", "RegressorNc", "RegressorNormalizer", "IcpRegressor"]
Loading

0 comments on commit 01ae85e

Please sign in to comment.