forked from TannerGilbert/Machine-Learning-Explained
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bagging.py
48 lines (40 loc) · 1.61 KB
/
bagging.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from sklearn.base import BaseEstimator, TransformerMixin, clone, RegressorMixin
import numpy as np
class BaggingModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Bagging ensemble: each base model is fitted on its own bootstrap sample.

    Parameters
    ----------
    models : list of estimators
        Unfitted base estimators. They are cloned in ``fit``; the fitted
        clones are stored in ``models_``.
    task_type : str, default='classification'
        ``'classification'`` aggregates predictions by majority vote; any
        other value aggregates by averaging (regression).

    Notes
    -----
    NOTE(review): the class inherits ``RegressorMixin`` even when
    ``task_type`` is ``'classification'``, so the inherited ``score`` is the
    regression one -- confirm this is intended.
    """

    def __init__(self, models, task_type='classification'):
        self.models = models
        self.task_type = task_type

    def fit(self, X, y):
        """Fit a clone of every base model on a bootstrap sample of (X, y).

        Returns
        -------
        self
        """
        # Work on clones so the estimators supplied by the caller are not
        # the objects that get fitted.
        self.models_ = [clone(model) for model in self.models]
        for model in self.models_:
            X_tmp, y_tmp = self.subsample(X, y)
            model.fit(X_tmp, y_tmp)
        return self

    @staticmethod
    def subsample(X, y, ratio=1.0):
        """Create a random subsample from the dataset with replacement.

        Parameters
        ----------
        X, y : array-like
            Samples and targets, aligned on their first axis.
        ratio : float, default=1.0
            Size of the subsample relative to ``len(X)``.

        Returns
        -------
        X_new, y_new : list
            ``round(len(X) * ratio)`` rows of ``X`` and the matching entries
            of ``y``; both empty when that size is not positive.
        """
        n_sample = round(len(X) * ratio)
        if n_sample <= 0:
            return [], []
        # Convert first so that rows are selected by position; indexing a
        # pandas DataFrame/Series with ``obj[i]`` is label/column based.
        X_arr, y_arr = np.asarray(X), np.asarray(y)
        indices = np.random.randint(len(X_arr), size=n_sample)
        return list(X_arr[indices]), list(y_arr[indices])

    @staticmethod
    def _majority_vote(votes):
        """Return the most frequent label in ``votes`` (smallest label on ties)."""
        # np.unique returns labels sorted, and argmax picks the first maximum,
        # so ties resolve to the smallest label. Unlike np.bincount this also
        # handles float, negative and string labels.
        labels, counts = np.unique(votes, return_counts=True)
        return labels[np.argmax(counts)]

    def predict(self, X):
        """Aggregate the predictions of all fitted models.

        Returns
        -------
        numpy.ndarray
            One value per sample: the majority-voted label when ``task_type``
            is ``'classification'``, otherwise the mean prediction.
        """
        # Shape (n_samples, n_models): one column per fitted model.
        predictions_array = np.column_stack([
            model.predict(X) for model in self.models_
        ])
        if self.task_type == 'classification':
            return np.array([
                self._majority_vote(votes) for votes in predictions_array
            ])
        return np.mean(predictions_array, axis=1)

    def predict_proba(self, X):
        """Average the class probabilities predicted by all fitted models.

        Returns
        -------
        numpy.ndarray or None
            Array with one row of averaged probabilities per sample when
            ``task_type`` is ``'classification'``; ``None`` otherwise.
        """
        if self.task_type != 'classification':
            return None
        # One batched call per model instead of one call per sample and model.
        # NOTE: assumes every fitted model returns the same probability
        # columns; a bootstrap sample that misses a class would break this.
        return np.mean(
            [model.predict_proba(X) for model in self.models_], axis=0
        )