enn example
dvro committed Jun 23, 2014
1 parent 7b31f02 commit d9b08fb
Showing 4 changed files with 183 additions and 1 deletion.
39 changes: 39 additions & 0 deletions examples/enn_example.py
@@ -0,0 +1,39 @@
import numpy as np
import matplotlib.pyplot as plt

from protopy.selection.enn import ENN

mu1 = [4, 5]
si1 = [[0.75, 0.25], [0.25, 0.75]]

mu2 = [6, 5]
si2 = [[0.75, -0.25], [-0.25, 0.75]]  # covariance must be positive semi-definite

samples = 200

X1 = np.random.multivariate_normal(
    np.asarray(mu1), np.asarray(si1), samples)
X2 = np.random.multivariate_normal(
    np.asarray(mu2), np.asarray(si2), samples)

X = np.vstack((X1, X2))

y = np.asarray([0] * samples + [1] * samples)


plt.plot(X[y==0].T[0], X[y==0].T[1], 'bs', X[y==1].T[0], X[y==1].T[1],'ro')
plt.axis([0, 10, 0, 10])
plt.title('Original Dataset')
plt.show()
plt.clf()

editednn = ENN()
X_, y_ = editednn.reduce_data(X, y)

plt.plot(X_[y_==0].T[0], X_[y_==0].T[1], 'bs', X_[y_==1].T[0], X_[y_==1].T[1],'ro')
plt.axis([0, 10, 0, 10])
plt.title('ENN')
plt.show()
plt.clf()
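
For context, a minimal sketch of how the example above can also report how much of the training set the edit removes. It relies only on reduce_data and the reduction_ attribute that reduce_data sets (both visible in the protopy/selection/enn.py diff further down); the variable names are illustrative.

import numpy as np
from protopy.selection.enn import ENN

# two overlapping Gaussian classes, as in the example above
X = np.vstack((
    np.random.multivariate_normal([4, 5], [[0.75, 0.25], [0.25, 0.75]], 200),
    np.random.multivariate_normal([6, 5], [[0.75, -0.25], [-0.25, 0.75]], 200)))
y = np.asarray([0] * 200 + [1] * 200)

enn = ENN()
X_, y_ = enn.reduce_data(X, y)

# reduction_ is the fraction of samples discarded by the edit
print('kept %d of %d samples (reduction = %.2f)' % (len(y_), len(y), enn.reduction_))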


137 changes: 137 additions & 0 deletions examples/plot_comparision.py
@@ -0,0 +1,137 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
======================
Classifiers Comparison
======================
A comparison of several classifiers in scikit-learn on synthetic datasets.
The point of this example is to illustrate the nature of decision boundaries
of different classifiers.
This should be taken with a grain of salt, as the intuition conveyed by
these examples does not necessarily carry over to real datasets.
In particular, in high-dimensional spaces data can more easily be separated
linearly, and the simplicity of classifiers such as naive Bayes and linear SVMs
might lead to better generalization.
The plots show training points in solid colors and testing points
semi-transparent. The lower right shows the classification accuracy on the test
set.
"""
print(__doc__)


# Code source: Gaël Varoquaux
# Andreas Mueller
# Modified for Documentation merge by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import pylab as pl
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.lda import LDA
from sklearn.qda import QDA
from protopy.selection.enn import ENN

h = .02 # step size in the mesh

#names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
# "Random Forest", "AdaBoost", "Naive Bayes", "LDA", "QDA"]
names = ["KNN", "ENN"]


classifiers = [
    KNeighborsClassifier(3),
    ENN()]
'''
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    LDA(),
    QDA()]
'''
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            linearly_separable
            ]

figure = pl.figure(figsize=(27, 9))
i = 1
# iterate over datasets
for ds in datasets:
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # just plot the dataset first
    cm = pl.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    ax = pl.subplot(len(datasets), len(classifiers) + 1, i)
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = pl.subplot(len(datasets), len(classifiers) + 1, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(name)
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

figure.subplots_adjust(left=.02, right=.98)
pl.show()
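
A note on the mesh colouring above: ENN exposes no decision_function, so the hasattr check falls through to predict_proba, which is exactly what the change to protopy/selection/enn.py below provides. A minimal, self-contained sketch of that fallback (the helper name is illustrative):

import numpy as np

def mesh_values(clf, xx, yy):
    # colour each mesh point by the classifier's score for the positive class
    grid = np.c_[xx.ravel(), yy.ravel()]
    if hasattr(clf, "decision_function"):
        Z = clf.decision_function(grid)
    else:
        Z = clf.predict_proba(grid)[:, 1]
    return Z.reshape(xx.shape)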
1 change: 1 addition & 0 deletions examples/protopy
7 changes: 6 additions & 1 deletion protopy/selection/enn.py
@@ -41,7 +41,7 @@ class ENN(InstanceReductionMixin):
Examples
--------
- >>> from protopy.selection import ENN
+ >>> from protopy.selection.enn import ENN
>>> import numpy as np
>>> X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0] , [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
>>> y = np.array([1, 1, 1, 2, 1, 2, 2, 2])
@@ -90,3 +90,8 @@ def reduce_data(self, X, y):
        self.y_ = np.asarray(y[mask])
        self.reduction_ = 1.0 - float(len(self.y_)) / len(y)
        return self.X_, self.y_


    def predict_proba(self, X):
        return self.classifier.predict_proba(X)
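
A minimal usage sketch of the new method, assuming, as the comparison script above does, that fit trains the underlying classifier stored in self.classifier; the data is taken from the docstring example.

import numpy as np
from protopy.selection.enn import ENN

X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0],
              [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
y = np.array([1, 1, 1, 2, 1, 2, 2, 2])

enn = ENN()
enn.fit(X, y)                     # assumed to fit the internal nearest-neighbour classifier
proba = enn.predict_proba(X[:2])  # delegates to self.classifier.predict_proba
print(proba)                      # one row per query point, one column per class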
