Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Linear Discriminant Analysis: scripting part #2823

Merged
merged 2 commits into from
Jan 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Orange/projection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .manifold import *
from .freeviz import *
from .radviz import radviz
from .lda import LDA
65 changes: 65 additions & 0 deletions Orange/projection/lda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import Orange.data
from Orange.classification.logistic_regression import _FeatureScorerMixin
from Orange.data.util import SharedComputeValue
from Orange.projection import SklProjector, Projection

__all__ = ["LDA"]


class LDA(SklProjector, _FeatureScorerMixin):
    """Linear discriminant analysis projector.

    Wraps sklearn's :class:`LinearDiscriminantAnalysis` as an Orange
    projector; calling the fitted projector on data returns a table of
    discriminant components.

    Parameters
    ----------
    n_components : int or None
        Number of discriminant components to compute; clamped to the data
        dimensions at fit time. ``None`` lets sklearn choose.
    solver : str
        Solver passed to sklearn ('eigen' by default; 'eigen' is required
        for ``scalings_`` used by :class:`LDAModel`).
    preprocessors : list or None
        Optional preprocessors applied before fitting.
    """
    name = "LDA"
    supports_sparse = False

    def __init__(self, n_components=2, solver='eigen', preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.n_components = n_components
        self.solver = solver

    def fit(self, X, Y=None):
        # Clamp the requested number of components to what the data can
        # support (no more than min(n_samples, n_features)).
        if self.n_components is not None:
            self.n_components = min(min(X.shape), self.n_components)
        # BUGFIX: use the configured solver and n_components instead of the
        # previously hard-coded solver='eigen', n_components=2, which made
        # the constructor arguments dead parameters.
        proj = LinearDiscriminantAnalysis(
            solver=self.solver, n_components=self.n_components)
        proj = proj.fit(X, Y)
        return LDAModel(proj, self.domain)


class _LDATransformDomain:
"""Computation common for all LDA variables."""
def __init__(self, lda):
self.lda = lda

def __call__(self, data):
if data.domain != self.lda.pre_domain:
data = data.transform(self.lda.pre_domain)
return self.lda.transform(data.X)


class LDAModel(Projection):
    """Fitted LDA projection model.

    Wraps the fitted sklearn projector and exposes a domain of
    'LD1', 'LD2', ... continuous variables that compute the projected
    coordinates on demand.
    """
    name = "LDAModel"

    def __init__(self, proj, domain):
        # The shared transform captures this (still initializing) instance;
        # it is only invoked later, once the model is fully constructed.
        shared_transform = _LDATransformDomain(self)
        self.components_ = proj.scalings_.T

        super().__init__(proj=proj)
        self.orig_domain = domain
        self.n_components = self.components_.shape[0]
        attributes = [
            Orange.data.ContinuousVariable(
                'LD%d' % (i + 1),
                compute_value=LDAProjector(self, i, shared_transform))
            for i in range(proj.n_components)]
        self.domain = Orange.data.Domain(
            attributes, domain.class_vars, domain.metas)


class LDAProjector(SharedComputeValue):
    """Transform into a given LDA component.

    Each output variable gets its own projector; they all share one
    `lda_transform` so the projection is computed once per data table.
    """
    def __init__(self, projection, feature, lda_transform):
        # NOTE(review): `projection` is accepted but never stored or used
        # here — kept for signature symmetry with other projectors; confirm
        # whether it can be dropped by callers.
        super().__init__(lda_transform)
        # Index of the LDA component this projector extracts.
        self.feature = feature

    def compute(self, data, lda_space):
        # `lda_space` is the shared projected array; select this
        # projector's column.
        return lda_space[:, self.feature]
49 changes: 49 additions & 0 deletions Orange/tests/test_lda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring

import unittest

import numpy as np

from Orange.preprocess import Continuize, Randomize
from Orange.projection import LDA
from Orange.data import Table


class TestLDA(unittest.TestCase):
    def test_lda(self):
        iris = Table('iris')
        n_components = 2
        projector = LDA(n_components=n_components)
        model = projector(iris)
        projected = model(iris)
        # Projection keeps all rows, reduces features, preserves the class.
        self.assertEqual(projected.X.shape, (len(iris), n_components))
        self.assertEqual(projected.Y.shape, (len(iris),))

    def test_transform_changed_domain(self):
        """
        1. Open data, apply some preprocessor, splits the data into two parts,
        use LDA on the first part, and then transform the second part.

        2. Open data, split into two parts, apply the same preprocessor and
        LDA only on the first part, and then transform the second part.

        The transformed second part in (1) and (2) has to be the same.
        """
        data = Randomize()(Table("iris"))
        preprocessor = Continuize()
        lda = LDA()

        # (1) preprocess everything, fit on the first half
        preprocessed = preprocessor(data)
        model = lda(preprocessed[:75])
        result_1 = model(preprocessed[75:])

        # (2) preprocess only the "training" part; the model must convert
        # the raw second part into its training domain on its own
        model = lda(preprocessor(data[:75]))
        result_2 = model(data[75:])

        np.testing.assert_almost_equal(result_1.X, result_2.X)
48 changes: 48 additions & 0 deletions doc/data-mining-library/source/reference/projection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,51 @@ Example


.. autoclass:: Orange.projection.freeviz.FreeViz




LDA
---

Linear discriminant analysis is another way of finding a linear transformation of
data that reduces the number of dimensions required to represent it. It is often
used for dimensionality reduction prior to classification, but can also be used as a
classification technique itself ([1]_).


Example
=======

>>> from Orange.projection import LDA
>>> from Orange.data import Table
>>> iris = Table('iris')
>>> lda = LDA()
>>> model = lda(iris)
>>> model.components_ # LDA components
array([[ 0.20490976, 0.38714331, -0.54648218, -0.71378517],
[ 0.00898234, 0.58899857, -0.25428655, 0.76703217],
[-0.71507172, 0.43568045, 0.45568731, -0.30200008],
[ 0.06449913, -0.35780501, -0.42514529, 0.828895 ]])
>>> transformed_data = model(iris) # transformed data
>>> transformed_data
[[1.492, 1.905 | Iris-setosa],
[1.258, 1.608 | Iris-setosa],
[1.349, 1.750 | Iris-setosa],
[1.180, 1.639 | Iris-setosa],
[1.510, 1.963 | Iris-setosa],
...
]



.. autoclass:: Orange.projection.lda.LDA



References
----------

.. [1] Witten, I.H., Frank, E., Hall, M.A. and Pal, C.J., 2016.
Data Mining: Practical machine learning tools and techniques. Morgan Kaufmann.