Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Transform: Add new widget #3346

Merged
merged 2 commits into from
Nov 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Orange/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,18 @@ def transform(var):
return data.transform(domain)


class ApplyDomain(Preprocess):
    """Preprocessor that converts data into a fixed, precomputed domain.

    Parameters
    ----------
    domain
        Target domain passed to ``data.transform`` on every call.
    name
        Text returned by ``str()`` for this preprocessor.
    """

    def __init__(self, domain, name):
        self._domain, self._name = domain, name

    def __call__(self, data):
        """Return the result of ``data.transform`` with the stored domain."""
        target_domain = self._domain
        return data.transform(target_domain)

    def __str__(self):
        """Return the name supplied at construction."""
        return self._name


class PreprocessorList(Preprocess):
"""
Store a list of preprocessors and on call apply them to the dataset.
Expand Down
15 changes: 15 additions & 0 deletions Orange/widgets/data/icons/Transform.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
112 changes: 112 additions & 0 deletions Orange/widgets/data/owtransform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from Orange.data import Table
from Orange.preprocess.preprocess import Preprocess, Discretize
from Orange.widgets import gui
from Orange.widgets.utils.sql import check_sql_input
from Orange.widgets.widget import OWWidget, Input, Output, Msg


class OWTransform(OWWidget):
    """Widget that applies a preprocessor received on input to input data.

    The widget has two inputs ("Data" and "Preprocessor") and one output
    ("Transformed Data"). Whenever both inputs are present the preprocessor
    is called on the data and the result is sent on; otherwise ``None`` is
    sent. Three labels in the "Info" box describe the input data, the
    preprocessor state and the output data.
    """
    name = "Transform"
    description = "Transform data table."
    icon = "icons/Transform.svg"
    priority = 2110
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        transformed_data = Output("Transformed Data", Table)

    class Error(OWWidget.Error):
        pp_error = Msg("An error occurred while transforming data.\n{}")

    resizing_enabled = False
    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.preprocessor = None
        self.transformed_data = None

        info_box = gui.widgetBox(self.controlArea, "Info")
        self.input_label = gui.widgetLabel(info_box, "")
        self.preprocessor_label = gui.widgetLabel(info_box, "")
        self.output_label = gui.widgetLabel(info_box, "")
        self.set_input_label_text()
        self.set_preprocessor_label_text()

    def set_input_label_text(self):
        """Show the size of the input data, or a note that there is none."""
        text = "No data on input."
        if self.data is not None:
            text = "Input data with {:,} instances and {:,} features.".format(
                len(self.data),
                len(self.data.domain.attributes))
        self.input_label.setText(text)

    def set_preprocessor_label_text(self):
        """Show whether a preprocessor is absent, present, or applied."""
        text = "No preprocessor on input."
        if self.transformed_data is not None:
            text = "Preprocessor {} applied.".format(self.preprocessor)
        elif self.preprocessor is not None:
            text = "Preprocessor {} on input.".format(self.preprocessor)
        self.preprocessor_label.setText(text)

    def set_output_label_text(self):
        """Show the number of output features, or clear the label."""
        text = ""
        # Compare against None explicitly, as the other label/report methods
        # do: a truthiness test would hide the output description for any
        # transformed table that evaluates as falsy (e.g. one with no rows)
        # even though the preprocessor label reports it as applied.
        if self.transformed_data is not None:
            text = "Output data includes {:,} features.".format(
                len(self.transformed_data.domain.attributes))
        self.output_label.setText(text)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the new input data and refresh the input label."""
        self.data = data
        self.set_input_label_text()

    @Inputs.preprocessor
    def set_preprocessor(self, preprocessor):
        """Store the new preprocessor; it is applied in handleNewSignals."""
        self.preprocessor = preprocessor

    def handleNewSignals(self):
        """Recompute the output once the input handlers have stored values."""
        self.apply()

    def apply(self):
        """Apply the preprocessor to the data and send the result.

        Clears previous messages, sends ``None`` when either input is
        missing or when the preprocessor raises; in the latter case the
        exception is shown through ``Error.pp_error``.
        """
        self.clear_messages()
        self.transformed_data = None
        if self.data is not None and self.preprocessor is not None:
            try:
                self.transformed_data = self.preprocessor(self.data)
            except Exception as ex:  # pylint: disable=broad-except
                # Preprocessors are arbitrary callables; report any failure
                # in the widget instead of letting it propagate.
                self.Error.pp_error(ex)
        self.Outputs.transformed_data.send(self.transformed_data)

        self.set_preprocessor_label_text()
        self.set_output_label_text()

    def send_report(self):
        """Report the preprocessor, input data and transformed data, if set."""
        if self.preprocessor is not None:
            self.report_items("Settings",
                              (("Preprocessor", self.preprocessor),))
        if self.data is not None:
            self.report_data("Data", self.data)
        if self.transformed_data is not None:
            self.report_data("Transformed data", self.transformed_data)


# Manual smoke test: show the widget with iris data and a Discretize
# preprocessor already connected.
if __name__ == "__main__":
    from AnyQt.QtWidgets import QApplication

    application = QApplication([])
    transform_widget = OWTransform()
    iris = Table("iris")
    discretizer = Discretize()
    # Feed both inputs, then trigger the recomputation explicitly since no
    # signal manager is running here.
    transform_widget.set_data(iris)
    transform_widget.set_preprocessor(discretizer)
    transform_widget.handleNewSignals()
    transform_widget.show()
    application.exec_()
    transform_widget.saveSettings()
92 changes: 92 additions & 0 deletions Orange/widgets/data/tests/test_owtransform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
from Orange.data import Table
from Orange.preprocess import Discretize
from Orange.preprocess.preprocess import Preprocess
from Orange.widgets.data.owtransform import OWTransform
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.unsupervised.owpca import OWPCA


class TestOWTransform(WidgetTest):
    def setUp(self):
        self.widget = self.create_widget(OWTransform)
        self.data = Table("iris")
        self.preprocessor = Discretize()

    def test_output(self):
        # send data and preprocessor
        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self.assertEqual("Input data with 150 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("Preprocessor Discretize() applied.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("Output data includes 4 features.",
                         self.widget.output_label.text())

        # remove preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self.assertEqual("Input data with 150 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("No preprocessor on input.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("", self.widget.output_label.text())

        # send preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self.assertEqual("Input data with 150 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("Preprocessor Discretize() applied.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("Output data includes 4 features.",
                         self.widget.output_label.text())

        # remove data
        self.send_signal(self.widget.Inputs.data, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self.assertEqual("No data on input.", self.widget.input_label.text())
        self.assertEqual("Preprocessor Discretize() on input.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("", self.widget.output_label.text())

        # remove preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, None)
        self.assertEqual("No data on input.", self.widget.input_label.text())
        self.assertEqual("No preprocessor on input.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("", self.widget.output_label.text())

    def test_input_pca_preprocessor(self):
        # obtain a preprocessor from the PCA widget limited to 2 components
        owpca = self.create_widget(OWPCA)
        self.send_signal(owpca.Inputs.data, self.data, widget=owpca)
        owpca.components_spin.setValue(2)
        pp = self.get_output(owpca.Outputs.preprocessor, widget=owpca)
        # assertIsNotNone(pp, Preprocess) would use Preprocess only as the
        # failure message; check the type explicitly instead.
        self.assertIsInstance(pp, Preprocess)

        # feed it to the Transform widget and check the projected shape
        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, pp)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self.assertEqual(output.X.shape, (len(self.data), 2))

    def test_error_transforming(self):
        # a bare Preprocess() on input must surface pp_error and no output
        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, Preprocess())
        self.assertTrue(self.widget.Error.pp_error.is_shown())
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        # removing the data must clear the error again
        self.send_signal(self.widget.Inputs.data, None)
        self.assertFalse(self.widget.Error.pp_error.is_shown())

    def test_send_report(self):
        # the report must not fail with or without data on input
        self.send_signal(self.widget.Inputs.data, self.data)
        self.widget.report_button.click()
        self.send_signal(self.widget.Inputs.data, None)
        self.widget.report_button.click()
8 changes: 7 additions & 1 deletion Orange/widgets/unsupervised/owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from Orange.data import Table, Domain, StringVariable, ContinuousVariable
from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT
from Orange.preprocess import Normalize
from Orange.preprocess.preprocess import Preprocess, ApplyDomain
from Orange.projection import PCA, TruncatedSVD
from Orange.widgets import widget, gui, settings
from Orange.widgets.widget import Input, Output
Expand Down Expand Up @@ -44,6 +45,7 @@ class Outputs:
transformed_data = Output("Transformed data", Table)
components = Output("Components", Table)
pca = Output("PCA", PCA, dynamic=False)
preprocessor = Output("Preprocessor", Preprocess)

settingsHandler = settings.DomainContextHandler()

Expand Down Expand Up @@ -290,6 +292,7 @@ def clear_outputs(self):
self.Outputs.transformed_data.send(None)
self.Outputs.components.send(None)
self.Outputs.pca.send(self._pca_projector)
self.Outputs.preprocessor.send(None)

def get_model(self):
if self.rpca is None:
Expand Down Expand Up @@ -455,7 +458,7 @@ def _update_axis(self):
axis.setTicks([[(i, str(i+1)) for i in range(0, p, d)]])

def commit(self):
transformed = components = None
transformed = components = pp = None
if self._pca is not None:
if self._transformed is None:
# Compute the full transform (MAX_COMPONENTS components) only once.
Expand All @@ -479,10 +482,13 @@ def commit(self):
metas=metas)
components.name = 'components'

pp = ApplyDomain(domain, "PCA")

self._pca_projector.component = self.ncomponents
self.Outputs.transformed_data.send(transformed)
self.Outputs.components.send(components)
self.Outputs.pca.send(self._pca_projector)
self.Outputs.preprocessor.send(pp)

def send_report(self):
if self.data is None:
Expand Down
14 changes: 14 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import scipy.sparse as sp

from Orange.data import Table, Domain, ContinuousVariable, TimeVariable
from Orange.preprocess.preprocess import Preprocess
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.unsupervised.owpca import OWPCA, DECOMPOSITIONS

Expand Down Expand Up @@ -131,3 +132,16 @@ def test_do_not_mask_features(self):
self.widget.set_data(data)
ndata = Table("iris.tab")
self.assertEqual(data.domain[0], ndata.domain[0])

def test_output_preprocessor(self):
    # The preprocessor output must reproduce the widget's own transformation
    # when applied to a subset of the same data.
    iris = Table("iris")
    self.send_signal(self.widget.Inputs.data, iris)
    preprocessor = self.get_output(self.widget.Outputs.preprocessor)
    self.assertIsInstance(preprocessor, Preprocess)

    # apply the preprocessor to every tenth row
    subset = preprocessor(iris[::10])
    self.assertIsInstance(subset, Table)
    self.assertEqual(subset.X.shape, (15, 2))

    # values and attribute names must agree with the widget's output
    widget_output = self.get_output(self.widget.Outputs.transformed_data)
    np.testing.assert_array_equal(subset.X, widget_output.X[::10])
    subset_names = [a.name for a in subset.domain.attributes]
    output_names = [m.name for m in widget_output.domain.attributes]
    self.assertEqual(subset_names, output_names)