-
-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5378 from janezd/add-unique
Unique: Move widget from prototypes
- Loading branch information
Showing
8 changed files
with
326 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
from operator import itemgetter | ||
|
||
import numpy as np | ||
|
||
from AnyQt.QtCore import Qt | ||
|
||
from Orange.data import Table | ||
from Orange.widgets import widget, gui, settings | ||
from Orange.widgets.utils.itemmodels import DomainModel | ||
from Orange.widgets.utils.widgetpreview import WidgetPreview | ||
|
||
|
||
class OWUnique(widget.OWWidget):
    """Widget that keeps one data instance per group of equal key values.

    Rows of the input table are grouped by their values in the selected
    variables (or in all variables of the model when nothing is selected).
    The chosen tiebreaker decides which row of each group is sent on.
    """

    name = 'Unique'
    icon = 'icons/Unique.svg'
    description = 'Filter instances unique by specified key attribute(s).'

    class Inputs:
        data = widget.Input("Data", Table)

    class Outputs:
        data = widget.Output("Data", Table)

    want_main_area = False

    # Maps the combo box label to a function that receives the list of row
    # indices of one group and returns the index to keep, or None to drop
    # the whole group. The first key is used as the default `tiebreaker`.
    TIEBREAKERS = {
        'Last instance': itemgetter(-1),
        'First instance': itemgetter(0),
        'Middle instance': lambda seq: seq[len(seq) // 2],
        'Random instance': np.random.choice,
        'Discard non-unique instances':
            lambda seq: seq[0] if len(seq) == 1 else None,
    }

    settingsHandler = settings.DomainContextHandler()
    selected_vars = settings.ContextSetting([])
    tiebreaker = settings.Setting(next(iter(TIEBREAKERS)))
    autocommit = settings.Setting(True)

    def __init__(self):
        """Build the variable list, the tiebreaker combo and the commit box."""
        super().__init__()
        self.data = None

        # Commit is thunked because autocommit redefines it; looking up
        # `self.commit` at call time picks up whatever is bound by then.
        def run_commit():
            self.commit()

        self.var_model = DomainModel(parent=self, order=DomainModel.MIXED)
        group_view = gui.listView(
            self.controlArea, self, "selected_vars", box="Group by",
            model=self.var_model, callback=run_commit)
        group_view.setSelectionMode(group_view.ExtendedSelection)

        gui.comboBox(
            self.controlArea, self, 'tiebreaker', box=True,
            label='Instance to select in each group:',
            items=tuple(self.TIEBREAKERS),
            callback=run_commit, sendSelectedValue=True)
        gui.auto_commit(
            self.controlArea, self, 'autocommit', 'Commit',
            orientation=Qt.Horizontal)

    @Inputs.data
    def set_data(self, data):
        """Store the new input, refresh the variable model and commit.

        The selection is reset first. For non-empty (truthy) data, all
        variables of the model are selected before the context is opened.
        """
        self.closeContext()
        self.data = data
        self.selected_vars = []
        if not data:
            self.var_model.set_domain(None)
        else:
            self.var_model.set_domain(data.domain)
            self.selected_vars = self.var_model[:]
            self.openContext(data.domain)

        self.unconditional_commit()

    def commit(self):
        """Send the filtered table, or None when there is no input data."""
        if self.data is None:
            result = None
        else:
            result = self._compute_unique_data()
        self.Outputs.data.send(result)

    def _compute_unique_data(self):
        """Return the rows picked by the tiebreaker, or None if none remain.

        Rows are grouped by the tuple of their values in the key variables;
        the output keeps the original row order.
        """
        key_vars = self.selected_vars or self.var_model
        columns = [self.data.get_column_view(var)[0] for var in key_vars]

        # NOTE(review): keys are compared by equality, so rows whose key
        # contains a missing value (NaN) presumably never share a group --
        # confirm that this is intended.
        groups = {}
        for row, key in enumerate(zip(*columns)):
            groups.setdefault(key, []).append(row)

        pick = self.TIEBREAKERS[self.tiebreaker]
        candidates = (pick(rows) for rows in groups.values())
        selection = sorted(row for row in candidates if row is not None)
        return self.data[selection] if selection else None
|
||
|
||
# Launch a standalone preview of the widget when the module is run directly.
if __name__ == "__main__":  # pragma: no cover
    preview = WidgetPreview(OWUnique)
    preview.run(Table("iris"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# Tests test protected methods | ||
# pylint: disable=protected-access | ||
import unittest | ||
from unittest.mock import Mock | ||
|
||
import numpy as np | ||
|
||
from Orange.data import DiscreteVariable, ContinuousVariable, Domain, Table | ||
from Orange.widgets.tests.base import WidgetTest | ||
|
||
from Orange.widgets.data import owunique | ||
|
||
|
||
class TestOWUnique(WidgetTest):
    """Tests for OWUnique: context settings, commit and row selection."""

    def setUp(self):
        self.widget = self.create_widget(owunique.OWUnique)  #: OWUnique

        values = ("a", "b", "c")
        attributes = [DiscreteVariable(name, values=values) for name in "abcd"]
        class_vars = [ContinuousVariable("e")]
        metas = [DiscreteVariable(name, values=values) for name in "fg"]
        self.domain = Domain(attributes, class_vars, metas)

        # The first three rows are identical in all four attributes; the
        # class column (0..5) makes every row distinct overall.
        x = [[0, 0, 0, 0],
             [0, 0, 0, 0],
             [0, 0, 0, 0],
             [0, 1, 0, 0],
             [0, 2, 0, 0],
             [1, 2, 0, 0]]
        self.table = Table.from_numpy(
            self.domain, x, np.arange(6), np.zeros((6, 2)))

    def test_settings(self):
        """A stored selection is restored for a domain that still has it."""
        w = self.widget
        w.unconditional_commit = Mock()

        self.send_signal(w.Inputs.data, self.table)
        w.selected_vars = [w.var_model[2]]

        self.send_signal(w.Inputs.data, None)
        self.assertEqual(w.selected_vars, [])

        full = self.domain
        reduced = Domain(full.attributes[2:], full.class_vars, full.metas)
        self.send_signal(w.Inputs.data, self.table.transform(reduced))
        self.assertEqual(w.selected_vars, [self.domain[2]])

    def test_unconditional_commit(self):
        """New input is committed even when autocommit is switched off."""
        w = self.widget
        w.autocommit = False

        stub = Mock(return_value=self.table)
        w._compute_unique_data = stub

        self.send_signal(w.Inputs.data, self.table)
        self.assertIs(self.get_output(w.Outputs.data), stub.return_value)

        self.send_signal(w.Inputs.data, None)
        self.assertIsNone(self.get_output(w.Outputs.data))

    def test_compute(self):
        """Each tiebreaker keeps the expected rows of every group."""
        w = self.widget

        self.send_signal(w.Inputs.data, self.table)
        out = self.get_output(w.Outputs.data)
        np.testing.assert_equal(out.Y, self.table.Y)

        w.selected_vars = w.var_model[:2]

        cases = (
            ("Last instance", [2, 3, 4, 5]),
            ("First instance", [0, 3, 4, 5]),
            ("Middle instance", [1, 3, 4, 5]),
            ("Discard non-unique instances", [3, 4, 5]),
        )
        for tiebreaker, expected_y in cases:
            w.tiebreaker = tiebreaker
            w.commit()
            out = self.get_output(w.Outputs.data)
            np.testing.assert_equal(out.Y, expected_y)

    def test_use_all_when_non_selected(self):
        """An empty selection behaves like selecting every variable."""
        w = self.widget
        w.tiebreaker = "First instance"

        attrs_only = Domain(self.table.domain.attributes)
        data = self.table.transform(attrs_only)
        expected_x = data.X[2:]

        self.send_signal(w.Inputs.data, data)
        out = self.get_output(w.Outputs.data)
        np.testing.assert_equal(out.X, expected_x)

        w.selected_vars.clear()
        w.unconditional_commit()
        out = self.get_output(w.Outputs.data)
        np.testing.assert_equal(out.X, expected_x)

    def test_no_output_on_no_unique(self):
        """Nothing is output when every row has a duplicate."""
        w = self.widget
        w.tiebreaker = "Discard non-unique instances"

        attrs = self.table.domain.attributes
        all_equal = np.zeros((5, len(attrs)))
        data = Table.from_numpy(Domain(attrs), all_equal)
        self.send_signal(w.Inputs.data, data)
        self.assertIsNone(self.get_output(w.Outputs.data))
|
||
|
||
# Run the test suite when the module is executed as a script.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
Unique | ||
====== | ||
|
||
Remove duplicated data instances. | ||
|
||
**Inputs** | ||
|
||
- Data: data table | ||
|
||
**Outputs** | ||
|
||
- Data: data table without duplicates | ||
|
||
The widget removes duplicated data instances. The user can choose the subset of variables that are compared, so two instances are considered duplicates even if they differ in the values of other, ignored variables. | ||
|
||
![](images/Unique-stamped.png) | ||
|
||
1. Select the variables that are considered in comparing data instances. | ||
2. Choose which data instance is kept from each group of duplicates: the first, last, middle or a random instance. Alternatively, keep none, that is, discard every instance that has a duplicate. | ||
|
||
Example | ||
------- | ||
|
||
The *Zoo* data set contains two frogs. This workflow keeps only one of them by treating instances with the same name as duplicates. | ||
|
||
![](images/Unique-Example.png) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters