Skip to content

Commit

Permalink
Merge pull request #5378 from janezd/add-unique
Browse files Browse the repository at this point in the history
Unique: Move widget from prototypes
  • Loading branch information
janezd authored Apr 9, 2021
2 parents 266d365 + 294ba57 commit 8d2429e
Show file tree
Hide file tree
Showing 8 changed files with 326 additions and 0 deletions.
76 changes: 76 additions & 0 deletions Orange/widgets/data/icons/Unique.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
97 changes: 97 additions & 0 deletions Orange/widgets/data/owunique.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from operator import itemgetter

import numpy as np

from AnyQt.QtCore import Qt

from Orange.data import Table
from Orange.widgets import widget, gui, settings
from Orange.widgets.utils.itemmodels import DomainModel
from Orange.widgets.utils.widgetpreview import WidgetPreview


class OWUnique(widget.OWWidget):
    """Widget that groups rows by the values of chosen key variables and
    outputs at most one row per group."""

    name = 'Unique'
    icon = 'icons/Unique.svg'
    description = 'Filter instances unique by specified key attribute(s).'

    class Inputs:
        data = widget.Input("Data", Table)

    class Outputs:
        data = widget.Output("Data", Table)

    want_main_area = False

    # Combo-box label -> function that receives the list of row indices of
    # one group and returns the index to keep, or None to drop the group.
    # The first entry doubles as the default value of `tiebreaker` below.
    TIEBREAKERS = {
        'Last instance': itemgetter(-1),
        'First instance': itemgetter(0),
        'Middle instance': lambda seq: seq[len(seq) // 2],
        'Random instance': np.random.choice,
        'Discard non-unique instances':
            lambda seq: None if len(seq) != 1 else seq[0],
    }

    settingsHandler = settings.DomainContextHandler()
    selected_vars = settings.ContextSetting([])
    tiebreaker = settings.Setting(next(iter(TIEBREAKERS)))
    autocommit = settings.Setting(True)

    def __init__(self):
        """Build the control area: key-variable list, tie-breaker combo
        and the commit button."""
        # Commit is thunked because autocommit redefines it
        # pylint: disable=unnecessary-lambda
        super().__init__()
        self.data = None

        # Model listing the variables offered as grouping keys.
        self.var_model = DomainModel(order=DomainModel.MIXED, parent=self)
        key_view = gui.listView(
            self.controlArea, self, "selected_vars",
            box="Group by",
            model=self.var_model,
            callback=lambda: self.commit())
        key_view.setSelectionMode(key_view.ExtendedSelection)

        tiebreaker_labels = tuple(self.TIEBREAKERS)
        gui.comboBox(
            self.controlArea, self, 'tiebreaker',
            box=True,
            label='Instance to select in each group:',
            items=tiebreaker_labels,
            sendSelectedValue=True,
            callback=lambda: self.commit())

        gui.auto_commit(
            self.controlArea, self, 'autocommit', 'Commit',
            orientation=Qt.Horizontal)

    @Inputs.data
    def set_data(self, data):
        """Store the new input, reset the variable model and selection,
        and commit unconditionally."""
        self.closeContext()
        self.data = data
        self.selected_vars = []

        if not data:
            self.var_model.set_domain(None)
        else:
            domain = data.domain
            self.var_model.set_domain(domain)
            # Select every variable first; openContext is called afterwards
            # and may replace this with a stored selection.
            self.selected_vars = self.var_model[:]
            self.openContext(domain)

        self.unconditional_commit()

    def commit(self):
        """Send the filtered table, or None when there is no input data."""
        result = None if self.data is None else self._compute_unique_data()
        self.Outputs.data.send(result)

    def _compute_unique_data(self):
        """Return the rows picked by the current tie-breaker, one per group
        of equal key values, or None when no row is picked."""
        # An empty selection falls back to every variable in the model.
        key_vars = self.selected_vars or self.var_model
        # The first item of what get_column_view returns is used as the
        # column of values.
        columns = [self.data.get_column_view(var)[0] for var in key_vars]

        # Key tuple -> indices of the rows carrying that key, in row order.
        groups = {}
        for row, key in enumerate(zip(*columns)):
            groups.setdefault(key, []).append(row)

        pick = self.TIEBREAKERS[self.tiebreaker]
        picked = [pick(rows) for rows in groups.values()]
        selection = sorted(row for row in picked if row is not None)
        return self.data[selection] if selection else None


if __name__ == "__main__":  # pragma: no cover
    # Manual preview: open the widget with the "iris" table as its input.
    preview = WidgetPreview(OWUnique)
    preview.run(Table("iris"))
119 changes: 119 additions & 0 deletions Orange/widgets/data/tests/test_owunique.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Tests test protected methods
# pylint: disable=protected-access
import unittest
from unittest.mock import Mock

import numpy as np

from Orange.data import DiscreteVariable, ContinuousVariable, Domain, Table
from Orange.widgets.tests.base import WidgetTest

from Orange.widgets.data import owunique


class TestOWUnique(WidgetTest):
    """Tests for OWUnique: stored selection, committing and tie-breaking."""

    def setUp(self):
        self.widget = self.create_widget(owunique.OWUnique)  #: OWUnique

        def abc_variable(name):
            return DiscreteVariable(name, values=("a", "b", "c"))

        # Four discrete attributes, one continuous class and two discrete
        # metas.
        self.domain = Domain(
            [abc_variable(name) for name in "abcd"],
            [ContinuousVariable("e")],
            [abc_variable(name) for name in "fg"])

        # Rows 0-2 are identical in all attributes; rows 3-5 each differ.
        attribute_values = [[0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 1, 0, 0],
                            [0, 2, 0, 0],
                            [1, 2, 0, 0]]
        # The class value equals the row index, so tests can identify the
        # selected rows through `out.Y`.
        class_values = np.arange(6)
        meta_values = np.zeros((6, 2))
        self.table = Table.from_numpy(
            self.domain, attribute_values, class_values, meta_values)

    def test_settings(self):
        # The selection is emptied when data is removed and comes back when
        # data with a domain containing the selected variable arrives.
        widget = self.widget
        widget.unconditional_commit = Mock()
        data_input = widget.Inputs.data

        self.send_signal(data_input, self.table)
        widget.selected_vars = [widget.var_model[2]]

        self.send_signal(data_input, None)
        self.assertEqual(widget.selected_vars, [])

        full = self.domain
        reduced = Domain(full.attributes[2:], full.class_vars, full.metas)
        self.send_signal(data_input, self.table.transform(reduced))
        self.assertEqual(widget.selected_vars, [self.domain[2]])

    def test_unconditional_commit(self):
        # New input must be committed even with auto-commit switched off.
        widget = self.widget
        widget.autocommit = False

        compute = Mock(return_value=self.table)
        widget._compute_unique_data = compute

        self.send_signal(widget.Inputs.data, self.table)
        self.assertIs(
            self.get_output(widget.Outputs.data), compute.return_value)

        self.send_signal(widget.Inputs.data, None)
        self.assertIsNone(self.get_output(widget.Outputs.data))

    def test_compute(self):
        widget = self.widget

        self.send_signal(widget.Inputs.data, self.table)
        output = self.get_output(widget.Outputs.data)
        np.testing.assert_equal(output.Y, self.table.Y)

        # Group by the first two variables of the model only.
        widget.selected_vars = widget.var_model[:2]

        # (tie-breaker, expected class values of the surviving rows)
        expectations = (
            ("Last instance", [2, 3, 4, 5]),
            ("First instance", [0, 3, 4, 5]),
            ("Middle instance", [1, 3, 4, 5]),
            ("Discard non-unique instances", [3, 4, 5]),
        )
        for tiebreaker, expected in expectations:
            widget.tiebreaker = tiebreaker
            widget.commit()
            output = self.get_output(widget.Outputs.data)
            np.testing.assert_equal(output.Y, expected)

    def test_use_all_when_non_selected(self):
        widget = self.widget
        widget.tiebreaker = "First instance"

        attributes_only = Domain(self.table.domain.attributes)
        data = self.table.transform(attributes_only)
        # Rows 0-2 are identical, so the first-instance result (rows 0, 3,
        # 4, 5) equals rows 2 onwards when compared by value.
        expected = data.X[2:]

        self.send_signal(widget.Inputs.data, data)
        output = self.get_output(widget.Outputs.data)
        np.testing.assert_equal(output.X, expected)

        # An empty selection must give the same result.
        widget.selected_vars.clear()
        widget.unconditional_commit()
        output = self.get_output(widget.Outputs.data)
        np.testing.assert_equal(output.X, expected)

    def test_no_output_on_no_unique(self):
        widget = self.widget
        widget.tiebreaker = "Discard non-unique instances"

        # Five all-zero rows: every row is a duplicate, so nothing is kept.
        attributes = self.table.domain.attributes
        zeros = np.zeros((5, len(attributes)))
        data = Table.from_numpy(Domain(attributes), zeros)

        self.send_signal(widget.Inputs.data, data)
        self.assertIsNone(self.get_output(widget.Outputs.data))


# Run this module's tests when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
1 change: 1 addition & 0 deletions doc/visual-programming/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Data
widgets/data/color
widgets/data/featurestatistics
widgets/data/neighbors
widgets/data/unique


Visualize
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 26 additions & 0 deletions doc/visual-programming/source/widgets/data/unique.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Unique
======

Remove duplicated data instances.

**Inputs**

- Data: data table

**Outputs**

- Data: data table without duplicates

The widget removes duplicated data instances. The user can choose a subset of observed variables; two instances are then considered duplicates if they match on the chosen variables, even if they differ in the values of other, ignored variables.

![](images/Unique-stamped.png)

1. Select the variables that are considered in comparing data instances.
2. Select which data instance is kept in each group of duplicates. The options are to use the first, last, middle or random instance, or to keep none, that is, to remove duplicated instances altogether.

Example
-------

Data set *Zoo* contains two frogs. This workflow keeps only one by removing instances with the same names.

![](images/Unique-Example.png)
7 changes: 7 additions & 0 deletions doc/widgets.json
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,13 @@
"keywords": [
"export"
]
},
{
"text": "Unique",
"doc": "visual-programming/source/widgets/data/unique.md",
"icon": "../Orange/widgets/data/icons/Unique.svg",
"background": "#FFD39F",
"keywords": []
}
]
],
Expand Down

0 comments on commit 8d2429e

Please sign in to comment.