From 8293eef8cd9464e08b757211ce67d12c2e0e57e6 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 19 Jun 2020 19:32:11 +0200 Subject: [PATCH] Edit Domain: Add option to remove compute_value --- Orange/widgets/data/oweditdomain.py | 146 ++++++++++++------ .../widgets/data/tests/test_oweditdomain.py | 110 ++++++++++--- 2 files changed, 194 insertions(+), 62 deletions(-) diff --git a/Orange/widgets/data/oweditdomain.py b/Orange/widgets/data/oweditdomain.py index ccd046d79fa..3f9c6d738ed 100644 --- a/Orange/widgets/data/oweditdomain.py +++ b/Orange/widgets/data/oweditdomain.py @@ -95,6 +95,7 @@ class Categorical( ("name", str), ("categories", Tuple[str, ...]), ("annotations", AnnotationsType), + ("linked", bool) ])): pass @@ -104,6 +105,7 @@ class Real( # a precision (int, and a format specifier('f', 'g', or '') ("format", Tuple[int, str]), ("annotations", AnnotationsType), + ("linked", bool) ])): pass @@ -111,6 +113,7 @@ class String( _DataType, NamedTuple("String", [ ("name", str), ("annotations", AnnotationsType), + ("linked", bool) ])): pass @@ -118,6 +121,7 @@ class Time( _DataType, NamedTuple("Time", [ ("name", str), ("annotations", AnnotationsType), + ("linked", bool) ])): pass @@ -175,10 +179,14 @@ def __call__(self, var): return var._replace(annotations=self.annotations) -Transform = Union[Rename, CategoriesMapping, Annotate] -TransformTypes = (Rename, CategoriesMapping, Annotate) +class Unlink(_DataType, namedtuple("Unlink", [])): + """Unlink variable from its source, that is, remove compute_value""" -CategoricalTransformTypes = (CategoriesMapping, ) + +Transform = Union[Rename, CategoriesMapping, Annotate, Unlink] +TransformTypes = (Rename, CategoriesMapping, Annotate, Unlink) + +CategoricalTransformTypes = (CategoriesMapping, Unlink) # Reinterpret vector transformations. 
@@ -221,7 +229,7 @@ def __call__(self, vector: DataVector) -> StringVector: if isinstance(var, String): return vector return StringVector( - String(var.name, var.annotations), + String(var.name, var.annotations, False), lambda: as_string(vector.data()), ) @@ -241,11 +249,11 @@ def data() -> MArray: a = categorical_to_string_vector(d, var.values) return MArray(as_float_or_nan(a, where=a.mask), mask=a.mask) return RealVector( - Real(var.name, (6, 'g'), var.annotations), data + Real(var.name, (6, 'g'), var.annotations, var.linked), data ) elif isinstance(var, Time): return RealVector( - Real(var.name, (6, 'g'), var.annotations), + Real(var.name, (6, 'g'), var.annotations, var.linked), lambda: vector.data().astype(float) ) elif isinstance(var, String): @@ -253,7 +261,7 @@ def data(): s = vector.data() return MArray(as_float_or_nan(s, where=s.mask), mask=s.mask) return RealVector( - Real(var.name, (6, "g"), var.annotations), data + Real(var.name, (6, "g"), var.annotations, var.linked), data ) raise AssertionError @@ -266,22 +274,10 @@ def __call__(self, vector: DataVector) -> CategoricalVector: var, _ = vector if isinstance(var, Categorical): return vector - if isinstance(var, Real): - data, values = categorical_from_vector(vector.data()) - return CategoricalVector( - Categorical(var.name, values, var.annotations), - lambda: data - ) - elif isinstance(var, Time): + if isinstance(var, (Real, Time, String)): data, values = categorical_from_vector(vector.data()) return CategoricalVector( - Categorical(var.name, values, var.annotations), - lambda: data - ) - elif isinstance(var, String): - data, values = categorical_from_vector(vector.data()) - return CategoricalVector( - Categorical(var.name, values, var.annotations), + Categorical(var.name, values, var.annotations, var.linked), lambda: data ) raise AssertionError @@ -295,7 +291,7 @@ def __call__(self, vector: DataVector) -> TimeVector: return vector elif isinstance(var, Real): return TimeVector( - Time(var.name, 
var.annotations), + Time(var.name, var.annotations, var.linked), lambda: vector.data().astype("M8[us]") ) elif isinstance(var, Categorical): @@ -305,7 +301,7 @@ def data(): dt = pd.to_datetime(s, errors="coerce").values.astype("M8[us]") return MArray(dt, mask=d.mask) return TimeVector( - Time(var.name, var.annotations), data + Time(var.name, var.annotations, var.linked), data ) elif isinstance(var, String): def data(): @@ -313,7 +309,7 @@ def data(): dt = pd.to_datetime(s, errors="coerce").values.astype("M8[us]") return MArray(dt, mask=s.mask) return TimeVector( - Time(var.name, var.annotations), data + Time(var.name, var.annotations, var.linked), data ) raise AssertionError @@ -532,6 +528,17 @@ def __init__(self, parent=None, **kwargs): ) form.addRow("Name:", self.name_edit) + self.unlink_var_cb = QCheckBox( + "Unlink variable from its source variable", self, + toolTip="Make Orange forget that the variable is derived from " + "another.\n" + "Use this for instance when you want to consider variables " + "with the same name but from different sources as the same " + "variable." + ) + self.unlink_var_cb.toggled.connect(self._set_unlink) + form.addRow("", self.unlink_var_cb) + vlayout = QVBoxLayout(margin=0, spacing=1) self.labels_edit = view = QTreeView( objectName="annotation-pairs-edit", @@ -616,17 +623,23 @@ def set_data(self, var, transform=()): if var is not None: name = var.name annotations = var.annotations + unlink = False for tr in transform: if isinstance(tr, Rename): name = tr.name elif isinstance(tr, Annotate): annotations = tr.annotations + elif isinstance(tr, Unlink): + unlink = True self.name_edit.setText(name) self.labels_model.set_dict(dict(annotations)) self.add_label_action.actionGroup().setEnabled(True) + self.unlink_var_cb.setChecked(unlink) else: self.add_label_action.actionGroup().setEnabled(False) + self.unlink_var_cb.setDisabled(var is None or not var.linked) + def get_data(self): """Retrieve the modified variable. 
""" @@ -639,6 +652,8 @@ def get_data(self): tr.append(Rename(name)) if self.var.annotations != labels: tr.append(Annotate(labels)) + if self.var.linked and self.unlink_var_cb.isChecked(): + tr.append(Unlink()) return self.var, tr def clear(self): @@ -647,6 +662,7 @@ def clear(self): self.var = None self.name_edit.setText("") self.labels_model.setRowCount(0) + self.unlink_var_cb.setChecked(False) @Slot() def on_name_changed(self): @@ -661,6 +677,10 @@ def on_label_selection_changed(self): selected = self.labels_edit.selectionModel().selectedRows() self.remove_label_action.setEnabled(bool(len(selected))) + def _set_unlink(self, unlink): + self.unlink_var_cb.setChecked(unlink) + self.variable_changed.emit() + class GroupItemsDialog(QDialog): """ @@ -1157,7 +1177,7 @@ def __init__(self, *args, **kwargs): hlayout.addStretch(10) vlayout.addLayout(hlayout) - form.insertRow(1, "Values:", vlayout) + form.insertRow(2, "Values:", vlayout) QWidget.setTabOrder(self.name_edit, self.values_edit) QWidget.setTabOrder(self.values_edit, button1) @@ -2030,23 +2050,32 @@ def state(i): model.data(midx, TransformRole)) state = [state(i) for i in range(model.rowCount())] - if all(tr is None or not tr for _, tr in state) \ - and self.output_table_name in ("", data.name): + input_vars = data.domain.variables + data.domain.metas + if self.output_table_name in ("", data.name) \ + and not any(requires_transform(var, trs) + for var, (_, trs) in zip(input_vars, state)): self.Outputs.data.send(data) self.info.set_output_summary(len(data), format_summary_details(data)) return - output_vars = [] - input_vars = data.domain.variables + data.domain.metas assert all(v_.vtype.name == v.name for v, (v_, _) in zip(input_vars, state)) + output_vars = [] + unlinked_vars = [] + unlink_domain = False for (_, tr), v in zip(state, input_vars): if tr: var = apply_transform(v, data, tr) + if requires_unlink(v, tr): + unlinked_var = var.copy(compute_value=None) + unlink_domain = True + else: + unlinked_var = var 
else: - var = v + unlinked_var = var = v output_vars.append(var) + unlinked_vars.append(unlinked_var) if len(output_vars) != len({v.name for v in output_vars}): self.Error.duplicate_var_name() @@ -2058,15 +2087,23 @@ def state(i): nx = len(domain.attributes) ny = len(domain.class_vars) - Xs = output_vars[:nx] - Ys = output_vars[nx: nx + ny] - Ms = output_vars[nx + ny:] - # Move non primitive Xs, Ys to metas (if they were changed) - Ms += [v for v in Xs + Ys if not v.is_primitive()] - Xs = [v for v in Xs if v.is_primitive()] - Ys = [v for v in Ys if v.is_primitive()] - domain = Orange.data.Domain(Xs, Ys, Ms) + def construct_domain(vars_list): + # Move non primitive Xs, Ys to metas (if they were changed) + Xs = [v for v in vars_list[:nx] if v.is_primitive()] + Ys = [v for v in vars_list[nx: nx + ny] if v.is_primitive()] + Ms = vars_list[nx + ny:] + \ + [v for v in vars_list[:nx + ny] if not v.is_primitive()] + return Orange.data.Domain(Xs, Ys, Ms) + + domain = construct_domain(output_vars) new_data = data.transform(domain) + if unlink_domain: + unlinked_domain = construct_domain(unlinked_vars) + new_data = new_data.from_numpy( + unlinked_domain, + new_data.X, new_data.Y, new_data.metas, new_data.W, + new_data.attributes, new_data.ids + ) if self.output_table_name: new_data.name = self.output_table_name self.Outputs.data.send(new_data) @@ -2236,7 +2273,7 @@ def i(text): def text(text): return "{}".format(escape(text)) assert trs - rename = annotate = catmap = None + rename = annotate = catmap = unlink = None reinterpret = None for tr in trs: @@ -2246,6 +2283,8 @@ def text(text): annotate = tr elif isinstance(tr, CategoriesMapping): catmap = tr + elif isinstance(tr, Unlink): + unlink = tr elif isinstance(tr, ReinterpretTransformTypes): reinterpret = tr @@ -2258,6 +2297,8 @@ def text(text): header = "{} → {}".format(var.name, rename.name) else: header = var.name + if unlink is not None: + header += "(unlinked from source)" values_section = None if catmap is not None: @@ 
-2323,14 +2364,15 @@ def abstract(var): (key, str(value)) for key, value in var.attributes.items() )) + linked = var.compute_value is not None if isinstance(var, Orange.data.DiscreteVariable): - return Categorical(var.name, tuple(var.values), annotations) + return Categorical(var.name, tuple(var.values), annotations, linked) elif isinstance(var, Orange.data.TimeVariable): - return Time(var.name, annotations) + return Time(var.name, annotations, linked) elif isinstance(var, Orange.data.ContinuousVariable): - return Real(var.name, (var.number_of_decimals, 'f'), annotations) + return Real(var.name, (var.number_of_decimals, 'f'), annotations, linked) elif isinstance(var, Orange.data.StringVariable): - return String(var.name, annotations) + return String(var.name, annotations, linked) else: raise TypeError @@ -2359,6 +2401,24 @@ def apply_transform(var, table, trs): return var +def requires_unlink(var: Orange.data.Variable, trs: List[Transform]) -> bool: + # Variable is only unlinked if it has compute_value or if it has other + # transformations (that might have added compute_value) + return trs is not None \ + and any(isinstance(tr, Unlink) for tr in trs) \ + and (var.compute_value is not None or len(trs) > 1) + + +def requires_transform(var: Orange.data.Variable, trs: List[Transform]) -> bool: + # Unlink is treated separately: Unlink is required only if the variable + # has compute_value. Hence transform is required if it has any + # transformations other than Unlink, or if unlink is indeed required.
+ return trs is not None and ( + not all(isinstance(tr, Unlink) for tr in trs) + or requires_unlink(var, trs) + ) + + @singledispatch def apply_transform_var(var, trs): # type: (Orange.data.Variable, List[Transform]) -> Orange.data.Variable diff --git a/Orange/widgets/data/tests/test_oweditdomain.py b/Orange/widgets/data/tests/test_oweditdomain.py index 4c459cb8c4f..ffe06335030 100644 --- a/Orange/widgets/data/tests/test_oweditdomain.py +++ b/Orange/widgets/data/tests/test_oweditdomain.py @@ -28,7 +28,7 @@ OWEditDomain, ContinuousVariableEditor, DiscreteVariableEditor, VariableEditor, TimeVariableEditor, Categorical, Real, Time, String, - Rename, Annotate, CategoriesMapping, report_transform, + Rename, Annotate, Unlink, CategoriesMapping, report_transform, apply_transform, apply_transform_var, apply_reinterpret, MultiplicityRole, AsString, AsCategorical, AsContinuous, AsTime, table_column_data, ReinterpretVariableEditor, CategoricalVector, @@ -46,21 +46,26 @@ class TestReport(TestCase): def test_rename(self): - var = Real("X", (-1, ""), ()) + var = Real("X", (-1, ""), (), False) tr = Rename("Y") val = report_transform(var, [tr]) self.assertIn("X", val) self.assertIn("Y", val) def test_annotate(self): - var = Real("X", (-1, ""), (("a", "1"), ("b", "z"))) + var = Real("X", (-1, ""), (("a", "1"), ("b", "z")), False) tr = Annotate((("a", "2"), ("j", "z"))) r = report_transform(var, [tr]) self.assertIn("a", r) self.assertIn("b", r) + def test_unlinke(self): + var = Real("X", (-1, ""), (("a", "1"), ("b", "z")), True) + r = report_transform(var, [Unlink()]) + self.assertIn("unlinked", r) + def test_categories_mapping(self): - var = Categorical("C", ("a", "b", "c"), ()) + var = Categorical("C", ("a", "b", "c"), (), False) tr = CategoriesMapping( (("a", "aa"), ("b", None), @@ -74,7 +79,7 @@ def test_categories_mapping(self): self.assertIn("", r) def test_categorical_merge_mapping(self): - var = Categorical("C", ("a", "b1", "b2"), ()) + var = Categorical("C", ("a", "b1", 
"b2"), (), False) tr = CategoriesMapping( (("a", "a"), ("b1", "b"), @@ -85,7 +90,7 @@ def test_categorical_merge_mapping(self): self.assertIn('b', r) def test_reinterpret(self): - var = String("T", ()) + var = String("T", (), False) for tr in (AsContinuous(), AsCategorical(), AsTime()): t = report_transform(var, [tr]) self.assertIn("→ (", t) @@ -243,6 +248,34 @@ def enter_text(widget, text): output = self.get_output(self.widget.Outputs.data) self.assertIsInstance(output, Table) + def test_unlink(self): + var0, var1, var2 = [ContinuousVariable("x", compute_value=Mock()), + ContinuousVariable("y", compute_value=Mock()), + ContinuousVariable("z")] + domain = Domain([var0, var1, var2], None) + table = Table.from_numpy(domain, np.zeros((5, 3)), np.zeros((5, 0))) + self.send_signal(self.widget.Inputs.data, table) + + index = self.widget.domain_view.model().index + for i in range(3): + self.widget.domain_view.setCurrentIndex(index(i)) + editor = self.widget.findChild(ContinuousVariableEditor) + self.assertIs(editor.unlink_var_cb.isEnabled(), i < 2) + editor._set_unlink(i == 1) + + self.widget.commit() + out = self.get_output(self.widget.Outputs.data) + out0, out1, out2 = out.domain.variables + self.assertIs(out0, domain[0]) + self.assertIsNot(out1, domain[1]) + self.assertIs(out2, domain[2]) + + self.assertIsNotNone(out0.compute_value) + self.assertIsNone(out1.compute_value) + self.assertIsNone(out2.compute_value) + + + def test_time_variable_preservation(self): """Test if time variables preserve format specific attributes""" table = Table(test_filename("datasets/cyber-security-breaches.tab")) @@ -263,7 +296,8 @@ def test_restore(self): iris = self.iris viris = ( "Categorical", - ("iris", ("Iris-setosa", "Iris-versicolor", "Iris-virginica"), ()) + ("iris", ("Iris-setosa", "Iris-versicolor", "Iris-virginica"), (), + False) ) w = self.widget @@ -326,7 +360,7 @@ def test_variable_editor(self): w = VariableEditor() self.assertEqual(w.get_data(), (None, [])) - v = String("S", 
(("A", "1"), ("B", "b"))) + v = String("S", (("A", "1"), ("B", "b")), False) w.set_data(v, []) self.assertEqual(w.name_edit.text(), v.name) @@ -351,7 +385,7 @@ def test_continuous_editor(self): w = ContinuousVariableEditor() self.assertEqual(w.get_data(), (None, [])) - v = Real("X", (-1, ""), (("A", "1"), ("B", "b"))) + v = Real("X", (-1, ""), (("A", "1"), ("B", "b")), False) w.set_data(v, []) self.assertEqual(w.name_edit.text(), v.name) @@ -366,7 +400,7 @@ def test_discrete_editor(self): w = DiscreteVariableEditor() self.assertEqual(w.get_data(), (None, [])) - v = Categorical("C", ("a", "b", "c"), (("A", "1"), ("B", "b"))) + v = Categorical("C", ("a", "b", "c"), (("A", "1"), ("B", "b")), False) values = [0, 0, 0, 1, 1, 2] w.set_data_categorical(v, values) @@ -418,7 +452,7 @@ def test_discrete_editor(self): def test_discrete_editor_add_remove_action(self): w = DiscreteVariableEditor() v = Categorical("C", ("a", "b", "c"), - (("A", "1"), ("B", "b"))) + (("A", "1"), ("B", "b")), False) values = [0, 0, 0, 1, 1, 2] w.set_data_categorical(v, values) action_add = w.add_new_item @@ -466,7 +500,7 @@ def test_discrete_editor_merge_action(self): """ w = DiscreteVariableEditor() v = Categorical("C", ("a", "b", "c"), - (("A", "1"), ("B", "b"))) + (("A", "1"), ("B", "b")), False) w.set_data_categorical(v, [0, 0, 0, 1, 1, 2]) view = w.values_edit @@ -487,7 +521,7 @@ def test_time_editor(self): w = TimeVariableEditor() self.assertEqual(w.get_data(), (None, [])) - v = Time("T", (("A", "1"), ("B", "b"))) + v = Time("T", (("A", "1"), ("B", "b")), False) w.set_data(v,) self.assertEqual(w.name_edit.text(), v.name) @@ -500,19 +534,19 @@ def test_time_editor(self): DataVectors = [ CategoricalVector( - Categorical("A", ("a", "aa"), ()), lambda: + Categorical("A", ("a", "aa"), (), False), lambda: MArray([0, 1, 2], mask=[False, False, True]) ), RealVector( - Real("B", (6, "f"), ()), lambda: + Real("B", (6, "f"), (), False), lambda: MArray([0.1, 0.2, 0.3], mask=[True, False, True]) ), 
TimeVector( - Time("T", ()), lambda: + Time("T", (), False), lambda: MArray([0, 100, 200], dtype="M8[us]", mask=[True, False, True]) ), StringVector( - String("S", ()), lambda: + String("S", (), False), lambda: MArray(["0", "1", "2"], dtype=object, mask=[True, False, True]) ), ] @@ -555,6 +589,44 @@ def cb(): w.set_data(vec, [Rename("Z")]) simulate.combobox_run_through_all(tc, callback=cb) + def test_unlink(self): + w = ContinuousVariableEditor() + cbox = w.unlink_var_cb + self.assertEqual(w.get_data(), (None, [])) + + v = Real("X", (-1, ""), (("A", "1"), ("B", "b")), False) + w.set_data(v, []) + self.assertFalse(cbox.isEnabled()) + + v = Real("X", (-1, ""), (("A", "1"), ("B", "b")), True) + w.set_data(v, [Unlink()]) + self.assertTrue(cbox.isEnabled()) + self.assertTrue(cbox.isChecked()) + + v = Real("X", (-1, ""), (("A", "1"), ("B", "b")), True) + w.set_data(v, []) + self.assertTrue(cbox.isEnabled()) + self.assertFalse(cbox.isChecked()) + + cbox.setChecked(True) + self.assertEqual(w.get_data()[1], [Unlink()]) + + w.set_data(v, [Unlink()]) + self.assertTrue(cbox.isChecked()) + + cbox.setChecked(False) + self.assertEqual(w.get_data()[1], []) + + cbox.setChecked(True) + w.clear() + self.assertFalse(cbox.isChecked()) + self.assertEqual(w.get_data()[1], []) + + w._set_unlink(True) + self.assertTrue(cbox.isChecked()) + w._set_unlink(False) + self.assertFalse(cbox.isChecked()) + class TestDelegates(GuiTest): def test_delegate(self): @@ -568,7 +640,7 @@ def get_style_option() -> QStyleOptionViewItem: delegate.initStyleOption(opt, model.index(0)) return opt - set_item({Qt.EditRole: Categorical("a", (), ())}) + set_item({Qt.EditRole: Categorical("a", (), (), False)}) delegate = VariableEditDelegate() opt = get_style_option() self.assertEqual(opt.text, "a") @@ -928,7 +1000,7 @@ def test_pickle(self): class TestGroupLessFrequentItemsDialog(GuiTest): def setUp(self) -> None: self.v = Categorical("C", ("a", "b", "c"), - (("A", "1"), ("B", "b"))) + (("A", "1"), ("B", "b")), 
False) self.data = [0, 0, 0, 1, 1, 2] def test_dialog_open(self):