From 321924cfc9d2b573641f38105132939f7467b271 Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 8 Jul 2019 22:44:00 +0200 Subject: [PATCH 1/5] OWFeatureConstructor: Deduct discrete values from function --- Orange/widgets/data/owfeatureconstructor.py | 38 +++++++++++++------ .../data/tests/test_owfeatureconstructor.py | 30 ++++++++++++--- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index d4e4a8dd043..6481b71fba6 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -412,8 +412,7 @@ def generate_newname(fmt): disc = menu.addAction("Categorical") disc.triggered.connect( lambda: self.addFeature( - DiscreteDescriptor(generate_newname("D{}"), "", - ("A", "B"), False)) + DiscreteDescriptor(generate_newname("D{}"), "", (), False)) ) string = menu.addAction("Text") string.triggered.connect( @@ -593,6 +592,11 @@ def validate(source): return final def apply(self): + def report_error(err): + log = logging.getLogger(__name__) + log.error("", exc_info=True) + self.error("".join(format_exception_only(type(err), err)).rstrip()) + self.Error.clear() if self.data is None: @@ -600,8 +604,11 @@ def apply(self): desc = list(self.featuremodel) desc = self._validate_descriptors(desc) - source_vars = self.data.domain.variables + self.data.domain.metas - new_variables = construct_variables(desc, source_vars) + try: + new_variables = construct_variables(desc, self.data) + except Exception as err: + report_error(err) + return attrs = [var for var in new_variables if var.is_primitive()] metas = [var for var in new_variables if not var.is_primitive()] @@ -616,9 +623,7 @@ def apply(self): # user's expression can contain arbitrary errors # pylint: disable=broad-except except Exception as err: - log = logging.getLogger(__name__) - log.error("", exc_info=True) - self.error("".join(format_exception_only(type(err), err)).rstrip()) + report_error(err) return disc_attrs_not_ok = self.check_attrs_values( [var for var in attrs if var.is_discrete], data) @@ -815,11 +820,12 @@ def validate_exp(exp): raise ValueError(exp) -def construct_variables(descriptions, source_vars): +def construct_variables(descriptions, data): # subs variables = [] + source_vars = data.domain.variables + data.domain.metas for desc in descriptions: - _, func = bind_variable(desc, source_vars) + desc, func = bind_variable(desc, source_vars, data) var = make_variable(desc, func) variables.append(var) return variables @@ -832,7 +838,7 @@ def sanitized_name(name): return sanitized -def bind_variable(descriptor, env): +def bind_variable(descriptor, env, data): """ (descriptor, env) -> (descriptor, (instance -> value) | (table -> value list)) @@ -848,8 +854,16 @@ def bind_variable(descriptor, env): values = {} if isinstance(descriptor, DiscreteDescriptor): - values = [sanitized_name(v) for v in descriptor.values] - values = {name: i for i, name in enumerate(values)} + if not descriptor.values: + str_func = FeatureFunc(descriptor.expression, source_vars) + values = sorted({str(x) for x in str_func(data)}) + values = {name: i for i, name in enumerate(values)} + descriptor = descriptor \ + ._replace(values=values) \ + ._replace(expression=f"{values}.get(str({descriptor.expression}), float('nan'))") + else: + values = [sanitized_name(v) for v in descriptor.values] + values = {name: i for i, name in enumerate(values)} return descriptor, FeatureFunc(descriptor.expression, source_vars, values) diff --git a/Orange/widgets/data/tests/test_owfeatureconstructor.py b/Orange/widgets/data/tests/test_owfeatureconstructor.py index e31481a54c7..47730c3ac6d 100644 --- a/Orange/widgets/data/tests/test_owfeatureconstructor.py +++ b/Orange/widgets/data/tests/test_owfeatureconstructor.py @@ -34,7 +34,7 @@ def test_construct_variables_discrete(self): values=values, ordered=False)] ) data = Table(Domain(list(data.domain.attributes) + - construct_variables(desc, data.domain.variables), + construct_variables(desc, data), data.domain.class_vars, data.domain.metas), data) self.assertTrue(isinstance(data.domain[name], DiscreteVariable)) @@ -42,6 +42,26 @@ def test_construct_variables_discrete(self): for i in range(3): self.assertEqual(data[i * 50, name], values[i]) + def test_construct_variables_discrete_no_values(self): + data = Table("iris") + name = 'Discrete Variable' + expression = "str(iris)[-1]" # last letter - a or r + values = () + desc = PyListModel( + [DiscreteDescriptor(name=name, expression=expression, + values=values, ordered=False)] + ) + data = Table(Domain(list(data.domain.attributes) + + construct_variables(desc, data), + data.domain.class_vars, + data.domain.metas), data) + newvar = data.domain[name] + self.assertTrue(isinstance(newvar, DiscreteVariable)) + self.assertEqual(set(data.domain[name].values), set("ar")) + for i in range(3): + inst = data[i * 50] + self.assertEqual(str(inst[name]), str(inst["iris"])[-1]) + def test_construct_variables_continuous(self): data = Table("iris") name = 'Continuous Variable' @@ -51,7 +71,7 @@ def test_construct_variables_continuous(self): number_of_decimals=2)] ) data = Table(Domain(list(data.domain.attributes) + - construct_variables(featuremodel, data.domain.variables), + construct_variables(featuremodel, data), data.domain.class_vars, data.domain.metas), data) self.assertTrue(isinstance(data.domain[name], ContinuousVariable)) @@ -69,7 +89,7 @@ def test_construct_variables_string(self): data = Table(Domain(data.domain.attributes, data.domain.class_vars, list(data.domain.metas) + - construct_variables(desc, data.domain.variables)), + construct_variables(desc, data)), data) self.assertTrue(isinstance(data.domain[name], StringVariable)) for i in range(3): @@ -86,7 +106,7 @@ def test_construct_numeric_names(): expression="_0_1 + _1", number_of_decimals=3)] ) - nv = construct_variables(desc, data.domain.variables) + nv = construct_variables(desc, data) ndata = Table(Domain(nv, None), data) np.testing.assert_array_equal(ndata.X[:, 0], data.X[:, :2].sum(axis=1)) @@ -257,7 +277,7 @@ def test_discrete_no_values(self): self.widget.setData(data) discreteFeatureEditor = DiscreteFeatureEditor() - discreteFeatureEditor.valuesedit.setText("") + discreteFeatureEditor.valuesedit.setText("A") discreteFeatureEditor.nameedit.setText("D1") discreteFeatureEditor.expressionedit.setText("iris") self.widget.addFeature( From 71321ccaf65a683d4c6f83ca6370f3e059358537 Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 8 Jul 2019 22:44:45 +0200 Subject: [PATCH 2/5] OWFeatureConstructor: Cast string values to strings --- Orange/widgets/data/owfeatureconstructor.py | 26 +++++++++++------ .../data/tests/test_owfeatureconstructor.py | 28 ++++++++++++++----- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index 6481b71fba6..4876941fd7e 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -25,8 +25,7 @@ from AnyQt.QtWidgets import ( QSizePolicy, QAbstractItemView, QComboBox, QFormLayout, QLineEdit, QHBoxLayout, QVBoxLayout, QStackedWidget, QStyledItemDelegate, - QPushButton, QMenu, QListView, QFrame -) + QPushButton, QMenu, QListView, QFrame, QLabel) from AnyQt.QtGui import QKeySequence from AnyQt.QtCore import Qt, pyqtSignal as Signal, pyqtProperty as Property @@ -115,8 +114,11 @@ def __init__(self, *args, **kwargs): QSizePolicy.Fixed) ) self.expressionedit = QLineEdit( - placeholderText="Expression..." - ) + placeholderText="Expression...", + toolTip="Result must be a number for numeric variables, " + "and strings for text variables.\n" + "For categorical, return integer indices if values are " + "specified, and strings if they are not.") self.attrs_model = itemmodels.VariableListModel( ["Select Feature"], parent=self) @@ -235,11 +237,16 @@ class DiscreteFeatureEditor(FeatureEditor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.valuesedit = QLineEdit() + tooltip = \ + "If values are given, expression must return integer indices.\n" \ + "Otherwise, expression must return strings." + self.valuesedit = QLineEdit(placeholderText="A, B ...", toolTip=tooltip) self.valuesedit.textChanged.connect(self._invalidate) layout = self.layout() - layout.addRow(self.tr("Values"), self.valuesedit) + label = QLabel(self.tr("Values (optional)")) + label.setToolTip(tooltip) + layout.addRow(label, self.valuesedit) def setEditorData(self, data, domain): self.valuesedit.setText( @@ -606,7 +613,8 @@ def report_error(err): desc = self._validate_descriptors(desc) try: new_variables = construct_variables(desc, self.data) - except Exception as err: + # user's expression can contain arbitrary errors + except Exception as err: # pylint: disable=broad-except report_error(err) return @@ -1004,7 +1012,9 @@ def __call__(self, instance, *_): if isinstance(instance, Orange.data.Table): return [self(inst) for inst in instance] else: - args = [instance[var] for _, var in self.args] + args = [str(instance[var]) + if instance.domain[var].is_string else instance[var] + for _, var in self.args] return self.func(*args) def __reduce__(self): diff --git a/Orange/widgets/data/tests/test_owfeatureconstructor.py b/Orange/widgets/data/tests/test_owfeatureconstructor.py index 47730c3ac6d..64a4583a4cc 100644 --- a/Orange/widgets/data/tests/test_owfeatureconstructor.py +++ b/Orange/widgets/data/tests/test_owfeatureconstructor.py @@ -224,26 +224,40 @@ def validate_(source): class FeatureFuncTest(unittest.TestCase): def test_reconstruct(self): - f = FeatureFunc("a * x + c", [("x", "x")], {"a": 2, "c": 10}) - self.assertEqual(f({"x": 2}), 14) + iris = Table("iris") + inst1 = iris[0] + val1 = 2 * inst1["sepal width"] + 10 + inst2 = iris[100] + val2 = 2 * inst2["sepal width"] + 10 + + f = FeatureFunc("a * sepal_width + c", + [("sepal_width", iris.domain["sepal width"])], + {"a": 2, "c": 10}) + self.assertAlmostEqual(f(inst1), val1) f1 = pickle.loads(pickle.dumps(f)) - self.assertEqual(f1({"x": 2}), 14) + self.assertAlmostEqual(f1(inst1), val1) fc = copy.copy(f) - self.assertEqual(fc({"x": 3}), 16) + self.assertEqual(fc(inst2), val2) def test_repr(self): self.assertEqual(repr(FeatureFunc("a + 1", [("a", 2)])), "FeatureFunc('a + 1', [('a', 2)], {})") def test_call(self): - f = FeatureFunc("a + 1", [("a", "a")]) - self.assertEqual(f({"a": 2}), 3) - iris = Table("iris") f = FeatureFunc("sepal_width + 10", [("sepal_width", iris.domain["sepal width"])]) r = f(iris) np.testing.assert_array_equal(r, iris.X[:, 1] + 10) + self.assertEqual(f(iris[0]), iris[0]["sepal width"] + 10) + + def test_string_casting(self): + zoo = Table("zoo") + f = FeatureFunc("name[0]", + [("name", zoo.domain["name"])]) + r = f(zoo) + self.assertEqual(r, [x[0] for x in zoo.metas[:, 0]]) + self.assertEqual(f(zoo[0]), str(zoo[0, "name"])[0]) class OWFeatureConstructorTests(WidgetTest): From 663fd18ce1a1e204cc9e23b30d590a5fbd6fc528 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 12 Jul 2019 10:52:09 +0200 Subject: [PATCH 3/5] Feature Constructor: Add DateTime variable --- Orange/widgets/data/owfeatureconstructor.py | 26 +++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index 4876941fd7e..c2a63891738 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -44,6 +44,9 @@ ContinuousDescriptor = \ namedtuple("ContinuousDescriptor", ["name", "expression", "number_of_decimals"]) +DateTimeDescriptor = \ + namedtuple("DateTimeDescriptor", + ["name", "expression"]) DiscreteDescriptor = \ namedtuple("DiscreteDescriptor", ["name", "expression", "values", "ordered"]) @@ -57,6 +60,10 @@ def make_variable(descriptor, compute_value): descriptor.name, descriptor.number_of_decimals, compute_value) + if isinstance(descriptor, DateTimeDescriptor): + return Orange.data.TimeVariable( + descriptor.name, + compute_value=compute_value, have_date=True, have_time=True) elif isinstance(descriptor, DiscreteDescriptor): return Orange.data.DiscreteVariable( descriptor.name, @@ -233,6 +240,15 @@ def editorData(self): ) +class DateTimeFeatureEditor(FeatureEditor): + + def editorData(self): + return DateTimeDescriptor( + name=self.nameedit.text(), + expression=self.expressionedit.text() + ) + + class DiscreteFeatureEditor(FeatureEditor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -277,6 +293,7 @@ def editorData(self): _VarMap = { DiscreteDescriptor: vartype(Orange.data.DiscreteVariable()), ContinuousDescriptor: vartype(Orange.data.ContinuousVariable()), + DateTimeDescriptor: vartype(Orange.data.TimeVariable()), StringDescriptor: vartype(Orange.data.StringVariable()) } @@ -354,6 +371,7 @@ class Outputs: EDITORS = [ (ContinuousDescriptor, ContinuousFeatureEditor), + (DateTimeDescriptor, DateTimeFeatureEditor), (DiscreteDescriptor, DiscreteFeatureEditor), (StringDescriptor, StringFeatureEditor) ] @@ -426,6 +444,12 @@ def generate_newname(fmt): lambda: self.addFeature( StringDescriptor(generate_newname("S{}"), "")) ) + datetime = menu.addAction("Date/Time") + datetime.triggered.connect( + lambda: self.addFeature( + DateTimeDescriptor(generate_newname("T{}"), "")) + ) + menu.addSeparator() self.duplicateaction = menu.addAction("Duplicate Selected Variable") self.duplicateaction.triggered.connect(self.duplicateFeature) @@ -650,6 +674,8 @@ def send_report(self): "; ordered" * feature.ordered) elif isinstance(feature, ContinuousDescriptor): items[feature.name] = "{} (numeric)".format(feature.expression) + elif isinstance(feature, DateTimeDescriptor): + items[feature.name] = "{} (date/time)".format(feature.expression) else: items[feature.name] = "{} (text)".format(feature.expression) self.report_items( From 40511eeceb47556fb04b5a5e5b267b50b788d07c Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 12 Jul 2019 14:56:15 +0200 Subject: [PATCH 4/5] Feature Constructor: Add 'cast' method to 'FeatureFunc' --- Orange/widgets/data/owfeatureconstructor.py | 39 +++++++++++++++---- .../data/tests/test_owfeatureconstructor.py | 23 +++++++++-- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index c2a63891738..506b4b6e111 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -887,18 +887,35 @@ def bind_variable(descriptor, env, data): if name in variables] values = {} + cast = None + nan = float("nan") + if isinstance(descriptor, DiscreteDescriptor): if not descriptor.values: str_func = FeatureFunc(descriptor.expression, source_vars) values = sorted({str(x) for x in str_func(data)}) values = {name: i for i, name in enumerate(values)} - descriptor = descriptor \ - ._replace(values=values) \ - ._replace(expression=f"{values}.get(str({descriptor.expression}), float('nan'))") + descriptor = descriptor._replace(values=values) + + def cast(x): # pylint: disable=function-redefined + return values.get(x, nan) + else: values = [sanitized_name(v) for v in descriptor.values] values = {name: i for i, name in enumerate(values)} - return descriptor, FeatureFunc(descriptor.expression, source_vars, values) + + if isinstance(descriptor, DateTimeDescriptor): + parse = Orange.data.TimeVariable("_").parse + + def cast(e): # pylint: disable=function-redefined + if isinstance(e, (int, float)): + return e + if e == "" or e is None: + return np.nan + return parse(e) + + func = FeatureFunc(descriptor.expression, source_vars, values, cast) + return descriptor, func def make_lambda(expression, args, env=None): @@ -1026,13 +1043,17 @@ class FeatureFunc: extra_env : Optional[Dict[str, Any]] Extra environment specifying constant values to be made available in expression. It must not shadow names in `args` + cast: Optional[Callable] + A function for casting the expressions result to the appropriate + type (e.g. string representation of date/time variables to floats) """ - def __init__(self, expression, args, extra_env=None): + def __init__(self, expression, args, extra_env=None, cast=None): self.expression = expression self.args = args self.extra_env = dict(extra_env or {}) self.func = make_lambda(ast.parse(expression, mode="eval"), [name for name, _ in args], self.extra_env) + self.cast = cast def __call__(self, instance, *_): if isinstance(instance, Orange.data.Table): @@ -1041,10 +1062,14 @@ def __call__(self, instance, *_): args = [str(instance[var]) if instance.domain[var].is_string else instance[var] for _, var in self.args] - return self.func(*args) + y = self.func(*args) + if self.cast: + y = self.cast(y) + return y def __reduce__(self): - return type(self), (self.expression, self.args, self.extra_env) + return type(self), (self.expression, self.args, + self.extra_env, self.cast) def __repr__(self): return "{0.__name__}{1!r}".format(*self.__reduce__()) diff --git a/Orange/widgets/data/tests/test_owfeatureconstructor.py b/Orange/widgets/data/tests/test_owfeatureconstructor.py index 64a4583a4cc..5a0f02189d1 100644 --- a/Orange/widgets/data/tests/test_owfeatureconstructor.py +++ b/Orange/widgets/data/tests/test_owfeatureconstructor.py @@ -8,14 +8,15 @@ import numpy as np from Orange.data import (Table, Domain, StringVariable, - ContinuousVariable, DiscreteVariable) + ContinuousVariable, DiscreteVariable, TimeVariable) from Orange.widgets.tests.base import WidgetTest from Orange.widgets.utils import vartype from Orange.widgets.utils.itemmodels import PyListModel from Orange.widgets.data.owfeatureconstructor import ( DiscreteDescriptor, ContinuousDescriptor, StringDescriptor, construct_variables, OWFeatureConstructor, - FeatureEditor, DiscreteFeatureEditor, FeatureConstructorHandler) + FeatureEditor, DiscreteFeatureEditor, FeatureConstructorHandler, + DateTimeDescriptor) from Orange.widgets.data.owfeatureconstructor import ( freevars, validate_exp, FeatureFunc @@ -79,6 +80,22 @@ def test_construct_variables_continuous(self): self.assertEqual(data[i * 50, name], pow(data[i * 50, 0] + data[i * 50, 1], 2)) + def test_construct_variables_datetime(self): + data = Table("housing") + name = 'Date' + expression = '"2019-07-{:02}".format(int(MEDV/3))' + featuremodel = PyListModel( + [DateTimeDescriptor(name=name, expression=expression)] + ) + data = Table(Domain(list(data.domain.attributes) + + construct_variables(featuremodel, data), + data.domain.class_vars, + data.domain.metas), data) + self.assertTrue(isinstance(data.domain[name], TimeVariable)) + for row in data: + self.assertEqual("2019-07-{:02}".format(int(row["MEDV"] / 3)), + str(row["Date"])[:10]) + def test_construct_variables_string(self): data = Table("iris") name = 'String Variable' @@ -241,7 +258,7 @@ def test_reconstruct(self): def test_repr(self): self.assertEqual(repr(FeatureFunc("a + 1", [("a", 2)])), - "FeatureFunc('a + 1', [('a', 2)], {})") + "FeatureFunc('a + 1', [('a', 2)], {}, None)") def test_call(self): iris = Table("iris") From ac97c90135f869aa5fe4eb28ab53f2f7979b1e90 Mon Sep 17 00:00:00 2001 From: janezd Date: Tue, 30 Jul 2019 17:51:11 +0200 Subject: [PATCH 5/5] OWFeatureConstructor: Different line edit tooltips for different variable types --- Orange/widgets/data/owfeatureconstructor.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index 506b4b6e111..ec6c9c2bba6 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -122,10 +122,7 @@ def __init__(self, *args, **kwargs): ) self.expressionedit = QLineEdit( placeholderText="Expression...", - toolTip="Result must be a number for numeric variables, " - "and strings for text variables.\n" - "For categorical, return integer indices if values are " - "specified, and strings if they are not.") + toolTip=self.ExpressionTooltip) self.attrs_model = itemmodels.VariableListModel( ["Select Feature"], parent=self) @@ -231,6 +228,7 @@ def insert_into_expression(self, what): class ContinuousFeatureEditor(FeatureEditor): + ExpressionTooltip = "A numeric expression" def editorData(self): return ContinuousDescriptor( @@ -241,6 +239,10 @@ def editorData(self): class DateTimeFeatureEditor(FeatureEditor): + ExpressionTooltip = \ + "Result must be a string in ISO-8601 format " \ + "(e.g. 2019-07-30T15:37:27 or a part thereof),\n" \ + "or a number of seconds since Jan 1, 1970." def editorData(self): return DateTimeDescriptor( @@ -250,12 +252,16 @@ def editorData(self): class DiscreteFeatureEditor(FeatureEditor): + ExpressionTooltip = \ + "Result must be a string, if values are not explicitly given\n" \ + "or a zero-based integer indices into a list of values given below." + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) tooltip = \ - "If values are given, expression must return integer indices.\n" \ - "Otherwise, expression must return strings." + "If values are given, above expression must return zero-based " \ + "integer indices into that list." self.valuesedit = QLineEdit(placeholderText="A, B ...", toolTip=tooltip) self.valuesedit.textChanged.connect(self._invalidate) @@ -283,6 +289,8 @@ def editorData(self): class StringFeatureEditor(FeatureEditor): + ExpressionTooltip = "A string expression" + def editorData(self): return StringDescriptor( name=self.nameedit.text(),