diff --git a/Orange/widgets/data/icons/CreateInstance.svg b/Orange/widgets/data/icons/CreateInstance.svg new file mode 100644 index 00000000000..2a7b039df6d --- /dev/null +++ b/Orange/widgets/data/icons/CreateInstance.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Orange/widgets/data/owcreateinstance.py b/Orange/widgets/data/owcreateinstance.py new file mode 100644 index 00000000000..615bbb6a6b6 --- /dev/null +++ b/Orange/widgets/data/owcreateinstance.py @@ -0,0 +1,719 @@ +from typing import Optional, Callable, List, Union, Dict +from collections import namedtuple +from functools import singledispatch + +import numpy as np + +from AnyQt.QtCore import Qt, QSortFilterProxyModel, QSize, QDateTime, \ + QModelIndex, Signal, QPoint, QRect, QEvent +from AnyQt.QtGui import QStandardItemModel, QStandardItem, QIcon, QPainter, \ + QColor +from AnyQt.QtWidgets import QLineEdit, QTableView, QSlider, \ + QComboBox, QStyledItemDelegate, QWidget, QDateTimeEdit, QHBoxLayout, \ + QDoubleSpinBox, QSizePolicy, QStyleOptionViewItem, QLabel, QMenu, QAction + +from Orange.data import DiscreteVariable, ContinuousVariable, \ + TimeVariable, Table, StringVariable, Variable, Domain +from Orange.widgets import gui +from Orange.widgets.utils.itemmodels import TableModel +from Orange.widgets.settings import Setting +from Orange.widgets.utils.state_summary import format_summary_details, \ + format_multiple_summaries +from Orange.widgets.utils.widgetpreview import WidgetPreview +from Orange.widgets.widget import OWWidget, Input, Output, Msg + +VariableRole = next(gui.OrangeUserRole) +ValuesRole = next(gui.OrangeUserRole) +ValueRole = next(gui.OrangeUserRole) + + +class VariableEditor(QWidget): + valueChanged = Signal(float) + + def __init__(self, parent: QWidget, callback: Callable): + super().__init__(parent) + layout = QHBoxLayout() + layout.setContentsMargins(6, 0, 6, 0) + layout.setAlignment(Qt.AlignLeft) + self.setLayout(layout) + 
class VariableEditor(QWidget):
    """Common base class of the per-variable value editors.

    It owns a left-aligned horizontal layout, which subclasses fill with
    their controls, and connects ``valueChanged`` to ``callback``.
    """
    valueChanged = Signal(float)

    def __init__(self, parent: QWidget, callback: Callable):
        super().__init__(parent)
        row = QHBoxLayout()
        row.setContentsMargins(6, 0, 6, 0)
        row.setAlignment(Qt.AlignLeft)
        self.setLayout(row)
        # Looked up on the instance, so a subclass that redefines
        # ``valueChanged`` gets its own signal connected.
        self.valueChanged.connect(callback)

    @property
    def value(self) -> Union[int, float, str]:
        """Value held by the editor; overridden by every subclass."""
        return NotImplemented

    @value.setter
    def value(self, value: Union[float, str]):
        raise NotImplementedError

    def sizeHint(self):
        """Return the default width combined with a fixed height of 40."""
        default_width = super().sizeHint().width()
        return QSize(default_width, 40)


class DiscreteVariableEditor(VariableEditor):
    """Editor that selects one of ``items`` in a combo box.

    The value is the index of the selected item.
    """
    valueChanged = Signal(int)

    def __init__(self, parent: QWidget, items: List[str], callback: Callable):
        super().__init__(parent, callback)
        policy = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        self._combo = QComboBox(parent, maximumWidth=180, sizePolicy=policy)
        self._combo.addItems(items)
        # Forward the combo's index changes as the editor's own signal.
        self._combo.currentIndexChanged.connect(self.valueChanged)
        self.layout().addWidget(self._combo)

    @property
    def value(self) -> int:
        """Index of the currently selected item."""
        return self._combo.currentIndex()

    @value.setter
    def value(self, value: float):
        assert value == int(value)
        index = int(value)
        self._combo.setCurrentIndex(index)
class ContinuousVariableEditor(VariableEditor):
    """Editor for a numeric value: a spin box coupled with a slider.

    The slider works on integers, so values are scaled by
    ``10 ** n_decimals`` when mapped to and from it. The slider covers
    ``[min_value, max_value]``; the spin box is unbounded.

    Raises:
        ValueError: if ``min_value`` or ``max_value`` is NaN.
    """
    # Upper bound for the scaled (integer) slider values.
    MAX_FLOAT = 2147483647

    def __init__(self, parent: QWidget, variable: ContinuousVariable,
                 min_value: float, max_value: float, callback: Callable):
        super().__init__(parent, callback)

        if np.isnan(min_value) or np.isnan(max_value):
            raise ValueError("Min/Max cannot be NaN.")

        # Reduce the number of decimals when the scaled extreme would not
        # fit below MAX_FLOAT.
        n_decimals = variable.number_of_decimals
        abs_max = max(abs(min_value), max_value)
        if abs_max * 10 ** n_decimals > self.MAX_FLOAT:
            n_decimals = int(np.log10(self.MAX_FLOAT / abs_max))

        self._value: float = min_value
        self._n_decimals: int = n_decimals
        self._min_value: float = self.__round_value(min_value)
        self._max_value: float = self.__round_value(max_value)

        sp_spin = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        sp_spin.setHorizontalStretch(1)
        sp_slider = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        sp_slider.setHorizontalStretch(5)
        sp_edit = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        sp_edit.setHorizontalStretch(1)

        class DoubleSpinBox(QDoubleSpinBox):
            def sizeHint(self) -> QSize:
                size: QSize = super().sizeHint()
                return QSize(size.width(), size.height() + 2)

        self._spin = DoubleSpinBox(
            parent,
            minimum=-np.inf,
            maximum=np.inf,
            singleStep=10 ** (-self._n_decimals),
            decimals=self._n_decimals,
            minimumWidth=70,
            sizePolicy=sp_spin,
        )
        # Set the initial value only after range and decimals are in place;
        # a value applied while the spin box still has its default range
        # and precision would be clamped/rounded by Qt.
        self._spin.setValue(self._min_value)

        self._slider = QSlider(
            parent,
            minimum=self.__map_to_slider(self._min_value),
            maximum=self.__map_to_slider(self._max_value),
            singleStep=1,
            orientation=Qt.Horizontal,
            sizePolicy=sp_slider,
        )
        # Start the slider at the same position as `self._value`. Signals
        # are not connected yet, so nothing is emitted here.
        self._slider.setValue(self.__map_to_slider(self._min_value))

        self._label_min = QLabel(
            parent,
            text=variable.repr_val(min_value),
            alignment=Qt.AlignRight,
            minimumWidth=60,
            sizePolicy=sp_edit,
        )
        self._label_max = QLabel(
            parent,
            text=variable.repr_val(max_value),
            alignment=Qt.AlignLeft,
            minimumWidth=60,
            sizePolicy=sp_edit,
        )

        self._slider.valueChanged.connect(self._apply_slider_value)
        self._spin.valueChanged.connect(self._apply_spin_value)

        self.layout().addWidget(self._spin)
        self.layout().addWidget(self._label_min)
        self.layout().addWidget(self._slider)
        self.layout().addWidget(self._label_max)

        self.setFocusProxy(self._spin)

        def deselect():
            self._spin.lineEdit().deselect()
            try:
                self._spin.lineEdit().selectionChanged.disconnect(deselect)
            except TypeError:
                pass

        # Invoking self.setFocusProxy(self._spin) causes the spin box's
        # line edit to have its text selected (focus is set to provide
        # keyboard functionality, e.g. pressing ESC after changing the
        # spin box value). Since the text is selected only after the
        # delegate draws it, it cannot be deselected during initialization.
        # Therefore connect deselect() to selectionChanged; it disconnects
        # itself after the first call.
        self._spin.lineEdit().selectionChanged.connect(deselect)

        # Wheel events on the controls are swallowed in eventFilter().
        self._slider.installEventFilter(self)
        self._spin.installEventFilter(self)

    @property
    def value(self) -> float:
        """Current value, rounded to the editor's number of decimals."""
        return self.__round_value(self._value)

    @value.setter
    def value(self, value: float):
        if self._value is None or self.__round_value(value) != self.value:
            self._value = value
            self.valueChanged.emit(self.value)
            self._spin.setValue(self.value)
            # prevent emitting self.valueChanged again, due to slider change
            slider_value = self.__map_to_slider(self.value)
            self._value = self.__map_from_slider(slider_value)
            self._slider.setValue(slider_value)
            self._value = value

    def _apply_slider_value(self):
        self.value = self.__map_from_slider(self._slider.value())

    def _apply_spin_value(self):
        self.value = self._spin.value()

    def __round_value(self, value):
        return round(value, self._n_decimals)

    def __map_to_slider(self, value: float) -> int:
        # Clamp to the data range before scaling to slider units.
        value = min(self._max_value, max(self._min_value, value))
        return round(value * 10 ** self._n_decimals)

    def __map_from_slider(self, value: int) -> float:
        return value * 10 ** (-self._n_decimals)

    def eventFilter(self, obj: Union[QSlider, QDoubleSpinBox], event: QEvent) \
            -> bool:
        """Swallow wheel events of the watched controls."""
        if event.type() == QEvent.Wheel:
            return True
        return super().eventFilter(obj, event)


class StringVariableEditor(VariableEditor):
    """Editor for a text value, backed by a line edit."""
    valueChanged = Signal()

    def __init__(self, parent: QWidget, callback: Callable):
        super().__init__(parent, callback)
        self._edit = QLineEdit(
            parent,
            sizePolicy=QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        )
        self._edit.textChanged.connect(self.valueChanged)
        self.layout().addWidget(self._edit)
        self.setFocusProxy(self._edit)

    @property
    def value(self) -> str:
        """Text currently in the line edit."""
        return self._edit.text()

    @value.setter
    def value(self, value: str):
        self._edit.setText(value)
class TimeVariableEditor(VariableEditor):
    """Editor for a time value, backed by a date/time edit.

    The value is a float; it is converted to and from ``QDateTime`` through
    the variable's ``repr_val`` / ``to_val`` and a display format chosen
    from the variable's ``have_date`` / ``have_time`` flags.
    """
    DATE_FORMAT = "yyyy-MM-dd"
    TIME_FORMAT = "hh:mm:ss"

    def __init__(self, parent: QWidget, variable: TimeVariable,
                 callback: Callable):
        super().__init__(parent, callback)
        self._value: float = 0
        self._variable: TimeVariable = variable

        date_only = variable.have_date and not variable.have_time
        time_only = not variable.have_date and variable.have_time
        if date_only:
            display_format = TimeVariableEditor.DATE_FORMAT
        elif time_only:
            display_format = TimeVariableEditor.TIME_FORMAT
        else:
            display_format = f"{TimeVariableEditor.DATE_FORMAT} " \
                             f"{TimeVariableEditor.TIME_FORMAT}"
        # Must be set before the first value <-> QDateTime conversion below.
        self._format = display_format

        class DateTimeEdit(QDateTimeEdit):
            def sizeHint(self) -> QSize:
                hint: QSize = super().sizeHint()
                return QSize(hint.width(), hint.height() + 2)

        initial = self.__map_to_datetime(self._value)
        policy = QSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed)
        self._edit = DateTimeEdit(
            parent,
            dateTime=initial,
            displayFormat=self._format,
            sizePolicy=policy
        )
        self._edit.dateTimeChanged.connect(self._apply_edit_value)

        self.layout().addWidget(self._edit)
        self.setFocusProxy(self._edit)

        # Wheel events on the edit are swallowed in eventFilter().
        self._edit.installEventFilter(self)

    @property
    def value(self) -> float:
        """Current value as stored (not re-read from the widget)."""
        return self._value

    @value.setter
    def value(self, value: float):
        if value == self.value:
            return
        self._value = value
        self.valueChanged.emit(self.value)
        self._edit.setDateTime(self.__map_to_datetime(self.value))

    def _apply_edit_value(self):
        edited = self._edit.dateTime()
        self.value = self.__map_from_datetime(edited)

    def __map_from_datetime(self, date_time: QDateTime) -> float:
        text = date_time.toString(self._format)
        return self._variable.to_val(text)

    def __map_to_datetime(self, value: float) -> QDateTime:
        text = self._variable.repr_val(value)
        return QDateTime.fromString(text, self._format)

    def eventFilter(self, obj: QDateTimeEdit, event: QEvent) -> bool:
        """Swallow wheel events of the watched edit."""
        if event.type() == QEvent.Wheel:
            return True
        return super().eventFilter(obj, event)
class VariableDelegate(QStyledItemDelegate):
    """Delegate that shows a persistent, variable-specific value editor.

    The delegate's parent is expected to expose the table view as ``view``.
    """

    def paint(self, painter: QPainter, option: QStyleOptionViewItem,
              index: QModelIndex):
        # Ask the view for a persistent editor whenever the cell is painted.
        view = self.parent().view
        view.openPersistentEditor(index)
        super().paint(painter, option, index)

    def createEditor(self, parent: QWidget, _: QStyleOptionViewItem,
                     index: QModelIndex) -> VariableEditor:
        """Build the editor matching the variable stored in ``index``."""
        return _create_editor(index.data(VariableRole),
                              index.data(ValuesRole),
                              parent, self._commit_data)

    def _commit_data(self):
        # Invoked through the editor's valueChanged signal.
        source = self.sender()
        assert isinstance(source, VariableEditor)
        self.commitData.emit(source)

    # pylint: disable=no-self-use
    def setEditorData(self, editor: VariableEditor, index: QModelIndex):
        """Copy the model's value into the editor."""
        model_value = index.model().data(index, ValueRole)
        editor.value = model_value

    # pylint: disable=no-self-use
    def setModelData(self, editor: VariableEditor,
                     model: QSortFilterProxyModel, index: QModelIndex):
        """Copy the editor's value into the model."""
        model.setData(index, editor.value, ValueRole)

    # pylint: disable=no-self-use
    def updateEditorGeometry(self, editor: VariableEditor,
                             option: QStyleOptionViewItem, _: QModelIndex):
        """Place the editor in the cell; numeric editors may be widened."""
        geometry: QRect = option.rect
        if isinstance(editor, ContinuousVariableEditor):
            hint_width = editor.sizeHint().width()
            if hint_width > geometry.width():
                geometry.setWidth(hint_width)
        editor.setGeometry(geometry)

    # pylint: disable=no-self-use
    def sizeHint(self, _: QStyleOptionViewItem, index: QModelIndex) -> QSize:
        """Return the size hint of a throw-away editor for the variable."""
        variable = index.data(role=VariableRole)
        probe = _create_editor(variable, np.array([0]), None, lambda: 1)
        return probe.sizeHint()


@singledispatch
def _create_editor(*_) -> VariableEditor:
    """Create an editor for a variable; dispatches on the variable type."""
    raise NotImplementedError


@_create_editor.register(DiscreteVariable)
def _create_discrete_editor(
        variable: DiscreteVariable, _: np.ndarray,
        parent: QWidget, callback: Callable) -> DiscreteVariableEditor:
    return DiscreteVariableEditor(parent, variable.values, callback)


@_create_editor.register(ContinuousVariable)
def _create_continuous_editor(
        variable: ContinuousVariable, values: np.ndarray,
        parent: QWidget, callback: Callable) -> ContinuousVariableEditor:
    # The slider range is taken from the non-missing column values.
    lowest = np.nanmin(values)
    highest = np.nanmax(values)
    return ContinuousVariableEditor(parent, variable, lowest, highest,
                                    callback)


@_create_editor.register(StringVariable)
def _create_string_editor(
        _: StringVariable, __: np.ndarray, parent: QWidget,
        callback: Callable) -> StringVariableEditor:
    return StringVariableEditor(parent, callback)


@_create_editor.register(TimeVariable)
def _create_time_editor(
        variable: TimeVariable, _: np.ndarray,
        parent: QWidget, callback: Callable) -> TimeVariableEditor:
    return TimeVariableEditor(parent, variable, callback)
def majority(values: np.ndarray) -> int:
    """Return the most frequent value among the non-missing entries.

    ``values`` holds non-negative integer codes stored as numbers, with NaN
    marking missing entries. Ties go to the smallest code, because
    ``argmax`` returns the first maximum.

    Raises:
        ValueError: if ``values`` has no non-missing entry.
    """
    values = np.asarray(values, dtype=float)
    known = values[~np.isnan(values)]
    if known.size == 0:
        raise ValueError(
            "Cannot compute the majority of an empty or all-NaN array."
        )
    # Convert the NumPy scalar so the annotated return type holds.
    return int(np.bincount(known.astype(int)).argmax())


def disc_random(values: np.ndarray) -> int:
    """Return a random integer between the smallest and largest value.

    Both bounds are inclusive and NaN entries are ignored.

    Raises:
        ValueError: if ``values`` is empty or contains only NaN.
    """
    # randint expects integer bounds; the column values are floats.
    low = int(np.nanmin(values))
    high = int(np.nanmax(values))
    return int(np.random.randint(low=low, high=high + 1))


def cont_random(values: np.ndarray) -> float:
    """Return a uniform random float within the range of ``values``.

    NaN entries are ignored when determining the range.
    """
    low = np.nanmin(values)
    high = np.nanmax(values)
    return float(np.random.uniform(low=low, high=high))
class VariableItemModel(QStandardItemModel):
    """Two-column model: variable name and the value chosen for it.

    The value column stores the variable (``VariableRole``), the column
    values used to derive defaults and ranges (``ValuesRole``) and the
    current value (``ValueRole``).
    """
    # Emitted for every primitive variable skipped for having only NaNs.
    dataHasNanColumn = Signal()

    def set_data(self, data: Table, saved_values=None):
        """Append one row per variable of ``data``.

        Primitive variables whose column holds only missing values are
        skipped, and ``dataHasNanColumn`` is emitted for each of them.

        Args:
            data: table providing the domain and the column values.
            saved_values: optional mapping from variable name to a
                previously stored value, used instead of the default when
                it is usable for the variable.
        """
        if saved_values is None:
            saved_values = {}
        domain = data.domain
        variables = [(TableModel.Attribute, a) for a in domain.attributes] + \
                    [(TableModel.ClassVar, c) for c in domain.class_vars] + \
                    [(TableModel.Meta, m) for m in domain.metas]
        for place, variable in variables:
            if variable.is_primitive():
                values = data.get_column_view(variable)[0].astype(float)
                if np.isnan(values).all():
                    self.dataHasNanColumn.emit()
                    continue
            else:
                values = np.array([])
            color = TableModel.ColorForRole.get(place)
            self._add_row(variable, values, color,
                          saved_values.get(variable.name))

    def _add_row(self, variable: Variable, values: np.ndarray, color: QColor,
                 saved_value: Optional[Union[int, float, str]]):
        """Append the name item and the value item for ``variable``."""
        var_item = QStandardItem()
        var_item.setData(variable.name, Qt.DisplayRole)
        var_item.setToolTip(variable.name)
        var_item.setIcon(self._variable_icon(variable))
        var_item.setEditable(False)
        if color:
            var_item.setBackground(color)

        control_item = QStandardItem()
        control_item.setData(variable, VariableRole)
        control_item.setData(values, ValuesRole)
        if color:
            control_item.setBackground(color)

        value = self._default_for_variable(variable, values)
        if self._is_usable_saved_value(variable, saved_value):
            value = saved_value
        control_item.setData(value, ValueRole)

        self.appendRow([var_item, control_item])

    @staticmethod
    def _is_usable_saved_value(variable: Variable, saved_value) -> bool:
        """Tell whether a stored value can be applied to ``variable``.

        A stored value may be stale: a variable of the same name can have a
        different type or fewer values than when the value was saved.
        Text is accepted only for string variables, non-text only for the
        others, and a discrete value must be a valid index into
        ``variable.values``.
        """
        if saved_value is None:
            return False
        is_text = isinstance(saved_value, str)
        if variable.is_string:
            return is_text
        if is_text:
            return False
        if variable.is_discrete:
            # Also rejects NaN, for which both comparisons are false.
            return 0 <= saved_value < len(variable.values)
        return True

    @staticmethod
    def _default_for_variable(variable: Variable, values: np.ndarray) \
            -> Union[float, int, str]:
        """Return the initial value: median, majority or empty string."""
        if variable.is_continuous:
            return round(np.nanmedian(values), variable.number_of_decimals)
        elif variable.is_discrete:
            return majority(values)
        elif variable.is_string:
            return ""
        else:
            raise NotImplementedError

    @staticmethod
    def _variable_icon(variable: Variable) -> QIcon:
        """Return the icon from ``gui.attributeIconDict`` for the type."""
        # Time is tested before continuous, as in the original ordering.
        if variable.is_discrete:
            return gui.attributeIconDict[1]
        elif variable.is_time:
            return gui.attributeIconDict[4]
        elif variable.is_continuous:
            return gui.attributeIconDict[2]
        elif variable.is_string:
            return gui.attributeIconDict[3]
        else:
            return gui.attributeIconDict[-1]
class OWCreateInstance(OWWidget):
    """Widget for interactively composing a single data instance.

    Each variable of the input data gets a row with an editor. The values
    can be initialized from the data (median, mean, random) or from the
    optional reference table ("input"). The created instance is output
    alone or appended to the input data.
    """
    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    ACTIONS = ["median", "mean", "random", "input"]
    HEADER = [["name", "Variable"],
              ["variable", "Value"]]
    # Column indices addressable by tag: Header.name, Header.variable.
    Header = namedtuple(
        "header", [tag for tag, _ in HEADER]
    )(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(
            self.Header.variable, VariableDelegate(self)
        )
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        self.proxy_model = QSortFilterProxyModel()
        # -1: the filter is matched against all columns.
        self.proxy_model.setFilterKeyColumn(-1)
        self.proxy_model.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox)
        gui.rubber(box)
        for name in self.ACTIONS:
            gui.button(
                box, self, name.capitalize(),
                # `fun=name` binds the current name; a plain closure would
                # see only the last one.
                lambda *args, fun=name: self._initialize_values(fun),
                autoDefault=False
            )
        gui.rubber(box)

        box = gui.auto_apply(self.controlArea, self, "auto_commit")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)
        # pylint: disable=unnecessary-lambda
        append = gui.checkBox(None, self, "append_to_data",
                              "Append this instance to input data",
                              callback=lambda: self.commit())
        box.layout().insertWidget(0, append)

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        self.commit()

    def __menu_requested(self, point: QPoint):
        """Show the initialization menu for the row under ``point``."""
        index = self.view.indexAt(point)
        if not index.isValid():
            # Nothing under the cursor; an invalid index has no model.
            return
        model: QSortFilterProxyModel = index.model()
        source_index = model.mapToSource(index)
        menu = QMenu(self)
        # Delete the menu (and the actions it owns) once it is closed
        # instead of accumulating them as children of the widget.
        menu.setAttribute(Qt.WA_DeleteOnClose)
        for action in self._create_actions(source_index, menu):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex, parent=None) \
            -> List[QAction]:
        """Create one action per entry of ``ACTIONS`` for a single row.

        Args:
            index: index in the source model of the row to initialize.
            parent: owner of the created actions; the widget itself when
                omitted.
        """
        if parent is None:
            parent = self
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), parent)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index])
            )
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str, indices: List[QModelIndex] = None):
        """Set the values of rows according to ``fun`` and commit once.

        Args:
            fun: one of ``ACTIONS``. "input" takes the values from the
                reference table; the others use the column values stored in
                the model.
            indices: indexes in the source model of the rows to change.
                When omitted, the rows currently shown by the proxy model
                (i.e. passing the filter) are changed.

        Does nothing without data, or for "input" without a reference.
        """
        cont_fun = {"median": np.nanmedian,
                    "mean": np.nanmean,
                    "random": cont_random,
                    "input": np.nanmean}.get(fun, NotImplemented)
        disc_fun = {"median": majority,
                    "mean": majority,
                    "random": disc_random,
                    "input": majority}.get(fun, NotImplemented)

        if not self.data or fun == "input" and not self.reference:
            return

        # Resolve the source rows up front: proxy rows differ from source
        # rows whenever the view is filtered or sorted.
        if indices is None:
            proxy = self.proxy_model
            column = self.Header.variable
            rows = [proxy.mapToSource(proxy.index(row, column)).row()
                    for row in range(proxy.rowCount())]
        else:
            rows = [index.row() for index in indices if index.isValid()]

        # Suspend per-cell commits; commit once after all rows are set.
        self.model.dataChanged.disconnect(self.__table_data_changed)
        try:
            for row in rows:
                index = self.model.index(row, self.Header.variable)
                variable = self.model.data(index, VariableRole)

                if fun == "input":
                    if variable not in self.reference.domain:
                        continue
                    values = self.reference.get_column_view(variable)[0]
                    if variable.is_primitive():
                        values = values.astype(float)
                        if np.isnan(values).all():
                            continue
                else:
                    values = self.model.data(index, ValuesRole)

                if variable.is_continuous:
                    value = cont_fun(values)
                    value = round(value, variable.number_of_decimals)
                elif variable.is_discrete:
                    value = disc_fun(values)
                elif variable.is_string:
                    value = ""
                else:
                    raise NotImplementedError

                self.model.setData(index, value, ValueRole)
        finally:
            # Reconnect even on failure, so later edits are still committed.
            self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        """Rebuild the model rows from the current data."""
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        # Stored values are applied once; they are re-collected in
        # pack_settings().
        self.values = {}
        self.view.horizontalHeader().setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        self.view.horizontalHeader().setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}

        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        """Send the created instance, optionally appended to the data."""
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        """Build a one-row table in the data's domain from the model."""
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        # Start from all-missing; variables without a model row (skipped
        # for having only NaNs) stay missing.
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        values = self._get_values()
        for var_name, value in values.items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        """Append ``data`` to the input and add a "Source ID" meta column.

        The new meta is 0 for the original rows and 1 for the created one.
        """
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID", values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var,))
        data = data.transform(domain)
        data.metas[: len(self.data), -1] = 0
        data.metas[len(self.data):, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        """Return the current values of all rows, keyed by variable name."""
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        return QSize(600, 500)

    def pack_settings(self):
        """Store the current values so they are saved with the workflow."""
        self.values: Dict[str, Union[str, float]] = self._get_values()


if __name__ == "__main__":  # pragma: no cover
    table = Table("housing")
    WidgetPreview(OWCreateInstance).run(set_data=table,
                                        set_reference=table[:1])
class TestOWCreateInstance(WidgetTest):
    """Tests of the Create Instance widget driven through its signals."""

    def setUp(self):
        self.widget = self.create_widget(OWCreateInstance)
        self.data = Table("iris")

    def test_output(self):
        widget = self.widget
        self.send_signal(widget.Inputs.data, self.data)
        widget.controls.append_to_data.setChecked(False)
        output = self.get_output(widget.Outputs.data)
        self.assertEqual(len(output), 1)
        self.assertEqual(output.name, "created")
        self.assertEqual(output.domain, self.data.domain)
        medians = np.round(np.median(self.data.X, axis=0), 1).reshape(1, 4)
        np.testing.assert_array_equal(output.X, medians)

    def test_output_append_data(self):
        widget = self.widget
        self.send_signal(widget.Inputs.data, self.data)
        widget.controls.append_to_data.setChecked(True)

        output = self.get_output(widget.Outputs.data)
        self.assertEqual(len(output), 151)

        # the original rows are kept and tagged with source 0
        np.testing.assert_array_equal(output.X[:150], self.data.X)
        np.testing.assert_array_equal(output.Y[:150], self.data.Y)
        zeros = np.zeros((150, 1), dtype=object)
        np.testing.assert_array_equal(output.metas[:150], zeros)

        # the created row holds the defaults and is tagged with source 1
        medians = np.round(np.median(self.data.X, axis=0), 1).reshape(1, 4)
        np.testing.assert_array_equal(output.X[150:], medians)
        np.testing.assert_array_equal(output.Y[150:], np.array([0]))
        np.testing.assert_array_equal(output.metas[150:], np.array([[1]]))

        self.assertEqual(output.domain.attributes, self.data.domain.attributes)
        self.assertEqual(output.domain.class_vars, self.data.domain.class_vars)
        self.assertIn("Source ID", [m.name for m in output.domain.metas])
        self.assertTupleEqual(output.domain.metas[0].values,
                              ("iris", "created"))

    def test_summary(self):
        widget = self.widget
        info = widget.info
        reference = self.data[:1]
        no_input, no_output = "No data on input", "No data on output"

        self.assertEqual(info._StateInfo__input_summary.brief, "")
        self.assertEqual(info._StateInfo__input_summary.details, no_input)
        self.assertEqual(info._StateInfo__output_summary.brief, "")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)

        self.send_signal(widget.Inputs.data, self.data)
        data_list = [("Data", self.data), ("Reference", None)]
        summary, details = "150, 0", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)

        output = self.get_output(widget.Outputs.data)
        details = format_summary_details(output)
        self.assertEqual(info._StateInfo__output_summary.brief, "151")
        self.assertEqual(info._StateInfo__output_summary.details, details)

        self.send_signal(widget.Inputs.reference, reference)
        data_list = [("Data", self.data), ("Reference", reference)]
        summary, details = "150, 1", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)

        self.send_signal(widget.Inputs.data, None)
        data_list = [("Data", None), ("Reference", reference)]
        summary, details = "0, 1", format_multiple_summaries(data_list)
        self.assertEqual(info._StateInfo__input_summary.brief, summary)
        self.assertEqual(info._StateInfo__input_summary.details, details)
        self.assertEqual(info._StateInfo__output_summary.brief, "")
        self.assertEqual(info._StateInfo__output_summary.details, no_output)

        self.send_signal(widget.Inputs.reference, None)
        self.assertEqual(info._StateInfo__input_summary.brief, "")
        self.assertEqual(info._StateInfo__input_summary.details, no_input)

    def _get_init_buttons(self, widget=None):
        """Return the initialization buttons, in the order of ACTIONS."""
        if not widget:
            widget = self.widget
        outer = widget.controlArea.layout().itemAt(0).widget()
        button_box = outer.children()[3]
        # the first child is skipped; the rest are the buttons
        return button_box.children()[1:]

    def test_initialize_buttons(self):
        widget = self.widget
        widget.controls.append_to_data.setChecked(False)
        self.send_signal(widget.Inputs.data, self.data)
        self.send_signal(widget.Inputs.reference, self.data[:1])
        output = self.get_output(widget.Outputs.data)

        buttons = self._get_init_buttons()

        buttons[3].click()  # Input
        output_input = self.get_output(widget.Outputs.data)
        self.assert_table_equal(output_input, self.data[:1])

        buttons[0].click()  # Median
        output_median = self.get_output(widget.Outputs.data)
        self.assert_table_equal(output_median, output)

        buttons[1].click()  # Mean
        output_mean = self.get_output(widget.Outputs.data)
        output.X = np.round(np.mean(self.data.X, axis=0), 1).reshape(1, 4)
        self.assert_table_equal(output_mean, output)

        buttons[2].click()  # Random
        output_random = self.get_output(widget.Outputs.data)
        self.assertTrue((self.data.X.max(axis=0) >= output_random.X).all())
        self.assertTrue((self.data.X.min(axis=0) <= output_random.X).all())

        # a single-row table leaves only one possible "random" value
        self.send_signal(widget.Inputs.data, self.data[9:10])
        buttons[2].click()  # Random
        output_random = self.get_output(widget.Outputs.data)
        self.assert_table_equal(output_random, self.data[9:10])

        # without a reference, "Input" must not change anything
        self.send_signal(widget.Inputs.reference, None)
        buttons[3].click()  # Input
        output = self.get_output(widget.Outputs.data)
        self.assert_table_equal(output_random, output)

    def test_initialize_buttons_commit_once(self):
        widget = self.widget
        widget.commit = widget.unconditional_commit = Mock()
        self.send_signal(widget.Inputs.data, self.data)
        self.send_signal(widget.Inputs.reference, self.data[:1])
        widget.unconditional_commit.assert_called_once()

        widget.commit.reset_mock()
        buttons = self._get_init_buttons()
        buttons[3].click()  # Input
        widget.commit.assert_called_once()

    def test_table(self):
        widget = self.widget
        self.send_signal(widget.Inputs.data, self.data)
        self.assertEqual(widget.view.model().rowCount(), 5)
        self.assertEqual(widget.view.horizontalHeader().count(), 2)

        zoo = Table("zoo")
        self.send_signal(widget.Inputs.data, zoo)
        self.assertEqual(widget.view.model().rowCount(), 18)
        self.assertEqual(widget.view.horizontalHeader().count(), 2)

        self.send_signal(widget.Inputs.data, None)
        self.assertEqual(widget.view.model().rowCount(), 0)
        self.assertEqual(widget.view.horizontalHeader().count(), 2)

    def test_table_data_changed(self):
        widget = self.widget
        self.send_signal(widget.Inputs.data, self.data)
        index = widget.model.index(0, 1)
        widget.model.setData(index, 7, role=ValueRole)
        output = self.get_output(widget.Outputs.data)
        self.assertEqual(len(output), 151)
        self.assertEqual(output.X[150, 0], 7)

    def test_datasets(self):
        for dataset in datasets.datasets():
            self.send_signal(self.widget.Inputs.data, dataset)

    def test_missing_values(self):
        widget = self.widget
        domain = Domain([ContinuousVariable("c")],
                        class_vars=[DiscreteVariable("m", ("a", "b"))])
        data = Table(domain, np.array([[np.nan], [np.nan]]),
                     np.array([np.nan, np.nan]))
        widget.controls.append_to_data.setChecked(False)
        self.send_signal(widget.Inputs.data, data)
        output = self.get_output(widget.Outputs.data)
        self.assert_table_equal(output, data[:1])
        self.assertTrue(widget.Information.nans_removed.is_shown())

        self.send_signal(widget.Inputs.data, None)
        self.assertFalse(widget.Information.nans_removed.is_shown())

    def test_missing_values_reference(self):
        widget = self.widget
        reference = self.data[:1].copy()
        reference[:] = np.nan
        self.send_signal(widget.Inputs.data, self.data)
        self.send_signal(widget.Inputs.reference, reference)
        before = self.get_output(widget.Outputs.data)
        buttons = self._get_init_buttons()
        buttons[3].click()  # Input
        after = self.get_output(widget.Outputs.data)
        self.assert_table_equal(before, after)

    def test_saved_workflow(self):
        data = self.data
        data.X[:, 0] = np.nan
        self.send_signal(self.widget.Inputs.data, data)
        buttons = self._get_init_buttons()
        buttons[2].click()  # Random
        expected = self.get_output(self.widget.Outputs.data)

        settings = self.widget.settingsHandler.pack_data(self.widget)
        restored = self.create_widget(OWCreateInstance,
                                      stored_settings=settings)
        self.send_signal(restored.Inputs.data, data, widget=restored)
        actual = self.get_output(restored.Outputs.data)
        self.assert_table_equal(expected, actual)

    def test_commit_once(self):
        widget = self.widget
        widget.commit = widget.unconditional_commit = Mock()
        self.send_signal(widget.Inputs.data, self.data)
        widget.unconditional_commit.assert_called_once()

        widget.commit.reset_mock()
        self.send_signal(widget.Inputs.data, None)
        widget.commit.assert_called_once()

        widget.commit.reset_mock()
        self.send_signal(widget.Inputs.data, self.data)
        widget.commit.assert_called_once()

    def test_context_menu(self):
        widget = self.widget
        self.send_signal(widget.Inputs.data, self.data)
        self.send_signal(widget.Inputs.reference, self.data[:1])
        before = self.get_output(widget.Outputs.data)
        widget.view.customContextMenuRequested.emit(QPoint(0, 0))
        menu = [w for w in widget.children() if isinstance(w, QMenu)][0]
        self.assertEqual(len(menu.actions()), 4)

        menu.actions()[3].trigger()  # Input
        after = self.get_output(widget.Outputs.data)
        # only the first variable of the created row may change
        np.testing.assert_array_equal(after.X[:, 1:], before.X[:, 1:])
        np.testing.assert_array_equal(after.X[150:, :1], self.data.X[:1, :1])

    def test_report(self):
        widget = self.widget
        widget.send_report()
        self.send_signal(widget.Inputs.data, self.data)
        widget.send_report()
        self.send_signal(widget.Inputs.data, None)
        widget.send_report()

    def test_sparse(self):
        sparse = self.data.to_sparse()
        self.send_signal(self.widget.Inputs.data, sparse)
        self.send_signal(self.widget.Inputs.reference, sparse)


class TestDiscreteVariableEditor(GuiTest):
    """Tests of the combo-box editor for discrete variables."""

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.parent = QWidget()

    def setUp(self):
        self.callback = Mock()
        self.editor = DiscreteVariableEditor(
            self.parent, ["Foo", "Bar"], self.callback
        )

    def test_init(self):
        editor = self.editor
        self.assertEqual(editor.value, 0)
        self.assertEqual(editor._combo.currentText(), "Foo")
        self.callback.assert_not_called()

    def test_edit(self):
        """ Edit combo by user. """
        editor = self.editor
        editor._combo.setCurrentText("Bar")
        self.assertEqual(editor.value, 1)
        self.assertEqual(editor._combo.currentText(), "Bar")
        self.callback.assert_called_once()

    def test_set_value(self):
        """ Programmatically set combo box value. """
        editor = self.editor
        editor.value = 1
        self.assertEqual(editor.value, 1)
        self.assertEqual(editor._combo.currentText(), "Bar")
        self.callback.assert_called_once()
""" + self.editor.value = 1 + self.assertEqual(self.editor.value, 1) + self.assertEqual(self.editor._combo.currentText(), "Bar") + self.callback.assert_called_once() + + +class TestContinuousVariableEditor(GuiTest): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.parent = QWidget() + + def setUp(self): + self.callback = Mock() + data = Table("iris") + values = data.get_column_view(data.domain[0])[0] + self.min_value = np.min(values) + self.max_value = np.max(values) + self.editor = ContinuousVariableEditor( + self.parent, data.domain[0], self.min_value, + self.max_value, self.callback + ) + + def test_init(self): + self.assertEqual(self.editor.value, self.min_value) + self.assertEqual(self.editor._slider.value(), self.min_value * 10) + self.assertEqual(self.editor._spin.value(), self.min_value) + self.callback.assert_not_called() + + def test_edit_slider(self): + """ Edit slider by user. """ + self.editor._slider.setValue(int(self.max_value * 10)) + self.assertEqual(self.editor.value, self.max_value) + self.assertEqual(self.editor._slider.value(), self.max_value * 10) + self.assertEqual(self.editor._spin.value(), self.max_value) + self.callback.assert_called_once() + + self.callback.reset_mock() + value = self.min_value + (self.max_value - self.min_value) / 2 + self.editor._slider.setValue(int(value * 10)) + self.assertEqual(self.editor.value, value) + self.assertEqual(self.editor._slider.value(), value * 10) + self.assertEqual(self.editor._spin.value(), value) + self.callback.assert_called_once() + + def test_edit_spin(self): + """ Edit spin by user. 
""" + self.editor._spin.setValue(self.max_value) + self.assertEqual(self.editor.value, self.max_value) + self.assertEqual(self.editor._slider.value(), self.max_value * 10) + self.assertEqual(self.editor._spin.value(), self.max_value) + self.callback.assert_called_once() + + self.callback.reset_mock() + self.editor._spin.setValue(self.max_value + 1) + self.assertEqual(self.editor.value, self.max_value + 1) + self.assertEqual(self.editor._slider.value(), self.max_value * 10) + self.assertEqual(self.editor._spin.value(), self.max_value + 1) + self.callback.assert_called_once() + + self.callback.reset_mock() + value = self.min_value + (self.max_value - self.min_value) / 2 + self.editor._spin.setValue(value) + self.assertEqual(self.editor.value, value) + self.assertEqual(self.editor._slider.value(), value * 10) + self.assertEqual(self.editor._spin.value(), value) + self.callback.assert_called_once() + + def test_set_value(self): + """ Programmatically set slider/spin value. """ + self.editor.value = -2 + self.assertEqual(self.editor._slider.value(), self.min_value * 10) + self.assertEqual(self.editor._spin.value(), -2) + self.assertEqual(self.editor.value, -2) + self.callback.assert_called_once() + + self.callback.reset_mock() + value = self.min_value + (self.max_value - self.min_value) / 4 + self.editor.value = value + self.assertEqual(self.editor._slider.value(), value * 10) + self.assertEqual(self.editor._spin.value(), value) + self.assertEqual(self.editor.value, value) + self.callback.assert_called_once() + + def test_missing_values(self): + var = ContinuousVariable("var") + self.assertRaises(ValueError, ContinuousVariableEditor, self.parent, + var, np.nan, np.nan, Mock()) + + def test_overflow(self): + var = ContinuousVariable("var", number_of_decimals=10) + editor = ContinuousVariableEditor( + self.parent, var, -100000, 1, self.callback + ) + self.assertLess(editor._n_decimals, 10) + + def test_spin_selection_after_init(self): + edit: QLineEdit = 
self.editor._spin.lineEdit() + edit.selectAll() + self.assertEqual(edit.selectedText(), "") + self.assertIs(self.editor.focusProxy(), edit.parent()) + + +class TestStringVariableEditor(GuiTest): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.parent = QWidget() + + def setUp(self): + self.callback = Mock() + self.editor = StringVariableEditor(self.parent, self.callback) + + def test_init(self): + self.assertEqual(self.editor.value, "") + self.assertEqual(self.editor._edit.text(), "") + self.callback.assert_not_called() + + def test_edit(self): + """ Set lineedit by user. """ + self.editor._edit.setText("Foo") + self.assertEqual(self.editor.value, "Foo") + self.assertEqual(self.editor._edit.text(), "Foo") + self.callback.assert_called_once() + + def test_set_value(self): + """ Programmatically set lineedit value. """ + self.editor.value = "Foo" + self.assertEqual(self.editor.value, "Foo") + self.assertEqual(self.editor._edit.text(), "Foo") + self.callback.assert_called_once() + + +class TestTimeVariableEditor(GuiTest): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.parent = QWidget() + + def setUp(self): + self.callback = Mock() + self.editor = TimeVariableEditor( + self.parent, TimeVariable("var", have_date=1), self.callback + ) + + def test_init(self): + self.assertEqual(self.editor.value, 0) + self.assertEqual(self.editor._edit.dateTime(), + QDateTime(QDate(1970, 1, 1))) + self.callback.assert_not_called() + + def test_edit(self): + """ Edit datetimeedit by user. """ + datetime = QDateTime(QDate(2001, 9, 9)) + self.editor._edit.setDateTime(datetime) + self.assertEqual(self.editor.value, 999993600) + self.assertEqual(self.editor._edit.dateTime(), datetime) + self.callback.assert_called_once() + + def test_set_value(self): + """ Programmatically set datetimeedit value. 
""" + value = 999993600 + self.editor.value = value + self.assertEqual(self.editor._edit.dateTime(), + QDateTime(QDate(2001, 9, 9))) + self.assertEqual(self.editor.value, value) + self.callback.assert_called_once() + + def test_have_date_have_time(self): + callback = Mock() + editor = TimeVariableEditor( + self.parent, TimeVariable("var", have_date=1, have_time=1), + callback + ) + self.assertEqual(editor.value, 0) + self.assertEqual(self.editor._edit.dateTime(), + QDateTime(QDate(1970, 1, 1), QTime(0, 0, 0))) + self.callback.assert_not_called() + + datetime = QDateTime(QDate(2001, 9, 9), QTime(1, 2, 3)) + editor._edit.setDateTime(datetime) + self.assertEqual(editor._edit.dateTime(), datetime) + self.assertEqual(editor.value, 999993600 + 3723) + callback.assert_called_once() + + def test_have_time(self): + callback = Mock() + editor = TimeVariableEditor( + self.parent, TimeVariable("var", have_time=1), callback + ) + self.assertEqual(editor.value, 0) + self.assertEqual(self.editor._edit.dateTime(), + QDateTime(QDate(1970, 1, 1), QTime(0, 0, 0))) + self.callback.assert_not_called() + + datetime = QDateTime(QDate(1900, 1, 1), QTime(1, 2, 3)) + editor._edit.setDateTime(datetime) + self.assertEqual(editor._edit.dateTime(), datetime) + self.assertEqual(editor.value, 3723) + callback.assert_called_once() + + def test_no_date_no_time(self): + callback = Mock() + editor = TimeVariableEditor(self.parent, TimeVariable("var"), callback) + self.assertEqual(editor.value, 0) + self.assertEqual(self.editor._edit.dateTime(), + QDateTime(QDate(1970, 1, 1), QTime(0, 0, 0))) + self.callback.assert_not_called() + + datetime = QDateTime(QDate(2001, 9, 9), QTime(1, 2, 3)) + editor._edit.setDateTime(datetime) + self.assertEqual(editor._edit.dateTime(), datetime) + self.assertEqual(editor.value, 999993600 + 3723) + callback.assert_called_once() + + +class TestVariableDelegate(GuiTest): + def setUp(self): + self.data = Table("iris") + self.model = model = VariableItemModel() + 
model.set_data(self.data) + widget = OWCreateInstance() + self.delegate = VariableDelegate(widget) + self.parent = QWidget() + self.opt = QStyleOptionViewItem() + + def test_create_editor(self): + index = self.model.index(0, 1) + editor = self.delegate.createEditor(self.parent, self.opt, index) + self.assertIsInstance(editor, ContinuousVariableEditor) + + index = self.model.index(4, 1) + editor = self.delegate.createEditor(self.parent, self.opt, index) + self.assertIsInstance(editor, DiscreteVariableEditor) + + def test_set_editor_data(self): + index = self.model.index(0, 1) + editor = self.delegate.createEditor(self.parent, self.opt, index) + self.delegate.setEditorData(editor, index) + self.assertEqual(editor.value, np.median(self.data.X[:, 0])) + + def test_set_model_data(self): + index = self.model.index(0, 1) + editor = self.delegate.createEditor(self.parent, self.opt, index) + editor.value = 7.5 + self.delegate.setModelData(editor, self.model, index) + self.assertEqual(self.model.data(index, ValueRole), 7.5) + + def test_editor_geometry(self): + index = self.model.index(0, 1) + editor = self.delegate.createEditor(self.parent, self.opt, index) + self.delegate.updateEditorGeometry(editor, self.opt, index) + self.assertGreaterEqual(editor.geometry().width(), + self.opt.rect.width()) + + size = self.delegate.sizeHint(self.opt, index) + self.assertEqual(size.width(), editor.geometry().width()) + self.assertEqual(size.height(), 40) + + +if __name__ == "__main__": + import unittest + unittest.main() diff --git a/doc/visual-programming/source/index.rst b/doc/visual-programming/source/index.rst index 0a53138796e..7f62026896f 100644 --- a/doc/visual-programming/source/index.rst +++ b/doc/visual-programming/source/index.rst @@ -35,6 +35,7 @@ Data widgets/data/transpose widgets/data/discretize widgets/data/continuize + widgets/data/createinstance widgets/data/createclass widgets/data/randomize widgets/data/concatenate diff --git 
a/doc/visual-programming/source/widgets/data/createinstance.md b/doc/visual-programming/source/widgets/data/createinstance.md new file mode 100644 index 00000000000..16725371f2a --- /dev/null +++ b/doc/visual-programming/source/widgets/data/createinstance.md @@ -0,0 +1,44 @@ +Create Instance +=============== + +Interactively creates an instance from a sample dataset. + +**Inputs** + +- Data: input dataset +- Reference: reference dataset + +**Outputs** + +- Data: input dataset with the created instance appended + +The **Create Instance** widget creates a new instance, based on the input data. The widget displays all variables of the input dataset in a table of two columns. The column *Variable* represents the variable's name, while the column *Value* enables setting the variable's value. Each value is initially set to the median value of the variable. The values can be manually set to *Median*, *Mean*, *Random* or *Input* by clicking the corresponding button. For easier searching through the variables, the table has a filter attached. When clicking upon one of the mentioned buttons, only filtered variables are considered. One can also set the value by right-clicking a row and selecting an option in a context menu. + +![](images/CreateInstance-stamped.png) + +1. Filter table by variable name. +2. The column represents a variable's name and type. The table can be sorted by clicking the column header. +3. Provides controls for value editing. +4. Set filtered variables' values to: + - *Median*: median value of variable in the input dataset + - *Mean*: mean value of variable in the input dataset + - *Random*: random value in a range of variable in the input dataset + - *Input*: median value of variable in the reference dataset +5. If *Append this instance to input data* is ticked, the created instance is appended to the input dataset. Otherwise, a single instance appears on the output. To distinguish between created and original data, a *Source ID* variable is added. +5.
If *Apply automatically* is ticked, changes are committed automatically. Otherwise, you have to press *Apply* after each change. +6. Produce a report. +7. Information on the input and reference datasets. +8. Information on the output dataset. + +Example +------- + +The **Create Instance** widget is usually used to examine a model's performance on some arbitrary data. The basic usage is shown in the following workflow, where a (*Housing*) dataset is used to fit a [Linear Regression](../model/linearregression.md) model, which is then used to [predict](../evaluate/predictions.md) a target value for data created by the *Create Instance* widget. Inserting a [Rank](../data/rank.md) widget between [File](../data/file.md) and *Create Instance* enables outputting (and therefore making predictions on) the most important features. +A [Select Columns](../data/selectcolumns.md) widget is inserted to omit the actual target value. + +![](images/CreateInstance-example.png) + +The next example shows how to check whether the created instance is some kind of outlier. The created instance is fed to [PCA](../unsupervised/PCA.md), whose first and second components are then examined in a [Scatter Plot](../visualize/scatterplot.md). The created instance is colored red in the plot and it could be considered as an outlier if it appears far from the original data (blue).
+ +![](images/CreateInstance-example2.png) + diff --git a/doc/visual-programming/source/widgets/data/images/CreateInstance-example.png b/doc/visual-programming/source/widgets/data/images/CreateInstance-example.png new file mode 100644 index 00000000000..5b78b9d0729 Binary files /dev/null and b/doc/visual-programming/source/widgets/data/images/CreateInstance-example.png differ diff --git a/doc/visual-programming/source/widgets/data/images/CreateInstance-example2.png b/doc/visual-programming/source/widgets/data/images/CreateInstance-example2.png new file mode 100644 index 00000000000..164f2a66276 Binary files /dev/null and b/doc/visual-programming/source/widgets/data/images/CreateInstance-example2.png differ diff --git a/doc/visual-programming/source/widgets/data/images/CreateInstance-stamped.png b/doc/visual-programming/source/widgets/data/images/CreateInstance-stamped.png new file mode 100644 index 00000000000..c39e7985813 Binary files /dev/null and b/doc/visual-programming/source/widgets/data/images/CreateInstance-stamped.png differ diff --git a/doc/widgets.json b/doc/widgets.json index 8358e80c430..251a9c288d0 100644 --- a/doc/widgets.json +++ b/doc/widgets.json @@ -228,6 +228,15 @@ "program" ] }, + { + "text": "Create Instance", + "doc": "visual-programming/source/widgets/data/createinstance.md", + "icon": "../Orange/widgets/data/icons/CreateInstance.svg", + "background": "#FFD39F", + "keywords": [ + "simulator" + ] + }, { "text": "Color", "doc": "visual-programming/source/widgets/data/color.md",