Skip to content

Commit

Permalink
OWDomain: Merge less frequent values
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Mar 6, 2020
1 parent 3781811 commit 776b432
Show file tree
Hide file tree
Showing 2 changed files with 324 additions and 88 deletions.
282 changes: 224 additions & 58 deletions Orange/widgets/data/oweditdomain.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
QToolButton, QLineEdit, QAction, QActionGroup, QGroupBox,
QStyledItemDelegate, QStyleOptionViewItem, QStyle, QSizePolicy, QToolTip,
QDialogButtonBox, QPushButton, QCheckBox, QComboBox, QShortcut,
QStackedLayout
)
QStackedLayout,
QDialog, QRadioButton, QGridLayout, QLabel, QSpinBox, QDoubleSpinBox)
from AnyQt.QtGui import QStandardItemModel, QStandardItem, QKeySequence, QIcon
from AnyQt.QtCore import (
Qt, QEvent, QSize, QModelIndex, QAbstractItemModel, QPersistentModelIndex,
QRect,
)
QPoint)
from AnyQt.QtCore import pyqtSignal as Signal, pyqtSlot as Slot

import numpy as np
Expand Down Expand Up @@ -662,6 +662,168 @@ def on_label_selection_changed(self):
self.remove_label_action.setEnabled(bool(len(selected)))


class GroupLessFrequentItemsDialog(QDialog):
    """
    A dialog for grouping less frequent values of a categorical variable.

    The user picks one of four grouping strategies (the selected values,
    values below an absolute count, values below a relative frequency, or
    everything except the N most frequent values) and the name of the value
    that the grouped values are merged into.

    Parameters
    ----------
    variable
        The categorical variable whose values are grouped; only its
        `categories` are read.
    data
        Values of the variable. They are used as indices into
        `variable.categories`; masked and NaN entries are treated as missing
        and ignored.
    selected_attributes
        Names of the values currently selected by the caller; returned as-is
        when the "Group selected values" strategy is chosen.
    parent
        Parent widget, passed to `QDialog`.
    flags
        Window flags, passed to `QDialog`.
    """
    def __init__(
            self, variable: Categorical, data: Union[np.ndarray, List],
            selected_attributes: List[str], parent: QWidget = None,
            flags: Qt.WindowFlags = Qt.Dialog, **kwargs
    ) -> None:
        super().__init__(parent, flags, **kwargs)
        self.variable = variable
        self.data = data
        self.selected_attributes = selected_attributes

        # grouping strategy
        self.selected_radio = radio1 = QRadioButton("Group selected values")
        self.frequent_abs_radio = radio2 = QRadioButton(
            "Group values with less than"
        )
        self.frequent_rel_radio = radio3 = QRadioButton(
            "Group values with less than"
        )
        self.n_values_radio = radio4 = QRadioButton(
            "Group all except"
        )

        # if selected attributes are available check the first radio button,
        # it is the most probable option
        if selected_attributes:
            radio1.setChecked(True)
        else:
            radio2.setChecked(True)

        label2 = QLabel("occurrences")
        label3 = QLabel("occurrences")
        label4 = QLabel("most frequent values")

        self.frequent_abs_spin = spin2 = QSpinBox()
        max_val = len(data)
        spin2.setMinimum(1)
        spin2.setMaximum(max_val)
        spin2.setValue(10)
        spin2.setMinimumWidth(self._spin_width(len(str(max_val)) + 1))
        spin2.valueChanged.connect(partial(self._spin_changed, spin2))

        self.frequent_rel_spin = spin3 = QDoubleSpinBox()
        spin3.setMinimum(0)
        spin3.setDecimals(1)
        spin3.setSingleStep(0.1)
        spin3.setMaximum(100)
        spin3.setValue(10)
        spin3.setMinimumWidth(self._spin_width(2 + 1))
        spin3.setSuffix(" %")
        spin3.valueChanged.connect(partial(self._spin_changed, spin3))

        self.n_values_spin = spin4 = QSpinBox()
        n_categories = len(variable.categories)
        spin4.setMinimum(0)
        spin4.setMaximum(n_categories)
        spin4.setValue(min(10, n_categories))
        # size the box for the largest value it can hold, not for the
        # default value, so that large category counts are not clipped
        spin4.setMinimumWidth(self._spin_width(len(str(n_categories)) + 1))
        spin4.valueChanged.connect(partial(self._spin_changed, spin4))

        grid_layout = QGridLayout()
        # first row
        grid_layout.addWidget(radio1, 0, 0, 1, 2)
        # second row
        grid_layout.addWidget(radio2, 1, 0, 1, 2)
        grid_layout.addWidget(spin2, 1, 2)
        grid_layout.addWidget(label2, 1, 3)
        # third row
        grid_layout.addWidget(radio3, 2, 0, 1, 2)
        grid_layout.addWidget(spin3, 2, 2)
        grid_layout.addWidget(label3, 2, 3)
        # fourth row
        grid_layout.addWidget(radio4, 3, 0)
        grid_layout.addWidget(spin4, 3, 1)
        grid_layout.addWidget(label4, 3, 2, 1, 2)

        group_box = QGroupBox("Grouping strategy")
        group_box.setLayout(grid_layout)

        # grouped variable name
        new_name_label = QLabel("New value name: ")
        self.new_name_line_edit = n_line_edit = QLineEdit("Others")
        # the placeholder is shown in gray when the user removes the text; it
        # lets the user know that "Others" is the default name
        n_line_edit.setPlaceholderText("Others")
        name_hlayout = QHBoxLayout()
        name_hlayout.addWidget(new_name_label)
        name_hlayout.addWidget(n_line_edit)

        buttons = QDialogButtonBox(
            orientation=Qt.Horizontal,
            standardButtons=(QDialogButtonBox.Ok | QDialogButtonBox.Cancel),
            objectName="dialog-button-box",
        )
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)

        # join components
        self.setLayout(QVBoxLayout())
        self.layout().addWidget(group_box)
        self.layout().addLayout(name_hlayout)
        self.layout().addWidget(buttons)
        self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)

    def _spin_width(self, n_chars: int) -> int:
        """
        Return the minimum spin box width for `n_chars` characters.

        The width is `n_chars` times the width of "X" in the dialog's font
        plus a fixed 20 pixel allowance.
        """
        return self.fontMetrics().width("X") * n_chars + 20

    def _spin_changed(self, spin: QSpinBox) -> None:
        """
        Check the radio button that corresponds to the changed spin box.

        Parameters
        ----------
        spin
            The spin box whose value has been changed
        """
        if spin is self.n_values_spin:
            self.n_values_radio.setChecked(True)
        elif spin is self.frequent_abs_spin:
            self.frequent_abs_radio.setChecked(True)
        elif spin is self.frequent_rel_spin:
            self.frequent_rel_radio.setChecked(True)

    def _value_counts(self) -> Counter:
        """
        Count occurrences of each category index in the data.

        Masked entries and NaNs are dropped and the remaining values are cast
        to `int`, so that the keys can be used to index the categories.
        """
        values = np.ma.asarray(self.data, dtype=float).compressed()
        values = values[~np.isnan(values)]
        return Counter(values.astype(int).tolist())

    def get_merge_attributes(self) -> List[str]:
        """
        Return the names of the values that will be merged.

        Returns
        -------
        List of names of the values to be merged. For the frequency based
        strategies the names are ordered from the most to the least frequent
        value; only values that occur in the data are considered.
        """
        if self.selected_radio.isChecked():
            # no need to count anything for an explicit selection
            return self.selected_attributes

        ranked = self._value_counts().most_common()
        if self.n_values_radio.isChecked():
            keep_values = self.n_values_spin.value()
            indices = [i for i, _ in ranked[keep_values:]]
        elif self.frequent_abs_radio.isChecked():
            threshold = self.frequent_abs_spin.value()
            indices = [i for i, c in ranked if c < threshold]
        else:  # self.frequent_rel_radio.isChecked():
            threshold = self.frequent_rel_spin.value()
            n_all = sum(c for _, c in ranked)
            indices = [i for i, c in ranked if c / n_all * 100 < threshold]
        categories = self.variable.categories
        return [categories[i] for i in indices]

    def get_merged_value_name(self) -> str:
        """
        Return the name for the merged value.

        Returns
        -------
        The text entered by the user, or "Others" if the field is empty
        """
        return self.new_name_line_edit.text() or "Others"


@contextmanager
def disconnected(signal, slot, connection_type=Qt.AutoConnection):
signal.disconnect(slot)
Expand Down Expand Up @@ -847,6 +1009,9 @@ class DiscreteVariableEditor(VariableEditor):
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self._values = None

form = self.layout().itemAt(0)
assert isinstance(form, QFormLayout)
self.ordered_cb = QCheckBox(
Expand Down Expand Up @@ -916,13 +1081,12 @@ def __init__(self, *args, **kwargs):
objectName="action-merge-item",
toolTip="Merge selected items.",
shortcut=QKeySequence(Qt.ControlModifier | Qt.Key_Equal),
shortcutContext=Qt.WidgetShortcut,
enabled=False,
shortcutContext=Qt.WidgetShortcut
)

self.add_new_item.triggered.connect(self._add_category)
self.remove_item.triggered.connect(self._remove_category)
self.merge_items.triggered.connect(self._merge_categories)
self.merge_items.triggered.connect(self._merge_less_common_categories)

button1 = FixedSizeButton(
self, defaultAction=self.move_value_up,
Expand All @@ -945,8 +1109,7 @@ def __init__(self, *args, **kwargs):
accessibleName="Merge",
)
self.values_edit.addActions([self.move_value_up, self.move_value_down,
self.add_new_item, self.remove_item,
self.merge_items])
self.add_new_item, self.remove_item])
hlayout.addWidget(button1)
hlayout.addWidget(button2)
hlayout.addSpacing(3)
Expand All @@ -968,13 +1131,16 @@ def __init__(self, *args, **kwargs):
QWidget.setTabOrder(button2, button3)
QWidget.setTabOrder(button3, button4)

def set_data(self, var, transform=()):
# type: (Optional[Categorical], Sequence[Transform]) -> None
def set_data(self, var, values, transform=()):
# type: (Optional[Categorical], Optional[List], Sequence[Transform]) -> None
"""
Set the variable to edit.
`values` is needed for categorical features to perform grouping.
"""
# pylint: disable=too-many-branches
super().set_data(var, transform)
self._values = values
tr = None # type: Optional[CategoriesMapping]
ordered = None # type: Optional[ChangeOrdered]
for tr_ in transform:
Expand Down Expand Up @@ -1118,7 +1284,6 @@ def on_value_selection_changed(self):
len(rows) > 1 and \
not any(index.data(EditStateRole) != ItemEditState.NoState
for index in rows)
self.merge_items.setEnabled(enable_merge)

if len(rows) == 1:
i = rows[0].row()
Expand Down Expand Up @@ -1182,65 +1347,63 @@ def _add_category(self):
view.edit(index)
self.on_values_changed()

def _merge_categories(self):
def _reset_name_merge(self) -> None:
"""
This function resets renamed and merged variables in the model.
"""
Merge selected categories into one.
view = self.values_edit
model = view.model() # type: QAbstractItemModel
prows = [
QPersistentModelIndex(model.index(i, 0))
for i in range(model.rowCount())
]
with disconnected(model.dataChanged, self.on_values_changed):
for prow in prows:
if prow.isValid():
model.setData(
QModelIndex(prow), prow.data(SourceNameRole),
Qt.EditRole
)
self.variable_changed.emit()

Popup an editable combo box for selection/edit of a new value.
def _merge_less_common_categories(self) -> None:
"""
Merge less common categories into one with the dialog for merge
selection.
"""
view = self.values_edit
model = view.model() # type: QAbstractItemModel
rows = view.selectedIndexes() # type: List[QModelIndex]
if not len(rows) >= 2:
return # pragma: no cover
first_row = rows[0]

def mapRectTo(widget, parent, rect):
# type: (QWidget, QWidget, QRect) -> QRect
return QRect(
widget.mapTo(parent, rect.topLeft()),
rect.size(),
)
selected_attributes = [ind.data() for ind in view.selectedIndexes()]

def mapRectToGlobal(widget, rect):
# type: (QWidget, QRect) -> QRect
return QRect(
widget.mapToGlobal(rect.topLeft()),
rect.size(),
)
view.scrollTo(first_row)
vport = view.viewport()
vrect = view.visualRect(first_row)
vrect = mapRectTo(vport, view, vrect)
vrect = vrect.intersected(vport.geometry())
vrect = mapRectToGlobal(vport, vrect)

cb = QComboBox(editable=True, insertPolicy=QComboBox.InsertAtBottom)
cb.setAttribute(Qt.WA_DeleteOnClose)
sh = QShortcut(QKeySequence(QKeySequence.Cancel), cb)
sh.activated.connect(cb.close)
cb.setParent(self, Qt.Popup)
cb.move(vrect.topLeft())

cb.addItems(
list(unique(str(row.data(Qt.EditRole)) for row in rows)))
prows = [QPersistentModelIndex(row) for row in rows]

def complete_merge(text):
dlg = GroupLessFrequentItemsDialog(
self.var, self._values, selected_attributes, self,
windowTitle="Import Options",
sizeGripEnabled=True,
)
dlg.setWindowModality(Qt.WindowModal)
status = dlg.exec_()
dlg.deleteLater()

prows = [
QPersistentModelIndex(model.index(i, 0))
for i in range(model.rowCount())
]

def complete_merge(text, merge_attributes):
# write the new text for edit role in all rows
self._reset_name_merge()
with disconnected(model.dataChanged, self.on_values_changed):
for prow in prows:
if prow.isValid():
if (prow.isValid()
and prow.data(SourceNameRole) in merge_attributes):
model.setData(QModelIndex(prow), text, Qt.EditRole)
cb.close()
self.variable_changed.emit()

cb.activated[str].connect(complete_merge)
size = cb.sizeHint().expandedTo(vrect.size())
cb.resize(size)
cb.show()
cb.raise_()
cb.setFocus(Qt.PopupFocusReason)
if status == QDialog.Accepted:
complete_merge(
dlg.get_merged_value_name(), dlg.get_merge_attributes()
)

def _set_ordered(self, ordered):
self.ordered_cb.setChecked(ordered)
Expand Down Expand Up @@ -1438,7 +1601,10 @@ def set_data(self, data, transform=()): # pylint: disable=arguments-differ
if index != -1:
w = self.layout().currentWidget()
assert isinstance(w, VariableEditor)
w.set_data(var, transform)
if isinstance(var, Categorical):
w.set_data(var, data.data(), transform=transform)
else:
w.set_data(var, transform)
self.__history[var] = tuple(transform)
cb = w.findChild(QComboBox, "type-combo")
cb.setCurrentIndex(index)
Expand Down
Loading

0 comments on commit 776b432

Please sign in to comment.