Skip to content

Commit

Permalink
Merge pull request #3177 from thocevar/scatterplot-overlap
Browse files Browse the repository at this point in the history
[ENH] Scatterplot: indicate overlap of points.
  • Loading branch information
astaric authored Aug 24, 2018
2 parents 565c22c + a03e6ac commit d15cf88
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 55 deletions.
19 changes: 8 additions & 11 deletions Orange/widgets/unsupervised/owmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ def update_data(self, attr_x, attr_y, reset_view=True):
self.plot_widget.setAspectLocked(True, 1)

def compute_sizes(self):
"""Handle 'Stress' size option.
Everything else is passed to Scatterplot's compute_sizes"""

if self.attr_size != "Stress":
return super().compute_sizes()

def scale(a):
dmin, dmax = np.nanmin(a), np.nanmax(a)
if dmax - dmin > 0:
Expand All @@ -64,17 +70,8 @@ def scale(a):
return np.zeros_like(a)

self.master.Information.missing_size.clear()
if self.attr_size is None:
size_data = np.full((self.n_points,), self.point_width,
dtype=float)
elif self.attr_size == "Stress":
size_data = scale(stress(self.master.embedding, self.master.effective_matrix))
size_data = self.MinShapeSize + size_data * self.point_width
else:
size_data = \
self.MinShapeSize + \
self.scaled_data.get_column_view(self.attr_size)[0][self.valid_data] * \
self.point_width
size_data = scale(stress(self.master.embedding, self.master.effective_matrix))
size_data = self.MinShapeSize + size_data * self.point_width
nans = np.isnan(size_data)
if np.any(nans):
size_data[nans] = self.MinShapeSize - 2
Expand Down
3 changes: 3 additions & 0 deletions Orange/widgets/utils/plot/owplotgui.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,11 +465,14 @@ def __init__(self, plot):
self.shape_model = DomainModel(placeholder="(Same shape)",
valid_types=DiscreteVariable)
self.size_model = DomainModel(placeholder="(Same size)",
order=(self.SizeByOverlap,) + DomainModel.SEPARATED,
valid_types=ContinuousVariable)
self.label_model = DomainModel(placeholder="(No labels)")
self.points_models = [self.color_model, self.shape_model,
self.size_model, self.label_model]

SizeByOverlap = "Overlap"

Spacing = 0

ShowLegend = 2
Expand Down
52 changes: 38 additions & 14 deletions Orange/widgets/visualize/owscatterplotgraph.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from collections import Counter
from collections import Counter, defaultdict
import sys
import itertools
from xml.sax.saxutils import escape
from math import log10, floor, ceil
from math import log2, log10, floor, ceil

import numpy as np
from scipy.stats import linregress
Expand Down Expand Up @@ -691,13 +691,13 @@ def update_data(self, attr_x, attr_y, reset_view=True):
self.shown_x.name, self.shown_y.name)
return

x_data, y_data = self.get_xy_data_positions(
self.x_data, self.y_data = self.get_xy_data_positions(
attr_x, attr_y, self.valid_data)
self.n_points = len(x_data)
self.n_points = len(self.x_data)

if reset_view:
min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
min_x, max_x = np.nanmin(self.x_data), np.nanmax(self.x_data)
min_y, max_y = np.nanmin(self.y_data), np.nanmax(self.y_data)
self.view_box.setRange(
QRectF(min_x, min_y, max_x - min_x, max_y - min_y),
padding=0.025)
Expand All @@ -712,6 +712,15 @@ def update_data(self, attr_x, attr_y, reset_view=True):
else:
self.set_labels(axis, None)

# compute overlaps of points for use in compute_colors and compute_sizes
self.overlaps = []
self.coord_to_id = defaultdict(list)
for i, xy in enumerate(zip(self.x_data, self.y_data)):
self.coord_to_id[xy].append(i)
self.overlaps = [len(self.coord_to_id[xy])
for i, xy in enumerate(zip(self.x_data, self.y_data))]
self.overlap_factor = [1+log2(o) for o in self.overlaps]

color_data, brush_data = self.compute_colors()
color_data_sel, brush_data_sel = self.compute_colors_sel()
size_data = self.compute_sizes()
Expand All @@ -721,7 +730,7 @@ def update_data(self, attr_x, attr_y, reset_view=True):
rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
self.density_img = classdensity.class_density_image(
min_x, max_x, min_y, max_y, self.resolution,
x_data, y_data, rgb_data)
self.x_data, self.y_data, rgb_data)
self.plot_widget.addItem(self.density_img)

self.data_indices = np.flatnonzero(self.valid_data)
Expand All @@ -730,11 +739,11 @@ def update_data(self, attr_x, attr_y, reset_view=True):
self.shown_x.name, self.shown_y.name)

self.scatterplot_item = ScatterPlotItem(
x=x_data, y=y_data, data=self.data_indices,
x=self.x_data, y=self.y_data, data=self.data_indices,
symbol=shape_data, size=size_data, pen=color_data, brush=brush_data
)
self.scatterplot_item_sel = ScatterPlotItem(
x=x_data, y=y_data, data=self.data_indices,
x=self.x_data, y=self.y_data, data=self.data_indices,
symbol=shape_data, size=size_data + SELECTION_WIDTH,
pen=color_data_sel, brush=brush_data_sel
)
Expand Down Expand Up @@ -803,7 +812,7 @@ def set_axis_title(self, axis, title):

def compute_sizes(self):
self.master.Information.missing_size.clear()
if self.attr_size is None:
if self.attr_size in [None, OWPlotGUI.SizeByOverlap]:
size_data = np.full((self.n_points,), self.point_width,
dtype=float)
else:
Expand All @@ -815,6 +824,11 @@ def compute_sizes(self):
if np.any(nans):
size_data[nans] = self.MinShapeSize - 2
self.master.Information.missing_size(self.attr_size)

# scale sizes because of overlaps
if self.attr_size == OWPlotGUI.SizeByOverlap:
size_data = np.multiply(size_data, self.overlap_factor)

return size_data

def update_sizes(self):
Expand Down Expand Up @@ -947,16 +961,26 @@ def compute_colors(self, keep_colors=False):
c_data = c_data.astype(int)
colors = np.r_[palette.getRGB(np.arange(n_colors)),
[[128, 128, 128]]]
pens = np.array(
pen_colors_palette = np.array(
[_make_pen(QColor(*col).darker(self.DarkerValue), 1.5)
for col in colors])
self.pen_colors = pens[c_data]
self.pen_colors = pen_colors_palette[c_data]
alpha = self.alpha_value if subset is None else 255
self.brush_colors = np.array([
brush_colors_palette = np.array([
[QBrush(QColor(0, 0, 0, 0)),
QBrush(QColor(col[0], col[1], col[2], alpha))]
for col in colors])
self.brush_colors = self.brush_colors[c_data]
self.brush_colors = brush_colors_palette[c_data]

if self.attr_size == OWPlotGUI.SizeByOverlap:
# color overlapping points by most frequent color
for i, xy in enumerate(zip(self.x_data, self.y_data)):
if self.overlaps[i] > 1:
cnt = Counter(c_data[j] for j in self.coord_to_id[xy])
c = cnt.most_common(1)[0][0]
self.brush_colors[i] = brush_colors_palette[c]
self.pen_colors[i] = pen_colors_palette[c]

if subset is not None:
brush = np.where(
subset,
Expand Down
8 changes: 0 additions & 8 deletions Orange/widgets/visualize/tests/test_owfreeviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@ def setUpClass(cls):
def setUp(self):
self.widget = self.create_widget(OWFreeViz)

def test_points_combo_boxes(self):
self.send_signal(self.widget.Inputs.data, self.heart_disease)
graph = self.widget.controls.graph
self.assertEqual(len(graph.attr_color.model()), 17)
self.assertEqual(len(graph.attr_shape.model()), 11)
self.assertEqual(len(graph.attr_size.model()), 8)
self.assertEqual(len(graph.attr_label.model()), 17)

def test_ugly_datasets(self):
self.send_signal(self.widget.Inputs.data, Table(datasets.path("testing_dataset_cls")))
self.send_signal(self.widget.Inputs.data, Table(datasets.path("testing_dataset_reg")))
Expand Down
8 changes: 0 additions & 8 deletions Orange/widgets/visualize/tests/test_owlinearprojection.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,6 @@ def test_nan_plot(self):
with excepthook_catch():
simulate.combobox_activate_item(cb.attr_size, "X1")

def test_points_combo_boxes(self):
self.send_signal("Data", self.data)
graph = self.widget.controls.graph
self.assertEqual(len(graph.attr_color.model()), 8)
self.assertEqual(len(graph.attr_shape.model()), 3)
self.assertEqual(len(graph.attr_size.model()), 6)
self.assertEqual(len(graph.attr_label.model()), 8)

def test_buttons(self):
for btn in self.widget.radio_placement.buttons[:3]:
self.send_signal(self.widget.Inputs.data, self.data)
Expand Down
8 changes: 0 additions & 8 deletions Orange/widgets/visualize/tests/test_owradviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,6 @@ def setUpClass(cls):
def setUp(self):
self.widget = self.create_widget(OWRadviz)

def test_points_combo_boxes(self):
self.send_signal(self.widget.Inputs.data, self.heart_disease)
graph = self.widget.controls.graph
self.assertEqual(len(graph.attr_color.model()), 17)
self.assertEqual(len(graph.attr_shape.model()), 11)
self.assertEqual(len(graph.attr_size.model()), 8)
self.assertEqual(len(graph.attr_label.model()), 17)

def test_ugly_datasets(self):
self.send_signal(self.widget.Inputs.data, Table(datasets.path("testing_dataset_cls")))
self.send_signal(self.widget.Inputs.data, Table(datasets.path("testing_dataset_reg")))
Expand Down
29 changes: 24 additions & 5 deletions Orange/widgets/visualize/tests/test_owscatterplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from AnyQt.QtWidgets import QToolTip

from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable
from Orange.widgets.utils.plot import OWPlotGUI
from Orange.widgets.visualize.owscatterplotgraph import MAX
from Orange.widgets.widget import AttributeList
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin, datasets
Expand Down Expand Up @@ -154,15 +155,33 @@ def test_regression_line(self):
def test_points_combo_boxes(self):
"""Check Point box combo models and values"""
self.send_signal(self.widget.Inputs.data, self.data)
self.assertEqual(len(self.widget.controls.graph.attr_color.model()), 8)
self.assertEqual(len(self.widget.controls.graph.attr_shape.model()), 3)
self.assertEqual(len(self.widget.controls.graph.attr_size.model()), 6)
self.assertEqual(len(self.widget.controls.graph.attr_label.model()), 8)
graph = self.widget.controls.graph

# color and label should contain all variables
# size should contain only continuous variables
# shape should contain only discrete variables
for var in self.data.domain.variables + self.data.domain.metas:
self.assertIn(var, graph.attr_color.model())
self.assertIn(var, graph.attr_label.model())
if var.is_continuous:
self.assertIn(var, graph.attr_size.model())
self.assertNotIn(var, graph.attr_shape.model())
if var.is_discrete:
self.assertNotIn(var, graph.attr_size.model())
self.assertIn(var, graph.attr_shape.model())

other_widget = self.create_widget(OWScatterPlot)
self.send_signal(self.widget.Inputs.data, self.data, widget=other_widget)
self.assertEqual(self.widget.graph.controls.attr_color.currentText(),
self.assertEqual(graph.attr_color.currentText(),
self.data.domain.class_var.name)

def test_overlap(self):
    """Check that sizing points by overlap yields more than one size."""
    widget = self.widget
    self.send_signal(widget.Inputs.data, Table("iris"))

    # Before "Overlap" is chosen, all computed sizes are equal.
    sizes_before = set(widget.graph.compute_sizes())
    self.assertEqual(len(sizes_before), 1)

    size_combo = widget.controls.graph.attr_size
    simulate.combobox_activate_item(size_combo, OWPlotGUI.SizeByOverlap)

    # With "Overlap" selected, the computed sizes are no longer all equal.
    sizes_after = set(widget.graph.compute_sizes())
    self.assertGreater(len(sizes_after), 1)

def test_group_selections(self):
self.send_signal(self.widget.Inputs.data, self.data)
graph = self.widget.graph
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ the left side of the widget. A snapshot below shows the scatterplot of the
2. Set the color of the displayed points (you will get colors for discrete
values and grey-scale points for continuous). Set label, shape and
size to differentiate between points. Set symbol size and opacity for
all data points. Set the desired color scale.
all data points. Set the desired color scale. To visualize the number
of overlapping points, use *Overlap* for size.
3. Adjust *plot properties*:

- *Show legend* displays a legend on the right. Click and drag the legend to move it.
Expand Down

0 comments on commit d15cf88

Please sign in to comment.