Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] OWDataSampler: Data info displayed in the status bar #4492

Merged
merged 1 commit into from
Mar 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions Orange/widgets/data/owdatasampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from Orange.data import Table
from Orange.data.sql.table import SqlTable
from Orange.widgets.utils.widgetpreview import WidgetPreview
from Orange.widgets.utils.state_summary import format_summary_details
from Orange.widgets.widget import Msg, OWWidget, Input, Output
from Orange.util import Reprable

Expand Down Expand Up @@ -179,14 +180,14 @@ def set_data(self, dataset):
self.cb_seed.setVisible(not sql)
self.cb_stratify.setVisible(not sql)
self.cb_sql_dl.setVisible(sql)
self.info.set_input_summary(str(len(dataset)))
self.info.set_input_summary(len(dataset),
format_summary_details(dataset))

if not sql:
self._update_sample_max_size()
self.updateindices()
else:
self.info.set_input_summary(self.info.NoInput)
self.info.set_output_summary(self.info.NoInput)
self.indices = None
self.clear_messages()
self.commit()
Expand Down Expand Up @@ -224,12 +225,16 @@ def commit(self):
remaining, sample = self.indices
elif self.sampling_type == self.CrossValidation:
remaining, sample = self.indices[self.selectedFold - 1]
self.info.set_output_summary(str(len(sample)))

sample = self.data[sample]
other = self.data[remaining]
self.sampled_instances = len(sample)
self.remaining_instances = len(other)

summary = len(sample) if sample else self.info.NoOutput
details = format_summary_details(sample) if sample else ""
self.info.set_output_summary(summary, details)

self.Outputs.data_sample.send(sample)
self.Outputs.remaining_data.send(other)

Expand Down
23 changes: 12 additions & 11 deletions Orange/widgets/data/tests/test_owdatasampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from Orange.data import Table
from Orange.widgets.data.owdatasampler import OWDataSampler
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.utils.state_summary import format_summary_details


class TestOWDataSampler(WidgetTest):
Expand Down Expand Up @@ -38,7 +39,6 @@ def test_stratified_on_unbalanced_data(self):
self.assertTrue(self.widget.Warning.could_not_stratify.is_shown())

def test_bootstrap(self):
output_sum = self.widget.info.set_output_summary = Mock()
self.select_sampling_type(self.widget.Bootstrap)

self.send_signal("Data", self.iris)
Expand All @@ -59,20 +59,16 @@ def test_bootstrap(self):
# high probability (1-(1/150*2/150*...*150/150) ~= 1-2e-64)
self.assertGreater(len(in_sample), 0)
self.assertGreater(len(in_remaining), 0)
#Check if status bar shows correct number of output data
output_sum.assert_called_with(str(len(sample)))

# Test helper: fetch the buttons of the widget's `sampling_type` control group
# and click the one at index `sampling_type` to select that sampling mode.
def select_sampling_type(self, sampling_type):
buttons = self.widget.controls.sampling_type.group.buttons()
buttons[sampling_type].click()

def test_no_intersection_in_outputs(self):
""" Check whether outputs intersect and whether length of outputs sums
to length of original data and
if status bar displays correct output for each sampling type"""
to length of original data"""
self.send_signal("Data", self.iris)
w = self.widget
output_sum = self.widget.info.set_output_summary = Mock()
sampling_types = [w.FixedProportion, w.FixedSize, w.CrossValidation]

for replicable in [True, False]:
Expand All @@ -87,7 +83,6 @@ def test_no_intersection_in_outputs(self):
other = self.get_output("Remaining Data")
self.assertEqual(len(self.iris), len(sample) + len(other))
self.assertNoIntersection(sample, other)
output_sum.assert_called_with(str(len(sample)))

def test_bigger_size_with_replacement(self):
"""Allow bigger output without replacement."""
Expand Down Expand Up @@ -126,17 +121,23 @@ def test_shuffling(self):

def test_summary(self):
"""Check if status bar is updated when data is received"""
input_sum = self.widget.info.set_input_summary = Mock()
data = self.iris
input_sum = self.widget.info.set_input_summary = Mock()
output_sum = self.widget.info.set_output_summary = Mock()

input_sum.reset_mock()
self.send_signal(self.widget.Inputs.data, data[:])
input_sum.assert_called_with("150")
self.send_signal(self.widget.Inputs.data, data)
input_sum.assert_called_with(len(data), format_summary_details(data))
output = self.get_output(self.widget.Outputs.data_sample)
output_sum.assert_called_with(len(output),
format_summary_details(output))

input_sum.reset_mock()
output_sum.reset_mock()
self.send_signal(self.widget.Inputs.data, None)
input_sum.assert_called_once()
self.assertEqual(input_sum.call_args[0][0].brief, "")
output_sum.assert_called_once()
self.assertEqual(output_sum.call_args[0][0].brief, "")

def set_fixed_sample_size(self, sample_size, with_replacement=False):
"""Set fixed sample size and return the number of gui spin.
Expand Down