Skip to content

Commit

Permalink
Merge pull request #6920 from janezd/datasets-add-domain
Browse files Browse the repository at this point in the history
[ENH] Datasets: Add domain field; respect "Unlisted"
  • Loading branch information
markotoplak authored Nov 11, 2024
2 parents 871b406 + 300a753 commit 747b3b2
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 53 deletions.
139 changes: 118 additions & 21 deletions Orange/widgets/data/owdatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@

log = logging.getLogger(__name__)

# These two constants are used in settings (and in the proxy filter model).
# The corresponding options in the combo box are translatable, therefore
# the settings must be stored in language-independent form.
GENERAL_DOMAIN = None
ALL_DOMAINS = "" # The setting is Optional[str], so don't use other types here


def ensure_local(index_url, file_path, local_cache_path,
force=False, progress_advance=None):
Expand Down Expand Up @@ -102,6 +108,8 @@ class UniformHeightIndicatorDelegate(


class Namespace(SimpleNamespace):
PUBLISHED, UNLISTED = range(2)

def __init__(self, **kwargs):
self.file_path = None
self.prefix = None
Expand All @@ -122,8 +130,10 @@ def __init__(self, **kwargs):
self.seealso = []
self.tags = []
self.language = "English"
self.domain = None
self.publication_status = self.PUBLISHED

super(Namespace, self).__init__(**kwargs)
super().__init__(**kwargs)

# if title missing, use filename
if not self.title and self.filename:
Expand All @@ -144,6 +154,12 @@ class SortFilterProxyWithLanguage(QSortFilterProxyModel):
def __init__(self):
super().__init__()
self.__language = None
self.__domain = None
self.__filter = None

def setFilterFixedString(self, pattern):
self.__filter = pattern and pattern.casefold()
super().setFilterFixedString(pattern)

def setLanguage(self, language):
self.__language = language
Expand All @@ -152,11 +168,24 @@ def setLanguage(self, language):
def language(self):
return self.__language

def setDomain(self, domain):
self.__domain = domain
self.invalidateFilter()

def domain(self):
return self.__domain

def filterAcceptsRow(self, row, parent):
source = self.sourceModel()
return super().filterAcceptsRow(row, parent) and (
self.__language is None
or source.index(row, 0).data(Qt.UserRole).language == self.__language
data = source.index(row, 0).data(Qt.UserRole)
return (super().filterAcceptsRow(row, parent)
and (self.__language is None or data.language == self.__language)
and self.__domain in (ALL_DOMAINS, data.domain)
and (data.publication_status == Namespace.PUBLISHED or (
self.__filter is not None
and len(self.__filter) >= 5
and data.title.casefold().startswith(self.__filter)
))
)


Expand All @@ -178,6 +207,10 @@ class OWDataSets(OWWidget):
DEFAULT_LANG = "English"
ALL_LANGUAGES = "All Languages"

# These two combo options are translatable; others (domain names) are not
GENERAL_DOMAIN_LABEL = "(General)"
ALL_DOMAINS_LABEL = "(Show all)"

# override HEADER_SCHEMA to define new columns
# if schema is changed override methods: self.assign_delegates and
# self.create_model
Expand Down Expand Up @@ -206,6 +239,8 @@ class Outputs:
#: Selected dataset id
selected_id = Setting(None) # type: Optional[str]
language = Setting(DEFAULT_LANG)
domain = Setting(GENERAL_DOMAIN)
settings_version = 2

#: main area splitter state
splitter_state = Setting(b'') # type: bytes
Expand Down Expand Up @@ -235,6 +270,7 @@ def __init__(self):
textChanged=self.filter, placeholderText="Search for data set ..."
)
layout.addWidget(self.filterLineEdit)

layout.addSpacing(20)
layout.addWidget(QLabel("Show data sets in "))
lang_combo = self.language_combo = QComboBox()
Expand All @@ -245,6 +281,15 @@ def __init__(self):
lang_combo.setCurrentText(self.language)
lang_combo.activated.connect(self._on_language_changed)
layout.addWidget(lang_combo)

layout.addSpacing(20)
layout.addWidget(QLabel("Domain:"))
domain_combo = self.domain_combo = QComboBox()
domain_combo.addItem(self.GENERAL_DOMAIN_LABEL)
domain_combo.activated.connect(self._on_domain_changed)
if self.core_widget:
layout.addWidget(domain_combo)

self.mainArea.layout().addLayout(layout)

self.splitter = QSplitter(orientation=Qt.Vertical)
Expand Down Expand Up @@ -290,6 +335,9 @@ def __init__(self):
proxy.setFilterKeyColumn(-1)
proxy.setFilterCaseSensitivity(Qt.CaseInsensitive)
self.view.setModel(proxy)
if not self.core_widget:
self.domain = ALL_DOMAINS
self.view.model().setDomain(self.domain)

if self.splitter_state:
self.splitter.restoreState(self.splitter_state)
Expand All @@ -306,6 +354,18 @@ def __init__(self):

self._on_language_changed()

# Single cell add-on has a data set widget that derives from this one
# although this class isn't defined as open. Adding the domain broke
# single-cell. A proper solution would be to split this widget into an
# (open) base class and a closed widget that adds the domain functionality.
# Yet, simply excluding three chunks of code makes this code simpler
# - which is better, if we assume that extending this widget is an anomaly.
@property
def core_widget(self):
# Compare by names; unit tests wrap widget classes in to detect
# missing onDeleteWidget calls
return type(self).__name__ == OWDataSets.__name__

def assign_delegates(self):
# NOTE: All columns must have size hinting delegates.
# QTreeView queries only the columns displayed in the viewport so
Expand Down Expand Up @@ -353,6 +413,7 @@ def _parse_info(self, file_path):

def create_model(self):
self.update_language_combo()
self.update_domain_combo()
return self.update_model()

def update_language_combo(self):
Expand All @@ -374,6 +435,15 @@ def update_language_combo(self):
else:
combo.setCurrentText(self.ALL_LANGUAGES)

def update_domain_combo(self):
combo = self.domain_combo
allkeys = set(self.allinfo_local) | set(self.allinfo_remote)
domains = {self._parse_info(key).domain for key in allkeys}
domains -= {None, "sc"}
if domains:
combo.addItems(sorted(domains))
combo.addItem(self.ALL_DOMAINS_LABEL)

def update_model(self):
allkeys = set(self.allinfo_local) | set(self.allinfo_remote)
allkeys = sorted(allkeys)
Expand Down Expand Up @@ -410,8 +480,22 @@ def update_model(self):
model.appendRow(row)

# for settings do not use os.path.join (Windows separator is different)
if "/".join(file_path) == self.selected_id:
if file_path[-1] == self.selected_id:
current_index = i
# for selected_id, set publication status so that unlisted data load correctly
datainfo.publication_status = Namespace.PUBLISHED
if self.core_widget:
self.domain = datainfo.domain
if self.domain == "sc": # domain from the list of ignored domain
self.domain = ALL_DOMAINS
combo = self.domain_combo
if self.domain == GENERAL_DOMAIN:
combo.setCurrentIndex(0)
elif self.domain == ALL_DOMAINS:
combo.setCurrentIndex(combo.count() - 1)
else:
combo.setCurrentText(self.domain)
self._on_domain_changed()

return model, current_index

Expand All @@ -423,6 +507,16 @@ def _on_language_changed(self):
self.language = combo.currentText()
self.view.model().setLanguage(self.language)

def _on_domain_changed(self):
combo = self.domain_combo
if combo.currentIndex() == 0:
self.domain = GENERAL_DOMAIN
elif combo.currentIndex() == combo.count() - 1:
self.domain = ALL_DOMAINS
else:
self.domain = combo.currentText()
self.view.model().setDomain(self.domain)

@Slot(object)
def __set_index(self, f):
# type: (Future) -> None
Expand Down Expand Up @@ -527,7 +621,9 @@ def __on_selection(self):
text = description_html(di)
self.descriptionlabel.setText(text)
# for settings do not use os.path.join (Windows separator is different)
self.selected_id = "/".join(di.file_path)
self.selected_id = di.file_path[-1]
# do not clear a dataset once you select it if it was unlisted
di.publication_status = Namespace.PUBLISHED
else:
self.descriptionlabel.setText("")
self.selected_id = None
Expand Down Expand Up @@ -558,8 +654,7 @@ def commit(self):
self.__awaiting_state = None

if not di.islocal:
pr = progress()
callback = lambda pr=pr: pr.advance.emit()
pr = Progress()
pr.advance.connect(self.__progress_advance, Qt.QueuedConnection)

self.progressBarInit()
Expand All @@ -569,7 +664,7 @@ def commit(self):
f = self._executor.submit(
ensure_local, self.INDEX_URL, di.file_path,
self.local_cache_path, force=di.outdated,
progress_advance=callback)
progress_advance=pr.advance.emit)
w = FutureWatcher(f, parent=self)
w.done.connect(self.__commit_complete)
self.__awaiting_state = _FetchState(f, w, pr)
Expand Down Expand Up @@ -620,8 +715,7 @@ def onDeleteWidget(self):
self.__awaiting_state.pb.advance.disconnect(self.__progress_advance)
self.__awaiting_state = None

@staticmethod
def sizeHint():
def sizeHint(self):
return QSize(1100, 500)

def closeEvent(self, event):
Expand All @@ -644,10 +738,13 @@ def load_data(path):
return Orange.data.Table(path)

@classmethod
def migrate_settings(cls, settings, _):
# until including 3.36.0 selected dataset was saved with \ on Windows
if "selected_id" in settings and isinstance(settings["selected_id"], str):
settings["selected_id"] = settings["selected_id"].replace("\\", "/")
def migrate_settings(cls, settings, version: Optional[int] = None):
selected_id = settings.get("selected_id")
if isinstance(selected_id, str):
# until including 3.36.0 selected dataset was saved with \ on Windows
selected_id = selected_id.replace("\\", "/")
if version is None or version < 2:
settings["selected_id"] = selected_id.split("/")[-1]


class FutureWatcher(QObject):
Expand All @@ -666,7 +763,7 @@ def __on_done(self, f):
self.done.emit(self.__future)


class progress(QObject):
class Progress(QObject):
advance = Signal()


Expand All @@ -692,7 +789,7 @@ def make_html_list(items):
style = '"margin: 5px; text-indent: -40px; margin-left: 40px;"'

def format_item(i):
return '<p style={}><small>{}</small></p>'.format(style, i)
return f'<p style={style}><small>{i}</small></p>'

return '\n'.join([format_item(i) for i in items])

Expand All @@ -703,11 +800,11 @@ def description_html(datainfo):
Summarize a data info as a html fragment.
"""
html = []
year = " ({})".format(str(datainfo.year)) if datainfo.year else ""
source = ", from {}".format(datainfo.source) if datainfo.source else ""
year = f" ({datainfo.year})" if datainfo.year else ""
source = f", from {datainfo.source}" if datainfo.source else ""

html.append("<b>{}</b>{}{}".format(escape(datainfo.title), year, source))
html.append("<p>{}</p>".format(datainfo.description))
html.append(f"<b>{escape(datainfo.title)}</b>{year}{source}")
html.append(f"<p>{datainfo.description}</p>")
seealso = make_html_list(datainfo.seealso)
if seealso:
html.append("<small><b>See Also</b>\n" + seealso + "</small>")
Expand Down
Loading

0 comments on commit 747b3b2

Please sign in to comment.