Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Tidy] Prepare for dynamic filters, part 1 of 2 #850

Merged
merged 14 commits into from
Nov 7, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<!--
A new scriv changelog fragment.

Uncomment the section that is right (remove the HTML comment wrapper).
-->

<!--
### Highlights ✨

- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))

-->
<!--
### Removed

- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))

-->
<!--
### Added

- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))

-->
<!--
### Changed

- A bullet item for the Changed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))

-->
<!--
### Deprecated

- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))

-->
<!--
### Fixed

- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))

-->
<!--
### Security

- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))

-->
100 changes: 51 additions & 49 deletions vizro-core/src/vizro/models/_controls/filter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import numpy as np
from typing import Literal, Union

import pandas as pd
Expand Down Expand Up @@ -86,6 +87,7 @@ class Filter(VizroBaseModel):
)
selector: SelectorType = None
_column_type: Literal["numerical", "categorical", "temporal"] = PrivateAttr()
_data_frames = PrivateAttr()

@validator("targets", each_item=True)
def check_target_present(cls, target):
Expand All @@ -108,31 +110,57 @@ def build(self):
return self.selector.build()

def _set_targets(self):
if not self.targets:
for component_id in model_manager._get_page_model_ids_with_figure(
page_id=model_manager._get_model_page_id(model_id=ModelID(str(self.id)))
):
# TODO: consider making a helper method in data_manager or elsewhere to reduce this operation being
# duplicated across Filter so much, and/or consider storing the result to avoid repeating it.
# Need to think about this in connection with how to update filters on the fly and duplicated calls
# issue outlined in https://github.com/mckinsey/vizro/pull/398#discussion_r1559120849.
data_source_name = model_manager[component_id]["data_frame"]
data_frame = data_manager[data_source_name].load()
if self.column in data_frame.columns:
self.targets.append(component_id)
if not self.targets:
raise ValueError(f"Selected column {self.column} not found in any dataframe on this page.")
# TODO: consider moving to data_manager, depending on Petar's work
# TODO: write tests
potential_targets = self.targets or model_manager._get_page_model_ids_with_figure(
page_id=model_manager._get_model_page_id(model_id=ModelID(str(self.id)))
)

potential_target_to_data_source_name = {
target: model_manager[target]["data_frame"] for target in potential_targets
}
# Using set() here ensures we only load each data source once rather than repeating the operation for each
# target.
data_source_name_to_data = {
data_source_name: data_manager[data_source_name].load()
antonymilne marked this conversation as resolved.
Show resolved Hide resolved
for data_source_name in set(potential_target_to_data_source_name.values())
}
target_to_series = dict()

for target, data_source_name in potential_target_to_data_source_name.items():
data_frame = data_source_name_to_data[data_source_name]

if self.column in data_frame.columns:
target_to_series[target] = data_frame
elif self.targets:
# targets were manually specified so it's not ok the column isn't there. If targets were not specified
# then it's fine, we just skip this target, and error is not raised.
raise ValueError(f"Selected column {self.column} not found in dataframe for {target}.")

if not target_to_series:
raise ValueError(f"Selected column {self.column} not found in any dataframe on this page.")

self.targets = list(target_to_series)
# COMMEnt. will have repeats
self._targeted_data = pd.DataFrame.from_dict(target_to_series)
antonymilne marked this conversation as resolved.
Show resolved Hide resolved

def _set_column_type(self):
data_source_name = model_manager[self.targets[0]]["data_frame"]
data_frame = data_manager[data_source_name].load()
# TODO: check
is_numerical = self._targeted_data.apply(is_numeric_dtype)
is_temporal = self._targeted_data.apply(is_datetime64_any_dtype)
is_categorical = ~is_numerical & ~is_temporal

if is_numeric_dtype(data_frame[self.column]):
if is_numerical.all():
self._column_type = "numerical"
elif is_datetime64_any_dtype(data_frame[self.column]):
elif is_temporal.all():
self._column_type = "temporal"
else:
elif is_categorical.all():
self._column_type = "categorical"
else:
raise ValueError(
f"Inconsistent types detected in the shared data column {self.column}. This column must "
"have the same type for all targets."
)

def _set_selector(self):
self.selector = self.selector or SELECTORS[self._column_type][0]()
Expand All @@ -149,40 +177,14 @@ def _set_numerical_and_temporal_selectors_values(self):
# If the selector is a numerical or temporal selector, and the min and max values are not set, then set them
# N.B. All custom selectors inherit from numerical or temporal selector should also pass this check
if isinstance(self.selector, SELECTORS["numerical"] + SELECTORS["temporal"]):
min_values = []
max_values = []
for target_id in self.targets:
data_source_name = model_manager[target_id]["data_frame"]
data_frame = data_manager[data_source_name].load()
min_values.append(data_frame[self.column].min())
max_values.append(data_frame[self.column].max())

if not (
(is_numeric_dtype(pd.Series(min_values)) and is_numeric_dtype(pd.Series(max_values)))
or (is_datetime64_any_dtype(pd.Series(min_values)) and is_datetime64_any_dtype(pd.Series(max_values)))
):
raise ValueError(
f"Inconsistent types detected in the shared data column '{self.column}' for targeted charts "
f"{self.targets}. Please ensure that the data column contains the same data type across all "
f"targeted charts."
)

if self.selector.min is None:
self.selector.min = min(min_values)
if self.selector.max is None:
self.selector.max = max(max_values)
self.selector.min = self.selector.min or self._targeted_data.to_numpy().min()
self.selector.max = self.selector.max or self._targeted_data.to_numpy().max()

def _set_categorical_selectors_options(self):
# If the selector is a categorical selector, and the options are not set, then set them
# N.B. All custom selectors inherit from categorical selector should also pass this check
if isinstance(self.selector, SELECTORS["categorical"]) and not self.selector.options:
options = set()
for target_id in self.targets:
data_source_name = model_manager[target_id]["data_frame"]
data_frame = data_manager[data_source_name].load()
options |= set(data_frame[self.column])

self.selector.options = sorted(options)
if isinstance(self.selector, SELECTORS["categorical"]):
self.selector.options = self.selector.options or sorted(np.unique(self._targeted_data.to_numpy()))

def _set_actions(self):
if not self.selector.actions:
Expand Down