Skip to content

Commit

Permalink
[Tidy] Prepare for dynamic filters, part 2 of 2 (#857)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
antonymilne and pre-commit-ci[bot] authored Nov 11, 2024
1 parent 02e6419 commit c442686
Show file tree
Hide file tree
Showing 12 changed files with 467 additions and 181 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<!--
A new scriv changelog fragment.
Uncomment the section that is right (remove the HTML comment wrapper).
-->

<!--
### Highlights ✨
- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Removed
- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Added
- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Changed
- A bullet item for the Changed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Deprecated
- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Fixed
- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Security
- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<!--
A new scriv changelog fragment.
Uncomment the section that is right (remove the HTML comment wrapper).
-->

<!--
### Highlights ✨
- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Removed
- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Added
- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->

### Changed

- Improve performance of data loading. ([#850](https://github.com/mckinsey/vizro/pull/850), [#857](https://github.com/mckinsey/vizro/pull/857))

<!--
### Deprecated
- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Fixed
- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
<!--
### Security
- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
-->
208 changes: 130 additions & 78 deletions vizro-core/src/vizro/actions/_actions_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

from __future__ import annotations

from collections import defaultdict
from copy import deepcopy
from typing import TYPE_CHECKING, Any, Literal, Optional, TypedDict, Union

import pandas as pd

from vizro._constants import ALL_OPTION, NONE_OPTION
from vizro.managers import data_manager, model_manager
from vizro.managers._data_manager import DataSourceName
from vizro.managers._model_manager import ModelID
from vizro.models.types import MultiValueType, SelectorType, SingleValueType

Expand All @@ -23,7 +23,7 @@ class CallbackTriggerDict(TypedDict):
"""Represent dash.ctx.args_grouping item. Shortened as 'ctd' in the code.
Args:
id: The component ID. If it`s a pattern matching ID, it will be a dict.
id: The component ID. If it's a pattern matching ID, it will be a dict.
property: The component property used in the callback.
value: The value of the component property at the time the callback was fired.
str_id: For pattern matching IDs, it's the stringified dict ID without white spaces.
Expand All @@ -47,7 +47,18 @@ def _get_component_actions(component) -> list[Action]:
)


def _apply_filters(data_frame: pd.DataFrame, ctds_filters: list[CallbackTriggerDict], target: str) -> pd.DataFrame:
def _apply_filter_controls(
data_frame: pd.DataFrame, ctds_filters: list[CallbackTriggerDict], target: ModelID
) -> pd.DataFrame:
"""Applies filters from a vm.Filter model in the controls.
Args:
data_frame: unfiltered DataFrame.
ctds_filters: list of CallbackTriggerDict for filters.
target: id of targeted Figure.
Returns: filtered DataFrame.
"""
for ctd in ctds_filters:
selector_value = ctd["value"]
selector_value = selector_value if isinstance(selector_value, list) else [selector_value]
Expand Down Expand Up @@ -84,8 +95,19 @@ def _get_parent_vizro_model(_underlying_callable_object_id: str) -> VizroBaseMod


def _apply_filter_interaction(
data_frame: pd.DataFrame, ctds_filter_interaction: list[dict[str, CallbackTriggerDict]], target: str
data_frame: pd.DataFrame, ctds_filter_interaction: list[dict[str, CallbackTriggerDict]], target: ModelID
) -> pd.DataFrame:
"""Applies filters from a filter_interaction.
This will be removed in future when filter interactions are implemented using controls.
Args:
data_frame: unfiltered DataFrame.
ctds_filter_interaction: structure containing CallbackTriggerDict for filter interactions.
target: id of targeted Figure.
Returns: filtered DataFrame.
"""
for ctd_filter_interaction in ctds_filter_interaction:
triggered_model = model_manager[ctd_filter_interaction["modelID"]["id"]]
data_frame = triggered_model._filter_interaction(
Expand All @@ -105,120 +127,150 @@ def _validate_selector_value_none(value: Union[SingleValueType, MultiValueType])
return value


def _create_target_arg_mapping(dot_separated_strings: list[str]) -> dict[str, list[str]]:
results = defaultdict(list)
for string in dot_separated_strings:
if "." not in string:
raise ValueError(f"Provided string {string} must contain a '.'")
component, arg = string.split(".", 1)
results[component].append(arg)
return results
def _get_target_dot_separated_strings(dot_separated_strings: list[str], target: ModelID, data_frame: bool) -> list[str]:
"""Filters list of dot separated strings to get just those relevant for a single target.
Args:
dot_separated_strings: list of dot separated strings that can be targeted by a vm.Parameter,
e.g. ["target_name.data_frame.arg", "target_name.x"]
target: id of targeted Figure.
data_frame: whether to return only DataFrame parameters starting "data_frame." or only non-DataFrame parameters.
def _update_nested_graph_properties(
graph_config: dict[str, Any], dot_separated_string: str, value: Any
Returns:
List of dot separated strings for target.
"""
result = []

for dot_separated_string_with_target in dot_separated_strings:
if dot_separated_string_with_target.startswith(f"{target}."):
dot_separated_string = dot_separated_string_with_target.removeprefix(f"{target}.")
# We only want data_frame parameters when data_frame = True.
if dot_separated_string.startswith("data_frame.") == data_frame:
result.append(dot_separated_string)
return result


def _update_nested_figure_properties(
figure_config: dict[str, Any], dot_separated_string: str, value: Any
) -> dict[str, Any]:
keys = dot_separated_string.split(".")
current_property = graph_config
current_property = figure_config

for key in keys[:-1]:
current_property = current_property.setdefault(key, {})

current_property[keys[-1]] = value
return graph_config
return figure_config


def _get_parametrized_config(
ctd_parameters: list[CallbackTriggerDict], target: ModelID, data_frame: bool
) -> dict[str, Any]:
"""Convert parameters into a keyword-argument dictionary.
def _get_parametrized_config(target: ModelID, ctd_parameters: list[CallbackTriggerDict]) -> dict[str, Any]:
# TODO - avoid calling _captured_callable. Once we have done this we can remove _arguments from
# CapturedCallable entirely.
config = deepcopy(model_manager[target].figure._arguments)
Args:
ctd_parameters: list of CallbackTriggerDicts for vm.Parameter.
target: id of targeted figure.
data_frame: whether to return only DataFrame parameters starting "data_frame." or only non-DataFrame parameters.
# It's not possible to address nested argument of data_frame like data_frame.x.y, just top-level ones like
# data_frame.x.
config["data_frame"] = {}
Returns: keyword-argument dictionary.
"""
if data_frame:
# This entry is inserted (but will always be empty) even for static data so that the load/_multi_load calls
# look identical for dynamic data with no arguments and static data. Note it's not possible to address nested
# arguments of data_frame like data_frame.x.y, just top-level ones like data_frame.x.
config: dict[str, Any] = {"data_frame": {}}
else:
# TODO - avoid calling _captured_callable. Once we have done this we can remove _arguments from
# CapturedCallable entirely. This might mean not being able to address nested parameters.
config = deepcopy(model_manager[target].figure._arguments)
del config["data_frame"]

for ctd in ctd_parameters:
# TODO: needs to be refactored so that it is independent of implementation details
selector_value = ctd["value"]
parameter_value = ctd["value"]

if hasattr(selector_value, "__iter__") and ALL_OPTION in selector_value: # type: ignore[operator]
selector: SelectorType = model_manager[ctd["id"]]

# Even if options are provided as list[dict], the Dash component only returns a list of values.
selector: SelectorType = model_manager[ctd["id"]]
if hasattr(parameter_value, "__iter__") and ALL_OPTION in parameter_value: # type: ignore[operator]
# Even if an option is provided as list[dict], the Dash component only returns a list of values.
# So we need to ensure that we always return a list only as well to provide consistent types.
if all(isinstance(option, dict) for option in selector.options):
selector_value = [option["value"] for option in selector.options]
else:
selector_value = selector.options
parameter_value = [option["value"] if isinstance(option, dict) else option for option in selector.options]

selector_value = _validate_selector_value_none(selector_value)
selector_actions = _get_component_actions(model_manager[ctd["id"]])
parameter_value = _validate_selector_value_none(parameter_value)

for action in selector_actions:
for action in _get_component_actions(selector):
if action.function._function.__name__ != "_parameter":
continue

action_targets = _create_target_arg_mapping(action.function["targets"])

if target not in action_targets:
continue

for action_targets_arg in action_targets[target]:
config = _update_nested_graph_properties(
graph_config=config, dot_separated_string=action_targets_arg, value=selector_value
for dot_separated_string in _get_target_dot_separated_strings(
action.function["targets"], target, data_frame
):
config = _update_nested_figure_properties(
figure_config=config, dot_separated_string=dot_separated_string, value=parameter_value
)

return config


# Helper functions used in pre-defined actions ----
def _get_targets_data_and_config(
def _apply_filters(
data: pd.DataFrame,
ctds_filter: list[CallbackTriggerDict],
ctds_filter_interaction: list[dict[str, CallbackTriggerDict]],
ctds_parameters: list[CallbackTriggerDict],
targets: list[ModelID],
target: ModelID,
):
all_filtered_data = {}
all_parameterized_config = {}

# Takes in just one target, so dataframe is filtered repeatedly for every target that uses it.
# Potentially this could be de-duplicated but it's not so important since filtering is a relatively fast
# operation (compared to data loading).
filtered_data = _apply_filter_controls(data_frame=data, ctds_filters=ctds_filter, target=target)
filtered_data = _apply_filter_interaction(
data_frame=filtered_data, ctds_filter_interaction=ctds_filter_interaction, target=target
)
return filtered_data


def _get_unfiltered_data(
ctds_parameters: list[CallbackTriggerDict], targets: list[ModelID]
) -> dict[ModelID, pd.DataFrame]:
# Takes in multiple targets to ensure that data can be loaded efficiently using _multi_load and not repeated for
# every single target.
# Getting unfiltered data requires data frame parameters. We pass in all ctd_parameters and then find the
# data_frame ones by passing data_frame=True in the call to _get_parametrized_config. Static data is also
# handled here and will just have empty dictionary for its kwargs.
multi_data_source_name_load_kwargs: list[tuple[DataSourceName, dict[str, Any]]] = []
for target in targets:
# parametrized_config includes a key "data_frame" that is used in the data loading function.
parameterized_config = _get_parametrized_config(target=target, ctd_parameters=ctds_parameters)
data_source_name = model_manager[target]["data_frame"]
data_frame = data_manager[data_source_name].load(**parameterized_config["data_frame"])

filtered_data = _apply_filters(data_frame=data_frame, ctds_filters=ctds_filter, target=target)
filtered_data = _apply_filter_interaction(
data_frame=filtered_data, ctds_filter_interaction=ctds_filter_interaction, target=target
dynamic_data_load_params = _get_parametrized_config(
ctd_parameters=ctds_parameters, target=target, data_frame=True
)
data_source_name = model_manager[target]["data_frame"]
multi_data_source_name_load_kwargs.append((data_source_name, dynamic_data_load_params["data_frame"]))

# Parameters affecting data_frame have already been used above in data loading and so are excluded from
# all_parameterized_config.
all_filtered_data[target] = filtered_data
all_parameterized_config[target] = {
key: value for key, value in parameterized_config.items() if key != "data_frame"
}

return all_filtered_data, all_parameterized_config
return dict(zip(targets, data_manager._multi_load(multi_data_source_name_load_kwargs)))


def _get_modified_page_figures(
ctds_filter: list[CallbackTriggerDict],
ctds_filter_interaction: list[dict[str, CallbackTriggerDict]],
ctds_parameters: list[CallbackTriggerDict],
targets: Optional[list[ModelID]] = None,
) -> dict[str, Any]:
targets = targets or []

filtered_data, parameterized_config = _get_targets_data_and_config(
ctds_filter=ctds_filter,
ctds_filter_interaction=ctds_filter_interaction,
ctds_parameters=ctds_parameters,
targets=targets,
)
targets: list[ModelID],
) -> dict[ModelID, Any]:
outputs: dict[ModelID, Any] = {}

# TODO: the structure here would be nicer if we could get just the ctds for a single target at one time,
# so you could do apply_filters on a target and pass only the ctds relevant for that target.
# Consider restructuring ctds to a more convenient form to make this possible.

for target, unfiltered_data in _get_unfiltered_data(ctds_parameters, targets).items():
filtered_data = _apply_filters(unfiltered_data, ctds_filter, ctds_filter_interaction, target)
outputs[target] = model_manager[target](
data_frame=filtered_data,
**_get_parametrized_config(ctd_parameters=ctds_parameters, target=target, data_frame=False),
)

outputs: dict[str, Any] = {}
for target in targets:
outputs[target] = model_manager[target](data_frame=filtered_data[target], **parameterized_config[target])
# TODO NEXT: will need to pass unfiltered_data into Filter.__call__.
# This dictionary is filtered for correct targets already selected in Filter.__call__ or that could be done here
# instead.
# {target: data_frame for target, data_frame in unfiltered_data.items() if target in self.targets}

return outputs
Loading

0 comments on commit c442686

Please sign in to comment.