From a31dabb53073f70b674d449ffde6ef4f81dacbd4 Mon Sep 17 00:00:00 2001 From: Jayesh Sharma Date: Thu, 28 Nov 2024 15:55:24 +0530 Subject: [PATCH 1/3] Make dicts/lists visualizable and add JSON as viz type (#2882) * add visualization to serialize dict * add json as visualization type * add JSONString class * add json viz support in notebook * JSON expects a jsonable dict not str * also add list/tuples to viz * add JSON support in docs * Auto-update of Starter template * fix link --------- Co-authored-by: GitHub Actions --- .../disable-colorful-logging.md | 2 +- .../creating-custom-visualizations.md | 6 ++++-- src/zenml/enums.py | 1 + .../materializers/built_in_materializer.py | 21 ++++++++++++++++++- .../structured_string_materializer.py | 11 +++++++--- src/zenml/types.py | 3 +++ src/zenml/utils/visualization_utils.py | 5 ++++- 7 files changed, 41 insertions(+), 8 deletions(-) diff --git a/docs/book/how-to/advanced-topics/control-logging/disable-colorful-logging.md b/docs/book/how-to/advanced-topics/control-logging/disable-colorful-logging.md index 8724c19e5d7..e536fa989be 100644 --- a/docs/book/how-to/advanced-topics/control-logging/disable-colorful-logging.md +++ b/docs/book/how-to/advanced-topics/control-logging/disable-colorful-logging.md @@ -10,7 +10,7 @@ By default, ZenML uses colorful logging to make it easier to read logs. However, ZENML_LOGGING_COLORS_DISABLED=true ``` -Note that setting this on the [client environment](../configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will automatically disable colorful logging on remote pipeline runs. 
If you wish to only disable it locally, but turn on for remote pipeline runs, you can set the `ZENML_LOGGING_COLORS_DISABLED` environment variable in your pipeline runs environment as follows: +Note that setting this on the [client environment](../../infrastructure-deployment/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will automatically disable colorful logging on remote pipeline runs. If you wish to only disable it locally, but turn on for remote pipeline runs, you can set the `ZENML_LOGGING_COLORS_DISABLED` environment variable in your pipeline runs environment as follows: ```python docker_settings = DockerSettings(environment={"ZENML_LOGGING_COLORS_DISABLED": "false"}) diff --git a/docs/book/how-to/data-artifact-management/visualize-artifacts/creating-custom-visualizations.md b/docs/book/how-to/data-artifact-management/visualize-artifacts/creating-custom-visualizations.md index 199764269ce..f9b30a38fcb 100644 --- a/docs/book/how-to/data-artifact-management/visualize-artifacts/creating-custom-visualizations.md +++ b/docs/book/how-to/data-artifact-management/visualize-artifacts/creating-custom-visualizations.md @@ -12,20 +12,22 @@ Currently, the following visualization types are supported: * **Image:** Visualizations of image data such as Pillow images (e.g. `PIL.Image`) or certain numeric numpy arrays, * **CSV:** Tables, such as the pandas DataFrame `.describe()` output, * **Markdown:** Markdown strings or pages. +* **JSON:** JSON strings or objects. There are three ways how you can add custom visualizations to the dashboard: -* If you are already handling HTML, Markdown, or CSV data in one of your steps, you can have them visualized in just a few lines of code by casting them to a [special class](#visualization-via-special-return-types) inside your step. 
+* If you are already handling HTML, Markdown, CSV or JSON data in one of your steps, you can have them visualized in just a few lines of code by casting them to a [special class](#visualization-via-special-return-types) inside your step. * If you want to automatically extract visualizations for all artifacts of a certain data type, you can define type-specific visualization logic by [building a custom materializer](#visualization-via-materializers). * If you want to create any other custom visualizations, you can [create a custom return type class with corresponding materializer](#how-to-think-about-creating-a-custom-visualization) and build and return this custom return type from one of your steps. ## Visualization via Special Return Types -If you already have HTML, Markdown, or CSV data available as a string inside your step, you can simply cast them to one of the following types and return them from your step: +If you already have HTML, Markdown, CSV or JSON data available as a string inside your step, you can simply cast them to one of the following types and return them from your step: * `zenml.types.HTMLString` for strings in HTML format, e.g., `"

<h1>Header</h1>

Some text"`, * `zenml.types.MarkdownString` for strings in Markdown format, e.g., `"# Header\nSome text"`, * `zenml.types.CSVString` for strings in CSV format, e.g., `"a,b,c\n1,2,3"`. +* `zenml.types.JSONString` for strings in JSON format, e.g., `{"key": "value"}`. ### Example: diff --git a/src/zenml/enums.py b/src/zenml/enums.py index 2e8e77f2dbb..0469048f3d9 100644 --- a/src/zenml/enums.py +++ b/src/zenml/enums.py @@ -60,6 +60,7 @@ class VisualizationType(StrEnum): HTML = "html" IMAGE = "image" MARKDOWN = "markdown" + JSON = "json" class ZenMLServiceType(StrEnum): diff --git a/src/zenml/materializers/built_in_materializer.py b/src/zenml/materializers/built_in_materializer.py index 6e4c9acfa65..a65d8ececeb 100644 --- a/src/zenml/materializers/built_in_materializer.py +++ b/src/zenml/materializers/built_in_materializer.py @@ -28,7 +28,7 @@ ) from zenml.artifact_stores.base_artifact_store import BaseArtifactStore -from zenml.enums import ArtifactType +from zenml.enums import ArtifactType, VisualizationType from zenml.logger import get_logger from zenml.materializers.base_materializer import BaseMaterializer from zenml.materializers.materializer_registry import materializer_registry @@ -414,6 +414,25 @@ def save(self, data: Any) -> None: for entry in metadata: self.artifact_store.rmtree(entry["path"]) raise e + + # save dict type objects to JSON file with JSON visualization type + def save_visualizations( + self, data: Any + ) -> Dict[str, "VisualizationType"]: + """Save visualizations for the given data. + + Args: + data: The data to save visualizations for. + + Returns: + A dictionary of visualization URIs and their types. 
+ """ + # dict/list type objects are always saved as JSON files + # doesn't work for non-serializable types as they + # are saved as list of lists in different files + if _is_serializable(data): + return {self.data_path: VisualizationType.JSON} + return {} def extract_metadata(self, data: Any) -> Dict[str, "MetadataType"]: """Extract metadata from the given built-in container object. diff --git a/src/zenml/materializers/structured_string_materializer.py b/src/zenml/materializers/structured_string_materializer.py index cd26f37d68b..96e9cc8a7a7 100644 --- a/src/zenml/materializers/structured_string_materializer.py +++ b/src/zenml/materializers/structured_string_materializer.py @@ -19,22 +19,23 @@ from zenml.enums import ArtifactType, VisualizationType from zenml.logger import get_logger from zenml.materializers.base_materializer import BaseMaterializer -from zenml.types import CSVString, HTMLString, MarkdownString +from zenml.types import CSVString, HTMLString, JSONString, MarkdownString logger = get_logger(__name__) -STRUCTURED_STRINGS = Union[CSVString, HTMLString, MarkdownString] +STRUCTURED_STRINGS = Union[CSVString, HTMLString, MarkdownString, JSONString] HTML_FILENAME = "output.html" MARKDOWN_FILENAME = "output.md" CSV_FILENAME = "output.csv" +JSON_FILENAME = "output.json" class StructuredStringMaterializer(BaseMaterializer): """Materializer for HTML or Markdown strings.""" - ASSOCIATED_TYPES = (CSVString, HTMLString, MarkdownString) + ASSOCIATED_TYPES = (CSVString, HTMLString, MarkdownString, JSONString) ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA_ANALYSIS def load(self, data_type: Type[STRUCTURED_STRINGS]) -> STRUCTURED_STRINGS: @@ -94,6 +95,8 @@ def _get_filepath(self, data_type: Type[STRUCTURED_STRINGS]) -> str: filename = HTML_FILENAME elif issubclass(data_type, MarkdownString): filename = MARKDOWN_FILENAME + elif issubclass(data_type, JSONString): + filename = JSON_FILENAME else: raise ValueError( f"Data type {data_type} is not supported by this 
materializer." @@ -120,6 +123,8 @@ def _get_visualization_type( return VisualizationType.HTML elif issubclass(data_type, MarkdownString): return VisualizationType.MARKDOWN + elif issubclass(data_type, JSONString): + return VisualizationType.JSON else: raise ValueError( f"Data type {data_type} is not supported by this materializer." diff --git a/src/zenml/types.py b/src/zenml/types.py index 3c8e187934c..1f4e339e751 100644 --- a/src/zenml/types.py +++ b/src/zenml/types.py @@ -33,3 +33,6 @@ class MarkdownString(str): class CSVString(str): """Special string class to indicate a CSV string.""" + +class JSONString(str): + """Special string class to indicate a JSON string.""" diff --git a/src/zenml/utils/visualization_utils.py b/src/zenml/utils/visualization_utils.py index 5fc473eedd6..2011e70a88d 100644 --- a/src/zenml/utils/visualization_utils.py +++ b/src/zenml/utils/visualization_utils.py @@ -13,9 +13,10 @@ # permissions and limitations under the License. """Utility functions for dashboard visualizations.""" +import json from typing import TYPE_CHECKING, Optional -from IPython.core.display import HTML, Image, Markdown, display +from IPython.core.display import HTML, Image, JSON, Markdown, display from zenml.artifacts.utils import load_artifact_visualization from zenml.enums import VisualizationType @@ -63,6 +64,8 @@ def visualize_artifact( assert isinstance(visualization.value, str) table = format_csv_visualization_as_html(visualization.value) display(HTML(table)) + elif visualization.type == VisualizationType.JSON: + display(JSON(json.loads(visualization.value))) else: display(visualization.value) From c1078052ce1bcd3924b777e4bda5ff351199d16c Mon Sep 17 00:00:00 2001 From: Nils Date: Thu, 28 Nov 2024 13:30:41 +0100 Subject: [PATCH 2/3] Instances of the `FeatureService`s are now used instead of only the names of the FeatureServices. 
(#3209) * refactor: Return `FeatureService` instances instead of the service names * refactor: Use a list of features or a `FeatureService` to get online or historical features * refactor: Set the minimum version of feast to 0.12.0 to ensure the correct feast python API --- src/zenml/integrations/feast/__init__.py | 2 +- .../feature_stores/feast_feature_store.py | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/zenml/integrations/feast/__init__.py b/src/zenml/integrations/feast/__init__.py index 2746f93316b..efa1b672947 100644 --- a/src/zenml/integrations/feast/__init__.py +++ b/src/zenml/integrations/feast/__init__.py @@ -31,7 +31,7 @@ class FeastIntegration(Integration): NAME = FEAST # click is added to keep the feast click version in sync with ZenML's click - REQUIREMENTS = ["feast", "click>=8.0.1,<8.1.4"] + REQUIREMENTS = ["feast>=0.12.0", "click>=8.0.1,<8.1.4"] REQUIREMENTS_IGNORED_ON_UNINSTALL = ["click", "pandas"] @classmethod diff --git a/src/zenml/integrations/feast/feature_stores/feast_feature_store.py b/src/zenml/integrations/feast/feature_stores/feast_feature_store.py index 4213d540d36..ffd4e84340f 100644 --- a/src/zenml/integrations/feast/feature_stores/feast_feature_store.py +++ b/src/zenml/integrations/feast/feature_stores/feast_feature_store.py @@ -16,7 +16,7 @@ from typing import Any, Dict, List, Union, cast import pandas as pd -from feast import FeatureStore # type: ignore +from feast import FeatureService, FeatureStore # type: ignore from feast.infra.registry.base_registry import BaseRegistry # type: ignore from zenml.feature_stores.base_feature_store import BaseFeatureStore @@ -43,14 +43,14 @@ def config(self) -> FeastFeatureStoreConfig: def get_historical_features( self, entity_df: Union[pd.DataFrame, str], - features: List[str], + features: Union[List[str], FeatureService], full_feature_names: bool = False, ) -> pd.DataFrame: """Returns the historical features for training or batch scoring. 
Args: entity_df: The entity DataFrame or entity name. - features: The features to retrieve. + features: The features to retrieve or a FeatureService. full_feature_names: Whether to return the full feature names. Raise: @@ -70,14 +70,14 @@ def get_historical_features( def get_online_features( self, entity_rows: List[Dict[str, Any]], - features: List[str], + features: Union[List[str], FeatureService], full_feature_names: bool = False, ) -> Dict[str, Any]: """Returns the latest online feature data. Args: entity_rows: The entity rows to retrieve. - features: The features to retrieve. + features: The features to retrieve or a FeatureService. full_feature_names: Whether to return the full feature names. Raise: @@ -118,17 +118,21 @@ def get_entities(self) -> List[str]: fs = FeatureStore(repo_path=self.config.feast_repo) return [ds.name for ds in fs.list_entities()] - def get_feature_services(self) -> List[str]: - """Returns the feature service names. + def get_feature_services(self) -> List[FeatureService]: + """Returns the feature services. Raise: ConnectionError: If the online component (Redis) is not available. Returns: - The feature service names. + The feature services. """ fs = FeatureStore(repo_path=self.config.feast_repo) - return [ds.name for ds in fs.list_feature_services()] + feature_services: List[FeatureService] = list( + fs.list_feature_services() + ) + + return feature_services def get_feature_views(self) -> List[str]: """Returns the feature view names. 
From d16e3a449069978d38a98672e971da703bdd7be0 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 28 Nov 2024 16:27:41 +0100 Subject: [PATCH 3/3] Quickstart fixes (#3227) * Replace zenml connect in quickstart * Linting --- examples/quickstart/quickstart.ipynb | 10 +--------- src/zenml/materializers/built_in_materializer.py | 8 +++----- src/zenml/types.py | 1 + src/zenml/utils/visualization_utils.py | 2 +- 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 067c262afb4..6b88699458e 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -149,7 +149,7 @@ "\n", "assert zenml_server_url\n", "\n", - "!zenml connect --url $zenml_server_url" + "!zenml login $zenml_server_url" ] }, { @@ -722,14 +722,6 @@ "* If you have questions or feedback... join our [**Slack Community**](https://zenml.io/slack) and become part of the ZenML family!\n", "* If you want to quickly get started with ZenML, check out [ZenML Pro](https://zenml.io/pro)." ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c560354d-9e78-4061-aaff-2e6213229911", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/src/zenml/materializers/built_in_materializer.py b/src/zenml/materializers/built_in_materializer.py index a65d8ececeb..59d007b440a 100644 --- a/src/zenml/materializers/built_in_materializer.py +++ b/src/zenml/materializers/built_in_materializer.py @@ -414,11 +414,9 @@ def save(self, data: Any) -> None: for entry in metadata: self.artifact_store.rmtree(entry["path"]) raise e - + # save dict type objects to JSON file with JSON visualization type - def save_visualizations( - self, data: Any - ) -> Dict[str, "VisualizationType"]: + def save_visualizations(self, data: Any) -> Dict[str, "VisualizationType"]: """Save visualizations for the given data. 
Args: @@ -428,7 +426,7 @@ def save_visualizations( A dictionary of visualization URIs and their types. """ # dict/list type objects are always saved as JSON files - # doesn't work for non-serializable types as they + # doesn't work for non-serializable types as they # are saved as list of lists in different files if _is_serializable(data): return {self.data_path: VisualizationType.JSON} diff --git a/src/zenml/types.py b/src/zenml/types.py index 1f4e339e751..5c5e21313fa 100644 --- a/src/zenml/types.py +++ b/src/zenml/types.py @@ -34,5 +34,6 @@ class MarkdownString(str): class CSVString(str): """Special string class to indicate a CSV string.""" + class JSONString(str): """Special string class to indicate a JSON string.""" diff --git a/src/zenml/utils/visualization_utils.py b/src/zenml/utils/visualization_utils.py index 2011e70a88d..10c523fb165 100644 --- a/src/zenml/utils/visualization_utils.py +++ b/src/zenml/utils/visualization_utils.py @@ -16,7 +16,7 @@ import json from typing import TYPE_CHECKING, Optional -from IPython.core.display import HTML, Image, JSON, Markdown, display +from IPython.core.display import HTML, JSON, Image, Markdown, display from zenml.artifacts.utils import load_artifact_visualization from zenml.enums import VisualizationType