diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index ceaf0aac99..7015c80dba 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -1,2 +1,3 @@
 include ../README.md
 include ../LICENSE
+include hsfs/helpers/quicktours/feature_store.md
diff --git a/python/hsfs/connection.py b/python/hsfs/connection.py
index 2cf8cb4a63..dbf6576cba 100644
--- a/python/hsfs/connection.py
+++ b/python/hsfs/connection.py
@@ -29,7 +29,8 @@
 )
 from hsfs.core.opensearch import OpenSearchClientSingleton
 from hsfs.decorators import connected, not_connected
+from hsfs.helpers import user_messages
 from requests.exceptions import ConnectionError
 
 
 AWS_DEFAULT_REGION = "default"
@@ -187,7 +188,9 @@ def get_feature_store(
         """
         if not name:
             name = client.get_instance()._project_name
-        return self._feature_store_api.get(util.append_feature_store_suffix(name))
+        fs = self._feature_store_api.get(util.append_feature_store_suffix(name))
+        user_messages.print_connected_to_feature_store_message(fs)
+        return fs
 
     @not_connected
     def connect(self) -> None:
@@ -264,7 +267,6 @@ def connect(self) -> None:
         except (TypeError, ConnectionError):
             self._connected = False
             raise
-        print("Connected. Call `.close()` to terminate connection gracefully.")
 
     def close(self) -> None:
         """Close a connection gracefully.
diff --git a/python/hsfs/core/feature_group_api.py b/python/hsfs/core/feature_group_api.py
index 35f851d878..c594740835 100644
--- a/python/hsfs/core/feature_group_api.py
+++ b/python/hsfs/core/feature_group_api.py
@@ -15,6 +15,8 @@
 #
 from __future__ import annotations
 
+from typing import List, Union
+
 from hsfs import client, feature_group, feature_group_commit
 from hsfs.core import explicit_provenance, ingestion_job
 
@@ -24,6 +26,42 @@ class FeatureGroupApi:
     ONDEMAND = "ondemand"
     SPINE = "spine"
 
+    def get_all(
+        self,
+        feature_store_id: int,
+        with_features: bool = False,
+    ) -> List[
+        Union[
+            feature_group.FeatureGroup,
+            feature_group.SpineGroup,
+            feature_group.ExternalFeatureGroup,
+        ]
+    ]:
+        """Get a list of feature groups in a feature store.
+
+        :param feature_store_id: feature store id
+        :type feature_store_id: int
+        :param with_features: if True, send `expand=features` with the request
+        :type with_features: bool
+        :return: list of feature group metadata objects
+        :rtype: List[Union[FeatureGroup, SpineGroup, ExternalFeatureGroup]]
+        """
+        _client = client.get_instance()
+        path_params = [
+            "project",
+            _client._project_id,
+            "featurestores",
+            feature_store_id,
+            "featuregroups",
+        ]
+        query_params = {}
+        if with_features:
+            query_params["expand"] = ["features"]
+
+        return feature_group.FeatureGroup.from_response_json(
+            _client._send_request("GET", path_params, query_params)
+        )
+
     def save(self, feature_group_instance):
         """Save feature group metadata to the feature store.
 
diff --git a/python/hsfs/core/feature_group_engine.py b/python/hsfs/core/feature_group_engine.py
index 3e88805eda..9885018ee0 100644
--- a/python/hsfs/core/feature_group_engine.py
+++ b/python/hsfs/core/feature_group_engine.py
@@ -15,12 +15,14 @@
 from __future__ import annotations
 
 import warnings
+from typing import List, Union
 
 from hsfs import engine, util
 from hsfs import feature_group as fg
 from hsfs.client import exceptions
 from hsfs.core import delta_engine, feature_group_base_engine, hudi_engine
 from hsfs.core.deltastreamer_jobconf import DeltaStreamerJobConf
+from hsfs.helpers.richer_repr import richer_feature_group
 
 
 class FeatureGroupEngine(feature_group_base_engine.FeatureGroupBaseEngine):
@@ -388,3 +390,88 @@ def save_feature_group_metadata(
                 feature_group_id=feature_group.id,
             )
         )
+
+    def list_feature_groups(
+        self,
+        latest_version_only: bool,
+        online_enabled_only: bool,
+        spine_only: bool,
+        external_only: bool,
+        with_features: bool,
+    ) -> List[Union[fg.FeatureGroup, fg.ExternalFeatureGroup, fg.SpineGroup]]:
+        """List the feature groups of the feature store, sorted by name.
+
+        Everything is fetched with one `get_all` call and filtered client-side;
+        the latest-version filter runs last, on what the other filters kept.
+
+        # Arguments
+            latest_version_only: Keep only the highest version of each name.
+            online_enabled_only: Keep only online-enabled feature groups.
+            spine_only: Keep only `SpineGroup` instances.
+            external_only: Keep only `ExternalFeatureGroup` instances.
+            with_features: Forwarded to the API `get_all` call.
+
+        # Returns
+            The remaining feature group objects, sorted by name.
+        """
+        fg_list = self._feature_group_api.get_all(
+            feature_store_id=self._feature_store_id,
+            with_features=with_features,
+        )
+
+        if online_enabled_only:
+            fg_list = [fgroup for fgroup in fg_list if fgroup.online_enabled]
+        if spine_only:
+            fg_list = [
+                fgroup for fgroup in fg_list if isinstance(fgroup, fg.SpineGroup)
+            ]
+        if external_only:
+            fg_list = [
+                fgroup
+                for fgroup in fg_list
+                if isinstance(fgroup, fg.ExternalFeatureGroup)
+            ]
+
+        if latest_version_only:
+            # Compute the latest version per name once instead of re-scanning
+            # the whole list for every feature group.
+            latest_versions = {}
+            for fgroup in fg_list:
+                known = latest_versions.get(fgroup.name)
+                if known is None or fgroup.version > known:
+                    latest_versions[fgroup.name] = fgroup.version
+            fg_list = [
+                fgroup
+                for fgroup in fg_list
+                if fgroup.version == latest_versions[fgroup.name]
+            ]
+
+        return sorted(fg_list, key=lambda fgroup: fgroup.name)
+
+    def show_info(
+        self,
+        feature_group: Union[fg.FeatureGroup, fg.ExternalFeatureGroup, fg.SpineGroup],
+        show_features: bool = False,
+    ):
+        """Print the info table of `feature_group`, optionally with its features."""
+        richer_feature_group.build_and_print_info_fg_table(
+            feature_group, show_features=show_features
+        )
+
+    def show_all(
+        self,
+        latest_version_only: bool = True,
+        show_features: bool = False,
+        show_description: bool = False,
+    ):
+        """Print a table listing the feature groups of the feature store."""
+        fgroup_list = self.list_feature_groups(
+            latest_version_only=latest_version_only,
+            online_enabled_only=False,
+            spine_only=False,
+            external_only=False,
+            with_features=show_features,
+        )
+        richer_feature_group.show_rich_table_feature_groups(
+            fgroup_list, show_features=show_features, show_description=show_description
+        )
diff --git a/python/hsfs/core/feature_view_api.py b/python/hsfs/core/feature_view_api.py
index ed5a8468c3..53552a3fec 100644
--- a/python/hsfs/core/feature_view_api.py
+++ b/python/hsfs/core/feature_view_api.py
@@ -15,7 +15,7 @@
 #
 from __future__ import annotations
 
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 from hsfs import (
     client,
@@ -55,6 +55,27 @@ def __init__(self, feature_store_id: int) -> None:
             "featureview",
         ]
 
+    def get_all(
+        self, latest_version_only: bool = True, with_features: bool = False
+    ) -> List[Dict[str, Any]]:
+        """Get the raw JSON items of all feature views of the feature store.
+
+        # Arguments
+            latest_version_only: Accepted for interface symmetry but not sent to
+                the backend; callers filter versions themselves.
+            with_features: If `True`, ask the backend to expand the features.
+
+        # Returns
+            The `items` of the response, or an empty list if there are none.
+        """
+        response = self._client._send_request(
+            "GET",
+            path_params=self._base_path,
+            query_params={"expand": ["features"]} if with_features else None,
+        )
+        # NOTE(review): presumably an empty listing omits "items" -- confirm.
+        return response.get("items", [])
+
     def post(
         self, feature_view_obj: feature_view.FeatureView
     ) -> feature_view.FeatureView:
diff --git a/python/hsfs/core/feature_view_engine.py b/python/hsfs/core/feature_view_engine.py
index a89d98f21d..da6bad2b2b 100644
--- a/python/hsfs/core/feature_view_engine.py
+++ b/python/hsfs/core/feature_view_engine.py
@@ -17,8 +17,9 @@
 
 import datetime
 import warnings
-from typing import Optional
+from typing import Any, Dict, List, Optional
 
+import humps
 from hsfs import (
     client,
     engine,
@@ -39,6 +40,7 @@
     training_dataset_engine,
     transformation_function_engine,
 )
+from hsfs.helpers.richer_repr import richer_feature_view
 from hsfs.training_dataset_split import TrainingDatasetSplit
 
 
@@ -923,3 +925,60 @@ def _check_if_exists_with_prefix(self, f_name, f_set):
                 )
             else:
                 return f_name
+
+    def list_feature_views(
+        self, latest_version_only: bool = True, with_features: bool = False
+    ) -> List[Dict[str, Any]]:
+        """List feature views as decamelized dicts, sorted by name.
+
+        # Arguments
+            latest_version_only: Keep only the highest version of each name.
+            with_features: Forwarded to the API `get_all` call.
+
+        # Returns
+            The feature view dicts, sorted by their `name` entry.
+        """
+        fv_list = [
+            humps.decamelize(fv_obj)
+            for fv_obj in self._feature_view_api.get_all(
+                latest_version_only=latest_version_only, with_features=with_features
+            )
+        ]
+        if latest_version_only:
+            # Compute the latest version per name once instead of re-scanning
+            # the whole list for every feature view.
+            latest_versions = {}
+            for fview in fv_list:
+                known = latest_versions.get(fview["name"])
+                if known is None or fview["version"] > known:
+                    latest_versions[fview["name"]] = fview["version"]
+            fv_list = [
+                fview
+                for fview in fv_list
+                if fview["version"] == latest_versions[fview["name"]]
+            ]
+
+        return sorted(fv_list, key=lambda fview: fview["name"])
+
+    def show_info(
+        self, feature_view_obj: feature_view.FeatureView, show_features: bool = True
+    ) -> None:
+        """Print the info table of `feature_view_obj`, optionally with its features."""
+        richer_feature_view.build_and_print_info_fv_table(
+            feature_view_obj, show_features=show_features
+        )
+
+    def show_all(
+        self,
+        latest_version_only: bool = True,
+        show_features: bool = False,
+        show_description: bool = False,
+    ):
+        """Print a table listing the feature views of the feature store."""
+        fview_dicts = self.list_feature_views(
+            latest_version_only=latest_version_only,
+            with_features=show_features,
+        )
+        richer_feature_view.show_rich_table_feature_views(
+            fview_dicts, show_features, show_description
+        )
diff --git a/python/hsfs/feature_group.py b/python/hsfs/feature_group.py
index d32f8c98d4..bed72b69d0 100644
--- a/python/hsfs/feature_group.py
+++ b/python/hsfs/feature_group.py
@@ -3262,6 +3262,10 @@ def _is_time_travel_enabled(self) -> bool:
             and self._time_travel_format.upper() == "HUDI"
         )
 
+    def _show_info(self, show_features: bool = True) -> None:
+        """Display this feature group's info through the feature group engine."""
+        self._feature_group_engine.show_info(self, show_features=show_features)
+
     @property
     def id(self) -> Optional[int]:
         """Feature group id."""
diff --git a/python/hsfs/feature_store.py b/python/hsfs/feature_store.py
index 6cda2d3e1f..1b59f3961a 100644
--- a/python/hsfs/feature_store.py
+++ b/python/hsfs/feature_store.py
@@ -48,6 +48,7 @@
 )
 from hsfs.decorators import typechecked
 from hsfs.embedding import EmbeddingIndex
+from hsfs.helpers import quicktours, verbose from hsfs.statistics_config import StatisticsConfig from hsfs.transformation_function import TransformationFunction @@ -736,6 +737,10 @@ def get_or_create_feature_group( self.id, name, version, feature_group_api.FeatureGroupApi.CACHED ) feature_group_object.feature_store = self + if verbose.is_hsfs_verbose(): + self._feature_group_engine.show_info( + feature_group_object, show_features=True + ) return feature_group_object except exceptions.RestAPIError as e: if ( @@ -764,6 +769,10 @@ def get_or_create_feature_group( notification_topic_name=notification_topic_name, ) feature_group_object.feature_store = self + if verbose.is_hsfs_verbose(): + self._feature_group_engine.show_info( + feature_group_object, show_features=False + ) return feature_group_object else: raise e @@ -1639,13 +1648,16 @@ def get_or_create_feature_view( `FeatureView`: The feature view metadata object. """ try: - return self._feature_view_engine.get(name, version) + feature_view_object = self._feature_view_engine.get(name, version) + if verbose.is_hsfs_verbose(): + self._feature_view_engine.show_info(feature_view_object) + return feature_view_object except exceptions.RestAPIError as e: if ( e.response.json().get("errorCode", "") == 270181 and e.response.status_code == 404 ): - return self.create_feature_view( + feature_view_object = self.create_feature_view( name=name, query=query, version=version, @@ -1655,6 +1667,9 @@ def get_or_create_feature_view( training_helper_columns=training_helper_columns or [], transformation_functions=transformation_functions or {}, ) + if verbose.is_hsfs_verbose(): + self._feature_view_engine.show_info(feature_view_object) + return feature_view_object else: raise e @@ -1728,6 +1743,66 @@ def get_feature_views(self, name: str) -> List["feature_view.FeatureView"]: """ return self._feature_view_engine.get(name) + def show_feature_groups( + self, + latest_only: bool = True, + show_features: bool = False, + show_description: 
bool = False, + ) -> None: + """Prints a list of all feature groups in the feature store. + + !!! example + ```python + # get feature store instance + fs = ... + + # show all feature groups + fs.show_feature_groups() + ``` + + # Arguments + latest_only: If `True` only the latest version of each feature group is shown, defaults to `True`. + show_features: If `True` also show the features of the feature groups, defaults to `False`. + show_description: If `True` also show the description of the feature groups, defaults to `False`. + + # Returns + `None` + """ + self._feature_group_engine.show_all( + latest_only, show_features, show_description + ) + + def show_feature_views( + self, + latest_only: bool = True, + show_features: bool = False, + show_description: bool = False, + ) -> None: + """Prints a list of all feature views in the feature store. + + !!! example + ```python + # get feature store instance + fs = ... + + # show all feature views + fs.show_feature_views() + ``` + + # Arguments + latest_only: If `True` only the latest version of each feature view is shown, defaults to `True`. + show_features: If `True` also show the features of the feature views, defaults to `False`. + show_description: If `True` also show the description of the feature views, defaults to `False`. 
+
+        # Returns
+            `None`
+        """
+        self._feature_view_engine.show_all(latest_only, show_features, show_description)
+
+    def quicktour(self) -> None:
+        """Prints a quick tour of the feature store API."""
+        quicktours.rich_print_quicktour("feature_store")
+
     @property
     def id(self) -> int:
         """Id of the feature store."""
diff --git a/python/hsfs/feature_view.py b/python/hsfs/feature_view.py
index 3a0029cbc9..e523c8babf 100644
--- a/python/hsfs/feature_view.py
+++ b/python/hsfs/feature_view.py
@@ -3360,6 +3360,10 @@ def to_dict(self) -> Dict[str, Any]:
             "type": "featureViewDTO",
         }
 
+    def _show_info(self, show_features: bool = True) -> None:
+        """Display this feature view's info through the feature view engine."""
+        self._feature_view_engine.show_info(self, show_features=show_features)
+
     @property
     def id(self) -> int:
         """Feature view id."""
diff --git a/python/hsfs/helpers/__init__.py b/python/hsfs/helpers/__init__.py
new file mode 100644
index 0000000000..f61ce386fb
--- /dev/null
+++ b/python/hsfs/helpers/__init__.py
@@ -0,0 +1,15 @@
+#
+# Copyright 2024 Hopsworks AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/python/hsfs/helpers/constants.py b/python/hsfs/helpers/constants.py
new file mode 100644
index 0000000000..9b461c18f7
--- /dev/null
+++ b/python/hsfs/helpers/constants.py
@@ -0,0 +1,40 @@
+#
+# Copyright 2024 HOPSWORKS AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+VERBOSE_ENV_VAR = "HOPSWORKS_VERBOSE"
+USE_RICH_LOGGER_ENV_VAR = "HOPSWORKS_USE_RICH_LOGGER"
+USE_RICH_CONSOLE_ENV_VAR = "HOPSWORKS_USE_RICH_CONSOLE"
+DEFAULT_VERBOSE_CONFIG = {
+    "tab_size": 4,
+    "width": 88,
+    "color_system": "truecolor",
+}
+PYTHON_LEXER_THEME = "github-dark"
+
+SHOW_FG_TYPE_MAPPING = {
+    "stream": "Stream",
+    "spine": "Spine",
+    "external": "External",
+}
+GET_OR_ENABLE_RICH_CONSOLE_ERROR_MESSAGE = (
+    "Using `rich` console is not enabled."
+    + f" Please set the environment variable `{USE_RICH_CONSOLE_ENV_VAR}` to `true` or `1` to enable it."
+)
+ENABLE_RICH_FOR_PRETTY_VERBOSITY_ERROR_MESSAGE = (
+    "Hopsworks Python SDK has a verbose mode"
+    + " using `rich` to print nicely formatted user messages designed to help you get started."
+    + f" Please set the environment variables `{USE_RICH_CONSOLE_ENV_VAR}` and `{VERBOSE_ENV_VAR}` to `true` or `1` to enable it."
+)
diff --git a/python/hsfs/helpers/logger.py b/python/hsfs/helpers/logger.py
new file mode 100644
index 0000000000..107386f38e
--- /dev/null
+++ b/python/hsfs/helpers/logger.py
@@ -0,0 +1,51 @@
+#
+# Copyright 2024 Hopsworks AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+import logging
+import os
+
+from hsfs.helpers import constants
+
+
+def is_rich_logger_enabled() -> bool:
+    """Return whether the `rich` handler should be attached to the `hsfs` logger.
+
+    Enabled unless the `HOPSWORKS_USE_RICH_LOGGER` environment variable is set to
+    anything other than `true` (case-insensitive).
+    """
+    return os.getenv(constants.USE_RICH_LOGGER_ENV_VAR, "true").lower() == "true"
+
+
+def set_rich_for_hsfs_root_logger() -> logging.Logger:
+    """Return the `hsfs` logger, with a `RichHandler` attached when enabled."""
+    logger = logging.getLogger("hsfs")
+    if is_rich_logger_enabled():
+        from rich.logging import RichHandler
+
+        # Repeated calls must not stack handlers: each extra handler would emit
+        # every log record one more time.
+        if not any(isinstance(h, RichHandler) for h in logger.handlers):
+            # NOTE(review): `tracebacks_show_locals` prints local variables, which
+            # may include credentials; `tracebacks_show_hidden_frames` does not
+            # look like a documented `RichHandler` argument -- confirm both.
+            rich_handler = RichHandler(
+                rich_tracebacks=True,
+                tracebacks_show_locals=True,
+                tracebacks_show_hidden_frames=True,
+            )
+            logger.addHandler(rich_handler)
+    return logger
diff --git a/python/hsfs/helpers/quicktours/__init__.py b/python/hsfs/helpers/quicktours/__init__.py
new file mode 100644
index 0000000000..c078c9d694
--- /dev/null
+++ b/python/hsfs/helpers/quicktours/__init__.py
@@ -0,0 +1,45 @@
+#
+# Copyright 2024 HOPSWORKS AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Literal
+
+from hsfs.helpers import constants, verbose
+from rich.markdown import Markdown
+
+
+def load_class_quicktour(class_name: Literal["feature_store"]) -> str:
+    path = Path(__file__).parent / f"{class_name}.md"
+    try:
+        with open(path, "r", encoding="utf-8") as f:  # the tours contain emoji
+            return f.read()
+    except FileNotFoundError:
+        return f"Quicktour for {class_name} not found"
+
+
+def rich_print_quicktour(class_name: Literal["feature_store"]) -> None:
+    if verbose.is_rich_print_enabled():
+        markdown = Markdown(
+            load_class_quicktour(class_name),
+            justify="left",
+            code_theme=verbose.get_python_lexer_theme(),
+            inline_code_theme=verbose.get_python_lexer_theme(),
+            inline_code_lexer="python",
+        )
+        verbose.get_rich_console().print(markdown)
+    else:
+        print(constants.GET_OR_ENABLE_RICH_CONSOLE_ERROR_MESSAGE)
diff --git a/python/hsfs/helpers/quicktours/feature_store.md b/python/hsfs/helpers/quicktours/feature_store.md
new file mode 100644
index 0000000000..769e8a95d2
--- /dev/null
+++ b/python/hsfs/helpers/quicktours/feature_store.md
@@ -0,0 +1,71 @@
+# Feature Store API Quick Tour
+
+The `FeatureStore` serves as the central repository for feature data and metadata in the Hopsworks Platform.
+
+## Get or Create Feature Groups to write to the Feature Store
+
+You can retrieve or create `FeatureGroup` objects using the `get_or_create_feature_group()` method. `FeatureGroup` objects encapsulate metadata about a table in the Feature Store, such as its schema, and are used to insert new data into the `FeatureStore`.
+
+```python
+my_feature_group = fs.get_or_create_feature_group(
+    name="my_feature_group",
+    version=1,
+    description="my first feature group",
+    primary_key=['user_id'],
+    event_time='visited_last',
+    online_enabled=True
+)
+
+my_feature_group.insert(dataframe_with_feature_data)
+```
+
+## Get or Create Feature Views to Read Feature Data
+
+You can retrieve or create `FeatureView` objects using the `get_or_create_feature_view()` method. A `FeatureView` is a selection of features from one or more `FeatureGroup`s to be used as input to a model. You can use it to materialize `TrainingDataset`s or to serve **Real-Time** feature data that provides up-to-date context to your model.
+
+```python
+my_feature_view = fs.get_or_create_feature_view(
+    name="my_feature_view",
+    version=1,
+    description="my first feature view",
+    query=my_feature_group.select(["feature1", "feature2"])
+)
+
+x_train, x_test, y_train, y_test = my_feature_view.train_test_split()
+# or
+real_time_feature_data = my_feature_view.get_feature_vector(entry={"user_id": 1})
+```
+
+## Get or Create Storage Connectors to access External Data
+
+You can retrieve or create `StorageConnector` objects using the `get_or_create_storage_connector()` method. `StorageConnector` objects are used to access external data sources such as S3, Snowflake or BigQuery. Each `StorageConnector` has its own properties, so check out the docs for the specific data source you are interested in:
+
+- [S3](https://docs.hopsworks.ai/latest/generated/api/hsfs/#hsfs.s3_connector.S3Connector)
+- [BigQuery](https://docs.hopsworks.ai/latest/generated/api/hsfs/#hsfs.bigquery_connector.BigQueryConnector)
+- [Kafka](https://docs.hopsworks.ai/latest/user_guides/fs/storage_connector/creation/kafka/)
+- [Snowflake](https://docs.hopsworks.ai/latest/generated/api/hsfs/#hsfs.snowflake_connector.SnowflakeConnector)
+
+## Check out what's available in your Feature Store
+
+You can list all the `FeatureGroup`s and `FeatureView`s in your project using the `show_feature_groups()` and `show_feature_views()` methods. Use `show_features=True` to also list the features of each `FeatureGroup` or `FeatureView`.
+
+```python
+fs.show_feature_groups(show_features=True)  # or fs.show_feature_views(show_features=True)
+# output
+# +----------------------------+----+-------+--------+----------+
+# | offline_fg_with_complex_ft | v1 | 11339 | Stream | 🔴 Batch |
+# +----------------------------+----+-------+--------+----------+
+# Features :
+# * id bigint (primary key)
+# * cc_num bigint
+# * when timestamp (event-time)
+# * array_ft array
+# +---------------------+----+------+--------+--------------+
+# | prices_composite_fg | v3 | 7300 | Stream | 🟢 Real-Time |
+# +---------------------+----+------+--------+--------------+
+# Features :
+# * ticker string (primary key)
+# * ticker_number bigint (primary key)
+# * when timestamp (event-time)
+# * price bigint
+```
diff --git a/python/hsfs/helpers/richer_repr/__init__.py b/python/hsfs/helpers/richer_repr/__init__.py
new file mode 100644
index 0000000000..d71072a706
--- /dev/null
+++ b/python/hsfs/helpers/richer_repr/__init__.py
@@ -0,0 +1,15 @@
+#
+# Copyright 2024 HOPSWORKS AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/python/hsfs/helpers/richer_repr/richer_feature_group.py b/python/hsfs/helpers/richer_repr/richer_feature_group.py
new file mode 100644
index 0000000000..e1d0d7051b
--- /dev/null
+++ b/python/hsfs/helpers/richer_repr/richer_feature_group.py
@@ -0,0 +1,263 @@
+#
+# Copyright 2024 HOPSWORKS AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+from typing import List, Optional, Tuple, Union
+
+from hsfs import feature_group as fg_mod
+from hsfs import util
+from hsfs.helpers import constants, verbose
+from rich import box
+from rich.table import Table
+
+
+def build_list_feature_table(
+    fg_obj: Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup, fg_mod.SpineGroup],
+) -> Table:
+    """Build a table with one row per feature of `fg_obj`.
+
+    Online-enabled feature groups get separate online and offline type columns.
+    """
+    feature_table = Table(show_header=True, header_style="bold", box=box.ASCII2)
+    feature_table.add_column("Feature Name")
+    if fg_obj.online_enabled:
+        feature_table.add_column("Online Type")
+        feature_table.add_column("Offline Type")
+    else:
+        feature_table.add_column("Type", justify="center")
+    feature_table.add_column("Metadata", overflow="ellipsis", justify="center")
+    feature_table.add_column("Description", overflow="ellipsis")
+
+    for feature in fg_obj.features:
+        entries = [feature.name]
+        if fg_obj.online_enabled:
+            entries.append(feature.online_type)
+            entries.append(feature.type)
+        else:
+            entries.append(feature.type)
+
+        if feature.primary:
+            entries.append("Primary Key")
+        elif feature.name == fg_obj.event_time:
+            entries.append("Event Time")
+        else:
+            entries.append("")
+
+        entries.append(feature.description)
+
+        feature_table.add_row(*entries)
+
+    return feature_table
+
+
+def build_and_print_info_fg_table(
+    fg_obj: Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup, fg_mod.SpineGroup],
+    show_features: bool = True,
+) -> None:
+    """Print a summary table of `fg_obj`, optionally followed by its features."""
+    renderables = []
+    description = None
+    if fg_obj.description:
+        description = f"[bold]Description :[/bold] {fg_obj.description}"
+    table = Table(
+        show_header=True, header_style="bold", box=box.ASCII2, caption=description
+    )
+
+    if isinstance(fg_obj, fg_mod.ExternalFeatureGroup):
+        table.add_column("External Feature Group")
+    elif isinstance(fg_obj, fg_mod.SpineGroup):
+        table.add_column("Spine Group")
+    else:
+        table.add_column("Feature Group")
+
+    table.add_column(fg_obj.name, overflow="ellipsis")
+
+    table.add_row("Version", f"v{fg_obj.version}")
+    table.add_row("ID", f"{fg_obj.id}")
+    table.add_row(
+        "Serving",
+        f"{'Online (Real-Time) 🟢' if fg_obj.online_enabled else 'Offline (Batch) 🔴'}",
+    )
+    # Composite keys hold several column names; separate them so they stay readable.
+    table.add_row("Primary Key", ", ".join(fg_obj.primary_key))
+    table.add_row(
+        "Event-Time Column",
+        fg_obj.event_time if fg_obj.event_time else "N/A",
+    )
+    if fg_obj.partition_key is not None and len(fg_obj.partition_key) > 0:
+        table.add_row("Partition Key", ", ".join(fg_obj.partition_key))
+
+    if fg_obj.expectation_suite:
+        table.add_row(
+            "Expectation Suite",
+            f"{'🟢' if fg_obj.expectation_suite.run_validation else '🔴'}",
+        )
+        table.add_row("Ingestion", fg_obj.expectation_suite.validation_ingestion_policy)
+
+    table.add_row(
+        "Statistics",
+        f"{'🟢 Enabled' if fg_obj.statistics_config.enabled else '🔴 Disabled'}",
+    )
+    table.add_row(
+        "Table Format",
+        fg_obj.time_travel_format if fg_obj.time_travel_format else "PARQUET",
+    )
+    renderables.append(table)
+    if len(fg_obj.features) > 0 and show_features:
+        renderables.append("\n[underline]Features :[/underline]")
+        renderables.append(build_list_feature_table(fg_obj))
+    if fg_obj.id is None:
+        renderables.append(
+            "Start writing data to the `FeatureStore` with the `insert()` method to register your `FeatureGroup`."
+        )
+    else:
+        renderables.append(
+            f"You can also check out your [link={util.get_feature_group_url(feature_store_id=fg_obj._feature_store_id, feature_group_id=fg_obj.id)}]Feature Group page in the Hopsworks UI[/link] for more information."
+        )
+    verbose.get_rich_console().print(*renderables)
+
+
+def make_table_fg_list(
+    show_header: bool = True,
+    show_features: bool = False,
+    show_description: bool = False,
+) -> Table:
+    """Create the (empty) table used to list feature groups.
+
+    The "Description" column is only added when features are not shown.
+    """
+    table = Table(show_header=show_header, header_style="bold", box=box.ASCII2)
+    table.add_column("Name")
+    table.add_column("Version")
+    table.add_column("ID")
+    table.add_column("Type")
+    table.add_column("Online")
+    if show_description and not show_features:
+        table.add_column("Description")
+
+    return table
+
+
+def make_rich_text_row(
+    fgroup: Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup, fg_mod.SpineGroup],
+    show_description: bool,
+    show_features: bool,
+) -> Tuple[List[str], Optional[str], Optional[Table]]:
+    """Return the row cells, optional description and optional feature bullets."""
+    if isinstance(fgroup, fg_mod.SpineGroup):
+        fg_type = constants.SHOW_FG_TYPE_MAPPING["spine"]
+    elif isinstance(fgroup, fg_mod.ExternalFeatureGroup):
+        fg_type = constants.SHOW_FG_TYPE_MAPPING["external"]
+    else:
+        fg_type = constants.SHOW_FG_TYPE_MAPPING["stream"]
+    online_status = "🟢 Real-Time" if fgroup.online_enabled else "🔴 Batch"
+    entries = [
+        fgroup.name,
+        f"v{fgroup.version}",
+        f"{fgroup.id}",
+        fg_type,
+        online_status,
+    ]
+
+    description = None
+    # Plain truthiness short-circuits, so a `None` description never reaches `len()`.
+    if show_description and fgroup.description:
+        if show_features:
+            description = " [bold]Description :[/bold]\n " + fgroup.description
+        else:
+            description = fgroup.description
+
+    feature_table = None
+    if show_features:
+        feature_table = build_feature_bullets(fgroup)
+
+    return entries, description, feature_table
+
+
+def build_feature_bullets(
+    fgroup: Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup, fg_mod.SpineGroup],
+) -> Table:
+    """Build a borderless table listing each feature with its type and key role."""
+    feature_table = Table(box=None, show_lines=False)
+    feature_table.add_column(" Features :")
+    feature_table.add_column("")
+    feature_table.add_column("")
+    for feature in fgroup.features:
+        extra = ""
+        if feature.primary and feature.partition:
+            extra = " (primary & partition key)"
+        elif feature.primary:
+            extra = " (primary key)"
+        elif feature.partition:
+            extra = " (partition key)"
+        if fgroup.event_time and fgroup.event_time == feature.name:
+            extra = " (event-time)"
+        feature_table.add_row(f" * {feature.name}", feature.type, extra)
+
+    return feature_table
+
+
+def show_rich_table_feature_groups(
+    fgroup_list: List[
+        Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup, fg_mod.SpineGroup]
+    ],
+    show_features: bool = False,
+    show_description: bool = False,
+) -> None:
+    """Print the given feature groups, one table per group when features are shown."""
+    rich_console = verbose.get_rich_console()
+    row_entries_and_opt_features_and_description = [
+        make_rich_text_row(
+            fgroup_obj,
+            show_description,
+            show_features,
+        )
+        for fgroup_obj in fgroup_list
+    ]
+
+    if show_features:
+        tables = []
+        for (
+            entries,
+            description,
+            feature_table,
+        ) in row_entries_and_opt_features_and_description:
+            new_table = make_table_fg_list(
+                show_header=False,
+                show_features=show_features,
+                show_description=show_description,
+            )
+            new_table.add_row(*entries)
+            tables.extend(
+                [
+                    tab
+                    for tab in [new_table, description, feature_table]
+                    if tab is not None
+                ]
+            )
+
+        rich_console.print(*tables)
+    else:
+        the_table = make_table_fg_list(
+            show_header=True,
+            show_description=show_description,
+            show_features=show_features,
+        )
+        for entries, description, _ in row_entries_and_opt_features_and_description:
+            if show_description:
+                entries.append(description or "")
+            the_table.add_row(*entries)
+        rich_console.print(the_table)
diff --git a/python/hsfs/helpers/richer_repr/richer_feature_view.py b/python/hsfs/helpers/richer_repr/richer_feature_view.py
new file mode 100644
index 0000000000..5216ab5bdd
--- /dev/null
+++ b/python/hsfs/helpers/richer_repr/richer_feature_view.py
@@ -0,0 +1,246 @@
+#
+# Copyright 2024 HOPSWORKS AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
def make_list_fv_table(
    show_headers: bool = True,
    show_features: bool = False,
    show_description: bool = False,
) -> Table:
    """Create the empty table used to list feature views.

    :param show_headers: whether the header row is rendered
    :param show_features: when true the "Description" column is never added
    :param show_description: adds a "Description" column unless
        ``show_features`` is also set
    :return: a table with the columns declared but no rows
    """
    table = Table(show_header=show_headers, header_style="bold", box=box.ASCII2)

    for header in ("Name", "Version", "ID", "Parent Feature Groups"):
        table.add_column(header)
    if show_description and not show_features:
        table.add_column("Description")

    return table


def make_rich_text_row(
    fv_dict: Dict[str, Any],
    show_features: bool,
    show_description: bool,
) -> Tuple[List[str], Optional[str], Optional[Table]]:
    """Build the row cells, description and feature table for one feature view.

    :param fv_dict: feature view as a dict; ``name``, ``version``, ``id`` and
        ``serving_keys`` are read here, ``description`` is optional
    :param show_features: when true a feature table is built and the
        description is prefixed with a bold "Description :" label
    :param show_description: when false the returned description is ``None``
    :return: ``(entries, description, feature_table)``; the last two are
        ``None`` when not requested or not available
    """
    # Sort the de-duplicated names so the rendered cell is deterministic.
    fg_names = sorted(
        {sk["feature_group"]["name"] for sk in fv_dict["serving_keys"]}
    )
    entries = [
        fv_dict["name"],
        f"v{fv_dict['version']}",
        f"{fv_dict['id']}",
        ", ".join(fg_names),
    ]

    description = None
    # A missing "description" key is treated like an explicit None.
    raw_description = fv_dict.get("description")
    if show_description and raw_description is not None:
        if show_features:
            description = " [bold]Description :[/bold]\n " + raw_description
        else:
            description = raw_description

    feature_table = build_training_feature_bullets(fv_dict) if show_features else None

    return entries, description, feature_table


def build_training_feature_bullets(fv_dict: Dict[str, Any]) -> Table:
    """Build a borderless bullet table of a feature view's serving keys and features.

    Serving keys are listed first. Features whose name equals a serving key's
    ``feature_name`` are not listed a second time.

    :param fv_dict: feature view as a dict with ``serving_keys`` and ``features``
    :return: table with one row per serving key and per remaining feature
    """
    feature_table = Table(box=None, show_lines=False)
    feature_table.add_column(" Features :")
    feature_table.add_column("")
    feature_table.add_column("")
    # Every row below has four cells, so declare the fourth column explicitly.
    feature_table.add_column("")

    serving_keys = fv_dict["serving_keys"]
    for sk in serving_keys:
        # `or ""` also covers keys that are present with a None value.
        prefix = sk.get("prefix") or ""
        feature_table.add_row(
            f" * {prefix + sk['feature_name']}",
            sk.get("join_on") or "",
            "serving key",
            sk["feature_group"]["name"],
        )

    sk_names = {sk["feature_name"] for sk in serving_keys}

    for feature in fv_dict["features"]:
        if feature["name"] in sk_names:
            continue
        feature_table.add_row(
            f" * {feature['name']}",
            feature["type"],
            "",
            feature["featuregroup"]["name"],
        )

    return feature_table


def show_rich_table_feature_views(
    fview_dict_list: List[Dict[str, Any]],
    show_features: bool = False,
    show_description: bool = False,
) -> None:
    """Print an overview of the given feature views on the shared rich console.

    :param fview_dict_list: feature views as dicts
    :param show_features: when true, print one header-less table per feature
        view followed by its description and feature table when present;
        when false, print a single table with one row per feature view
    :param show_description: whether descriptions are displayed
    """
    rows = [
        make_rich_text_row(fview_dict, show_features, show_description)
        for fview_dict in fview_dict_list
    ]

    if show_features:
        tables = []
        for entries, description, feature_table in rows:
            new_table = make_list_fv_table(
                show_headers=False,
                show_features=show_features,
                show_description=show_description,
            )
            new_table.add_row(*entries)
            tables.extend(
                part
                for part in (new_table, description, feature_table)
                if part is not None
            )

        verbose.get_rich_console().print(*tables)
        return

    the_table = make_list_fv_table(
        show_headers=True,
        show_description=show_description,
        show_features=show_features,
    )
    for entries, description, _ in rows:
        if show_description:
            # The description becomes the last cell of the row.
            entries.append(description or "")
        the_table.add_row(*entries)

    verbose.get_rich_console().print(the_table)


def build_training_feature_table(
    fview_obj: feature_view_mod.FeatureView,
) -> Tuple[Table, Table]:
    """Build the "Serving Keys" and "Features" tables for a feature view.

    :param fview_obj: feature view whose serving keys, features and
        transformation functions are displayed
    :return: ``(serving_key_table, feature_table)``
    """
    serving_key_table = Table(
        box=box.ASCII2, title="Serving Keys", title_justify="left"
    )
    for header in ("Name", "Required", "JoinOn", "Feature Group"):
        serving_key_table.add_column(header)

    for serving_key in fview_obj.serving_keys:
        serving_key_table.add_row(
            serving_key.required_serving_key,
            "required" if serving_key.required else "optional",
            serving_key.join_on if serving_key.join_on else "N/A",
            serving_key.feature_group.name,
        )

    transformation_functions = fview_obj.transformation_functions
    has_transformation = (
        bool(transformation_functions) and len(transformation_functions) > 0
    )

    feature_table = Table(box=box.ASCII2, title="Features", title_justify="left")
    feature_table.add_column("Name")
    feature_table.add_column("Type")
    feature_table.add_column("Metadata")
    if has_transformation:
        # This column only exists when at least one transformation is attached.
        feature_table.add_column("Transformation Function")
    feature_table.add_column("Feature Group")

    for feature in sorted(fview_obj.features, key=lambda x: x.feature_group.name):
        flags = []
        if feature.inference_helper_column:
            flags.append("inference helper")
        if feature.training_helper_column:
            flags.append("training helper")
        if feature.label:
            flags.append("label")

        entries = [feature.name, feature.type, ",".join(flags)]
        if has_transformation:
            opt_tf_obj = transformation_functions.get(feature.name, None)
            entries.append(opt_tf_obj.name if opt_tf_obj else "")
        entries.append(feature.feature_group.name)

        feature_table.add_row(*entries)

    return serving_key_table, feature_table


def build_and_print_info_fv_table(
    fview_obj: feature_view_mod.FeatureView, show_features: bool = False
) -> None:
    """Print a summary table for one feature view on the shared rich console.

    :param fview_obj: feature view to describe
    :param show_features: when true, the serving key and feature tables are
        printed after the summary table
    """
    description = ""
    if fview_obj.description:
        description = f"Description: {fview_obj.description}"

    # Read the parent feature groups once; they are used for two rows below.
    parent_feature_groups = fview_obj.query.featuregroups
    online_enabled = all(fg.online_enabled for fg in parent_feature_groups)
    required_serving_keys = [
        sk.required_serving_key for sk in fview_obj.serving_keys if sk.required
    ]

    table = Table(
        show_header=True,
        header_style="bold",
        box=box.ASCII2,
        expand=False,
        caption=description,
    )
    table.add_column("Feature View")
    table.add_column(fview_obj.name)
    table.add_row("Version", f"v{fview_obj.version}")
    table.add_row("ID", f"{fview_obj.id}")
    table.add_row("Serving", "🟢 Real-Time" if online_enabled else "🔴 Batch")
    table.add_row("Required Serving Keys", ", ".join(required_serving_keys) or "N/A")
    table.add_row(
        "Parent Feature Groups",
        ",\n".join(fg.name for fg in parent_feature_groups),
    )

    tables = [table]
    if show_features:
        tables.extend(build_training_feature_table(fview_obj))

    verbose.get_rich_console().print(*tables)
def print_connected_to_feature_store_message(fs_obj: fs_mod.FeatureStore) -> None:
    """Print a "connected" message for the given feature store.

    When both the verbose and rich-print switches are enabled, a panel with
    the project name and host is printed, followed by getting-started hints
    that depend on whether the feature store already has feature groups.
    Otherwise a single plain-text line is printed.

    :param fs_obj: feature store the client just connected to
    """
    if not (verbose.is_hsfs_verbose() and verbose.is_rich_print_enabled()):
        # Plain mode needs neither the feature group listing nor Markdown,
        # so return before issuing the request.
        print(f"Connected to project {fs_obj.project_name} in Hopsworks Feature Store.")
        return

    feature_groups = fs_obj._feature_group_api.get_all(feature_store_id=fs_obj.id)
    if not feature_groups:
        hints = (
            "- To learn how to get started with Hopsworks feature store, checkout our "
            "[guides and docs](https://docs.hopsworks.ai/latest/user_guides/fs/) "
            "or our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials) on github.\n"
            "- Call `quicktour()` method to get an overview of the feature store API and capabilities."
        )
    else:
        hints = (
            "- Call `show_feature_groups()` to show a list of existing Feature Groups to insert/upsert new data or "
            "set `with_features=True` to see which features you can select to build a new Feature View.\n"
            "- Call `show_feature_views()` to show a list of existing Feature Views, you can use them to read data "
            "and create Training Datasets. Feature Views composed of Features from online-enabled FeatureGroups can "
            "be used to serve feature value for real-time use cases. Checkout the ⚡ "
            "[benchmarks](https://www.hopsworks.ai/post/feature-store-benchmark-comparison-hopsworks-and-feast)\n"
            "- Call the `quicktour()` method to get an overview of the feature store API and capabilities."
        )

    get_started_message = Markdown(
        hints,
        justify="left",
        inline_code_lexer="python",
        inline_code_theme=verbose.get_python_lexer_theme(),
    )

    verbose.get_rich_console().print(
        Panel.fit(
            f"Connected to Project [bold red]{fs_obj.project_name}[/bold red] on [italic red]{client.get_instance()._host}[/italic red].",
            title="Hopsworks Feature Store",
            style="bold",
            box=box.ASCII2,
            padding=(1, 2),
        ),
        get_started_message,
        justify="center",
    )
# Lazily created console shared by all helpers; None until first requested.
_rich_console = None


def enable_rich_verbose_mode() -> None:
    """Set both the verbose and the rich-console environment variables to "true"."""
    for env_var in (constants.VERBOSE_ENV_VAR, constants.USE_RICH_CONSOLE_ENV_VAR):
        os.environ[env_var] = "true"


def is_rich_print_enabled() -> bool:
    """Return whether the rich-console environment variable is "true" or "1".

    The comparison is case-insensitive; an unset variable defaults to "true".
    """
    flag = os.getenv(constants.USE_RICH_CONSOLE_ENV_VAR, "true").lower()
    return flag in ("true", "1")


def is_hsfs_verbose() -> bool:
    """Return whether the verbose environment variable is "true" or "1".

    The comparison is case-insensitive; an unset variable defaults to "1".
    """
    flag = os.getenv(constants.VERBOSE_ENV_VAR, "1").lower()
    return flag in ("true", "1")


def init_rich_with_default_config() -> None:
    """Create the shared console from the default config if it does not exist yet."""
    global _rich_console
    if _rich_console is not None:
        return
    _rich_console = Console(**constants.DEFAULT_VERBOSE_CONFIG)


def get_rich_console() -> Console:
    """Return the shared console, creating it on first use."""
    if _rich_console is None:
        init_rich_with_default_config()
    return _rich_console


def get_python_lexer_theme() -> str:
    """Return the configured theme name for inline Python code."""
    return constants.PYTHON_LEXER_THEME