From 12e89908d0f1f882788436a6663b0c48cf60251b Mon Sep 17 00:00:00 2001
From: Laurens Vijnck
Date: Tue, 20 Aug 2024 13:34:48 +0200
Subject: [PATCH 1/3] add sheets dataset

---
 .../sheets/sheets_dataset.py | 162 ++++++++++++++++++
 kedro-datasets/pyproject.toml | 1 +
 2 files changed, 163 insertions(+)
 create mode 100644 kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py

diff --git a/kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py b/kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py
new file mode 100644
index 000000000..e3890cafe
--- /dev/null
+++ b/kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py
@@ -0,0 +1,162 @@
+"""``GoogleSheetsDataset`` loads and saves data to a Google Sheet.
+"""
+from copy import deepcopy
+from typing import Any, Optional
+
+import pandas as pd
+import pygsheets
+from pygsheets import Spreadsheet, Worksheet
+
+from kedro.io.core import (
+    AbstractVersionedDataset,
+    DatasetError,
+    Version,
+)
+
+
+class GoogleSheetsDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
+    """Dataset to load data from and save data to Google Sheets.
+
+    Reading and writing of specific columns is supported to enable iterative engineering/translator interaction. Currently,
+    authentication is done through a GCP service account added to a GCP project with the Sheets and Drive APIs enabled. The
+    email of the service account should be added as an editor to the sheet.
+
+    Example usage for the
+    `YAML API `_:
+
+
+    .. code-block:: yaml
+
+        sheet:
+          type: sheets.GoogleSheetsDataset
+          key:
+          service_file: conf/local/service-account.json
+
+          save_args:
+            sheet_name: data
+            write_columns: ["list", "of", "cols", "to", "write", "here"]
+
+          load_args:
+            sheet_name: data
+            columns: ["list", "of", "cols", "to", "read", "here"]
+    """
+
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
+
+    def __init__(  # noqa: PLR0913
+        self,
+        *,
+        key: str,
+        service_file: str,
+        load_args: dict[str, Any] | None = None,
+        save_args: dict[str, Any] | None = None,
+        version: Version | None = None,
+        credentials: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """Creates a new instance of ``GoogleSheetsDataset``.
+
+        Args:
+            key: Google Sheets key identifying the spreadsheet.
+            service_file: Path to the service account file.
+            load_args: Arguments to pass to the load method.
+            save_args: Arguments to pass to the save method.
+            version: Version of the dataset.
+            credentials: Credentials to access the sheet (currently unused;
+                authentication is done via ``service_file``).
+            metadata: Any arbitrary metadata for the dataset (currently unused).
+        """
+        self._key = key
+        self._service_file = service_file
+        self._sheet = None
+
+        super().__init__(
+            filepath=None,
+            version=version,
+            exists_function=self._exists,
+            glob_function=None,
+        )
+
+        # Handle default load and save arguments
+        self._load_args = deepcopy(self.DEFAULT_LOAD_ARGS)
+        if load_args is not None:
+            self._load_args.update(load_args)
+        self._save_args = deepcopy(self.DEFAULT_SAVE_ARGS)
+        if save_args is not None:
+            self._save_args.update(save_args)
+
+    def _init_sheet(self):
+        """Initialize the spreadsheet client.
+
+        This is executed lazily to avoid loading credentials at Python runtime launch, which creates issues
+        in unit tests.
+ """ + if self._sheet is None: + gc = pygsheets.authorize(service_file=self._service_file) + self._sheet = gc.open_by_key(self._key) + + def _load(self) -> pd.DataFrame: + self._init_sheet() + + sheet_name = self._load_args["sheet_name"] + wks = self._get_wks_by_name(self._sheet, sheet_name) + if wks is None: + raise DatasetError(f"Sheet with name {sheet_name} not found!") + + df = wks.get_as_df() + if (cols := self._load_args.get("columns", None)) is not None: + df = df[cols] + + return df + + def _save(self, data: pd.DataFrame) -> None: + self._init_sheet() + + sheet_name = self._save_args["sheet_name"] + wks = self._get_wks_by_name(self._sheet, sheet_name) + + # Create the worksheet if not exists + if wks is None: + wks = self._sheet.add_worksheet(sheet_name) + + # Write columns + for column in self._save_args["write_columns"]: + col_idx = self._get_col_index(wks, column) + + if col_idx is None: + raise DatasetError( + f"Sheet with {sheet_name} does not contain column {column}!" + ) + + wks.set_dataframe(data[[column]], (1, col_idx + 1)) + + @staticmethod + def _get_wks_by_name( + spreadsheet: Spreadsheet, sheet_name: str + ) -> Optional[Worksheet]: + for wks in spreadsheet.worksheets(): + if wks.title == sheet_name: + return wks + + return None + + @staticmethod + def _get_col_index(sheet: Worksheet, col_name: str) -> Optional[int]: + for idx, col in enumerate(sheet.get_row(1)): + if col == col_name: + return idx + + return None + + def _describe(self) -> dict[str, Any]: + return { + "key": self._key, + } + + def _exists(self) -> bool: + return False \ No newline at end of file diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 270a2673c..3b9a9b098 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -276,6 +276,7 @@ experimental = [ "netcdf4>=1.6.4", "xarray>=2023.1.0", "rioxarray", + "pygsheets" ] # All requirements From 22c7a0107e90145efd73eb0e69933ef84a782d81 Mon Sep 17 00:00:00 2001 From: Laurens Vijnck Date: Tue, 20 Aug 2024 13:36:50 +0200 Subject: [PATCH 2/3] add release note --- kedro-datasets/RELEASE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index a90176166..7cc224dee 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,6 +1,8 @@ # Upcoming Release ## Major features and improvements +* Added experiment `GoogleSheetsDataset` to read/write data to Google Sheet + ## Bug fixes and other changes ## Breaking Changes ## Community contributions From 06a543912dff052d74380fcb5e2045515d807f15 Mon Sep 17 00:00:00 2001 From: Laurens Vijnck Date: Tue, 20 Aug 2024 14:28:54 +0200 Subject: [PATCH 3/3] add note --- kedro-datasets/kedro_datasets/api/api_dataset.py | 1 + .../kedro_datasets/biosequence/biosequence_dataset.py | 1 + kedro-datasets/kedro_datasets/dask/csv_dataset.py | 1 + kedro-datasets/kedro_datasets/dask/parquet_dataset.py | 1 + .../kedro_datasets/databricks/managed_table_dataset.py | 1 + kedro-datasets/kedro_datasets/email/message_dataset.py | 1 + .../kedro_datasets/geopandas/geojson_dataset.py | 1 + .../kedro_datasets/holoviews/holoviews_writer.py | 1 + kedro-datasets/kedro_datasets/ibis/__init__.py | 1 + kedro-datasets/kedro_datasets/ibis/table_dataset.py | 9 ++++++--- kedro-datasets/kedro_datasets/json/json_dataset.py | 1 + kedro-datasets/kedro_datasets/matlab/matlab_dataset.py | 1 + .../kedro_datasets/matplotlib/matplotlib_writer.py | 1 + kedro-datasets/kedro_datasets/networkx/gml_dataset.py | 1 + 
 .../kedro_datasets/networkx/graphml_dataset.py | 1 +
 kedro-datasets/kedro_datasets/networkx/json_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/csv_dataset.py | 1 +
 .../kedro_datasets/pandas/deltatable_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/excel_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/feather_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/generic_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/hdf_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/json_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/parquet_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pandas/xml_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pickle/pickle_dataset.py | 1 +
 kedro-datasets/kedro_datasets/pillow/image_dataset.py | 1 +
 kedro-datasets/kedro_datasets/plotly/json_dataset.py | 1 +
 kedro-datasets/kedro_datasets/plotly/plotly_dataset.py | 1 +
 kedro-datasets/kedro_datasets/polars/csv_dataset.py | 1 +
 .../kedro_datasets/polars/eager_polars_dataset.py | 1 +
 .../kedro_datasets/polars/lazy_polars_dataset.py | 1 +
 kedro-datasets/kedro_datasets/redis/redis_dataset.py | 1 +
 .../kedro_datasets/snowflake/snowpark_dataset.py | 1 +
 .../kedro_datasets/spark/deltatable_dataset.py | 1 +
 .../kedro_datasets/spark/spark_hive_dataset.py | 1 +
 .../kedro_datasets/spark/spark_jdbc_dataset.py | 1 +
 .../kedro_datasets/spark/spark_streaming_dataset.py | 1 +
 .../kedro_datasets/svmlight/svmlight_dataset.py | 1 +
 .../tensorflow/tensorflow_model_dataset.py | 1 +
 kedro-datasets/kedro_datasets/text/text_dataset.py | 1 +
 kedro-datasets/kedro_datasets/video/video_dataset.py | 1 +
 .../kedro_datasets_experimental/sheets/sheets_dataset.py | 5 +++--
 kedro-telemetry/kedro_telemetry/masking.py | 1 +
 44 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/kedro-datasets/kedro_datasets/api/api_dataset.py b/kedro-datasets/kedro_datasets/api/api_dataset.py
index 0c575c28a..9d7c37643 100644
--- a/kedro-datasets/kedro_datasets/api/api_dataset.py
+++ b/kedro-datasets/kedro_datasets/api/api_dataset.py
@@ -1,6 +1,7 @@
 """``APIDataset`` loads the data from HTTP(S) APIs.
 It uses the python requests library: https://requests.readthedocs.io/en/latest/
 """
+
 from __future__ import annotations
 
 import json as json_  # make pylint happy
diff --git a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py
index bba06f185..97d093ecf 100644
--- a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py
+++ b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py
@@ -1,6 +1,7 @@
 """BioSequenceDataset loads and saves data to/from bio-sequence objects to
 file.
""" + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/dask/csv_dataset.py b/kedro-datasets/kedro_datasets/dask/csv_dataset.py index 0e02f6ade..d4ee24d97 100644 --- a/kedro-datasets/kedro_datasets/dask/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/csv_dataset.py @@ -1,5 +1,6 @@ """``CSVDataset`` is a data set used to load and save data to CSV files using Dask dataframe""" + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index c75d067aa..2895c1dee 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -1,5 +1,6 @@ """``ParquetDataset`` is a data set used to load and save data to parquet files using Dask dataframe""" + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py index 677db0d56..adad8f07d 100644 --- a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py +++ b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py @@ -1,6 +1,7 @@ """``ManagedTableDataset`` implementation to access managed delta tables in Databricks. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/email/message_dataset.py b/kedro-datasets/kedro_datasets/email/message_dataset.py index df60a3c2a..d3229edd3 100644 --- a/kedro-datasets/kedro_datasets/email/message_dataset.py +++ b/kedro-datasets/kedro_datasets/email/message_dataset.py @@ -2,6 +2,7 @@ using an underlying filesystem (e.g.: local, S3, GCS). It uses the ``email`` package in the standard library to manage email messages. """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py index 42be606e3..d2c6ca37d 100644 --- a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py +++ b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py @@ -2,6 +2,7 @@ underlying functionality is supported by geopandas, so it supports all allowed geopandas (pandas) options for loading and saving geosjon files. """ + from __future__ import annotations import copy diff --git a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py index 97e6446a9..c652ca8ea 100644 --- a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py +++ b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py @@ -1,5 +1,6 @@ """``HoloviewsWriter`` saves Holoviews objects as image file(s) to an underlying filesystem (e.g. 
 local, S3, GCS)."""
+
 from __future__ import annotations
 
 import io
diff --git a/kedro-datasets/kedro_datasets/ibis/__init__.py b/kedro-datasets/kedro_datasets/ibis/__init__.py
index 7e793c4e0..2b3276493 100644
--- a/kedro-datasets/kedro_datasets/ibis/__init__.py
+++ b/kedro-datasets/kedro_datasets/ibis/__init__.py
@@ -1,4 +1,5 @@
 """Provide data loading and saving functionality for Ibis's backends."""
+
 from typing import Any
 
 import lazy_loader as lazy
diff --git a/kedro-datasets/kedro_datasets/ibis/table_dataset.py b/kedro-datasets/kedro_datasets/ibis/table_dataset.py
index 9839876c3..0cef90821 100644
--- a/kedro-datasets/kedro_datasets/ibis/table_dataset.py
+++ b/kedro-datasets/kedro_datasets/ibis/table_dataset.py
@@ -1,4 +1,5 @@
 """Provide data loading and saving functionality for Ibis's backends."""
+
 from __future__ import annotations
 
 from copy import deepcopy
@@ -185,9 +186,11 @@ def _describe(self) -> dict[str, Any]:
             "filepath": self._filepath,
             "file_format": self._file_format,
             "table_name": self._table_name,
-            "backend": self._connection_config.get("backend")
-            if self._connection_config
-            else None,
+            "backend": (
+                self._connection_config.get("backend")
+                if self._connection_config
+                else None
+            ),
             "load_args": self._load_args,
             "save_args": self._save_args,
             "materialized": self._materialized,
diff --git a/kedro-datasets/kedro_datasets/json/json_dataset.py b/kedro-datasets/kedro_datasets/json/json_dataset.py
index cc882e75f..9e2ffe639 100644
--- a/kedro-datasets/kedro_datasets/json/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/json/json_dataset.py
@@ -1,6 +1,7 @@
 """``JSONDataset`` loads/saves data from/to a JSON file using an underlying
 filesystem (e.g.: local, S3, GCS). It uses native json to handle the JSON file.
 """
+
 from __future__ import annotations
 
 import json
diff --git a/kedro-datasets/kedro_datasets/matlab/matlab_dataset.py b/kedro-datasets/kedro_datasets/matlab/matlab_dataset.py
index a74d74209..9c9317182 100644
--- a/kedro-datasets/kedro_datasets/matlab/matlab_dataset.py
+++ b/kedro-datasets/kedro_datasets/matlab/matlab_dataset.py
@@ -3,6 +3,7 @@
 the specified backend library passed in (defaults to the ``matlab`` library), so it
 supports all allowed options for loading and saving matlab files.
 """
+
 from __future__ import annotations
 
 from copy import deepcopy
diff --git a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
index 377c4dfbd..789de9287 100644
--- a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
+++ b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
@@ -1,5 +1,6 @@
 """``MatplotlibWriter`` saves one or more Matplotlib objects as image files
 to an underlying filesystem (e.g. local, S3, GCS)."""
+
 from __future__ import annotations
 
 import base64
diff --git a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
index dec121bee..967da14df 100644
--- a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
+++ b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
@@ -2,6 +2,7 @@
 file using an underlying filesystem (e.g.: local, S3, GCS). NetworkX is used
 to create GML data.
""" + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py index f69113533..73983afae 100644 --- a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py @@ -1,6 +1,7 @@ """NetworkX ``GraphMLDataset`` loads and saves graphs to a GraphML file using an underlying filesystem (e.g.: local, S3, GCS). NetworkX is used to create GraphML data. """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/networkx/json_dataset.py b/kedro-datasets/kedro_datasets/networkx/json_dataset.py index 1bef8dc3d..f3230a950 100644 --- a/kedro-datasets/kedro_datasets/networkx/json_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/json_dataset.py @@ -1,6 +1,7 @@ """``JSONDataset`` loads and saves graphs to a JSON file using an underlying filesystem (e.g.: local, S3, GCS). NetworkX is used to create JSON data. """ + from __future__ import annotations import json diff --git a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py index 2c43e13c6..a1c5dfc8b 100644 --- a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py @@ -1,6 +1,7 @@ """``CSVDataset`` loads/saves data from/to a CSV file using an underlying filesystem (e.g.: local, S3, GCS). It uses pandas to handle the CSV file. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py b/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py index fcc680d16..f5a01f816 100644 --- a/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py @@ -2,6 +2,7 @@ S3, GCS), Databricks unity catalog and AWS Glue catalog respectively. It handles load and save using a pandas dataframe. """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py index 601ef377e..67717488a 100644 --- a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py @@ -1,6 +1,7 @@ """``ExcelDataset`` loads/saves data from/to a Excel file using an underlying filesystem (e.g.: local, S3, GCS). It uses pandas to handle the Excel file. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py index eb1f115f0..2e4e22239 100644 --- a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py @@ -2,6 +2,7 @@ using an underlying filesystem (e.g.: local, S3, GCS). The underlying functionality is supported by pandas, so it supports all operations the pandas supports. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py index 4a4ec2726..22601f91a 100644 --- a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py @@ -2,6 +2,7 @@ filesystem (e.g.: local, S3, GCS). It uses pandas to handle the type of read/write target. 
""" + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py index 227b26133..3c92c3c7e 100644 --- a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py @@ -1,6 +1,7 @@ """``HDFDataset`` loads/saves data from/to a hdf file using an underlying filesystem (e.g.: local, S3, GCS). It uses pandas.HDFStore to handle the hdf file. """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/pandas/json_dataset.py b/kedro-datasets/kedro_datasets/pandas/json_dataset.py index 578c494ce..3d8734706 100644 --- a/kedro-datasets/kedro_datasets/pandas/json_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/json_dataset.py @@ -1,6 +1,7 @@ """``JSONDataset`` loads/saves data from/to a JSON file using an underlying filesystem (e.g.: local, S3, GCS). It uses pandas to handle the JSON file. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py index 760d5a8f3..3442b16bb 100644 --- a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py @@ -1,6 +1,7 @@ """``ParquetDataset`` loads/saves data from/to a Parquet file using an underlying filesystem (e.g.: local, S3, GCS). It uses pandas to handle the Parquet file. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py index b1173f43e..a2eadcfcc 100644 --- a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py @@ -1,6 +1,7 @@ """``XMLDataset`` loads/saves data from/to a XML file using an underlying filesystem (e.g.: local, S3, GCS). It uses pandas to handle the XML file. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py index 3ef071e6c..12f04c742 100644 --- a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py +++ b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py @@ -3,6 +3,7 @@ the specified backend library passed in (defaults to the ``pickle`` library), so it supports all allowed options for loading and saving pickle files. """ + from __future__ import annotations import importlib diff --git a/kedro-datasets/kedro_datasets/pillow/image_dataset.py b/kedro-datasets/kedro_datasets/pillow/image_dataset.py index d3e2b838d..690e578eb 100644 --- a/kedro-datasets/kedro_datasets/pillow/image_dataset.py +++ b/kedro-datasets/kedro_datasets/pillow/image_dataset.py @@ -1,6 +1,7 @@ """``ImageDataset`` loads/saves image data as `numpy` from an underlying filesystem (e.g.: local, S3, GCS). It uses Pillow to handle image file. """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/plotly/json_dataset.py b/kedro-datasets/kedro_datasets/plotly/json_dataset.py index 4e5182f69..2d5f61aca 100644 --- a/kedro-datasets/kedro_datasets/plotly/json_dataset.py +++ b/kedro-datasets/kedro_datasets/plotly/json_dataset.py @@ -1,6 +1,7 @@ """``JSONDataset`` loads/saves a plotly figure from/to a JSON file using an underlying filesystem (e.g.: local, S3, GCS). 
""" + from __future__ import annotations import json diff --git a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py index 68b64fd71..332068405 100644 --- a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py +++ b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py @@ -2,6 +2,7 @@ file using an underlying filesystem (e.g.: local, S3, GCS). It loads the JSON into a plotly figure. """ + from __future__ import annotations import json diff --git a/kedro-datasets/kedro_datasets/polars/csv_dataset.py b/kedro-datasets/kedro_datasets/polars/csv_dataset.py index 1195ce295..3cb007d51 100644 --- a/kedro-datasets/kedro_datasets/polars/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/csv_dataset.py @@ -1,6 +1,7 @@ """``CSVDataset`` loads/saves data from/to a CSV file using an underlying filesystem (e.g.: local, S3, GCS). It uses polars to handle the CSV file. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py index bfb9d8528..993500be8 100644 --- a/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py @@ -2,6 +2,7 @@ filesystem (e.g.: local, S3, GCS). It uses polars to handle the type of read/write target. """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py index 2e650e52e..e4056327e 100644 --- a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py @@ -2,6 +2,7 @@ filesystem (e.g.: local, S3, GCS). It uses polars to handle the type of read/write target. """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/redis/redis_dataset.py b/kedro-datasets/kedro_datasets/redis/redis_dataset.py index 4afc49a21..e42cbb5d2 100644 --- a/kedro-datasets/kedro_datasets/redis/redis_dataset.py +++ b/kedro-datasets/kedro_datasets/redis/redis_dataset.py @@ -1,6 +1,7 @@ """``PickleDataset`` loads/saves data from/to a Redis database. The underlying functionality is supported by the redis library, so it supports all allowed options for instantiating the redis app ``from_url`` and setting a value.""" + from __future__ import annotations import importlib diff --git a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py index 249f5d001..ec7ac9bdc 100644 --- a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py +++ b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py @@ -1,5 +1,6 @@ """``AbstractDataset`` implementation to access Snowflake using Snowpark dataframes """ + from __future__ import annotations import logging diff --git a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py index b776b74bd..6135601a9 100644 --- a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py @@ -1,6 +1,7 @@ """``AbstractDataset`` implementation to access DeltaTables using ``delta-spark``. 
""" + from __future__ import annotations from pathlib import PurePosixPath diff --git a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py index fd4e8b59c..e1bcb5c3e 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py @@ -1,6 +1,7 @@ """``AbstractDataset`` implementation to access Spark dataframes using ``pyspark`` on Apache Hive. """ + from __future__ import annotations import pickle diff --git a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py index 60e1443c0..5c710ffac 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py @@ -1,4 +1,5 @@ """SparkJDBCDataset to load and save a PySpark DataFrame via JDBC.""" + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py index f30770852..e03918705 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py @@ -1,4 +1,5 @@ """SparkStreamingDataset to load and save a PySpark Streaming DataFrame.""" + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py index 9d6818eaf..44073003c 100644 --- a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py +++ b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py @@ -2,6 +2,7 @@ underlying filesystem (e.g.: local, S3, GCS). It uses sklearn functions ``dump_svmlight_file`` to save and ``load_svmlight_file`` to load a file. """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py index 5a9bbc5a8..08efbc4b8 100644 --- a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py +++ b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py @@ -1,6 +1,7 @@ """``TensorFlowModelDataset`` is a dataset implementation which can save and load TensorFlow models. """ + from __future__ import annotations import copy diff --git a/kedro-datasets/kedro_datasets/text/text_dataset.py b/kedro-datasets/kedro_datasets/text/text_dataset.py index 0432d066f..a001c4d1a 100644 --- a/kedro-datasets/kedro_datasets/text/text_dataset.py +++ b/kedro-datasets/kedro_datasets/text/text_dataset.py @@ -1,6 +1,7 @@ """``TextDataset`` loads/saves data from/to a text file using an underlying filesystem (e.g.: local, S3, GCS). """ + from __future__ import annotations from copy import deepcopy diff --git a/kedro-datasets/kedro_datasets/video/video_dataset.py b/kedro-datasets/kedro_datasets/video/video_dataset.py index 00d53d11d..08dd73c59 100644 --- a/kedro-datasets/kedro_datasets/video/video_dataset.py +++ b/kedro-datasets/kedro_datasets/video/video_dataset.py @@ -2,6 +2,7 @@ filesystem (e.g.: local, S3, GCS). It uses OpenCV VideoCapture to read and decode videos and OpenCV VideoWriter to encode and write video. 
""" + from __future__ import annotations import itertools diff --git a/kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py b/kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py index e3890cafe..3ddd18383 100644 --- a/kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py +++ b/kedro-datasets/kedro_datasets_experimental/sheets/sheets_dataset.py @@ -23,7 +23,7 @@ class GoogleSheetsDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]): Reading and writing of specific columns is suported to enable iterative engineering/translator interaction. Currently, authentication is done through a GCP service account, added to a GCP project with the sheets and drive APIs enabled. The email of the service account should be added as an editor to the Sheet. - + Example usage for the `YAML API `_: @@ -36,6 +36,7 @@ class GoogleSheetsDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]): key: service_file: conf/local/service-account.json + # NOTE: Columns being written should exist in the sheet. save_args: sheet_name: data write_columns: ["list", "of", "cols", "to", "write", "here"] @@ -159,4 +160,4 @@ def _describe(self) -> dict[str, Any]: } def _exists(self) -> bool: - return False \ No newline at end of file + return False diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index ea432f455..da7803f22 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -1,4 +1,5 @@ """Module containing command masking functionality.""" + from __future__ import annotations from typing import Any, Iterator