From 87ab43e4af75bb6b5e483e06b206b286fb1f74dd Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 15:10:50 -0700 Subject: [PATCH 1/8] a --- .../snowpark/modin/pandas/__init__.py | 1 + src/snowflake/snowpark/modin/pandas/base.py | 81 +---- .../snowpark/modin/pandas/dataframe.py | 32 +- .../modin/plugin/extensions/base_overrides.py | 300 ++++++++++++++++++ .../plugin/extensions/dataframe_overrides.py | 30 +- .../plugin/extensions/series_overrides.py | 8 +- 6 files changed, 346 insertions(+), 106 deletions(-) create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py diff --git a/src/snowflake/snowpark/modin/pandas/__init__.py b/src/snowflake/snowpark/modin/pandas/__init__.py index dcf5db871a2..975289684cf 100644 --- a/src/snowflake/snowpark/modin/pandas/__init__.py +++ b/src/snowflake/snowpark/modin/pandas/__init__.py @@ -157,6 +157,7 @@ Index, DatetimeIndex, ) +import snowflake.snowpark.modin.plugin.extensions.base_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.series_extensions # isort: skip # noqa: E402,F401 diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index f7e9da9f89a..943fe43ebbf 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -84,10 +84,7 @@ ) from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._typing import ListLike -from snowflake.snowpark.modin.plugin.utils.error_message import ( - ErrorMessage, - base_not_implemented, -) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from 
snowflake.snowpark.modin.utils import ( _inherit_docstrings, @@ -882,7 +879,6 @@ def _get_dtypes(self): else: return list(self.dtypes) - @base_not_implemented() def align( self, other, @@ -1074,7 +1070,6 @@ def asfreq( ) ) - @base_not_implemented() def asof(self, where, subset=None): # noqa: PR01, RT01, D200 """ Return the last row(s) without any NaNs before `where`. @@ -1159,7 +1154,6 @@ def at(self, axis=None): # noqa: PR01, RT01, D200 return _AtIndexer(self) - @base_not_implemented() def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 """ Select values at particular time of day (e.g., 9:30AM). @@ -1190,7 +1184,6 @@ def backfill( method="bfill", axis=axis, limit=limit, downcast=downcast, inplace=inplace ) - @base_not_implemented() @_inherit_docstrings( pandas.DataFrame.between_time, apilink="pandas.DataFrame.between_time" ) @@ -1230,7 +1223,6 @@ def bfill( method="bfill", axis=axis, limit=limit, downcast=downcast, inplace=inplace ) - @base_not_implemented() def bool(self): # noqa: RT01, D200 """ Return the bool of a single element `BasePandasDataset`. @@ -1248,7 +1240,6 @@ def bool(self): # noqa: RT01, D200 else: return self._to_pandas().bool() - @base_not_implemented() def clip( self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs ): # noqa: PR01, RT01, D200 @@ -1279,7 +1270,6 @@ def clip( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - @base_not_implemented() def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 """ Perform combination of `BasePandasDataset`-s according to `func`. @@ -1289,7 +1279,6 @@ def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, "combine", other, axis=0, func=func, fill_value=fill_value, **kwargs ) - @base_not_implemented() def combine_first(self, other): # noqa: PR01, RT01, D200 """ Update null elements with value in the same location in `other`. 
@@ -1545,7 +1534,6 @@ def _dropna( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - @base_not_implemented() def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 """ Return `BasePandasDataset` with requested index / column level(s) removed. @@ -1588,15 +1576,6 @@ def drop_duplicates( else: return result - @base_not_implemented() - def map(self, func, na_action: str | None = None, **kwargs): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if not callable(func): - raise ValueError(f"'{type(func)}' object is not callable") - return self.__constructor__( - query_compiler=self._query_compiler.map(func, na_action=na_action, **kwargs) - ) - def mask( self, cond: BasePandasDataset | Callable | AnyArrayLike, @@ -1722,7 +1701,6 @@ def eq(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return self._binary_op("eq", other, axis=axis, level=level, dtypes=np.bool_) - @base_not_implemented() def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 """ Transform each element of a list-like to a row. 
@@ -1735,7 +1713,6 @@ def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 exploded = exploded.reset_index(drop=True) return exploded - @base_not_implemented() def ewm( self, com: float | None = None, @@ -1906,7 +1883,6 @@ def fillna( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - @base_not_implemented() def filter( self, items=None, like=None, regex=None, axis=None ): # noqa: PR01, RT01, D200 @@ -2064,7 +2040,6 @@ def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, ) ) - @base_not_implemented() def infer_objects( self, copy: bool | None = None ) -> BasePandasDataset: # pragma: no cover # noqa: RT01, D200 @@ -2145,7 +2120,6 @@ def iloc(self): return _iLocIndexer(self) - @base_not_implemented() def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset validate_bool_kwarg(skipna, "skipna", none_allowed=False) @@ -2339,7 +2313,6 @@ def mod( "mod", other, axis=axis, level=level, fill_value=fill_value ) - @base_not_implemented() def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 """ Get the mode(s) of each element along the selected axis. @@ -2466,7 +2439,6 @@ def pct_change( ) ) - @base_not_implemented() def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 """ Apply chainable functions that expect `BasePandasDataset`. @@ -2474,7 +2446,6 @@ def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return pipe(self, func, *args, **kwargs) - @base_not_implemented() def pop(self, item): # noqa: PR01, RT01, D200 """ Return item and drop from frame. Raise KeyError if not found. 
@@ -2659,7 +2630,6 @@ def reindex( final_query_compiler, inplace=False if copy is None else not copy ) - @base_not_implemented() def reindex_like( self, other, method=None, copy=True, limit=None, tolerance=None ): # noqa: PR01, RT01, D200 @@ -2745,7 +2715,6 @@ def f(x): if not inplace: return result - @base_not_implemented() def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 """ Rearrange index levels using input order. @@ -3035,7 +3004,6 @@ def sample( ) return self.__constructor__(query_compiler=query_compiler) - @base_not_implemented() def sem( self, axis: Axis | None = None, @@ -3088,7 +3056,6 @@ def median( **kwargs, ) - @base_not_implemented() def set_flags( self, *, copy: bool = False, allows_duplicate_labels: bool | None = None ): # noqa: PR01, RT01, D200 @@ -3313,7 +3280,6 @@ def sub( subtract = sub - @base_not_implemented() def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 """ Interchange axes and swap values axes appropriately. @@ -3327,7 +3293,6 @@ def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 return self.copy() return self - @base_not_implemented() def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 """ Swap levels `i` and `j` in a `MultiIndex`. 
@@ -3355,7 +3320,6 @@ def take( slice_obj = indices if axis == 0 else (slice(None), indices) return self.iloc[slice_obj] - @base_not_implemented() def to_clipboard( self, excel=True, sep=None, **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 @@ -3418,7 +3382,6 @@ def to_csv( storage_options=storage_options, ) - @base_not_implemented() def to_excel( self, excel_writer, @@ -3462,7 +3425,6 @@ def to_excel( storage_options=storage_options, ) - @base_not_implemented() def to_hdf( self, path_or_buf, key, format="table", **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 @@ -3474,7 +3436,6 @@ def to_hdf( "to_hdf", path_or_buf, key, format=format, **kwargs ) - @base_not_implemented() def to_json( self, path_or_buf=None, @@ -3510,7 +3471,6 @@ def to_json( storage_options=storage_options, ) - @base_not_implemented() def to_latex( self, buf=None, @@ -3566,7 +3526,6 @@ def to_latex( position=position, ) - @base_not_implemented() def to_markdown( self, buf=None, @@ -3588,7 +3547,6 @@ def to_markdown( **kwargs, ) - @base_not_implemented() def to_pickle( self, path, @@ -3643,7 +3601,6 @@ def to_period( # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return self._default_to_pandas("to_period", freq=freq, axis=axis, copy=copy) - @base_not_implemented() def to_string( self, buf=None, @@ -3692,7 +3649,6 @@ def to_string( encoding=encoding, ) - @base_not_implemented() def to_sql( self, name, @@ -3736,7 +3692,6 @@ def to_sql( ) # TODO(williamma12): When this gets implemented, have the series one call this. - @base_not_implemented() def to_timestamp( self, freq=None, how="start", axis=0, copy=True ): # noqa: PR01, RT01, D200 @@ -3748,7 +3703,6 @@ def to_timestamp( "to_timestamp", freq=freq, how=how, axis=axis, copy=copy ) - @base_not_implemented() def to_xarray(self): # noqa: PR01, RT01, D200 """ Return an xarray object from the `BasePandasDataset`. 
@@ -3769,7 +3723,6 @@ def truediv( div = divide = truediv - @base_not_implemented() def truncate( self, before=None, after=None, axis=None, copy=True ): # noqa: PR01, RT01, D200 @@ -3787,7 +3740,6 @@ def truncate( slice_obj = s if axis == 0 else (slice(None), s) return self.iloc[slice_obj] - @base_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 """ Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self. @@ -3807,7 +3759,6 @@ def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 raise ValueError("transforms cannot produce aggregated results") return result - @base_not_implemented() def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 """ Convert tz-aware axis to target time zone. @@ -3823,7 +3774,6 @@ def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D2 obj = self.copy() if copy else self return obj.set_axis(new_labels, axis, copy=copy) - @base_not_implemented() def tz_localize( self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" ): # noqa: PR01, RT01, D200 @@ -3910,33 +3860,6 @@ def __array__(self, dtype=None): arr = self.to_numpy(dtype) return arr - @base_not_implemented() - def __array_wrap__(self, result, context=None): - """ - Get called after a ufunc and other functions. - - Parameters - ---------- - result : np.ndarray - The result of the ufunc or other function called on the NumPy array - returned by __array__. - context : tuple of (func, tuple, int), optional - This parameter is returned by ufuncs as a 3-element tuple: (name of the - ufunc, arguments of the ufunc, domain of the ufunc), but is not set by - other NumPy functions. - - Returns - ------- - BasePandasDataset - Wrapped Modin object. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - - # TODO: This is very inefficient. 
__array__ and as_matrix have been - # changed to call the more efficient to_numpy, but this has been left - # unchanged since we are not sure of its purpose. - return self._default_to_pandas("__array_wrap__", result, context=context) - def __copy__(self, deep=True): """ Return the copy of the `BasePandasDataset`. @@ -3973,7 +3896,6 @@ def __eq__(self, other): # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return self.eq(other) - @base_not_implemented() def __finalize__(self, other, method=None, **kwargs): """ Propagate metadata from `other` to `self`. @@ -4137,7 +4059,6 @@ def __or__(self, other): def __ror__(self, other): return self._binary_op("__ror__", other, axis=0) - @base_not_implemented() def __sizeof__(self): """ Generate the total memory usage for an `BasePandasDataset`. diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index a6850941faf..fddd36da1b0 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -91,9 +91,6 @@ replace_external_data_keys_with_empty_pandas_series, replace_external_data_keys_with_query_compiler, ) -from snowflake.snowpark.modin.plugin._internal.aggregation_utils import ( - is_snowflake_agg_func, -) from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike from snowflake.snowpark.modin.plugin.utils.error_message import ( @@ -444,6 +441,14 @@ def add_suffix(self, suffix): ) ) + @dataframe_not_implemented() + def map(self, func, na_action: str | None = None, **kwargs) -> DataFrame: + if not callable(func): + raise ValueError(f"'{type(func)}' object is not callable") + return self.__constructor__( + query_compiler=self._query_compiler.map(func, na_action=na_action, **kwargs) + ) + def applymap(self, func: PythonFuncType, na_action: str | None = None, **kwargs): # TODO: SNOW-1063346: Modin 
upgrade - modin.pandas.DataFrame functions if not callable(func): @@ -607,27 +612,6 @@ def keys(self): # noqa: RT01, D200 # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions return self.columns - def transform( - self, func: PythonFuncType, axis: Axis = 0, *args: Any, **kwargs: Any - ) -> DataFrame: # noqa: PR01, RT01, D200 - # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions - if is_list_like(func) or is_dict_like(func): - ErrorMessage.not_implemented( - "dict and list parameters are not supported for transform" - ) - # throw the same error as pandas for cases where the function type is - # invalid. - if not isinstance(func, str) and not callable(func): - raise TypeError(f"{type(func)} object is not callable") - - # if the function is an aggregation function, we'll produce - # some bogus results while pandas will throw the error the - # code below is throwing. So we do the same. - if is_snowflake_agg_func(func): - raise ValueError("Function did not transform") - - return self.apply(func, axis, False, args=args, **kwargs) - def transpose(self, copy=False, *args): # noqa: PR01, RT01, D200 """ Transpose index and columns. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py new file mode 100644 index 00000000000..1e1b81c10cd --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -0,0 +1,300 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +""" +Methods defined on BasePandasDataset that are overridden in Snowpark pandas. Adding a method to this file +should be done with discretion, and only when relevant changes cannot be made to the query compiler or +upstream frontend to accommodate Snowpark pandas. 
+""" +from __future__ import annotations + +import pickle as pkl +from typing import Any + +import numpy as np +import pandas +from modin.pandas.base import BasePandasDataset +from pandas._libs.lib import no_default +from pandas._typing import ( + Axis, + CompressionOptions, + StorageOptions, + TimedeltaConvertibleTypes, +) + +from snowflake.snowpark.modin.pandas.api.extensions import ( + register_dataframe_accessor, + register_series_accessor, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_method_decorator, +) +from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented + + +def register_base_not_implemented(): + def decorator(base_method: Any): + func = snowpark_pandas_telemetry_method_decorator( + base_not_implemented()(base_method) + ) + register_series_accessor(base_method.__name__)(func) + register_dataframe_accessor(base_method.__name__)(func) + return func + + return decorator + + +# === UNIMPLEMENTED METHODS === +# The following methods are not implemented in Snowpark pandas, and must be overridden on the +# frontend. These methods fall into a few categories: +# 1. Would work in Snowpark pandas, but we have not tested it. +# 2. Would work in Snowpark pandas, but requires more SQL queries than we are comfortable with. +# 3. Requires materialization (usually via a frontend _default_to_pandas call). +# 4. Performs operations on a native pandas Index object that are nontrivial for Snowpark pandas to manage. 
+ + +@register_base_not_implemented() +def asof(self, where, subset=None): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def bool(self): # noqa: RT01, D200 + pass + + +@register_base_not_implemented() +def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def ewm( + self, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | BasePandasDataset | None = None, + method: str = "single", +) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def filter( + self, items=None, like=None, regex=None, axis=None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pop(self, item): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def set_flags( + self, *, copy: bool = False, allows_duplicate_labels: bool | None = None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_clipboard( + self, excel=True, sep=None, **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_excel( + self, + excel_writer, + sheet_name="Sheet1", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + 
engine=None, + merge_cells=True, + encoding=no_default, + inf_rep="inf", + verbose=no_default, + freeze_panes=None, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_hdf( + self, path_or_buf, key, format="table", **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_json( + self, + path_or_buf=None, + orient=None, + date_format=None, + double_precision=10, + force_ascii=True, + date_unit="ms", + default_handler=None, + lines=False, + compression="infer", + index=True, + indent=None, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_latex( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + bold_rows=False, + column_format=None, + longtable=None, + escape=None, + encoding=None, + decimal=".", + multicolumn=None, + multicolumn_format=None, + multirow=None, + caption=None, + label=None, + position=None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_markdown( + self, + buf=None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_pickle( + self, + path, + compression: CompressionOptions = "infer", + protocol: int = pkl.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_string( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + min_rows=None, + max_cols=None, + 
show_dimensions=False, + decimal=".", + line_width=None, + max_colwidth=None, + encoding=None, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_sql( + self, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, +): # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_timestamp( + self, freq=None, how="start", axis=0, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_xarray(self): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def truncate( + self, before=None, after=None, axis=None, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def __finalize__(self, other, method=None, **kwargs): + pass diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py index e1cf93529af..4ffde752d3d 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py @@ -10,12 +10,18 @@ from typing import Any import pandas as native_pd +from modin.pandas import DataFrame +from pandas._typing import Axis, PythonFuncType +from pandas.core.dtypes.common import is_dict_like, is_list_like -from snowflake.snowpark.modin import pandas as pd # noqa: F401 from snowflake.snowpark.modin.pandas.api.extensions import register_dataframe_accessor +from snowflake.snowpark.modin.plugin._internal.aggregation_utils import ( + is_snowflake_agg_func, +) from snowflake.snowpark.modin.plugin._internal.telemetry import ( snowpark_pandas_telemetry_method_decorator, ) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from snowflake.snowpark.modin.utils import _inherit_docstrings @@ -105,3 
+111,25 @@ def plot( "DataFrame.plot materializes data to the local machine for plotting." ) return self._to_pandas().plot + + +def transform( + self, func: PythonFuncType, axis: Axis = 0, *args: Any, **kwargs: Any +) -> DataFrame: # noqa: PR01, RT01, D200 + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if is_list_like(func) or is_dict_like(func): + ErrorMessage.not_implemented( + "dict and list parameters are not supported for transform" + ) + # throw the same error as pandas for cases where the function type is + # invalid. + if not isinstance(func, str) and not callable(func): + raise TypeError(f"{type(func)} object is not callable") + + # if the function is an aggregation function, we'll produce + # some bogus results while pandas will throw the error the + # code below is throwing. So we do the same. + if is_snowflake_agg_func(func): + raise ValueError("Function did not transform") + + return self.apply(func, axis, False, args=args, **kwargs) diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index c6b229d876f..31592501bc3 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -11,13 +11,13 @@ import pandas as native_pd -from snowflake.snowpark.modin import pandas as pd # noqa: F401 from snowflake.snowpark.modin.pandas import Series from snowflake.snowpark.modin.pandas.api.extensions import register_series_accessor from snowflake.snowpark.modin.plugin._internal.telemetry import ( snowpark_pandas_telemetry_method_decorator, ) from snowflake.snowpark.modin.plugin._typing import ListLike +from snowflake.snowpark.modin.plugin.utils.error_message import series_not_implemented from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from snowflake.snowpark.modin.utils import _inherit_docstrings @@ -158,3 +158,9 @@ def 
plot( "Series.plot materializes data to the local machine for plotting." ) return self._to_pandas().plot + + +@register_series_accessor("transform") +@series_not_implemented() +def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + pass From ef205ca0dae697075f657490f1c8779b2fbf7eb6 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 15:30:55 -0700 Subject: [PATCH 2/8] fix reindex_like and transform --- src/snowflake/snowpark/modin/pandas/base.py | 127 +++--- .../snowpark/modin/pandas/dataframe.py | 23 + src/snowflake/snowpark/modin/pandas/series.py | 20 + .../compiler/snowflake_query_compiler.py | 6 + .../plugin/extensions/base_not_implemented.py | 414 ++++++++++++++++++ .../modin/plugin/extensions/base_overrides.py | 5 + .../modin/plugin/utils/error_message.py | 2 +- tests/integ/modin/test_unimplemented.py | 30 ++ tests/unit/modin/test_unsupported.py | 32 -- 9 files changed, 572 insertions(+), 87 deletions(-) create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index 943fe43ebbf..1a5d263a6f8 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -896,9 +896,47 @@ def align( Align two objects on their axes with the specified join method. """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "align", - other, + if ( + method is not lib.no_default + or limit is not lib.no_default + or fill_axis is not lib.no_default + ): + warnings.warn( # noqa: B028 + "The 'method', 'limit', and 'fill_axis' keywords in " + + f"{type(self).__name__}.align are deprecated and will be removed " + + "in a future version. 
Call fillna directly on the returned objects " + + "instead.", + FutureWarning, + ) + if fill_axis is lib.no_default: + fill_axis = 0 + if method is lib.no_default: + method = None + if limit is lib.no_default: + limit = None + + if broadcast_axis is not lib.no_default: + msg = ( + f"The 'broadcast_axis' keyword in {type(self).__name__}.align is " + + "deprecated and will be removed in a future version." + ) + if broadcast_axis is not None: + if self.ndim == 1 and other.ndim == 2: + msg += ( + " Use left = DataFrame({col: left for col in right.columns}, " + + "index=right.index) before calling `left.align(right)` instead." + ) + elif self.ndim == 2 and other.ndim == 1: + msg += ( + " Use right = DataFrame({col: right for col in left.columns}, " + + "index=left.index) before calling `left.align(right)` instead" + ) + warnings.warn(msg, FutureWarning) # noqa: B028 + else: + broadcast_axis = None + + left, right = self._query_compiler.align( + other._query_compiler, join=join, axis=axis, level=level, @@ -909,6 +947,9 @@ def align( fill_axis=fill_axis, broadcast_axis=broadcast_axis, ) + return self.__constructor__(query_compiler=left), self.__constructor__( + query_compiler=right + ) def all(self, axis=0, bool_only=None, skipna=True, **kwargs): """ @@ -1159,10 +1200,12 @@ def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 Select values at particular time of day (e.g., 9:30AM). """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - idx = self.index if axis == 0 else self.columns - indexer = pandas.Series(index=idx).at_time(time, asof=asof).index - return self.loc[indexer] if axis == 0 else self.loc[:, indexer] + if asof: + # pandas raises NotImplementedError for asof=True, so we do, too. 
+ raise NotImplementedError("'asof' argument is not supported") + return self.between_time( + start_time=time, end_time=time, inclusive="both", axis=axis + ) def backfill( self, @@ -1195,18 +1238,14 @@ def between_time( axis=None, ): # noqa: PR01, RT01, D200 # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - idx = self.index if axis == 0 else self.columns - indexer = ( - pandas.Series(index=idx) - .between_time( - start_time, - end_time, + return self._create_or_update_from_compiler( + self._query_compiler.between_time( + start_time=pandas.core.tools.times.to_time(start_time), + end_time=pandas.core.tools.times.to_time(end_time), inclusive=inclusive, + axis=self._get_axis_number(axis), ) - .index ) - return self.loc[indexer] if axis == 0 else self.loc[:, indexer] def bfill( self, @@ -2630,22 +2669,6 @@ def reindex( final_query_compiler, inplace=False if copy is None else not copy ) - def reindex_like( - self, other, method=None, copy=True, limit=None, tolerance=None - ): # noqa: PR01, RT01, D200 - """ - Return an object with matching indices as `other` object. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "reindex_like", - other, - method=method, - copy=copy, - limit=limit, - tolerance=tolerance, - ) - def rename_axis( self, mapper=lib.no_default, @@ -3759,42 +3782,38 @@ def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 raise ValueError("transforms cannot produce aggregated results") return result - def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 + def tz_convert(self, tz, axis=0, level=None, copy=None): # noqa: PR01, RT01, D200 """ Convert tz-aware axis to target time zone. 
""" - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - if level is not None: - new_labels = ( - pandas.Series(index=self.axes[axis]).tz_convert(tz, level=level).index - ) - else: - new_labels = self.axes[axis].tz_convert(tz) - obj = self.copy() if copy else self - return obj.set_axis(new_labels, axis, copy=copy) + if copy is None: + copy = True + return self._create_or_update_from_compiler( + self._query_compiler.tz_convert( + tz, axis=self._get_axis_number(axis), level=level, copy=copy + ), + inplace=(not copy), + ) def tz_localize( - self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" + self, tz, axis=0, level=None, copy=None, ambiguous="raise", nonexistent="raise" ): # noqa: PR01, RT01, D200 """ Localize tz-naive index of a `BasePandasDataset` to target time zone. """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - new_labels = ( - pandas.Series(index=self.axes[axis]) - .tz_localize( + if copy is None: + copy = True + return self._create_or_update_from_compiler( + self._query_compiler.tz_localize( tz, - axis=axis, + axis=self._get_axis_number(axis), level=level, - copy=False, + copy=copy, ambiguous=ambiguous, nonexistent=nonexistent, - ) - .index + ), + inplace=(not copy), ) - return self.set_axis(new_labels, axis, copy=copy) def var( self, diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index fddd36da1b0..0c8547697d7 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -2088,6 +2088,29 @@ def reindex( tolerance=tolerance, ) + @dataframe_not_implemented() + def reindex_like( + self, + other, + method=None, + copy: bool | None = None, + limit=None, + tolerance=None, + ) -> DataFrame: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if copy is None: + 
copy = True + # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).": + # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.DataFrame.reindex_like.html + return self.reindex( + index=other.index, + columns=other.columns, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + def replace( self, to_replace=None, diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index e99e9cc89f8..73116c5a43f 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -1745,6 +1745,26 @@ def reindex(self, *args, **kwargs): fill_value=fill_value, ) + @series_not_implemented() + def reindex_like( + self, + other, + method=None, + copy: bool | None = None, + limit=None, + tolerance=None, + ) -> Series: + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).": + # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.Series.reindex_like.html + return self.reindex( + index=other.index, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + def rename_axis( self, mapper=no_default, diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 108b594faf6..a4bc9fa306d 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -17342,3 +17342,9 @@ def compare( """ return result + + def tz_convert(self, *args: Any, **kwargs: Any) -> None: + ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset") + + def tz_localize(self, *args: Any, **kwargs: Any) -> None: + ErrorMessage.method_not_implemented_error("tz_localize", "BasePandasDataset") diff --git 
a/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py b/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py new file mode 100644 index 00000000000..aaf8b86494d --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py @@ -0,0 +1,414 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +""" +The functions in this file are not implemented in Snowpark pandas. In the future, they +should raise NotImplementedError at the query compiler layer, but doing so requires a longer-term +effort. + +We currently test unsupported APIs under tests/unit/modin/test_unsupported.py, which does not initialize +a session. As such, many frontend methods have additional query compiler API calls that would have to +be mocked before the NotImplementedError can appropriately be raised. +""" +from __future__ import annotations + +import pickle as pkl +from typing import Any + +import numpy as np +import pandas +from modin.pandas.base import BasePandasDataset +from pandas._libs import lib +from pandas._libs.lib import no_default +from pandas._typing import ( + Axis, + CompressionOptions, + StorageOptions, + TimedeltaConvertibleTypes, +) + +from snowflake.snowpark.modin.pandas.api.extensions import ( + register_dataframe_accessor, + register_series_accessor, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_method_decorator, +) +from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented + + +def register_base_not_implemented(): + def decorator(base_method: Any): + func = snowpark_pandas_telemetry_method_decorator( + base_not_implemented()(base_method) + ) + register_series_accessor(base_method.__name__)(func) + register_dataframe_accessor(base_method.__name__)(func) + return func + + return decorator + + +@register_base_not_implemented() +def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=None, + 
fill_value=None, + method=lib.no_default, + limit=lib.no_default, + fill_axis=lib.no_default, + broadcast_axis=lib.no_default, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def asof(self, where, subset=None): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def between_time( + self: BasePandasDataset, + start_time, + end_time, + inclusive: str | None = None, + axis=None, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def bool(self): # noqa: RT01, D200 + pass + + +@register_base_not_implemented() +def clip( + self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def combine_first(self, other): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def ewm( + self, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | BasePandasDataset | None = None, + method: str = "single", +) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def filter( + self, items=None, like=None, regex=None, axis=None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def infer_objects( + self, copy: bool | None = None +) -> 
BasePandasDataset: # pragma: no cover # noqa: RT01, D200 + pass + + +@register_base_not_implemented() +def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): + pass + + +@register_base_not_implemented() +def kurtosis(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): + pass + + +@register_base_not_implemented() +def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pop(self, item): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def reindex_like( + self, other, method=None, copy=True, limit=None, tolerance=None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def sem( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only=False, + **kwargs, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def set_flags( + self, *, copy: bool = False, allows_duplicate_labels: bool | None = None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_clipboard( + self, excel=True, sep=None, **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_excel( + self, + excel_writer, + sheet_name="Sheet1", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + engine=None, + merge_cells=True, + encoding=no_default, + inf_rep="inf", + verbose=no_default, + freeze_panes=None, + 
storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_hdf( + self, path_or_buf, key, format="table", **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_json( + self, + path_or_buf=None, + orient=None, + date_format=None, + double_precision=10, + force_ascii=True, + date_unit="ms", + default_handler=None, + lines=False, + compression="infer", + index=True, + indent=None, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_latex( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + bold_rows=False, + column_format=None, + longtable=None, + escape=None, + encoding=None, + decimal=".", + multicolumn=None, + multicolumn_format=None, + multirow=None, + caption=None, + label=None, + position=None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_markdown( + self, + buf=None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_pickle( + self, + path, + compression: CompressionOptions = "infer", + protocol: int = pkl.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_string( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + min_rows=None, + max_cols=None, + show_dimensions=False, + decimal=".", + line_width=None, + max_colwidth=None, + encoding=None, +): # noqa: PR01, RT01, D200 + 
pass + + +@register_base_not_implemented() +def to_sql( + self, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, +): # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_timestamp( + self, freq=None, how="start", axis=0, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_xarray(self): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def truncate( + self, before=None, after=None, axis=None, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def tz_localize( + self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def __array_wrap__(self, result, context=None): + pass + + +@register_base_not_implemented() +def __finalize__(self, other, method=None, **kwargs): + pass + + +@register_base_not_implemented() +def __sizeof__(self): + pass diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py index 1e1b81c10cd..95be5478309 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -288,6 +288,11 @@ def to_xarray(self): # noqa: PR01, RT01, D200 pass +@register_base_not_implemented() +def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + pass + + @register_base_not_implemented() def truncate( self, before=None, after=None, axis=None, copy=True diff --git a/src/snowflake/snowpark/modin/plugin/utils/error_message.py b/src/snowflake/snowpark/modin/plugin/utils/error_message.py index 997af701f2b..9a29ca98903 100644 --- 
a/src/snowflake/snowpark/modin/plugin/utils/error_message.py +++ b/src/snowflake/snowpark/modin/plugin/utils/error_message.py @@ -178,7 +178,7 @@ def method_not_implemented_error( class_: str The class of Snowpark pandas function associated with the method. """ - message = f"{name} is not yet implemented for {class_}" + message = f"Snowpark pandas does not yet support the method {class_}.{name}" ErrorMessage.not_implemented(message) @staticmethod diff --git a/tests/integ/modin/test_unimplemented.py b/tests/integ/modin/test_unimplemented.py index 5e865c418b4..0263c9f0f5f 100644 --- a/tests/integ/modin/test_unimplemented.py +++ b/tests/integ/modin/test_unimplemented.py @@ -43,9 +43,35 @@ def eval_and_validate_unsupported_methods( func(snow_pd_args) +def unimplemented_dt_index_helper(name, *args): + # Helper method for methods that require the frame to have a DatetimeIndex and tz-aware timestamp data. + # If the argument is a native pandas object, then convert its index to DatetimeIndex. + # If the argument is a Snowpark pandas object, pass it as-is, since it should fail at the + # query compiler layer without validating the index object. 
+ def helper(df): + if isinstance(df, (native_pd.DataFrame, native_pd.Series)): + # When the method is tz_convert, the index must already be tz-aware + # otherwise leave it tz-naive + df.index = native_pd.to_datetime(range(len(df)), utc=name == "tz_convert") + return getattr(df, name)(*args) + + return helper, name + + # unsupported methods for both dataframe and series UNSUPPORTED_DATAFRAME_SERIES_METHODS = [ (lambda df: df.cumprod(), "cumprod"), + unimplemented_dt_index_helper("at_time", "12:00"), + unimplemented_dt_index_helper("between_time", "12:00", "13:00"), + (lambda df: df.explode("a"), "explode"), + (lambda df: df.infer_objects(), "infer_objects"), + (lambda df: df.kurt(), "kurt"), + (lambda df: df.kurtosis(), "kurtosis"), + (lambda df: df.mode(), "mode"), + (lambda df: df.sem(), "sem"), + (lambda df: df.transform(lambda x: x + 1), "transform"), + unimplemented_dt_index_helper("tz_convert", "US/Central"), + unimplemented_dt_index_helper("tz_localize", "US/Central"), ] # unsupported methods that can only be applied on dataframe @@ -65,6 +91,10 @@ def eval_and_validate_unsupported_methods( UNSUPPORTED_BINARY_METHODS = [ # TODO SNOW-862664, support together with combine # (lambda dfs: dfs[0].combine(dfs[1], np.minimum, fill_value=1), "combine"), + (lambda dfs: dfs[0].align(dfs[1]), "align"), + (lambda dfs: dfs[0].combine(dfs[1], func=lambda a, b: a), "combine"), + (lambda dfs: dfs[0].combine_first(dfs[1]), "combine_first"), + (lambda dfs: dfs[0].reindex_like(dfs[1]), "reindex_like"), (lambda dfs: dfs[0].update(dfs[1]), "update"), ] diff --git a/tests/unit/modin/test_unsupported.py b/tests/unit/modin/test_unsupported.py index 5a54a1f32c7..f33babcca28 100644 --- a/tests/unit/modin/test_unsupported.py +++ b/tests/unit/modin/test_unsupported.py @@ -60,40 +60,27 @@ def test_unsupported_general(general_method, kwargs): @pytest.mark.parametrize( "df_method, kwargs", [ - ["align", {"other": ""}], ["asof", {"where": ""}], - ["at_time", {"time": ""}], - 
["between_time", {"start_time": "", "end_time": ""}], ["bool", {}], ["boxplot", {}], - ["clip", {}], - ["combine", {"other": "", "func": ""}], - ["combine_first", {"other": ""}], ["corrwith", {"other": ""}], ["cov", {}], ["dot", {"other": ""}], ["droplevel", {"level": ""}], ["eval", {"expr": "xxx"}], ["ewm", {}], - ["explode", {"column": ""}], ["filter", {}], ["from_dict", {"data": ""}], ["from_records", {"data": ""}], ["hist", {}], - ["infer_objects", {}], ["interpolate", {}], ["isetitem", {"loc": "", "value": ""}], - ["kurt", {}], - ["kurtosis", {}], - ["mode", {}], ["pipe", {"func": ""}], ["pop", {"item": ""}], ["prod", {}], ["product", {}], ["query", {"expr": ""}], - ["reindex_like", {"other": ""}], ["reorder_levels", {"order": ""}], - ["sem", {}], ["set_flags", {}], ["style", {}], ["swapaxes", {"axis1": "", "axis2": ""}], @@ -118,10 +105,7 @@ def test_unsupported_general(general_method, kwargs): ["to_timestamp", {}], ["to_xarray", {}], ["to_xml", {}], - ["transform", {"func": [[], {}]}], ["truncate", {}], - ["tz_convert", {"tz": ""}], - ["tz_localize", {"tz": ""}], ["xs", {"key": ""}], ["__dataframe__", {}], ], @@ -138,47 +122,34 @@ def test_unsupported_df(df_method, kwargs): @pytest.mark.parametrize( "series_method, kwargs", [ - ["align", {"other": ""}], ["argmax", {}], ["argmin", {}], ["argsort", {}], ["array", {}], ["asof", {"where": ""}], - ["at_time", {"time": ""}], ["autocorr", {}], ["between", {"left": "", "right": ""}], - ["between_time", {"start_time": "", "end_time": ""}], ["bool", {}], - ["clip", {}], - ["combine", {"other": "", "func": ""}], - ["combine_first", {"other": ""}], ["corr", {"other": ""}], ["cov", {"other": ""}], ["divmod", {"other": ""}], ["dot", {"other": ""}], ["droplevel", {"level": ""}], ["ewm", {}], - ["explode", {}], ["factorize", {}], ["filter", {}], ["hist", {}], - ["infer_objects", {}], ["interpolate", {}], ["item", {}], - ["kurt", {}], - ["kurtosis", {}], - ["mode", {}], ["nbytes", {}], ["pipe", {"func": ""}], ["pop", 
{"item": ""}], ["prod", {}], ["ravel", {}], - ["reindex_like", {"other": ""}], ["reorder_levels", {"order": ""}], ["repeat", {"repeats": ""}], ["rdivmod", {"other": ""}], ["searchsorted", {"value": ""}], - ["sem", {}], ["set_flags", {}], ["swapaxes", {"axis1": "", "axis2": ""}], ["swaplevel", {}], @@ -194,10 +165,7 @@ def test_unsupported_df(df_method, kwargs): ["to_string", {}], ["to_timestamp", {}], ["to_xarray", {}], - ["transform", {"func": ""}], ["truncate", {}], - ["tz_convert", {"tz": ""}], - ["tz_localize", {"tz": ""}], ["view", {}], ["xs", {"key": ""}], ], From 1062d2f481d0db540712bcc959cc0062e5780e98 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 16:07:18 -0700 Subject: [PATCH 3/8] fix transform --- .../snowpark/modin/plugin/extensions/dataframe_overrides.py | 2 ++ tests/integ/modin/test_unimplemented.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py index 4ffde752d3d..c80a23b8d54 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py @@ -113,6 +113,8 @@ def plot( return self._to_pandas().plot +@register_dataframe_accessor("transform") +@snowpark_pandas_telemetry_method_decorator def transform( self, func: PythonFuncType, axis: Axis = 0, *args: Any, **kwargs: Any ) -> DataFrame: # noqa: PR01, RT01, D200 diff --git a/tests/integ/modin/test_unimplemented.py b/tests/integ/modin/test_unimplemented.py index 0263c9f0f5f..d5c1c9ac91c 100644 --- a/tests/integ/modin/test_unimplemented.py +++ b/tests/integ/modin/test_unimplemented.py @@ -69,7 +69,6 @@ def helper(df): (lambda df: df.kurtosis(), "kurtosis"), (lambda df: df.mode(), "mode"), (lambda df: df.sem(), "sem"), - (lambda df: df.transform(lambda x: x + 1), "transform"), unimplemented_dt_index_helper("tz_convert", "US/Central"), 
unimplemented_dt_index_helper("tz_localize", "US/Central"), ] @@ -84,6 +83,7 @@ def helper(df): UNSUPPORTED_SERIES_METHODS = [ (lambda se: se.is_monotonic_increasing, "property fget:is_monotonic_increasing"), (lambda se: se.is_monotonic_decreasing, "property fget:is_monotonic_decreasing"), + (lambda df: df.transform(lambda x: x + 1), "transform"), ] # unsupported binary operations that can be applied on both dataframe and series From d4b52d79bd2e74cc46e5b3c62d2627abe95237cb Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 16:51:55 -0700 Subject: [PATCH 4/8] update docs --- src/snowflake/snowpark/modin/plugin/docstrings/base.py | 10 ---------- .../snowpark/modin/plugin/docstrings/dataframe.py | 10 ++++++++++ .../snowpark/modin/plugin/docstrings/series.py | 5 +++++ .../modin/plugin/extensions/series_overrides.py | 5 +++++ 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py index 13d2fc6946d..a6a0aff1af4 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py @@ -1093,11 +1093,6 @@ def drop_duplicates(): Return `BasePandasDataset` with duplicate rows removed. """ - def map(): - """ - Apply a function to `BasePandasDataset elementwise. - """ - def mask(): """ Replace values where the condition is True. @@ -2282,11 +2277,6 @@ def reindex(): Conform `BasePandasDataset` to new index with optional filling logic. """ - def reindex_like(): - """ - Return an object with matching indices as `other` object. - """ - def rename_axis(): """ Set the name of the axis for the index or columns. 
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py index 6d093eac1d9..a42ef48eb94 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py @@ -3320,6 +3320,11 @@ def reindex(): is the previous index value when the data is sorted. """ + def reindex_like(): + """ + Return an object with matching indices as `other` object. + """ + def replace(): """ Replace values given in `to_replace` with `value`. @@ -4522,6 +4527,11 @@ def value_counts(): Name: count, dtype: int64 """ + def map(): + """ + Apply a function to the `DataFrame` elementwise. + """ + def mask(): """ Replace values where the condition is True. diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index c73b1f43ca8..75ea0e39fb9 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -2432,6 +2432,11 @@ def reindex(): is the previous index value when the data is sorted. """ + def reindex_like(): + """ + Return an object with matching indices as `other` object. + """ + def rename_axis(): """ Set the name of the axis for the index or columns. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index 31592501bc3..c564007e287 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -163,4 +163,9 @@ def plot( @register_series_accessor("transform") @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Call ``func`` on self producing a `Series` with the same axis shape as self. + + Snowpark pandas does not yet support this method for Series. 
+ """ pass From bef43fb0c514e6bde0160c1e8c88d4a6b229fc31 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 16:56:42 -0700 Subject: [PATCH 5/8] fix assertion message --- tests/integ/modin/groupby/test_groupby_get_group.py | 2 +- tests/unit/modin/test_groupby_unsupported.py | 4 ++-- tests/unit/modin/test_series_strings.py | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/integ/modin/groupby/test_groupby_get_group.py b/tests/integ/modin/groupby/test_groupby_get_group.py index b17be1914fe..c83fecd24de 100644 --- a/tests/integ/modin/groupby/test_groupby_get_group.py +++ b/tests/integ/modin/groupby/test_groupby_get_group.py @@ -77,7 +77,7 @@ def test_groupby_get_group(by): # DataFrame with __getitem__ with pytest.raises( NotImplementedError, - match="get_group is not yet implemented for SeriesGroupBy", + match="Snowpark pandas does not yet support the method SeriesGroupBy.get_group", ): snowpark_pandas_df.groupby(by)["col5_int16"].get_group(name) diff --git a/tests/unit/modin/test_groupby_unsupported.py b/tests/unit/modin/test_groupby_unsupported.py index afd8e7feeaf..efc48724055 100644 --- a/tests/unit/modin/test_groupby_unsupported.py +++ b/tests/unit/modin/test_groupby_unsupported.py @@ -48,7 +48,7 @@ def test_series_groupby_unsupported_methods_raises( mock_series, func, func_name ) -> None: - msg = f"{func_name} is not yet implemented for GroupBy" + msg = f"Snowpark pandas does not yet support the method GroupBy.{func_name}" with pytest.raises(NotImplementedError, match=msg): func(mock_series) @@ -92,6 +92,6 @@ def test_series_groupby_unsupported_methods_raises( def test_dataframe_groupby_unsupported_methods_raises( mock_dataframe, func, func_name ) -> None: - msg = f"{func_name} is not yet implemented for GroupBy" + msg = f"Snowpark pandas does not yet support the method GroupBy.{func_name}" with pytest.raises(NotImplementedError, match=msg): func(mock_dataframe) diff --git a/tests/unit/modin/test_series_strings.py 
b/tests/unit/modin/test_series_strings.py index 9fc78f519c6..2e643356934 100644 --- a/tests/unit/modin/test_series_strings.py +++ b/tests/unit/modin/test_series_strings.py @@ -21,7 +21,8 @@ def test_str_cat_no_others(mock_str_register, mock_series): return_callable.return_value = result_query_compiler mock_str_register.return_value = return_callable with pytest.raises( - NotImplementedError, match="cat is not yet implemented for Series.str" + NotImplementedError, + match="Snowpark pandas does not yet support the method Series.str.cat", ): mock_series.str.cat() @@ -65,7 +66,7 @@ def test_str_cat_no_others(mock_str_register, mock_series): def test_str_methods_with_series_return(func, func_name, mock_series): with pytest.raises( NotImplementedError, - match=f"{func_name} is not yet implemented for Series.str", + match=f"Snowpark pandas does not yet support the method Series.str.{func_name}", ): func(mock_series) @@ -81,7 +82,8 @@ def test_str_methods_with_series_return(func, func_name, mock_series): ) def test_str_methods_with_dataframe_return(func, func_name, mock_series): with pytest.raises( - NotImplementedError, match="is not yet implemented for Series.str" + NotImplementedError, + match="Snowpark pandas does not yet support the method Series.str.", ): func(mock_series) From 39cc24a63ac87c52f68fa9baea252a73886c418d Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Fri, 9 Aug 2024 13:41:44 -0700 Subject: [PATCH 6/8] fix ci --- src/snowflake/snowpark/modin/plugin/docstrings/series.py | 5 +++++ .../snowpark/modin/plugin/extensions/series_overrides.py | 5 ----- tests/integ/modin/frame/test_filter.py | 3 +-- tests/integ/modin/strings/test_case_justify.py | 3 +-- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 75ea0e39fb9..6e48a7e57f3 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ 
b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3375,6 +3375,11 @@ def to_timestamp(): Cast to DatetimeIndex of Timestamps, at beginning of period. """ + def transform(): + """ + Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self. + """ + def transpose(): """ Return the transpose, which is by definition `self`. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index c564007e287..31592501bc3 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -163,9 +163,4 @@ def plot( @register_series_accessor("transform") @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Call ``func`` on self producing a `Series` with the same axis shape as self. - - Snowpark pandas does not yet support this method for Series. - """ pass diff --git a/tests/integ/modin/frame/test_filter.py b/tests/integ/modin/frame/test_filter.py index 577a0d37446..723f4be7454 100644 --- a/tests/integ/modin/frame/test_filter.py +++ b/tests/integ/modin/frame/test_filter.py @@ -3,7 +3,6 @@ # import random -import re import modin.pandas as pd import numpy as np @@ -89,7 +88,7 @@ def test_filtering_with_self_not_implemented( snow_df = pd.DataFrame(data) with pytest.raises( NotImplementedError, - match=re.escape("casefold is not yet implemented for Series.str"), + match="Snowpark pandas does not yet support the method Series.str.casefold", ): func(snow_df) diff --git a/tests/integ/modin/strings/test_case_justify.py b/tests/integ/modin/strings/test_case_justify.py index ec3cfd58cbf..0ba37a39a73 100644 --- a/tests/integ/modin/strings/test_case_justify.py +++ b/tests/integ/modin/strings/test_case_justify.py @@ -1,7 +1,6 @@ # # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
# -import re import modin.pandas as pd import pandas as native_pd @@ -23,6 +22,6 @@ def test_title(): @sql_count_checker(query_count=0) def test_casefold_not_implemented(): s = pd.Series(["ß", "case", "ßd"]) - msg = re.escape("casefold is not yet implemented for Series.str") + msg = "Snowpark pandas does not yet support the method Series.str.casefold" with pytest.raises(NotImplementedError, match=msg): s.str.casefold() From d719e1836461eb79207bcc616c801b90731436d3 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Mon, 12 Aug 2024 15:19:46 -0700 Subject: [PATCH 7/8] remove unused file and no cover on unimplemented --- .../plugin/extensions/base_not_implemented.py | 414 ------------------ .../modin/plugin/extensions/base_overrides.py | 50 +-- .../plugin/extensions/series_overrides.py | 2 +- 3 files changed, 26 insertions(+), 440 deletions(-) delete mode 100644 src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py b/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py deleted file mode 100644 index aaf8b86494d..00000000000 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py +++ /dev/null @@ -1,414 +0,0 @@ -# -# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. -# - -""" -The functions in this file are not implemented in Snowpark pandas. In the future, they -should raise NotImplementedError at the query compiler layer, but doing so requires a longer-term -effort. - -We currently test unsupported APIs under tests/unit/modin/test_unsupported.py, which does not initialize -a session. As such, many frontend methods have additional query compiler API calls that would have to -be mocked before the NotImplementedError can appropriately be raised. 
-""" -from __future__ import annotations - -import pickle as pkl -from typing import Any - -import numpy as np -import pandas -from modin.pandas.base import BasePandasDataset -from pandas._libs import lib -from pandas._libs.lib import no_default -from pandas._typing import ( - Axis, - CompressionOptions, - StorageOptions, - TimedeltaConvertibleTypes, -) - -from snowflake.snowpark.modin.pandas.api.extensions import ( - register_dataframe_accessor, - register_series_accessor, -) -from snowflake.snowpark.modin.plugin._internal.telemetry import ( - snowpark_pandas_telemetry_method_decorator, -) -from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented - - -def register_base_not_implemented(): - def decorator(base_method: Any): - func = snowpark_pandas_telemetry_method_decorator( - base_not_implemented()(base_method) - ) - register_series_accessor(base_method.__name__)(func) - register_dataframe_accessor(base_method.__name__)(func) - return func - - return decorator - - -@register_base_not_implemented() -def align( - self, - other, - join="outer", - axis=None, - level=None, - copy=None, - fill_value=None, - method=lib.no_default, - limit=lib.no_default, - fill_axis=lib.no_default, - broadcast_axis=lib.no_default, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def asof(self, where, subset=None): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def between_time( - self: BasePandasDataset, - start_time, - end_time, - inclusive: str | None = None, - axis=None, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def bool(self): # noqa: RT01, D200 - pass - - -@register_base_not_implemented() -def clip( - self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def 
combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def combine_first(self, other): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def ewm( - self, - com: float | None = None, - span: float | None = None, - halflife: float | TimedeltaConvertibleTypes | None = None, - alpha: float | None = None, - min_periods: int | None = 0, - adjust: bool = True, - ignore_na: bool = False, - axis: Axis = 0, - times: str | np.ndarray | BasePandasDataset | None = None, - method: str = "single", -) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def filter( - self, items=None, like=None, regex=None, axis=None -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def infer_objects( - self, copy: bool | None = None -) -> BasePandasDataset: # pragma: no cover # noqa: RT01, D200 - pass - - -@register_base_not_implemented() -def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): - pass - - -@register_base_not_implemented() -def kurtosis(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): - pass - - -@register_base_not_implemented() -def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def pop(self, item): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def reindex_like( - self, other, method=None, copy=True, limit=None, tolerance=None -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def 
reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def sem( - self, - axis: Axis | None = None, - skipna: bool = True, - ddof: int = 1, - numeric_only=False, - **kwargs, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def set_flags( - self, *, copy: bool = False, allows_duplicate_labels: bool | None = None -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_clipboard( - self, excel=True, sep=None, **kwargs -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_excel( - self, - excel_writer, - sheet_name="Sheet1", - na_rep="", - float_format=None, - columns=None, - header=True, - index=True, - index_label=None, - startrow=0, - startcol=0, - engine=None, - merge_cells=True, - encoding=no_default, - inf_rep="inf", - verbose=no_default, - freeze_panes=None, - storage_options: StorageOptions = None, -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_hdf( - self, path_or_buf, key, format="table", **kwargs -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_json( - self, - path_or_buf=None, - orient=None, - date_format=None, - double_precision=10, - force_ascii=True, - date_unit="ms", - default_handler=None, - lines=False, - compression="infer", - index=True, - indent=None, - storage_options: StorageOptions = None, -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_latex( - self, - buf=None, - columns=None, - col_space=None, - header=True, - index=True, - na_rep="NaN", - formatters=None, - float_format=None, - sparsify=None, - index_names=True, 
- bold_rows=False, - column_format=None, - longtable=None, - escape=None, - encoding=None, - decimal=".", - multicolumn=None, - multicolumn_format=None, - multirow=None, - caption=None, - label=None, - position=None, -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_markdown( - self, - buf=None, - mode: str = "wt", - index: bool = True, - storage_options: StorageOptions = None, - **kwargs, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_pickle( - self, - path, - compression: CompressionOptions = "infer", - protocol: int = pkl.HIGHEST_PROTOCOL, - storage_options: StorageOptions = None, -): # pragma: no cover # noqa: PR01, D200 - pass - - -@register_base_not_implemented() -def to_string( - self, - buf=None, - columns=None, - col_space=None, - header=True, - index=True, - na_rep="NaN", - formatters=None, - float_format=None, - sparsify=None, - index_names=True, - justify=None, - max_rows=None, - min_rows=None, - max_cols=None, - show_dimensions=False, - decimal=".", - line_width=None, - max_colwidth=None, - encoding=None, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_sql( - self, - name, - con, - schema=None, - if_exists="fail", - index=True, - index_label=None, - chunksize=None, - dtype=None, - method=None, -): # noqa: PR01, D200 - pass - - -@register_base_not_implemented() -def to_timestamp( - self, freq=None, how="start", axis=0, copy=True -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_xarray(self): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def truncate( - self, before=None, after=None, axis=None, copy=True -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def tz_localize( - self, tz, axis=0, level=None, copy=True, ambiguous="raise", 
nonexistent="raise" -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def __array_wrap__(self, result, context=None): - pass - - -@register_base_not_implemented() -def __finalize__(self, other, method=None, **kwargs): - pass - - -@register_base_not_implemented() -def __sizeof__(self): - pass diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py index 95be5478309..332df757787 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -56,17 +56,17 @@ def decorator(base_method: Any): @register_base_not_implemented() def asof(self, where, subset=None): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def bool(self): # noqa: RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -83,53 +83,53 @@ def ewm( times: str | np.ndarray | BasePandasDataset | None = None, method: str = "single", ) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def filter( self, items=None, like=None, regex=None, axis=None ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def pop(self, item): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def set_flags( self, *, copy: bool = False, allows_duplicate_labels: bool | None = None ): # noqa: PR01, RT01, D200 - pass + 
pass # pragma: no cover @register_base_not_implemented() def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_clipboard( self, excel=True, sep=None, **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -153,14 +153,14 @@ def to_excel( freeze_panes=None, storage_options: StorageOptions = None, ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_hdf( self, path_or_buf, key, format="table", **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -179,7 +179,7 @@ def to_json( indent=None, storage_options: StorageOptions = None, ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -208,7 +208,7 @@ def to_latex( label=None, position=None, ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -220,7 +220,7 @@ def to_markdown( storage_options: StorageOptions = None, **kwargs, ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -231,7 +231,7 @@ def to_pickle( protocol: int = pkl.HIGHEST_PROTOCOL, storage_options: StorageOptions = None, ): # pragma: no cover # noqa: PR01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -257,7 +257,7 @@ def to_string( max_colwidth=None, encoding=None, ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -273,33 +273,33 @@ def to_sql( dtype=None, method=None, ): # noqa: PR01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_timestamp( self, freq=None, 
how="start", axis=0, copy=True ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_xarray(self): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def truncate( self, before=None, after=None, axis=None, copy=True ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def __finalize__(self, other, method=None, **kwargs): - pass + pass # pragma: no cover diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index 31592501bc3..0afea30e29a 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -163,4 +163,4 @@ def plot( @register_series_accessor("transform") @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover From 18dfe8a79fc079f047f36cb18ff383bee2fc5036 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 13 Aug 2024 14:48:02 -0700 Subject: [PATCH 8/8] add no cover --- src/snowflake/snowpark/modin/pandas/base.py | 4 ++-- src/snowflake/snowpark/modin/pandas/dataframe.py | 6 ++++-- src/snowflake/snowpark/modin/pandas/series.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index 1a5d263a6f8..c08cdee1386 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -891,7 +891,7 @@ def align( limit=lib.no_default, fill_axis=lib.no_default, broadcast_axis=lib.no_default, - ): # noqa: PR01, RT01, D200 + ): # pragma: no cover # noqa: PR01, RT01, D200 """ Align two objects 
on their axes with the specified join method. """ @@ -1200,7 +1200,7 @@ def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 Select values at particular time of day (e.g., 9:30AM). """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if asof: + if asof: # pragma: no cover # pandas raises NotImplementedError for asof=True, so we do, too. raise NotImplementedError("'asof' argument is not supported") return self.between_time( diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index 0c8547697d7..a7d53813779 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -442,7 +442,9 @@ def add_suffix(self, suffix): ) @dataframe_not_implemented() - def map(self, func, na_action: str | None = None, **kwargs) -> DataFrame: + def map( + self, func, na_action: str | None = None, **kwargs + ) -> DataFrame: # pragma: no cover if not callable(func): raise ValueError(f"'{type(func)}' object is not callable") return self.__constructor__( @@ -2096,7 +2098,7 @@ def reindex_like( copy: bool | None = None, limit=None, tolerance=None, - ) -> DataFrame: + ) -> DataFrame: # pragma: no cover # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions if copy is None: copy = True diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index 73116c5a43f..1ce3ecfc997 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -1753,7 +1753,7 @@ def reindex_like( copy: bool | None = None, limit=None, tolerance=None, - ) -> Series: + ) -> Series: # pragma: no cover # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).": # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.Series.reindex_like.html