From 87ab43e4af75bb6b5e483e06b206b286fb1f74dd Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 15:10:50 -0700 Subject: [PATCH 1/8] a --- .../snowpark/modin/pandas/__init__.py | 1 + src/snowflake/snowpark/modin/pandas/base.py | 81 +---- .../snowpark/modin/pandas/dataframe.py | 32 +- .../modin/plugin/extensions/base_overrides.py | 300 ++++++++++++++++++ .../plugin/extensions/dataframe_overrides.py | 30 +- .../plugin/extensions/series_overrides.py | 8 +- 6 files changed, 346 insertions(+), 106 deletions(-) create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py diff --git a/src/snowflake/snowpark/modin/pandas/__init__.py b/src/snowflake/snowpark/modin/pandas/__init__.py index dcf5db871a2..975289684cf 100644 --- a/src/snowflake/snowpark/modin/pandas/__init__.py +++ b/src/snowflake/snowpark/modin/pandas/__init__.py @@ -157,6 +157,7 @@ Index, DatetimeIndex, ) +import snowflake.snowpark.modin.plugin.extensions.base_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.series_extensions # isort: skip # noqa: E402,F401 diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index f7e9da9f89a..943fe43ebbf 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -84,10 +84,7 @@ ) from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._typing import ListLike -from snowflake.snowpark.modin.plugin.utils.error_message import ( - ErrorMessage, - base_not_implemented, -) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from 
snowflake.snowpark.modin.utils import ( _inherit_docstrings, @@ -882,7 +879,6 @@ def _get_dtypes(self): else: return list(self.dtypes) - @base_not_implemented() def align( self, other, @@ -1074,7 +1070,6 @@ def asfreq( ) ) - @base_not_implemented() def asof(self, where, subset=None): # noqa: PR01, RT01, D200 """ Return the last row(s) without any NaNs before `where`. @@ -1159,7 +1154,6 @@ def at(self, axis=None): # noqa: PR01, RT01, D200 return _AtIndexer(self) - @base_not_implemented() def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 """ Select values at particular time of day (e.g., 9:30AM). @@ -1190,7 +1184,6 @@ def backfill( method="bfill", axis=axis, limit=limit, downcast=downcast, inplace=inplace ) - @base_not_implemented() @_inherit_docstrings( pandas.DataFrame.between_time, apilink="pandas.DataFrame.between_time" ) @@ -1230,7 +1223,6 @@ def bfill( method="bfill", axis=axis, limit=limit, downcast=downcast, inplace=inplace ) - @base_not_implemented() def bool(self): # noqa: RT01, D200 """ Return the bool of a single element `BasePandasDataset`. @@ -1248,7 +1240,6 @@ def bool(self): # noqa: RT01, D200 else: return self._to_pandas().bool() - @base_not_implemented() def clip( self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs ): # noqa: PR01, RT01, D200 @@ -1279,7 +1270,6 @@ def clip( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - @base_not_implemented() def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 """ Perform combination of `BasePandasDataset`-s according to `func`. @@ -1289,7 +1279,6 @@ def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, "combine", other, axis=0, func=func, fill_value=fill_value, **kwargs ) - @base_not_implemented() def combine_first(self, other): # noqa: PR01, RT01, D200 """ Update null elements with value in the same location in `other`. 
@@ -1545,7 +1534,6 @@ def _dropna( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - @base_not_implemented() def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 """ Return `BasePandasDataset` with requested index / column level(s) removed. @@ -1588,15 +1576,6 @@ def drop_duplicates( else: return result - @base_not_implemented() - def map(self, func, na_action: str | None = None, **kwargs): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if not callable(func): - raise ValueError(f"'{type(func)}' object is not callable") - return self.__constructor__( - query_compiler=self._query_compiler.map(func, na_action=na_action, **kwargs) - ) - def mask( self, cond: BasePandasDataset | Callable | AnyArrayLike, @@ -1722,7 +1701,6 @@ def eq(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return self._binary_op("eq", other, axis=axis, level=level, dtypes=np.bool_) - @base_not_implemented() def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 """ Transform each element of a list-like to a row. 
@@ -1735,7 +1713,6 @@ def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 exploded = exploded.reset_index(drop=True) return exploded - @base_not_implemented() def ewm( self, com: float | None = None, @@ -1906,7 +1883,6 @@ def fillna( ) return self._create_or_update_from_compiler(new_query_compiler, inplace) - @base_not_implemented() def filter( self, items=None, like=None, regex=None, axis=None ): # noqa: PR01, RT01, D200 @@ -2064,7 +2040,6 @@ def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, ) ) - @base_not_implemented() def infer_objects( self, copy: bool | None = None ) -> BasePandasDataset: # pragma: no cover # noqa: RT01, D200 @@ -2145,7 +2120,6 @@ def iloc(self): return _iLocIndexer(self) - @base_not_implemented() def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset validate_bool_kwarg(skipna, "skipna", none_allowed=False) @@ -2339,7 +2313,6 @@ def mod( "mod", other, axis=axis, level=level, fill_value=fill_value ) - @base_not_implemented() def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 """ Get the mode(s) of each element along the selected axis. @@ -2466,7 +2439,6 @@ def pct_change( ) ) - @base_not_implemented() def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 """ Apply chainable functions that expect `BasePandasDataset`. @@ -2474,7 +2446,6 @@ def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return pipe(self, func, *args, **kwargs) - @base_not_implemented() def pop(self, item): # noqa: PR01, RT01, D200 """ Return item and drop from frame. Raise KeyError if not found. 
@@ -2659,7 +2630,6 @@ def reindex( final_query_compiler, inplace=False if copy is None else not copy ) - @base_not_implemented() def reindex_like( self, other, method=None, copy=True, limit=None, tolerance=None ): # noqa: PR01, RT01, D200 @@ -2745,7 +2715,6 @@ def f(x): if not inplace: return result - @base_not_implemented() def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 """ Rearrange index levels using input order. @@ -3035,7 +3004,6 @@ def sample( ) return self.__constructor__(query_compiler=query_compiler) - @base_not_implemented() def sem( self, axis: Axis | None = None, @@ -3088,7 +3056,6 @@ def median( **kwargs, ) - @base_not_implemented() def set_flags( self, *, copy: bool = False, allows_duplicate_labels: bool | None = None ): # noqa: PR01, RT01, D200 @@ -3313,7 +3280,6 @@ def sub( subtract = sub - @base_not_implemented() def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 """ Interchange axes and swap values axes appropriately. @@ -3327,7 +3293,6 @@ def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 return self.copy() return self - @base_not_implemented() def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 """ Swap levels `i` and `j` in a `MultiIndex`. 
@@ -3355,7 +3320,6 @@ def take( slice_obj = indices if axis == 0 else (slice(None), indices) return self.iloc[slice_obj] - @base_not_implemented() def to_clipboard( self, excel=True, sep=None, **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 @@ -3418,7 +3382,6 @@ def to_csv( storage_options=storage_options, ) - @base_not_implemented() def to_excel( self, excel_writer, @@ -3462,7 +3425,6 @@ def to_excel( storage_options=storage_options, ) - @base_not_implemented() def to_hdf( self, path_or_buf, key, format="table", **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 @@ -3474,7 +3436,6 @@ def to_hdf( "to_hdf", path_or_buf, key, format=format, **kwargs ) - @base_not_implemented() def to_json( self, path_or_buf=None, @@ -3510,7 +3471,6 @@ def to_json( storage_options=storage_options, ) - @base_not_implemented() def to_latex( self, buf=None, @@ -3566,7 +3526,6 @@ def to_latex( position=position, ) - @base_not_implemented() def to_markdown( self, buf=None, @@ -3588,7 +3547,6 @@ def to_markdown( **kwargs, ) - @base_not_implemented() def to_pickle( self, path, @@ -3643,7 +3601,6 @@ def to_period( # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return self._default_to_pandas("to_period", freq=freq, axis=axis, copy=copy) - @base_not_implemented() def to_string( self, buf=None, @@ -3692,7 +3649,6 @@ def to_string( encoding=encoding, ) - @base_not_implemented() def to_sql( self, name, @@ -3736,7 +3692,6 @@ def to_sql( ) # TODO(williamma12): When this gets implemented, have the series one call this. - @base_not_implemented() def to_timestamp( self, freq=None, how="start", axis=0, copy=True ): # noqa: PR01, RT01, D200 @@ -3748,7 +3703,6 @@ def to_timestamp( "to_timestamp", freq=freq, how=how, axis=axis, copy=copy ) - @base_not_implemented() def to_xarray(self): # noqa: PR01, RT01, D200 """ Return an xarray object from the `BasePandasDataset`. 
@@ -3769,7 +3723,6 @@ def truediv( div = divide = truediv - @base_not_implemented() def truncate( self, before=None, after=None, axis=None, copy=True ): # noqa: PR01, RT01, D200 @@ -3787,7 +3740,6 @@ def truncate( slice_obj = s if axis == 0 else (slice(None), s) return self.iloc[slice_obj] - @base_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 """ Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self. @@ -3807,7 +3759,6 @@ def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 raise ValueError("transforms cannot produce aggregated results") return result - @base_not_implemented() def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 """ Convert tz-aware axis to target time zone. @@ -3823,7 +3774,6 @@ def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D2 obj = self.copy() if copy else self return obj.set_axis(new_labels, axis, copy=copy) - @base_not_implemented() def tz_localize( self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" ): # noqa: PR01, RT01, D200 @@ -3910,33 +3860,6 @@ def __array__(self, dtype=None): arr = self.to_numpy(dtype) return arr - @base_not_implemented() - def __array_wrap__(self, result, context=None): - """ - Get called after a ufunc and other functions. - - Parameters - ---------- - result : np.ndarray - The result of the ufunc or other function called on the NumPy array - returned by __array__. - context : tuple of (func, tuple, int), optional - This parameter is returned by ufuncs as a 3-element tuple: (name of the - ufunc, arguments of the ufunc, domain of the ufunc), but is not set by - other NumPy functions. - - Returns - ------- - BasePandasDataset - Wrapped Modin object. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - - # TODO: This is very inefficient. 
__array__ and as_matrix have been - # changed to call the more efficient to_numpy, but this has been left - # unchanged since we are not sure of its purpose. - return self._default_to_pandas("__array_wrap__", result, context=context) - def __copy__(self, deep=True): """ Return the copy of the `BasePandasDataset`. @@ -3973,7 +3896,6 @@ def __eq__(self, other): # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset return self.eq(other) - @base_not_implemented() def __finalize__(self, other, method=None, **kwargs): """ Propagate metadata from `other` to `self`. @@ -4137,7 +4059,6 @@ def __or__(self, other): def __ror__(self, other): return self._binary_op("__ror__", other, axis=0) - @base_not_implemented() def __sizeof__(self): """ Generate the total memory usage for an `BasePandasDataset`. diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index a6850941faf..fddd36da1b0 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -91,9 +91,6 @@ replace_external_data_keys_with_empty_pandas_series, replace_external_data_keys_with_query_compiler, ) -from snowflake.snowpark.modin.plugin._internal.aggregation_utils import ( - is_snowflake_agg_func, -) from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike from snowflake.snowpark.modin.plugin.utils.error_message import ( @@ -444,6 +441,14 @@ def add_suffix(self, suffix): ) ) + @dataframe_not_implemented() + def map(self, func, na_action: str | None = None, **kwargs) -> DataFrame: + if not callable(func): + raise ValueError(f"'{type(func)}' object is not callable") + return self.__constructor__( + query_compiler=self._query_compiler.map(func, na_action=na_action, **kwargs) + ) + def applymap(self, func: PythonFuncType, na_action: str | None = None, **kwargs): # TODO: SNOW-1063346: Modin 
upgrade - modin.pandas.DataFrame functions if not callable(func): @@ -607,27 +612,6 @@ def keys(self): # noqa: RT01, D200 # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions return self.columns - def transform( - self, func: PythonFuncType, axis: Axis = 0, *args: Any, **kwargs: Any - ) -> DataFrame: # noqa: PR01, RT01, D200 - # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions - if is_list_like(func) or is_dict_like(func): - ErrorMessage.not_implemented( - "dict and list parameters are not supported for transform" - ) - # throw the same error as pandas for cases where the function type is - # invalid. - if not isinstance(func, str) and not callable(func): - raise TypeError(f"{type(func)} object is not callable") - - # if the function is an aggregation function, we'll produce - # some bogus results while pandas will throw the error the - # code below is throwing. So we do the same. - if is_snowflake_agg_func(func): - raise ValueError("Function did not transform") - - return self.apply(func, axis, False, args=args, **kwargs) - def transpose(self, copy=False, *args): # noqa: PR01, RT01, D200 """ Transpose index and columns. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py new file mode 100644 index 00000000000..1e1b81c10cd --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -0,0 +1,300 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +""" +Methods defined on BasePandasDataset that are overridden in Snowpark pandas. Adding a method to this file +should be done with discretion, and only when relevant changes cannot be made to the query compiler or +upstream frontend to accommodate Snowpark pandas. 
+""" +from __future__ import annotations + +import pickle as pkl +from typing import Any + +import numpy as np +import pandas +from modin.pandas.base import BasePandasDataset +from pandas._libs.lib import no_default +from pandas._typing import ( + Axis, + CompressionOptions, + StorageOptions, + TimedeltaConvertibleTypes, +) + +from snowflake.snowpark.modin.pandas.api.extensions import ( + register_dataframe_accessor, + register_series_accessor, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_method_decorator, +) +from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented + + +def register_base_not_implemented(): + def decorator(base_method: Any): + func = snowpark_pandas_telemetry_method_decorator( + base_not_implemented()(base_method) + ) + register_series_accessor(base_method.__name__)(func) + register_dataframe_accessor(base_method.__name__)(func) + return func + + return decorator + + +# === UNIMPLEMENTED METHODS === +# The following methods are not implemented in Snowpark pandas, and must be overridden on the +# frontend. These methods fall into a few categories: +# 1. Would work in Snowpark pandas, but we have not tested it. +# 2. Would work in Snowpark pandas, but requires more SQL queries than we are comfortable with. +# 3. Requires materialization (usually via a frontend _default_to_pandas call). +# 4. Performs operations on a native pandas Index object that are nontrivial for Snowpark pandas to manage. 
+ + +@register_base_not_implemented() +def asof(self, where, subset=None): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def bool(self): # noqa: RT01, D200 + pass + + +@register_base_not_implemented() +def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def ewm( + self, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | BasePandasDataset | None = None, + method: str = "single", +) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def filter( + self, items=None, like=None, regex=None, axis=None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pop(self, item): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def set_flags( + self, *, copy: bool = False, allows_duplicate_labels: bool | None = None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_clipboard( + self, excel=True, sep=None, **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_excel( + self, + excel_writer, + sheet_name="Sheet1", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + 
engine=None, + merge_cells=True, + encoding=no_default, + inf_rep="inf", + verbose=no_default, + freeze_panes=None, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_hdf( + self, path_or_buf, key, format="table", **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_json( + self, + path_or_buf=None, + orient=None, + date_format=None, + double_precision=10, + force_ascii=True, + date_unit="ms", + default_handler=None, + lines=False, + compression="infer", + index=True, + indent=None, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_latex( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + bold_rows=False, + column_format=None, + longtable=None, + escape=None, + encoding=None, + decimal=".", + multicolumn=None, + multicolumn_format=None, + multirow=None, + caption=None, + label=None, + position=None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_markdown( + self, + buf=None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_pickle( + self, + path, + compression: CompressionOptions = "infer", + protocol: int = pkl.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_string( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + min_rows=None, + max_cols=None, + 
show_dimensions=False, + decimal=".", + line_width=None, + max_colwidth=None, + encoding=None, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_sql( + self, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, +): # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_timestamp( + self, freq=None, how="start", axis=0, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_xarray(self): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def truncate( + self, before=None, after=None, axis=None, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def __finalize__(self, other, method=None, **kwargs): + pass diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py index e1cf93529af..4ffde752d3d 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py @@ -10,12 +10,18 @@ from typing import Any import pandas as native_pd +from modin.pandas import DataFrame +from pandas._typing import Axis, PythonFuncType +from pandas.core.dtypes.common import is_dict_like, is_list_like -from snowflake.snowpark.modin import pandas as pd # noqa: F401 from snowflake.snowpark.modin.pandas.api.extensions import register_dataframe_accessor +from snowflake.snowpark.modin.plugin._internal.aggregation_utils import ( + is_snowflake_agg_func, +) from snowflake.snowpark.modin.plugin._internal.telemetry import ( snowpark_pandas_telemetry_method_decorator, ) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from snowflake.snowpark.modin.utils import _inherit_docstrings @@ -105,3 
+111,25 @@ def plot( "DataFrame.plot materializes data to the local machine for plotting." ) return self._to_pandas().plot + + +def transform( + self, func: PythonFuncType, axis: Axis = 0, *args: Any, **kwargs: Any +) -> DataFrame: # noqa: PR01, RT01, D200 + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if is_list_like(func) or is_dict_like(func): + ErrorMessage.not_implemented( + "dict and list parameters are not supported for transform" + ) + # throw the same error as pandas for cases where the function type is + # invalid. + if not isinstance(func, str) and not callable(func): + raise TypeError(f"{type(func)} object is not callable") + + # if the function is an aggregation function, we'll produce + # some bogus results while pandas will throw the error the + # code below is throwing. So we do the same. + if is_snowflake_agg_func(func): + raise ValueError("Function did not transform") + + return self.apply(func, axis, False, args=args, **kwargs) diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index c6b229d876f..31592501bc3 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -11,13 +11,13 @@ import pandas as native_pd -from snowflake.snowpark.modin import pandas as pd # noqa: F401 from snowflake.snowpark.modin.pandas import Series from snowflake.snowpark.modin.pandas.api.extensions import register_series_accessor from snowflake.snowpark.modin.plugin._internal.telemetry import ( snowpark_pandas_telemetry_method_decorator, ) from snowflake.snowpark.modin.plugin._typing import ListLike +from snowflake.snowpark.modin.plugin.utils.error_message import series_not_implemented from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from snowflake.snowpark.modin.utils import _inherit_docstrings @@ -158,3 +158,9 @@ def 
plot( "Series.plot materializes data to the local machine for plotting." ) return self._to_pandas().plot + + +@register_series_accessor("transform") +@series_not_implemented() +def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + pass From ef205ca0dae697075f657490f1c8779b2fbf7eb6 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 15:30:55 -0700 Subject: [PATCH 2/8] fix reindex_like and transform --- src/snowflake/snowpark/modin/pandas/base.py | 127 +++--- .../snowpark/modin/pandas/dataframe.py | 23 + src/snowflake/snowpark/modin/pandas/series.py | 20 + .../compiler/snowflake_query_compiler.py | 6 + .../plugin/extensions/base_not_implemented.py | 414 ++++++++++++++++++ .../modin/plugin/extensions/base_overrides.py | 5 + .../modin/plugin/utils/error_message.py | 2 +- tests/integ/modin/test_unimplemented.py | 30 ++ tests/unit/modin/test_unsupported.py | 32 -- 9 files changed, 572 insertions(+), 87 deletions(-) create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index 943fe43ebbf..1a5d263a6f8 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -896,9 +896,47 @@ def align( Align two objects on their axes with the specified join method. """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "align", - other, + if ( + method is not lib.no_default + or limit is not lib.no_default + or fill_axis is not lib.no_default + ): + warnings.warn( # noqa: B028 + "The 'method', 'limit', and 'fill_axis' keywords in " + + f"{type(self).__name__}.align are deprecated and will be removed " + + "in a future version. 
Call fillna directly on the returned objects " + + "instead.", + FutureWarning, + ) + if fill_axis is lib.no_default: + fill_axis = 0 + if method is lib.no_default: + method = None + if limit is lib.no_default: + limit = None + + if broadcast_axis is not lib.no_default: + msg = ( + f"The 'broadcast_axis' keyword in {type(self).__name__}.align is " + + "deprecated and will be removed in a future version." + ) + if broadcast_axis is not None: + if self.ndim == 1 and other.ndim == 2: + msg += ( + " Use left = DataFrame({col: left for col in right.columns}, " + + "index=right.index) before calling `left.align(right)` instead." + ) + elif self.ndim == 2 and other.ndim == 1: + msg += ( + " Use right = DataFrame({col: right for col in left.columns}, " + + "index=left.index) before calling `left.align(right)` instead" + ) + warnings.warn(msg, FutureWarning) # noqa: B028 + else: + broadcast_axis = None + + left, right = self._query_compiler.align( + other._query_compiler, join=join, axis=axis, level=level, @@ -909,6 +947,9 @@ def align( fill_axis=fill_axis, broadcast_axis=broadcast_axis, ) + return self.__constructor__(query_compiler=left), self.__constructor__( + query_compiler=right + ) def all(self, axis=0, bool_only=None, skipna=True, **kwargs): """ @@ -1159,10 +1200,12 @@ def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 Select values at particular time of day (e.g., 9:30AM). """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - idx = self.index if axis == 0 else self.columns - indexer = pandas.Series(index=idx).at_time(time, asof=asof).index - return self.loc[indexer] if axis == 0 else self.loc[:, indexer] + if asof: + # pandas raises NotImplementedError for asof=True, so we do, too. 
+ raise NotImplementedError("'asof' argument is not supported") + return self.between_time( + start_time=time, end_time=time, inclusive="both", axis=axis + ) def backfill( self, @@ -1195,18 +1238,14 @@ def between_time( axis=None, ): # noqa: PR01, RT01, D200 # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - idx = self.index if axis == 0 else self.columns - indexer = ( - pandas.Series(index=idx) - .between_time( - start_time, - end_time, + return self._create_or_update_from_compiler( + self._query_compiler.between_time( + start_time=pandas.core.tools.times.to_time(start_time), + end_time=pandas.core.tools.times.to_time(end_time), inclusive=inclusive, + axis=self._get_axis_number(axis), ) - .index ) - return self.loc[indexer] if axis == 0 else self.loc[:, indexer] def bfill( self, @@ -2630,22 +2669,6 @@ def reindex( final_query_compiler, inplace=False if copy is None else not copy ) - def reindex_like( - self, other, method=None, copy=True, limit=None, tolerance=None - ): # noqa: PR01, RT01, D200 - """ - Return an object with matching indices as `other` object. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "reindex_like", - other, - method=method, - copy=copy, - limit=limit, - tolerance=tolerance, - ) - def rename_axis( self, mapper=lib.no_default, @@ -3759,42 +3782,38 @@ def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 raise ValueError("transforms cannot produce aggregated results") return result - def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 + def tz_convert(self, tz, axis=0, level=None, copy=None): # noqa: PR01, RT01, D200 """ Convert tz-aware axis to target time zone. 
""" - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - if level is not None: - new_labels = ( - pandas.Series(index=self.axes[axis]).tz_convert(tz, level=level).index - ) - else: - new_labels = self.axes[axis].tz_convert(tz) - obj = self.copy() if copy else self - return obj.set_axis(new_labels, axis, copy=copy) + if copy is None: + copy = True + return self._create_or_update_from_compiler( + self._query_compiler.tz_convert( + tz, axis=self._get_axis_number(axis), level=level, copy=copy + ), + inplace=(not copy), + ) def tz_localize( - self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" + self, tz, axis=0, level=None, copy=None, ambiguous="raise", nonexistent="raise" ): # noqa: PR01, RT01, D200 """ Localize tz-naive index of a `BasePandasDataset` to target time zone. """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - new_labels = ( - pandas.Series(index=self.axes[axis]) - .tz_localize( + if copy is None: + copy = True + return self._create_or_update_from_compiler( + self._query_compiler.tz_localize( tz, - axis=axis, + axis=self._get_axis_number(axis), level=level, - copy=False, + copy=copy, ambiguous=ambiguous, nonexistent=nonexistent, - ) - .index + ), + inplace=(not copy), ) - return self.set_axis(new_labels, axis, copy=copy) def var( self, diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index fddd36da1b0..0c8547697d7 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -2088,6 +2088,29 @@ def reindex( tolerance=tolerance, ) + @dataframe_not_implemented() + def reindex_like( + self, + other, + method=None, + copy: bool | None = None, + limit=None, + tolerance=None, + ) -> DataFrame: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if copy is None: + 
copy = True + # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).": + # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.DataFrame.reindex_like.html + return self.reindex( + index=other.index, + columns=other.columns, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + def replace( self, to_replace=None, diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index e99e9cc89f8..73116c5a43f 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -1745,6 +1745,26 @@ def reindex(self, *args, **kwargs): fill_value=fill_value, ) + @series_not_implemented() + def reindex_like( + self, + other, + method=None, + copy: bool | None = None, + limit=None, + tolerance=None, + ) -> Series: + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).": + # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.Series.reindex_like.html + return self.reindex( + index=other.index, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + def rename_axis( self, mapper=no_default, diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 108b594faf6..a4bc9fa306d 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -17342,3 +17342,9 @@ def compare( """ return result + + def tz_convert(self, *args: Any, **kwargs: Any) -> None: + ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset") + + def tz_localize(self, *args: Any, **kwargs: Any) -> None: + ErrorMessage.method_not_implemented_error("tz_localize", "BasePandasDataset") diff --git 
a/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py b/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py new file mode 100644 index 00000000000..aaf8b86494d --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py @@ -0,0 +1,414 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +""" +The functions in this file are not implemented in Snowpark pandas. In the future, they +should raise NotImplementedError at the query compiler layer, but doing so requires a longer-term +effort. + +We currently test unsupported APIs under tests/unit/modin/test_unsupported.py, which does not initialize +a session. As such, many frontend methods have additional query compiler API calls that would have to +be mocked before the NotImplementedError can appropriately be raised. +""" +from __future__ import annotations + +import pickle as pkl +from typing import Any + +import numpy as np +import pandas +from modin.pandas.base import BasePandasDataset +from pandas._libs import lib +from pandas._libs.lib import no_default +from pandas._typing import ( + Axis, + CompressionOptions, + StorageOptions, + TimedeltaConvertibleTypes, +) + +from snowflake.snowpark.modin.pandas.api.extensions import ( + register_dataframe_accessor, + register_series_accessor, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_method_decorator, +) +from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented + + +def register_base_not_implemented(): + def decorator(base_method: Any): + func = snowpark_pandas_telemetry_method_decorator( + base_not_implemented()(base_method) + ) + register_series_accessor(base_method.__name__)(func) + register_dataframe_accessor(base_method.__name__)(func) + return func + + return decorator + + +@register_base_not_implemented() +def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=None, + 
fill_value=None, + method=lib.no_default, + limit=lib.no_default, + fill_axis=lib.no_default, + broadcast_axis=lib.no_default, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def asof(self, where, subset=None): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def between_time( + self: BasePandasDataset, + start_time, + end_time, + inclusive: str | None = None, + axis=None, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def bool(self): # noqa: RT01, D200 + pass + + +@register_base_not_implemented() +def clip( + self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def combine_first(self, other): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def ewm( + self, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | BasePandasDataset | None = None, + method: str = "single", +) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def filter( + self, items=None, like=None, regex=None, axis=None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def infer_objects( + self, copy: bool | None = None +) -> 
BasePandasDataset: # pragma: no cover # noqa: RT01, D200 + pass + + +@register_base_not_implemented() +def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): + pass + + +@register_base_not_implemented() +def kurtosis(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): + pass + + +@register_base_not_implemented() +def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def pop(self, item): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def reindex_like( + self, other, method=None, copy=True, limit=None, tolerance=None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def sem( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only=False, + **kwargs, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def set_flags( + self, *, copy: bool = False, allows_duplicate_labels: bool | None = None +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_clipboard( + self, excel=True, sep=None, **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_excel( + self, + excel_writer, + sheet_name="Sheet1", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + engine=None, + merge_cells=True, + encoding=no_default, + inf_rep="inf", + verbose=no_default, + freeze_panes=None, + 
storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_hdf( + self, path_or_buf, key, format="table", **kwargs +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_json( + self, + path_or_buf=None, + orient=None, + date_format=None, + double_precision=10, + force_ascii=True, + date_unit="ms", + default_handler=None, + lines=False, + compression="infer", + index=True, + indent=None, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_latex( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + bold_rows=False, + column_format=None, + longtable=None, + escape=None, + encoding=None, + decimal=".", + multicolumn=None, + multicolumn_format=None, + multirow=None, + caption=None, + label=None, + position=None, +): # pragma: no cover # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_markdown( + self, + buf=None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_pickle( + self, + path, + compression: CompressionOptions = "infer", + protocol: int = pkl.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, +): # pragma: no cover # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_string( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + min_rows=None, + max_cols=None, + show_dimensions=False, + decimal=".", + line_width=None, + max_colwidth=None, + encoding=None, +): # noqa: PR01, RT01, D200 + 
pass + + +@register_base_not_implemented() +def to_sql( + self, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, +): # noqa: PR01, D200 + pass + + +@register_base_not_implemented() +def to_timestamp( + self, freq=None, how="start", axis=0, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def to_xarray(self): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def truncate( + self, before=None, after=None, axis=None, copy=True +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def tz_localize( + self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" +): # noqa: PR01, RT01, D200 + pass + + +@register_base_not_implemented() +def __array_wrap__(self, result, context=None): + pass + + +@register_base_not_implemented() +def __finalize__(self, other, method=None, **kwargs): + pass + + +@register_base_not_implemented() +def __sizeof__(self): + pass diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py index 1e1b81c10cd..95be5478309 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -288,6 +288,11 @@ def to_xarray(self): # noqa: PR01, RT01, D200 pass +@register_base_not_implemented() +def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + pass + + @register_base_not_implemented() def truncate( self, before=None, after=None, axis=None, copy=True diff --git a/src/snowflake/snowpark/modin/plugin/utils/error_message.py b/src/snowflake/snowpark/modin/plugin/utils/error_message.py index 997af701f2b..9a29ca98903 100644 --- 
a/src/snowflake/snowpark/modin/plugin/utils/error_message.py +++ b/src/snowflake/snowpark/modin/plugin/utils/error_message.py @@ -178,7 +178,7 @@ def method_not_implemented_error( class_: str The class of Snowpark pandas function associated with the method. """ - message = f"{name} is not yet implemented for {class_}" + message = f"Snowpark pandas does not yet support the method {class_}.{name}" ErrorMessage.not_implemented(message) @staticmethod diff --git a/tests/integ/modin/test_unimplemented.py b/tests/integ/modin/test_unimplemented.py index 5e865c418b4..0263c9f0f5f 100644 --- a/tests/integ/modin/test_unimplemented.py +++ b/tests/integ/modin/test_unimplemented.py @@ -43,9 +43,35 @@ def eval_and_validate_unsupported_methods( func(snow_pd_args) +def unimplemented_dt_index_helper(name, *args): + # Helper method for methods that require the frame to have a DatetimeIndex and tz-aware timestamp data. + # If the argument is a native pandas object, then convert its index to DatetimeIndex. + # If the argument is a Snowpark pandas object, pass it as-is, since it should fail at the + # query compiler layer without validating the index object. 
+ def helper(df): + if isinstance(df, (native_pd.DataFrame, native_pd.Series)): + # When the method is tz_convert, the index must already be tz-aware + # otherwise leave it tz-naive + df.index = native_pd.to_datetime(range(len(df)), utc=name == "tz_convert") + return getattr(df, name)(*args) + + return helper, name + + # unsupported methods for both dataframe and series UNSUPPORTED_DATAFRAME_SERIES_METHODS = [ (lambda df: df.cumprod(), "cumprod"), + unimplemented_dt_index_helper("at_time", "12:00"), + unimplemented_dt_index_helper("between_time", "12:00", "13:00"), + (lambda df: df.explode("a"), "explode"), + (lambda df: df.infer_objects(), "infer_objects"), + (lambda df: df.kurt(), "kurt"), + (lambda df: df.kurtosis(), "kurtosis"), + (lambda df: df.mode(), "mode"), + (lambda df: df.sem(), "sem"), + (lambda df: df.transform(lambda x: x + 1), "transform"), + unimplemented_dt_index_helper("tz_convert", "US/Central"), + unimplemented_dt_index_helper("tz_localize", "US/Central"), ] # unsupported methods that can only be applied on dataframe @@ -65,6 +91,10 @@ def eval_and_validate_unsupported_methods( UNSUPPORTED_BINARY_METHODS = [ # TODO SNOW-862664, support together with combine # (lambda dfs: dfs[0].combine(dfs[1], np.minimum, fill_value=1), "combine"), + (lambda dfs: dfs[0].align(dfs[1]), "align"), + (lambda dfs: dfs[0].combine(dfs[1], func=lambda a, b: a), "combine"), + (lambda dfs: dfs[0].combine_first(dfs[1]), "combine_first"), + (lambda dfs: dfs[0].reindex_like(dfs[1]), "reindex_like"), (lambda dfs: dfs[0].update(dfs[1]), "update"), ] diff --git a/tests/unit/modin/test_unsupported.py b/tests/unit/modin/test_unsupported.py index 5a54a1f32c7..f33babcca28 100644 --- a/tests/unit/modin/test_unsupported.py +++ b/tests/unit/modin/test_unsupported.py @@ -60,40 +60,27 @@ def test_unsupported_general(general_method, kwargs): @pytest.mark.parametrize( "df_method, kwargs", [ - ["align", {"other": ""}], ["asof", {"where": ""}], - ["at_time", {"time": ""}], - 
["between_time", {"start_time": "", "end_time": ""}], ["bool", {}], ["boxplot", {}], - ["clip", {}], - ["combine", {"other": "", "func": ""}], - ["combine_first", {"other": ""}], ["corrwith", {"other": ""}], ["cov", {}], ["dot", {"other": ""}], ["droplevel", {"level": ""}], ["eval", {"expr": "xxx"}], ["ewm", {}], - ["explode", {"column": ""}], ["filter", {}], ["from_dict", {"data": ""}], ["from_records", {"data": ""}], ["hist", {}], - ["infer_objects", {}], ["interpolate", {}], ["isetitem", {"loc": "", "value": ""}], - ["kurt", {}], - ["kurtosis", {}], - ["mode", {}], ["pipe", {"func": ""}], ["pop", {"item": ""}], ["prod", {}], ["product", {}], ["query", {"expr": ""}], - ["reindex_like", {"other": ""}], ["reorder_levels", {"order": ""}], - ["sem", {}], ["set_flags", {}], ["style", {}], ["swapaxes", {"axis1": "", "axis2": ""}], @@ -118,10 +105,7 @@ def test_unsupported_general(general_method, kwargs): ["to_timestamp", {}], ["to_xarray", {}], ["to_xml", {}], - ["transform", {"func": [[], {}]}], ["truncate", {}], - ["tz_convert", {"tz": ""}], - ["tz_localize", {"tz": ""}], ["xs", {"key": ""}], ["__dataframe__", {}], ], @@ -138,47 +122,34 @@ def test_unsupported_df(df_method, kwargs): @pytest.mark.parametrize( "series_method, kwargs", [ - ["align", {"other": ""}], ["argmax", {}], ["argmin", {}], ["argsort", {}], ["array", {}], ["asof", {"where": ""}], - ["at_time", {"time": ""}], ["autocorr", {}], ["between", {"left": "", "right": ""}], - ["between_time", {"start_time": "", "end_time": ""}], ["bool", {}], - ["clip", {}], - ["combine", {"other": "", "func": ""}], - ["combine_first", {"other": ""}], ["corr", {"other": ""}], ["cov", {"other": ""}], ["divmod", {"other": ""}], ["dot", {"other": ""}], ["droplevel", {"level": ""}], ["ewm", {}], - ["explode", {}], ["factorize", {}], ["filter", {}], ["hist", {}], - ["infer_objects", {}], ["interpolate", {}], ["item", {}], - ["kurt", {}], - ["kurtosis", {}], - ["mode", {}], ["nbytes", {}], ["pipe", {"func": ""}], ["pop", 
{"item": ""}], ["prod", {}], ["ravel", {}], - ["reindex_like", {"other": ""}], ["reorder_levels", {"order": ""}], ["repeat", {"repeats": ""}], ["rdivmod", {"other": ""}], ["searchsorted", {"value": ""}], - ["sem", {}], ["set_flags", {}], ["swapaxes", {"axis1": "", "axis2": ""}], ["swaplevel", {}], @@ -194,10 +165,7 @@ def test_unsupported_df(df_method, kwargs): ["to_string", {}], ["to_timestamp", {}], ["to_xarray", {}], - ["transform", {"func": ""}], ["truncate", {}], - ["tz_convert", {"tz": ""}], - ["tz_localize", {"tz": ""}], ["view", {}], ["xs", {"key": ""}], ], From 1062d2f481d0db540712bcc959cc0062e5780e98 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 16:07:18 -0700 Subject: [PATCH 3/8] fix transform --- .../snowpark/modin/plugin/extensions/dataframe_overrides.py | 2 ++ tests/integ/modin/test_unimplemented.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py index 4ffde752d3d..c80a23b8d54 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py @@ -113,6 +113,8 @@ def plot( return self._to_pandas().plot +@register_dataframe_accessor("transform") +@snowpark_pandas_telemetry_method_decorator def transform( self, func: PythonFuncType, axis: Axis = 0, *args: Any, **kwargs: Any ) -> DataFrame: # noqa: PR01, RT01, D200 diff --git a/tests/integ/modin/test_unimplemented.py b/tests/integ/modin/test_unimplemented.py index 0263c9f0f5f..d5c1c9ac91c 100644 --- a/tests/integ/modin/test_unimplemented.py +++ b/tests/integ/modin/test_unimplemented.py @@ -69,7 +69,6 @@ def helper(df): (lambda df: df.kurtosis(), "kurtosis"), (lambda df: df.mode(), "mode"), (lambda df: df.sem(), "sem"), - (lambda df: df.transform(lambda x: x + 1), "transform"), unimplemented_dt_index_helper("tz_convert", "US/Central"), 
unimplemented_dt_index_helper("tz_localize", "US/Central"), ] @@ -84,6 +83,7 @@ def helper(df): UNSUPPORTED_SERIES_METHODS = [ (lambda se: se.is_monotonic_increasing, "property fget:is_monotonic_increasing"), (lambda se: se.is_monotonic_decreasing, "property fget:is_monotonic_decreasing"), + (lambda df: df.transform(lambda x: x + 1), "transform"), ] # unsupported binary operations that can be applied on both dataframe and series From d4b52d79bd2e74cc46e5b3c62d2627abe95237cb Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 16:51:55 -0700 Subject: [PATCH 4/8] update docs --- src/snowflake/snowpark/modin/plugin/docstrings/base.py | 10 ---------- .../snowpark/modin/plugin/docstrings/dataframe.py | 10 ++++++++++ .../snowpark/modin/plugin/docstrings/series.py | 5 +++++ .../modin/plugin/extensions/series_overrides.py | 5 +++++ 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py index 13d2fc6946d..a6a0aff1af4 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py @@ -1093,11 +1093,6 @@ def drop_duplicates(): Return `BasePandasDataset` with duplicate rows removed. """ - def map(): - """ - Apply a function to `BasePandasDataset elementwise. - """ - def mask(): """ Replace values where the condition is True. @@ -2282,11 +2277,6 @@ def reindex(): Conform `BasePandasDataset` to new index with optional filling logic. """ - def reindex_like(): - """ - Return an object with matching indices as `other` object. - """ - def rename_axis(): """ Set the name of the axis for the index or columns. 
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py index 6d093eac1d9..a42ef48eb94 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py @@ -3320,6 +3320,11 @@ def reindex(): is the previous index value when the data is sorted. """ + def reindex_like(): + """ + Return an object with matching indices as `other` object. + """ + def replace(): """ Replace values given in `to_replace` with `value`. @@ -4522,6 +4527,11 @@ def value_counts(): Name: count, dtype: int64 """ + def map(): + """ + Apply a function to the `DataFrame` elementwise. + """ + def mask(): """ Replace values where the condition is True. diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index c73b1f43ca8..75ea0e39fb9 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -2432,6 +2432,11 @@ def reindex(): is the previous index value when the data is sorted. """ + def reindex_like(): + """ + Return an object with matching indices as `other` object. + """ + def rename_axis(): """ Set the name of the axis for the index or columns. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index 31592501bc3..c564007e287 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -163,4 +163,9 @@ def plot( @register_series_accessor("transform") @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Call ``func`` on self producing a `Series` with the same axis shape as self. + + Snowpark pandas does not yet support this method for Series. 
+ """ pass From bef43fb0c514e6bde0160c1e8c88d4a6b229fc31 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 8 Aug 2024 16:56:42 -0700 Subject: [PATCH 5/8] fix assertion message --- tests/integ/modin/groupby/test_groupby_get_group.py | 2 +- tests/unit/modin/test_groupby_unsupported.py | 4 ++-- tests/unit/modin/test_series_strings.py | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/integ/modin/groupby/test_groupby_get_group.py b/tests/integ/modin/groupby/test_groupby_get_group.py index b17be1914fe..c83fecd24de 100644 --- a/tests/integ/modin/groupby/test_groupby_get_group.py +++ b/tests/integ/modin/groupby/test_groupby_get_group.py @@ -77,7 +77,7 @@ def test_groupby_get_group(by): # DataFrame with __getitem__ with pytest.raises( NotImplementedError, - match="get_group is not yet implemented for SeriesGroupBy", + match="Snowpark pandas does not yet support the method SeriesGroupBy.get_group", ): snowpark_pandas_df.groupby(by)["col5_int16"].get_group(name) diff --git a/tests/unit/modin/test_groupby_unsupported.py b/tests/unit/modin/test_groupby_unsupported.py index afd8e7feeaf..efc48724055 100644 --- a/tests/unit/modin/test_groupby_unsupported.py +++ b/tests/unit/modin/test_groupby_unsupported.py @@ -48,7 +48,7 @@ def test_series_groupby_unsupported_methods_raises( mock_series, func, func_name ) -> None: - msg = f"{func_name} is not yet implemented for GroupBy" + msg = f"Snowpark pandas does not yet support the method GroupBy.{func_name}" with pytest.raises(NotImplementedError, match=msg): func(mock_series) @@ -92,6 +92,6 @@ def test_series_groupby_unsupported_methods_raises( def test_dataframe_groupby_unsupported_methods_raises( mock_dataframe, func, func_name ) -> None: - msg = f"{func_name} is not yet implemented for GroupBy" + msg = f"Snowpark pandas does not yet support the method GroupBy.{func_name}" with pytest.raises(NotImplementedError, match=msg): func(mock_dataframe) diff --git a/tests/unit/modin/test_series_strings.py 
b/tests/unit/modin/test_series_strings.py index 9fc78f519c6..2e643356934 100644 --- a/tests/unit/modin/test_series_strings.py +++ b/tests/unit/modin/test_series_strings.py @@ -21,7 +21,8 @@ def test_str_cat_no_others(mock_str_register, mock_series): return_callable.return_value = result_query_compiler mock_str_register.return_value = return_callable with pytest.raises( - NotImplementedError, match="cat is not yet implemented for Series.str" + NotImplementedError, + match="Snowpark pandas does not yet support the method Series.str.cat", ): mock_series.str.cat() @@ -65,7 +66,7 @@ def test_str_cat_no_others(mock_str_register, mock_series): def test_str_methods_with_series_return(func, func_name, mock_series): with pytest.raises( NotImplementedError, - match=f"{func_name} is not yet implemented for Series.str", + match=f"Snowpark pandas does not yet support the method Series.str.{func_name}", ): func(mock_series) @@ -81,7 +82,8 @@ def test_str_methods_with_series_return(func, func_name, mock_series): ) def test_str_methods_with_dataframe_return(func, func_name, mock_series): with pytest.raises( - NotImplementedError, match="is not yet implemented for Series.str" + NotImplementedError, + match="Snowpark pandas does not yet support the method Series.str.", ): func(mock_series) From 39cc24a63ac87c52f68fa9baea252a73886c418d Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Fri, 9 Aug 2024 13:41:44 -0700 Subject: [PATCH 6/8] fix ci --- src/snowflake/snowpark/modin/plugin/docstrings/series.py | 5 +++++ .../snowpark/modin/plugin/extensions/series_overrides.py | 5 ----- tests/integ/modin/frame/test_filter.py | 3 +-- tests/integ/modin/strings/test_case_justify.py | 3 +-- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 75ea0e39fb9..6e48a7e57f3 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ 
b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3375,6 +3375,11 @@ def to_timestamp(): Cast to DatetimeIndex of Timestamps, at beginning of period. """ + def transform(): + """ + Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self. + """ + def transpose(): """ Return the transpose, which is by definition `self`. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index c564007e287..31592501bc3 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -163,9 +163,4 @@ def plot( @register_series_accessor("transform") @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Call ``func`` on self producing a `Series` with the same axis shape as self. - - Snowpark pandas does not yet support this method for Series. - """ pass diff --git a/tests/integ/modin/frame/test_filter.py b/tests/integ/modin/frame/test_filter.py index 577a0d37446..723f4be7454 100644 --- a/tests/integ/modin/frame/test_filter.py +++ b/tests/integ/modin/frame/test_filter.py @@ -3,7 +3,6 @@ # import random -import re import modin.pandas as pd import numpy as np @@ -89,7 +88,7 @@ def test_filtering_with_self_not_implemented( snow_df = pd.DataFrame(data) with pytest.raises( NotImplementedError, - match=re.escape("casefold is not yet implemented for Series.str"), + match="Snowpark pandas does not yet support the method Series.str.casefold", ): func(snow_df) diff --git a/tests/integ/modin/strings/test_case_justify.py b/tests/integ/modin/strings/test_case_justify.py index ec3cfd58cbf..0ba37a39a73 100644 --- a/tests/integ/modin/strings/test_case_justify.py +++ b/tests/integ/modin/strings/test_case_justify.py @@ -1,7 +1,6 @@ # # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
# -import re import modin.pandas as pd import pandas as native_pd @@ -23,6 +22,6 @@ def test_title(): @sql_count_checker(query_count=0) def test_casefold_not_implemented(): s = pd.Series(["ß", "case", "ßd"]) - msg = re.escape("casefold is not yet implemented for Series.str") + msg = "Snowpark pandas does not yet support the method Series.str.casefold" with pytest.raises(NotImplementedError, match=msg): s.str.casefold() From d719e1836461eb79207bcc616c801b90731436d3 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Mon, 12 Aug 2024 15:19:46 -0700 Subject: [PATCH 7/8] remove unused file and no cover on unimplemented --- .../plugin/extensions/base_not_implemented.py | 414 ------------------ .../modin/plugin/extensions/base_overrides.py | 50 +-- .../plugin/extensions/series_overrides.py | 2 +- 3 files changed, 26 insertions(+), 440 deletions(-) delete mode 100644 src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py b/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py deleted file mode 100644 index aaf8b86494d..00000000000 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_not_implemented.py +++ /dev/null @@ -1,414 +0,0 @@ -# -# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. -# - -""" -The functions in this file are not implemented in Snowpark pandas. In the future, they -should raise NotImplementedError at the query compiler layer, but doing so requires a longer-term -effort. - -We currently test unsupported APIs under tests/unit/modin/test_unsupported.py, which does not initialize -a session. As such, many frontend methods have additional query compiler API calls that would have to -be mocked before the NotImplementedError can appropriately be raised. 
-""" -from __future__ import annotations - -import pickle as pkl -from typing import Any - -import numpy as np -import pandas -from modin.pandas.base import BasePandasDataset -from pandas._libs import lib -from pandas._libs.lib import no_default -from pandas._typing import ( - Axis, - CompressionOptions, - StorageOptions, - TimedeltaConvertibleTypes, -) - -from snowflake.snowpark.modin.pandas.api.extensions import ( - register_dataframe_accessor, - register_series_accessor, -) -from snowflake.snowpark.modin.plugin._internal.telemetry import ( - snowpark_pandas_telemetry_method_decorator, -) -from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented - - -def register_base_not_implemented(): - def decorator(base_method: Any): - func = snowpark_pandas_telemetry_method_decorator( - base_not_implemented()(base_method) - ) - register_series_accessor(base_method.__name__)(func) - register_dataframe_accessor(base_method.__name__)(func) - return func - - return decorator - - -@register_base_not_implemented() -def align( - self, - other, - join="outer", - axis=None, - level=None, - copy=None, - fill_value=None, - method=lib.no_default, - limit=lib.no_default, - fill_axis=lib.no_default, - broadcast_axis=lib.no_default, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def asof(self, where, subset=None): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def between_time( - self: BasePandasDataset, - start_time, - end_time, - inclusive: str | None = None, - axis=None, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def bool(self): # noqa: RT01, D200 - pass - - -@register_base_not_implemented() -def clip( - self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def 
combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def combine_first(self, other): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def ewm( - self, - com: float | None = None, - span: float | None = None, - halflife: float | TimedeltaConvertibleTypes | None = None, - alpha: float | None = None, - min_periods: int | None = 0, - adjust: bool = True, - ignore_na: bool = False, - axis: Axis = 0, - times: str | np.ndarray | BasePandasDataset | None = None, - method: str = "single", -) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def filter( - self, items=None, like=None, regex=None, axis=None -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def infer_objects( - self, copy: bool | None = None -) -> BasePandasDataset: # pragma: no cover # noqa: RT01, D200 - pass - - -@register_base_not_implemented() -def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): - pass - - -@register_base_not_implemented() -def kurtosis(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): - pass - - -@register_base_not_implemented() -def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def pop(self, item): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def reindex_like( - self, other, method=None, copy=True, limit=None, tolerance=None -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def 
reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def sem( - self, - axis: Axis | None = None, - skipna: bool = True, - ddof: int = 1, - numeric_only=False, - **kwargs, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def set_flags( - self, *, copy: bool = False, allows_duplicate_labels: bool | None = None -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_clipboard( - self, excel=True, sep=None, **kwargs -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_excel( - self, - excel_writer, - sheet_name="Sheet1", - na_rep="", - float_format=None, - columns=None, - header=True, - index=True, - index_label=None, - startrow=0, - startcol=0, - engine=None, - merge_cells=True, - encoding=no_default, - inf_rep="inf", - verbose=no_default, - freeze_panes=None, - storage_options: StorageOptions = None, -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_hdf( - self, path_or_buf, key, format="table", **kwargs -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_json( - self, - path_or_buf=None, - orient=None, - date_format=None, - double_precision=10, - force_ascii=True, - date_unit="ms", - default_handler=None, - lines=False, - compression="infer", - index=True, - indent=None, - storage_options: StorageOptions = None, -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_latex( - self, - buf=None, - columns=None, - col_space=None, - header=True, - index=True, - na_rep="NaN", - formatters=None, - float_format=None, - sparsify=None, - index_names=True, 
- bold_rows=False, - column_format=None, - longtable=None, - escape=None, - encoding=None, - decimal=".", - multicolumn=None, - multicolumn_format=None, - multirow=None, - caption=None, - label=None, - position=None, -): # pragma: no cover # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_markdown( - self, - buf=None, - mode: str = "wt", - index: bool = True, - storage_options: StorageOptions = None, - **kwargs, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_pickle( - self, - path, - compression: CompressionOptions = "infer", - protocol: int = pkl.HIGHEST_PROTOCOL, - storage_options: StorageOptions = None, -): # pragma: no cover # noqa: PR01, D200 - pass - - -@register_base_not_implemented() -def to_string( - self, - buf=None, - columns=None, - col_space=None, - header=True, - index=True, - na_rep="NaN", - formatters=None, - float_format=None, - sparsify=None, - index_names=True, - justify=None, - max_rows=None, - min_rows=None, - max_cols=None, - show_dimensions=False, - decimal=".", - line_width=None, - max_colwidth=None, - encoding=None, -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_sql( - self, - name, - con, - schema=None, - if_exists="fail", - index=True, - index_label=None, - chunksize=None, - dtype=None, - method=None, -): # noqa: PR01, D200 - pass - - -@register_base_not_implemented() -def to_timestamp( - self, freq=None, how="start", axis=0, copy=True -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def to_xarray(self): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def truncate( - self, before=None, after=None, axis=None, copy=True -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def tz_localize( - self, tz, axis=0, level=None, copy=True, ambiguous="raise", 
nonexistent="raise" -): # noqa: PR01, RT01, D200 - pass - - -@register_base_not_implemented() -def __array_wrap__(self, result, context=None): - pass - - -@register_base_not_implemented() -def __finalize__(self, other, method=None, **kwargs): - pass - - -@register_base_not_implemented() -def __sizeof__(self): - pass diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py index 95be5478309..332df757787 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -56,17 +56,17 @@ def decorator(base_method: Any): @register_base_not_implemented() def asof(self, where, subset=None): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def bool(self): # noqa: RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -83,53 +83,53 @@ def ewm( times: str | np.ndarray | BasePandasDataset | None = None, method: str = "single", ) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def filter( self, items=None, like=None, regex=None, axis=None ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def pop(self, item): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def set_flags( self, *, copy: bool = False, allows_duplicate_labels: bool | None = None ): # noqa: PR01, RT01, D200 - pass + 
pass # pragma: no cover @register_base_not_implemented() def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_clipboard( self, excel=True, sep=None, **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -153,14 +153,14 @@ def to_excel( freeze_panes=None, storage_options: StorageOptions = None, ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_hdf( self, path_or_buf, key, format="table", **kwargs ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -179,7 +179,7 @@ def to_json( indent=None, storage_options: StorageOptions = None, ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -208,7 +208,7 @@ def to_latex( label=None, position=None, ): # pragma: no cover # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -220,7 +220,7 @@ def to_markdown( storage_options: StorageOptions = None, **kwargs, ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -231,7 +231,7 @@ def to_pickle( protocol: int = pkl.HIGHEST_PROTOCOL, storage_options: StorageOptions = None, ): # pragma: no cover # noqa: PR01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -257,7 +257,7 @@ def to_string( max_colwidth=None, encoding=None, ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() @@ -273,33 +273,33 @@ def to_sql( dtype=None, method=None, ): # noqa: PR01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_timestamp( self, freq=None, 
how="start", axis=0, copy=True ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def to_xarray(self): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def truncate( self, before=None, after=None, axis=None, copy=True ): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover @register_base_not_implemented() def __finalize__(self, other, method=None, **kwargs): - pass + pass # pragma: no cover diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index 31592501bc3..0afea30e29a 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -163,4 +163,4 @@ def plot( @register_series_accessor("transform") @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 - pass + pass # pragma: no cover From 18dfe8a79fc079f047f36cb18ff383bee2fc5036 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 13 Aug 2024 14:48:02 -0700 Subject: [PATCH 8/8] add no cover --- src/snowflake/snowpark/modin/pandas/base.py | 4 ++-- src/snowflake/snowpark/modin/pandas/dataframe.py | 6 ++++-- src/snowflake/snowpark/modin/pandas/series.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index 1a5d263a6f8..c08cdee1386 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -891,7 +891,7 @@ def align( limit=lib.no_default, fill_axis=lib.no_default, broadcast_axis=lib.no_default, - ): # noqa: PR01, RT01, D200 + ): # pragma: no cover # noqa: PR01, RT01, D200 """ Align two objects 
on their axes with the specified join method. """ @@ -1200,7 +1200,7 @@ def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 Select values at particular time of day (e.g., 9:30AM). """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if asof: + if asof: # pragma: no cover # pandas raises NotImplementedError for asof=True, so we do, too. raise NotImplementedError("'asof' argument is not supported") return self.between_time( diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index 0c8547697d7..a7d53813779 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -442,7 +442,9 @@ def add_suffix(self, suffix): ) @dataframe_not_implemented() - def map(self, func, na_action: str | None = None, **kwargs) -> DataFrame: + def map( + self, func, na_action: str | None = None, **kwargs + ) -> DataFrame: # pragma: no cover if not callable(func): raise ValueError(f"'{type(func)}' object is not callable") return self.__constructor__( @@ -2096,7 +2098,7 @@ def reindex_like( copy: bool | None = None, limit=None, tolerance=None, - ) -> DataFrame: + ) -> DataFrame: # pragma: no cover # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions if copy is None: copy = True diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index 73116c5a43f..1ce3ecfc997 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -1753,7 +1753,7 @@ def reindex_like( copy: bool | None = None, limit=None, tolerance=None, - ) -> Series: + ) -> Series: # pragma: no cover # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions # docs say "Same as calling .reindex(index=other.index, columns=other.columns,...).": # https://pandas.pydata.org/pandas-docs/version/1.4/reference/api/pandas.Series.reindex_like.html