SNOW-1458122 Index name apis (#2106)

Co-authored-by: Andong Zhan <[email protected]> Co-authored-by: Mahesh Vashishtha <[email protected]>
snowflakedb · Aug 16, 2024 · 92a7ed4 · 92a7ed4
1 parent 7ba0846
commit 92a7ed4
Show file tree

Hide file tree

Showing 8 changed files with 389 additions and 30 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -82,6 +82,7 @@
 - Added support for index's arithmetic and comparison operators.
 - Added support for `Series.dt.round`.
 - Added documentation pages for `DatetimeIndex`.
+- Added support for `Index.name`, `Index.names`, `Index.rename`, and `Index.set_names`.
 
 #### Improvements
 - Removed the public preview warning message upon importing Snowpark pandas.

diff --git a/docs/source/modin/supported/index_supported.rst b/docs/source/modin/supported/index_supported.rst
@@ -38,7 +38,7 @@ Attributes
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``name``                    | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
-| ``names``                   | P                               |                                                    |
+| ``names``                   | Y                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``nbytes``                  | N                               |                                                    |
 +-----------------------------+---------------------------------+----------------------------------------------------+
@@ -114,7 +114,7 @@ Methods
 | ``reindex``                 | P                               |                                  | ``N`` if the Index values are tuple-like, or       |
 |                             |                                 |                                  | method is ``nearest``.                             |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
-| ``rename``                  | N                               |                                  |                                                    |
+| ``rename``                  | Y                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``repeat``                  | N                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
@@ -130,7 +130,7 @@ Methods
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``value_counts``            | P                               | ``bins``                         |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
-| ``set_names``               | N                               |                                  |                                                    |
+| ``set_names``               | Y                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
 | ``droplevel``               | N                               |                                  |                                                    |
 +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+

diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py
@@ -674,7 +674,7 @@ def _get_index(self):
             return self._query_compiler.index
 
         idx = Index(query_compiler=self._query_compiler)
-        idx._parent = self
+        idx._set_parent(self)
         return idx
 
     index = property(_get_index, _set_index)

diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py
@@ -30,10 +30,12 @@
 import numpy as np
 import pandas as native_pd
 from pandas._libs import lib
+from pandas._libs.lib import is_list_like, is_scalar
 from pandas._typing import ArrayLike, DateTimeErrorChoices, DtypeObj, NaPosition
 from pandas.core.arrays import ExtensionArray
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.common import is_datetime64_any_dtype, pandas_dtype
+from pandas.core.dtypes.inference import is_hashable
 
 from snowflake.snowpark.modin.pandas import DataFrame, Series
 from snowflake.snowpark.modin.pandas.base import BasePandasDataset
@@ -74,7 +76,7 @@ def __new__(
     ) -> Index:
         """
         Override __new__ method to control new instance creation of Index.
-        Depending on data type, it will create a Index or DatetimeIndex instance.
+        Depending on data type, it will create an Index or DatetimeIndex instance.
 
         Parameters
         ----------
@@ -177,6 +179,8 @@ def _init_index(
         query_compiler: SnowflakeQueryCompiler = None,
         **kwargs: Any,
     ):
+        # `_parent` keeps track of any Series or DataFrame that this Index is a part of.
+        self._parent = None
         if query_compiler:
             # Raise warning if `data` is query compiler with non-default arguments.
             for arg_name, arg_value in kwargs.items():
@@ -336,6 +340,12 @@ def __constructor__(self):
         """
         return type(self)
 
+    def _set_parent(self, parent: Series | DataFrame):
+        """
+        Set the parent object of the current Index to a given Series or DataFrame.
+        """
+        self._parent = parent
+
     @property
     def values(self) -> ArrayLike:
         """
@@ -612,7 +622,7 @@ def name(self) -> Hashable:
         Returns
         -------
         Hashable
-            name of this index
+            Name of this index.
 
         Examples
         --------
@@ -629,7 +639,13 @@ def name(self, value: Hashable) -> None:
         """
         Set Index name.
         """
+        if not is_hashable(value):
+            raise TypeError(f"{type(self).__name__}.name must be a hashable type")
         self._query_compiler = self._query_compiler.set_index_names([value])
+        if self._parent is not None:
+            self._parent._update_inplace(
+                new_query_compiler=self._parent._query_compiler.set_index_names([value])
+            )
 
     def _get_names(self) -> list[Hashable]:
         """
@@ -651,6 +667,10 @@ def _set_names(self, values: list) -> None:
         TypeError if each name is not hashable.
         """
         self._query_compiler = self._query_compiler.set_index_names(values)
+        if self._parent is not None:
+            self._parent._update_inplace(
+                new_query_compiler=self._parent._query_compiler.set_index_names(values)
+            )
 
     names = property(fset=_set_names, fget=_get_names)
 
@@ -685,13 +705,23 @@ def set_names(
         >>> idx.set_names('quarter')
         Index([1, 2, 3, 4], dtype='int64', name='quarter')
         """
-        # TODO: SNOW-1458122 implement set_names
-        WarningMessage.index_to_pandas_warning("set_names")
-        if not inplace:
-            return self.__constructor__(
-                self.to_pandas().set_names(names, level=level, inplace=inplace)
+        if is_list_like(names) and len(names) > 1:
+            raise ValueError(
+                f"Since Index is a single index object in Snowpark pandas, "
+                f"the length of new names must be 1, got {len(names)}."
+            )
+        if level is not None and level not in [0, -1]:
+            raise IndexError(
+                f"Level does not exist: Index has only 1 level, {level} is not a valid level number."
             )
-        return self.to_pandas().set_names(names, level=level, inplace=inplace)
+        if inplace:
+            name = names[0] if is_list_like(names) else names
+            self.name = name
+            return None
+        else:
+            res = self.__constructor__(query_compiler=self._query_compiler)
+            res.name = names if is_scalar(names) else names[0]
+            return res
 
     @property
     def ndim(self) -> int:
@@ -1521,8 +1551,7 @@ def reindex(
             )
             return Index(query_compiler=query_compiler), indices
 
-    @index_not_implemented()
-    def rename(self) -> None:
+    def rename(self, name: Any, inplace: bool = False) -> None:
         """
         Alter Index or MultiIndex name.
 
@@ -1545,8 +1574,29 @@ def rename(self) -> None:
         See Also
         --------
         Index.set_names : Able to set new names partially and by level.
+
+        Examples
+        --------
+        >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
+        >>> idx.rename('grade', inplace=False)
+        Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
+        >>> idx.rename('grade', inplace=True)
+
+        Note
+        ----
+        Native pandas only allows hashable types for names. Snowpark pandas allows
+        name to be any scalar or list-like type. If a tuple is used for the name,
+        the tuple itself will be the name.
+
+        For instance,
+        >>> idx = pd.Index([1, 2, 3])
+        >>> idx.rename(('a', 'b', 'c'), inplace=True)
+        >>> idx.name
+        ('a', 'b', 'c')
         """
-        # TODO: SNOW-1458122 implement rename
+        if isinstance(name, tuple):
+            name = [name]  # The entire tuple is the name
+        return self.set_names(names=name, inplace=inplace)
 
     def nunique(self, dropna: bool = True) -> int:
         """

diff --git a/tests/integ/modin/frame/test_nlargest_nsmallest.py b/tests/integ/modin/frame/test_nlargest_nsmallest.py
@@ -54,7 +54,7 @@ def test_nlargest_nsmallest_large_n(snow_df, native_df, method):
     )
 
 
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=3)
 def test_nlargest_nsmallest_overlapping_index_name(snow_df, native_df, method):
     snow_df = snow_df.rename_axis("A")
     native_df = native_df.rename_axis("A")