Skip to content

Commit

Permalink
SNOW-1458122 Index name apis (#2106)
Browse files Browse the repository at this point in the history
Co-authored-by: Andong Zhan <[email protected]>
Co-authored-by: Mahesh Vashishtha <[email protected]>
  • Loading branch information
3 people authored Aug 16, 2024
1 parent 7ba0846 commit 92a7ed4
Show file tree
Hide file tree
Showing 8 changed files with 389 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
- Added support for index's arithmetic and comparison operators.
- Added support for `Series.dt.round`.
- Added documentation pages for `DatetimeIndex`.
- Added support for `Index.name`, `Index.names`, `Index.rename`, and `Index.set_names`.

#### Improvements
- Removed the public preview warning message upon importing Snowpark pandas.
Expand Down
6 changes: 3 additions & 3 deletions docs/source/modin/supported/index_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Attributes
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``name`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``names`` | P | |
| ``names`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nbytes`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down Expand Up @@ -114,7 +114,7 @@ Methods
| ``reindex`` | P | | ``N`` if the Index values are tuple-like, or |
| | | | method is ``nearest``. |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``rename`` | N | | |
| ``rename`` | Y | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``repeat`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
Expand All @@ -130,7 +130,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``value_counts`` | P | ``bins`` | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``set_names`` | N | | |
| ``set_names`` | Y | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``droplevel`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ def _get_index(self):
return self._query_compiler.index

idx = Index(query_compiler=self._query_compiler)
idx._parent = self
idx._set_parent(self)
return idx

index = property(_get_index, _set_index)
Expand Down
72 changes: 61 additions & 11 deletions src/snowflake/snowpark/modin/plugin/extensions/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
import numpy as np
import pandas as native_pd
from pandas._libs import lib
from pandas._libs.lib import is_list_like, is_scalar
from pandas._typing import ArrayLike, DateTimeErrorChoices, DtypeObj, NaPosition
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.common import is_datetime64_any_dtype, pandas_dtype
from pandas.core.dtypes.inference import is_hashable

from snowflake.snowpark.modin.pandas import DataFrame, Series
from snowflake.snowpark.modin.pandas.base import BasePandasDataset
Expand Down Expand Up @@ -74,7 +76,7 @@ def __new__(
) -> Index:
"""
Override __new__ method to control new instance creation of Index.
Depending on data type, it will create a Index or DatetimeIndex instance.
Depending on data type, it will create an Index or DatetimeIndex instance.
Parameters
----------
Expand Down Expand Up @@ -177,6 +179,8 @@ def _init_index(
query_compiler: SnowflakeQueryCompiler = None,
**kwargs: Any,
):
# `_parent` keeps track of any Series or DataFrame that this Index is a part of.
self._parent = None
if query_compiler:
# Raise warning if `data` is query compiler with non-default arguments.
for arg_name, arg_value in kwargs.items():
Expand Down Expand Up @@ -336,6 +340,12 @@ def __constructor__(self):
"""
return type(self)

def _set_parent(self, parent: Series | DataFrame) -> None:
    """
    Set the parent object of the current Index to a given Series or DataFrame.

    Parameters
    ----------
    parent : Series or DataFrame
        Object to store in ``self._parent``. The ``name`` and ``names`` setters
        check this attribute and, when it is not None, also apply the new index
        names to the parent through ``_update_inplace``.
    """
    self._parent = parent

@property
def values(self) -> ArrayLike:
"""
Expand Down Expand Up @@ -612,7 +622,7 @@ def name(self) -> Hashable:
Returns
-------
Hashable
name of this index
Name of this index.
Examples
--------
Expand All @@ -629,7 +639,13 @@ def name(self, value: Hashable) -> None:
"""
Set Index name.
"""
if not is_hashable(value):
raise TypeError(f"{type(self).__name__}.name must be a hashable type")
self._query_compiler = self._query_compiler.set_index_names([value])
if self._parent is not None:
self._parent._update_inplace(
new_query_compiler=self._parent._query_compiler.set_index_names([value])
)

def _get_names(self) -> list[Hashable]:
"""
Expand All @@ -651,6 +667,10 @@ def _set_names(self, values: list) -> None:
TypeError if any name is not hashable.
"""
self._query_compiler = self._query_compiler.set_index_names(values)
if self._parent is not None:
self._parent._update_inplace(
new_query_compiler=self._parent._query_compiler.set_index_names(values)
)

names = property(fset=_set_names, fget=_get_names)

Expand Down Expand Up @@ -685,13 +705,23 @@ def set_names(
>>> idx.set_names('quarter')
Index([1, 2, 3, 4], dtype='int64', name='quarter')
"""
# TODO: SNOW-1458122 implement set_names
WarningMessage.index_to_pandas_warning("set_names")
if not inplace:
return self.__constructor__(
self.to_pandas().set_names(names, level=level, inplace=inplace)
if is_list_like(names) and len(names) > 1:
raise ValueError(
f"Since Index is a single index object in Snowpark pandas, "
f"the length of new names must be 1, got {len(names)}."
)
if level is not None and level not in [0, -1]:
raise IndexError(
f"Level does not exist: Index has only 1 level, {level} is not a valid level number."
)
return self.to_pandas().set_names(names, level=level, inplace=inplace)
if inplace:
name = names[0] if is_list_like(names) else names
self.name = name
return None
else:
res = self.__constructor__(query_compiler=self._query_compiler)
res.name = names if is_scalar(names) else names[0]
return res

@property
def ndim(self) -> int:
Expand Down Expand Up @@ -1521,8 +1551,7 @@ def reindex(
)
return Index(query_compiler=query_compiler), indices

@index_not_implemented()
def rename(self) -> None:
def rename(self, name: Any, inplace: bool = False) -> None:
"""
Alter Index or MultiIndex name.
Expand All @@ -1545,8 +1574,29 @@ def rename(self) -> None:
See Also
--------
Index.set_names : Able to set new names partially and by level.
Examples
--------
>>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
>>> idx.rename('grade', inplace=False)
Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
>>> idx.rename('grade', inplace=True)
Note
----
Native pandas only allows hashable types for names. Snowpark pandas allows
name to be any scalar or a list-like of length 1; a list-like with more than
one element raises a ValueError. If a tuple is used for the name,
the tuple itself will be the name.
For instance,
>>> idx = pd.Index([1, 2, 3])
>>> idx.rename(('a', 'b', 'c'), inplace=True)
>>> idx.name
('a', 'b', 'c')
"""
# TODO: SNOW-1458122 implement rename
if isinstance(name, tuple):
name = [name] # The entire tuple is the name
return self.set_names(names=name, inplace=inplace)

def nunique(self, dropna: bool = True) -> int:
"""
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/frame/test_nlargest_nsmallest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_nlargest_nsmallest_large_n(snow_df, native_df, method):
)


@sql_count_checker(query_count=4, join_count=1)
@sql_count_checker(query_count=3)
def test_nlargest_nsmallest_overlapping_index_name(snow_df, native_df, method):
snow_df = snow_df.rename_axis("A")
native_df = native_df.rename_axis("A")
Expand Down
Loading

0 comments on commit 92a7ed4

Please sign in to comment.