Skip to content

Commit

Permalink
SNOW-1852934: Add support for Dataframe.map
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-nkumar committed Dec 13, 2024
1 parent 71843f3 commit 7c0fa6e
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- Added support for `Series.str.center`.
- Added support for `Series.str.pad`.
- Added support for applying Snowpark Python function `snowflake_cortex_sentiment`.
- Added support for `DataFrame.map`.


## 1.26.0 (2024-12-05)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/modin/supported/dataframe_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``lt`` | P | ``level`` | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``map`` | N | | |
| ``map`` | P | | ``N`` if ``na_action == "ignore"`` |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``mask`` | P | | ``N`` if given ``axis`` when ``other`` is a |
| | | | ``DataFrame`` or ``level`` parameters; |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,15 @@ def decorator(base_method: Any):

# Avoid overwriting builtin `map` by accident
@register_dataframe_accessor("map")
@dataframe_not_implemented()
def _map(self, func, na_action: str | None = None, **kwargs) -> DataFrame:
pass # pragma: no cover
def _map(self, func: PythonFuncType, na_action: str | None = None, **kwargs):
# TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
if not callable(func):
raise TypeError(f"{func} is not callable")
return self.__constructor__(
query_compiler=self._query_compiler.applymap(
func, na_action=na_action, **kwargs
)
)


@register_dataframe_not_implemented()
Expand Down Expand Up @@ -804,14 +810,12 @@ def apply(
# Snowpark pandas uses a separate QC method, while modin directly calls map.
@register_dataframe_accessor("applymap")
def applymap(self, func: PythonFuncType, na_action: str | None = None, **kwargs):
# TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
if not callable(func):
raise TypeError(f"{func} is not callable")
return self.__constructor__(
query_compiler=self._query_compiler.applymap(
func, na_action=na_action, **kwargs
)
warnings.warn(
"DataFrame.applymap has been deprecated. Use DataFrame.map instead.",
FutureWarning,
stacklevel=2,
)
return self.map(func, na_action=na_action, **kwargs)


# We need to override _get_columns to satisfy
Expand Down
16 changes: 12 additions & 4 deletions tests/integ/modin/frame/test_applymap.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,33 @@
)


@pytest.fixture(params=["applymap", "map"])
def method(request):
    """
    Name of the DataFrame method under test: ``"applymap"`` or ``"map"``.

    Tests that take this fixture run once per name and look the method up
    with ``getattr(df, method)``.
    """
    return request.param


@pytest.mark.parametrize("data,func,return_type", BASIC_DATA_FUNC_RETURN_TYPE_MAP)
@sql_count_checker(query_count=7, udf_count=1)
def test_applymap_basic_without_type_hints(data, func, return_type):
def test_applymap_basic_without_type_hints(data, func, return_type, method):
frame_data = {0: data, 1: data}
native_df = native_pd.DataFrame(frame_data)
snow_df = pd.DataFrame(frame_data)
eval_snowpark_pandas_result(snow_df, native_df, lambda x: x.applymap(func))
eval_snowpark_pandas_result(snow_df, native_df, lambda x: getattr(x, method)(func))


@pytest.mark.parametrize("data,func,return_type", BASIC_DATA_FUNC_RETURN_TYPE_MAP)
@sql_count_checker(query_count=7, udf_count=1)
def test_applymap_basic_with_type_hints(data, func, return_type):
def test_applymap_basic_with_type_hints(data, func, return_type, method):
func_with_type_hint = create_func_with_return_type_hint(func, return_type)

frame_data = {0: data, 1: data}
native_df = native_pd.DataFrame(frame_data)
snow_df = pd.DataFrame(frame_data)
eval_snowpark_pandas_result(
snow_df, native_df, lambda x: x.applymap(func_with_type_hint)
snow_df, native_df, lambda x: getattr(x, method)(func_with_type_hint)
)


Expand Down

0 comments on commit 7c0fa6e

Please sign in to comment.