From 710f233d7ccdae64cbefed8035f348376db1a40e Mon Sep 17 00:00:00 2001 From: Naresh Kumar Date: Thu, 12 Dec 2024 05:18:01 -0800 Subject: [PATCH] SNOW-1852934: Add support for Dataframe.map --- CHANGELOG.md | 1 + .../modin/supported/dataframe_supported.rst | 2 +- .../plugin/extensions/dataframe_overrides.py | 24 +++++++++++-------- tests/integ/modin/frame/test_applymap.py | 16 +++++++++---- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 98feea9cdaa..82b5dc88d19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Added support for `Series.str.ljust` and `Series.str.rjust`. - Added support for `Series.str.center`. - Added support for `Series.str.pad`. +- Added support for `DataFrame.map`. ## 1.26.0 (2024-12-05) diff --git a/docs/source/modin/supported/dataframe_supported.rst b/docs/source/modin/supported/dataframe_supported.rst index 8f139ec5d36..f0150404859 100644 --- a/docs/source/modin/supported/dataframe_supported.rst +++ b/docs/source/modin/supported/dataframe_supported.rst @@ -258,7 +258,7 @@ Methods +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``lt`` | P | ``level`` | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``map`` | N | | | +| ``map`` | P | | ``N`` if ``na_action == "ignore"`` | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``mask`` | P | | ``N`` if given ``axis`` when ``other`` is a | | | | | ``DataFrame`` or ``level`` parameters; | diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py index 7ba3eeabd26..d6ceffd8c47 100644 --- 
a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py @@ -137,9 +137,15 @@ def decorator(base_method: Any): # Avoid overwriting builtin `map` by accident @register_dataframe_accessor("map") -@dataframe_not_implemented() -def _map(self, func, na_action: str | None = None, **kwargs) -> DataFrame: - pass # pragma: no cover +def _map(self, func: PythonFuncType, na_action: str | None = None, **kwargs): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if not callable(func): + raise TypeError(f"{func} is not callable") + return self.__constructor__( + query_compiler=self._query_compiler.applymap( + func, na_action=na_action, **kwargs + ) + ) @register_dataframe_not_implemented() @@ -804,14 +810,12 @@ def apply( # Snowpark pandas uses a separate QC method, while modin directly calls map. @register_dataframe_accessor("applymap") def applymap(self, func: PythonFuncType, na_action: str | None = None, **kwargs): - # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions - if not callable(func): - raise TypeError(f"{func} is not callable") - return self.__constructor__( - query_compiler=self._query_compiler.applymap( - func, na_action=na_action, **kwargs - ) + warnings.warn( + "DataFrame.applymap has been deprecated. Use DataFrame.map instead.", + FutureWarning, + stacklevel=2, ) + return self.map(func, na_action=na_action, **kwargs) # We need to override _get_columns to satisfy diff --git a/tests/integ/modin/frame/test_applymap.py b/tests/integ/modin/frame/test_applymap.py index 517b5ce12e8..061836e5d10 100644 --- a/tests/integ/modin/frame/test_applymap.py +++ b/tests/integ/modin/frame/test_applymap.py @@ -28,25 +28,33 @@ ) +@pytest.fixture(params=["applymap", "map"]) +def method(request): + """ + Name of the DataFrame method under test: "applymap" or "map". 
+ """ + return request.param + + @pytest.mark.parametrize("data,func,return_type", BASIC_DATA_FUNC_RETURN_TYPE_MAP) @sql_count_checker(query_count=7, udf_count=1) -def test_applymap_basic_without_type_hints(data, func, return_type): +def test_applymap_basic_without_type_hints(data, func, return_type, method): frame_data = {0: data, 1: data} native_df = native_pd.DataFrame(frame_data) snow_df = pd.DataFrame(frame_data) - eval_snowpark_pandas_result(snow_df, native_df, lambda x: x.applymap(func)) + eval_snowpark_pandas_result(snow_df, native_df, lambda x: getattr(x, method)(func)) @pytest.mark.parametrize("data,func,return_type", BASIC_DATA_FUNC_RETURN_TYPE_MAP) @sql_count_checker(query_count=7, udf_count=1) -def test_applymap_basic_with_type_hints(data, func, return_type): +def test_applymap_basic_with_type_hints(data, func, return_type, method): func_with_type_hint = create_func_with_return_type_hint(func, return_type) frame_data = {0: data, 1: data} native_df = native_pd.DataFrame(frame_data) snow_df = pd.DataFrame(frame_data) eval_snowpark_pandas_result( - snow_df, native_df, lambda x: x.applymap(func_with_type_hint) + snow_df, native_df, lambda x: getattr(x, method)(func_with_type_hint) )