From ab01d9d73942d3ab35ba1441dbdee893d6210bd0 Mon Sep 17 00:00:00 2001 From: Naresh Kumar Date: Thu, 12 Dec 2024 05:18:01 -0800 Subject: [PATCH 1/3] SNOW-1852934: Add support for Dataframe.map --- CHANGELOG.md | 1 + .../modin/supported/dataframe_supported.rst | 2 +- .../plugin/extensions/dataframe_overrides.py | 24 +++++++++++-------- tests/integ/modin/frame/test_applymap.py | 16 +++++++++---- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8186407e7fd..0679a8e7cb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ - Added support for `Series.str.center`. - Added support for `Series.str.pad`. - Added support for applying Snowpark Python function `snowflake_cortex_sentiment`. +- Added support for `DataFrame.map`. #### Improvements - Improve performance of `DataFrame.map`, `Series.apply` and `Series.map` methods by mapping numpy functions to snowpark functions if possible. diff --git a/docs/source/modin/supported/dataframe_supported.rst b/docs/source/modin/supported/dataframe_supported.rst index 8f139ec5d36..f0150404859 100644 --- a/docs/source/modin/supported/dataframe_supported.rst +++ b/docs/source/modin/supported/dataframe_supported.rst @@ -258,7 +258,7 @@ Methods +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``lt`` | P | ``level`` | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``map`` | N | | | +| ``map`` | P | | ``N`` if ``na_action == "ignore"`` | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``mask`` | P | | ``N`` if given ``axis`` when ``other`` is a | | | | | ``DataFrame`` or ``level`` parameters; | diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py index 7ba3eeabd26..d6ceffd8c47 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py @@ -137,9 +137,15 @@ def decorator(base_method: Any): # Avoid overwriting builtin `map` by accident @register_dataframe_accessor("map") -@dataframe_not_implemented() -def _map(self, func, na_action: str | None = None, **kwargs) -> DataFrame: - pass # pragma: no cover +def _map(self, func: PythonFuncType, na_action: str | None = None, **kwargs): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if not callable(func): + raise TypeError(f"{func} is not callable") + return self.__constructor__( + query_compiler=self._query_compiler.applymap( + func, na_action=na_action, **kwargs + ) + ) @register_dataframe_not_implemented() @@ -804,14 +810,12 @@ def apply( # Snowpark pandas uses a separate QC method, while modin directly calls map. @register_dataframe_accessor("applymap") def applymap(self, func: PythonFuncType, na_action: str | None = None, **kwargs): - # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions - if not callable(func): - raise TypeError(f"{func} is not callable") - return self.__constructor__( - query_compiler=self._query_compiler.applymap( - func, na_action=na_action, **kwargs - ) + warnings.warn( + "DataFrame.applymap has been deprecated. Use DataFrame.map instead.", + FutureWarning, + stacklevel=2, ) + return self.map(func, na_action=na_action, **kwargs) # We need to override _get_columns to satisfy diff --git a/tests/integ/modin/frame/test_applymap.py b/tests/integ/modin/frame/test_applymap.py index 3e7f06af70e..acd3da340be 100644 --- a/tests/integ/modin/frame/test_applymap.py +++ b/tests/integ/modin/frame/test_applymap.py @@ -28,25 +28,33 @@ ) +@pytest.fixture(params=["applymap", "map"]) +def method(request): + """ + how keyword to pass to merge. + """ + return request.param + + @pytest.mark.parametrize("data,func,return_type", BASIC_DATA_FUNC_RETURN_TYPE_MAP) @sql_count_checker(query_count=7, udf_count=1) -def test_applymap_basic_without_type_hints(data, func, return_type): +def test_applymap_basic_without_type_hints(data, func, return_type, method): frame_data = {0: data, 1: data} native_df = native_pd.DataFrame(frame_data) snow_df = pd.DataFrame(frame_data) - eval_snowpark_pandas_result(snow_df, native_df, lambda x: x.applymap(func)) + eval_snowpark_pandas_result(snow_df, native_df, lambda x: getattr(x, method)(func)) @pytest.mark.parametrize("data,func,return_type", BASIC_DATA_FUNC_RETURN_TYPE_MAP) @sql_count_checker(query_count=7, udf_count=1) -def test_applymap_basic_with_type_hints(data, func, return_type): +def test_applymap_basic_with_type_hints(data, func, return_type, method): func_with_type_hint = create_func_with_return_type_hint(func, return_type) frame_data = {0: data, 1: data} native_df = native_pd.DataFrame(frame_data) snow_df = pd.DataFrame(frame_data) eval_snowpark_pandas_result( - snow_df, native_df, lambda x: x.applymap(func_with_type_hint) + snow_df, native_df, lambda x: getattr(x, method)(func_with_type_hint) ) From 7f75c2c84b945264a3c9c4eecf1ca4806d181233 Mon Sep 17 00:00:00 2001 From: Naresh Kumar Date: Thu, 12 Dec 2024 13:26:20 -0800 Subject: [PATCH 2/3] added na_action test --- tests/integ/modin/frame/test_applymap.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integ/modin/frame/test_applymap.py b/tests/integ/modin/frame/test_applymap.py index acd3da340be..8755b61d676 100644 --- a/tests/integ/modin/frame/test_applymap.py +++ b/tests/integ/modin/frame/test_applymap.py @@ -31,7 +31,7 @@ @pytest.fixture(params=["applymap", "map"]) def method(request): """ - how keyword to pass to merge. + method name to test. """ return request.param @@ -120,16 +120,16 @@ def test_applymap_numpy(func): @sql_count_checker(query_count=0) -def test_applymap_na_action_ignore(): +def test_applymap_na_action_ignore(method): snow_df = pd.DataFrame([1, 1.1, "NaN", None], dtype="Float64") msg = "Snowpark pandas applymap API doesn't yet support na_action == 'ignore'" with pytest.raises(NotImplementedError, match=msg): - snow_df.applymap(lambda x: x is None, na_action="ignore") + getattr(snow_df, method)(lambda x: x is None, na_action="ignore") data = ["cat", "dog", np.nan, "rabbit"] snow_df = pd.DataFrame(data) with pytest.raises(NotImplementedError, match=msg): - snow_df.applymap("I am a {}".format, na_action="ignore") + getattr(snow_df, method)("I am a {}".format, na_action="ignore") @pytest.mark.parametrize("invalid_input", ["min", [np.min], {"a": np.max}]) From 887cb80e1ef50ba9cbecc54eaa08651116883c84 Mon Sep 17 00:00:00 2001 From: Naresh Kumar Date: Fri, 13 Dec 2024 10:25:55 -0800 Subject: [PATCH 3/3] enable more tests --- .../snowpark/modin/plugin/extensions/dataframe_overrides.py | 2 +- tests/integ/modin/frame/test_applymap.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py index d6ceffd8c47..ac2a5d02329 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py @@ -140,7 +140,7 @@ def decorator(base_method: Any): def _map(self, func: PythonFuncType, na_action: str | None = None, **kwargs): # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions if not callable(func): - raise TypeError(f"{func} is not callable") + raise TypeError(f"{func} is not callable") # pragma: no cover return self.__constructor__( query_compiler=self._query_compiler.applymap( func, na_action=na_action, **kwargs diff --git a/tests/integ/modin/frame/test_applymap.py b/tests/integ/modin/frame/test_applymap.py index 8755b61d676..4486aea7a33 100644 --- a/tests/integ/modin/frame/test_applymap.py +++ b/tests/integ/modin/frame/test_applymap.py @@ -134,13 +134,13 @@ def test_applymap_na_action_ignore(method): @pytest.mark.parametrize("invalid_input", ["min", [np.min], {"a": np.max}]) @sql_count_checker(query_count=0) -def test_applymap_invalid_input(invalid_input): +def test_applymap_invalid_input(invalid_input, method): snow_df = pd.DataFrame([1]) native_df = native_pd.DataFrame([1]) eval_snowpark_pandas_result( snow_df, native_df, - lambda x: x.applymap(invalid_input), + lambda x: getattr(x, method)(invalid_input), expect_exception=True, expect_exception_match="is not callable", assert_exception_equal=False,