From 810cd76407a7bedc0ab57cdcb4850fc192d94cf7 Mon Sep 17 00:00:00 2001
From: Naresh Kumar
Date: Fri, 13 Dec 2024 12:52:33 -0800
Subject: [PATCH] move map to numpy_to_pandas

---
Review notes (free text between the "---" marker and the diffstat; it is not
part of the commit message):

* np.arctan2 is no longer an entry of NUMPY_FUNCTION_TO_SNOWFLAKE_FUNCTION.
  It was mapped to builtin("atan2") called with a single column, but ATAN2
  takes two arguments, so the mapping could only generate invalid SQL. The
  entry is replaced by a one-line comment, so hunk line counts and the
  diffstat below are unchanged.
* The post-image blob id on the numpy_to_pandas.py "index" line predates
  that amendment; regenerate the patch after applying if exact ids matter.
* NOTE(review): only two files are touched here, so the module that used to
  export NUMPY_FUNCTION_TO_SNOWFLAKE_FUNCTION presumably still carries its
  own copy of the table. Confirm, and delete it in this or a follow-up patch.

 .../compiler/snowflake_query_compiler.py  |  9 +++-
 .../modin/plugin/utils/numpy_to_pandas.py | 46 +++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 389c73e0287..5849dba0745 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -185,7 +185,6 @@
     APPLY_LABEL_COLUMN_QUOTED_IDENTIFIER,
     APPLY_VALUE_COLUMN_QUOTED_IDENTIFIER,
     DEFAULT_UDTF_PARTITION_SIZE,
-    NUMPY_FUNCTION_TO_SNOWFLAKE_FUNCTION,
     GroupbyApplySortMethod,
     check_return_variant_and_get_return_type,
     create_udf_for_series_apply,
@@ -373,6 +372,9 @@
 from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
 from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage
 from snowflake.snowpark.modin.utils import MODIN_UNNAMED_SERIES_LABEL
+from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import (
+    NUMPY_FUNCTION_TO_SNOWFLAKE_FUNCTION,
+)
 from snowflake.snowpark.session import Session
 from snowflake.snowpark.types import (
     ArrayType,
@@ -8760,7 +8762,10 @@ def applymap(
         # Check if the function is a known numpy function that can be translated to Snowflake function.
         sf_func = NUMPY_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(func)
         if sf_func is not None:
-            return self._apply_snowpark_python_function_to_columns(sf_func, kwargs)
+            # TODO SNOW-1739034: remove pragma no cover when apply tests are enabled in CI
+            return self._apply_snowpark_python_function_to_columns(
+                sf_func, kwargs
+            )  # pragma: no cover
 
         # Currently, NULL values are always passed into the udtf even if strict=True,
         # which is a bug on the server side SNOW-880105.
diff --git a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
index ce8c02b7692..8faa9cdb350 100644
--- a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
+++ b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
@@ -6,6 +6,7 @@
 import modin.pandas as pd
 from modin.pandas.base import BasePandasDataset
 from modin.pandas.utils import is_scalar
+import numpy as np
 
 from snowflake.snowpark import functions as sp_func
 from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage
@@ -288,3 +289,48 @@ def map_to_bools(inputs: Any) -> Any:
         sp_func.trunc
     ),  # df.truncate not supported in snowpandas yet
 }
+
+
+NUMPY_FUNCTION_TO_SNOWFLAKE_FUNCTION = {
+    # Math operations
+    np.absolute: sp_func.abs,
+    np.sign: sp_func.sign,
+    np.negative: sp_func.negate,
+    np.positive: lambda col: col,
+    np.sqrt: sp_func.sqrt,
+    np.square: lambda col: sp_func.builtin("square")(col),
+    np.cbrt: lambda col: sp_func.builtin("cbrt")(col),
+    np.reciprocal: lambda col: 1 / col,
+    np.exp: sp_func.exp,
+    np.exp2: lambda col: sp_func.pow(2, col),
+    np.expm1: lambda col: sp_func.exp(col) - 1,
+    np.log: sp_func.ln,
+    np.log2: sp_func._log2,
+    np.log10: sp_func._log10,
+    np.log1p: lambda col: sp_func.ln(col + 1),
+    # Aggregate functions translate to identity functions when applied element wise
+    np.sum: lambda col: col,
+    np.min: lambda col: col,
+    np.max: lambda col: col,
+    # Trigonometric functions
+    np.sin: sp_func.sin,
+    np.cos: sp_func.cos,
+    np.tan: sp_func.tan,
+    np.sinh: sp_func.sinh,
+    np.cosh: sp_func.cosh,
+    np.tanh: sp_func.tanh,
+    np.arcsin: lambda col: sp_func.builtin("asin")(col),
+    np.arccos: lambda col: sp_func.builtin("acos")(col),
+    np.arctan: lambda col: sp_func.builtin("atan")(col),
+    # np.arctan2 is binary (ATAN2 needs two arguments), so it has no one-column mapping.
+    np.arcsinh: lambda col: sp_func.builtin("asinh")(col),
+    np.arccosh: lambda col: sp_func.builtin("acosh")(col),
+    np.arctanh: lambda col: sp_func.builtin("atanh")(col),
+    np.degrees: lambda col: sp_func.builtin("degrees")(col),
+    np.radians: lambda col: sp_func.builtin("radians")(col),
+    # Floating functions
+    np.ceil: sp_func.ceil,
+    np.floor: sp_func.floor,
+    np.trunc: sp_func.trunc,
+    np.isnan: sp_func.is_null,
+}