SNOW-1852900: Support Cortex function Sentiment in apply (#2742)

1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR.  Fixes SNOW-1852900 2. Fill out the following pre-review checklist: - [x] I am adding a new automated test(s) to verify correctness of my new code - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes. - [x] I acknowledge that I have ensured my changes to be thread-safe. Follow the link for more information: [Thread-safe Developer Guidelines](https://github.com/snowflakedb/snowpark-python/blob/main/CONTRIBUTING.md#thread-safe-development) 3. Please describe how your code solves the related issue. Support Cortex function Sentiment in apply. --------- Signed-off-by: Labanya Mukhopadhyay <[email protected]>
snowflakedb · Dec 13, 2024 · 71843f3 · 71843f3
1 parent b6e4553
commit 71843f3
Show file tree

Hide file tree

Showing 6 changed files with 93 additions and 18 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,13 +2,20 @@
 
 ## 1.27.0 (TBD)
 
+### Snowpark Python API Updates
+
+#### New Features
+
+- Added support for function `snowflake_cortex_sentiment` in `functions.py`.
+
 ### Snowpark pandas API Updates
 
 #### New Features
 
 - Added support for `Series.str.ljust` and `Series.str.rjust`.
 - Added support for `Series.str.center`.
 - Added support for `Series.str.pad`.
+- Added support for applying Snowpark Python function `snowflake_cortex_sentiment`.
 
 
 ## 1.26.0 (2024-12-05)

diff --git a/docs/source/snowpark/functions.rst b/docs/source/snowpark/functions.rst
@@ -260,6 +260,7 @@ Functions
     sinh
     size
     skew
+    snowflake_cortex_sentiment
     snowflake_cortex_summarize
     sort_array
     soundex

diff --git a/src/snowflake/snowpark/functions.py b/src/snowflake/snowpark/functions.py
@@ -10159,3 +10159,25 @@ def snowflake_cortex_summarize(text: ColumnOrLiteralStr):
     sql_func_name = "snowflake.cortex.summarize"
     text_col = _to_col_if_lit(text, sql_func_name)
     return builtin(sql_func_name)(text_col)
+
+
+def snowflake_cortex_sentiment(text: ColumnOrLiteralStr):
+    """
+    Calls the ``snowflake.cortex.sentiment`` SQL function to compute a sentiment score for the given text.
+
+    Args:
+        text: A string or column containing the English text for which a sentiment score should be calculated.
+    Returns:
+        A floating-point number from -1 to 1 (inclusive) indicating the level of negative or positive sentiment in the
+        text. Values around 0 indicate neutral sentiment.
+
+    Example::
+
+        >>> content = "A very very bad review!"
+        >>> df = session.create_dataframe([[content]], schema=["content"])
+        >>> result = df.select(snowflake_cortex_sentiment(content)).collect()[0][0]
+        >>> assert -1 <= result <= 0
+    """
+    sql_func_name = "snowflake.cortex.sentiment"
+    text_col = _to_col_if_lit(text, sql_func_name)
+    return builtin(sql_func_name)(text_col)
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py
@@ -35,6 +35,7 @@
     _log2,
     _log10,
     sin,
+    snowflake_cortex_sentiment,
     snowflake_cortex_summarize,
     udf,
     to_variant,
@@ -110,6 +111,7 @@
     floor,
     trunc,
     sqrt,
+    snowflake_cortex_sentiment,
     snowflake_cortex_summarize,
 }
 

diff --git a/tests/integ/modin/test_apply_snowpark_python_functions.py b/tests/integ/modin/test_apply_snowpark_python_functions.py
@@ -10,7 +10,7 @@
 import pytest
 
 from tests.integ.modin.utils import assert_frame_equal, assert_series_equal
-from tests.integ.utils.sql_counter import sql_count_checker
+from tests.integ.utils.sql_counter import sql_count_checker, SqlCounter
 
 
 @sql_count_checker(query_count=4)
@@ -71,21 +71,40 @@ def test_apply_snowpark_python_function_not_implemented():
         pd.DataFrame({"a": [1, 2, 3]}).apply(asc, args=(1, 2))
 
 
-@sql_count_checker(query_count=1)
-@pytest.mark.skip("SNOW-1758914 snowflake.cortex.summarize error on GCP")
-def test_apply_snowflake_cortex_summarize():
+def test_apply_snowflake_cortex_summarize(session):
     from snowflake.snowpark.functions import snowflake_cortex_summarize
 
-    content = """pandas on Snowflake lets you run your pandas code in a distributed manner directly on your data in
-    Snowflake. Just by changing the import statement and a few lines of code, you can get the familiar pandas experience
-    you know and love with the scalability and security benefits of Snowflake. With pandas on Snowflake, you can work
-    with much larger datasets and avoid the time and expense of porting your pandas pipelines to other big data
-    frameworks or provisioning large and expensive machines. It runs workloads natively in Snowflake through
-    transpilation to SQL, enabling it to take advantage of parallelization and the data governance and security
-    benefits of Snowflake. pandas on Snowflake is delivered through the Snowpark pandas API as part of the Snowpark
-    Python library, which enables scalable data processing of Python code within the Snowflake platform.
-"""
-    s = pd.Series([content])
-    summary = s.apply(snowflake_cortex_summarize).iloc[0]
-    # this length check is to get around the fact that this function may not be deterministic
-    assert 0 < len(summary) < len(content)
+    # TODO: SNOW-1758914 snowflake.cortex.summarize error on GCP
+    with SqlCounter(query_count=0):
+        if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
+            return
+
+    with SqlCounter(query_count=1):
+        content = """pandas on Snowflake lets you run your pandas code in a distributed manner directly on your data in
+        Snowflake. Just by changing the import statement and a few lines of code, you can get the familiar pandas experience
+        you know and love with the scalability and security benefits of Snowflake. With pandas on Snowflake, you can work
+        with much larger datasets and avoid the time and expense of porting your pandas pipelines to other big data
+        frameworks or provisioning large and expensive machines. It runs workloads natively in Snowflake through
+        transpilation to SQL, enabling it to take advantage of parallelization and the data governance and security
+        benefits of Snowflake. pandas on Snowflake is delivered through the Snowpark pandas API as part of the Snowpark
+        Python library, which enables scalable data processing of Python code within the Snowflake platform.
+        """
+        s = pd.Series([content])
+        summary = s.apply(snowflake_cortex_summarize).iloc[0]
+        # this length check is to get around the fact that this function may not be deterministic
+        assert 0 < len(summary) < len(content)
+
+
+def test_apply_snowflake_cortex_sentiment(session):
+    from snowflake.snowpark.functions import snowflake_cortex_sentiment
+
+    # TODO: SNOW-1758914 snowflake.cortex.sentiment error on GCP; NOTE: this early return reports a pass, not a skip
+    with SqlCounter(query_count=0):  # the host check is expected to issue no queries
+        if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
+            return
+
+    with SqlCounter(query_count=1):  # apply + iloc is expected to issue exactly one query
+        content = "A very very bad review!"
+        s = pd.Series([content])
+        sentiment = s.apply(snowflake_cortex_sentiment).iloc[0]
+        assert -1 <= sentiment <= 0
diff --git a/tests/integ/test_function.py b/tests/integ/test_function.py
@@ -127,6 +127,7 @@
     reverse,
     sequence,
     size,
+    snowflake_cortex_sentiment,
     snowflake_cortex_summarize,
     split,
     sqrt,
@@ -2272,8 +2273,11 @@ def test_ln(session):
     "config.getoption('local_testing_mode', default=False)",
     reason="FEAT: snowflake_cortex functions not supported",
 )
-@pytest.mark.skip("SNOW-1758914 snowflake.cortex.summarize error on GCP")
 def test_snowflake_cortex_summarize(session):
+    # TODO: SNOW-1758914 snowflake.cortex.summarize error on GCP
+    if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
+        return
+
     content = """In Snowpark, the main way in which you query and process data is through a DataFrame. This topic explains how to work with DataFrames.
 
 To retrieve and manipulate data, you use the DataFrame class. A DataFrame represents a relational dataset that is evaluated lazily: it only executes when a specific action is triggered. In a sense, a DataFrame is like a query that needs to be evaluated in order to retrieve data.
@@ -2302,3 +2306,23 @@ def test_snowflake_cortex_summarize(session):
     # this length check is to get around the fact that this function may not be deterministic
     assert 0 < len(summary_from_col) < len(content)
     assert 0 < len(summary_from_str) < len(content)
+
+
+@pytest.mark.skipif(
+    "config.getoption('local_testing_mode', default=False)",
+    reason="FEAT: snowflake_cortex functions not supported",
+)
+def test_apply_snowflake_cortex_sentiment(session):
+    # TODO: SNOW-1758914 sentiment errors on this GCP host; skip explicitly so the run reports "skipped", not "passed"
+    if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
+        pytest.skip("SNOW-1758914 snowflake.cortex.sentiment error on GCP")
+    content = "A very very bad review!"
+    df = session.create_dataframe([[content]], schema=["content"])
+    # Exercise both accepted input kinds: a Column reference and a plain string literal.
+    sentiment_from_col = df.select(
+        snowflake_cortex_sentiment(col("content"))
+    ).collect()[0][0]
+    sentiment_from_str = df.select(snowflake_cortex_sentiment(content)).collect()[0][0]
+    # The exact score may vary, so only require that a clearly negative review does not score above 0.
+    assert -1 <= sentiment_from_col <= 0
+    assert -1 <= sentiment_from_str <= 0
-Original file line number
+Diff line change
@@ Expand Up / @@ -260,6 +260,7 @@ Functions @@
         sinh
         size
         skew
+        snowflake_cortex_sentiment
         snowflake_cortex_summarize
         sort_array
         soundex
@@ Expand Down @@