From fff7d0535679955b560d00a0b745857e332c9c9e Mon Sep 17 00:00:00 2001 From: sfc-gh-mvashishtha Date: Mon, 16 Sep 2024 11:51:21 -0700 Subject: [PATCH 1/4] SNOW-1664064: Avoid SettingWithCopyW for Timedelta.rning. Signed-off-by: sfc-gh-mvashishtha --- CHANGELOG.md | 2 ++ .../snowpark/modin/plugin/_internal/utils.py | 2 +- tests/integ/modin/types/test_timedelta.py | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bd719dcb8c..66ec30ade36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ - Added support for some cases of aggregating `Timedelta` columns on `axis=0` with `agg` or `aggregate`. - Added support for `by`, `left_by`, and `right_by` for `pd.merge_asof`. +#### Bug Fixes +- Fixed an unhelpful `SettingWithCopyWarning` that sometimes appeared when printing `Timedelta` columns. ## 1.22.1 (2024-09-11) This is a re-release of 1.22.0. Please refer to the 1.22.0 release notes for detailed release content. diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py index 70025fd8b0a..9c955d90f6d 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/utils.py @@ -1519,7 +1519,7 @@ def convert_str_to_timedelta(x: str) -> pd.Timedelta: downcast_pandas_df.columns, cached_snowpark_pandas_types ): if snowpark_pandas_type is not None and snowpark_pandas_type == timedelta_t: - downcast_pandas_df[pandas_label] = pandas_df[pandas_label].apply( + downcast_pandas_df.loc[:, pandas_label] = pandas_df[pandas_label].apply( convert_str_to_timedelta ) diff --git a/tests/integ/modin/types/test_timedelta.py b/tests/integ/modin/types/test_timedelta.py index 4c72df42bba..fe51165e779 100644 --- a/tests/integ/modin/types/test_timedelta.py +++ b/tests/integ/modin/types/test_timedelta.py @@ -107,3 +107,14 @@ def test_timedelta_not_supported(): match="SnowflakeQueryCompiler::groupby_groups is not yet implemented for Timedelta Type", ): df.groupby("a").groups() + + +@sql_count_checker(query_count=1) +def test_aggregation_does_not_print_internal_warning_SNOW_1664064(): + import warnings + + from pandas.errors import SettingWithCopyWarning + + with warnings.catch_warnings(): + warnings.simplefilter(category=SettingWithCopyWarning, action="error") + pd.Series(pd.Timedelta(1)).max() From 849248347b350f716de5b1d8310dafa07ecded8e Mon Sep 17 00:00:00 2001 From: sfc-gh-mvashishtha Date: Mon, 16 Sep 2024 14:11:19 -0700 Subject: [PATCH 2/4] Fix tests Signed-off-by: sfc-gh-mvashishtha --- CHANGELOG.md | 2 +- .../snowpark/modin/plugin/_internal/utils.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66ec30ade36..a987df71d33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ - Added support for `by`, `left_by`, and `right_by` for `pd.merge_asof`. #### Bug Fixes -- Fixed an unhelpful `SettingWithCopyWarning` that sometimes appeared when printing `Timedelta` columns. +- Suppressed an unhelpful `SettingWithCopyWarning` that sometimes appeared when printing `Timedelta` columns. ## 1.22.1 (2024-09-11) This is a re-release of 1.22.0. Please refer to the 1.22.0 release notes for detailed release content. diff --git a/src/snowflake/snowpark/modin/plugin/_internal/utils.py b/src/snowflake/snowpark/modin/plugin/_internal/utils.py index 9c955d90f6d..34a3376fcc1 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/utils.py @@ -1519,9 +1519,15 @@ def convert_str_to_timedelta(x: str) -> pd.Timedelta: downcast_pandas_df.columns, cached_snowpark_pandas_types ): if snowpark_pandas_type is not None and snowpark_pandas_type == timedelta_t: - downcast_pandas_df.loc[:, pandas_label] = pandas_df[pandas_label].apply( - convert_str_to_timedelta - ) + # By default, pandas warns, "A value is trying to be set on a + # copy of a slice from a DataFrame" here because we are + # assigning a column to downcast_pandas_df, which is a copy of + # a slice of pandas_df. We don't care what happens to pandas_df, + # so the warning isn't useful to us. + with native_pd.option_context("mode.chained_assignment", None): + downcast_pandas_df[pandas_label] = pandas_df[pandas_label].apply( + convert_str_to_timedelta + ) # Step 7. postprocessing for return types if index_only: From b1a0ea5bcc1096fc97f3299532465a796d3bbe06 Mon Sep 17 00:00:00 2001 From: sfc-gh-mvashishtha Date: Mon, 16 Sep 2024 14:12:55 -0700 Subject: [PATCH 3/4] Fix test Signed-off-by: sfc-gh-mvashishtha --- tests/integ/modin/types/test_timedelta.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integ/modin/types/test_timedelta.py b/tests/integ/modin/types/test_timedelta.py index fe51165e779..d28362374ce 100644 --- a/tests/integ/modin/types/test_timedelta.py +++ b/tests/integ/modin/types/test_timedelta.py @@ -2,10 +2,12 @@ # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. # import datetime +import warnings import modin.pandas as pd import pandas as native_pd import pytest +from pandas.errors import SettingWithCopyWarning from tests.integ.modin.sql_counter import sql_count_checker from tests.integ.modin.utils import ( @@ -111,10 +113,6 @@ def test_timedelta_not_supported(): @sql_count_checker(query_count=1) def test_aggregation_does_not_print_internal_warning_SNOW_1664064(): - import warnings - - from pandas.errors import SettingWithCopyWarning - with warnings.catch_warnings(): warnings.simplefilter(category=SettingWithCopyWarning, action="error") pd.Series(pd.Timedelta(1)).max() From 8ef2ac16737328264ac9ceae7a141f7381c573d9 Mon Sep 17 00:00:00 2001 From: sfc-gh-mvashishtha Date: Tue, 17 Sep 2024 09:14:04 -0700 Subject: [PATCH 4/4] Revert deleted extra line Signed-off-by: sfc-gh-mvashishtha --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 82dd5f7895b..daedfe34659 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - Added support for `by`, `left_by`, and `right_by` for `pd.merge_asof`. #### Bug Fixes + - Fixed a bug where an `Index` object created from a `Series`/`DataFrame` incorrectly updates the `Series`/`DataFrame`'s index name after an inplace update has been applied to the original `Series`/`DataFrame`. - Suppressed an unhelpful `SettingWithCopyWarning` that sometimes appeared when printing `Timedelta` columns.