From 8a5520582b8bf230fbdcc21fc90f47d6b9194c35 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Wed, 14 Aug 2024 16:21:13 -0700 Subject: [PATCH 1/7] fix to_snowpark_pandas import --- src/snowflake/snowpark/dataframe.py | 8 +++-- tests/integ/test_df_to_snowpark_pandas.py | 39 +++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 tests/integ/test_df_to_snowpark_pandas.py diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index 539015ad907..8512e7c018e 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -1007,8 +1007,12 @@ def to_snowpark_pandas( B A 2 1 1 3 1 """ - import snowflake.snowpark.modin.pandas as pd # pragma: no cover - + # black and isort disagree on how to format this section with isort: skip + # fmt: off + import snowflake.snowpark.modin.plugin # isort: skip # noqa: F401 + # If snowflake.snowpark.modin.plugin was successfully imported, then modin.pandas is available + import modin.pandas as pd # isort: skip + # fmt: on # create a temporary table out of the current snowpark dataframe temporary_table_name = random_name_for_temp_object( TempObjectType.TABLE diff --git a/tests/integ/test_df_to_snowpark_pandas.py b/tests/integ/test_df_to_snowpark_pandas.py new file mode 100644 index 00000000000..659bc4a26e1 --- /dev/null +++ b/tests/integ/test_df_to_snowpark_pandas.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Tests behavior of to_snowpark_pandas() without explicitly initializing Snowpark pandas. 
+ +import pytest + +from snowflake.snowpark._internal.utils import TempObjectType +from tests.utils import Utils + + +@pytest.fixture(scope="module") +def tmp_table_basic(session): + table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE) + Utils.create_table( + session, table_name, "id integer, foot_size float, shoe_model varchar" + ) + session.sql(f"insert into {table_name} values (1, 32.0, 'medium')").collect() + session.sql(f"insert into {table_name} values (2, 27.0, 'small')").collect() + session.sql(f"insert into {table_name} values (3, 40.0, 'large')").collect() + + try: + yield table_name + finally: + Utils.drop_table(session, table_name) + + +def test_to_snowpark_pandas_no_modin(session, tmp_table_basic): + snowpark_df = session.table(tmp_table_basic) + # Check if modin is installed + try: + import modin # noqa: F401 + + snowpark_df.to_snowpark_pandas() # should have no errors + except ModuleNotFoundError: + with pytest.raises(ModuleNotFoundError, match="Modin is not installed."): + snowpark_df.to_snowpark_pandas() From d4b306b5aab5c79f4afdb640fd0f930d633b372a Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Wed, 14 Aug 2024 17:24:26 -0700 Subject: [PATCH 2/7] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 093a6b0a6af..97f7399fe12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -98,6 +98,7 @@ - Fixed a bug in `DataFrame.lineage.trace` to split the quoted feature view's name and version correctly. - Fixed a bug in `Column.isin` that caused invalid sql generation when passed an empty list. - Fixed a bug that fails to raise NotImplementedError while setting cell with list like item. +- Fixed a bug where calling `DataFrame.to_snowpark_pandas_dataframe` without explicitly initializing the Snowpark pandas plugin caused an error. 
### Snowpark Local Testing Updates From 5052ed582c44f0fad03b2ae86630c72d0635af94 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 20 Aug 2024 12:29:05 -0700 Subject: [PATCH 3/7] add test to snowpark pandas ci --- tests/integ/test_df_to_snowpark_pandas.py | 15 +++++++++++++-- tox.ini | 4 ++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/integ/test_df_to_snowpark_pandas.py b/tests/integ/test_df_to_snowpark_pandas.py index 659bc4a26e1..c57be18e642 100644 --- a/tests/integ/test_df_to_snowpark_pandas.py +++ b/tests/integ/test_df_to_snowpark_pandas.py @@ -10,6 +10,14 @@ from snowflake.snowpark._internal.utils import TempObjectType from tests.utils import Utils +pytestmark = [ + pytest.mark.xfail( + "config.getoption('local_testing_mode', default=False)", + reason="This is testing Snowpark pandas installation", + run=False, + ) +] + @pytest.fixture(scope="module") def tmp_table_basic(session): @@ -29,11 +37,14 @@ def tmp_table_basic(session): def test_to_snowpark_pandas_no_modin(session, tmp_table_basic): snowpark_df = session.table(tmp_table_basic) - # Check if modin is installed + # Check if modin is installed (if so, we're running in Snowpark pandas; if not, we're just in Snowpark Python) try: import modin # noqa: F401 snowpark_df.to_snowpark_pandas() # should have no errors except ModuleNotFoundError: - with pytest.raises(ModuleNotFoundError, match="Modin is not installed."): + with pytest.raises( + ModuleNotFoundError, + match=r"(Modin is not installed)|(does not match the supported pandas version in Modin)", + ): snowpark_df.to_snowpark_pandas() diff --git a/tox.ini b/tox.ini index d305c1a8bfa..a6ef16e6fe7 100644 --- a/tox.ini +++ b/tox.ini @@ -96,11 +96,11 @@ commands = local: {env:SNOWFLAKE_PYTEST_CMD} --local_testing_mode -m "integ or unit or mock" {posargs:} tests dailynotdoctest: {env:SNOWFLAKE_PYTEST_DAILY_CMD} -m "{env:SNOWFLAKE_TEST_TYPE} or udf" {posargs:} tests # Snowpark pandas commands: - snowparkpandasnotdoctest: 
{env:MODIN_PYTEST_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} {env:SNOW_1314507_WORKAROUND_RERUN_FLAGS} tests/unit/modin tests/integ/modin + snowparkpandasnotdoctest: {env:MODIN_PYTEST_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} {env:SNOW_1314507_WORKAROUND_RERUN_FLAGS} tests/unit/modin tests/integ/modin tests/integ/test_df_to_snowpark_pandas.py # This one only run doctest but we still need to include the tests folder to let tests/conftest.py to mark the doctest files for us snowparkpandasdoctest: {env:MODIN_PYTEST_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} src/snowflake/snowpark/modin/ tests/unit/modin # This one is used by daily_modin_precommit.yml - snowparkpandasdailynotdoctest: {env:MODIN_PYTEST_DAILY_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} {env:SNOW_1314507_WORKAROUND_RERUN_FLAGS} tests/unit/modin tests/integ/modin + snowparkpandasdailynotdoctest: {env:MODIN_PYTEST_DAILY_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} {env:SNOW_1314507_WORKAROUND_RERUN_FLAGS} tests/unit/modin tests/integ/modin tests/integ/test_df_to_snowpark_pandas.py # This one is only called by jenkins job and the only difference from `snowparkpandasnotdoctest` is that it uses # MODIN_PYTEST_NO_COV_CMD instead of MODIN_PYTEST_CMD snowparkpandasjenkins: {env:MODIN_PYTEST_NO_COV_CMD} --durations=20 -m "{env:SNOWFLAKE_TEST_TYPE}" {posargs:} {env:SNOW_1314507_WORKAROUND_RERUN_FLAGS} tests/unit/modin tests/integ/modin From 94218ba23bd88931d3d7d713d88792878b724644 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 20 Aug 2024 13:33:02 -0700 Subject: [PATCH 4/7] fix pandas version mismatch warning --- tests/integ/test_df_to_snowpark_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integ/test_df_to_snowpark_pandas.py b/tests/integ/test_df_to_snowpark_pandas.py index c57be18e642..e0929a9d447 100644 --- a/tests/integ/test_df_to_snowpark_pandas.py +++ 
b/tests/integ/test_df_to_snowpark_pandas.py @@ -44,7 +44,7 @@ def test_to_snowpark_pandas_no_modin(session, tmp_table_basic): snowpark_df.to_snowpark_pandas() # should have no errors except ModuleNotFoundError: with pytest.raises( - ModuleNotFoundError, - match=r"(Modin is not installed)|(does not match the supported pandas version in Modin)", + (ModuleNotFoundError, RuntimeError), + match=r"(Modin is not installed)|(does not match the supported pandas version in Snowpark pandas)", ): snowpark_df.to_snowpark_pandas() From 2490c37c4e860a54eafd42734e417b2d57bf4a71 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 20 Aug 2024 16:36:41 -0700 Subject: [PATCH 5/7] shorten try/except --- tests/integ/test_df_to_snowpark_pandas.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/integ/test_df_to_snowpark_pandas.py b/tests/integ/test_df_to_snowpark_pandas.py index e0929a9d447..f4a678e35ee 100644 --- a/tests/integ/test_df_to_snowpark_pandas.py +++ b/tests/integ/test_df_to_snowpark_pandas.py @@ -41,10 +41,20 @@ def test_to_snowpark_pandas_no_modin(session, tmp_table_basic): try: import modin # noqa: F401 - snowpark_df.to_snowpark_pandas() # should have no errors + modin_installed = True except ModuleNotFoundError: + modin_installed = False + if modin_installed: + snowpark_df.to_snowpark_pandas() # should have no errors + else: + # Current Snowpark Python installs pandas==2.2.2, but Snowpark pandas depends on modin + # 0.28.1, which needs pandas==2.2.1. The pandas version check is currently performed + # before Snowpark pandas checks whether modin is installed. + # TODO: SNOW-1552497: after upgrading to modin 0.30.1, Snowpark pandas will support + # all pandas 2.2.x, and this function call will raise a ModuleNotFoundError since + # modin is not installed. 
with pytest.raises( - (ModuleNotFoundError, RuntimeError), - match=r"(Modin is not installed)|(does not match the supported pandas version in Snowpark pandas)", + RuntimeError, + match="does not match the supported pandas version in Snowpark pandas", ): snowpark_df.to_snowpark_pandas() From b5941bde5efd0b208cc99d681e18c340283e6b62 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 20 Aug 2024 16:38:36 -0700 Subject: [PATCH 6/7] use try/except/else --- tests/integ/test_df_to_snowpark_pandas.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/integ/test_df_to_snowpark_pandas.py b/tests/integ/test_df_to_snowpark_pandas.py index f4a678e35ee..05d51b1b38a 100644 --- a/tests/integ/test_df_to_snowpark_pandas.py +++ b/tests/integ/test_df_to_snowpark_pandas.py @@ -40,13 +40,7 @@ def test_to_snowpark_pandas_no_modin(session, tmp_table_basic): # Check if modin is installed (if so, we're running in Snowpark pandas; if not, we're just in Snowpark Python) try: import modin # noqa: F401 - - modin_installed = True except ModuleNotFoundError: - modin_installed = False - if modin_installed: - snowpark_df.to_snowpark_pandas() # should have no errors - else: # Current Snowpark Python installs pandas==2.2.2, but Snowpark pandas depends on modin # 0.28.1, which needs pandas==2.2.1. The pandas version check is currently performed # before Snowpark pandas checks whether modin is installed. 
@@ -58,3 +52,5 @@ def test_to_snowpark_pandas_no_modin(session, tmp_table_basic): match="does not match the supported pandas version in Snowpark pandas", ): snowpark_df.to_snowpark_pandas() + else: + snowpark_df.to_snowpark_pandas() # should have no errors From 660365c10a2dfbb5b3f1697c0e458dd803190079 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Mon, 26 Aug 2024 14:47:11 -0700 Subject: [PATCH 7/7] move changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97f7399fe12..d6b43baaa3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ - Fixed a bug in `session.read.csv` that caused an error when setting `PARSE_HEADER = True` in an externally defined file format. - Fixed a bug in query generation from set operations that allowed generation of duplicate queries when children have common subqueries. - Fixed a bug in `session.get_session_stage` that referenced a non-existing stage after switching database or schema. +- Fixed a bug where calling `DataFrame.to_snowpark_pandas` without explicitly initializing the Snowpark pandas plugin caused an error. ### Snowpark Local Testing Updates @@ -98,7 +99,6 @@ - Fixed a bug in `DataFrame.lineage.trace` to split the quoted feature view's name and version correctly. - Fixed a bug in `Column.isin` that caused invalid sql generation when passed an empty list. - Fixed a bug that fails to raise NotImplementedError while setting cell with list like item. -- Fixed a bug where calling `DataFrame.to_snowpark_pandas_dataframe` without explicitly initializing the Snowpark pandas plugin caused an error. ### Snowpark Local Testing Updates