From 156d621458f4ab9db2fc9b961ae21c2b28bf30e5 Mon Sep 17 00:00:00 2001
From: Shixuan Fan
Date: Wed, 15 Nov 2023 16:34:15 -0800
Subject: [PATCH] SNOW-971549 Fix test_to_pandas_cast_integer for new pyarrow/pandas (#1136)

Description

Starting from pyarrow 13, pyarrow no longer coerces datetime values to nanosecond precision when the pandas version is >= 2.0, so we need to adapt our test.

Testing

integ test
---
 tests/integ/test_df_to_pandas.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/integ/test_df_to_pandas.py b/tests/integ/test_df_to_pandas.py
index 7892e8d8fee..b21a20e20d5 100644
--- a/tests/integ/test_df_to_pandas.py
+++ b/tests/integ/test_df_to_pandas.py
@@ -14,6 +14,11 @@
 except ImportError:
     pytest.skip("Pandas is not available", allow_module_level=True)
 
+try:
+    import pyarrow as pa
+except ImportError:
+    pytest.skip("pyarrow is not available", allow_module_level=True)
+
 from snowflake.snowpark._internal.utils import TempObjectType
 from snowflake.snowpark.exceptions import SnowparkFetchDataException
 
@@ -88,7 +93,16 @@ def test_to_pandas_cast_integer(session, to_pandas_api):
         if to_pandas_api == "to_pandas"
         else next(timestamp_snowpark_df.to_pandas_batches())
     )
-    assert str(timestamp_pandas_df.dtypes[0]) == "datetime64[ns]"
+    # Starting from pyarrow 13, pyarrow no longer coerces non-nanosecond to nanosecond for pandas >= 2.0
+    # https://arrow.apache.org/release/13.0.0.html and https://github.com/apache/arrow/issues/33321
+    pyarrow_major_version = int(pa.__version__.split(".")[0])
+    pandas_major_version = int(pd.__version__.split(".")[0])
+    expected_dtype = (
+        "datetime64[s]"
+        if pyarrow_major_version >= 13 and pandas_major_version >= 2
+        else "datetime64[ns]"
+    )
+    assert str(timestamp_pandas_df.dtypes[0]) == expected_dtype
 
 
 def test_to_pandas_precision_for_number_38_0(session):
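
Note (illustration only, not part of the patch): the minimal sketch below demonstrates the pyarrow/pandas behavior change the patch accounts for. It assumes pyarrow and pandas are installed; the column name "ts" and the epoch value are made up for the example.

# Minimal sketch of the behavior described above: with pyarrow >= 13 and
# pandas >= 2.0, non-nanosecond Arrow timestamps keep their unit when
# converted to pandas; older combinations coerce to datetime64[ns].
import pandas as pd
import pyarrow as pa

# Arrow table with a second-precision timestamp column ("ts" is a made-up name).
table = pa.table({"ts": pa.array([1_700_000_000], type=pa.timestamp("s"))})
df = table.to_pandas()

print(pa.__version__, pd.__version__)
# datetime64[s] on pyarrow >= 13 with pandas >= 2.0, datetime64[ns] otherwise.
print(df.dtypes["ts"])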