diff --git a/CHANGELOG.md b/CHANGELOG.md index e1d0674e0b2..e9d739d7fb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,14 @@ ### New Features - Added support for an optional `date_part` argument in function `last_day` -### Bug Fixes +## 1.12.1 (TBD) -- Fixed a bug in `DataFrame.to_local_iterator` where the iterator could yield wrong results if another query is executed before the iterator finishes due to wrong isolation level. For details, please see #945. +### New Features ### Bug Fixes +- Fixed a bug in `DataFrame.to_pandas` that caused an error when evaluating on a dataframe with an IntegerType column with null values. +- Fixed a bug in `DataFrame.to_local_iterator` where the iterator could yield wrong results if another query is executed before the iterator finishes due to wrong isolation level. For details, please see #945. - Fixed a bug that truncated table names in error messages while running a plan with local testing enabled. ## 1.12.0 (2024-01-30) diff --git a/src/snowflake/snowpark/_internal/server_connection.py b/src/snowflake/snowpark/_internal/server_connection.py index 252b152213e..bb2ad8e8b73 100644 --- a/src/snowflake/snowpark/_internal/server_connection.py +++ b/src/snowflake/snowpark/_internal/server_connection.py @@ -710,7 +710,7 @@ def _fix_pandas_df_fixed_type( # we try to strictly use astype("int64") in this scenario. If the values are too large to # fit in int64, an OverflowError is thrown and we rely on to_numeric to choose and appropriate # floating datatype to represent the number. 
- if column_metadata.precision > 10: + if column_metadata.precision > 10 and not pd_df[pandas_col_name].hasnans: try: pd_df[pandas_col_name] = pd_df[pandas_col_name].astype("int64") except OverflowError: diff --git a/tests/integ/test_df_to_pandas.py b/tests/integ/test_df_to_pandas.py index 6dd1ecb4345..438fc970fb3 100644 --- a/tests/integ/test_df_to_pandas.py +++ b/tests/integ/test_df_to_pandas.py @@ -191,6 +191,22 @@ def check_fetch_data_exception(query: str) -> None: assert df._plan.queries[2].sql.strip().startswith("SELECT") isinstance(df.toPandas(), PandasDF) +def test_to_pandas_for_int_column_with_none_values(session): + # Assert that an integer column containing None converts without error and falls back to float64 + data = [ + [0], + [1], + [None] + ] + schema = ["A"] + df = session.create_dataframe(data, schema) + + pdf = df.to_pandas() + assert pdf["A"][0] == 0 + assert pdf["A"][1] == 1 + assert pd.isna(pdf["A"][2]) + assert pdf["A"].dtype == "float64" + @pytest.mark.skipif( IS_IN_STORED_PROC, reason="SNOW-507565: Need localaws for large result"