diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 41aeb8a882b..e33ee6f73d2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -105,4 +105,4 @@ repos: - types-pyOpenSSL - types-setuptools - pytest - - numpy < 2.0.0 + - numpy diff --git a/setup.py b/setup.py index 6e3670755aa..9cdbfb97ce0 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,6 @@ PANDAS_REQUIREMENTS = [ f"snowflake-connector-python[pandas]{CONNECTOR_DEPENDENCY_VERSION}", - "numpy<2.0.0", ] MODIN_REQUIREMENTS = [ *PANDAS_REQUIREMENTS, diff --git a/src/snowflake/snowpark/mock/_pandas_util.py b/src/snowflake/snowpark/mock/_pandas_util.py index 71608a6e837..49206ae4485 100644 --- a/src/snowflake/snowpark/mock/_pandas_util.py +++ b/src/snowflake/snowpark/mock/_pandas_util.py @@ -68,7 +68,7 @@ def _extract_schema_and_data_from_pandas_df( for col_idx in range(data.shape[1]): if plain_data[row_idx][col_idx] is None: continue - if isinstance(plain_data[row_idx][col_idx], (float, numpy.float_)): + if isinstance(plain_data[row_idx][col_idx], (float, numpy.float64)): # in pandas, a float is represented in type numpy.float64 # which can not be inferred by snowpark python, we cast to built-in float type if math.isnan(plain_data[row_idx][col_idx]): @@ -116,7 +116,7 @@ def _extract_schema_and_data_from_pandas_df( elif isinstance(plain_data[row_idx][col_idx], pd.Interval): def convert_to_python_obj(obj): - if isinstance(obj, numpy.float_): + if isinstance(obj, numpy.float64): return float(obj) elif isinstance(obj, numpy.int64): # on Windows, numpy.int64 and numpy.int_ are different diff --git a/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py index 0b30930d9cf..0224543f0dc 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/type_utils.py @@ -92,7 +92,6 @@ (np.half, FloatType()), (np.float16, FloatType()), (np.float64, DoubleType()), - (np.float_, DoubleType()), (np.object_, VariantType()), (np.bool_, BooleanType()), ("datetime64[ns]", TimestampType()), diff --git a/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py index cea4cb8d082..e6bf0486c0e 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py @@ -233,7 +233,7 @@ def _prepare_unpivot_internal( # dataframe is used to show the intermediate results of the dataframe at each step # using the melt operation (unpivot). # - # data = {"abc": ["A", "B", np.NaN], "123": [1, np.NaN, 3], "state": ["CA", "WA", "NY"]} + # data = {"abc": ["A", "B", np.nan], "123": [1, np.nan, 3], "state": ["CA", "WA", "NY"]} # index = npd.MultiIndex.from_tuples([("one", "there"), ("two", "be"), ("two", "dragons")], # names=["L1", "L2"]) # df = npd.DataFrame(data, index=index) diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 070f0466e88..55f6ba23630 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -13096,7 +13096,7 @@ def output_col( if np.isnan(n): # Follow pandas behavior - return pandas_lit(np.NaN) + return pandas_lit(np.nan) elif n <= 0: # If all possible splits are requested, we just use SQL's split function. new_col = builtin("split")(new_col, pandas_lit(new_pat)) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 6bb35fee769..da6fd41b1cb 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -1000,7 +1000,7 @@ def dropna(): Empty strings are not considered NA values. ``None`` is considered an NA value. - >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay']) + >>> ser = pd.Series([np.nan, 2, pd.NaT, '', None, 'I stay']) >>> ser # doctest: +NORMALIZE_WHITESPACE 0 None 1 2 diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py index 704dc80703c..7a8504fe8c7 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py @@ -192,7 +192,7 @@ def contains(): -------- Returning a Series of booleans using only a literal pattern. - >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) + >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan]) >>> s1.str.contains('og', regex=False) 0 False 1 True @@ -203,7 +203,7 @@ def contains(): Returning an Index of booleans using only a literal pattern. - >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) + >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.nan]) >>> ind.str.contains('23', regex=False) Index([False, False, False, True, None], dtype='object') diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py index 5e113511f82..79843777236 100644 --- a/tests/integ/modin/binary/test_binary_op.py +++ b/tests/integ/modin/binary/test_binary_op.py @@ -2305,8 +2305,8 @@ def test_binary_add_dataframe_and_series_duplicate_labels_negative(df, s): ), # test with np.Nan as well ( - native_pd.DataFrame([[np.NaN, None, 3], [4, 5, 6]]), - native_pd.DataFrame([[1, -2, 3], [6, -5, np.NaN]]), + native_pd.DataFrame([[np.nan, None, 3], [4, 5, 6]]), + native_pd.DataFrame([[1, -2, 3], [6, -5, np.nan]]), ), # Test column alignment. ( diff --git a/tests/integ/modin/frame/test_melt.py b/tests/integ/modin/frame/test_melt.py index fba6ce6268d..68d25b1e482 100644 --- a/tests/integ/modin/frame/test_melt.py +++ b/tests/integ/modin/frame/test_melt.py @@ -25,7 +25,7 @@ ) data = [ - {"frame": {"abc": ["A", np.NaN, "C"], "123": ["1", "2", np.NaN]}, "kargs": {}}, + {"frame": {"abc": ["A", np.nan, "C"], "123": ["1", "2", np.nan]}, "kargs": {}}, {"frame": {"abc": ["A", "B", "C"], "123": ["1", "2", "3"]}, "kargs": {}}, {"frame": {"abc": ["A", "B", "C"], "123": [1, 2, 3]}, "kargs": {}}, {"frame": {"123": [1, 2, 3], "456": [4, 5, 6]}, "kargs": {}}, @@ -91,8 +91,8 @@ }, "kargs": {}, }, - {"frame": {"abc": ["A", np.NaN, np.NaN], "123": [np.NaN, "2", "3"]}, "kargs": {}}, - {"frame": {"abc": ["A", np.NaN, np.NaN], "123": [np.NaN, 2, 3]}, "kargs": {}}, + {"frame": {"abc": ["A", np.nan, np.nan], "123": [np.nan, "2", "3"]}, "kargs": {}}, + {"frame": {"abc": ["A", np.nan, np.nan], "123": [np.nan, 2, 3]}, "kargs": {}}, ] @@ -286,8 +286,8 @@ def test_everything(): [("one", "there"), ("two", "be"), ("two", "dragons")], names=["L1", "L2"] ) data = { - "abc": ["A", "B", np.NaN], - "123": [1, np.NaN, 3], + "abc": ["A", "B", np.nan], + "123": [1, np.nan, 3], "state": ["CA", "WA", "NY"], } native_df = npd.DataFrame(data, index=index) diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py index 08c14d7bd2f..7ac88042e7f 100644 --- a/tests/integ/modin/frame/test_merge.py +++ b/tests/integ/modin/frame/test_merge.py @@ -32,7 +32,7 @@ def left_df(): { "A": [3, 2, 1, 4, 4], "B": [2, 3, 1, 2, 1], - "left_c": [1.0, 2.0, 3.0, 4.0, np.NaN], + "left_c": [1.0, 2.0, 3.0, 4.0, np.nan], "left_d": [None, "d", "a", "c", "b"], }, index=pd.Index([0, 1, 3, 2, 4], name="left_i"), @@ -61,7 +61,7 @@ def right_df(): { "A": [4, 3, 1, 4, 4], "B": [3, 4, 2, 1, 1], - "right_c": [2.0, 1.0, 4.0, 0.0, np.NaN], + "right_c": [2.0, 1.0, 4.0, 0.0, np.nan], "right_d": ["c", "d", "a", "b", None], }, index=pd.Index([8, 4, 2, 9, 1], name="right_i"), @@ -335,7 +335,7 @@ def test_join_type_mismatch_negative(index1, index2): [3, 4], [True, False], native_pd.DataFrame( - {"A": [np.NaN, 1.0, 2.0], "B": [4, 3, 3]}, + {"A": [np.nan, 1.0, 2.0], "B": [4, 3, 3]}, index=native_pd.Index([False, True, True]), ), ), @@ -345,7 +345,7 @@ def test_join_type_mismatch_negative(index1, index2): ["a", "b"], [True, False], native_pd.DataFrame( - {"A": [1.0, 2.0, np.NaN, np.NaN], "B": [np.NaN, np.NaN, 4.0, 3.0]}, + {"A": [1.0, 2.0, np.nan, np.nan], "B": [np.nan, np.nan, 4.0, 3.0]}, index=native_pd.Index(["a", "b", "false", "true"]), ), ), diff --git a/tests/integ/modin/frame/test_replace.py b/tests/integ/modin/frame/test_replace.py index 1cca1539515..3b8cfc59fdd 100644 --- a/tests/integ/modin/frame/test_replace.py +++ b/tests/integ/modin/frame/test_replace.py @@ -31,7 +31,7 @@ def snow_df(): ("one", None), # scalar -> None (pd.NA, "ONE"), # NULL -> scalar (pd.NaT, "ONE"), # NULL -> scalar - (np.NaN, "ONE"), # NULL -> scalar + (np.nan, "ONE"), # NULL -> scalar (["one"], ["ONE"]), # list -> list ("four", "FOUR"), # no matching value (["one", "two"], ["two", "one"]), # swap values diff --git a/tests/integ/modin/frame/test_skew.py b/tests/integ/modin/frame/test_skew.py index 5719d7231a1..72fad6cebdc 100644 --- a/tests/integ/modin/frame/test_skew.py +++ b/tests/integ/modin/frame/test_skew.py @@ -38,9 +38,9 @@ def test_skew_basic(): { "frame": { "A": [1, 2, 3], - "B": [2, np.NaN, 4], - "C": [1, 2, np.NaN], - "D": [np.NaN, np.NaN, 3], + "B": [2, np.nan, 4], + "C": [1, 2, np.nan], + "D": [np.nan, np.nan, 3], }, "kwargs": {"skipna": True}, }, @@ -48,7 +48,7 @@ def test_skew_basic(): "frame": { "A": [1, 2, 3], "B": ["a", "b", "c"], - "C": [1, 2, np.NaN], + "C": [1, 2, np.nan], "D": ["x", "y", "z"], }, "kwargs": {"numeric_only": True}, @@ -57,7 +57,7 @@ def test_skew_basic(): "frame": { "A": [1, 2, 3], "B": ["a", "b", "c"], - "C": [1, 2, np.NaN], + "C": [1, 2, np.nan], "D": ["x", "y", "z"], }, "kwargs": {"numeric_only": True, "skipna": True}, @@ -86,7 +86,7 @@ def test_skew(data): "frame": { "A": [1, 2, 3], "B": ["a", "b", "c"], - "C": [1, 2, np.NaN], + "C": [1, 2, np.nan], "D": ["x", "y", "z"], }, "kwargs": {"numeric_only": False}, diff --git a/tests/integ/modin/series/test_astype.py b/tests/integ/modin/series/test_astype.py index ff69e2d4944..1c65052afa5 100644 --- a/tests/integ/modin/series/test_astype.py +++ b/tests/integ/modin/series/test_astype.py @@ -56,8 +56,8 @@ def basic_types(): EXTENSION_TYPE_TO_NUMPY_DTYPE = { "boolean": np.bool_, - Float32Dtype(): np.float_, - Float64Dtype(): np.float_, + Float32Dtype(): np.float64, + Float64Dtype(): np.float64, Int64Dtype(): np.int64, UInt64Dtype(): np.uint64, Int32Dtype(): np.int32, @@ -142,7 +142,7 @@ def test_astype_basic(from_dtype, to_dtype): ) if from_dtype in ( float, - np.float_, + np.float64, np.float16, np.float32, Float32Dtype(), diff --git a/tests/integ/modin/series/test_dropna.py b/tests/integ/modin/series/test_dropna.py index bb59fe910d3..6c7c23ccd10 100644 --- a/tests/integ/modin/series/test_dropna.py +++ b/tests/integ/modin/series/test_dropna.py @@ -20,7 +20,7 @@ "sample, expected_query_count", ( ([1.0, 2.0, np.nan], 1), - ([np.NaN, 2, pd.NaT, "", None, "I stay"], 1), + ([np.nan, 2, pd.NaT, "", None, "I stay"], 1), ), ) def test_basic(sample, expected_query_count): diff --git a/tests/integ/modin/series/test_replace.py b/tests/integ/modin/series/test_replace.py index e4b970cfc3a..2ef76b32e83 100644 --- a/tests/integ/modin/series/test_replace.py +++ b/tests/integ/modin/series/test_replace.py @@ -29,7 +29,7 @@ def snow_series(): ("one", None), # scalar -> None (pd.NA, "ONE"), # NULL -> scalar (pd.NaT, "ONE"), # NULL -> scalar - (np.NaN, "ONE"), # NULL -> scalar + (np.nan, "ONE"), # NULL -> scalar (["one"], ["ONE"]), # list -> list ("four", "FOUR"), # no matching value (["one", "two"], ["two", "one"]), # swap values diff --git a/tests/integ/modin/series/test_str_accessor.py b/tests/integ/modin/series/test_str_accessor.py index 5f832391d54..4d2400ecb2d 100644 --- a/tests/integ/modin/series/test_str_accessor.py +++ b/tests/integ/modin/series/test_str_accessor.py @@ -367,7 +367,7 @@ def test_str_replace_neg(pat, n, repl, error): @pytest.mark.parametrize("pat", [None, "a", "|", "%"]) -@pytest.mark.parametrize("n", [None, np.NaN, 3, 2, 1, 0, -1, -2]) +@pytest.mark.parametrize("n", [None, np.nan, 3, 2, 1, 0, -1, -2]) @sql_count_checker(query_count=1) def test_str_split(pat, n): native_ser = native_pd.Series(TEST_DATA) diff --git a/tests/integ/modin/utils.py b/tests/integ/modin/utils.py index b74f5bf2ef8..2155c8a78b1 100644 --- a/tests/integ/modin/utils.py +++ b/tests/integ/modin/utils.py @@ -131,7 +131,7 @@ TEST_DF_DATA = { "float_nan_data": { f"col{int((i - NCOLS / 2) % NCOLS + 1)}": [ - x if (j != i and j - 2 != i and j + 2 != i) else np.NaN + x if (j != i and j - 2 != i and j + 2 != i) else np.nan for j, x in enumerate( random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS)) ) diff --git a/tests/integ/test_udf.py b/tests/integ/test_udf.py index a81d590b430..a7de272dc57 100644 --- a/tests/integ/test_udf.py +++ b/tests/integ/test_udf.py @@ -1899,6 +1899,7 @@ def return_type_in_dataframe(x): [[True]], ( "", + "", "", ), ("bool",),