Skip to content

Commit

Permalink
SNOW-1487311: Fix test failures and lint issues with numpy 2 (#1791)
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-joshi authored Jun 18, 2024
1 parent ee410eb commit e076031
Show file tree
Hide file tree
Showing 19 changed files with 34 additions and 35 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ repos:
- types-pyOpenSSL
- types-setuptools
- pytest
- numpy < 2.0.0
- numpy
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

PANDAS_REQUIREMENTS = [
f"snowflake-connector-python[pandas]{CONNECTOR_DEPENDENCY_VERSION}",
"numpy<2.0.0",
]
MODIN_REQUIREMENTS = [
*PANDAS_REQUIREMENTS,
Expand Down
4 changes: 2 additions & 2 deletions src/snowflake/snowpark/mock/_pandas_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _extract_schema_and_data_from_pandas_df(
for col_idx in range(data.shape[1]):
if plain_data[row_idx][col_idx] is None:
continue
if isinstance(plain_data[row_idx][col_idx], (float, numpy.float_)):
if isinstance(plain_data[row_idx][col_idx], (float, numpy.float64)):
# in pandas, a float is represented as numpy.float64, which cannot be
# inferred by Snowpark Python, so we cast it to the built-in float type
if math.isnan(plain_data[row_idx][col_idx]):
Expand Down Expand Up @@ -116,7 +116,7 @@ def _extract_schema_and_data_from_pandas_df(
elif isinstance(plain_data[row_idx][col_idx], pd.Interval):

def convert_to_python_obj(obj):
if isinstance(obj, numpy.float_):
if isinstance(obj, numpy.float64):
return float(obj)
elif isinstance(obj, numpy.int64):
# on Windows, numpy.int64 and numpy.int_ are different
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@
(np.half, FloatType()),
(np.float16, FloatType()),
(np.float64, DoubleType()),
(np.float_, DoubleType()),
(np.object_, VariantType()),
(np.bool_, BooleanType()),
("datetime64[ns]", TimestampType()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def _prepare_unpivot_internal(
# dataframe is used to show the intermediate results of the dataframe at each step
# using the melt operation (unpivot).
#
# data = {"abc": ["A", "B", np.NaN], "123": [1, np.NaN, 3], "state": ["CA", "WA", "NY"]}
# data = {"abc": ["A", "B", np.nan], "123": [1, np.nan, 3], "state": ["CA", "WA", "NY"]}
# index = npd.MultiIndex.from_tuples([("one", "there"), ("two", "be"), ("two", "dragons")],
# names=["L1", "L2"])
# df = npd.DataFrame(data, index=index)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13096,7 +13096,7 @@ def output_col(

if np.isnan(n):
# Follow pandas behavior
return pandas_lit(np.NaN)
return pandas_lit(np.nan)
elif n <= 0:
# If all possible splits are requested, we just use SQL's split function.
new_col = builtin("split")(new_col, pandas_lit(new_pat))
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/plugin/docstrings/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,7 +1000,7 @@ def dropna():
Empty strings are not considered NA values. ``None`` is considered an
NA value.
>>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
>>> ser = pd.Series([np.nan, 2, pd.NaT, '', None, 'I stay'])
>>> ser # doctest: +NORMALIZE_WHITESPACE
0 None
1 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def contains():
--------
Returning a Series of booleans using only a literal pattern.
>>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
>>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan])
>>> s1.str.contains('og', regex=False)
0 False
1 True
Expand All @@ -203,7 +203,7 @@ def contains():
Returning an Index of booleans using only a literal pattern.
>>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN])
>>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.nan])
>>> ind.str.contains('23', regex=False)
Index([False, False, False, True, None], dtype='object')
Expand Down
4 changes: 2 additions & 2 deletions tests/integ/modin/binary/test_binary_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -2305,8 +2305,8 @@ def test_binary_add_dataframe_and_series_duplicate_labels_negative(df, s):
),
# test with np.nan as well
(
native_pd.DataFrame([[np.NaN, None, 3], [4, 5, 6]]),
native_pd.DataFrame([[1, -2, 3], [6, -5, np.NaN]]),
native_pd.DataFrame([[np.nan, None, 3], [4, 5, 6]]),
native_pd.DataFrame([[1, -2, 3], [6, -5, np.nan]]),
),
# Test column alignment.
(
Expand Down
10 changes: 5 additions & 5 deletions tests/integ/modin/frame/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
)

data = [
{"frame": {"abc": ["A", np.NaN, "C"], "123": ["1", "2", np.NaN]}, "kargs": {}},
{"frame": {"abc": ["A", np.nan, "C"], "123": ["1", "2", np.nan]}, "kargs": {}},
{"frame": {"abc": ["A", "B", "C"], "123": ["1", "2", "3"]}, "kargs": {}},
{"frame": {"abc": ["A", "B", "C"], "123": [1, 2, 3]}, "kargs": {}},
{"frame": {"123": [1, 2, 3], "456": [4, 5, 6]}, "kargs": {}},
Expand Down Expand Up @@ -91,8 +91,8 @@
},
"kargs": {},
},
{"frame": {"abc": ["A", np.NaN, np.NaN], "123": [np.NaN, "2", "3"]}, "kargs": {}},
{"frame": {"abc": ["A", np.NaN, np.NaN], "123": [np.NaN, 2, 3]}, "kargs": {}},
{"frame": {"abc": ["A", np.nan, np.nan], "123": [np.nan, "2", "3"]}, "kargs": {}},
{"frame": {"abc": ["A", np.nan, np.nan], "123": [np.nan, 2, 3]}, "kargs": {}},
]


Expand Down Expand Up @@ -286,8 +286,8 @@ def test_everything():
[("one", "there"), ("two", "be"), ("two", "dragons")], names=["L1", "L2"]
)
data = {
"abc": ["A", "B", np.NaN],
"123": [1, np.NaN, 3],
"abc": ["A", "B", np.nan],
"123": [1, np.nan, 3],
"state": ["CA", "WA", "NY"],
}
native_df = npd.DataFrame(data, index=index)
Expand Down
8 changes: 4 additions & 4 deletions tests/integ/modin/frame/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def left_df():
{
"A": [3, 2, 1, 4, 4],
"B": [2, 3, 1, 2, 1],
"left_c": [1.0, 2.0, 3.0, 4.0, np.NaN],
"left_c": [1.0, 2.0, 3.0, 4.0, np.nan],
"left_d": [None, "d", "a", "c", "b"],
},
index=pd.Index([0, 1, 3, 2, 4], name="left_i"),
Expand Down Expand Up @@ -61,7 +61,7 @@ def right_df():
{
"A": [4, 3, 1, 4, 4],
"B": [3, 4, 2, 1, 1],
"right_c": [2.0, 1.0, 4.0, 0.0, np.NaN],
"right_c": [2.0, 1.0, 4.0, 0.0, np.nan],
"right_d": ["c", "d", "a", "b", None],
},
index=pd.Index([8, 4, 2, 9, 1], name="right_i"),
Expand Down Expand Up @@ -335,7 +335,7 @@ def test_join_type_mismatch_negative(index1, index2):
[3, 4],
[True, False],
native_pd.DataFrame(
{"A": [np.NaN, 1.0, 2.0], "B": [4, 3, 3]},
{"A": [np.nan, 1.0, 2.0], "B": [4, 3, 3]},
index=native_pd.Index([False, True, True]),
),
),
Expand All @@ -345,7 +345,7 @@ def test_join_type_mismatch_negative(index1, index2):
["a", "b"],
[True, False],
native_pd.DataFrame(
{"A": [1.0, 2.0, np.NaN, np.NaN], "B": [np.NaN, np.NaN, 4.0, 3.0]},
{"A": [1.0, 2.0, np.nan, np.nan], "B": [np.nan, np.nan, 4.0, 3.0]},
index=native_pd.Index(["a", "b", "false", "true"]),
),
),
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/frame/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def snow_df():
("one", None), # scalar -> None
(pd.NA, "ONE"), # NULL -> scalar
(pd.NaT, "ONE"), # NULL -> scalar
(np.NaN, "ONE"), # NULL -> scalar
(np.nan, "ONE"), # NULL -> scalar
(["one"], ["ONE"]), # list -> list
("four", "FOUR"), # no matching value
(["one", "two"], ["two", "one"]), # swap values
Expand Down
12 changes: 6 additions & 6 deletions tests/integ/modin/frame/test_skew.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ def test_skew_basic():
{
"frame": {
"A": [1, 2, 3],
"B": [2, np.NaN, 4],
"C": [1, 2, np.NaN],
"D": [np.NaN, np.NaN, 3],
"B": [2, np.nan, 4],
"C": [1, 2, np.nan],
"D": [np.nan, np.nan, 3],
},
"kwargs": {"skipna": True},
},
{
"frame": {
"A": [1, 2, 3],
"B": ["a", "b", "c"],
"C": [1, 2, np.NaN],
"C": [1, 2, np.nan],
"D": ["x", "y", "z"],
},
"kwargs": {"numeric_only": True},
Expand All @@ -57,7 +57,7 @@ def test_skew_basic():
"frame": {
"A": [1, 2, 3],
"B": ["a", "b", "c"],
"C": [1, 2, np.NaN],
"C": [1, 2, np.nan],
"D": ["x", "y", "z"],
},
"kwargs": {"numeric_only": True, "skipna": True},
Expand Down Expand Up @@ -86,7 +86,7 @@ def test_skew(data):
"frame": {
"A": [1, 2, 3],
"B": ["a", "b", "c"],
"C": [1, 2, np.NaN],
"C": [1, 2, np.nan],
"D": ["x", "y", "z"],
},
"kwargs": {"numeric_only": False},
Expand Down
6 changes: 3 additions & 3 deletions tests/integ/modin/series/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def basic_types():

EXTENSION_TYPE_TO_NUMPY_DTYPE = {
"boolean": np.bool_,
Float32Dtype(): np.float_,
Float64Dtype(): np.float_,
Float32Dtype(): np.float64,
Float64Dtype(): np.float64,
Int64Dtype(): np.int64,
UInt64Dtype(): np.uint64,
Int32Dtype(): np.int32,
Expand Down Expand Up @@ -142,7 +142,7 @@ def test_astype_basic(from_dtype, to_dtype):
)
if from_dtype in (
float,
np.float_,
np.float64,
np.float16,
np.float32,
Float32Dtype(),
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/series/test_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"sample, expected_query_count",
(
([1.0, 2.0, np.nan], 1),
([np.NaN, 2, pd.NaT, "", None, "I stay"], 1),
([np.nan, 2, pd.NaT, "", None, "I stay"], 1),
),
)
def test_basic(sample, expected_query_count):
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/series/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def snow_series():
("one", None), # scalar -> None
(pd.NA, "ONE"), # NULL -> scalar
(pd.NaT, "ONE"), # NULL -> scalar
(np.NaN, "ONE"), # NULL -> scalar
(np.nan, "ONE"), # NULL -> scalar
(["one"], ["ONE"]), # list -> list
("four", "FOUR"), # no matching value
(["one", "two"], ["two", "one"]), # swap values
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/series/test_str_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ def test_str_replace_neg(pat, n, repl, error):


@pytest.mark.parametrize("pat", [None, "a", "|", "%"])
@pytest.mark.parametrize("n", [None, np.NaN, 3, 2, 1, 0, -1, -2])
@pytest.mark.parametrize("n", [None, np.nan, 3, 2, 1, 0, -1, -2])
@sql_count_checker(query_count=1)
def test_str_split(pat, n):
native_ser = native_pd.Series(TEST_DATA)
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
TEST_DF_DATA = {
"float_nan_data": {
f"col{int((i - NCOLS / 2) % NCOLS + 1)}": [
x if (j != i and j - 2 != i and j + 2 != i) else np.NaN
x if (j != i and j - 2 != i and j + 2 != i) else np.nan
for j, x in enumerate(
random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS))
)
Expand Down
1 change: 1 addition & 0 deletions tests/integ/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1899,6 +1899,7 @@ def return_type_in_dataframe(x):
[[True]],
(
"<class 'bool'>",
"<class 'numpy.bool'>",
"<class 'numpy.bool_'>",
),
("bool",),
Expand Down

0 comments on commit e076031

Please sign in to comment.