Skip to content

Commit

Permalink
Fix more query counts
Browse files: browse the repository at this point in the history
  • Loading branch information
sfc-gh-nkumar committed Aug 12, 2024
1 parent 159c81b commit 27104c7
Show file tree
Hide file tree
Showing 11 changed files with 33 additions and 55 deletions.
21 changes: 9 additions & 12 deletions tests/integ/modin/frame/test_axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ def test_index(test_df):


@pytest.mark.parametrize("test_df", test_dfs)
# One extra query to convert lazy index to series to set index
@sql_count_checker(query_count=9, join_count=3)
@sql_count_checker(query_count=8, join_count=3)
def test_set_and_assign_index(test_df):
def assign_index(df, keys):
df.index = keys
Expand Down Expand Up @@ -290,7 +289,7 @@ def test_duplicate_labels_assignment():
native_pd.DataFrame({"A": [3.14, 1.414, 1.732], "B": [9.8, 1.0, 0]}),
"rows",
[None] * 3,
6,
5,
2,
],
[ # Labels is a MultiIndex from tuples.
Expand All @@ -307,7 +306,7 @@ def test_duplicate_labels_assignment():
native_pd.DataFrame({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
0,
{1: "c", 2: "b", 3: "a"},
6,
5,
2,
],
[
Expand All @@ -327,7 +326,7 @@ def test_duplicate_labels_assignment():
),
0,
['"row 1"', "row 2"],
6,
5,
2,
],
[
Expand All @@ -340,7 +339,7 @@ def test_duplicate_labels_assignment():
),
"rows",
list(range(10)),
6,
5,
2,
],
[
Expand Down Expand Up @@ -875,8 +874,7 @@ def test_set_axis_df_raises_value_error_diff_error_msg(
):
# Should raise a ValueError if the labels for row-like axis are invalid.
# The error messages do not match native pandas.
# one extra query to convert to native pandas in series constructor
with SqlCounter(query_count=2 if isinstance(labels, native_pd.MultiIndex) else 3):
with SqlCounter(query_count=2):
with pytest.raises(ValueError, match=error_msg):
pd.DataFrame(native_df).set_axis(labels, axis=axis)

Expand All @@ -894,7 +892,7 @@ def test_set_axis_df_raises_type_error_diff_error_msg(
pd.DataFrame(native_df).set_axis(labels, axis=axis)


@sql_count_checker(query_count=4, join_count=1)
@sql_count_checker(query_count=3, join_count=1)
def test_df_set_axis_copy_true(caplog):
# Test that warning is raised when copy argument is used.
native_df = native_pd.DataFrame({"A": [1.25], "B": [3]})
Expand Down Expand Up @@ -935,12 +933,11 @@ def test_df_set_axis_with_quoted_index():
# check first that operation result is the same
snow_df = pd.DataFrame(data)
native_df = native_pd.DataFrame(data)
# One extra query to convert to native pandas in series constructor
with SqlCounter(query_count=4):
with SqlCounter(query_count=3):
eval_snowpark_pandas_result(snow_df, native_df, helper)

# then, explicitly compare axes
with SqlCounter(query_count=2):
with SqlCounter(query_count=1):
ans = helper(snow_df)

native_ans = helper(native_df)
Expand Down
3 changes: 1 addition & 2 deletions tests/integ/modin/frame/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ def test_drop_duplicates(subset, keep, ignore_index):
query_count = 1
join_count = 2
if ignore_index is True:
# One extra query to convert index to native pandas in series constructor
query_count += 3
query_count += 2
join_count += 3
with SqlCounter(query_count=query_count, join_count=join_count):
assert_frame_equal(
Expand Down
4 changes: 2 additions & 2 deletions tests/integ/modin/frame/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
def test_df_getitem_with_boolean_list_like(
key, default_index_snowpark_pandas_df, default_index_native_df
):
# one added query to convert to native pandas and 2 added queries for series initialization
# one added query to convert to native pandas and 1 added query for series initialization
with SqlCounter(
query_count=4 if isinstance(key, native_pd.Index) else 1, join_count=1
query_count=3 if isinstance(key, native_pd.Index) else 1, join_count=1
):
# df[boolean list-like key] is the same as df.loc[:, boolean list-like key]
if isinstance(key, native_pd.Index):
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/frame/test_nlargest_nsmallest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_nlargest_nsmallest_large_n(snow_df, native_df, method):
)


@sql_count_checker(query_count=5, join_count=1)
@sql_count_checker(query_count=4, join_count=1)
def test_nlargest_nsmallest_overlapping_index_name(snow_df, native_df, method):
snow_df = snow_df.rename_axis("A")
native_df = native_df.rename_axis("A")
Expand Down
21 changes: 5 additions & 16 deletions tests/integ/modin/frame/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,7 @@ def test_set_index_multiindex_columns(snow_df):
)


# One extra query to convert to native pandas to create series to set index
@sql_count_checker(query_count=3)
@sql_count_checker(query_count=2)
def test_set_index_negative(snow_df, native_df):
index = pd.Index([1, 2])
native_index = native_pd.Index([1, 2])
Expand Down Expand Up @@ -122,7 +121,7 @@ def test_set_index_names(snow_df):
# Verify name from input index is set.
index = pd.Index([1, 2, 0])
index.names = ["iname"]
with SqlCounter(query_count=3):
with SqlCounter(query_count=2):
assert snow_df.set_index(index).index.names == ["iname"]

# Verify names from input multiindex are set.
Expand Down Expand Up @@ -229,11 +228,8 @@ def test_set_index_pass_single_array(obj_type, drop, append, native_df):
)
else:
expected_query_count = 3
if obj_type == pd.Series:
if obj_type == pd.Series or obj_type == pd.Index:
expected_query_count = 4
# two extra queries, one to convert to native pandas (like series case) and one to create the series to set index
if obj_type == pd.Index:
expected_query_count = 5
with SqlCounter(query_count=expected_query_count, join_count=1):
eval_snowpark_pandas_result(
snow_df,
Expand Down Expand Up @@ -268,11 +264,7 @@ def test_set_index_pass_arrays(obj_type, drop, append, native_df):
"a",
key.to_pandas() if isinstance(key, (pd.Series, pd.Index)) else key,
]
query_count = 3
# one extra query to convert to series to set index
if obj_type == pd.Index:
query_count = 4
with SqlCounter(query_count=query_count, join_count=1):
with SqlCounter(query_count=3, join_count=1):
eval_snowpark_pandas_result(
snow_df,
native_df,
Expand Down Expand Up @@ -433,7 +425,7 @@ def test_set_index_raise_on_len(length, obj_type, drop, append, native_df):
msg = "Length mismatch: Expected 3 rows, received array of length.*"
# wrong length directly
# one extra query to create the series to set index
with SqlCounter(query_count=3 if obj_type == native_pd.Index else 2):
with SqlCounter(query_count=2):
eval_snowpark_pandas_result(
snow_df,
native_df,
Expand All @@ -451,9 +443,6 @@ def test_set_index_raise_on_len(length, obj_type, drop, append, native_df):
expected_query_count = 1
if obj_type == native_pd.Series:
expected_query_count = 0
# one extra query to convert to native pandas to create the series to set index
if obj_type == native_pd.Index:
expected_query_count = 2
keys = ["a", key]
native_keys = ["a", native_key]
with SqlCounter(query_count=expected_query_count):
Expand Down
20 changes: 10 additions & 10 deletions tests/integ/modin/series/test_axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
native_pd.Series({"A": [1, 2, 3], 5 / 6: [4, 5, 6]}),
"index",
[None] * 2,
4,
3,
1,
],
[
Expand All @@ -44,7 +44,7 @@
),
"index",
["iccanobif", "serauqs", "semirp"],
4,
3,
1,
],
[
Expand All @@ -58,7 +58,7 @@
),
"index",
native_pd.Series(["iccanobif", "serauqs", "semirp"], name="reverse names"),
4,
3,
1,
],
[
Expand All @@ -73,7 +73,7 @@
),
0,
native_pd.Index([99, 999, 9999, 99999, 999999]),
4,
3,
1,
],
[
Expand All @@ -88,7 +88,7 @@
),
0,
native_pd.Index([99, 999, 9999, 99999, 999999], name="index with name"),
4,
3,
1,
],
[
Expand All @@ -104,7 +104,7 @@
),
0,
native_pd.Index([99, 999, 9999, 99999, 999999], name="index with name"),
4,
3,
1,
],
[ # Index is a MultiIndex from tuples.
Expand Down Expand Up @@ -165,14 +165,14 @@
native_pd.Series({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
"index",
{1: 1, 2: 2},
4,
3,
1,
],
[
native_pd.Series({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
"rows",
{1, 2},
4,
3,
1,
],
]
Expand Down Expand Up @@ -440,7 +440,7 @@ def test_set_axis_series_raises_value_error_diff_error_msg(
):
# Should raise a ValueError if length of labels passed in
# don't match the number of rows.
with SqlCounter(query_count=2 if isinstance(labels, native_pd.MultiIndex) else 3):
with SqlCounter(query_count=2):
with pytest.raises(ValueError, match=error_msg):
pd.Series(ser).set_axis(labels, axis=axis)

Expand Down Expand Up @@ -474,7 +474,7 @@ def test_set_axis_series_raises_type_error(ser, axis, labels, error_msg):
pd.Series(ser).set_axis(labels, axis=axis)


@sql_count_checker(query_count=4, join_count=1)
@sql_count_checker(query_count=3, join_count=1)
def test_series_set_axis_copy_true(caplog):
# Test that warning is raised when copy argument is used.
series = native_pd.Series([1.25])
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/series/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def getitem_helper(ser):
_key, _ser = snow_key, ser
return _ser[_key]

with SqlCounter(query_count=2 if isinstance(key, native_pd.Index) else 1):
with SqlCounter(query_count=1):
eval_snowpark_pandas_result(
default_index_snowpark_pandas_series,
default_index_native_series,
Expand Down
3 changes: 0 additions & 3 deletions tests/integ/modin/series/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,9 +477,6 @@ def type_convert(key, is_snow_type):
return s.loc[type_convert(native_series_key, isinstance(s, pd.Series))]

# default index
# Note: here number of queries are 2 due to the data type of the series is variant and to_pandas needs to call
# typeof to get the value types
# TODO: SNOW-933782 optimize to_pandas for variant columns to only fire one query
with SqlCounter(query_count=1, join_count=1):
eval_snowpark_pandas_result(
default_index_snowpark_pandas_series,
Expand Down
8 changes: 3 additions & 5 deletions tests/integ/modin/series/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1560,7 +1560,7 @@ def test_series_setitem_with_empty_key_and_empty_item_negative(
else:
snowpark_key = key

with SqlCounter(query_count=1 if isinstance(key, native_pd.Index) else 0):
with SqlCounter(query_count=0):

err_msg = "The length of the value/item to set is empty"
with pytest.raises(ValueError, match=err_msg):
Expand Down Expand Up @@ -1601,7 +1601,7 @@ def test_series_setitem_with_empty_key_and_empty_series_item(
else:
snowpark_key = key

with SqlCounter(query_count=2 if isinstance(key, native_pd.Index) else 1):
with SqlCounter(query_count=1):
native_ser[key] = item
snowpark_ser[
pd.Series(snowpark_key)
Expand Down Expand Up @@ -1649,9 +1649,7 @@ def test_series_setitem_with_empty_key_and_scalar_item(
else:
snowpark_key = key

with SqlCounter(
query_count=2 if isinstance(key, native_pd.Index) else 1, join_count=2
):
with SqlCounter(query_count=1, join_count=2):
native_ser[key] = item
snowpark_ser[
pd.Series(snowpark_key)
Expand Down
2 changes: 0 additions & 2 deletions tests/integ/modin/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,10 +656,8 @@ def test_concat_keys_with_none(df1, df2, axis):
"name1, name2", [("one", "two"), ("one", None), (None, "two"), (None, None)]
)
def test_concat_with_keys_and_names(df1, df2, names, name1, name2, axis):
# One extra query to convert index to native pandas when creating df
with SqlCounter(query_count=0 if name1 is None or axis == 1 else 3, join_count=0):
df1 = df1.rename_axis(name1, axis=axis)
# One extra query to convert index to native pandas when creating df
with SqlCounter(query_count=0 if name2 is None or axis == 1 else 3, join_count=0):
df2 = df2.rename_axis(name2, axis=axis)

Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/test_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def test_telemetry_private_method(name, method, expected_query_count):
assert data["api_calls"] == [{"name": f"DataFrame.DataFrame.{name}"}]


@sql_count_checker(query_count=3)
@sql_count_checker(query_count=2)
def test_telemetry_property_index():
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df._query_compiler.snowpark_pandas_api_calls.clear()
Expand Down

0 comments on commit 27104c7

Please sign in to comment.