From 27104c77d687674e203981ede449867061f2068d Mon Sep 17 00:00:00 2001
From: Naresh Kumar
Date: Sat, 10 Aug 2024 15:14:52 -0700
Subject: [PATCH] Fix more query counts

---
 tests/integ/modin/frame/test_axis.py            | 21 ++++++++-----------
 .../integ/modin/frame/test_drop_duplicates.py   |  3 +--
 tests/integ/modin/frame/test_getitem.py         |  4 ++--
 .../modin/frame/test_nlargest_nsmallest.py      |  2 +-
 tests/integ/modin/frame/test_set_index.py       | 21 +++++--------------
 tests/integ/modin/series/test_axis.py           | 20 +++++++++---------
 tests/integ/modin/series/test_getitem.py        |  2 +-
 tests/integ/modin/series/test_loc.py            |  3 ---
 tests/integ/modin/series/test_setitem.py        |  8 +++----
 tests/integ/modin/test_concat.py                |  2 --
 tests/integ/modin/test_telemetry.py             |  2 +-
 11 files changed, 33 insertions(+), 55 deletions(-)

diff --git a/tests/integ/modin/frame/test_axis.py b/tests/integ/modin/frame/test_axis.py
index b253906ba53..28cf55dee40 100644
--- a/tests/integ/modin/frame/test_axis.py
+++ b/tests/integ/modin/frame/test_axis.py
@@ -81,8 +81,7 @@ def test_index(test_df):
 
 
 @pytest.mark.parametrize("test_df", test_dfs)
-# One extra query to convert lazy index to series to set index
-@sql_count_checker(query_count=9, join_count=3)
+@sql_count_checker(query_count=8, join_count=3)
 def test_set_and_assign_index(test_df):
     def assign_index(df, keys):
         df.index = keys
@@ -290,7 +289,7 @@ def test_duplicate_labels_assignment():
         native_pd.DataFrame({"A": [3.14, 1.414, 1.732], "B": [9.8, 1.0, 0]}),
         "rows",
         [None] * 3,
-        6,
+        5,
         2,
     ],
     [  # Labels is a MultiIndex from tuples.
@@ -307,7 +306,7 @@
         native_pd.DataFrame({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
         0,
         {1: "c", 2: "b", 3: "a"},
-        6,
+        5,
         2,
     ],
     [
@@ -327,7 +326,7 @@
         ),
         0,
         ['"row 1"', "row 2"],
-        6,
+        5,
         2,
     ],
     [
@@ -340,7 +339,7 @@
         ),
         "rows",
         list(range(10)),
-        6,
+        5,
         2,
     ],
     [
@@ -875,8 +874,7 @@ def test_set_axis_df_raises_value_error_diff_error_msg(
 ):
     # Should raise a ValueError if the labels for row-like axis are invalid.
     # The error messages do not match native pandas.
-    # one extra query to convert to native pandas in series constructor
-    with SqlCounter(query_count=2 if isinstance(labels, native_pd.MultiIndex) else 3):
+    with SqlCounter(query_count=2):
         with pytest.raises(ValueError, match=error_msg):
             pd.DataFrame(native_df).set_axis(labels, axis=axis)
 
@@ -894,7 +892,7 @@ def test_set_axis_df_raises_type_error_diff_error_msg(
             pd.DataFrame(native_df).set_axis(labels, axis=axis)
 
 
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=3, join_count=1)
 def test_df_set_axis_copy_true(caplog):
     # Test that warning is raised when copy argument is used.
     native_df = native_pd.DataFrame({"A": [1.25], "B": [3]})
@@ -935,12 +933,11 @@ def test_df_set_axis_with_quoted_index():
     # check first that operation result is the same
     snow_df = pd.DataFrame(data)
     native_df = native_pd.DataFrame(data)
-    # One extra query to convert to native pandas in series constructor
-    with SqlCounter(query_count=4):
+    with SqlCounter(query_count=3):
         eval_snowpark_pandas_result(snow_df, native_df, helper)
 
     # then, explicitly compare axes
-    with SqlCounter(query_count=2):
+    with SqlCounter(query_count=1):
         ans = helper(snow_df)
         native_ans = helper(native_df)
 
diff --git a/tests/integ/modin/frame/test_drop_duplicates.py b/tests/integ/modin/frame/test_drop_duplicates.py
index 3cf38708038..35c4a8edb05 100644
--- a/tests/integ/modin/frame/test_drop_duplicates.py
+++ b/tests/integ/modin/frame/test_drop_duplicates.py
@@ -64,8 +64,7 @@ def test_drop_duplicates(subset, keep, ignore_index):
     query_count = 1
     join_count = 2
     if ignore_index is True:
-        # One extra query to convert index to native pandas in series constructor
-        query_count += 3
+        query_count += 2
         join_count += 3
     with SqlCounter(query_count=query_count, join_count=join_count):
         assert_frame_equal(
diff --git a/tests/integ/modin/frame/test_getitem.py b/tests/integ/modin/frame/test_getitem.py
index 746a8aa6550..fd4ede77d77 100644
--- a/tests/integ/modin/frame/test_getitem.py
+++ b/tests/integ/modin/frame/test_getitem.py
@@ -39,9 +39,9 @@ def test_df_getitem_with_boolean_list_like(
     key, default_index_snowpark_pandas_df, default_index_native_df
 ):
-    # one added query to convert to native pandas and 2 added queries for series initialization
+    # one added query to convert to native pandas and 1 added query for series initialization
     with SqlCounter(
-        query_count=4 if isinstance(key, native_pd.Index) else 1, join_count=1
+        query_count=3 if isinstance(key, native_pd.Index) else 1, join_count=1
     ):
         # df[boolean list-like key] is the same as df.loc[:, boolean list-like key]
         if isinstance(key, native_pd.Index):
diff --git a/tests/integ/modin/frame/test_nlargest_nsmallest.py b/tests/integ/modin/frame/test_nlargest_nsmallest.py
index fa57ddeadd2..3b6318179f2 100644
--- a/tests/integ/modin/frame/test_nlargest_nsmallest.py
+++ b/tests/integ/modin/frame/test_nlargest_nsmallest.py
@@ -54,7 +54,7 @@ def test_nlargest_nsmallest_large_n(snow_df, native_df, method):
     )
 
 
-@sql_count_checker(query_count=5, join_count=1)
+@sql_count_checker(query_count=4, join_count=1)
 def test_nlargest_nsmallest_overlapping_index_name(snow_df, native_df, method):
     snow_df = snow_df.rename_axis("A")
     native_df = native_df.rename_axis("A")
diff --git a/tests/integ/modin/frame/test_set_index.py b/tests/integ/modin/frame/test_set_index.py
index e0088673282..ae035f0b3a4 100644
--- a/tests/integ/modin/frame/test_set_index.py
+++ b/tests/integ/modin/frame/test_set_index.py
@@ -80,8 +80,7 @@ def test_set_index_multiindex_columns(snow_df):
     )
 
 
-# One extra query to convert to native pandas to create series to set index
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=2)
 def test_set_index_negative(snow_df, native_df):
     index = pd.Index([1, 2])
     native_index = native_pd.Index([1, 2])
@@ -122,7 +121,7 @@ def test_set_index_names(snow_df):
     # Verify name from input index is set.
     index = pd.Index([1, 2, 0])
     index.names = ["iname"]
-    with SqlCounter(query_count=3):
+    with SqlCounter(query_count=2):
         assert snow_df.set_index(index).index.names == ["iname"]
 
     # Verify names from input multiindex are set.
@@ -229,11 +228,8 @@ def test_set_index_pass_single_array(obj_type, drop, append, native_df):
         )
     else:
         expected_query_count = 3
-        if obj_type == pd.Series:
+        if obj_type == pd.Series or obj_type == pd.Index:
             expected_query_count = 4
-        # two extra queries, one to convert to native pandas (like series case) and one to create the series to set index
-        if obj_type == pd.Index:
-            expected_query_count = 5
         with SqlCounter(query_count=expected_query_count, join_count=1):
             eval_snowpark_pandas_result(
                 snow_df,
@@ -268,11 +264,7 @@ def test_set_index_pass_arrays(obj_type, drop, append, native_df):
         "a",
         key.to_pandas() if isinstance(key, (pd.Series, pd.Index)) else key,
     ]
-    query_count = 3
-    # one extra query to convert to series to set index
-    if obj_type == pd.Index:
-        query_count = 4
-    with SqlCounter(query_count=query_count, join_count=1):
+    with SqlCounter(query_count=3, join_count=1):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -433,7 +425,7 @@ def test_set_index_raise_on_len(length, obj_type, drop, append, native_df):
     msg = "Length mismatch: Expected 3 rows, received array of length.*"
     # wrong length directly
     # one extra query to create the series to set index
-    with SqlCounter(query_count=3 if obj_type == native_pd.Index else 2):
+    with SqlCounter(query_count=2):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -451,9 +443,6 @@ def test_set_index_raise_on_len(length, obj_type, drop, append, native_df):
     expected_query_count = 1
     if obj_type == native_pd.Series:
         expected_query_count = 0
-    # one extra query to convert to native pandas to create the series to set index
-    if obj_type == native_pd.Index:
-        expected_query_count = 2
     keys = ["a", key]
     native_keys = ["a", native_key]
     with SqlCounter(query_count=expected_query_count):
diff --git a/tests/integ/modin/series/test_axis.py b/tests/integ/modin/series/test_axis.py
index af00662f8db..d099272d6e9 100644
--- a/tests/integ/modin/series/test_axis.py
+++ b/tests/integ/modin/series/test_axis.py
@@ -30,7 +30,7 @@
         native_pd.Series({"A": [1, 2, 3], 5 / 6: [4, 5, 6]}),
         "index",
         [None] * 2,
-        4,
+        3,
         1,
     ],
     [
@@ -44,7 +44,7 @@
         ),
         "index",
         ["iccanobif", "serauqs", "semirp"],
-        4,
+        3,
         1,
     ],
     [
@@ -58,7 +58,7 @@
         ),
         "index",
         native_pd.Series(["iccanobif", "serauqs", "semirp"], name="reverse names"),
-        4,
+        3,
         1,
     ],
     [
@@ -73,7 +73,7 @@
         ),
         0,
         native_pd.Index([99, 999, 9999, 99999, 999999]),
-        4,
+        3,
         1,
     ],
     [
@@ -88,7 +88,7 @@
         ),
         0,
         native_pd.Index([99, 999, 9999, 99999, 999999], name="index with name"),
-        4,
+        3,
         1,
     ],
     [
@@ -104,7 +104,7 @@
         ),
         0,
         native_pd.Index([99, 999, 9999, 99999, 999999], name="index with name"),
-        4,
+        3,
         1,
     ],
     [  # Index is a MultiIndex from tuples.
@@ -165,14 +165,14 @@
         native_pd.Series({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
         "index",
         {1: 1, 2: 2},
-        4,
+        3,
         1,
     ],
     [
         native_pd.Series({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
         "rows",
         {1, 2},
-        4,
+        3,
         1,
     ],
 ]
@@ -440,7 +440,7 @@ def test_set_axis_series_raises_value_error_diff_error_msg(
 ):
     # Should raise a ValueError if length of labels passed in
     # don't match the number of rows.
-    with SqlCounter(query_count=2 if isinstance(labels, native_pd.MultiIndex) else 3):
+    with SqlCounter(query_count=2):
         with pytest.raises(ValueError, match=error_msg):
             pd.Series(ser).set_axis(labels, axis=axis)
 
@@ -474,7 +474,7 @@ def test_set_axis_series_raises_type_error(ser, axis, labels, error_msg):
         pd.Series(ser).set_axis(labels, axis=axis)
 
 
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=3, join_count=1)
 def test_series_set_axis_copy_true(caplog):
     # Test that warning is raised when copy argument is used.
     series = native_pd.Series([1.25])
diff --git a/tests/integ/modin/series/test_getitem.py b/tests/integ/modin/series/test_getitem.py
index 3c297f32d0b..0ea84425d18 100644
--- a/tests/integ/modin/series/test_getitem.py
+++ b/tests/integ/modin/series/test_getitem.py
@@ -46,7 +46,7 @@ def getitem_helper(ser):
             _key, _ser = snow_key, ser
         return _ser[_key]
 
-    with SqlCounter(query_count=2 if isinstance(key, native_pd.Index) else 1):
+    with SqlCounter(query_count=1):
         eval_snowpark_pandas_result(
             default_index_snowpark_pandas_series,
             default_index_native_series,
diff --git a/tests/integ/modin/series/test_loc.py b/tests/integ/modin/series/test_loc.py
index 21fbf6aeafa..aa16a841f27 100644
--- a/tests/integ/modin/series/test_loc.py
+++ b/tests/integ/modin/series/test_loc.py
@@ -477,9 +477,6 @@ def type_convert(key, is_snow_type):
         return s.loc[type_convert(native_series_key, isinstance(s, pd.Series))]
 
     # default index
-    # Note: here number of queries are 2 due to the data type of the series is variant and to_pandas needs to call
-    # typeof to get the value types
-    # TODO: SNOW-933782 optimize to_pandas for variant columns to only fire one query
     with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             default_index_snowpark_pandas_series,
diff --git a/tests/integ/modin/series/test_setitem.py b/tests/integ/modin/series/test_setitem.py
index 407e93c6a12..50405643bc3 100644
--- a/tests/integ/modin/series/test_setitem.py
+++ b/tests/integ/modin/series/test_setitem.py
@@ -1560,7 +1560,7 @@ def test_series_setitem_with_empty_key_and_empty_item_negative(
     else:
         snowpark_key = key
 
-    with SqlCounter(query_count=1 if isinstance(key, native_pd.Index) else 0):
+    with SqlCounter(query_count=0):
         err_msg = "The length of the value/item to set is empty"
         with pytest.raises(ValueError, match=err_msg):
@@ -1601,7 +1601,7 @@ def test_series_setitem_with_empty_key_and_empty_series_item(
     else:
         snowpark_key = key
 
-    with SqlCounter(query_count=2 if isinstance(key, native_pd.Index) else 1):
+    with SqlCounter(query_count=1):
         native_ser[key] = item
         snowpark_ser[
             pd.Series(snowpark_key)
@@ -1649,9 +1649,7 @@ def test_series_setitem_with_empty_key_and_scalar_item(
     else:
         snowpark_key = key
 
-    with SqlCounter(
-        query_count=2 if isinstance(key, native_pd.Index) else 1, join_count=2
-    ):
+    with SqlCounter(query_count=1, join_count=2):
         native_ser[key] = item
         snowpark_ser[
             pd.Series(snowpark_key)
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index 628af787ac4..1049d5ea21b 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -656,10 +656,8 @@ def test_concat_keys_with_none(df1, df2, axis):
     "name1, name2", [("one", "two"), ("one", None), (None, "two"), (None, None)]
 )
 def test_concat_with_keys_and_names(df1, df2, names, name1, name2, axis):
-    # One extra query to convert index to native pandas when creating df
     with SqlCounter(query_count=0 if name1 is None or axis == 1 else 3, join_count=0):
         df1 = df1.rename_axis(name1, axis=axis)
-    # One extra query to convert index to native pandas when creating df
     with SqlCounter(query_count=0 if name2 is None or axis == 1 else 3, join_count=0):
         df2 = df2.rename_axis(name2, axis=axis)
 
diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py
index c908b56c56a..ba20286579a 100644
--- a/tests/integ/modin/test_telemetry.py
+++ b/tests/integ/modin/test_telemetry.py
@@ -474,7 +474,7 @@ def test_telemetry_private_method(name, method, expected_query_count):
     assert data["api_calls"] == [{"name": f"DataFrame.DataFrame.{name}"}]
 
 
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=2)
 def test_telemetry_property_index():
     df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df._query_compiler.snowpark_pandas_api_calls.clear()
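
For context, every hunk in this patch adjusts the same test utilities: SqlCounter used as a context manager, sql_count_checker used as a decorator, and eval_snowpark_pandas_result used to run one operation against both the Snowpark pandas and native pandas objects and compare the results. Below is a minimal sketch of that pattern; the import paths and the concrete count values are illustrative assumptions, not taken from this patch.

# Minimal sketch of the counter pattern exercised by the tests touched above.
# The import locations and the expected counts are assumptions for illustration
# only; adjust them to the repository's actual test utility modules.
import modin.pandas as pd
import pandas as native_pd

from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker  # assumed path
from tests.integ.modin.utils import eval_snowpark_pandas_result  # assumed path


# Decorator form: asserts the total number of SQL queries (and joins) issued
# while the decorated test runs.
@sql_count_checker(query_count=2, join_count=1)  # illustrative counts
def test_set_index_sketch():
    snow_df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    native_df = native_pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    # Runs the same operation on both objects and compares the results.
    eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.set_index("A"))


# Context-manager form: only the statements inside the block are counted, so a
# single test can assert different counts for different steps.
def test_getitem_sketch():
    snow_ser = pd.Series([10, 20, 30])
    native_ser = native_pd.Series([10, 20, 30])
    with SqlCounter(query_count=1, join_count=1):  # illustrative counts
        eval_snowpark_pandas_result(snow_ser, native_ser, lambda s: s[[0, 2]])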