From 27104c77d687674e203981ede449867061f2068d Mon Sep 17 00:00:00 2001
From: Naresh Kumar
Date: Sat, 10 Aug 2024 15:14:52 -0700
Subject: [PATCH] Fix more query counts

---
 tests/integ/modin/frame/test_axis.py            | 21 ++++++++-----------
 .../integ/modin/frame/test_drop_duplicates.py   |  3 +--
 tests/integ/modin/frame/test_getitem.py         |  4 ++--
 .../modin/frame/test_nlargest_nsmallest.py      |  2 +-
 tests/integ/modin/frame/test_set_index.py       | 21 +++++--------------
 tests/integ/modin/series/test_axis.py           | 20 +++++++++---------
 tests/integ/modin/series/test_getitem.py        |  2 +-
 tests/integ/modin/series/test_loc.py            |  3 ---
 tests/integ/modin/series/test_setitem.py        |  8 +++----
 tests/integ/modin/test_concat.py                |  2 --
 tests/integ/modin/test_telemetry.py             |  2 +-
 11 files changed, 33 insertions(+), 55 deletions(-)

diff --git a/tests/integ/modin/frame/test_axis.py b/tests/integ/modin/frame/test_axis.py
index b253906ba53..28cf55dee40 100644
--- a/tests/integ/modin/frame/test_axis.py
+++ b/tests/integ/modin/frame/test_axis.py
@@ -81,8 +81,7 @@ def test_index(test_df):
 
 
 @pytest.mark.parametrize("test_df", test_dfs)
-# One extra query to convert lazy index to series to set index
-@sql_count_checker(query_count=9, join_count=3)
+@sql_count_checker(query_count=8, join_count=3)
 def test_set_and_assign_index(test_df):
     def assign_index(df, keys):
         df.index = keys
@@ -290,7 +289,7 @@ def test_duplicate_labels_assignment():
         native_pd.DataFrame({"A": [3.14, 1.414, 1.732], "B": [9.8, 1.0, 0]}),
         "rows",
         [None] * 3,
-        6,
+        5,
         2,
     ],
     [  # Labels is a MultiIndex from tuples.
@@ -307,7 +306,7 @@
         native_pd.DataFrame({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
         0,
         {1: "c", 2: "b", 3: "a"},
-        6,
+        5,
         2,
     ],
     [
@@ -327,7 +326,7 @@
         ),
         0,
         ['"row 1"', "row 2"],
-        6,
+        5,
         2,
     ],
     [
@@ -340,7 +339,7 @@
         ),
         "rows",
         list(range(10)),
-        6,
+        5,
         2,
     ],
     [
@@ -875,8 +874,7 @@ def test_set_axis_df_raises_value_error_diff_error_msg(
 ):
     # Should raise a ValueError if the labels for row-like axis are invalid.
     # The error messages do not match native pandas.
-    # one extra query to convert to native pandas in series constructor
-    with SqlCounter(query_count=2 if isinstance(labels, native_pd.MultiIndex) else 3):
+    with SqlCounter(query_count=2):
         with pytest.raises(ValueError, match=error_msg):
             pd.DataFrame(native_df).set_axis(labels, axis=axis)
 
@@ -894,7 +892,7 @@ def test_set_axis_df_raises_type_error_diff_error_msg(
             pd.DataFrame(native_df).set_axis(labels, axis=axis)
 
 
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=3, join_count=1)
 def test_df_set_axis_copy_true(caplog):
     # Test that warning is raised when copy argument is used.
     native_df = native_pd.DataFrame({"A": [1.25], "B": [3]})
@@ -935,12 +933,11 @@ def test_df_set_axis_with_quoted_index():
     # check first that operation result is the same
     snow_df = pd.DataFrame(data)
     native_df = native_pd.DataFrame(data)
-    # One extra query to convert to native pandas in series constructor
-    with SqlCounter(query_count=4):
+    with SqlCounter(query_count=3):
         eval_snowpark_pandas_result(snow_df, native_df, helper)
 
     # then, explicitly compare axes
-    with SqlCounter(query_count=2):
+    with SqlCounter(query_count=1):
         ans = helper(snow_df)
         native_ans = helper(native_df)
 
diff --git a/tests/integ/modin/frame/test_drop_duplicates.py b/tests/integ/modin/frame/test_drop_duplicates.py
index 3cf38708038..35c4a8edb05 100644
--- a/tests/integ/modin/frame/test_drop_duplicates.py
+++ b/tests/integ/modin/frame/test_drop_duplicates.py
@@ -64,8 +64,7 @@ def test_drop_duplicates(subset, keep, ignore_index):
     query_count = 1
     join_count = 2
     if ignore_index is True:
-        # One extra query to convert index to native pandas in series constructor
-        query_count += 3
+        query_count += 2
         join_count += 3
     with SqlCounter(query_count=query_count, join_count=join_count):
         assert_frame_equal(
diff --git a/tests/integ/modin/frame/test_getitem.py b/tests/integ/modin/frame/test_getitem.py
index 746a8aa6550..fd4ede77d77 100644
--- a/tests/integ/modin/frame/test_getitem.py
+++ b/tests/integ/modin/frame/test_getitem.py
@@ -39,9 +39,9 @@ def test_df_getitem_with_boolean_list_like(
     key, default_index_snowpark_pandas_df, default_index_native_df
 ):
-    # one added query to convert to native pandas and 2 added queries for series initialization
+    # one added query to convert to native pandas and 1 added query for series initialization
     with SqlCounter(
-        query_count=4 if isinstance(key, native_pd.Index) else 1, join_count=1
+        query_count=3 if isinstance(key, native_pd.Index) else 1, join_count=1
     ):
         # df[boolean list-like key] is the same as df.loc[:, boolean list-like key]
         if isinstance(key, native_pd.Index):
diff --git a/tests/integ/modin/frame/test_nlargest_nsmallest.py b/tests/integ/modin/frame/test_nlargest_nsmallest.py
index fa57ddeadd2..3b6318179f2 100644
--- a/tests/integ/modin/frame/test_nlargest_nsmallest.py
+++ b/tests/integ/modin/frame/test_nlargest_nsmallest.py
@@ -54,7 +54,7 @@ def test_nlargest_nsmallest_large_n(snow_df, native_df, method):
     )
 
 
-@sql_count_checker(query_count=5, join_count=1)
+@sql_count_checker(query_count=4, join_count=1)
 def test_nlargest_nsmallest_overlapping_index_name(snow_df, native_df, method):
     snow_df = snow_df.rename_axis("A")
     native_df = native_df.rename_axis("A")
diff --git a/tests/integ/modin/frame/test_set_index.py b/tests/integ/modin/frame/test_set_index.py
index e0088673282..ae035f0b3a4 100644
--- a/tests/integ/modin/frame/test_set_index.py
+++ b/tests/integ/modin/frame/test_set_index.py
@@ -80,8 +80,7 @@ def test_set_index_multiindex_columns(snow_df):
     )
 
 
-# One extra query to convert to native pandas to create series to set index
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=2)
 def test_set_index_negative(snow_df, native_df):
     index = pd.Index([1, 2])
     native_index = native_pd.Index([1, 2])
@@ -122,7 +121,7 @@ def test_set_index_names(snow_df):
     # Verify name from input index is set.
     index = pd.Index([1, 2, 0])
     index.names = ["iname"]
-    with SqlCounter(query_count=3):
+    with SqlCounter(query_count=2):
         assert snow_df.set_index(index).index.names == ["iname"]
 
     # Verify names from input multiindex are set.
@@ -229,11 +228,8 @@ def test_set_index_pass_single_array(obj_type, drop, append, native_df):
         )
     else:
         expected_query_count = 3
-        if obj_type == pd.Series:
+        if obj_type == pd.Series or obj_type == pd.Index:
             expected_query_count = 4
-        # two extra queries, one to convert to native pandas (like series case) and one to create the series to set index
-        if obj_type == pd.Index:
-            expected_query_count = 5
         with SqlCounter(query_count=expected_query_count, join_count=1):
             eval_snowpark_pandas_result(
                 snow_df,
@@ -268,11 +264,7 @@ def test_set_index_pass_arrays(obj_type, drop, append, native_df):
         "a",
         key.to_pandas() if isinstance(key, (pd.Series, pd.Index)) else key,
     ]
-    query_count = 3
-    # one extra query to convert to series to set index
-    if obj_type == pd.Index:
-        query_count = 4
-    with SqlCounter(query_count=query_count, join_count=1):
+    with SqlCounter(query_count=3, join_count=1):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -433,7 +425,7 @@ def test_set_index_raise_on_len(length, obj_type, drop, append, native_df):
     msg = "Length mismatch: Expected 3 rows, received array of length.*"
     # wrong length directly
     # one extra query to create the series to set index
-    with SqlCounter(query_count=3 if obj_type == native_pd.Index else 2):
+    with SqlCounter(query_count=2):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -451,9 +443,6 @@ def test_set_index_raise_on_len(length, obj_type, drop, append, native_df):
     expected_query_count = 1
     if obj_type == native_pd.Series:
         expected_query_count = 0
-    # one extra query to convert to native pandas to create the series to set index
-    if obj_type == native_pd.Index:
-        expected_query_count = 2
     keys = ["a", key]
     native_keys = ["a", native_key]
     with SqlCounter(query_count=expected_query_count):
diff --git a/tests/integ/modin/series/test_axis.py b/tests/integ/modin/series/test_axis.py
index af00662f8db..d099272d6e9 100644
--- a/tests/integ/modin/series/test_axis.py
+++ b/tests/integ/modin/series/test_axis.py
@@ -30,7 +30,7 @@
         native_pd.Series({"A": [1, 2, 3], 5 / 6: [4, 5, 6]}),
         "index",
         [None] * 2,
-        4,
+        3,
         1,
     ],
     [
@@ -44,7 +44,7 @@
         ),
         "index",
         ["iccanobif", "serauqs", "semirp"],
-        4,
+        3,
         1,
     ],
     [
@@ -58,7 +58,7 @@
         ),
         "index",
         native_pd.Series(["iccanobif", "serauqs", "semirp"], name="reverse names"),
-        4,
+        3,
         1,
     ],
     [
@@ -73,7 +73,7 @@
         ),
         0,
         native_pd.Index([99, 999, 9999, 99999, 999999]),
-        4,
+        3,
         1,
     ],
     [
@@ -88,7 +88,7 @@
         ),
         0,
         native_pd.Index([99, 999, 9999, 99999, 999999], name="index with name"),
-        4,
+        3,
         1,
     ],
     [
@@ -104,7 +104,7 @@
         ),
         0,
         native_pd.Index([99, 999, 9999, 99999, 999999], name="index with name"),
-        4,
+        3,
         1,
     ],
     [  # Index is a MultiIndex from tuples.
@@ -165,14 +165,14 @@
         native_pd.Series({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
         "index",
         {1: 1, 2: 2},
-        4,
+        3,
         1,
     ],
     [
         native_pd.Series({"A": ["foo", "bar", 3], "B": [4, "baz", 6]}),
         "rows",
         {1, 2},
-        4,
+        3,
         1,
     ],
 ]
@@ -440,7 +440,7 @@ def test_set_axis_series_raises_value_error_diff_error_msg(
 ):
     # Should raise a ValueError if length of labels passed in
     # don't match the number of rows.
-    with SqlCounter(query_count=2 if isinstance(labels, native_pd.MultiIndex) else 3):
+    with SqlCounter(query_count=2):
         with pytest.raises(ValueError, match=error_msg):
             pd.Series(ser).set_axis(labels, axis=axis)
 
@@ -474,7 +474,7 @@ def test_set_axis_series_raises_type_error(ser, axis, labels, error_msg):
         pd.Series(ser).set_axis(labels, axis=axis)
 
 
-@sql_count_checker(query_count=4, join_count=1)
+@sql_count_checker(query_count=3, join_count=1)
 def test_series_set_axis_copy_true(caplog):
     # Test that warning is raised when copy argument is used.
     series = native_pd.Series([1.25])
diff --git a/tests/integ/modin/series/test_getitem.py b/tests/integ/modin/series/test_getitem.py
index 3c297f32d0b..0ea84425d18 100644
--- a/tests/integ/modin/series/test_getitem.py
+++ b/tests/integ/modin/series/test_getitem.py
@@ -46,7 +46,7 @@ def getitem_helper(ser):
             _key, _ser = snow_key, ser
         return _ser[_key]
 
-    with SqlCounter(query_count=2 if isinstance(key, native_pd.Index) else 1):
+    with SqlCounter(query_count=1):
         eval_snowpark_pandas_result(
             default_index_snowpark_pandas_series,
             default_index_native_series,
diff --git a/tests/integ/modin/series/test_loc.py b/tests/integ/modin/series/test_loc.py
index 21fbf6aeafa..aa16a841f27 100644
--- a/tests/integ/modin/series/test_loc.py
+++ b/tests/integ/modin/series/test_loc.py
@@ -477,9 +477,6 @@ def type_convert(key, is_snow_type):
         return s.loc[type_convert(native_series_key, isinstance(s, pd.Series))]
 
     # default index
-    # Note: here number of queries are 2 due to the data type of the series is variant and to_pandas needs to call
-    # typeof to get the value types
-    # TODO: SNOW-933782 optimize to_pandas for variant columns to only fire one query
     with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             default_index_snowpark_pandas_series,
diff --git a/tests/integ/modin/series/test_setitem.py b/tests/integ/modin/series/test_setitem.py
index 407e93c6a12..50405643bc3 100644
--- a/tests/integ/modin/series/test_setitem.py
+++ b/tests/integ/modin/series/test_setitem.py
@@ -1560,7 +1560,7 @@ def test_series_setitem_with_empty_key_and_empty_item_negative(
     else:
         snowpark_key = key
 
-    with SqlCounter(query_count=1 if isinstance(key, native_pd.Index) else 0):
+    with SqlCounter(query_count=0):
         err_msg = "The length of the value/item to set is empty"
         with pytest.raises(ValueError, match=err_msg):
@@ -1601,7 +1601,7 @@ def test_series_setitem_with_empty_key_and_empty_series_item(
     else:
         snowpark_key = key
 
-    with SqlCounter(query_count=2 if isinstance(key, native_pd.Index) else 1):
+    with SqlCounter(query_count=1):
         native_ser[key] = item
         snowpark_ser[
             pd.Series(snowpark_key)
@@ -1649,9 +1649,7 @@ def test_series_setitem_with_empty_key_and_scalar_item(
     else:
         snowpark_key = key
 
-    with SqlCounter(
-        query_count=2 if isinstance(key, native_pd.Index) else 1, join_count=2
-    ):
+    with SqlCounter(query_count=1, join_count=2):
         native_ser[key] = item
         snowpark_ser[
             pd.Series(snowpark_key)
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index 628af787ac4..1049d5ea21b 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -656,10 +656,8 @@ def test_concat_keys_with_none(df1, df2, axis):
     "name1, name2", [("one", "two"), ("one", None), (None, "two"), (None, None)]
 )
 def test_concat_with_keys_and_names(df1, df2, names, name1, name2, axis):
-    # One extra query to convert index to native pandas when creating df
     with SqlCounter(query_count=0 if name1 is None or axis == 1 else 3, join_count=0):
         df1 = df1.rename_axis(name1, axis=axis)
-    # One extra query to convert index to native pandas when creating df
     with SqlCounter(query_count=0 if name2 is None or axis == 1 else 3, join_count=0):
         df2 = df2.rename_axis(name2, axis=axis)
 
diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py
index c908b56c56a..ba20286579a 100644
--- a/tests/integ/modin/test_telemetry.py
+++ b/tests/integ/modin/test_telemetry.py
@@ -474,7 +474,7 @@ def test_telemetry_private_method(name, method, expected_query_count):
     assert data["api_calls"] == [{"name": f"DataFrame.DataFrame.{name}"}]
 
 
-@sql_count_checker(query_count=3)
+@sql_count_checker(query_count=2)
 def test_telemetry_property_index():
     df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df._query_compiler.snowpark_pandas_api_calls.clear()
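
For context, every hunk in this patch adjusts the same test utilities: SqlCounter used as a context manager, sql_count_checker used as a decorator, and eval_snowpark_pandas_result used to run one operation against both the Snowpark pandas and native pandas objects and compare the results. Below is a minimal sketch of that pattern; the import paths and the concrete count values are illustrative assumptions, not taken from this patch.

# Minimal sketch of the counter pattern exercised by the tests touched above.
# The import locations and the expected counts are assumptions for illustration
# only; adjust them to the repository's actual test utility modules.
import modin.pandas as pd
import pandas as native_pd

from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker  # assumed path
from tests.integ.modin.utils import eval_snowpark_pandas_result  # assumed path


# Decorator form: asserts the total number of SQL queries (and joins) issued
# while the decorated test runs.
@sql_count_checker(query_count=2, join_count=1)  # illustrative counts
def test_set_index_sketch():
    snow_df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    native_df = native_pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    # Runs the same operation on both objects and compares the results.
    eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.set_index("A"))


# Context-manager form: only the statements inside the block are counted, so a
# single test can assert different counts for different steps.
def test_getitem_sketch():
    snow_ser = pd.Series([10, 20, 30])
    native_ser = native_pd.Series([10, 20, 30])
    with SqlCounter(query_count=1, join_count=1):  # illustrative counts
        eval_snowpark_pandas_result(snow_ser, native_ser, lambda s: s[[0, 2]])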