From 39d729a59294d557d4f1bd4dc48e8843fb73e60c Mon Sep 17 00:00:00 2001
From: Naresh Kumar
Date: Fri, 9 Aug 2024 12:20:42 -0700
Subject: [PATCH] Fix query counts and update DatetimeIndex ctor calls

---
 src/snowflake/snowpark/modin/pandas/general.py |  7 +++----
 src/snowflake/snowpark/modin/pandas/series.py  |  1 +
 .../snowpark/modin/plugin/extensions/index.py  |  2 +-
 tests/integ/modin/frame/test_loc.py            | 12 ++----------
 tests/integ/modin/frame/test_set_index.py      |  6 +-----
 tests/integ/modin/test_concat.py               |  4 ++--
 6 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/src/snowflake/snowpark/modin/pandas/general.py b/src/snowflake/snowpark/modin/pandas/general.py
index 4161d316b0e..af0369771bf 100644
--- a/src/snowflake/snowpark/modin/pandas/general.py
+++ b/src/snowflake/snowpark/modin/pandas/general.py
@@ -1352,7 +1352,7 @@ def to_datetime(
     infer_datetime_format: lib.NoDefault | bool = lib.no_default,
     origin: Any = "unix",
     cache: bool = True,
-) -> Series | DatetimeScalar | NaTType | None:
+) -> pd.DatetimeIndex | Series | DatetimeScalar | NaTType | None:
     """
     Convert argument to datetime.

@@ -1459,8 +1459,7 @@
         parsing):

         - scalar: :class:`Timestamp` (or :class:`datetime.datetime`)
-        - array-like: :class:`~snowflake.snowpark.modin.pandas.Series` with :class:`datetime64` dtype containing
-          :class:`datetime.datetime` (or
+        - array-like: :class:`~snowflake.snowpark.modin.pandas.DatetimeIndex` (or
           :class:`~snowflake.snowpark.modin.pandas.Series` of :class:`object` dtype containing
           :class:`datetime.datetime`)
         - Series: :class:`~snowflake.snowpark.modin.pandas.Series` of :class:`datetime64` dtype (or
@@ -2170,7 +2169,7 @@ def date_range(
     qc = qc.set_index_from_columns(qc.columns.tolist(), include_index=False)
     # Set index column name.
     qc = qc.set_index_names([name])
-    return pd.DatetimeIndex(data=qc)
+    return pd.DatetimeIndex(query_compiler=qc)


 @snowpark_pandas_telemetry_standalone_function_decorator
diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
index a494b513de5..f268a21306b 100644
--- a/src/snowflake/snowpark/modin/pandas/series.py
+++ b/src/snowflake/snowpark/modin/pandas/series.py
@@ -133,6 +133,7 @@ def __init__(
         # Convert lazy index to Series without pulling the data to client.
         if isinstance(data, pd.Index):
             query_compiler = data.to_series(index=index, name=name)._query_compiler
+            query_compiler = query_compiler.reset_index(drop=True)
         elif isinstance(data, type(self)):
             query_compiler = data._query_compiler.copy()
             if index is not None:
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py
index e11ac325f0d..a3b4265708a 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/index.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py
@@ -2429,4 +2429,4 @@ def _to_datetime(
             origin,
             include_index=True,
         )
-        return DatetimeIndex(data=new_qc)
+        return DatetimeIndex(query_compiler=new_qc)
diff --git a/tests/integ/modin/frame/test_loc.py b/tests/integ/modin/frame/test_loc.py
index f258f261b51..bad5b312545 100644
--- a/tests/integ/modin/frame/test_loc.py
+++ b/tests/integ/modin/frame/test_loc.py
@@ -945,11 +945,7 @@ def loc_set_helper(df):
             _row_key = key_converter(row_key, df)
             df.loc[_row_key] = pd.DataFrame(item)

-    with SqlCounter(
-        # one extra query to convert to series to set item
-        query_count=2 if key_type == "index" else 1,
-        join_count=expected_join_count,
-    ):
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             pd.DataFrame(native_df), native_df, loc_set_helper, inplace=True
         )
@@ -971,11 +967,7 @@ def loc_set_helper(df):
             _row_key = key_converter(row_key, df)
             df.loc[_row_key, :] = pd.DataFrame(item)

-    with SqlCounter(
-        # one extra query to convert to series to set item
-        query_count=2 if key_type == "index" else 1,
-        join_count=expected_join_count,
-    ):
+    with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(
             pd.DataFrame(native_df), native_df, loc_set_helper, inplace=True
         )
diff --git a/tests/integ/modin/frame/test_set_index.py b/tests/integ/modin/frame/test_set_index.py
index 15566d630f1..e0088673282 100644
--- a/tests/integ/modin/frame/test_set_index.py
+++ b/tests/integ/modin/frame/test_set_index.py
@@ -320,11 +320,7 @@ def test_set_index_pass_arrays_duplicate(obj_type1, obj_type2, drop, append, native_df):
         obj_type2 = native_pd.Index
     native_keys = [obj_type1(array), obj_type2(array)]

-    query_count = 4
-    # one extra query per modin index to create the series and set index
-    query_count += 1 if obj_type1 == native_pd.Index else 0
-    query_count += 1 if obj_type2 == native_pd.Index else 0
-    with SqlCounter(query_count=query_count, join_count=2):
+    with SqlCounter(query_count=4, join_count=2):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py
index 9437bb6a36c..628af787ac4 100644
--- a/tests/integ/modin/test_concat.py
+++ b/tests/integ/modin/test_concat.py
@@ -657,10 +657,10 @@ def test_concat_keys_with_none(df1, df2, axis):
 )
 def test_concat_with_keys_and_names(df1, df2, names, name1, name2, axis):
     # One extra query to convert index to native pandas when creating df
-    with SqlCounter(query_count=0 if name1 is None or axis == 1 else 4, join_count=0):
+    with SqlCounter(query_count=0 if name1 is None or axis == 1 else 3, join_count=0):
         df1 = df1.rename_axis(name1, axis=axis)
     # One extra query to convert index to native pandas when creating df
-    with SqlCounter(query_count=0 if name2 is None or axis == 1 else 4, join_count=0):
+    with SqlCounter(query_count=0 if name2 is None or axis == 1 else 3, join_count=0):
         df2 = df2.rename_axis(name2, axis=axis)

     expected_join_count = (
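
Reviewer note: the constructor call sites above now pass the lazy frame through an explicit query_compiler= keyword instead of the overloaded data= argument, and Series.__init__ resets the index when converting a lazy Index; the lowered SqlCounter expectations in the tests appear to follow from these changes. The sketch below only illustrates the data= vs. query_compiler= dispatch pattern; it is not the Snowpark pandas implementation, and LazyQueryCompiler / IllustrativeDatetimeIndex are names made up for this example.

from typing import Any, Optional


class LazyQueryCompiler:
    # Stand-in for a lazy, server-side result set (hypothetical, illustration only).
    def __init__(self, description: str) -> None:
        self.description = description


class IllustrativeDatetimeIndex:
    # Toy index type showing the data= vs. query_compiler= dispatch used in the patch.
    def __init__(
        self,
        data: Optional[Any] = None,
        query_compiler: Optional[LazyQueryCompiler] = None,
    ) -> None:
        if (data is None) == (query_compiler is None):
            raise ValueError("pass exactly one of `data` or `query_compiler`")
        if query_compiler is not None:
            # Lazy path: keep the compiler as-is; nothing is pulled to the client.
            self._query_compiler = query_compiler
        else:
            # Eager path: a real implementation would turn local array-like data
            # into a lazy representation here.
            self._query_compiler = LazyQueryCompiler(f"local data: {list(data)!r}")


# Internal callers (like date_range/_to_datetime in the diff) pass the compiler
# explicitly; user code keeps passing array-like values through data=.
lazy = IllustrativeDatetimeIndex(query_compiler=LazyQueryCompiler("date_range result"))
eager = IllustrativeDatetimeIndex(data=["2024-08-09", "2024-08-10"])
print(lazy._query_compiler.description)   # date_range result
print(eager._query_compiler.description)  # local data: ['2024-08-09', '2024-08-10']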