Fix tests and update doctests

snowflakedb · Aug 9, 2024 · 2b5a772 · 2b5a772
1 parent 386e7d5
commit 2b5a772
Show file tree

Hide file tree

Showing 6 changed files with 23 additions and 25 deletions.
diff --git a/src/snowflake/snowpark/modin/pandas/general.py b/src/snowflake/snowpark/modin/pandas/general.py
@@ -2167,7 +2167,7 @@ def date_range(
         right_inclusive=right_inclusive,
     )
     # Set date range as index column.
-    qc = qc.set_index_from_columns(qc.columns.tolist())
+    qc = qc.set_index_from_columns(qc.columns.tolist(), include_index=False)
     # Set index column name.
     qc = qc.set_index_names([name])
     return pd.DatetimeIndex(data=qc)

diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py
@@ -130,7 +130,10 @@ def __init__(
         # modified:
         # Engine.subscribe(_update_engine)
 
-        if isinstance(data, type(self)):
+        # Convert lazy index to Series without pulling the data to client.
+        if isinstance(data, pd.Index):
+            query_compiler = data.to_series(index=index, name=name)._query_compiler
+        elif isinstance(data, type(self)):
             query_compiler = data._query_compiler.copy()
             if index is not None:
                 if any(i not in data.index for i in index):

diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -5530,6 +5530,7 @@ def set_index_from_columns(
         keys: list[Hashable],
         drop: Optional[bool] = True,
         append: Optional[bool] = False,
+        include_index: Optional[bool] = True,
     ) -> "SnowflakeQueryCompiler":
         """
         Create or update index (row labels) from a list of columns.
@@ -5542,6 +5543,8 @@ def set_index_from_columns(
             append: bool, default False
               Whether to add the columns in `keys` as new levels appended to the
               existing index.
+            include_index: bool, default True
+              Whether the keys can also include index column labels.
 
         Returns:
             A new QueryCompiler instance with updated index.
@@ -5551,7 +5554,7 @@ def set_index_from_columns(
         for (
             ids
         ) in self._modin_frame.get_snowflake_quoted_identifiers_group_by_pandas_labels(
-            keys, include_index=False
+            keys, include_index=include_index
         ):
             # Error checking for missing labels is already done in frontend layer.
             index_column_snowflake_quoted_identifiers.append(ids[0])

diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
@@ -1353,7 +1353,7 @@ def dayofweek():
 
         Examples
         --------
-        >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D')
+        >>> s = pd.Series(pd.date_range('2016-12-31', '2017-01-08', freq='D'))
         >>> s
         0   2016-12-31
         1   2017-01-01
@@ -1390,7 +1390,7 @@ def dayofyear():
 
         Examples
         --------
-        >>> s = pd.to_datetime(["1/1/2020", "2/1/2020"])
+        >>> s = pd.Series(pd.to_datetime(["1/1/2020", "2/1/2020"]))
         >>> s
         0   2020-01-01
         1   2020-02-01
@@ -1670,9 +1670,8 @@ def is_leap_year():
         This method is available on Series with datetime values under the .dt accessor, and directly on DatetimeIndex.
 
         >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="YE")
-        >>> idx  # doctest: +SKIP
-        DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
-                    dtype='datetime64[ns]', freq='YE-DEC')
+        >>> idx
+        DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'], dtype='datetime64[ns]', freq=None)
         >>> idx.is_leap_year  # doctest: +SKIP
         array([ True, False, False])
 
@@ -1688,7 +1687,6 @@ def is_leap_year():
         2    False
         dtype: bool
         """
-        # TODO(SNOW-1486910): Unskip when date_range returns DatetimeIndex.
 
     @property
     def daysinmonth():
@@ -1762,22 +1760,19 @@ def month_name():
         dtype: object
 
         >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
-        >>> idx  # doctest: +SKIP
-        DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
-                    dtype='datetime64[ns]', freq='ME')
+        >>> idx
+        DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], dtype='datetime64[ns]', freq=None)
         >>> idx.month_name()  # doctest: +SKIP
         Index(['January', 'February', 'March'], dtype='object')
 
         Using the locale parameter you can set a different locale language, for example: idx.month_name(locale='pt_BR.utf8') will return month names in Brazilian Portuguese language.
 
         >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
-        >>> idx  # doctest: +SKIP
-        DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
-                    dtype='datetime64[ns]', freq='ME')
+        >>> idx
+        DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], dtype='datetime64[ns]', freq=None)
         >>> idx.month_name(locale='pt_BR.utf8')  # doctest: +SKIP
         Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
         """
-        # TODO(SNOW-1486910): Unskip when date_range returns DatetimeIndex.
 
     def day_name():
         """
@@ -1808,22 +1803,19 @@ def day_name():
         dtype: object
 
         >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
-        >>> idx  # doctest: +SKIP
-        DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
-                    dtype='datetime64[ns]', freq='D')
+        >>> idx
+        DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], dtype='datetime64[ns]', freq=None)
         >>> idx.day_name()  # doctest: +SKIP
         Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
 
         Using the locale parameter you can set a different locale language, for example: idx.day_name(locale='pt_BR.utf8') will return day names in Brazilian Portuguese language.
 
         >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
-        >>> idx  # doctest: +SKIP
-        DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
-                    dtype='datetime64[ns]', freq='D')
+        >>> idx
+        DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], dtype='datetime64[ns]', freq=None)
         >>> idx.day_name(locale='pt_BR.utf8')  # doctest: +SKIP
         Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
         """
-        # TODO(SNOW-1486910): Unskip when date_range returns DatetimeIndex.
 
     def total_seconds():
         pass

diff --git a/tests/integ/modin/frame/test_duplicated.py b/tests/integ/modin/frame/test_duplicated.py
@@ -93,7 +93,7 @@ def test_duplicated_on_empty_frame():
 
 @sql_count_checker(query_count=3, join_count=2)
 def test_frame_datetime64_duplicated():
-    dates = pd.date_range("2010-07-01", end="2010-08-05")
+    dates = pd.date_range("2010-07-01", end="2010-08-05").to_series()
 
     tst = pd.DataFrame({"symbol": "AAA", "date": dates})
     result = tst.duplicated(["date", "symbol"])

diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py
@@ -325,7 +325,7 @@ def sample_function(
 )
 @sql_count_checker(query_count=7, fallback_count=1, sproc_count=1)
 def test_property_methods_telemetry():
-    datetime_series = pd.date_range("2000-01-01", periods=3, freq="h")
+    datetime_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h"))
     ret_series = datetime_series.dt.timetz
     assert len(ret_series._query_compiler.snowpark_pandas_api_calls) == 1
     api_call = ret_series._query_compiler.snowpark_pandas_api_calls[0]