Skip to content

Commit

Permalink
Fix tests and update doctests
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-nkumar committed Aug 9, 2024
1 parent 386e7d5 commit 2b5a772
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 25 deletions.
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -2167,7 +2167,7 @@ def date_range(
right_inclusive=right_inclusive,
)
# Set date range as index column.
qc = qc.set_index_from_columns(qc.columns.tolist())
qc = qc.set_index_from_columns(qc.columns.tolist(), include_index=False)
# Set index column name.
qc = qc.set_index_names([name])
return pd.DatetimeIndex(data=qc)
Expand Down
5 changes: 4 additions & 1 deletion src/snowflake/snowpark/modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,10 @@ def __init__(
# modified:
# Engine.subscribe(_update_engine)

if isinstance(data, type(self)):
# Convert lazy index to Series without pulling the data to client.
if isinstance(data, pd.Index):
query_compiler = data.to_series(index=index, name=name)._query_compiler
elif isinstance(data, type(self)):
query_compiler = data._query_compiler.copy()
if index is not None:
if any(i not in data.index for i in index):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5530,6 +5530,7 @@ def set_index_from_columns(
keys: list[Hashable],
drop: Optional[bool] = True,
append: Optional[bool] = False,
include_index: Optional[bool] = True,
) -> "SnowflakeQueryCompiler":
"""
Create or update index (row labels) from a list of columns.
Expand All @@ -5542,6 +5543,8 @@ def set_index_from_columns(
append: bool, default False
Whether to add the columns in `keys` as new levels appended to the
existing index.
include_index: bool, default True
Whether the keys can also include index column labels.

Returns:
A new QueryCompiler instance with updated index.
Expand All @@ -5551,7 +5554,7 @@ def set_index_from_columns(
for (
ids
) in self._modin_frame.get_snowflake_quoted_identifiers_group_by_pandas_labels(
keys, include_index=False
keys, include_index=include_index
):
# Error checking for missing labels is already done in frontend layer.
index_column_snowflake_quoted_identifiers.append(ids[0])
Expand Down
32 changes: 12 additions & 20 deletions src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ def dayofweek():
Examples
--------
>>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D')
>>> s = pd.Series(pd.date_range('2016-12-31', '2017-01-08', freq='D'))
>>> s
0 2016-12-31
1 2017-01-01
Expand Down Expand Up @@ -1390,7 +1390,7 @@ def dayofyear():
Examples
--------
>>> s = pd.to_datetime(["1/1/2020", "2/1/2020"])
>>> s = pd.Series(pd.to_datetime(["1/1/2020", "2/1/2020"]))
>>> s
0 2020-01-01
1 2020-02-01
Expand Down Expand Up @@ -1670,9 +1670,8 @@ def is_leap_year():
This method is available on Series with datetime values under the .dt accessor, and directly on DatetimeIndex.
>>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="YE")
>>> idx # doctest: +SKIP
DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
dtype='datetime64[ns]', freq='YE-DEC')
>>> idx
DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'], dtype='datetime64[ns]', freq=None)
>>> idx.is_leap_year # doctest: +SKIP
array([ True, False, False])
Expand All @@ -1688,7 +1687,6 @@ def is_leap_year():
2 False
dtype: bool
"""
# TODO(SNOW-1486910): Unskip when date_range returns DatetimeIndex.

@property
def daysinmonth():
Expand Down Expand Up @@ -1762,22 +1760,19 @@ def month_name():
dtype: object
>>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
>>> idx # doctest: +SKIP
DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
dtype='datetime64[ns]', freq='ME')
>>> idx
DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], dtype='datetime64[ns]', freq=None)
>>> idx.month_name() # doctest: +SKIP
Index(['January', 'February', 'March'], dtype='object')
Using the locale parameter you can set a different locale language, for example: idx.month_name(locale='pt_BR.utf8') will return month names in Brazilian Portuguese language.
>>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
>>> idx # doctest: +SKIP
DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
dtype='datetime64[ns]', freq='ME')
>>> idx
DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], dtype='datetime64[ns]', freq=None)
>>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP
Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
"""
# TODO(SNOW-1486910): Unskip when date_range returns DatetimeIndex.

def day_name():
"""
Expand Down Expand Up @@ -1808,22 +1803,19 @@ def day_name():
dtype: object
>>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
>>> idx # doctest: +SKIP
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
dtype='datetime64[ns]', freq='D')
>>> idx
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], dtype='datetime64[ns]', freq=None)
>>> idx.day_name() # doctest: +SKIP
Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
Using the locale parameter you can set a different locale language, for example: idx.day_name(locale='pt_BR.utf8') will return day names in Brazilian Portuguese language.
>>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
>>> idx # doctest: +SKIP
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
dtype='datetime64[ns]', freq='D')
>>> idx
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], dtype='datetime64[ns]', freq=None)
>>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
"""
# TODO(SNOW-1486910): Unskip when date_range returns DatetimeIndex.

def total_seconds():
pass
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/frame/test_duplicated.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test_duplicated_on_empty_frame():

@sql_count_checker(query_count=3, join_count=2)
def test_frame_datetime64_duplicated():
dates = pd.date_range("2010-07-01", end="2010-08-05")
dates = pd.date_range("2010-07-01", end="2010-08-05").to_series()

tst = pd.DataFrame({"symbol": "AAA", "date": dates})
result = tst.duplicated(["date", "symbol"])
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/modin/test_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def sample_function(
)
@sql_count_checker(query_count=7, fallback_count=1, sproc_count=1)
def test_property_methods_telemetry():
datetime_series = pd.date_range("2000-01-01", periods=3, freq="h")
datetime_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h"))
ret_series = datetime_series.dt.timetz
assert len(ret_series._query_compiler.snowpark_pandas_api_calls) == 1
api_call = ret_series._query_compiler.snowpark_pandas_api_calls[0]
Expand Down

0 comments on commit 2b5a772

Please sign in to comment.