Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1640548, SNOW-1640549, SNOW-1640554, SNOW-1640562: Add support for Series.dt.days/seconds/microseconds/nanoseconds #2202

Merged
merged 8 commits into from
Sep 5, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
- Added support for `Series.is_monotonic_increasing` and `Series.is_monotonic_decreasing`.
- Added support for `Index.is_monotonic_increasing` and `Index.is_monotonic_decreasing`.
- Added support for `pd.crosstab`.
- Added support for `Series.dt.days`, `Series.dt.seconds`, `Series.dt.microseconds`, and `Series.dt.nanoseconds`.

#### Improvements

Expand Down
4 changes: 4 additions & 0 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ Series
Series.dt.ceil
Series.dt.round
Series.dt.normalize
Series.dt.days
Series.dt.seconds
Series.dt.microseconds
Series.dt.nanoseconds


.. rubric:: String accessor methods
Expand Down
8 changes: 4 additions & 4 deletions docs/source/modin/supported/series_dt_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_pytimedelta`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``seconds`` | N | |
| ``seconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``days`` | N | |
| ``days`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``microseconds`` | N | |
| ``microseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nanoseconds`` | N | |
| ``nanoseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``components`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
10 changes: 8 additions & 2 deletions src/snowflake/snowpark/modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@
Renamer,
Scalar,
)
from pandas.api.types import is_datetime64_any_dtype, is_string_dtype
from pandas.api.types import (
is_datetime64_any_dtype,
is_string_dtype,
is_timedelta64_dtype,
)
from pandas.core.common import apply_if_callable, is_bool_indexer
from pandas.core.dtypes.common import is_bool_dtype, is_dict_like, is_list_like
from pandas.core.series import _coerce_method
Expand Down Expand Up @@ -2420,7 +2424,9 @@ def dt(self): # noqa: RT01, D200
"""
# TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions
current_dtype = self.dtype
if not is_datetime64_any_dtype(current_dtype):
if not is_datetime64_any_dtype(current_dtype) and not is_timedelta64_dtype(
sfc-gh-helmeleegy marked this conversation as resolved.
Show resolved Hide resolved
current_dtype
):
raise AttributeError("Can only use .dt accessor with datetimelike values")

from modin.pandas.series_utils import DatetimeProperties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10842,16 +10842,16 @@ def dt_freq(self) -> "SnowflakeQueryCompiler":
return self.dt_property("freq")

def dt_seconds(self) -> "SnowflakeQueryCompiler":
    """
    Return the result of the timedelta ``seconds`` property for this series.

    Delegates to ``self.timedelta_property("seconds")``. The earlier
    ``dt_property`` delegation is removed: it returned first and made the
    timedelta code path unreachable.
    """
    return self.timedelta_property("seconds")
sfc-gh-azhan marked this conversation as resolved.
Show resolved Hide resolved

def dt_days(self) -> "SnowflakeQueryCompiler":
    """
    Return the result of the timedelta ``days`` property for this series.

    Delegates to ``self.timedelta_property("days")``. The earlier
    ``dt_property`` delegation is removed: it returned first and made the
    timedelta code path unreachable.
    """
    return self.timedelta_property("days")

def dt_microseconds(self) -> "SnowflakeQueryCompiler":
    """
    Return the result of the timedelta ``microseconds`` property for this series.

    Delegates to ``self.timedelta_property("microseconds")``. The earlier
    ``dt_property`` delegation is removed: it returned first and made the
    timedelta code path unreachable.
    """
    return self.timedelta_property("microseconds")

def dt_nanoseconds(self) -> "SnowflakeQueryCompiler":
    """
    Return the result of the timedelta ``nanoseconds`` property for this series.

    Delegates to ``self.timedelta_property("nanoseconds")``. The earlier
    ``dt_property`` delegation is removed: it returned first and made the
    timedelta code path unreachable.
    """
    return self.timedelta_property("nanoseconds")

def dt_components(self) -> "SnowflakeQueryCompiler":
    """Return the result of ``self.dt_property("components")``."""
    return self.dt_property("components")
Expand Down Expand Up @@ -10880,6 +10880,10 @@ def dt_property(
"""
if not include_index:
assert len(self.columns) == 1, "dt only works for series"
if not is_datetime64_any_dtype(self.dtypes[0]):
raise AttributeError(
f"'TimedeltaProperties' object has no attribute '{property_name}'"
)

# mapping from the property name to the corresponding snowpark function
dt_property_to_function_map = {
Expand Down Expand Up @@ -17815,6 +17819,10 @@ def timedelta_property(
assert (
len(self.columns) == 1
), "dt only works for series" # pragma: no cover
if is_datetime64_any_dtype(self.dtypes[0]):
raise AttributeError(
f"'DatetimeProperties' object has no attribute '{property_name}'"
)

# mapping from the property name to the corresponding snowpark function
property_to_func_map = {
Expand Down
116 changes: 112 additions & 4 deletions src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2173,19 +2173,127 @@ def to_pytimedelta():

@property
def seconds():
    """
    Number of seconds (>= 0 and less than 1 day) for each element.

    Examples
    --------
    For Series:

    >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='s'))
    >>> ser
    0   0 days 00:00:01
    1   0 days 00:00:02
    2   0 days 00:00:03
    dtype: timedelta64[ns]
    >>> ser.dt.seconds
    0    1
    1    2
    2    3
    dtype: int64

    For TimedeltaIndex:

    >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='s')
    >>> tdelta_idx
    TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'], dtype='timedelta64[ns]', freq=None)
    >>> tdelta_idx.seconds
    Index([1, 2, 3], dtype='int64')
    """

@property
def days():
    """
    Number of days for each element.

    Examples
    --------
    For Series:

    >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='d'))
    >>> ser
    0   1 days
    1   2 days
    2   3 days
    dtype: timedelta64[ns]
    >>> ser.dt.days
    0    1
    1    2
    2    3
    dtype: int64

    For TimedeltaIndex:

    >>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
    >>> tdelta_idx
    TimedeltaIndex(['0 days', '10 days', '20 days'], dtype='timedelta64[ns]', freq=None)
    >>> tdelta_idx.days
    Index([0, 10, 20], dtype='int64')
    """

@property
def microseconds():
    """
    Number of microseconds (>= 0 and less than 1 second) for each element.

    Examples
    --------
    For Series:

    >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='us'))
    >>> ser
    0   0 days 00:00:00.000001
    1   0 days 00:00:00.000002
    2   0 days 00:00:00.000003
    dtype: timedelta64[ns]
    >>> ser.dt.microseconds
    0    1
    1    2
    2    3
    dtype: int64

    For TimedeltaIndex:

    >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='us')
    >>> tdelta_idx
    TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002',
                    '0 days 00:00:00.000003'],
                   dtype='timedelta64[ns]', freq=None)
    >>> tdelta_idx.microseconds
    Index([1, 2, 3], dtype='int64')
    """

@property
def nanoseconds():
    """
    Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.

    Examples
    --------
    For Series:

    >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='ns'))
    >>> ser
    0   0 days 00:00:00.000000001
    1   0 days 00:00:00.000000002
    2   0 days 00:00:00.000000003
    dtype: timedelta64[ns]
    >>> ser.dt.nanoseconds
    0    1
    1    2
    2    3
    dtype: int64

    For TimedeltaIndex:

    >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='ns')
    >>> tdelta_idx
    TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002',
                    '0 days 00:00:00.000000003'],
                   dtype='timedelta64[ns]', freq=None)
    >>> tdelta_idx.nanoseconds
    Index([1, 2, 3], dtype='int64')
    """

@property
def components():
Expand Down
69 changes: 69 additions & 0 deletions tests/integ/modin/series/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,36 @@ def test_dt_properties(property_name, freq):
)


@pytest.mark.parametrize(
    "property_name", ["days", "seconds", "microseconds", "nanoseconds"]
)
@sql_count_checker(query_count=1)
def test_dt_timedelta_properties(property_name):
    """
    Read a timedelta ``.dt`` property on a Snowpark pandas and a native Series.

    The input mixes single-unit and compound durations and ends with a
    missing value; both series are handed to ``eval_snowpark_pandas_result``
    together with an accessor for ``property_name``.
    """
    durations = [
        "1d",
        "1h",
        "60s",
        "1s",
        "800ms",
        "5us",
        "6ns",
        "1d 3s",
        "9m 15s 8us",
        None,
    ]
    # Non-contiguous integer labels, one per duration above.
    labels = [2, 6, 7, 8, 11, 16, 17, 20, 25, 27]

    native_ser = native_pd.Series(
        native_pd.TimedeltaIndex(durations),
        index=labels,
        name="test",
    )
    snow_ser = pd.Series(native_ser)

    def read_property(ser):
        return getattr(ser.dt, property_name)

    eval_snowpark_pandas_result(snow_ser, native_ser, read_property)


@pytest.mark.parametrize(
"data, data_type",
[
Expand Down Expand Up @@ -366,3 +396,42 @@ def test_dt_invalid_dtypes(data, data_type):
expect_exception=True,
expect_exception_match="Can only use .dt accessor with datetimelike values",
)


@pytest.mark.parametrize(
    "data, data_type, property_name",
    [
        # Datetime values queried for a timedelta-only property.
        (
            [
                datetime.datetime(2019, 12, 4, 11, 12, 13),
                datetime.datetime(2019, 12, 5, 12, 21, 5),
                datetime.datetime(2019, 12, 6, 5, 2, 6),
            ],
            None,
            "seconds",
        ),
        # Timedelta values queried for a datetime-only property.
        (
            [
                datetime.timedelta(11, 12, 13),
                datetime.timedelta(12, 21, 5),
                datetime.timedelta(5, 2, 6),
            ],
            None,
            "second",
        ),
    ],
)
@sql_count_checker(query_count=0)
def test_dt_invalid_dtype_property_combo(data, data_type, property_name):
    """
    Check that a ``.dt`` property that does not match the series dtype raises.

    Parameters
    ----------
    data : list
        Values used to build the native Series.
    data_type : optional
        When truthy, the native Series is cast to this dtype before the
        Snowpark pandas Series is created from it.
    property_name : str
        Name of the ``.dt`` attribute to read; expected to fail with a
        message matching "object has no attribute".
    """
    native_ser = native_pd.Series(data)
    if data_type:
        # astype returns a new Series; rebind so the cast actually takes effect.
        native_ser = native_ser.astype(data_type)
    snow_ser = pd.Series(native_ser)

    eval_snowpark_pandas_result(
        snow_ser,
        native_ser,
        lambda ser: getattr(ser.dt, property_name),
        expect_exception=True,
        expect_exception_match="object has no attribute",
    )
4 changes: 0 additions & 4 deletions tests/unit/modin/test_series_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ def mock_query_compiler_for_dt_series() -> SnowflakeQueryCompiler:
(lambda s: s.dt.tz_convert(tz="UTC"), "tz_convert"),
(lambda s: s.dt.strftime(date_format="YY/MM/DD"), "strftime"),
(lambda s: s.dt.total_seconds(), "total_seconds"),
(lambda s: s.dt.seconds, "seconds"),
(lambda s: s.dt.days, "days"),
(lambda s: s.dt.microseconds, "microseconds"),
(lambda s: s.dt.nanoseconds, "nanoseconds"),
(lambda s: s.dt.qyear, "qyear"),
(lambda s: s.dt.start_time, "start_time"),
(lambda s: s.dt.end_time, "end_time"),
Expand Down
Loading