Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…or Series.dt.days/seconds/microseconds/nanoseconds (#2202)

<!---
Please answer these questions before creating your pull request. Thanks!
--->

1. Which Jira issue is this PR addressing? Make sure that there is an
accompanying issue for your PR.

   <!---
   In this section, please add a Snowflake Jira issue number.
   
Note that if a corresponding GitHub issue exists, you should still
include
   the Snowflake Jira issue number. For example, for GitHub issue
#1400, you should
   add "SNOW-1335071" here.
    --->

   Fixes SNOW-1640548, SNOW-1640549, SNOW-1640554, SNOW-1640562

2. Fill out the following pre-review checklist:

- [x] I am adding new automated test(s) to verify the correctness of my
new code
- [ ] If this test skips Local Testing mode, I'm requesting review from
@snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
- [ ] If this is a new feature/behavior, I'm adding the Local Testing
parity changes.

3. Please describe how your code solves the related issue.

  Add support for Series.dt.days/seconds/microseconds/nanoseconds.
  • Loading branch information
sfc-gh-helmeleegy authored Sep 5, 2024
1 parent 51ab4d9 commit cc68d33
Show file tree
Hide file tree
Showing 8 changed files with 210 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
- Added support for `pd.crosstab`.
- Added support for `pd.bdate_range` and included business frequency support (B, BME, BMS, BQE, BQS, BYE, BYS) for both `pd.date_range` and `pd.bdate_range`.
- Added support for lazy `Index` objects as `labels` in `DataFrame.reindex` and `Series.reindex`.
- Added support for `Series.dt.days`, `Series.dt.seconds`, `Series.dt.microseconds`, and `Series.dt.nanoseconds`.

#### Improvements

Expand Down
4 changes: 4 additions & 0 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ Series
Series.dt.ceil
Series.dt.round
Series.dt.normalize
Series.dt.days
Series.dt.seconds
Series.dt.microseconds
Series.dt.nanoseconds


.. rubric:: String accessor methods
Expand Down
8 changes: 4 additions & 4 deletions docs/source/modin/supported/series_dt_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_pytimedelta`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``seconds`` | N | |
| ``seconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``days`` | N | |
| ``days`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``microseconds`` | N | |
| ``microseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nanoseconds`` | N | |
| ``nanoseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``components`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
10 changes: 8 additions & 2 deletions src/snowflake/snowpark/modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@
Renamer,
Scalar,
)
from pandas.api.types import is_datetime64_any_dtype, is_string_dtype
from pandas.api.types import (
is_datetime64_any_dtype,
is_string_dtype,
is_timedelta64_dtype,
)
from pandas.core.common import apply_if_callable, is_bool_indexer
from pandas.core.dtypes.common import is_bool_dtype, is_dict_like, is_list_like
from pandas.core.series import _coerce_method
Expand Down Expand Up @@ -2420,7 +2424,9 @@ def dt(self): # noqa: RT01, D200
"""
# TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions
current_dtype = self.dtype
if not is_datetime64_any_dtype(current_dtype):
if not is_datetime64_any_dtype(current_dtype) and not is_timedelta64_dtype(
current_dtype
):
raise AttributeError("Can only use .dt accessor with datetimelike values")

from modin.pandas.series_utils import DatetimeProperties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10899,16 +10899,16 @@ def dt_freq(self) -> "SnowflakeQueryCompiler":
return self.dt_property("freq")

def dt_seconds(self) -> "SnowflakeQueryCompiler":
return self.dt_property("seconds")
return self.timedelta_property("seconds")

def dt_days(self) -> "SnowflakeQueryCompiler":
return self.dt_property("days")
return self.timedelta_property("days")

def dt_microseconds(self) -> "SnowflakeQueryCompiler":
return self.dt_property("microseconds")
return self.timedelta_property("microseconds")

def dt_nanoseconds(self) -> "SnowflakeQueryCompiler":
return self.dt_property("nanoseconds")
return self.timedelta_property("nanoseconds")

def dt_components(self) -> "SnowflakeQueryCompiler":
return self.dt_property("components")
Expand Down Expand Up @@ -10937,6 +10937,10 @@ def dt_property(
"""
if not include_index:
assert len(self.columns) == 1, "dt only works for series"
if not is_datetime64_any_dtype(self.dtypes[0]):
raise AttributeError(
f"'TimedeltaProperties' object has no attribute '{property_name}'"
)

# mapping from the property name to the corresponding snowpark function
dt_property_to_function_map = {
Expand Down Expand Up @@ -17871,6 +17875,10 @@ def timedelta_property(
assert (
len(self.columns) == 1
), "dt only works for series" # pragma: no cover
if is_datetime64_any_dtype(self.dtypes[0]):
raise AttributeError(
f"'DatetimeProperties' object has no attribute '{property_name}'"
)

# mapping from the property name to the corresponding snowpark function
property_to_func_map = {
Expand Down
116 changes: 112 additions & 4 deletions src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2173,19 +2173,127 @@ def to_pytimedelta():

@property
def seconds():
pass
"""
Number of seconds (>= 0 and less than 1 day) for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='s'))
>>> ser
0 0 days 00:00:01
1 0 days 00:00:02
2 0 days 00:00:03
dtype: timedelta64[ns]
>>> ser.dt.seconds
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='s')
>>> tdelta_idx
TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'], dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.seconds
Index([1, 2, 3], dtype='int64')
"""

@property
def days():
pass
"""
Number of days for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='d'))
>>> ser
0 1 days
1 2 days
2 3 days
dtype: timedelta64[ns]
>>> ser.dt.days
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
>>> tdelta_idx
TimedeltaIndex(['0 days', '10 days', '20 days'], dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.days
Index([0, 10, 20], dtype='int64')
"""

@property
def microseconds():
pass
"""
Number of microseconds (>= 0 and less than 1 second) for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='us'))
>>> ser
0 0 days 00:00:00.000001
1 0 days 00:00:00.000002
2 0 days 00:00:00.000003
dtype: timedelta64[ns]
>>> ser.dt.microseconds
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='us')
>>> tdelta_idx
TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002',
'0 days 00:00:00.000003'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.microseconds
Index([1, 2, 3], dtype='int64')
"""

@property
def nanoseconds():
pass
"""
Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='ns'))
>>> ser
0 0 days 00:00:00.000000001
1 0 days 00:00:00.000000002
2 0 days 00:00:00.000000003
dtype: timedelta64[ns]
>>> ser.dt.nanoseconds
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='ns')
>>> tdelta_idx
TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002',
'0 days 00:00:00.000000003'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.nanoseconds
Index([1, 2, 3], dtype='int64')
"""

@property
def components():
Expand Down
69 changes: 69 additions & 0 deletions tests/integ/modin/series/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,36 @@ def test_dt_properties(property_name, freq):
)


@pytest.mark.parametrize(
"property_name", ["days", "seconds", "microseconds", "nanoseconds"]
)
@sql_count_checker(query_count=1)
def test_dt_timedelta_properties(property_name):
native_ser = native_pd.Series(
native_pd.TimedeltaIndex(
[
"1d",
"1h",
"60s",
"1s",
"800ms",
"5us",
"6ns",
"1d 3s",
"9m 15s 8us",
None,
]
),
index=[2, 6, 7, 8, 11, 16, 17, 20, 25, 27],
name="test",
)
snow_ser = pd.Series(native_ser)

eval_snowpark_pandas_result(
snow_ser, native_ser, lambda ser: getattr(ser.dt, property_name)
)


@pytest.mark.parametrize(
"data, data_type",
[
Expand Down Expand Up @@ -366,3 +396,42 @@ def test_dt_invalid_dtypes(data, data_type):
expect_exception=True,
expect_exception_match="Can only use .dt accessor with datetimelike values",
)


@pytest.mark.parametrize(
"data, data_type, property_name",
[
(
[
datetime.datetime(2019, 12, 4, 11, 12, 13),
datetime.datetime(2019, 12, 5, 12, 21, 5),
datetime.datetime(2019, 12, 6, 5, 2, 6),
],
None,
"seconds",
),
(
[
datetime.timedelta(11, 12, 13),
datetime.timedelta(12, 21, 5),
datetime.timedelta(5, 2, 6),
],
None,
"second",
),
],
)
@sql_count_checker(query_count=0)
def test_dt_invalid_dtype_property_combo(data, data_type, property_name):
native_ser = native_pd.Series(data)
if data_type:
native_ser.astype(data_type)
snow_ser = pd.Series(native_ser)

eval_snowpark_pandas_result(
snow_ser,
native_ser,
lambda ser: getattr(ser.dt, property_name),
expect_exception=True,
expect_exception_match="object has no attribute",
)
4 changes: 0 additions & 4 deletions tests/unit/modin/test_series_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ def mock_query_compiler_for_dt_series() -> SnowflakeQueryCompiler:
(lambda s: s.dt.tz_convert(tz="UTC"), "tz_convert"),
(lambda s: s.dt.strftime(date_format="YY/MM/DD"), "strftime"),
(lambda s: s.dt.total_seconds(), "total_seconds"),
(lambda s: s.dt.seconds, "seconds"),
(lambda s: s.dt.days, "days"),
(lambda s: s.dt.microseconds, "microseconds"),
(lambda s: s.dt.nanoseconds, "nanoseconds"),
(lambda s: s.dt.qyear, "qyear"),
(lambda s: s.dt.start_time, "start_time"),
(lambda s: s.dt.end_time, "end_time"),
Expand Down

0 comments on commit cc68d33

Please sign in to comment.