Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…or Series.dt.days/seconds/microseconds/nanoseconds (#2202)

<!---
Please answer these questions before creating your pull request. Thanks!
--->

1. Which Jira issue is this PR addressing? Make sure that there is an
accompanying issue for your PR.

   <!---
   In this section, please add a Snowflake Jira issue number.
   
Note that if a corresponding GitHub issue exists, you should still
include
   the Snowflake Jira issue number. For example, for GitHub issue
#1400, you should
   add "SNOW-1335071" here.
    --->

   Fixes SNOW-1640548, SNOW-1640549, SNOW-1640554, SNOW-1640562

2. Fill out the following pre-review checklist:

- [x] I am adding new automated test(s) to verify the correctness of my
new code
- [ ] If this test skips Local Testing mode, I'm requesting review from
@snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
- [ ] If this is a new feature/behavior, I'm adding the Local Testing
parity changes.

3. Please describe how your code solves the related issue.

  Add support for Series.dt.days/seconds/microseconds/nanoseconds.
  • Loading branch information
sfc-gh-helmeleegy authored Sep 5, 2024
1 parent 51ab4d9 commit cc68d33
Show file tree
Hide file tree
Showing 8 changed files with 210 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
- Added support for `pd.crosstab`.
- Added support for `pd.bdate_range` and included business frequency support (B, BME, BMS, BQE, BQS, BYE, BYS) for both `pd.date_range` and `pd.bdate_range`.
- Added support for lazy `Index` objects as `labels` in `DataFrame.reindex` and `Series.reindex`.
- Added support for `Series.dt.days`, `Series.dt.seconds`, `Series.dt.microseconds`, and `Series.dt.nanoseconds`.

#### Improvements

Expand Down
4 changes: 4 additions & 0 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ Series
Series.dt.ceil
Series.dt.round
Series.dt.normalize
Series.dt.days
Series.dt.seconds
Series.dt.microseconds
Series.dt.nanoseconds


.. rubric:: String accessor methods
Expand Down
8 changes: 4 additions & 4 deletions docs/source/modin/supported/series_dt_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_pytimedelta`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``seconds`` | N | |
| ``seconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``days`` | N | |
| ``days`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``microseconds`` | N | |
| ``microseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nanoseconds`` | N | |
| ``nanoseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``components`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
10 changes: 8 additions & 2 deletions src/snowflake/snowpark/modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@
Renamer,
Scalar,
)
from pandas.api.types import is_datetime64_any_dtype, is_string_dtype
from pandas.api.types import (
is_datetime64_any_dtype,
is_string_dtype,
is_timedelta64_dtype,
)
from pandas.core.common import apply_if_callable, is_bool_indexer
from pandas.core.dtypes.common import is_bool_dtype, is_dict_like, is_list_like
from pandas.core.series import _coerce_method
Expand Down Expand Up @@ -2420,7 +2424,9 @@ def dt(self): # noqa: RT01, D200
"""
# TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions
current_dtype = self.dtype
if not is_datetime64_any_dtype(current_dtype):
if not is_datetime64_any_dtype(current_dtype) and not is_timedelta64_dtype(
current_dtype
):
raise AttributeError("Can only use .dt accessor with datetimelike values")

from modin.pandas.series_utils import DatetimeProperties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10899,16 +10899,16 @@ def dt_freq(self) -> "SnowflakeQueryCompiler":
return self.dt_property("freq")

def dt_seconds(self) -> "SnowflakeQueryCompiler":
return self.dt_property("seconds")
return self.timedelta_property("seconds")

def dt_days(self) -> "SnowflakeQueryCompiler":
return self.dt_property("days")
return self.timedelta_property("days")

def dt_microseconds(self) -> "SnowflakeQueryCompiler":
return self.dt_property("microseconds")
return self.timedelta_property("microseconds")

def dt_nanoseconds(self) -> "SnowflakeQueryCompiler":
return self.dt_property("nanoseconds")
return self.timedelta_property("nanoseconds")

def dt_components(self) -> "SnowflakeQueryCompiler":
return self.dt_property("components")
Expand Down Expand Up @@ -10937,6 +10937,10 @@ def dt_property(
"""
if not include_index:
assert len(self.columns) == 1, "dt only works for series"
if not is_datetime64_any_dtype(self.dtypes[0]):
raise AttributeError(
f"'TimedeltaProperties' object has no attribute '{property_name}'"
)

# mapping from the property name to the corresponding snowpark function
dt_property_to_function_map = {
Expand Down Expand Up @@ -17871,6 +17875,10 @@ def timedelta_property(
assert (
len(self.columns) == 1
), "dt only works for series" # pragma: no cover
if is_datetime64_any_dtype(self.dtypes[0]):
raise AttributeError(
f"'DatetimeProperties' object has no attribute '{property_name}'"
)

# mapping from the property name to the corresponding snowpark function
property_to_func_map = {
Expand Down
116 changes: 112 additions & 4 deletions src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2173,19 +2173,127 @@ def to_pytimedelta():

@property
def seconds():
pass
"""
Number of seconds (>= 0 and less than 1 day) for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='s'))
>>> ser
0 0 days 00:00:01
1 0 days 00:00:02
2 0 days 00:00:03
dtype: timedelta64[ns]
>>> ser.dt.seconds
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='s')
>>> tdelta_idx
TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'], dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.seconds
Index([1, 2, 3], dtype='int64')
"""

@property
def days():
pass
"""
Number of days for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='d'))
>>> ser
0 1 days
1 2 days
2 3 days
dtype: timedelta64[ns]
>>> ser.dt.days
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
>>> tdelta_idx
TimedeltaIndex(['0 days', '10 days', '20 days'], dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.days
Index([0, 10, 20], dtype='int64')
"""

@property
def microseconds():
pass
"""
Number of microseconds (>= 0 and less than 1 second) for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='us'))
>>> ser
0 0 days 00:00:00.000001
1 0 days 00:00:00.000002
2 0 days 00:00:00.000003
dtype: timedelta64[ns]
>>> ser.dt.microseconds
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='us')
>>> tdelta_idx
TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002',
'0 days 00:00:00.000003'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.microseconds
Index([1, 2, 3], dtype='int64')
"""

@property
def nanoseconds():
pass
"""
Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.
Examples
--------
For Series:
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='ns'))
>>> ser
0 0 days 00:00:00.000000001
1 0 days 00:00:00.000000002
2 0 days 00:00:00.000000003
dtype: timedelta64[ns]
>>> ser.dt.nanoseconds
0 1
1 2
2 3
dtype: int64
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='ns')
>>> tdelta_idx
TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002',
'0 days 00:00:00.000000003'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.nanoseconds
Index([1, 2, 3], dtype='int64')
"""

@property
def components():
Expand Down
69 changes: 69 additions & 0 deletions tests/integ/modin/series/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,36 @@ def test_dt_properties(property_name, freq):
)


@pytest.mark.parametrize(
"property_name", ["days", "seconds", "microseconds", "nanoseconds"]
)
@sql_count_checker(query_count=1)
def test_dt_timedelta_properties(property_name):
native_ser = native_pd.Series(
native_pd.TimedeltaIndex(
[
"1d",
"1h",
"60s",
"1s",
"800ms",
"5us",
"6ns",
"1d 3s",
"9m 15s 8us",
None,
]
),
index=[2, 6, 7, 8, 11, 16, 17, 20, 25, 27],
name="test",
)
snow_ser = pd.Series(native_ser)

eval_snowpark_pandas_result(
snow_ser, native_ser, lambda ser: getattr(ser.dt, property_name)
)


@pytest.mark.parametrize(
"data, data_type",
[
Expand Down Expand Up @@ -366,3 +396,42 @@ def test_dt_invalid_dtypes(data, data_type):
expect_exception=True,
expect_exception_match="Can only use .dt accessor with datetimelike values",
)


@pytest.mark.parametrize(
"data, data_type, property_name",
[
(
[
datetime.datetime(2019, 12, 4, 11, 12, 13),
datetime.datetime(2019, 12, 5, 12, 21, 5),
datetime.datetime(2019, 12, 6, 5, 2, 6),
],
None,
"seconds",
),
(
[
datetime.timedelta(11, 12, 13),
datetime.timedelta(12, 21, 5),
datetime.timedelta(5, 2, 6),
],
None,
"second",
),
],
)
@sql_count_checker(query_count=0)
def test_dt_invalid_dtype_property_combo(data, data_type, property_name):
native_ser = native_pd.Series(data)
if data_type:
native_ser.astype(data_type)
snow_ser = pd.Series(native_ser)

eval_snowpark_pandas_result(
snow_ser,
native_ser,
lambda ser: getattr(ser.dt, property_name),
expect_exception=True,
expect_exception_match="object has no attribute",
)
4 changes: 0 additions & 4 deletions tests/unit/modin/test_series_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ def mock_query_compiler_for_dt_series() -> SnowflakeQueryCompiler:
(lambda s: s.dt.tz_convert(tz="UTC"), "tz_convert"),
(lambda s: s.dt.strftime(date_format="YY/MM/DD"), "strftime"),
(lambda s: s.dt.total_seconds(), "total_seconds"),
(lambda s: s.dt.seconds, "seconds"),
(lambda s: s.dt.days, "days"),
(lambda s: s.dt.microseconds, "microseconds"),
(lambda s: s.dt.nanoseconds, "nanoseconds"),
(lambda s: s.dt.qyear, "qyear"),
(lambda s: s.dt.start_time, "start_time"),
(lambda s: s.dt.end_time, "end_time"),
Expand Down

0 comments on commit cc68d33

Please sign in to comment.