Skip to content

Commit

Permalink
SNOW-1637945: Add support for TimedeltaIndex attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-nkumar committed Aug 29, 2024
1 parent 1cd0149 commit 3f9806c
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 32 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
- support for lazy `TimedeltaIndex`.
- support for `pd.to_timedelta`.
- support for `GroupBy` aggregations `min`, `max`, `mean`, `idxmax`, `idxmin`, `std`, `sum`, `median`, `count`, `any`, `all`, `size`, `nunique`.
- support for `TimedeltaIndex` attributes: `days`, `seconds`, `microseconds` and `nanoseconds`.
- Added support for index's arithmetic and comparison operators.
- Added support for `Series.dt.round`.
- Added documentation pages for `DatetimeIndex`.
Expand Down
8 changes: 4 additions & 4 deletions docs/source/modin/supported/timedelta_index_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ Attributes
+-----------------------------+---------------------------------+----------------------------------------------------+
| TimedeltaIndex attribute | Snowpark implemented? (Y/N/P/D) | Notes for current implementation |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``days`` | N | |
| ``days`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``seconds`` | N | |
| ``seconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``microseconds`` | N | |
| ``microseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``nanoseconds`` | N | |
| ``nanoseconds`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``components`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
cast,
convert_timezone,
date_part,
floor,
iff,
to_decimal,
trunc,
)
from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit
from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
Expand Down Expand Up @@ -176,7 +176,7 @@ def col_to_timedelta(col: Column, unit: str) -> Column:
if not td_unit:
# Same error as native pandas.
raise ValueError(f"invalid unit abbreviation: {unit}")
return cast(floor(col * TIMEDELTA_UNIT_MULTIPLIER[td_unit]), LongType())
return trunc(col * TIMEDELTA_UNIT_MULTIPLIER[td_unit])


PANDAS_DATETIME_FORMAT_TO_SNOWFLAKE_MAPPING = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
to_variant,
translate,
trim,
trunc,
uniform,
upper,
when,
Expand Down Expand Up @@ -382,6 +383,12 @@

SUPPORTED_DT_FLOOR_CEIL_FREQS = ["day", "hour", "minute", "second"]

SECONDS_PER_DAY = 86400
NANOSECONDS_PER_SECOND = 10**9
NANOSECONDS_PER_MICROSECOND = 10**3
MICROSECONDS_PER_SECOND = 10**6
NANOSECONDS_PER_DAY = SECONDS_PER_DAY * NANOSECONDS_PER_SECOND


class SnowflakeQueryCompiler(BaseQueryCompiler):
"""based on: https://modin.readthedocs.io/en/0.11.0/flow/modin/backends/base/query_compiler.html
Expand Down Expand Up @@ -17498,3 +17505,45 @@ def tz_convert(self, *args: Any, **kwargs: Any) -> None:

def tz_localize(self, *args: Any, **kwargs: Any) -> None:
ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset")

def timedelta_property(
self, property_name: str, include_index: bool = False
) -> "SnowflakeQueryCompiler":
"""
Extract a specified component of from Timedelta.

Parameters
----------
property : {'days', 'seconds', 'microseconds', 'nanoseconds'}
The component to extract.
include_index: Whether to include the index columns in the operation.

Returns
-------
A new SnowflakeQueryCompiler with the extracted component.
"""
if not include_index:
assert (
len(self.columns) == 1
), "dt only works for series" # pragma: no cover

# mapping from the property name to the corresponding snowpark function
property_to_func_map = {
"days": lambda column: trunc(column / NANOSECONDS_PER_DAY),
"seconds": lambda column: trunc(column / NANOSECONDS_PER_SECOND)
% SECONDS_PER_DAY,
"microseconds": lambda column: trunc(column / NANOSECONDS_PER_MICROSECOND)
% MICROSECONDS_PER_SECOND,
"nanoseconds": lambda column: column % NANOSECONDS_PER_MICROSECOND,
}
func = property_to_func_map.get(property_name)
if not func:
class_prefix = (
"TimedeltaIndex" if include_index else "Series.dt"
) # pragma: no cover
raise ErrorMessage.not_implemented(
f"Snowpark pandas doesn't yet support the property '{class_prefix}.{property_name}'"
)
return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(func, include_index)
)
60 changes: 37 additions & 23 deletions src/snowflake/snowpark/modin/plugin/extensions/timedelta_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def __init__(
}
self._init_index(data, _CONSTRUCTOR_DEFAULTS, query_compiler, **kwargs)

@timedelta_index_not_implemented()
@property
def days(self) -> Index:
"""
Expand All @@ -142,15 +141,18 @@ def days(self) -> Index:
Examples
--------
>>> idx = pd.to_timedelta(["0 days", "10 days", "20 days"]) # doctest: +SKIP
>>> idx # doctest: +SKIP
TimedeltaIndex(['0 days', '10 days', '20 days'],
dtype='timedelta64[ns]', freq=None)
>>> idx.days # doctest: +SKIP
>>> idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
>>> idx
TimedeltaIndex(['0 days', '10 days', '20 days'], dtype='timedelta64[ns]', freq=None)
>>> idx.days
Index([0, 10, 20], dtype='int64')
"""
return Index(
query_compiler=self._query_compiler.timedelta_property(
"days", include_index=True
)
)

@timedelta_index_not_implemented()
@property
def seconds(self) -> Index:
"""
Expand All @@ -162,15 +164,18 @@ def seconds(self) -> Index:
Examples
--------
>>> idx = pd.to_timedelta([1, 2, 3], unit='s') # doctest: +SKIP
>>> idx # doctest: +SKIP
TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'],
dtype='timedelta64[ns]', freq=None)
>>> idx.seconds # doctest: +SKIP
Index([1, 2, 3], dtype='int32')
>>> idx = pd.to_timedelta([1, 2, 3], unit='s')
>>> idx
TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'], dtype='timedelta64[ns]', freq=None)
>>> idx.seconds
Index([1, 2, 3], dtype='int64')
"""
return Index(
query_compiler=self._query_compiler.timedelta_property(
"seconds", include_index=True
)
)

@timedelta_index_not_implemented()
@property
def microseconds(self) -> Index:
"""
Expand All @@ -182,16 +187,20 @@ def microseconds(self) -> Index:
Examples
--------
>>> idx = pd.to_timedelta([1, 2, 3], unit='us') # doctest: +SKIP
>>> idx # doctest: +SKIP
>>> idx = pd.to_timedelta([1, 2, 3], unit='us')
>>> idx
TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002',
'0 days 00:00:00.000003'],
dtype='timedelta64[ns]', freq=None)
>>> idx.microseconds # doctest: +SKIP
Index([1, 2, 3], dtype='int32')
>>> idx.microseconds
Index([1, 2, 3], dtype='int64')
"""
return Index(
query_compiler=self._query_compiler.timedelta_property(
"microseconds", include_index=True
)
)

@timedelta_index_not_implemented()
@property
def nanoseconds(self) -> Index:
"""
Expand All @@ -203,14 +212,19 @@ def nanoseconds(self) -> Index:
Examples
--------
>>> idx = pd.to_timedelta([1, 2, 3], unit='ns') # doctest: +SKIP
>>> idx # doctest: +SKIP
>>> idx = pd.to_timedelta([1, 2, 3], unit='ns')
>>> idx
TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002',
'0 days 00:00:00.000000003'],
dtype='timedelta64[ns]', freq=None)
>>> idx.nanoseconds # doctest: +SKIP
Index([1, 2, 3], dtype='int32')
>>> idx.nanoseconds
Index([1, 2, 3], dtype='int64')
"""
return Index(
query_compiler=self._query_compiler.timedelta_property(
"nanoseconds", include_index=True
)
)

@timedelta_index_not_implemented()
@property
Expand Down
17 changes: 14 additions & 3 deletions tests/integ/modin/index/test_timedelta_index_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import snowflake.snowpark.modin.plugin # noqa: F401
from tests.integ.modin.sql_counter import sql_count_checker
from tests.integ.modin.utils import assert_index_equal


@sql_count_checker(query_count=3)
Expand Down Expand Up @@ -54,12 +55,22 @@ def test_non_default_args(kwargs):
pd.TimedeltaIndex(query_compiler=idx._query_compiler, **kwargs)


@pytest.mark.parametrize(
"property", ["days", "seconds", "microseconds", "nanoseconds", "inferred_freq"]
)
@pytest.mark.parametrize("property", ["components", "inferred_freq"])
@sql_count_checker(query_count=0)
def test_property_not_implemented(property):
snow_index = pd.TimedeltaIndex(["1 days", "2 days"])
msg = f"Snowpark pandas does not yet support the property TimedeltaIndex.{property}"
with pytest.raises(NotImplementedError, match=msg):
getattr(snow_index, property)


@pytest.mark.parametrize("attr", ["days", "seconds", "microseconds", "nanoseconds"])
@sql_count_checker(query_count=1)
def test_timedelta_index_properties(attr):
native_index = native_pd.TimedeltaIndex(
["1d", "1h", "60s", "1s", "800ms", "5us", "6ns", "1d 3s", "9m 15s 8us"]
)
snow_index = pd.Index(native_index)
assert_index_equal(
getattr(snow_index, attr), getattr(native_index, attr), exact=False
)

0 comments on commit 3f9806c

Please sign in to comment.