Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1636767, SNOW-1635405: Support timestamp +/- timedelta. #2153

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
- `NotImplementedError` will be raised for the rest of methods that do not support `Timedelta`.
- support for subtracting two timestamps to get a Timedelta.
- support indexing with Timedelta data columns.
- support for adding a timestamp and a `Timedelta` (in either order) and for subtracting a `Timedelta` from a timestamp.
- Added support for index's arithmetic and comparison operators.
- Added support for `Series.dt.round`.
- Added documentation pages for `DatetimeIndex`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from snowflake.snowpark.functions import (
col,
concat,
dateadd,
datediff,
floor,
iff,
Expand Down Expand Up @@ -277,7 +278,52 @@ def compute_binary_op_between_snowpark_columns(
# Some operators and data types have to be handled specially to align with pandas.
# However, it is difficult to fail early if the arithmetic operator is not compatible
# with the data type, so we just let the server raise an exception (e.g. a string minus a string).
if op == "truediv":
if (
op == "add"
and isinstance(second_datatype(), TimedeltaType)
and isinstance(first_datatype(), TimestampType)
):
binary_op_result_column = dateadd("ns", second_operand, first_operand)
elif (
op == "add"
and isinstance(first_datatype(), TimedeltaType)
and isinstance(second_datatype(), TimestampType)
):
binary_op_result_column = dateadd("ns", first_operand, second_operand)
elif op == "add" and (
(
isinstance(first_datatype(), TimedeltaType)
and isinstance(second_datatype(), NullType)
)
or (
isinstance(second_datatype(), TimedeltaType)
and isinstance(first_datatype(), NullType)
)
):
return SnowparkPandasColumn(pandas_lit(None), TimedeltaType())
elif (
op == "sub"
and isinstance(second_datatype(), TimedeltaType)
and isinstance(first_datatype(), TimestampType)
):
binary_op_result_column = dateadd("ns", -1 * second_operand, first_operand)
elif (
op == "sub"
and isinstance(first_datatype(), TimedeltaType)
and isinstance(second_datatype(), TimestampType)
):
# Timedelta - Timestamp doesn't make sense. Raise the same error
# message as pandas.
raise TypeError("bad operand type for unary -: 'DatetimeArray'")
elif isinstance(first_datatype(), TimedeltaType) or isinstance(
second_datatype(), TimedeltaType
):
# We don't support these cases yet.
# TODO(SNOW-1637101, SNOW-1637102): Support these cases.
ErrorMessage.not_implemented(
f"Snowpark pandas does not yet support the binary operation {op} with timedelta types."
)
elif op == "truediv":
binary_op_result_column = first_operand / second_operand
elif op == "floordiv":
binary_op_result_column = floor(first_operand / second_operand)
Expand Down
12 changes: 6 additions & 6 deletions src/snowflake/snowpark/modin/plugin/_internal/type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,12 @@ def to_snowflake(
"""
Map a pandas or numpy type to a Snowpark data type.
"""
snowpark_pandas_type = (
SnowparkPandasType.get_snowpark_pandas_type_for_pandas_type(p)
)
if snowpark_pandas_type is not None:
return snowpark_pandas_type

if isinstance(p, DatetimeTZDtype):
return TimestampType(TimestampTimeZone.TZ)
if p is native_pd.Timestamp or is_datetime64_any_dtype(p):
Expand All @@ -246,12 +252,6 @@ def to_snowflake(
if is_float_dtype(p):
return DoubleType()

snowpark_pandas_type = (
SnowparkPandasType.get_snowpark_pandas_type_for_pandas_type(p)
)
if snowpark_pandas_type is not None:
return snowpark_pandas_type

try:
return PANDAS_TO_SNOWFLAKE_MAP[p]
except KeyError:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1907,8 +1907,6 @@ def _binary_op_list_like_rhs_axis_0(
"""
from snowflake.snowpark.modin.pandas.series import Series

self._raise_not_implemented_error_for_timedelta()

# Step 1: Convert other to a Series and join on the row position with self.
other_qc = Series(other)._query_compiler
self_frame = self._modin_frame.ensure_row_position_column()
Expand Down Expand Up @@ -2059,8 +2057,6 @@ def binary_op(
from snowflake.snowpark.modin.pandas.series import Series
from snowflake.snowpark.modin.pandas.utils import is_scalar

self._raise_not_implemented_error_for_timedelta()

# fail explicitly for unsupported scenarios
if level is not None:
# TODO SNOW-862668: binary operations with level
Expand Down Expand Up @@ -13758,8 +13754,6 @@ def _binary_op_between_dataframe_and_series_along_axis_0(
Returns:
SnowflakeQueryCompiler representing result of binary op operation.
"""
self._raise_not_implemented_error_for_timedelta()

assert (
other.is_series_like()
), "other must be a Snowflake Query Compiler representing a Series"
Expand Down Expand Up @@ -14331,7 +14325,12 @@ def infer_sorted_column_labels(
data_column_snowflake_quoted_identifiers=expanded_data_column_snowflake_quoted_identifiers,
index_column_pandas_labels=index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers,
data_column_types=None,
data_column_types=[
frame.snowflake_quoted_identifier_to_snowpark_pandas_type.get(
identifier
)
for identifier in expanded_data_column_snowflake_quoted_identifiers
],
index_column_types=None,
)

Expand Down
Loading
Loading