Skip to content

Commit

Permalink
Merge branch 'main' into vbudati/SNOW-1661142-fix-index-name-behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-vbudati authored Sep 12, 2024
2 parents 21dbccb + d1c2cdf commit 34f0e50
Show file tree
Hide file tree
Showing 12 changed files with 410 additions and 22 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
- Fixed a bug where an `Index` object created from a `Series`/`DataFrame` incorrectly updates the `Series`/`DataFrame`'s index name when it is not supposed to.


## 1.22.1 (2024-09-11)
This is a re-release of 1.22.0. Please refer to the 1.22.0 release notes for detailed release content.



## 1.22.0 (2024-09-10)

### Snowpark Python API Updates
Expand Down Expand Up @@ -123,6 +128,7 @@
- Added support for string indexing with `Timedelta` objects.
- Added support for `Series.dt.total_seconds` method.
- Added support for `DataFrame.apply(axis=0)`.
- Added support for `Series.dt.tz_convert` and `Series.dt.tz_localize`.

#### Improvements

Expand All @@ -135,6 +141,7 @@
- Raise a warning whenever MultiIndex values are pulled in locally.
- Improve warning message for `pd.read_snowflake` to include the creation reason when temp table creation is triggered.
- Improve performance for `DataFrame.set_index`, or setting `DataFrame.index` or `Series.index`, by avoiding checks that require eager evaluation. As a consequence, when the new index does not match the current `Series`/`DataFrame` object length, a `ValueError` is no longer raised. Instead, when the `Series`/`DataFrame` object is longer than the provided index, the `Series`/`DataFrame`'s new index is filled with `NaN` values for the "extra" elements. Otherwise, the extra values in the provided index are ignored.
- Properly raise `NotImplementedError` when ambiguous/nonexistent are non-string in `ceil`/`floor`/`round`.

#### Bug Fixes

Expand Down
2 changes: 2 additions & 0 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ Series
Series.dt.seconds
Series.dt.microseconds
Series.dt.nanoseconds
Series.dt.tz_convert
Series.dt.tz_localize


.. rubric:: String accessor methods
Expand Down
5 changes: 3 additions & 2 deletions docs/source/modin/supported/series_dt_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,10 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_pydatetime`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tz_localize`` | N | |
| ``tz_localize`` | P | ``N`` if `ambiguous` or `nonexistent` are set to a |
| | | non-default value. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tz_convert`` | N | |
| ``tz_convert`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``normalize`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
2 changes: 1 addition & 1 deletion recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "snowflake-snowpark-python" %}
{% set version = "1.22.0" %}
{% set version = "1.22.1" %}

package:
name: {{ name|lower }}
Expand Down
65 changes: 65 additions & 0 deletions src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,17 @@
cast,
convert_timezone,
date_part,
dayofmonth,
hour,
iff,
minute,
month,
second,
timestamp_tz_from_parts,
to_decimal,
to_timestamp_ntz,
trunc,
year,
)
from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit
from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
Expand Down Expand Up @@ -467,3 +475,60 @@ def convert_dateoffset_to_interval(
)
interval_kwargs[new_param] = offset
return Interval(**interval_kwargs)


def tz_localize_column(column: Column, tz: Union[str, dt.tzinfo]) -> Column:
    """
    Localize a tz-naive datetime column to the given timezone (tz-aware).

    Args:
        column: the Snowpark datetime column
        tz: time zone for localization. Corresponding timestamps would be
            converted to this time zone of the Datetime Array/Index. A tz of
            None will convert to UTC and remove the timezone information.
    Returns:
        The column after tz localization
    """
    if tz is None:
        # If this column is already a TIMESTAMP_NTZ, this cast does nothing.
        # If the column is a TIMESTAMP_TZ, the cast drops the timezone and converts
        # to TIMESTAMP_NTZ.
        return to_timestamp_ntz(column)
    else:
        if isinstance(tz, dt.tzinfo):
            # NOTE(review): tzname(None) may return None or an offset-style
            # name for some tzinfo implementations (e.g. zoneinfo objects
            # typically want a concrete datetime) — TODO confirm which tzinfo
            # types callers actually pass here.
            tz_name = tz.tzname(None)
        else:
            tz_name = tz
        # Rebuild the timestamp from its wall-clock parts and attach the
        # target timezone name, preserving nanosecond precision via DATE_PART.
        return timestamp_tz_from_parts(
            year(column),
            month(column),
            dayofmonth(column),
            hour(column),
            minute(column),
            second(column),
            date_part("nanosecond", column),
            pandas_lit(tz_name),
        )


def tz_convert_column(column: Column, tz: Union[str, dt.tzinfo]) -> Column:
    """
    Convert a datetime column to the specified timezone.

    Args:
        column: the Snowpark datetime column
        tz: the target timezone; ``None`` converts to UTC
    Returns:
        The column after conversion to the specified timezone
    """
    # A missing target timezone means "convert to UTC".
    if tz is None:
        return convert_timezone(pandas_lit("UTC"), column)
    # Accept either a timezone name or a tzinfo object; resolve to a name.
    target_name = tz.tzname(None) if isinstance(tz, dt.tzinfo) else tz
    return convert_timezone(pandas_lit(target_name), column)
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@
raise_if_to_datetime_not_supported,
timedelta_freq_to_nanos,
to_snowflake_timestamp_format,
tz_convert_column,
tz_localize_column,
)
from snowflake.snowpark.modin.plugin._internal.transpose_utils import (
clean_up_transpose_result_index_and_labels,
Expand Down Expand Up @@ -16666,7 +16668,7 @@ def dt_tz_localize(
tz: Union[str, tzinfo],
ambiguous: str = "raise",
nonexistent: str = "raise",
) -> None:
) -> "SnowflakeQueryCompiler":
"""
Localize tz-naive to tz-aware.
Args:
Expand All @@ -16678,11 +16680,22 @@ def dt_tz_localize(
BaseQueryCompiler
New QueryCompiler containing values with localized time zone.
"""
ErrorMessage.not_implemented(
"Snowpark pandas doesn't yet support the method 'Series.dt.tz_localize'"
if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error(
"ambiguous", "Series.dt.tz_localize"
)
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error(
"nonexistent", "Series.dt.tz_localize"
)

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_localize_column(column, tz)
)
)

def dt_tz_convert(self, tz: Union[str, tzinfo]) -> None:
def dt_tz_convert(self, tz: Union[str, tzinfo]) -> "SnowflakeQueryCompiler":
"""
Convert time-series data to the specified time zone.

Expand All @@ -16692,8 +16705,10 @@ def dt_tz_convert(self, tz: Union[str, tzinfo]) -> None:
Returns:
A new QueryCompiler containing values with converted time zone.
"""
ErrorMessage.not_implemented(
"Snowpark pandas doesn't yet support the method 'Series.dt.tz_convert'"
return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_convert_column(column, tz)
)
)

def dt_ceil(
Expand Down Expand Up @@ -16736,9 +16751,9 @@ def dt_ceil(
"column must be datetime or timedelta"
) # pragma: no cover

if ambiguous != "raise":
if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error("ambiguous", method_name)
if nonexistent != "raise":
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error("nonexistent", method_name)

if is_datetime64_any_dtype(dtype):
Expand Down Expand Up @@ -16816,9 +16831,10 @@ def dt_round(
raise AssertionError(
"column must be datetime or timedelta"
) # pragma: no cover
if ambiguous != "raise":

if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error("ambiguous", method_name)
if nonexistent != "raise":
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error("nonexistent", method_name)

if is_datetime64_any_dtype(dtype):
Expand Down Expand Up @@ -16974,9 +16990,10 @@ def dt_floor(
raise AssertionError(
"column must be datetime or timedelta"
) # pragma: no cover
if ambiguous != "raise":

if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error("ambiguous", method_name)
if nonexistent != "raise":
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error("nonexistent", method_name)

if is_datetime64_any_dtype(dtype):
Expand Down
Loading

0 comments on commit 34f0e50

Please sign in to comment.