Skip to content

Commit

Permalink
Merge branch 'main' into vbudati/SNOW-1661142-fix-index-name-behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-vbudati authored Sep 12, 2024
2 parents 21dbccb + d1c2cdf commit 34f0e50
Show file tree
Hide file tree
Showing 12 changed files with 410 additions and 22 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
- Fixed a bug where an `Index` object created from a `Series`/`DataFrame` incorrectly updates the `Series`/`DataFrame`'s index name when it is not supposed to.


## 1.22.1 (2024-09-11)
This is a re-release of 1.22.0. Please refer to the 1.22.0 release notes for detailed release content.



## 1.22.0 (2024-09-10)

### Snowpark Python API Updates
Expand Down Expand Up @@ -123,6 +128,7 @@
- Added support for string indexing with `Timedelta` objects.
- Added support for `Series.dt.total_seconds` method.
- Added support for `DataFrame.apply(axis=0)`.
- Added support for `Series.dt.tz_convert` and `Series.dt.tz_localize`.

#### Improvements

Expand All @@ -135,6 +141,7 @@
- Raise a warning whenever MultiIndex values are pulled in locally.
- Improve warning message for `pd.read_snowflake` to include the creation reason when temp table creation is triggered.
- Improve performance for `DataFrame.set_index`, or setting `DataFrame.index` or `Series.index`, by avoiding checks that require eager evaluation. As a consequence, when the new index does not match the current `Series`/`DataFrame` object length, a `ValueError` is no longer raised. Instead, when the `Series`/`DataFrame` object is longer than the provided index, the `Series`/`DataFrame`'s new index is filled with `NaN` values for the "extra" elements. Otherwise, the extra values in the provided index are ignored.
- Properly raise `NotImplementedError` when ambiguous/nonexistent are non-string in `ceil`/`floor`/`round`.

#### Bug Fixes

Expand Down
2 changes: 2 additions & 0 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ Series
Series.dt.seconds
Series.dt.microseconds
Series.dt.nanoseconds
Series.dt.tz_convert
Series.dt.tz_localize


.. rubric:: String accessor methods
Expand Down
5 changes: 3 additions & 2 deletions docs/source/modin/supported/series_dt_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,10 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``to_pydatetime`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tz_localize`` | N | |
| ``tz_localize`` | P | ``N`` if `ambiguous` or `nonexistent` are set to a |
| | | non-default value. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``tz_convert`` | N | |
| ``tz_convert`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``normalize`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
2 changes: 1 addition & 1 deletion recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "snowflake-snowpark-python" %}
{% set version = "1.22.0" %}
{% set version = "1.22.1" %}

package:
name: {{ name|lower }}
Expand Down
65 changes: 65 additions & 0 deletions src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,17 @@
cast,
convert_timezone,
date_part,
dayofmonth,
hour,
iff,
minute,
month,
second,
timestamp_tz_from_parts,
to_decimal,
to_timestamp_ntz,
trunc,
year,
)
from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit
from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
Expand Down Expand Up @@ -467,3 +475,60 @@ def convert_dateoffset_to_interval(
)
interval_kwargs[new_param] = offset
return Interval(**interval_kwargs)


def tz_localize_column(column: Column, tz: Union[str, dt.tzinfo]) -> Column:
    """
    Localize a tz-naive datetime column to the given timezone (tz-aware).

    Args:
        column: the Snowpark datetime column
        tz: time zone for localization. Corresponding timestamps would be
            converted to this time zone of the Datetime Array/Index. A tz of
            None will convert to UTC and remove the timezone information.
    Returns:
        The column after tz localization
    """
    if tz is None:
        # If this column is already a TIMESTAMP_NTZ, this cast does nothing.
        # If the column is a TIMESTAMP_TZ, the cast drops the timezone and converts
        # to TIMESTAMP_NTZ.
        return to_timestamp_ntz(column)
    else:
        if isinstance(tz, dt.tzinfo):
            # NOTE(review): tzname(None) may return None or an offset-style
            # name for some tzinfo implementations (e.g. zoneinfo objects
            # typically want a concrete datetime) — TODO confirm which tzinfo
            # types callers actually pass here.
            tz_name = tz.tzname(None)
        else:
            tz_name = tz
        # Rebuild the timestamp from its wall-clock parts and attach the
        # target timezone name, preserving nanosecond precision via DATE_PART.
        return timestamp_tz_from_parts(
            year(column),
            month(column),
            dayofmonth(column),
            hour(column),
            minute(column),
            second(column),
            date_part("nanosecond", column),
            pandas_lit(tz_name),
        )


def tz_convert_column(column: Column, tz: Union[str, dt.tzinfo]) -> Column:
    """
    Convert a datetime column to the specified timezone.

    Args:
        column: the Snowpark datetime column
        tz: the target timezone; ``None`` converts to UTC
    Returns:
        The column after conversion to the specified timezone
    """
    # A missing target timezone means "convert to UTC".
    if tz is None:
        return convert_timezone(pandas_lit("UTC"), column)
    # Accept either a timezone name or a tzinfo object; resolve to a name.
    target_name = tz.tzname(None) if isinstance(tz, dt.tzinfo) else tz
    return convert_timezone(pandas_lit(target_name), column)
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@
raise_if_to_datetime_not_supported,
timedelta_freq_to_nanos,
to_snowflake_timestamp_format,
tz_convert_column,
tz_localize_column,
)
from snowflake.snowpark.modin.plugin._internal.transpose_utils import (
clean_up_transpose_result_index_and_labels,
Expand Down Expand Up @@ -16666,7 +16668,7 @@ def dt_tz_localize(
tz: Union[str, tzinfo],
ambiguous: str = "raise",
nonexistent: str = "raise",
) -> None:
) -> "SnowflakeQueryCompiler":
"""
Localize tz-naive to tz-aware.
Args:
Expand All @@ -16678,11 +16680,22 @@ def dt_tz_localize(
BaseQueryCompiler
New QueryCompiler containing values with localized time zone.
"""
ErrorMessage.not_implemented(
"Snowpark pandas doesn't yet support the method 'Series.dt.tz_localize'"
if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error(
"ambiguous", "Series.dt.tz_localize"
)
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error(
"nonexistent", "Series.dt.tz_localize"
)

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_localize_column(column, tz)
)
)

def dt_tz_convert(self, tz: Union[str, tzinfo]) -> None:
def dt_tz_convert(self, tz: Union[str, tzinfo]) -> "SnowflakeQueryCompiler":
"""
Convert time-series data to the specified time zone.

Expand All @@ -16692,8 +16705,10 @@ def dt_tz_convert(self, tz: Union[str, tzinfo]) -> None:
Returns:
A new QueryCompiler containing values with converted time zone.
"""
ErrorMessage.not_implemented(
"Snowpark pandas doesn't yet support the method 'Series.dt.tz_convert'"
return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_convert_column(column, tz)
)
)

def dt_ceil(
Expand Down Expand Up @@ -16736,9 +16751,9 @@ def dt_ceil(
"column must be datetime or timedelta"
) # pragma: no cover

if ambiguous != "raise":
if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error("ambiguous", method_name)
if nonexistent != "raise":
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error("nonexistent", method_name)

if is_datetime64_any_dtype(dtype):
Expand Down Expand Up @@ -16816,9 +16831,10 @@ def dt_round(
raise AssertionError(
"column must be datetime or timedelta"
) # pragma: no cover
if ambiguous != "raise":

if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error("ambiguous", method_name)
if nonexistent != "raise":
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error("nonexistent", method_name)

if is_datetime64_any_dtype(dtype):
Expand Down Expand Up @@ -16974,9 +16990,10 @@ def dt_floor(
raise AssertionError(
"column must be datetime or timedelta"
) # pragma: no cover
if ambiguous != "raise":

if not isinstance(ambiguous, str) or ambiguous != "raise":
ErrorMessage.parameter_not_implemented_error("ambiguous", method_name)
if nonexistent != "raise":
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error("nonexistent", method_name)

if is_datetime64_any_dtype(dtype):
Expand Down
Loading

0 comments on commit 34f0e50

Please sign in to comment.