From fe51d4d167760bb0a858de0e404e9d7a644fb3a9 Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Wed, 11 Sep 2024 15:37:40 -0700 Subject: [PATCH 1/4] SNOW-1646704, SNOW-1646706: Add support for Series.dt.tz_convert/tz_localize (#2261) 1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR. Fixes SNOW-1646704, SNOW-1646706 2. Fill out the following pre-review checklist: - [x] I am adding a new automated test(s) to verify correctness of my new code - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes. 3. Please describe how your code solves the related issue. Add support for Series.dt.tz_convert/tz_localize. --- CHANGELOG.md | 1 + docs/source/modin/series.rst | 2 + .../modin/supported/series_dt_supported.rst | 5 +- .../modin/plugin/_internal/timestamp_utils.py | 65 +++++++ .../compiler/snowflake_query_compiler.py | 27 ++- .../modin/plugin/docstrings/series_utils.py | 175 +++++++++++++++++- tests/integ/modin/series/test_dt_accessor.py | 116 ++++++++++++ tests/unit/modin/test_series_dt.py | 2 - 8 files changed, 381 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c59d53ada05..87056a63f51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -118,6 +118,7 @@ - Added support for string indexing with `Timedelta` objects. - Added support for `Series.dt.total_seconds` method. - Added support for `DataFrame.apply(axis=0)`. +- Added support for `Series.dt.tz_convert` and `Series.dt.tz_localize`. #### Improvements diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst index 188bdab344a..4cb8a238b0f 100644 --- a/docs/source/modin/series.rst +++ b/docs/source/modin/series.rst @@ -279,6 +279,8 @@ Series Series.dt.seconds Series.dt.microseconds Series.dt.nanoseconds + Series.dt.tz_convert + Series.dt.tz_localize .. rubric:: String accessor methods diff --git a/docs/source/modin/supported/series_dt_supported.rst b/docs/source/modin/supported/series_dt_supported.rst index 3377a3d64e2..68853871ea6 100644 --- a/docs/source/modin/supported/series_dt_supported.rst +++ b/docs/source/modin/supported/series_dt_supported.rst @@ -80,9 +80,10 @@ the method in the left column. +-----------------------------+---------------------------------+----------------------------------------------------+ | ``to_pydatetime`` | N | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``tz_localize`` | N | | +| ``tz_localize`` | P | ``N`` if `ambiguous` or `nonexistent` are set to a | +| | | non-default value. | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``tz_convert`` | N | | +| ``tz_convert`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``normalize`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py index 0242177d1f0..f8629e664f3 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py @@ -22,9 +22,17 @@ cast, convert_timezone, date_part, + dayofmonth, + hour, iff, + minute, + month, + second, + timestamp_tz_from_parts, to_decimal, + to_timestamp_ntz, trunc, + year, ) from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage @@ -467,3 +475,60 @@ def convert_dateoffset_to_interval( ) interval_kwargs[new_param] = offset return Interval(**interval_kwargs) + + +def tz_localize_column(column: Column, tz: Union[str, dt.tzinfo]) -> Column: + """ + Localize tz-naive to tz-aware. + Args: + tz : str, pytz.timezone, optional + Localize a tz-naive datetime column to tz-aware + + Args: + column: the Snowpark datetime column + tz: time zone for time. Corresponding timestamps would be converted to this time zone of the Datetime Array/Index. A tz of None will convert to UTC and remove the timezone information. + + Returns: + The column after tz localization + """ + if tz is None: + # If this column is already a TIMESTAMP_NTZ, this cast does nothing. + # If the column is a TIMESTAMP_TZ, the cast drops the timezone and converts + # to TIMESTAMP_NTZ. + return to_timestamp_ntz(column) + else: + if isinstance(tz, dt.tzinfo): + tz_name = tz.tzname(None) + else: + tz_name = tz + return timestamp_tz_from_parts( + year(column), + month(column), + dayofmonth(column), + hour(column), + minute(column), + second(column), + date_part("nanosecond", column), + pandas_lit(tz_name), + ) + + +def tz_convert_column(column: Column, tz: Union[str, dt.tzinfo]) -> Column: + """ + Converts a datetime column to the specified timezone + + Args: + column: the Snowpark datetime column + tz: the target timezone + + Returns: + The column after conversion to the specified timezone + """ + if tz is None: + return convert_timezone(pandas_lit("UTC"), column) + else: + if isinstance(tz, dt.tzinfo): + tz_name = tz.tzname(None) + else: + tz_name = tz + return convert_timezone(pandas_lit(tz_name), column) diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index b1a2736d120..00436f94eec 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -279,6 +279,8 @@ raise_if_to_datetime_not_supported, timedelta_freq_to_nanos, to_snowflake_timestamp_format, + tz_convert_column, + tz_localize_column, ) from snowflake.snowpark.modin.plugin._internal.transpose_utils import ( clean_up_transpose_result_index_and_labels, @@ -16666,7 +16668,7 @@ def dt_tz_localize( tz: Union[str, tzinfo], ambiguous: str = "raise", nonexistent: str = "raise", - ) -> None: + ) -> "SnowflakeQueryCompiler": """ Localize tz-naive to tz-aware. Args: @@ -16678,11 +16680,22 @@ def dt_tz_localize( BaseQueryCompiler New QueryCompiler containing values with localized time zone. """ - ErrorMessage.not_implemented( - "Snowpark pandas doesn't yet support the method 'Series.dt.tz_localize'" + if not isinstance(ambiguous, str) or ambiguous != "raise": + ErrorMessage.parameter_not_implemented_error( + "ambiguous", "Series.dt.tz_localize" + ) + if not isinstance(nonexistent, str) or nonexistent != "raise": + ErrorMessage.parameter_not_implemented_error( + "nonexistent", "Series.dt.tz_localize" + ) + + return SnowflakeQueryCompiler( + self._modin_frame.apply_snowpark_function_to_columns( + lambda column: tz_localize_column(column, tz) + ) ) - def dt_tz_convert(self, tz: Union[str, tzinfo]) -> None: + def dt_tz_convert(self, tz: Union[str, tzinfo]) -> "SnowflakeQueryCompiler": """ Convert time-series data to the specified time zone. @@ -16692,8 +16705,10 @@ def dt_tz_convert(self, tz: Union[str, tzinfo]) -> None: Returns: A new QueryCompiler containing values with converted time zone. """ - ErrorMessage.not_implemented( - "Snowpark pandas doesn't yet support the method 'Series.dt.tz_convert'" + return SnowflakeQueryCompiler( + self._modin_frame.apply_snowpark_function_to_columns( + lambda column: tz_convert_column(column, tz) + ) ) def dt_ceil( diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py index 88c4029a92c..b05d7d76db6 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py @@ -1858,10 +1858,181 @@ def to_pydatetime(): pass def tz_localize(): - pass + """ + Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. + + This method takes a time zone (tz) naive Datetime Array/Index object and makes this time zone aware. It does not move the time to another time zone. + + This method can also be used to do the inverse – to create a time zone unaware object from an aware object. To that end, pass tz=None. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None + Time zone to convert timestamps to. Passing None will remove the time zone information preserving local time. + ambiguous : ‘infer’, ‘NaT’, bool array, default ‘raise’ + When clocks moved backward due to DST, ambiguous times may arise. For example in Central European Time (UTC+01), when going from 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the ambiguous parameter dictates how ambiguous times should be handled. + - ‘infer’ will attempt to infer fall dst-transition hours based on order + - bool-ndarray where True signifies a DST time, False signifies a non-DST time (note that this flag is only applicable for ambiguous times) + - ‘NaT’ will return NaT where there are ambiguous times + - ‘raise’ will raise an AmbiguousTimeError if there are ambiguous times. + nonexistent : ‘shift_forward’, ‘shift_backward, ‘NaT’, timedelta, default ‘raise’ + A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. + - ‘shift_forward’ will shift the nonexistent time forward to the closest existing time + - ‘shift_backward’ will shift the nonexistent time backward to the closest existing time + - ‘NaT’ will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - ‘raise’ will raise an NonExistentTimeError if there are nonexistent times. + + Returns + ------- + Same type as self + Array/Index converted to the specified time zone. + + Raises + ------ + TypeError + If the Datetime Array/Index is tz-aware and tz is not None. + + See also + -------- + DatetimeIndex.tz_convert + Convert tz-aware DatetimeIndex from one time zone to another. + + Examples + -------- + >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + + Localize DatetimeIndex in US/Eastern time zone: + + >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') # doctest: +SKIP + >>> tz_aware # doctest: +SKIP + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + + With the tz=None, we can remove the time zone information while keeping the local time (not converted to UTC): + + >>> tz_aware.tz_localize(None) # doctest: +SKIP + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + + Be careful with DST changes. When there is sequential data, pandas can infer the DST time: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') # doctest: +SKIP + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[ns, CET] + + In some cases, inferring the DST is impossible. In such cases, you can pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) # doctest: +SKIP + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[ns, CET] + + If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or ‘shift_forward’ or ‘shift_backwards’. + + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') # doctest: +SKIP + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') # doctest: +SKIP + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) # doctest: +SKIP + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + """ def tz_convert(): - pass + """ + Convert tz-aware Datetime Array/Index from one time zone to another. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None + Time zone for time. Corresponding timestamps would be converted to this time zone of the Datetime Array/Index. A tz of None will convert to UTC and remove the timezone information. + + Returns + ------- + Array or Index + + Raises + ------ + TypeError + If Datetime Array/Index is tz-naive. + + See also + DatetimeIndex.tz + A timezone that has a variable offset from UTC. + DatetimeIndex.tz_localize + Localize tz-naive DatetimeIndex to a given time zone, or remove timezone from a tz-aware DatetimeIndex. + + Examples + -------- + With the tz parameter, we can change the DatetimeIndex to other time zones: + + >>> dti = pd.date_range(start='2014-08-01 09:00', + ... freq='h', periods=3, tz='Europe/Berlin') # doctest: +SKIP + + >>> dti # doctest: +SKIP + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='h') + + >>> dti.tz_convert('US/Central') # doctest: +SKIP + DatetimeIndex(['2014-08-01 02:00:00-05:00', + '2014-08-01 03:00:00-05:00', + '2014-08-01 04:00:00-05:00'], + dtype='datetime64[ns, US/Central]', freq='h') + + With the tz=None, we can remove the timezone (after converting to UTC if necessary): + + >>> dti = pd.date_range(start='2014-08-01 09:00', freq='h', + ... periods=3, tz='Europe/Berlin') # doctest: +SKIP + + >>> dti # doctest: +SKIP + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='h') + + >>> dti.tz_convert(None) # doctest: +SKIP + DatetimeIndex(['2014-08-01 07:00:00', + '2014-08-01 08:00:00', + '2014-08-01 09:00:00'], + dtype='datetime64[ns]', freq='h') + """ + # TODO (SNOW-1660843): Support tz in pd.date_range and unskip the doctests. def normalize(): pass diff --git a/tests/integ/modin/series/test_dt_accessor.py b/tests/integ/modin/series/test_dt_accessor.py index 0e1cacf8fc0..d1795fa2c80 100644 --- a/tests/integ/modin/series/test_dt_accessor.py +++ b/tests/integ/modin/series/test_dt_accessor.py @@ -5,8 +5,10 @@ import datetime import modin.pandas as pd +import numpy as np import pandas as native_pd import pytest +import pytz import snowflake.snowpark.modin.plugin # noqa: F401 from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker @@ -39,6 +41,47 @@ ) +timezones = pytest.mark.parametrize( + "tz", + [ + None, + # Use a subset of pytz.common_timezones containing a few timezones in each + *[ + param_for_one_tz + for tz in [ + "Africa/Abidjan", + "Africa/Timbuktu", + "America/Adak", + "America/Yellowknife", + "Antarctica/Casey", + "Asia/Dhaka", + "Asia/Manila", + "Asia/Shanghai", + "Atlantic/Stanley", + "Australia/Sydney", + "Canada/Pacific", + "Europe/Chisinau", + "Europe/Luxembourg", + "Indian/Christmas", + "Pacific/Chatham", + "Pacific/Wake", + "US/Arizona", + "US/Central", + "US/Eastern", + "US/Hawaii", + "US/Mountain", + "US/Pacific", + "UTC", + ] + for param_for_one_tz in ( + pytz.timezone(tz), + tz, + ) + ], + ], +) + + @pytest.fixture def day_of_week_or_year_data() -> native_pd.Series: return native_pd.Series( @@ -174,6 +217,79 @@ def test_normalize(): ) +@sql_count_checker(query_count=1) +@timezones +def test_tz_convert(tz): + datetime_index = native_pd.DatetimeIndex( + [ + "2014-04-04 23:56:01.000000001", + "2014-07-18 21:24:02.000000002", + "2015-11-22 22:14:03.000000003", + "2015-11-23 20:12:04.1234567890", + pd.NaT, + ], + tz="US/Eastern", + ) + native_ser = native_pd.Series(datetime_index) + snow_ser = pd.Series(native_ser) + eval_snowpark_pandas_result( + snow_ser, + native_ser, + lambda s: s.dt.tz_convert(tz), + ) + + +@sql_count_checker(query_count=1) +@timezones +def test_tz_localize(tz): + datetime_index = native_pd.DatetimeIndex( + [ + "2014-04-04 23:56:01.000000001", + "2014-07-18 21:24:02.000000002", + "2015-11-22 22:14:03.000000003", + "2015-11-23 20:12:04.1234567890", + pd.NaT, + ], + ) + native_ser = native_pd.Series(datetime_index) + snow_ser = pd.Series(native_ser) + eval_snowpark_pandas_result( + snow_ser, + native_ser, + lambda s: s.dt.tz_localize(tz), + ) + + +@pytest.mark.parametrize( + "ambiguous, nonexistent", + [ + ("infer", "raise"), + ("NaT", "raise"), + (np.array([True, True, False]), "raise"), + ("raise", "shift_forward"), + ("raise", "shift_backward"), + ("raise", "NaT"), + ("raise", pd.Timedelta("1h")), + ("infer", "shift_forward"), + ], +) +@sql_count_checker(query_count=0) +def test_tz_localize_negative(ambiguous, nonexistent): + datetime_index = native_pd.DatetimeIndex( + [ + "2014-04-04 23:56:01.000000001", + "2014-07-18 21:24:02.000000002", + "2015-11-22 22:14:03.000000003", + "2015-11-23 20:12:04.1234567890", + pd.NaT, + ], + ) + native_ser = native_pd.Series(datetime_index) + snow_ser = pd.Series(native_ser) + with pytest.raises(NotImplementedError): + snow_ser.dt.tz_localize(tz=None, ambiguous=ambiguous, nonexistent=nonexistent) + + @pytest.mark.parametrize("name", [None, "hello"]) def test_isocalendar(name): with SqlCounter(query_count=1): diff --git a/tests/unit/modin/test_series_dt.py b/tests/unit/modin/test_series_dt.py index be0039683a8..0b5572f0592 100644 --- a/tests/unit/modin/test_series_dt.py +++ b/tests/unit/modin/test_series_dt.py @@ -32,8 +32,6 @@ def mock_query_compiler_for_dt_series() -> SnowflakeQueryCompiler: [ (lambda s: s.dt.timetz, "timetz"), (lambda s: s.dt.to_period(), "to_period"), - (lambda s: s.dt.tz_localize(tz="UTC"), "tz_localize"), - (lambda s: s.dt.tz_convert(tz="UTC"), "tz_convert"), (lambda s: s.dt.strftime(date_format="YY/MM/DD"), "strftime"), (lambda s: s.dt.qyear, "qyear"), (lambda s: s.dt.start_time, "start_time"), From 349037478b0c4988d8e7e888d98c4f4dd5e6f4e8 Mon Sep 17 00:00:00 2001 From: Afroz Alam Date: Wed, 11 Sep 2024 17:12:24 -0700 Subject: [PATCH 2/4] SNOW-1418543: Fix Flaky test (#2275) --- tests/mock/test_multithreading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mock/test_multithreading.py b/tests/mock/test_multithreading.py index bae771e8e77..5e0078212d6 100644 --- a/tests/mock/test_multithreading.py +++ b/tests/mock/test_multithreading.py @@ -158,7 +158,7 @@ def test_tabular_entity_registry(test_table): num_threads = 10 def write_read_and_drop_table(): - table_name = "test_table" + table_name = Utils.random_table_name() table_emulator = TableEmulator() entity_registry.write_table(table_name, table_emulator, SaveMode.OVERWRITE) @@ -170,7 +170,7 @@ def write_read_and_drop_table(): entity_registry.drop_table(table_name) def write_read_and_drop_view(): - view_name = "test_view" + view_name = Utils.random_view_name() empty_logical_plan = LogicalPlan() plan = MockExecutionPlan(empty_logical_plan, None) From 700412c97fa0a1f0cb2acd49f39ec6c204ea92ef Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Wed, 11 Sep 2024 19:50:42 -0700 Subject: [PATCH 3/4] SNOW-1660880: Properly raise NotImplementedError when ambiguous/nonexistent are non-string in ceil/floor/round (#2276) 1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR. Fixes SNOW-1660880 2. Fill out the following pre-review checklist: - [x] I am adding a new automated test(s) to verify correctness of my new code - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes. 3. Please describe how your code solves the related issue. Properly raise NotImplementedError when ambiguous/nonexistent are non-string in ceil/floor/round. --- CHANGELOG.md | 1 + .../plugin/compiler/snowflake_query_compiler.py | 14 ++++++++------ .../modin/index/test_datetime_index_methods.py | 6 ++++++ tests/integ/modin/series/test_dt_accessor.py | 5 +++++ 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87056a63f51..b015147d472 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -131,6 +131,7 @@ - Raise a warning whenever MultiIndex values are pulled in locally. - Improve warning message for `pd.read_snowflake` include the creation reason when temp table creation is triggered. - Improve performance for `DataFrame.set_index`, or setting `DataFrame.index` or `Series.index` by avoiding checks require eager evaluation. As a consequence, when the new index that does not match the current `Series`/`DataFrame` object length, a `ValueError` is no longer raised. Instead, when the `Series`/`DataFrame` object is longer than the provided index, the `Series`/`DataFrame`'s new index is filled with `NaN` values for the "extra" elements. Otherwise, the extra values in the provided index are ignored. +- Properly raise `NotImplementedError` when ambiguous/nonexistent are non-string in `ceil`/`floor`/`round`. #### Bug Fixes diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 00436f94eec..b5022bff46b 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -16751,9 +16751,9 @@ def dt_ceil( "column must be datetime or timedelta" ) # pragma: no cover - if ambiguous != "raise": + if not isinstance(ambiguous, str) or ambiguous != "raise": ErrorMessage.parameter_not_implemented_error("ambiguous", method_name) - if nonexistent != "raise": + if not isinstance(nonexistent, str) or nonexistent != "raise": ErrorMessage.parameter_not_implemented_error("nonexistent", method_name) if is_datetime64_any_dtype(dtype): @@ -16831,9 +16831,10 @@ def dt_round( raise AssertionError( "column must be datetime or timedelta" ) # pragma: no cover - if ambiguous != "raise": + + if not isinstance(ambiguous, str) or ambiguous != "raise": ErrorMessage.parameter_not_implemented_error("ambiguous", method_name) - if nonexistent != "raise": + if not isinstance(nonexistent, str) or nonexistent != "raise": ErrorMessage.parameter_not_implemented_error("nonexistent", method_name) if is_datetime64_any_dtype(dtype): @@ -16989,9 +16990,10 @@ def dt_floor( raise AssertionError( "column must be datetime or timedelta" ) # pragma: no cover - if ambiguous != "raise": + + if not isinstance(ambiguous, str) or ambiguous != "raise": ErrorMessage.parameter_not_implemented_error("ambiguous", method_name) - if nonexistent != "raise": + if not isinstance(nonexistent, str) or nonexistent != "raise": ErrorMessage.parameter_not_implemented_error("nonexistent", method_name) if is_datetime64_any_dtype(dtype): diff --git a/tests/integ/modin/index/test_datetime_index_methods.py b/tests/integ/modin/index/test_datetime_index_methods.py index 56fd40a6cb3..143e1d74080 100644 --- a/tests/integ/modin/index/test_datetime_index_methods.py +++ b/tests/integ/modin/index/test_datetime_index_methods.py @@ -4,6 +4,7 @@ import re import modin.pandas as pd +import numpy as np import pandas as native_pd import pytest @@ -268,7 +269,12 @@ def test_floor_ceil_round(datetime_index_value, func, freq): [ ("1w", "raise", "raise"), ("1h", "infer", "raise"), + ("1h", "NaT", "raise"), + ("1h", np.array([True, True, False]), "raise"), ("1h", "raise", "shift_forward"), + ("1h", "raise", "shift_backward"), + ("1h", "raise", "NaT"), + ("1h", "raise", pd.Timedelta("1h")), ("1w", "infer", "shift_forward"), ], ) diff --git a/tests/integ/modin/series/test_dt_accessor.py b/tests/integ/modin/series/test_dt_accessor.py index d1795fa2c80..34e2953cb4b 100644 --- a/tests/integ/modin/series/test_dt_accessor.py +++ b/tests/integ/modin/series/test_dt_accessor.py @@ -183,7 +183,12 @@ def test_floor_ceil_round(datetime_index_value, func, freq): [ ("1w", "raise", "raise"), ("1h", "infer", "raise"), + ("1h", "NaT", "raise"), + ("1h", np.array([True, True, False]), "raise"), ("1h", "raise", "shift_forward"), + ("1h", "raise", "shift_backward"), + ("1h", "raise", "NaT"), + ("1h", "raise", pd.Timedelta("1h")), ("1w", "infer", "shift_forward"), ], ) From d1c2cdf7fc2c5000d7f8ac37ce809b31dfdb97a1 Mon Sep 17 00:00:00 2001 From: Yun Zou Date: Thu, 12 Sep 2024 10:10:30 -0700 Subject: [PATCH 4/4] [CHERRY-PICK][Release v1.22.1] Cherry pick changes made for release v1.22.1 (#2280) cherry pick pr https://github.com/snowflakedb/snowpark-python/pull/2277 --- CHANGELOG.md | 5 +++++ recipe/meta.yaml | 2 +- src/snowflake/snowpark/version.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b015147d472..e0589d4a358 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ - Added support for `TimedeltaIndex.mean` method. + +## 1.22.1 (2024-09-11) +This is a re-release of 1.22.0. Please refer to the 1.22.0 release notes for detailed release content. + + ## 1.22.0 (2024-09-10) ### Snowpark Python API Updates diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 9aed0375d0c..9560f4a4408 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "snowflake-snowpark-python" %} -{% set version = "1.22.0" %} +{% set version = "1.22.1" %} package: name: {{ name|lower }} diff --git a/src/snowflake/snowpark/version.py b/src/snowflake/snowpark/version.py index 4b7ad25b189..798a3d902d0 100644 --- a/src/snowflake/snowpark/version.py +++ b/src/snowflake/snowpark/version.py @@ -4,4 +4,4 @@ # # Update this for the versions -VERSION = (1, 22, 0) +VERSION = (1, 22, 1)