diff --git a/CHANGELOG.md b/CHANGELOG.md index 9489afbb3d0..23eaea959ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Release History +## 1.18.0 (TBD) + +### Snowpark Local Testing Updates + +#### Bug Fixes + +- Fixed a bug where the fractional seconds part was not handled properly when processing time formats. + ## 1.17.0 (2024-05-21) ### Snowpark Python API Updates diff --git a/src/snowflake/snowpark/mock/_functions.py b/src/snowflake/snowpark/mock/_functions.py index df0fcd6e511..251d827c19e 100644 --- a/src/snowflake/snowpark/mock/_functions.py +++ b/src/snowflake/snowpark/mock/_functions.py @@ -536,6 +536,7 @@ def mock_to_time( [x] For this timestamp, the function gets the number of seconds after the start of the Unix epoch. The function performs a modulo operation to get the remainder from dividing this number by the number of seconds in a day (86400): number_of_seconds % 86400 """ + import dateutil.parser def convert_int_string_to_time(d: str): return datetime.datetime.utcfromtimestamp( @@ -549,7 +550,7 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i seconds_part = data_parts[1] # find the idx that the seconds part ends idx = 0 - while seconds_part[idx].isdigit(): + while idx < len(seconds_part) and seconds_part[idx].isdigit(): idx += 1 # truncate to precision seconds_part = ( @@ -557,6 +558,10 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i ) _data = f"{data_parts[0]}.{seconds_part}" + # %f is optional if fractional seconds part doesn't show up in the input which means it is 0 nanoseconds + if len(data_parts) == 1 and ".%f" in _time_format: + _time_format = _time_format.replace(".%f", "") + target_datetime = datetime.datetime.strptime( process_string_time_with_fractional_seconds(_data, _fractional_seconds), _time_format, @@ -578,13 +583,15 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i time_fmt, fractional_seconds, ) = 
convert_snowflake_datetime_format(_fmt, default_format="%H:%M:%S") - + auto_detect = _fmt is None or str(_fmt).lower() == "auto" if isinstance(datatype, StringType): if data.isdigit(): res.append(convert_int_string_to_time(data)) else: res.append( - convert_string_to_time(data, time_fmt, fractional_seconds) + dateutil.parser.parse(data).time() + if auto_detect + else convert_string_to_time(data, time_fmt, fractional_seconds) ) elif isinstance(datatype, TimestampType): res.append(data.time()) @@ -593,9 +600,8 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i if data.isdigit(): res.append(convert_int_string_to_time(data)) else: - res.append( - convert_string_to_time(data, time_fmt, fractional_seconds) - ) + # variant type does not support format input + res.append(dateutil.parser.parse(data).time()) elif isinstance(data, datetime.time): res.append(data) else: @@ -915,10 +921,9 @@ def convert_char(row): return try_convert(convert_numeric_to_str, try_cast, data) elif isinstance(source_datatype, (DateType, TimeType)): default_format = _DEFAULT_OUTPUT_FORMAT.get(type(source_datatype)) - ( - format, - _, - ) = convert_snowflake_datetime_format(_fmt, default_format=default_format) + (format, _,) = convert_snowflake_datetime_format( + _fmt, default_format=default_format, is_input_format=False + ) convert_date_time_to_str = ( datetime.datetime.strftime if isinstance(source_datatype, DateType) @@ -929,10 +934,9 @@ def convert_char(row): ) elif isinstance(source_datatype, TimestampType): default_format = _DEFAULT_OUTPUT_FORMAT.get(TimestampType) - ( - format, - fractional_seconds, - ) = convert_snowflake_datetime_format(_fmt, default_format) + (format, fractional_seconds,) = convert_snowflake_datetime_format( + _fmt, default_format, is_input_format=False + ) # handle 3f, can use str index time_str = try_convert( lambda x: datetime.date.strftime(x, format), try_cast, data diff --git a/src/snowflake/snowpark/mock/_util.py 
b/src/snowflake/snowpark/mock/_util.py index dc434170e81..25e4fcce386 100644 --- a/src/snowflake/snowpark/mock/_util.py +++ b/src/snowflake/snowpark/mock/_util.py @@ -120,10 +120,19 @@ def array_custom_comparator(ascend: bool, null_first: bool, a: Any, b: Any): return ret if ascend else -1 * ret -def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int]: +def convert_snowflake_datetime_format( + format, default_format, is_input_format=True +) -> Tuple[str, int]: """ unified processing of the time format converting snowflake date/time/timestamp format into python datetime format + + usage notes on the returning fractional seconds: + fractional seconds does not come into effect when parsing input, see following sql + alter session set TIME_OUTPUT_FORMAT = 'HH:MI:SS.FF9'; + select to_time('11:22:44.333333', 'HH:MI:SS.FF1'); + it still returns '11:22:44.333333' not '11:22:44.3' + however fractional seconds is used in controlling the output format """ format_to_use = format or default_format @@ -155,7 +164,9 @@ def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int] # 'FF' is not in the fmt pass - return time_fmt, fractional_seconds + # in live connection, input does not appreciate fractional_seconds in the format, + # input always treated as nanoseconds if FF[1-9] is specified + return time_fmt, 9 if is_input_format else fractional_seconds def convert_numeric_string_value_to_float_seconds(time: str) -> float: @@ -189,8 +200,10 @@ def process_string_time_with_fractional_seconds(time: str, fractional_seconds) - idx = 0 while idx < len(seconds_part) and seconds_part[idx].isdigit(): idx += 1 - # truncate to precision - seconds_part = seconds_part[: min(idx, fractional_seconds)] + seconds_part[idx:] + # truncate to precision, python can only handle microsecond which is 6 digits + seconds_part = ( + seconds_part[: min(idx, fractional_seconds, 6)] + seconds_part[idx:] + ) ret = f"{time_parts[0]}.{seconds_part}" return ret diff 
--git a/tests/integ/scala/test_function_suite.py b/tests/integ/scala/test_function_suite.py index ce9244c7091..305de83a05b 100644 --- a/tests/integ/scala/test_function_suite.py +++ b/tests/integ/scala/test_function_suite.py @@ -1361,6 +1361,18 @@ def test_to_time(session, local_testing_mode): [ Row(time(1, 2, 3)), Row(time(22, 33, 44)), + Row(time(22, 33, 44, 123000)), + Row(time(22, 33, 44, 567890)), + ], + ) + + Utils.check_answer( + df.select(*[to_time(column, "HH24:MI:SS.FF4") for column in df.columns]), + [ + Row(time(1, 2, 3)), + Row(time(22, 33, 44)), + Row(time(22, 33, 44, 123000)), + Row(time(22, 33, 44, 567890)), ], ) @@ -1583,7 +1595,9 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode) to_timestamp_tz, [ Row( - datetime(2024, 2, 1, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), + datetime( + 2024, 2, 1, 0, 0, 0, 123456, tzinfo=pytz.timezone("Etc/GMT+8") + ), ), Row( datetime(2024, 2, 2, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), @@ -1596,7 +1610,7 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode) ( to_timestamp_ntz, [ - Row(datetime(2024, 2, 1, 0, 0)), + Row(datetime(2024, 2, 1, 0, 0, 0, 123456)), Row(datetime(2024, 2, 2, 0, 0)), Row(datetime(2024, 2, 3, 0, 0)), ], @@ -1605,7 +1619,9 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode) to_timestamp_ltz, [ Row( - datetime(2024, 2, 1, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), + datetime( + 2024, 2, 1, 0, 0, 0, 123456, tzinfo=pytz.timezone("Etc/GMT+8") + ), ), Row( datetime(2024, 2, 2, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), @@ -1626,7 +1642,7 @@ def test_to_timestamp_fmt_column(to_type, expected, session, local_testing_mode) ): LocalTimezone.set_local_timezone(pytz.timezone("Etc/GMT+8")) data = [ - ("2024-02-01 00:00:00.000000", "YYYY-MM-DD HH24:MI:SS.FF"), + ("2024-02-01 00:00:00.123456789", "YYYY-MM-DD HH24:MI:SS.FF1"), ("20240202000000000000", "YYYYMMDDHH24MISSFF"), ("03 Feb 2024 00:00:00", "DD mon YYYY 
HH24:MI:SS"), ] diff --git a/tests/integ/scala/test_table_suite.py b/tests/integ/scala/test_table_suite.py index a74bdac4622..42a84affd84 100644 --- a/tests/integ/scala/test_table_suite.py +++ b/tests/integ/scala/test_table_suite.py @@ -260,10 +260,6 @@ def test_table_with_semi_structured_types(session, semi_structured_table): ) -@pytest.mark.skipif( - "config.getoption('local_testing_mode', default=False)", - reason="SNOW-1374013: Local testing fails to parse time '09:15:29.999999'", -) def test_table_with_time_type(session, table_with_time): df = session.table(table_with_time) # snowflake time has accuracy to 0.99999999. Python has accuracy to 0.999999. diff --git a/tests/utils.py b/tests/utils.py index e298fcfd9dc..67139d366e7 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -861,7 +861,7 @@ def datetime_primitives2(cls, session: "Session") -> DataFrame: @classmethod def time_primitives1(cls, session: "Session") -> DataFrame: # simple string data - data = [("01:02:03",), ("22:33:44",)] + data = [("01:02:03",), ("22:33:44",), ("22:33:44.123",), ("22:33:44.56789",)] schema = StructType([StructField("a", StringType())]) return session.create_dataframe(data, schema)