From 6a5c61c3a1c56f4e2ed2650ccde18ed4af42876d Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Wed, 15 May 2024 22:21:29 -0700 Subject: [PATCH 1/6] fix --- src/snowflake/snowpark/mock/_functions.py | 17 ++++++++++------ src/snowflake/snowpark/mock/_util.py | 11 ++++++++--- tests/integ/scala/test_function_suite.py | 24 +++++++++++++++++++---- tests/integ/scala/test_table_suite.py | 4 ---- tests/utils.py | 2 +- 5 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/snowflake/snowpark/mock/_functions.py b/src/snowflake/snowpark/mock/_functions.py index df0fcd6e511..338e6e388c9 100644 --- a/src/snowflake/snowpark/mock/_functions.py +++ b/src/snowflake/snowpark/mock/_functions.py @@ -536,6 +536,7 @@ def mock_to_time( [x] For this timestamp, the function gets the number of seconds after the start of the Unix epoch. The function performs a modulo operation to get the remainder from dividing this number by the number of seconds in a day (86400): number_of_seconds % 86400 """ + import dateutil.parser def convert_int_string_to_time(d: str): return datetime.datetime.utcfromtimestamp( @@ -549,7 +550,7 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i seconds_part = data_parts[1] # find the idx that the seconds part ends idx = 0 - while seconds_part[idx].isdigit(): + while idx < len(seconds_part) and seconds_part[idx].isdigit(): idx += 1 # truncate to precision seconds_part = ( @@ -557,6 +558,9 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i ) _data = f"{data_parts[0]}.{seconds_part}" + if len(data_parts) == 1 and ".%f" in _time_format: + _time_format = _time_format.replace(".%f", "") + target_datetime = datetime.datetime.strptime( process_string_time_with_fractional_seconds(_data, _fractional_seconds), _time_format, @@ -578,13 +582,15 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i time_fmt, fractional_seconds, ) = convert_snowflake_datetime_format(_fmt, default_format="%H:%M:%S") - + auto_detect = _fmt is None or str(_fmt).lower() == "auto" if isinstance(datatype, StringType): if data.isdigit(): res.append(convert_int_string_to_time(data)) else: res.append( - convert_string_to_time(data, time_fmt, fractional_seconds) + dateutil.parser.parse(data).time() + if auto_detect + else convert_string_to_time(data, time_fmt, fractional_seconds) ) elif isinstance(datatype, TimestampType): res.append(data.time()) @@ -593,9 +599,8 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i if data.isdigit(): res.append(convert_int_string_to_time(data)) else: - res.append( - convert_string_to_time(data, time_fmt, fractional_seconds) - ) + # variant type does not support format + res.append(dateutil.parser.parse(data).time()) elif isinstance(data, datetime.time): res.append(data) else: diff --git a/src/snowflake/snowpark/mock/_util.py b/src/snowflake/snowpark/mock/_util.py index dc434170e81..944ca385ecf 100644 --- a/src/snowflake/snowpark/mock/_util.py +++ b/src/snowflake/snowpark/mock/_util.py @@ -146,7 +146,10 @@ def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int] ff_index = str(time_fmt).index("FF") # handle precision string 'FF[0-9]' which could be like FF0, FF1, ..., FF9 if str(time_fmt[ff_index + 2 : ff_index + 3]).isdigit(): - fractional_seconds = int(time_fmt[ff_index + 2 : ff_index + 3]) + # fractional seconds does not come into effect when parsing input, see follow sql + # alter session set TIME_OUTPUT_FORMAT = 'HH:MI:SS.FF9'; + # select to_time('11:22:44.333333', 'HH:MI:SS.FF1'); + # it still returns '11:22:44.333333' not '11:22:44.3' # replace FF[0-9] with %f time_fmt = time_fmt[:ff_index] + "%f" + time_fmt[ff_index + 3 :] else: @@ -189,8 +192,10 @@ def process_string_time_with_fractional_seconds(time: str, fractional_seconds) - idx = 0 while idx < len(seconds_part) and seconds_part[idx].isdigit(): idx += 1 - # truncate to precision - seconds_part = seconds_part[: min(idx, fractional_seconds)] + seconds_part[idx:] + # truncate to precision, python can only handle microsecond which is 6 digits + seconds_part = ( + seconds_part[: min(idx, fractional_seconds, 6)] + seconds_part[idx:] + ) ret = f"{time_parts[0]}.{seconds_part}" return ret diff --git a/tests/integ/scala/test_function_suite.py b/tests/integ/scala/test_function_suite.py index 6c4404b4606..bae3f1b79a1 100644 --- a/tests/integ/scala/test_function_suite.py +++ b/tests/integ/scala/test_function_suite.py @@ -1361,6 +1361,18 @@ def test_to_time(session, local_testing_mode): [ Row(time(1, 2, 3)), Row(time(22, 33, 44)), + Row(time(22, 33, 44, 123000)), + Row(time(22, 33, 44, 567890)), + ], + ) + + Utils.check_answer( + df.select(*[to_time(column, "HH24:MI:SS.FF4") for column in df.columns]), + [ + Row(time(1, 2, 3)), + Row(time(22, 33, 44)), + Row(time(22, 33, 44, 123000)), + Row(time(22, 33, 44, 567890)), ], ) @@ -1583,7 +1595,9 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode) to_timestamp_tz, [ Row( - datetime(2024, 2, 1, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), + datetime( + 2024, 2, 1, 0, 0, 0, 123456, tzinfo=pytz.timezone("Etc/GMT+8") + ), ), Row( datetime(2024, 2, 2, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), @@ -1596,7 +1610,7 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode) ( to_timestamp_ntz, [ - Row(datetime(2024, 2, 1, 0, 0)), + Row(datetime(2024, 2, 1, 0, 0, 0, 123456)), Row(datetime(2024, 2, 2, 0, 0)), Row(datetime(2024, 2, 3, 0, 0)), ], @@ -1605,7 +1619,9 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode) to_timestamp_ltz, [ Row( - datetime(2024, 2, 1, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), + datetime( + 2024, 2, 1, 0, 0, 0, 123456, tzinfo=pytz.timezone("Etc/GMT+8") + ), ), Row( datetime(2024, 2, 2, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")), @@ -1626,7 +1642,7 @@ def test_to_timestamp_fmt_column(to_type, expected, session, local_testing_mode) ): LocalTimezone.set_local_timezone(pytz.timezone("Etc/GMT+8")) data = [ - ("2024-02-01 00:00:00.000000", "YYYY-MM-DD HH24:MI:SS.FF"), + ("2024-02-01 00:00:00.123456789", "YYYY-MM-DD HH24:MI:SS.FF1"), ("20240202000000000000", "YYYYMMDDHH24MISSFF"), ("03 Feb 2024 00:00:00", "DD mon YYYY HH24:MI:SS"), ] diff --git a/tests/integ/scala/test_table_suite.py b/tests/integ/scala/test_table_suite.py index a74bdac4622..42a84affd84 100644 --- a/tests/integ/scala/test_table_suite.py +++ b/tests/integ/scala/test_table_suite.py @@ -260,10 +260,6 @@ def test_table_with_semi_structured_types(session, semi_structured_table): ) -@pytest.mark.skipif( - "config.getoption('local_testing_mode', default=False)", - reason="SNOW-1374013: Local testing fails to parse time '09:15:29.999999'", -) def test_table_with_time_type(session, table_with_time): df = session.table(table_with_time) # snowflake time has accuracy to 0.99999999. Python has accuracy to 0.999999. diff --git a/tests/utils.py b/tests/utils.py index b6060d65aea..448c0a92427 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -860,7 +860,7 @@ def datetime_primitives2(cls, session: "Session") -> DataFrame: @classmethod def time_primitives1(cls, session: "Session") -> DataFrame: # simple string data - data = [("01:02:03",), ("22:33:44",)] + data = [("01:02:03",), ("22:33:44",), ("22:33:44.123",), ("22:33:44.56789",)] schema = StructType([StructField("a", StringType())]) return session.create_dataframe(data, schema) From ce0dec1142894d02b387362e672af650b99da4bc Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Wed, 15 May 2024 22:48:35 -0700 Subject: [PATCH 2/6] fix --- src/snowflake/snowpark/mock/_functions.py | 14 ++++++-------- src/snowflake/snowpark/mock/_util.py | 20 ++++++++++++++------ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/snowflake/snowpark/mock/_functions.py b/src/snowflake/snowpark/mock/_functions.py index 338e6e388c9..b3c61587e8d 100644 --- a/src/snowflake/snowpark/mock/_functions.py +++ b/src/snowflake/snowpark/mock/_functions.py @@ -920,10 +920,9 @@ def convert_char(row): return try_convert(convert_numeric_to_str, try_cast, data) elif isinstance(source_datatype, (DateType, TimeType)): default_format = _DEFAULT_OUTPUT_FORMAT.get(type(source_datatype)) - ( - format, - _, - ) = convert_snowflake_datetime_format(_fmt, default_format=default_format) + (format, _,) = convert_snowflake_datetime_format( + _fmt, default_format=default_format, is_input_format=False + ) convert_date_time_to_str = ( datetime.datetime.strftime if isinstance(source_datatype, DateType) @@ -934,10 +933,9 @@ def convert_char(row): ) elif isinstance(source_datatype, TimestampType): default_format = _DEFAULT_OUTPUT_FORMAT.get(TimestampType) - ( - format, - fractional_seconds, - ) = convert_snowflake_datetime_format(_fmt, default_format) + (format, fractional_seconds,) = convert_snowflake_datetime_format( + _fmt, default_format, is_input_format=False + ) # handle 3f, can use str index time_str = try_convert( lambda x: datetime.date.strftime(x, format), try_cast, data diff --git a/src/snowflake/snowpark/mock/_util.py b/src/snowflake/snowpark/mock/_util.py index 944ca385ecf..25e4fcce386 100644 --- a/src/snowflake/snowpark/mock/_util.py +++ b/src/snowflake/snowpark/mock/_util.py @@ -120,10 +120,19 @@ def array_custom_comparator(ascend: bool, null_first: bool, a: Any, b: Any): return ret if ascend else -1 * ret -def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int]: +def convert_snowflake_datetime_format( + format, default_format, is_input_format=True +) -> Tuple[str, int]: """ unified processing of the time format converting snowflake date/time/timestamp format into python datetime format + + usage notes on the returning fractional seconds: + fractional seconds does not come into effect when parsing input, see following sql + alter session set TIME_OUTPUT_FORMAT = 'HH:MI:SS.FF9'; + select to_time('11:22:44.333333', 'HH:MI:SS.FF1'); + it still returns '11:22:44.333333' not '11:22:44.3' + however fractional seconds is used in controlling the output format """ format_to_use = format or default_format @@ -146,10 +155,7 @@ def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int] ff_index = str(time_fmt).index("FF") # handle precision string 'FF[0-9]' which could be like FF0, FF1, ..., FF9 if str(time_fmt[ff_index + 2 : ff_index + 3]).isdigit(): - # fractional seconds does not come into effect when parsing input, see follow sql - # alter session set TIME_OUTPUT_FORMAT = 'HH:MI:SS.FF9'; - # select to_time('11:22:44.333333', 'HH:MI:SS.FF1'); - # it still returns '11:22:44.333333' not '11:22:44.3' + fractional_seconds = int(time_fmt[ff_index + 2 : ff_index + 3]) # replace FF[0-9] with %f time_fmt = time_fmt[:ff_index] + "%f" + time_fmt[ff_index + 3 :] else: @@ -158,7 +164,9 @@ def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int] # 'FF' is not in the fmt pass - return time_fmt, fractional_seconds + # in live connection, input does not appreciate fractional_seconds in the format, + # input always treated as nanoseconds if FF[1-9] is specified + return time_fmt, 9 if is_input_format else fractional_seconds def convert_numeric_string_value_to_float_seconds(time: str) -> float: From f7d33dfc2b3ee65ab14cc914fcbbb49cae4c120b Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Thu, 16 May 2024 10:39:10 -0700 Subject: [PATCH 3/6] changelog --- CHANGELOG.md | 2 +- src/snowflake/snowpark/mock/_functions.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 04f2be8cd63..ef5fd555b07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ - Fixed a bug that stage operation can not handle directories. - Fixed a bug that `DataFrame.to_pandas` should take Snowflake numeric types with precision 38 as `int64`. - Fixed a bug that stored proc and udf should not remove imports already in the sys.path during the clean-up step. -- Fixed a bug that when processing datetime format, fractional second part is not handled properly. +- Fixed a bug that when processing datetime and time format, fractional second part is not handled properly. - Fixed a bug that on Windows platform that file operations was unable to properly handle file separator in directory name. - Fixed a bug that on Windows platform that when reading a pandas dataframe, IntervalType column with integer data can not be processed. - Fixed a bug that function `substr` and `substring` can not handle 0-based `start_expr`. diff --git a/src/snowflake/snowpark/mock/_functions.py b/src/snowflake/snowpark/mock/_functions.py index b3c61587e8d..251d827c19e 100644 --- a/src/snowflake/snowpark/mock/_functions.py +++ b/src/snowflake/snowpark/mock/_functions.py @@ -558,6 +558,7 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i ) _data = f"{data_parts[0]}.{seconds_part}" + # %f is optional if fractional seconds part doesn't show up in the input which means it is 0 nanoseconds if len(data_parts) == 1 and ".%f" in _time_format: _time_format = _time_format.replace(".%f", "") @@ -599,7 +600,7 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i if data.isdigit(): res.append(convert_int_string_to_time(data)) else: - # variant type does not support format + # variant type does not support format input res.append(dateutil.parser.parse(data).time()) elif isinstance(data, datetime.time): res.append(data) From e746375f501d96e6840514ba182c9d97a5157422 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 21 May 2024 15:45:22 -0700 Subject: [PATCH 4/6] changelog update --- CHANGELOG.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9489afbb3d0..f37e4295b59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Release History +## 1.18.0 (TBD) + +### Snowpark Local Testing Updates + +#### Bug Fixes + +- Fixed a bug that when processing datetime and time format, fractional second part is not handled properly. + ## 1.17.0 (2024-05-21) ### Snowpark Python API Updates @@ -41,7 +49,6 @@ #### Bug Fixes - Fixed a bug that stored procedure and UDF should not remove imports already in the `sys.path` during the clean-up step. -- Fixed a bug that when processing datetime format, the fractional second part is not handled properly. - Fixed a bug that on Windows platform that file operations was unable to properly handle file separator in directory name. - Fixed a bug that on Windows platform that when reading a pandas dataframe, IntervalType column with integer data can not be processed. - Fixed a bug that prevented users from being able to select multiple columns with the same alias. From d971e3de288cd2e903ebbcdcb01547c69706cd8b Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 21 May 2024 15:46:32 -0700 Subject: [PATCH 5/6] minor --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f37e4295b59..9f2fdc82e15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ #### Bug Fixes -- Fixed a bug that when processing datetime and time format, fractional second part is not handled properly. +- Fixed a bug that when processing time format, fractional second part is not handled properly. ## 1.17.0 (2024-05-21) @@ -49,6 +49,7 @@ #### Bug Fixes - Fixed a bug that stored procedure and UDF should not remove imports already in the `sys.path` during the clean-up step. +- - Fixed a bug that when processing datetime format, the fractional second part is not handled properly. - Fixed a bug that on Windows platform that file operations was unable to properly handle file separator in directory name. - Fixed a bug that on Windows platform that when reading a pandas dataframe, IntervalType column with integer data can not be processed. - Fixed a bug that prevented users from being able to select multiple columns with the same alias. From 0fa6ce369cc5cd369157c5a60d05488bbcf3c310 Mon Sep 17 00:00:00 2001 From: Adam Ling Date: Tue, 21 May 2024 15:46:50 -0700 Subject: [PATCH 6/6] nit --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f2fdc82e15..23eaea959ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,7 +49,7 @@ #### Bug Fixes - Fixed a bug that stored procedure and UDF should not remove imports already in the `sys.path` during the clean-up step. -- - Fixed a bug that when processing datetime format, the fractional second part is not handled properly. +- Fixed a bug that when processing datetime format, the fractional second part is not handled properly. - Fixed a bug that on Windows platform that file operations was unable to properly handle file separator in directory name. - Fixed a bug that on Windows platform that when reading a pandas dataframe, IntervalType column with integer data can not be processed. - Fixed a bug that prevented users from being able to select multiple columns with the same alias.