Skip to content

Commit

Permalink
SNOW-1374013: local testing fix dataframe time parse precision error (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-aling authored May 21, 2024
1 parent a2d33a0 commit 74aaf84
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 27 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Release History

## 1.18.0 (TBD)

### Snowpark Local Testing Updates

#### Bug Fixes

- Fixed a bug where the fractional seconds part was not handled properly when processing time formats.

## 1.17.0 (2024-05-21)

### Snowpark Python API Updates
Expand Down
32 changes: 18 additions & 14 deletions src/snowflake/snowpark/mock/_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ def mock_to_time(
[x] For this timestamp, the function gets the number of seconds after the start of the Unix epoch. The function performs a modulo operation to get the remainder from dividing this number by the number of seconds in a day (86400): number_of_seconds % 86400
"""
import dateutil.parser

def convert_int_string_to_time(d: str):
return datetime.datetime.utcfromtimestamp(
Expand All @@ -549,14 +550,18 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i
seconds_part = data_parts[1]
# find the idx that the seconds part ends
idx = 0
while seconds_part[idx].isdigit():
while idx < len(seconds_part) and seconds_part[idx].isdigit():
idx += 1
# truncate to precision
seconds_part = (
seconds_part[: min(idx, _fractional_seconds)] + seconds_part[idx:]
)
_data = f"{data_parts[0]}.{seconds_part}"

# %f is optional: if the fractional seconds part doesn't show up in the input, it is treated as 0 nanoseconds
if len(data_parts) == 1 and ".%f" in _time_format:
_time_format = _time_format.replace(".%f", "")

target_datetime = datetime.datetime.strptime(
process_string_time_with_fractional_seconds(_data, _fractional_seconds),
_time_format,
Expand All @@ -578,13 +583,15 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i
time_fmt,
fractional_seconds,
) = convert_snowflake_datetime_format(_fmt, default_format="%H:%M:%S")

auto_detect = _fmt is None or str(_fmt).lower() == "auto"
if isinstance(datatype, StringType):
if data.isdigit():
res.append(convert_int_string_to_time(data))
else:
res.append(
convert_string_to_time(data, time_fmt, fractional_seconds)
dateutil.parser.parse(data).time()
if auto_detect
else convert_string_to_time(data, time_fmt, fractional_seconds)
)
elif isinstance(datatype, TimestampType):
res.append(data.time())
Expand All @@ -593,9 +600,8 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i
if data.isdigit():
res.append(convert_int_string_to_time(data))
else:
res.append(
convert_string_to_time(data, time_fmt, fractional_seconds)
)
# variant type does not support format input
res.append(dateutil.parser.parse(data).time())
elif isinstance(data, datetime.time):
res.append(data)
else:
Expand Down Expand Up @@ -915,10 +921,9 @@ def convert_char(row):
return try_convert(convert_numeric_to_str, try_cast, data)
elif isinstance(source_datatype, (DateType, TimeType)):
default_format = _DEFAULT_OUTPUT_FORMAT.get(type(source_datatype))
(
format,
_,
) = convert_snowflake_datetime_format(_fmt, default_format=default_format)
(format, _,) = convert_snowflake_datetime_format(
_fmt, default_format=default_format, is_input_format=False
)
convert_date_time_to_str = (
datetime.datetime.strftime
if isinstance(source_datatype, DateType)
Expand All @@ -929,10 +934,9 @@ def convert_char(row):
)
elif isinstance(source_datatype, TimestampType):
default_format = _DEFAULT_OUTPUT_FORMAT.get(TimestampType)
(
format,
fractional_seconds,
) = convert_snowflake_datetime_format(_fmt, default_format)
(format, fractional_seconds,) = convert_snowflake_datetime_format(
_fmt, default_format, is_input_format=False
)
# handle 3f, can use str index
time_str = try_convert(
lambda x: datetime.date.strftime(x, format), try_cast, data
Expand Down
21 changes: 17 additions & 4 deletions src/snowflake/snowpark/mock/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,19 @@ def array_custom_comparator(ascend: bool, null_first: bool, a: Any, b: Any):
return ret if ascend else -1 * ret


def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int]:
def convert_snowflake_datetime_format(
format, default_format, is_input_format=True
) -> Tuple[str, int]:
"""
unified processing of the time format
converting snowflake date/time/timestamp format into python datetime format
usage notes on the returned fractional seconds:
the fractional seconds precision in the format has no effect when parsing input, see the following sql
alter session set TIME_OUTPUT_FORMAT = 'HH:MI:SS.FF9';
select to_time('11:22:44.333333', 'HH:MI:SS.FF1');
it still returns '11:22:44.333333', not '11:22:44.3'
however, the fractional seconds precision is used to control the output format
"""

format_to_use = format or default_format
Expand Down Expand Up @@ -155,7 +164,9 @@ def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int]
# 'FF' is not in the fmt
pass

return time_fmt, fractional_seconds
# in a live connection, input parsing ignores the fractional_seconds precision given in the format;
# input is always treated as nanoseconds if FF[1-9] is specified
return time_fmt, 9 if is_input_format else fractional_seconds


def convert_numeric_string_value_to_float_seconds(time: str) -> float:
Expand Down Expand Up @@ -189,8 +200,10 @@ def process_string_time_with_fractional_seconds(time: str, fractional_seconds) -
idx = 0
while idx < len(seconds_part) and seconds_part[idx].isdigit():
idx += 1
# truncate to precision
seconds_part = seconds_part[: min(idx, fractional_seconds)] + seconds_part[idx:]
# truncate to precision; python can only handle microseconds, which is 6 digits
seconds_part = (
seconds_part[: min(idx, fractional_seconds, 6)] + seconds_part[idx:]
)
ret = f"{time_parts[0]}.{seconds_part}"
return ret

Expand Down
24 changes: 20 additions & 4 deletions tests/integ/scala/test_function_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,18 @@ def test_to_time(session, local_testing_mode):
[
Row(time(1, 2, 3)),
Row(time(22, 33, 44)),
Row(time(22, 33, 44, 123000)),
Row(time(22, 33, 44, 567890)),
],
)

Utils.check_answer(
df.select(*[to_time(column, "HH24:MI:SS.FF4") for column in df.columns]),
[
Row(time(1, 2, 3)),
Row(time(22, 33, 44)),
Row(time(22, 33, 44, 123000)),
Row(time(22, 33, 44, 567890)),
],
)

Expand Down Expand Up @@ -1583,7 +1595,9 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode)
to_timestamp_tz,
[
Row(
datetime(2024, 2, 1, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")),
datetime(
2024, 2, 1, 0, 0, 0, 123456, tzinfo=pytz.timezone("Etc/GMT+8")
),
),
Row(
datetime(2024, 2, 2, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")),
Expand All @@ -1596,7 +1610,7 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode)
(
to_timestamp_ntz,
[
Row(datetime(2024, 2, 1, 0, 0)),
Row(datetime(2024, 2, 1, 0, 0, 0, 123456)),
Row(datetime(2024, 2, 2, 0, 0)),
Row(datetime(2024, 2, 3, 0, 0)),
],
Expand All @@ -1605,7 +1619,9 @@ def test_to_timestamp_fmt_string(to_type, expected, session, local_testing_mode)
to_timestamp_ltz,
[
Row(
datetime(2024, 2, 1, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")),
datetime(
2024, 2, 1, 0, 0, 0, 123456, tzinfo=pytz.timezone("Etc/GMT+8")
),
),
Row(
datetime(2024, 2, 2, 0, 0, tzinfo=pytz.timezone("Etc/GMT+8")),
Expand All @@ -1626,7 +1642,7 @@ def test_to_timestamp_fmt_column(to_type, expected, session, local_testing_mode)
):
LocalTimezone.set_local_timezone(pytz.timezone("Etc/GMT+8"))
data = [
("2024-02-01 00:00:00.000000", "YYYY-MM-DD HH24:MI:SS.FF"),
("2024-02-01 00:00:00.123456789", "YYYY-MM-DD HH24:MI:SS.FF1"),
("20240202000000000000", "YYYYMMDDHH24MISSFF"),
("03 Feb 2024 00:00:00", "DD mon YYYY HH24:MI:SS"),
]
Expand Down
4 changes: 0 additions & 4 deletions tests/integ/scala/test_table_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,6 @@ def test_table_with_semi_structured_types(session, semi_structured_table):
)


@pytest.mark.skipif(
"config.getoption('local_testing_mode', default=False)",
reason="SNOW-1374013: Local testing fails to parse time '09:15:29.999999'",
)
def test_table_with_time_type(session, table_with_time):
df = session.table(table_with_time)
# snowflake time has accuracy to 0.99999999. Python has accuracy to 0.999999.
Expand Down
2 changes: 1 addition & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ def datetime_primitives2(cls, session: "Session") -> DataFrame:
@classmethod
def time_primitives1(cls, session: "Session") -> DataFrame:
# simple string data
data = [("01:02:03",), ("22:33:44",)]
data = [("01:02:03",), ("22:33:44",), ("22:33:44.123",), ("22:33:44.56789",)]
schema = StructType([StructField("a", StringType())])
return session.create_dataframe(data, schema)

Expand Down

0 comments on commit 74aaf84

Please sign in to comment.