Skip to content

Commit

Permalink
Merge branch 'main' into rdurrani-SNOW-1013917
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-rdurrani committed May 22, 2024
2 parents 0400ab1 + 28cc324 commit bf214f5
Show file tree
Hide file tree
Showing 11 changed files with 122 additions and 431 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/changedoc_snowpark_pandas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
pull_request:
types: [opened, synchronize, labeled, unlabeled]
branches:
- pandas-main
- main
paths:
- 'src/snowflake/snowpark/modin/**'

Expand Down
22 changes: 0 additions & 22 deletions .github/workflows/changelog_snowpark_pandas.yml

This file was deleted.

58 changes: 58 additions & 0 deletions .github/workflows/precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,64 @@ jobs:
.tox/.coverage
.tox/coverage.xml
combine-coverage:
if: ${{ success() || failure() }}
name: Combine coverage
needs: test
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/download-artifact@v4
with:
path: artifacts
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.9'
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Upgrade setuptools and pip
run: python -m pip install -U setuptools pip
- name: Install tox
run: python -m pip install tox
- name: Collect all coverages to one dir
run: |
python -c '
from pathlib import Path
import shutil
src_dir = Path("artifacts")
dst_dir = Path(".") / ".tox"
dst_dir.mkdir()
for src_file in src_dir.glob("*/.coverage"):
dst_file = dst_dir / ".coverage.{}".format(src_file.parent.name[9:])
print("{} copy to {}".format(src_file, dst_file))
shutil.copy(str(src_file), str(dst_file))'
- name: Combine coverages
run: python -m tox -e coverage
- name: Publish html coverage
uses: actions/upload-artifact@v4
with:
name: overall_cov_html
path: .tox/htmlcov
- name: Publish xml coverage
uses: actions/upload-artifact@v4
with:
name: overall_cov_xml
path: .tox/coverage.xml
- uses: codecov/codecov-action@v1
with:
file: .tox/coverage.xml
- name: Show coverage diff
run: |
pip install diff_cover
git log -n 5 --pretty=oneline
git fetch -f origin main:main || true
diff-cover .tox/coverage.xml --compare-branch=main --fail-under=95
doc:
needs: lint
name: Build Doc
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@

- Added partial support for `DataFrame.pivot_table` with no `index` parameter, as well as for `margins` parameter.

### Snowpark Local Testing Updates

#### Bug Fixes

- Fixed a bug where the fractional seconds part was not handled properly when processing a time format.

## 1.17.0 (2024-05-21)

### Snowpark Python API Updates
Expand Down
32 changes: 18 additions & 14 deletions src/snowflake/snowpark/mock/_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ def mock_to_time(
[x] For this timestamp, the function gets the number of seconds after the start of the Unix epoch. The function performs a modulo operation to get the remainder from dividing this number by the number of seconds in a day (86400): number_of_seconds % 86400
"""
import dateutil.parser

def convert_int_string_to_time(d: str):
return datetime.datetime.utcfromtimestamp(
Expand All @@ -549,14 +550,18 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i
seconds_part = data_parts[1]
# find the idx that the seconds part ends
idx = 0
while seconds_part[idx].isdigit():
while idx < len(seconds_part) and seconds_part[idx].isdigit():
idx += 1
# truncate to precision
seconds_part = (
seconds_part[: min(idx, _fractional_seconds)] + seconds_part[idx:]
)
_data = f"{data_parts[0]}.{seconds_part}"

# %f is optional: if the fractional seconds part does not appear in the input, it is treated as 0 nanoseconds
if len(data_parts) == 1 and ".%f" in _time_format:
_time_format = _time_format.replace(".%f", "")

target_datetime = datetime.datetime.strptime(
process_string_time_with_fractional_seconds(_data, _fractional_seconds),
_time_format,
Expand All @@ -578,13 +583,15 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i
time_fmt,
fractional_seconds,
) = convert_snowflake_datetime_format(_fmt, default_format="%H:%M:%S")

auto_detect = _fmt is None or str(_fmt).lower() == "auto"
if isinstance(datatype, StringType):
if data.isdigit():
res.append(convert_int_string_to_time(data))
else:
res.append(
convert_string_to_time(data, time_fmt, fractional_seconds)
dateutil.parser.parse(data).time()
if auto_detect
else convert_string_to_time(data, time_fmt, fractional_seconds)
)
elif isinstance(datatype, TimestampType):
res.append(data.time())
Expand All @@ -593,9 +600,8 @@ def convert_string_to_time(_data: str, _time_format: str, _fractional_seconds: i
if data.isdigit():
res.append(convert_int_string_to_time(data))
else:
res.append(
convert_string_to_time(data, time_fmt, fractional_seconds)
)
# variant type does not support format input
res.append(dateutil.parser.parse(data).time())
elif isinstance(data, datetime.time):
res.append(data)
else:
Expand Down Expand Up @@ -915,10 +921,9 @@ def convert_char(row):
return try_convert(convert_numeric_to_str, try_cast, data)
elif isinstance(source_datatype, (DateType, TimeType)):
default_format = _DEFAULT_OUTPUT_FORMAT.get(type(source_datatype))
(
format,
_,
) = convert_snowflake_datetime_format(_fmt, default_format=default_format)
(format, _,) = convert_snowflake_datetime_format(
_fmt, default_format=default_format, is_input_format=False
)
convert_date_time_to_str = (
datetime.datetime.strftime
if isinstance(source_datatype, DateType)
Expand All @@ -929,10 +934,9 @@ def convert_char(row):
)
elif isinstance(source_datatype, TimestampType):
default_format = _DEFAULT_OUTPUT_FORMAT.get(TimestampType)
(
format,
fractional_seconds,
) = convert_snowflake_datetime_format(_fmt, default_format)
(format, fractional_seconds,) = convert_snowflake_datetime_format(
_fmt, default_format, is_input_format=False
)
# handle 3f, can use str index
time_str = try_convert(
lambda x: datetime.date.strftime(x, format), try_cast, data
Expand Down
21 changes: 17 additions & 4 deletions src/snowflake/snowpark/mock/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,19 @@ def array_custom_comparator(ascend: bool, null_first: bool, a: Any, b: Any):
return ret if ascend else -1 * ret


def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int]:
def convert_snowflake_datetime_format(
format, default_format, is_input_format=True
) -> Tuple[str, int]:
"""
Unified processing of the time format:
converts a Snowflake date/time/timestamp format into a Python datetime format.
Usage notes on the returned fractional seconds:
the fractional seconds precision does not take effect when parsing input; see the following SQL:
alter session set TIME_OUTPUT_FORMAT = 'HH:MI:SS.FF9';
select to_time('11:22:44.333333', 'HH:MI:SS.FF1');
It still returns '11:22:44.333333', not '11:22:44.3'.
However, the fractional seconds precision is used to control the output format.
"""

format_to_use = format or default_format
Expand Down Expand Up @@ -155,7 +164,9 @@ def convert_snowflake_datetime_format(format, default_format) -> Tuple[str, int]
# 'FF' is not in the fmt
pass

return time_fmt, fractional_seconds
# in a live connection, input parsing does not honor the fractional_seconds precision in the format;
# input is always treated as nanoseconds if FF[1-9] is specified
return time_fmt, 9 if is_input_format else fractional_seconds


def convert_numeric_string_value_to_float_seconds(time: str) -> float:
Expand Down Expand Up @@ -189,8 +200,10 @@ def process_string_time_with_fractional_seconds(time: str, fractional_seconds) -
idx = 0
while idx < len(seconds_part) and seconds_part[idx].isdigit():
idx += 1
# truncate to precision
seconds_part = seconds_part[: min(idx, fractional_seconds)] + seconds_part[idx:]
# truncate to precision; Python can only handle microseconds, i.e. at most 6 fractional digits
seconds_part = (
seconds_part[: min(idx, fractional_seconds, 6)] + seconds_part[idx:]
)
ret = f"{time_parts[0]}.{seconds_part}"
return ret

Expand Down
Loading

0 comments on commit bf214f5

Please sign in to comment.