Skip to content

Commit

Permalink
SNOW-1848977, SNOW-1848979: Add support for Series.str.ljust/rjust (#…
Browse files Browse the repository at this point in the history
…2726)

<!---
Please answer these questions before creating your pull request. Thanks!
--->

1. Which Jira issue is this PR addressing? Make sure that there is an
accompanying issue to your PR.

   <!---
   In this section, please add a Snowflake Jira issue number.

Note that if a corresponding GitHub issue exists, you should still
include
   the Snowflake Jira issue number. For example, for GitHub issue
#1400, you should
   add "SNOW-1335071" here.
    --->

   Fixes SNOW-1848977, SNOW-1848979

2. Fill out the following pre-review checklist:

- [x] I am adding new automated test(s) to verify the correctness of my
new code
- [ ] If this test skips Local Testing mode, I'm requesting review from
@snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
- [ ] If this is a new feature/behavior, I'm adding the Local Testing
parity changes.
- [ ] I acknowledge that I have ensured my changes are thread-safe.
Follow the link for more information: [Thread-safe Developer
Guidelines](https://github.com/snowflakedb/snowpark-python/blob/main/CONTRIBUTING.md#thread-safe-development)

3. Please describe how your code solves the related issue.

   Add support for Series.str.ljust/rjust.
  • Loading branch information
sfc-gh-helmeleegy authored Dec 7, 2024
1 parent 0c74413 commit 91b49f4
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 20 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

## 1.27.0 (TBD)

### Snowpark pandas API Updates

#### New Features

- Added support for `Series.str.ljust` and `Series.str.rjust`.


## 1.26.0 (2024-12-05)

### Snowpark Python API Updates
Expand Down
4 changes: 2 additions & 2 deletions docs/source/modin/supported/series_str_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ the method in the left column.
| ``len`` | P | Only string and list data values are supported. |
| | | All column values must be of the same type. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ljust`` | N | |
| ``ljust`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``lower`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand All @@ -106,7 +106,7 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rindex`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rjust`` | N | |
| ``rjust`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``rpartition`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
least,
length,
lower,
lpad,
ltrim,
max as max_,
min as min_,
Expand All @@ -133,6 +134,7 @@
reverse,
round as snowpark_round,
row_number,
rpad,
rtrim,
second,
substring,
Expand Down Expand Up @@ -16450,11 +16452,74 @@ def str_len(self, **kwargs: Any) -> "SnowflakeQueryCompiler":
)
)

def str_ljust(self, width: int, fillchar: str = " ") -> None:
ErrorMessage.method_not_implemented_error("ljust", "Series.str")
def str_ljust(self, width: int, fillchar: str = " ") -> "SnowflakeQueryCompiler":
    """
    Pad right side of strings in the Series/Index.

    Equivalent to str.ljust().

    Parameters
    ----------
    width : int
        Minimum width of resulting string; additional characters will be filled with fillchar.
    fillchar : str
        Additional character for filling, default is whitespace.

    Returns
    -------
    SnowflakeQueryCompiler representing result of the string operation.

    Raises
    ------
    TypeError
        If width is not an integer. ``bool`` values are rejected as well, even
        though ``bool`` is a subclass of ``int``.
    """
    # bool passes a plain isinstance(..., int) check, so it has to be excluded
    # explicitly; native pandas raises the same TypeError for a bool width.
    if isinstance(width, bool) or not isinstance(width, int):
        raise TypeError(
            f"width must be of integer type, not {type(width).__name__}"
        )

    def output_col(column: SnowparkColumn) -> SnowparkColumn:
        # Clamp the target length to at least the current length so values that
        # are already longer than `width` come back unchanged instead of being
        # cut down to `width` characters by rpad.
        target_length = greatest(length(column), pandas_lit(width))
        padded = rpad(column, target_length, pandas_lit(fillchar))
        # NOTE(review): _replace_non_str is defined elsewhere; presumably it
        # keeps the padded result only for string-valued cells - confirm.
        return self._replace_non_str(column, padded)

    new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns(
        output_col
    )
    return SnowflakeQueryCompiler(new_internal_frame)

def str_rjust(self, width: int, fillchar: str = " ") -> None:
ErrorMessage.method_not_implemented_error("rjust", "Series.str")
def str_rjust(self, width: int, fillchar: str = " ") -> "SnowflakeQueryCompiler":
    """
    Pad left side of strings in the Series/Index.

    Equivalent to str.rjust().

    Parameters
    ----------
    width : int
        Minimum width of resulting string; additional characters will be filled with fillchar.
    fillchar : str
        Additional character for filling, default is whitespace.

    Returns
    -------
    SnowflakeQueryCompiler representing result of the string operation.

    Raises
    ------
    TypeError
        If width is not an integer. ``bool`` values are rejected as well, even
        though ``bool`` is a subclass of ``int``.
    """
    # bool passes a plain isinstance(..., int) check, so it has to be excluded
    # explicitly; native pandas raises the same TypeError for a bool width.
    if isinstance(width, bool) or not isinstance(width, int):
        raise TypeError(
            f"width must be of integer type, not {type(width).__name__}"
        )

    def output_col(column: SnowparkColumn) -> SnowparkColumn:
        # Clamp the target length to at least the current length so values that
        # are already longer than `width` come back unchanged instead of being
        # cut down to `width` characters by lpad.
        target_length = greatest(length(column), pandas_lit(width))
        padded = lpad(column, target_length, pandas_lit(fillchar))
        # NOTE(review): _replace_non_str is defined elsewhere; presumably it
        # keeps the padded result only for string-valued cells - confirm.
        return self._replace_non_str(column, padded)

    new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns(
        output_col
    )
    return SnowflakeQueryCompiler(new_internal_frame)

def str_normalize(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None:
ErrorMessage.method_not_implemented_error("normalize", "Series.str")
Expand Down
91 changes: 89 additions & 2 deletions src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,97 @@ def center():
pass

def ljust():
    """
    Pad right side of strings in the Series/Index.

    Equivalent to str.ljust().

    Parameters
    ----------
    width : int
        Minimum width of resulting string; additional characters will be filled with fillchar.
    fillchar : str
        Additional character for filling, default is whitespace.

    Returns
    -------
    Series/Index of objects.

    Examples
    --------
    For Series.str.center:

    >>> ser = pd.Series(['dog', 'bird', 'mouse'])
    >>> ser.str.center(8, fillchar='.')  # doctest: +SKIP
    0    ..dog...
    1    ..bird..
    2    .mouse..
    dtype: object

    For Series.str.ljust:

    >>> ser = pd.Series(['dog', 'bird', 'mouse'])
    >>> ser.str.ljust(8, fillchar='.')
    0    dog.....
    1    bird....
    2    mouse...
    dtype: object

    For Series.str.rjust:

    >>> ser = pd.Series(['dog', 'bird', 'mouse'])
    >>> ser.str.rjust(8, fillchar='.')
    0    .....dog
    1    ....bird
    2    ...mouse
    dtype: object
    """

def rjust():
    """
    Pad left side of strings in the Series/Index.

    Equivalent to str.rjust().

    Parameters
    ----------
    width : int
        Minimum width of resulting string; additional characters will be filled with fillchar.
    fillchar : str
        Additional character for filling, default is whitespace.

    Returns
    -------
    Series/Index of objects.

    Examples
    --------
    For Series.str.center:

    >>> ser = pd.Series(['dog', 'bird', 'mouse'])
    >>> ser.str.center(8, fillchar='.')  # doctest: +SKIP
    0    ..dog...
    1    ..bird..
    2    .mouse..
    dtype: object

    For Series.str.ljust:

    >>> ser = pd.Series(['dog', 'bird', 'mouse'])
    >>> ser.str.ljust(8, fillchar='.')
    0    dog.....
    1    bird....
    2    mouse...
    dtype: object

    For Series.str.rjust:

    >>> ser = pd.Series(['dog', 'bird', 'mouse'])
    >>> ser.str.rjust(8, fillchar='.')
    0    .....dog
    1    ....bird
    2    ...mouse
    dtype: object
    """

def zfill():
pass
Expand Down
32 changes: 32 additions & 0 deletions tests/integ/modin/series/test_str_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,38 @@ def test_str_no_params(func):
)


@pytest.mark.parametrize("func", ["ljust", "rjust"])
@pytest.mark.parametrize("width", [-1, 0, 1, 10, 100])
@pytest.mark.parametrize("fillchar", [" ", "#"])
@sql_count_checker(query_count=1)
def test_str_ljust_rjust(func, width, fillchar):
    """Run str.ljust/str.rjust on Snowpark pandas and native pandas via eval_snowpark_pandas_result."""

    def justify(series):
        # Resolve the accessor method by name so one body covers both functions.
        method = getattr(series.str, func)
        return method(width=width, fillchar=fillchar)

    native_series = native_pd.Series(TEST_DATA)
    snow_series = pd.Series(native_series)
    eval_snowpark_pandas_result(snow_series, native_series, justify)


@pytest.mark.parametrize("func", ["ljust", "rjust"])
@pytest.mark.parametrize(
    "width, fillchar",
    [
        (None, " "),
        (10, ""),
        (10, "ab"),
        (10, None),
    ],
)
@sql_count_checker(query_count=0)
def test_str_ljust_rjust_neg(func, width, fillchar):
    """Check that invalid width/fillchar arguments raise TypeError with no query issued."""
    snow_series = pd.Series(native_pd.Series(TEST_DATA))
    bad_arguments = {"width": width, "fillchar": fillchar}
    with pytest.raises(TypeError):
        getattr(snow_series.str, func)(**bad_arguments)


@pytest.mark.parametrize(
"data",
[
Expand Down
12 changes: 0 additions & 12 deletions tests/unit/modin/test_series_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ def test_str_cat_no_others(mock_str_register, mock_series):
(lambda s: s.str.join("_"), "join"),
(lambda s: s.str.pad(10), "pad"),
(lambda s: s.str.center(10), "center"),
(lambda s: s.str.ljust(8), "ljust"),
(lambda s: s.str.rjust(8), "rjust"),
(lambda s: s.str.zfill(8), "zfill"),
(lambda s: s.str.wrap(3), "wrap"),
(lambda s: s.str.slice_replace(start=3, stop=5, repl="abc"), "slice_replace"),
Expand Down Expand Up @@ -112,16 +110,6 @@ def test_str_methods_with_dataframe_return(func, func_name, mock_series):
TypeError,
"fillchar must be a character, not str",
),
(
lambda s: s.str.ljust(3, fillchar="abc"),
TypeError,
"fillchar must be a character, not str",
),
(
lambda s: s.str.rjust(3, fillchar="abc"),
TypeError,
"fillchar must be a character, not str",
),
(lambda s: s.str.wrap(-1), ValueError, r"invalid width -1 \(must be > 0\)"),
(
lambda s: s.str.count(12),
Expand Down

0 comments on commit 91b49f4

Please sign in to comment.