Skip to content

Commit

Permalink
respond to reviews
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-joshi committed Jun 17, 2024
1 parent 202ee28 commit 0c4dd8d
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13409,7 +13409,6 @@ def str_translate(self, table: dict) -> "SnowflakeQueryCompiler":
# Because TRANSLATE only supports 1-to-1 character mappings, any entries with multi-character
# values must be handled by REPLACE instead. Multi-character keys are always invalid.
single_char_pairs = {}
multi_char_pairs = {}
none_keys = set()
for key, value in table.items():
# Treat integers as unicode codepoints
Expand All @@ -13430,8 +13429,6 @@ def str_translate(self, table: dict) -> "SnowflakeQueryCompiler":
)
if value is None or len(value) == 0:
none_keys.add(key)
elif len(value) > 1:
multi_char_pairs[key] = value
else:
single_char_pairs[key] = value
source_alphabet = "".join(single_char_pairs.keys()) + "".join(none_keys)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1011,8 +1011,8 @@ def translate():
0 AAA
dtype: object
Furthermore, due to restrictions in the underlying SQL, Snowpark pandas requires all string
values must to be one unicode codepoint in length. To create replacements of multiple
Furthermore, due to restrictions in the underlying SQL, Snowpark pandas currently requires
all string values to be one unicode codepoint in length. To create replacements of multiple
characters, chain calls to `Series.str.replace` as needed.
Vanilla pandas code:
Expand Down
8 changes: 4 additions & 4 deletions tests/integ/modin/strings/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,12 +631,12 @@ def test_get_with_dict_label(key, expected_result):
# Outputs:
# - "head shaking vertically" = 1F642 + 200D + 2195 + FE0F
# - "mending heart" = 2764 + FE0F + 200D + 1FA79
# - "health worker" = 1F91D1 + 200D + 2605 + FE0F
# - "health worker" = 1F9D1 + 200D + 2695 + FE0F
["🙂‍↔️", "❤️‍🔥", "🧑‍⚖️"],
{
0x2194: 0x2195,
0x1F525: 0x1FA79,
0x2696: 0x2605,
0x2696: 0x2695,
},
),
],
Expand Down Expand Up @@ -675,8 +675,8 @@ def test_translate_without_maketrans():
{"😶‍🌫️": "a"}, # This emoji key is secretly 4 code points
{"aa": "a"}, # Key is 2 chars
# Mapping 1 char to multiple is valid in vanilla pandas, but we don't support this
{"a": "😶‍🌫️"}, # This emoji value is secretly 4 code points
{"a": "aa"}, # Value is 2 chars
{ord("a"): "😶‍🌫️"}, # This emoji value is secretly 4 code points
{ord("a"): "aa"}, # Value is 2 chars
],
)
@sql_count_checker(query_count=0)
Expand Down

0 comments on commit 0c4dd8d

Please sign in to comment.