From 0c4dd8d49000500afda723c606ae427f73baef35 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Fri, 14 Jun 2024 17:40:41 -0700 Subject: [PATCH] respond to reviews --- .../modin/plugin/compiler/snowflake_query_compiler.py | 3 --- .../snowpark/modin/plugin/docstrings/series_utils.py | 4 ++-- tests/integ/modin/strings/test_strings.py | 8 ++++---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 9b763269497..ac02c5f6578 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -13409,7 +13409,6 @@ def str_translate(self, table: dict) -> "SnowflakeQueryCompiler": # Because TRANSLATE only supports 1-to-1 character mappings, any entries with multi-character # values must be handled by REPLACE instead. 1-character keys are always invalid. single_char_pairs = {} - multi_char_pairs = {} none_keys = set() for key, value in table.items(): # Treat integers as unicode codepoints @@ -13430,8 +13429,6 @@ def str_translate(self, table: dict) -> "SnowflakeQueryCompiler": ) if value is None or len(value) == 0: none_keys.add(key) - elif len(value) > 1: - multi_char_pairs[key] = value else: single_char_pairs[key] = value source_alphabet = "".join(single_char_pairs.keys()) + "".join(none_keys) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py index 45e9226c796..df7663cfca1 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py @@ -1011,8 +1011,8 @@ def translate(): 0 AAA dtype: object - Furthermore, due to restrictions in the underlying SQL, Snowpark pandas requires all string - values must to be one unicode codepoint in length. To create replacements of multiple + Furthermore, due to restrictions in the underlying SQL, Snowpark pandas currently requires + all string values to be one unicode codepoint in length. To create replacements of multiple characters, chain calls to `Series.str.replace` as needed. Vanilla pandas code: diff --git a/tests/integ/modin/strings/test_strings.py b/tests/integ/modin/strings/test_strings.py index d8ffb9907f3..60fac375ca8 100644 --- a/tests/integ/modin/strings/test_strings.py +++ b/tests/integ/modin/strings/test_strings.py @@ -631,12 +631,12 @@ def test_get_with_dict_label(key, expected_result): # Outputs: # - "head shaking vertically" = 1F642 + 200D + 2195 + FE0F # - "mending heart" = 2764 + FE0F + 200D + 1FA79 - # - "health worker" = 1F91D1 + 200D + 2605 + FE0F + # - "health worker" = 1F91D1 + 200D + 2695 + FE0F ["πŸ™‚β€β†”οΈ", "❀️‍πŸ”₯", "πŸ§‘β€βš–οΈ"], { 0x2194: 0x2195, 0x1F525: 0x1FA79, - 0x2696: 0x2605, + 0x2696: 0x2695, }, ), ], @@ -675,8 +675,8 @@ def test_translate_without_maketrans(): {"πŸ˜Άβ€πŸŒ«οΈ": "a"}, # This emoji key is secretly 4 code points {"aa": "a"}, # Key is 2 chars # Mapping 1 char to multiple is valid in vanilla pandas, but we don't support this - {"a": "πŸ˜Άβ€πŸŒ«οΈ"}, # This emoji value is secretly 4 code points - {"a": "aa"}, # Value is 2 chars + {ord("a"): "πŸ˜Άβ€πŸŒ«οΈ"}, # This emoji value is secretly 4 code points + {ord("a"): "aa"}, # Value is 2 chars ], ) @sql_count_checker(query_count=0)