diff --git a/integration_tests/test_str_concat_in_seq_literals.py b/integration_tests/test_str_concat_in_seq_literals.py new file mode 100644 index 00000000..bc2f2233 --- /dev/null +++ b/integration_tests/test_str_concat_in_seq_literals.py @@ -0,0 +1,37 @@ +from core_codemods.str_concat_in_seq_literal import StrConcatInSeqLiteral +from integration_tests.base_test import ( + BaseIntegrationTest, + original_and_expected_from_code_path, +) + + +class TestStrConcatInSeqLiteral(BaseIntegrationTest): + codemod = StrConcatInSeqLiteral + code_path = "tests/samples/str_concat_in_sequence_literals.py" + original_code, expected_new_code = original_and_expected_from_code_path( + code_path, + [ + (1, """ "ab",\n"""), + (4, """ "gh",\n"""), + ], + ) + + # fmt: off + expected_diff =( + """--- \n""" + """+++ \n""" + """@@ -1,7 +1,7 @@\n""" + """ bad = [\n""" + """- "ab"\n""" + """+ "ab",\n""" + """ "cd",\n""" + """ "ef",\n""" + """- "gh"\n""" + """+ "gh",\n""" + """ "ij",\n""" + """ ]\n""") + # fmt: on + + expected_line_change = "1" + change_description = StrConcatInSeqLiteral.change_description + num_changes = 2 diff --git a/src/codemodder/scripts/generate_docs.py b/src/codemodder/scripts/generate_docs.py index 20e3e9c8..e2256edd 100644 --- a/src/codemodder/scripts/generate_docs.py +++ b/src/codemodder/scripts/generate_docs.py @@ -226,6 +226,10 @@ class DocMetadata: importance="Medium", guidance_explained="We believe this change is safe and will not cause any issues.", ), + "str-concat-in-sequence-literals": DocMetadata( + importance="Medium", + guidance_explained="While string concatenation inside a sequence iterable is likely a mistake, there are instances when you may choose to use them..", + ), } METADATA = CORE_METADATA | { diff --git a/src/core_codemods/__init__.py b/src/core_codemods/__init__.py index 2da21231..09f56835 100644 --- a/src/core_codemods/__init__.py +++ b/src/core_codemods/__init__.py @@ -60,6 +60,7 @@ from .sonar.sonar_flask_json_response_type import SonarFlaskJsonResponseType from .sonar.sonar_django_json_response_type import SonarDjangoJsonResponseType from .lazy_logging import LazyLogging +from .str_concat_in_seq_literal import StrConcatInSeqLiteral registry = CodemodCollection( origin="pixee", @@ -116,6 +117,7 @@ RemoveAssertionInPytestRaises, FixAssertTuple, LazyLogging, + StrConcatInSeqLiteral, ], ) diff --git a/src/core_codemods/docs/pixee_python_str-concat-in-sequence-literals.md b/src/core_codemods/docs/pixee_python_str-concat-in-sequence-literals.md new file mode 100644 index 00000000..ddc6cf9b --- /dev/null +++ b/src/core_codemods/docs/pixee_python_str-concat-in-sequence-literals.md @@ -0,0 +1,14 @@ +This codemod fixes cases of implicit string concatenation inside lists, sets, or tuples. This is most likely a mistake: you probably meant include a comma in between the concatenated strings. + +Our changes look something like this: +```diff +bad = [ +- "ab" ++ "ab", + "cd", + "ef", +- "gh" ++ "gh", + "ij", +] +``` diff --git a/src/core_codemods/str_concat_in_seq_literal.py b/src/core_codemods/str_concat_in_seq_literal.py new file mode 100644 index 00000000..36b859fc --- /dev/null +++ b/src/core_codemods/str_concat_in_seq_literal.py @@ -0,0 +1,76 @@ +import libcst as cst +from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod +from codemodder.codemods.utils_mixin import NameResolutionMixin, AncestorPatternsMixin + + +class StrConcatInSeqLiteral(SimpleCodemod, NameResolutionMixin, AncestorPatternsMixin): + metadata = Metadata( + name="str-concat-in-sequence-literals", + summary="Convert Implicit String Concat Inside Sequence into Individual Elements", + review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, + references=[], + ) + change_description = "Convert implicit string concat into individual elements." + + def leave_List(self, original_node: cst.List, updated_node: cst.List) -> cst.List: + return self.process_node_elements(original_node, updated_node) + + def leave_Tuple( + self, original_node: cst.Tuple, updated_node: cst.Tuple + ) -> cst.Tuple: + return self.process_node_elements(original_node, updated_node) + + def leave_Set(self, original_node: cst.Set, updated_node: cst.Set) -> cst.Set: + return self.process_node_elements(original_node, updated_node) + + def process_node_elements( + self, original_node: cst.CSTNode, updated_node: cst.CSTNode + ) -> cst.CSTNode: + if not self.filter_by_path_includes_or_excludes( + self.node_position(original_node) + ): + return updated_node + return updated_node.with_changes(elements=self._process_elements(original_node)) + + def _process_elements(self, original_node: cst.List) -> list[cst.Element]: + new_elements = [] + prev_comma = None + for element in original_node.elements: + match element.value: + case cst.ConcatenatedString(): + self.report_change(original_node) + flattened_parts = self._flatten_concatenated_strings(element.value) + for part in flattened_parts: + # the very last element should only have a comma if the last element + # of the original list had a comma + if ( + element == original_node.elements[-1] + and part == flattened_parts[-1] + ): + new_elements.append( + cst.Element(value=part, comma=element.comma) + ) + else: + new_elements.append( + cst.Element( + value=part, comma=prev_comma or element.comma + ) + ) + case _: + prev_comma = element.comma + new_elements.append(element) + return new_elements + + def _flatten_concatenated_strings( + self, concat_node: cst.ConcatenatedString, parts=None + ): + if parts is None: + parts = [] + + for node in concat_node.left, concat_node.right: + match node: + case cst.ConcatenatedString(): + self._flatten_concatenated_strings(node, parts) + case _: + parts.append(node) + return parts diff --git a/tests/codemods/test_str_concat_in_seq_literal.py b/tests/codemods/test_str_concat_in_seq_literal.py new file mode 100644 index 00000000..a45e2269 --- /dev/null +++ b/tests/codemods/test_str_concat_in_seq_literal.py @@ -0,0 +1,363 @@ +import pytest +from core_codemods.str_concat_in_seq_literal import StrConcatInSeqLiteral +from tests.codemods.base_codemod_test import BaseCodemodTest + + +class TestList(BaseCodemodTest): + codemod = StrConcatInSeqLiteral + + def test_no_change(self, tmpdir): + input_code = """ + good = ["ab", "cd"] + good = [f"ab", "cd"] + var = "ab" + good = [f"{var}", "cd"] + """ + self.run_and_assert(tmpdir, input_code, input_code) + + @pytest.mark.parametrize( + "input_code,expected_output", + [ + ( + """ + bad = ["ab" "cd", "ef", "gh"] + """, + """ + bad = ["ab", "cd", "ef", "gh"] + """, + ), + ( + """ + bad = [ + "ab" + "cd", + "ef", + "gh"] + """, + """ + bad = [ + "ab", + "cd", + "ef", + "gh"] + """, + ), + ( + """ + bad = [ + "ab" + "cd", + "ef", + "gh" + ] + """, + """ + bad = [ + "ab", + "cd", + "ef", + "gh" + ] + """, + ), + ], + ) + def test_change(self, tmpdir, input_code, expected_output): + self.run_and_assert(tmpdir, input_code, expected_output) + + @pytest.mark.parametrize("final_comma", ["", ","]) + def test_change_multiple(self, tmpdir, final_comma): + input_code = f""" + bad = [ + "ab" + "cd", + "ef", + "gh" + "ij"{final_comma} + ] + """ + expected_output = f""" + bad = [ + "ab", + "cd", + "ef", + "gh", + "ij"{final_comma} + ] + """ + self.run_and_assert(tmpdir, input_code, expected_output, num_changes=2) + + def test_change_all(self, tmpdir): + input_code = """ + bad = [ + "ab" + "cd" + "ef" + "gh" + "ij" + ] + """ + expected_output = """ + bad = [ + "ab", "cd", "ef", "gh", "ij" + ] + """ + self.run_and_assert(tmpdir, input_code, expected_output) + + def test_exclude_line(self, tmpdir): + input_code = ( + expected + ) = """ + bad = ["ab" "cd" "ef" "gh" "ij"] + """ + lines_to_exclude = [2] + self.run_and_assert( + tmpdir, + input_code, + expected, + lines_to_exclude=lines_to_exclude, + ) + + +class TestSet(BaseCodemodTest): + codemod = StrConcatInSeqLiteral + + def test_no_change(self, tmpdir): + input_code = """ + good = {"ab", "cd"} + good = {f"ab", "cd"} + var = "ab" + good = {f"{var}", "cd"} + """ + self.run_and_assert(tmpdir, input_code, input_code) + + @pytest.mark.parametrize( + "input_code,expected_output", + [ + ( + """ + bad = {"ab" "cd", "ef", "gh"} + """, + """ + bad = {"ab", "cd", "ef", "gh"} + """, + ), + ( + """ + bad = { + "ab" + "cd", + "ef", + "gh"} + """, + """ + bad = { + "ab", + "cd", + "ef", + "gh"} + """, + ), + ( + """ + bad = { + "ab" + "cd", + "ef", + "gh" + } + """, + """ + bad = { + "ab", + "cd", + "ef", + "gh" + } + """, + ), + ], + ) + def test_change(self, tmpdir, input_code, expected_output): + self.run_and_assert(tmpdir, input_code, expected_output) + + @pytest.mark.parametrize("final_comma", ["", ","]) + def test_change_multiple(self, tmpdir, final_comma): + input_code = f""" + bad = {{ + "ab" + "cd", + "ef", + "gh" + "ij"{final_comma} + }} + """ + expected_output = f""" + bad = {{ + "ab", + "cd", + "ef", + "gh", + "ij"{final_comma} + }} + """ + self.run_and_assert(tmpdir, input_code, expected_output, num_changes=2) + + def test_change_all(self, tmpdir): + input_code = """ + bad = { + "ab" + "cd" + "ef" + "gh" + "ij" + } + """ + expected_output = """ + bad = { + "ab", "cd", "ef", "gh", "ij" + } + """ + self.run_and_assert(tmpdir, input_code, expected_output) + + def test_exclude_line(self, tmpdir): + input_code = ( + expected + ) = """ + bad = {"ab" "cd" "ef" "gh" "ij"} + """ + lines_to_exclude = [2] + self.run_and_assert( + tmpdir, + input_code, + expected, + lines_to_exclude=lines_to_exclude, + ) + + +class TestTuple(BaseCodemodTest): + codemod = StrConcatInSeqLiteral + + def test_no_change(self, tmpdir): + input_code = """ + good = ("ab", "cd") + good = (f"ab", "cd") + var = "ab" + good = (f"{var}", "cd") + """ + self.run_and_assert(tmpdir, input_code, input_code) + + @pytest.mark.parametrize( + "input_code,expected_output", + [ + ( + """ + bad = ("ab" "cd", "ef", "gh") + """, + """ + bad = ("ab", "cd", "ef", "gh") + """, + ), + ( + """ + bad = ( + "ab" + "cd", + "ef", + "gh") + """, + """ + bad = ( + "ab", + "cd", + "ef", + "gh") + """, + ), + ( + """ + bad = ( + "ab" + "cd", + "ef", + "gh" + ) + """, + """ + bad = ( + "ab", + "cd", + "ef", + "gh" + ) + """, + ), + ], + ) + def test_change(self, tmpdir, input_code, expected_output): + self.run_and_assert(tmpdir, input_code, expected_output) + + @pytest.mark.parametrize("final_comma", ["", ","]) + def test_change_multiple(self, tmpdir, final_comma): + input_code = f""" + bad = ( + "ab" + "cd", + "ef", + "gh" + "ij"{final_comma} + ) + """ + expected_output = f""" + bad = ( + "ab", + "cd", + "ef", + "gh", + "ij"{final_comma} + ) + """ + self.run_and_assert(tmpdir, input_code, expected_output, num_changes=2) + + def test_no_change_tuple_lookalike(self, tmpdir): + input_code = """ + concat = ( + "ab" + "cd" + "ef" + "gh" + "ij" + ) + """ + self.run_and_assert(tmpdir, input_code, input_code) + + def test_change_all(self, tmpdir): + input_code = """ + bad = ( + "ab" + "cd" + "ef" + "gh" + "ij", + ) + """ + expected_output = """ + bad = ( + "ab","cd","ef","gh","ij", + ) + """ + self.run_and_assert(tmpdir, input_code, expected_output) + + def test_exclude_line(self, tmpdir): + input_code = ( + expected + ) = """ + bad = ("ab" "cd" "ef" "gh" "ij") + """ + lines_to_exclude = [2] + self.run_and_assert( + tmpdir, + input_code, + expected, + lines_to_exclude=lines_to_exclude, + ) diff --git a/tests/samples/str_concat_in_sequence_literals.py b/tests/samples/str_concat_in_sequence_literals.py new file mode 100644 index 00000000..d53a6236 --- /dev/null +++ b/tests/samples/str_concat_in_sequence_literals.py @@ -0,0 +1,7 @@ +bad = [ + "ab" + "cd", + "ef", + "gh" + "ij", +]