pixee · drdavella · Feb 16, 2024 · Feb 14, 2024 · Feb 14, 2024 · Feb 14, 2024
@@ -0,0 +1,37 @@
+from core_codemods.str_concat_in_seq_literal import StrConcatInSeqLiteral
+from integration_tests.base_test import (
+    BaseIntegrationTest,
+    original_and_expected_from_code_path,
+)
+
+
+class TestStrConcatInSeqLiteral(BaseIntegrationTest):
+    codemod = StrConcatInSeqLiteral
+    code_path = "tests/samples/str_concat_in_sequence_literals.py"
+    original_code, expected_new_code = original_and_expected_from_code_path(
+        code_path,
+        [
+            (1, """    "ab",\n"""),
+            (4, """    "gh",\n"""),
+        ],
+    )
+
+    # fmt: off
+    expected_diff =(
+    """--- \n"""
+    """+++ \n"""
+    """@@ -1,7 +1,7 @@\n"""
+    """ bad = [\n"""
+    """-    "ab"\n"""
+    """+    "ab",\n"""
+    """     "cd",\n"""
+    """     "ef",\n"""
+    """-    "gh"\n"""
+    """+    "gh",\n"""
+    """     "ij",\n"""
+    """ ]\n""")
+    # fmt: on
+
+    expected_line_change = "1"
+    change_description = StrConcatInSeqLiteral.change_description
+    num_changes = 2
@@ -226,6 +226,10 @@ class DocMetadata:
         importance="Medium",
         guidance_explained="We believe this change is safe and will not cause any issues.",
     ),
+    "str-concat-in-sequence-literals": DocMetadata(
+        importance="Medium",
+        guidance_explained="While string concatenation inside a sequence iterable is likely a mistake, there are instances when you may choose to use them..",
+    ),
 }
 
 METADATA = CORE_METADATA | {

@@ -60,6 +60,7 @@
 from .sonar.sonar_flask_json_response_type import SonarFlaskJsonResponseType
 from .sonar.sonar_django_json_response_type import SonarDjangoJsonResponseType
 from .lazy_logging import LazyLogging
+from .str_concat_in_seq_literal import StrConcatInSeqLiteral
 
 registry = CodemodCollection(
     origin="pixee",
@@ -116,6 +117,7 @@
         RemoveAssertionInPytestRaises,
         FixAssertTuple,
         LazyLogging,
+        StrConcatInSeqLiteral,
     ],
 )
 

@@ -0,0 +1,14 @@
+This codemod fixes cases of implicit string concatenation inside lists, sets, or tuples. This is most likely a mistake: you probably meant to include a comma in between the concatenated strings.
+
+Our changes look something like this:
+```diff
+bad = [
+-    "ab"
++    "ab",
+     "cd",
+     "ef",
+-    "gh"
++    "gh",
+     "ij",
+]
+```
@@ -0,0 +1,76 @@
+import libcst as cst
+from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod
+from codemodder.codemods.utils_mixin import NameResolutionMixin, AncestorPatternsMixin
+
+
+class StrConcatInSeqLiteral(SimpleCodemod, NameResolutionMixin, AncestorPatternsMixin):
+    metadata = Metadata(
+        name="str-concat-in-sequence-literals",
+        summary="Convert Implicit String Concat Inside Sequence into Individual Elements",
+        review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
+        references=[],
+    )
+    change_description = "Convert implicit string concat into individual elements."
+
+    def leave_List(self, original_node: cst.List, updated_node: cst.List) -> cst.List:
+        return self.process_node_elements(original_node, updated_node)
+
+    def leave_Tuple(
+        self, original_node: cst.Tuple, updated_node: cst.Tuple
+    ) -> cst.Tuple:
+        return self.process_node_elements(original_node, updated_node)
+
+    def leave_Set(self, original_node: cst.Set, updated_node: cst.Set) -> cst.Set:
+        return self.process_node_elements(original_node, updated_node)
+
+    def process_node_elements(
+        self, original_node: cst.CSTNode, updated_node: cst.CSTNode
+    ) -> cst.CSTNode:
+        if not self.filter_by_path_includes_or_excludes(
+            self.node_position(original_node)
+        ):
+            return updated_node
+        return updated_node.with_changes(elements=self._process_elements(original_node))
+
+    def _process_elements(self, original_node: cst.List) -> list[cst.Element]:
+        new_elements = []
+        prev_comma = None
+        for element in original_node.elements:
+            match element.value:
+                case cst.ConcatenatedString():
+                    self.report_change(original_node)
+                    flattened_parts = self._flatten_concatenated_strings(element.value)
+                    for part in flattened_parts:
+                        # the very last element should only have a comma if the last element
+                        # of the original list had a comma
+                        if (
+                            element == original_node.elements[-1]
+                            and part == flattened_parts[-1]
+                        ):
+                            new_elements.append(
+                                cst.Element(value=part, comma=element.comma)
+                            )
+                        else:
+                            new_elements.append(
+                                cst.Element(
+                                    value=part, comma=prev_comma or element.comma
+                                )
+                            )
+                case _:
+                    prev_comma = element.comma
+                    new_elements.append(element)
+        return new_elements
+
+    def _flatten_concatenated_strings(
+        self, concat_node: cst.ConcatenatedString, parts=None
+    ):
+        if parts is None:
+            parts = []
+
+        for node in concat_node.left, concat_node.right:
+            match node:
+                case cst.ConcatenatedString():
+                    self._flatten_concatenated_strings(node, parts)
+                case _:
+                    parts.append(node)
+        return parts