pixee · andrecsilva · Jan 29, 2024 · Jan 22, 2024 · Jan 23, 2024 · Jan 26, 2024
@@ -55,6 +55,7 @@ class BaseIntegrationTest(DependencyTestMixin, CleanRepoMixin):
     _lines = []
     num_changed_files = 1
     allowed_exceptions = ()
+    sonar_issues_json: str | None = None
 
     @classmethod
     def setup_class(cls):
@@ -82,9 +83,12 @@ def _assert_run_fields(self, run, output_path):
         assert run["tool"] == "codemodder-python"
         assert run["version"] == __version__
         assert run["elapsed"] != ""
-        assert (
-            run["commandLine"]
-            == f'codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_instance.name} --path-include={code_path} --path-exclude=""'
+        assert run[
+            "commandLine"
+        ] == f'codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_instance.name} --path-include={code_path} --path-exclude=""' + (
+            f" --sonar-issues-json={self.sonar_issues_json}"
+            if self.sonar_issues_json
+            else ""
         )
         assert run["directory"] == os.path.abspath(SAMPLES_DIR)
         assert run["sarifs"] == []
@@ -161,6 +165,9 @@ def test_file_rewritten(self):
             '--path-exclude=""',
         ]
 
+        if self.sonar_issues_json:
+            command.append(f"--sonar-issues-json={self.sonar_issues_json}")
+
         self.check_code_before()
         self.check_dependencies_before()
 

@@ -1,4 +1,7 @@
-from core_codemods.numpy_nan_equality import NumpyNanEquality
+from core_codemods.numpy_nan_equality import (
+    NumpyNanEquality,
+    NumpyNanEqualityTransformer,
+)
 from integration_tests.base_test import (
     BaseIntegrationTest,
     original_and_expected_from_code_path,
@@ -30,5 +33,5 @@ class TestNumpyNanEquality(BaseIntegrationTest):
     # fmt: on
 
     expected_line_change = "4"
-    change_description = NumpyNanEquality.change_description
+    change_description = NumpyNanEqualityTransformer.change_description
     num_changed_files = 1
@@ -1,4 +1,7 @@
 import subprocess
+from core_codemods.remove_assertion_in_pytest_raises import (
+    RemoveAssertionInPytestRaises,
+)
 
 
 class TestProgramFails:
@@ -21,3 +24,21 @@ def test_codemods_include_exclude_conflict(self):
             check=False,
         )
         assert completed_process.returncode == 3
+
+    def test_load_sast_only_by_flag(self, tmp_path):
+        tmp_file_path = tmp_path / "sonar.json"
+        tmp_file_path.touch()
+        completed_process = subprocess.run(
+            [
+                "codemodder",
+                "tests/samples/",
+                "--sonar-issues-json",
+                f"{tmp_file_path}",
+                "--dry-run",
+            ],
+            check=False,
+            capture_output=True,
+            text=True,
+        )
+        assert completed_process.returncode == 0
+        assert RemoveAssertionInPytestRaises.id not in completed_process.stdout
@@ -0,0 +1,38 @@
+from core_codemods.sonar.sonar_numpy_nan_equality import (
+    SonarNumpyNanEquality,
+    SonarNumpyNanEqualityTransformer,
+)
+from integration_tests.base_test import (
+    BaseIntegrationTest,
+    original_and_expected_from_code_path,
+)
+
+
+class TestNumpyNanEquality(BaseIntegrationTest):
+    codemod = SonarNumpyNanEquality
+    code_path = "tests/samples/numpy_nan_equality.py"
+    original_code, expected_new_code = original_and_expected_from_code_path(
+        code_path,
+        [
+            (3, """if np.isnan(a):\n"""),
+        ],
+    )
+    sonar_issues_json = "tests/samples/sonar_issues.json"
+
+    # fmt: off
+    expected_diff =(
+    """--- \n"""
+    """+++ \n"""
+    """@@ -1,5 +1,5 @@\n"""
+    """ import numpy as np\n"""
+    """ \n"""
+    """ a = np.nan\n"""
+    """-if a == np.nan:\n"""
+    """+if np.isnan(a):\n"""
+    """     pass\n"""
+    )
+    # fmt: on
+
+    expected_line_change = "4"
+    change_description = SonarNumpyNanEqualityTransformer.change_description
+    num_changed_files = 1
@@ -74,6 +74,7 @@ all = [
 
 [project.entry-points.codemods]
 core = "core_codemods:registry"
+sonar = "core_codemods:sonar_registry"
 
 [tool.setuptools]
 

@@ -156,6 +156,9 @@ def run(original_args) -> int:
     logger.info("codemodder: python/%s", __version__)
     logger.info("command: %s %s", Path(sys.argv[0]).name, " ".join(original_args))
 
+    tool_result_files_map = {"sonar": argv.sonar_issues_json}
+    # TODO find the tool name in the --sarif files here and populate the dict
+
     repo_manager = PythonRepoManager(Path(argv.directory))
     context = CodemodExecutionContext(
         Path(argv.directory),
@@ -165,14 +168,17 @@ def run(original_args) -> int:
         repo_manager,
         argv.path_include,
         argv.path_exclude,
+        tool_result_files_map,
         argv.max_workers,
     )
 
     repo_manager.parse_project()
 
     # TODO: this should be a method of CodemodExecutionContext
     codemods_to_run = codemod_registry.match_codemods(
-        argv.codemod_include, argv.codemod_exclude
+        argv.codemod_include,
+        argv.codemod_exclude,
+        sast_only=argv.sonar_issues_json or argv.sarif,
     )
 
     log_section("setup")

@@ -3,6 +3,7 @@
 from dataclasses import dataclass, field
 from enum import Enum
 from functools import cached_property
+import functools
 import importlib.resources
 from importlib.abc import Traversable
 from pathlib import Path
@@ -40,7 +41,7 @@ class Metadata:
     summary: str
     review_guidance: ReviewGuidance
     references: list[Reference] = field(default_factory=list)
-    has_description: bool = True
+    description: str | None = None
 
 
 class BaseCodemod(metaclass=ABCMeta):
@@ -107,11 +108,10 @@ def docs_module(self) -> Traversable:
 
     @cached_property
     def description(self) -> str:
-        if not self._metadata.has_description:
-            return ""
-
-        doc_path = self.docs_module / f"{self.origin}_python_{self.name}.md"
-        return doc_path.read_text()
+        if self._metadata.description is None:
+            doc_path = self.docs_module / f"{self.origin}_python_{self.name}.md"
+            return doc_path.read_text()
+        return self._metadata.description
 
     @property
     def review_guidance(self):
@@ -129,6 +129,30 @@ def describe(self):
             "references": [ref.to_json() for ref in self.references],
         }
 
+    def _apply(
+        self,
+        context: CodemodExecutionContext,
+        files_to_analyze: list[Path],
+        rules: list[str],
+    ) -> None:
+        results = (
+            # It seems like semgrep doesn't like our fully-specified id format
+            self.detector.apply(self.name, context, files_to_analyze)
+            if self.detector
+            else ResultSet()
+        )
+
+        process_file = functools.partial(
+            self._process_file, context=context, results=results, rules=rules
+        )
+
+        with ThreadPoolExecutor() as executor:
+            logger.debug("using executor with %s workers", context.max_workers)
+            contexts = executor.map(process_file, files_to_analyze)
+            executor.shutdown(wait=True)
+
+        context.process_results(self.id, contexts)
+
     def apply(
         self,
         context: CodemodExecutionContext,
@@ -150,36 +174,32 @@ def apply(
         :param context: The codemod execution context
         :param files_to_analyze: The list of files to analyze
         """
-        results = (
-            # It seems like semgrep doesn't like our fully-specified id format
-            self.detector.apply(self.name, context, files_to_analyze)
-            if self.detector
-            else ResultSet()
-        )
-
-        def process_file(filename: Path):
-            line_exclude = file_line_patterns(filename, context.path_exclude)
-            line_include = file_line_patterns(filename, context.path_include)
-            findings_for_rule = results.results_for_rule_and_file(self.name, filename)
-
-            file_context = FileContext(
-                context.directory,
-                filename,
-                line_exclude,
-                line_include,
-                findings_for_rule,
-            )
-
-            if change_set := self.transformer.apply(
-                context, file_context, findings_for_rule
-            ):
-                file_context.add_result(change_set)
+        self._apply(context, files_to_analyze, [self.name])
 
-            return file_context
+    def _process_file(
+        self,
+        filename: Path,
+        context: CodemodExecutionContext,
+        results: ResultSet,
+        rules: list[str],
+    ):
+        line_exclude = file_line_patterns(filename, context.path_exclude)
+        line_include = file_line_patterns(filename, context.path_include)
+        findings_for_rule = []
+        for rule in rules:
+            findings_for_rule.extend(results.results_for_rule_and_file(rule, filename))
+
+        file_context = FileContext(
+            context.directory,
+            filename,
+            line_exclude,
+            line_include,
+            findings_for_rule,
+        )
 
-        with ThreadPoolExecutor() as executor:
-            logger.debug("using executor with %s workers", context.max_workers)
-            contexts = executor.map(process_file, files_to_analyze)
-            executor.shutdown(wait=True)
+        if change_set := self.transformer.apply(
+            context, file_context, findings_for_rule
+        ):
+            file_context.add_result(change_set)
 
-        context.process_results(self.id, contexts)
+        return file_context
@@ -0,0 +1,34 @@
+from pathlib import Path
+from codemodder.codemods.base_detector import BaseDetector
+from codemodder.context import CodemodExecutionContext
+from codemodder.result import ResultSet
+from codemodder.sonar_results import SonarResultSet
+from core_codemods.api.core_codemod import SASTCodemod
+
+
+class SonarCodemod(SASTCodemod):
+    @property
+    def origin(self):
+        return "sonar"
+
+
+class SonarDetector(BaseDetector):
+    _lazy_cache = None
+
+    def _process_sonar_findings(self, sonar_json_files: list[str]) -> SonarResultSet:
+        combined_result_set = SonarResultSet()
+        for file in sonar_json_files or []:
+            combined_result_set |= SonarResultSet.from_json(file)
+        return combined_result_set
+
+    def apply(
+        self,
+        codemod_id: str,
+        context: CodemodExecutionContext,
+        files_to_analyze: list[Path],
+    ) -> ResultSet:
+        if not self._lazy_cache:
+            self._lazy_cache = self._process_sonar_findings(
+                context.tool_result_files_map.get("sonar", [])
+            )
+        return self._lazy_cache
@@ -37,6 +37,7 @@ class CodemodExecutionContext:  # pylint: disable=too-many-instance-attributes
     path_include: list[str]
     path_exclude: list[str]
     max_workers: int = 1
+    tool_result_files_map: dict[str, list[str]]
 
     def __init__(
         self,
@@ -47,6 +48,7 @@ def __init__(
         repo_manager: PythonRepoManager,
         path_include: list[str],
         path_exclude: list[str],
+        tool_result_files_map: dict[str, list[str]] | None = None,
         max_workers: int = 1,
     ):  # pylint: disable=too-many-arguments
         self.directory = directory
@@ -61,6 +63,7 @@ def __init__(
         self.path_include = path_include
         self.path_exclude = path_exclude
         self.max_workers = max_workers
+        self.tool_result_files_map = tool_result_files_map or {}
 
     def add_results(self, codemod_name: str, change_sets: List[ChangeSet]):
         self._results_by_codemod.setdefault(codemod_name, []).extend(change_sets)

@@ -1,6 +1,5 @@
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import List
 
 from codemodder.change import Change, ChangeSet
 from codemodder.dependency import Dependency
@@ -16,13 +15,13 @@ class FileContext:  # pylint: disable=too-many-instance-attributes
 
     base_directory: Path
     file_path: Path
-    line_exclude: List[int] = field(default_factory=list)
-    line_include: List[int] = field(default_factory=list)
-    findings: List[Result] = field(default_factory=list)
+    line_exclude: list[int] = field(default_factory=list)
+    line_include: list[int] = field(default_factory=list)
+    findings: list[Result] = field(default_factory=list)
     dependencies: set[Dependency] = field(default_factory=set)
-    codemod_changes: List[Change] = field(default_factory=list)
-    results: List[ChangeSet] = field(default_factory=list)
-    failures: List[Path] = field(default_factory=list)
+    codemod_changes: list[Change] = field(default_factory=list)
+    results: list[ChangeSet] = field(default_factory=list)
+    failures: list[Path] = field(default_factory=list)
     timer: Timer = field(default_factory=Timer)
 
     def add_dependency(self, dependency: Dependency):

@@ -59,14 +59,21 @@ def match_codemods(
         self,
         codemod_include: Optional[list] = None,
         codemod_exclude: Optional[list] = None,
+        sast_only=False,
     ) -> list[BaseCodemod]:
         codemod_include = codemod_include or []
         codemod_exclude = codemod_exclude or DEFAULT_EXCLUDED_CODEMODS
+        base_list = [
+            codemod
+            for codemod in self.codemods
+            if (sast_only and codemod.origin != "pixee")
+            or (not sast_only and codemod.origin == "pixee")
+        ]
 
         if codemod_exclude and not codemod_include:
             return [
                 codemod
-                for codemod in self.codemods
+                for codemod in base_list
                 if codemod.name not in codemod_exclude
                 and codemod.id not in codemod_exclude
             ]