Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sonar Integration #223

Merged
merged 7 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions integration_tests/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class BaseIntegrationTest(DependencyTestMixin, CleanRepoMixin):
_lines = []
num_changed_files = 1
allowed_exceptions = ()
sonar_issues_json: str | None = None

@classmethod
def setup_class(cls):
Expand Down Expand Up @@ -82,9 +83,12 @@ def _assert_run_fields(self, run, output_path):
assert run["tool"] == "codemodder-python"
assert run["version"] == __version__
assert run["elapsed"] != ""
assert (
run["commandLine"]
== f'codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_instance.name} --path-include={code_path} --path-exclude=""'
assert run[
"commandLine"
] == f'codemodder {SAMPLES_DIR} --output {output_path} --codemod-include={self.codemod_instance.name} --path-include={code_path} --path-exclude=""' + (
f" --sonar-issues-json={self.sonar_issues_json}"
if self.sonar_issues_json
else ""
)
assert run["directory"] == os.path.abspath(SAMPLES_DIR)
assert run["sarifs"] == []
Expand Down Expand Up @@ -161,6 +165,9 @@ def test_file_rewritten(self):
'--path-exclude=""',
]

if self.sonar_issues_json:
command.append(f"--sonar-issues-json={self.sonar_issues_json}")

self.check_code_before()
self.check_dependencies_before()

Expand Down
7 changes: 5 additions & 2 deletions integration_tests/test_numpy_nan_equality.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from core_codemods.numpy_nan_equality import NumpyNanEquality
from core_codemods.numpy_nan_equality import (
NumpyNanEquality,
NumpyNanEqualityTransformer,
)
from integration_tests.base_test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
Expand Down Expand Up @@ -30,5 +33,5 @@ class TestNumpyNanEquality(BaseIntegrationTest):
# fmt: on

expected_line_change = "4"
change_description = NumpyNanEquality.change_description
change_description = NumpyNanEqualityTransformer.change_description
num_changed_files = 1
21 changes: 21 additions & 0 deletions integration_tests/test_program.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import subprocess
from core_codemods.remove_assertion_in_pytest_raises import (
RemoveAssertionInPytestRaises,
)


class TestProgramFails:
Expand All @@ -21,3 +24,21 @@ def test_codemods_include_exclude_conflict(self):
check=False,
)
assert completed_process.returncode == 3

def test_load_sast_only_by_flag(self, tmp_path):
tmp_file_path = tmp_path / "sonar.json"
tmp_file_path.touch()
completed_process = subprocess.run(
[
"codemodder",
"tests/samples/",
"--sonar-issues-json",
f"{tmp_file_path}",
"--dry-run",
],
check=False,
capture_output=True,
text=True,
)
assert completed_process.returncode == 0
assert RemoveAssertionInPytestRaises.id not in completed_process.stdout
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you explain the goal of this assertion?

Copy link
Contributor Author

@andrecsilva andrecsilva Jan 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal is to make sure no non-sast codemods are loaded by default while the --sarif or --sonar-... flags are passed. Couldn't think of a better way other than looking for the codemod ids printed to stdout. RemoveAssertionInPytestRaises just happens to the one I've used to check it.

38 changes: 38 additions & 0 deletions integration_tests/test_sonar_numpy_nan_equality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from core_codemods.sonar.sonar_numpy_nan_equality import (
SonarNumpyNanEquality,
SonarNumpyNanEqualityTransformer,
)
from integration_tests.base_test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)


class TestNumpyNanEquality(BaseIntegrationTest):
codemod = SonarNumpyNanEquality
code_path = "tests/samples/numpy_nan_equality.py"
original_code, expected_new_code = original_and_expected_from_code_path(
code_path,
[
(3, """if np.isnan(a):\n"""),
],
)
sonar_issues_json = "tests/samples/sonar_issues.json"

# fmt: off
expected_diff =(
"""--- \n"""
"""+++ \n"""
"""@@ -1,5 +1,5 @@\n"""
""" import numpy as np\n"""
""" \n"""
""" a = np.nan\n"""
"""-if a == np.nan:\n"""
"""+if np.isnan(a):\n"""
""" pass\n"""
)
# fmt: on

expected_line_change = "4"
change_description = SonarNumpyNanEqualityTransformer.change_description
num_changed_files = 1
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ all = [

[project.entry-points.codemods]
core = "core_codemods:registry"
sonar = "core_codemods:sonar_registry"

[tool.setuptools]

Expand Down
8 changes: 7 additions & 1 deletion src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ def run(original_args) -> int:
logger.info("codemodder: python/%s", __version__)
logger.info("command: %s %s", Path(sys.argv[0]).name, " ".join(original_args))

tool_result_files_map = {"sonar": argv.sonar_issues_json}
# TODO find the tool name in the --sarif files here and populate the dict

repo_manager = PythonRepoManager(Path(argv.directory))
context = CodemodExecutionContext(
Path(argv.directory),
Expand All @@ -165,14 +168,17 @@ def run(original_args) -> int:
repo_manager,
argv.path_include,
argv.path_exclude,
tool_result_files_map,
argv.max_workers,
)

repo_manager.parse_project()

# TODO: this should be a method of CodemodExecutionContext
codemods_to_run = codemod_registry.match_codemods(
argv.codemod_include, argv.codemod_exclude
argv.codemod_include,
argv.codemod_exclude,
sast_only=argv.sonar_issues_json or argv.sarif,
)

log_section("setup")
Expand Down
92 changes: 56 additions & 36 deletions src/codemodder/codemods/base_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from dataclasses import dataclass, field
from enum import Enum
from functools import cached_property
import functools
import importlib.resources
from importlib.abc import Traversable
from pathlib import Path
Expand Down Expand Up @@ -40,7 +41,7 @@ class Metadata:
summary: str
review_guidance: ReviewGuidance
references: list[Reference] = field(default_factory=list)
has_description: bool = True
description: str | None = None


class BaseCodemod(metaclass=ABCMeta):
Expand Down Expand Up @@ -107,11 +108,10 @@ def docs_module(self) -> Traversable:

@cached_property
def description(self) -> str:
if not self._metadata.has_description:
return ""

doc_path = self.docs_module / f"{self.origin}_python_{self.name}.md"
return doc_path.read_text()
if self._metadata.description is None:
doc_path = self.docs_module / f"{self.origin}_python_{self.name}.md"
return doc_path.read_text()
return self._metadata.description

@property
def review_guidance(self):
Expand All @@ -129,6 +129,30 @@ def describe(self):
"references": [ref.to_json() for ref in self.references],
}

def _apply(
self,
context: CodemodExecutionContext,
files_to_analyze: list[Path],
rules: list[str],
) -> None:
results = (
# It seems like semgrep doesn't like our fully-specified id format
self.detector.apply(self.name, context, files_to_analyze)
if self.detector
else ResultSet()
)

process_file = functools.partial(
self._process_file, context=context, results=results, rules=rules
)

with ThreadPoolExecutor() as executor:
logger.debug("using executor with %s workers", context.max_workers)
contexts = executor.map(process_file, files_to_analyze)
executor.shutdown(wait=True)

context.process_results(self.id, contexts)

def apply(
self,
context: CodemodExecutionContext,
Expand All @@ -150,36 +174,32 @@ def apply(
:param context: The codemod execution context
:param files_to_analyze: The list of files to analyze
"""
results = (
# It seems like semgrep doesn't like our fully-specified id format
self.detector.apply(self.name, context, files_to_analyze)
if self.detector
else ResultSet()
)

def process_file(filename: Path):
line_exclude = file_line_patterns(filename, context.path_exclude)
line_include = file_line_patterns(filename, context.path_include)
findings_for_rule = results.results_for_rule_and_file(self.name, filename)

file_context = FileContext(
context.directory,
filename,
line_exclude,
line_include,
findings_for_rule,
)

if change_set := self.transformer.apply(
context, file_context, findings_for_rule
):
file_context.add_result(change_set)
self._apply(context, files_to_analyze, [self.name])

return file_context
def _process_file(
self,
filename: Path,
context: CodemodExecutionContext,
results: ResultSet,
rules: list[str],
):
line_exclude = file_line_patterns(filename, context.path_exclude)
line_include = file_line_patterns(filename, context.path_include)
findings_for_rule = []
for rule in rules:
findings_for_rule.extend(results.results_for_rule_and_file(rule, filename))

file_context = FileContext(
context.directory,
filename,
line_exclude,
line_include,
findings_for_rule,
)

with ThreadPoolExecutor() as executor:
logger.debug("using executor with %s workers", context.max_workers)
contexts = executor.map(process_file, files_to_analyze)
executor.shutdown(wait=True)
if change_set := self.transformer.apply(
context, file_context, findings_for_rule
):
file_context.add_result(change_set)

context.process_results(self.id, contexts)
return file_context
34 changes: 34 additions & 0 deletions src/codemodder/codemods/sonar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from pathlib import Path
from codemodder.codemods.base_detector import BaseDetector
from codemodder.context import CodemodExecutionContext
from codemodder.result import ResultSet
from codemodder.sonar_results import SonarResultSet
from core_codemods.api.core_codemod import SASTCodemod


class SonarCodemod(SASTCodemod):
@property
def origin(self):
return "sonar"


class SonarDetector(BaseDetector):
_lazy_cache = None

def _process_sonar_findings(self, sonar_json_files: list[str]) -> SonarResultSet:
combined_result_set = SonarResultSet()
for file in sonar_json_files or []:
combined_result_set |= SonarResultSet.from_json(file)
return combined_result_set

def apply(
self,
codemod_id: str,
context: CodemodExecutionContext,
files_to_analyze: list[Path],
) -> ResultSet:
if not self._lazy_cache:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason not to use funcutils.lru_cache?

Copy link
Contributor Author

@andrecsilva andrecsilva Jan 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember deliberately doing it like this to test something. But I forgot what it was. I'll just use lru_cache.
Actually, now I remember. The cache from lru_cache depends on the arguments, meaning if you change the arguments the cache won't apply. This means you would have to process the sonar json for each codemod. Doing it manually like this avoids that.

self._lazy_cache = self._process_sonar_findings(
context.tool_result_files_map.get("sonar", [])
)
return self._lazy_cache
3 changes: 3 additions & 0 deletions src/codemodder/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class CodemodExecutionContext: # pylint: disable=too-many-instance-attributes
path_include: list[str]
path_exclude: list[str]
max_workers: int = 1
tool_result_files_map: dict[str, list[str]]

def __init__(
self,
Expand All @@ -47,6 +48,7 @@ def __init__(
repo_manager: PythonRepoManager,
path_include: list[str],
path_exclude: list[str],
tool_result_files_map: dict[str, list[str]] | None = None,
max_workers: int = 1,
): # pylint: disable=too-many-arguments
self.directory = directory
Expand All @@ -61,6 +63,7 @@ def __init__(
self.path_include = path_include
self.path_exclude = path_exclude
self.max_workers = max_workers
self.tool_result_files_map = tool_result_files_map or {}

def add_results(self, codemod_name: str, change_sets: List[ChangeSet]):
self._results_by_codemod.setdefault(codemod_name, []).extend(change_sets)
Expand Down
13 changes: 6 additions & 7 deletions src/codemodder/file_context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from dataclasses import dataclass, field
from pathlib import Path
from typing import List

from codemodder.change import Change, ChangeSet
from codemodder.dependency import Dependency
Expand All @@ -16,13 +15,13 @@ class FileContext: # pylint: disable=too-many-instance-attributes

base_directory: Path
file_path: Path
line_exclude: List[int] = field(default_factory=list)
line_include: List[int] = field(default_factory=list)
findings: List[Result] = field(default_factory=list)
line_exclude: list[int] = field(default_factory=list)
line_include: list[int] = field(default_factory=list)
findings: list[Result] = field(default_factory=list)
dependencies: set[Dependency] = field(default_factory=set)
codemod_changes: List[Change] = field(default_factory=list)
results: List[ChangeSet] = field(default_factory=list)
failures: List[Path] = field(default_factory=list)
codemod_changes: list[Change] = field(default_factory=list)
results: list[ChangeSet] = field(default_factory=list)
failures: list[Path] = field(default_factory=list)
timer: Timer = field(default_factory=Timer)

def add_dependency(self, dependency: Dependency):
Expand Down
9 changes: 8 additions & 1 deletion src/codemodder/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,21 @@ def match_codemods(
self,
codemod_include: Optional[list] = None,
codemod_exclude: Optional[list] = None,
sast_only=False,
) -> list[BaseCodemod]:
codemod_include = codemod_include or []
codemod_exclude = codemod_exclude or DEFAULT_EXCLUDED_CODEMODS
base_list = [
codemod
for codemod in self.codemods
if (sast_only and codemod.origin != "pixee")
or (not sast_only and codemod.origin == "pixee")
]

if codemod_exclude and not codemod_include:
return [
codemod
for codemod in self.codemods
for codemod in base_list
if codemod.name not in codemod_exclude
and codemod.id not in codemod_exclude
]
Expand Down
Loading
Loading