diff --git a/src/codemodder/codemods/api.py b/src/codemodder/codemods/api.py index c375dc41..33bae932 100644 --- a/src/codemodder/codemods/api.py +++ b/src/codemodder/codemods/api.py @@ -9,6 +9,7 @@ Reference, ReviewGuidance, ToolMetadata, + ToolRule, ) from codemodder.codemods.libcst_transformer import ( LibcstResultTransformer, diff --git a/src/codemodder/codemods/base_codemod.py b/src/codemodder/codemods/base_codemod.py index b103f94c..e2125272 100644 --- a/src/codemodder/codemods/base_codemod.py +++ b/src/codemodder/codemods/base_codemod.py @@ -13,7 +13,7 @@ from codemodder.code_directory import file_line_patterns from codemodder.codemods.base_detector import BaseDetector from codemodder.codemods.base_transformer import BaseTransformerPipeline -from codemodder.codetf import DetectionTool, Reference, Rule +from codemodder.codetf import DetectionTool, Reference from codemodder.context import CodemodExecutionContext from codemodder.file_context import FileContext from codemodder.logging import logger @@ -37,12 +37,21 @@ class Metadata: language: str = "python" +@dataclass +class ToolRule: + id: str + name: str + url: str | None = None + + @dataclass class ToolMetadata: name: str - rule_id: str - rule_name: str - rule_url: str | None = None + rules: list[ToolRule] + + @property + def rule_ids(self): + return [rule.id for rule in self.rules] class BaseCodemod(metaclass=ABCMeta): @@ -115,11 +124,6 @@ def detection_tool(self) -> DetectionTool | None: return DetectionTool( name=self._metadata.tool.name, - rule=Rule( - id=self._metadata.tool.rule_id, - name=self._metadata.tool.rule_name, - url=self._metadata.tool.rule_url, - ), ) @cached_property diff --git a/src/codemodder/codemods/base_visitor.py b/src/codemodder/codemods/base_visitor.py index 4de8a358..2159f026 100644 --- a/src/codemodder/codemods/base_visitor.py +++ b/src/codemodder/codemods/base_visitor.py @@ -1,4 +1,5 @@ -from typing import ClassVar, Collection +from functools import cache +from typing import ClassVar, Collection, cast import libcst as cst from libcst import MetadataDependent @@ -23,13 +24,22 @@ def __init__( self.line_exclude = line_exclude self.line_include = line_include - def filter_by_result(self, node): + def filter_by_result(self, node: cst.CSTNode) -> bool: + # Codemods with detectors will only run their transformations if there are results. + return self.results is None or any(self.results_for_node(node)) + + @cache + def results_for_node(self, node: cst.CSTNode) -> list[Result]: pos_to_match = self.node_position(node) - if self.results is None: - # Returning True here means codemods without detectors (and results) - # will still run their transformations. - return True - return any(result.match_location(pos_to_match, node) for result in self.results) + return ( + [ + result + for result in self.results + if result.match_location(pos_to_match, node) + ] + if self.results + else [] + ) def filter_by_path_includes_or_excludes(self, pos_to_match): """ @@ -55,13 +65,17 @@ def node_position(self, node): # By default a function's position includes the entire # function definition. Instead, we will only use the first line # of the function definition. - params_end = self.get_metadata(PositionProvider, node.params).end + params_end = cast( + CodeRange, self.get_metadata(PositionProvider, node.params) + ).end return CodeRange( - start=self.get_metadata(PositionProvider, node).start, + start=cast( + CodeRange, self.get_metadata(PositionProvider, node) + ).start, end=CodePosition(params_end.line, params_end.column + 1), ) case _: - return self.get_metadata(PositionProvider, node) + return cast(CodeRange, self.get_metadata(PositionProvider, node)) def lineno_for_node(self, node): return self.node_position(node).start.line diff --git a/src/codemodder/codemods/libcst_transformer.py b/src/codemodder/codemods/libcst_transformer.py index 7730415a..398c1ada 100644 --- a/src/codemodder/codemods/libcst_transformer.py +++ b/src/codemodder/codemods/libcst_transformer.py @@ -124,9 +124,15 @@ def report_change(self, original_node, description: str | None = None): Change( lineNumber=line_number, description=description or self.change_description, + # TODO: add fixed findings ) ) + def report_unfixed(self, original_node: cst.CSTNode, reason: str): + pass + # results = self.results_for_node(original_node) + # self.file_context.unfixed_findings.extend(results) + def remove_unused_import(self, original_node): RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node) diff --git a/src/codemodder/codemods/test/integration_utils.py b/src/codemodder/codemods/test/integration_utils.py index c28b28a8..95477386 100644 --- a/src/codemodder/codemods/test/integration_utils.py +++ b/src/codemodder/codemods/test/integration_utils.py @@ -288,9 +288,9 @@ def check_sonar_issues(cls): ) assert ( - cls.codemod.rule_id in sonar_results + cls.codemod.requested_rules[-1] in sonar_results ), f"Make sure to add a sonar issue/hotspot for {cls.codemod.rule_id} in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}" - results_for_codemod = sonar_results[cls.codemod.rule_id] + results_for_codemod = sonar_results[cls.codemod.requested_rules[-1]] file_path = pathlib.Path(cls.code_filename) assert ( file_path in results_for_codemod @@ -300,19 +300,9 @@ def _assert_sonar_fields(self, result): assert self.codemod_instance._metadata.tool is not None assert ( result["references"][-1]["description"] - == self.codemod_instance._metadata.tool.rule_name + == self.codemod_instance._metadata.tool.rules[0].name ) assert result["detectionTool"]["name"] == "Sonar" - assert ( - result["detectionTool"]["rule"]["id"] - == self.codemod_instance._metadata.tool.rule_id - ) - assert ( - result["detectionTool"]["rule"]["name"] - == self.codemod_instance._metadata.tool.rule_name - ) - # TODO: empty array until we add findings metadata - assert result["detectionTool"]["findings"] == [] def original_and_expected_from_code_path(code_path, replacements): diff --git a/src/codemodder/codetf.py b/src/codemodder/codetf.py index a0b09f1b..f2b0aa42 100644 --- a/src/codemodder/codetf.py +++ b/src/codemodder/codetf.py @@ -52,6 +52,7 @@ class Change(BaseModel): diffSide: DiffSide = DiffSide.RIGHT properties: Optional[dict] = None packageActions: Optional[list[PackageAction]] = None + finding: Optional[Finding] = None class ChangeSet(BaseModel): @@ -80,19 +81,17 @@ class Rule(BaseModel): class Finding(BaseModel): id: str - fixed: bool - reason: Optional[str] = None + rule: Rule - @model_validator(mode="after") - def validate_reason(self): - assert self.fixed or self.reason, "reason is required if fixed is False" - return self + +class UnfixedFinding(Finding): + path: str + lineNumber: Optional[int] = None + reason: str class DetectionTool(BaseModel): name: str - rule: Rule - findings: list[Finding] = [] class Result(BaseModel): @@ -104,6 +103,7 @@ class Result(BaseModel): properties: Optional[dict] = None failedFiles: Optional[list[str]] = None changeset: list[ChangeSet] + unfixedFindings: Optional[list[UnfixedFinding]] = None class Sarif(BaseModel): diff --git a/src/codemodder/file_context.py b/src/codemodder/file_context.py index a0aceea8..3ed793bb 100644 --- a/src/codemodder/file_context.py +++ b/src/codemodder/file_context.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from pathlib import Path -from codemodder.codetf import Change, ChangeSet +from codemodder.codetf import Change, ChangeSet, UnfixedFinding from codemodder.dependency import Dependency from codemodder.result import Result from codemodder.utils.timer import Timer @@ -20,6 +20,7 @@ class FileContext: findings: list[Result] | None = field(default_factory=list) dependencies: set[Dependency] = field(default_factory=set) codemod_changes: list[Change] = field(default_factory=list) + unfixed_findings: list[UnfixedFinding] = field(default_factory=list) results: list[ChangeSet] = field(default_factory=list) failures: list[Path] = field(default_factory=list) timer: Timer = field(default_factory=Timer) diff --git a/src/codemodder/result.py b/src/codemodder/result.py index d14a57cd..579a662f 100644 --- a/src/codemodder/result.py +++ b/src/codemodder/result.py @@ -27,7 +27,7 @@ class Location(ABCDataclass): end: LineInfo -@dataclass +@dataclass(kw_only=True) class Result(ABCDataclass): rule_id: str locations: list[Location] @@ -47,6 +47,11 @@ def match_location(self, pos: CodeRange, node: cst.CSTNode) -> bool: ) +@dataclass(kw_only=True) +class SASTResult(Result): + finding_id: str + + def same_line(pos: CodeRange, location: Location) -> bool: return pos.start.line == location.start.line and pos.end.line == location.end.line diff --git a/src/core_codemods/defectdojo/api.py b/src/core_codemods/defectdojo/api.py index ee2cb6ba..23361090 100644 --- a/src/core_codemods/defectdojo/api.py +++ b/src/core_codemods/defectdojo/api.py @@ -6,7 +6,7 @@ from typing_extensions import override -from codemodder.codemods.api import Metadata, Reference, ToolMetadata +from codemodder.codemods.api import Metadata, Reference, ToolMetadata, ToolRule from codemodder.codemods.base_detector import BaseDetector from codemodder.context import CodemodExecutionContext from codemodder.result import ResultSet @@ -62,9 +62,13 @@ def from_core_codemod( + other.description, tool=ToolMetadata( name="DefectDojo", - rule_id=rule_id, - rule_name=rule_name, - rule_url=rule_url, + rules=[ + ToolRule( + id=rule_id, + name=rule_name, + url=rule_url, + ) + ], ), ), transformer=other.transformer, @@ -82,5 +86,5 @@ def apply( context, files_to_analyze, # We know this has a tool because we created it with `from_core_codemod` - [cast(ToolMetadata, self._metadata.tool).rule_id], + cast(ToolMetadata, self._metadata.tool).rule_ids, ) diff --git a/src/core_codemods/defectdojo/results.py b/src/core_codemods/defectdojo/results.py index da96d687..d5434364 100644 --- a/src/core_codemods/defectdojo/results.py +++ b/src/core_codemods/defectdojo/results.py @@ -6,7 +6,7 @@ from libcst._position import CodeRange from typing_extensions import Self, override -from codemodder.result import LineInfo, Location, Result, ResultSet +from codemodder.result import LineInfo, Location, ResultSet, SASTResult class DefectDojoLocation(Location): @@ -20,10 +20,11 @@ def from_finding(cls, finding: dict) -> Self: ) -class DefectDojoResult(Result): +class DefectDojoResult(SASTResult): @classmethod def from_finding(cls, finding: dict) -> Self: return cls( + finding_id=finding["id"], rule_id=finding["title"], locations=[DefectDojoLocation.from_finding(finding)], ) diff --git a/src/core_codemods/defectdojo/semgrep/avoid_insecure_deserialization.py b/src/core_codemods/defectdojo/semgrep/avoid_insecure_deserialization.py index 2b8a3a63..e9548780 100644 --- a/src/core_codemods/defectdojo/semgrep/avoid_insecure_deserialization.py +++ b/src/core_codemods/defectdojo/semgrep/avoid_insecure_deserialization.py @@ -1,6 +1,6 @@ import libcst as cst -from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata +from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata, ToolRule from codemodder.codemods.libcst_transformer import ( LibcstResultTransformer, LibcstTransformerPipeline, @@ -20,14 +20,22 @@ class AvoidInsecureDeserializationTransformer( def leave_Call( self, original_node: cst.Call, updated_node: cst.Call ) -> cst.Call | None: - if ( - self.node_is_selected(original_node) - and self.find_base_name(original_node) == "yaml.load" - ): - self.report_change( - original_node, description="Use SafeLoader in pyyaml.load calls" - ) - return self.update_call(original_node, updated_node) + if not self.node_is_selected(original_node): + return updated_node + + match self.find_base_name(original_node): + case "pickle.loads": + self.report_unfixed( + original_node, + reason="`fickling` does not yet support `pickle.loads`", + ) + return updated_node + case "yaml.load": + self.report_change( + original_node, + description="Use SafeLoader in pyyaml.load calls", + ) + return self.update_call(original_node, updated_node) return updated_node @@ -39,9 +47,13 @@ def leave_Call( review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, tool=ToolMetadata( name="DefectDojo", - rule_id="python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization", - rule_name="avoid-insecure-deserialization", - rule_url="https://semgrep.dev/playground/r/python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization", + rules=[ + ToolRule( + id="python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization", + name="avoid-insecure-deserialization", + url="https://semgrep.dev/playground/r/python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization", + ) + ], ), references=[], ), diff --git a/src/core_codemods/defectdojo/semgrep/django_secure_set_cookie.py b/src/core_codemods/defectdojo/semgrep/django_secure_set_cookie.py index 53bfb793..986a4045 100644 --- a/src/core_codemods/defectdojo/semgrep/django_secure_set_cookie.py +++ b/src/core_codemods/defectdojo/semgrep/django_secure_set_cookie.py @@ -1,6 +1,6 @@ import libcst as cst -from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata +from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata, ToolRule from codemodder.codemods.libcst_transformer import ( LibcstResultTransformer, LibcstTransformerPipeline, @@ -41,9 +41,13 @@ def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Cal review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, tool=ToolMetadata( name="DefectDojo", - rule_id="python.django.security.audit.secure-cookies.django-secure-set-cookie", - rule_name="django-secure-set-cookie", - rule_url="https://semgrep.dev/playground/r/python.django.security.audit.secure-cookies.django-secure-set-cookie", + rules=[ + ToolRule( + id="python.django.security.audit.secure-cookies.django-secure-set-cookie", + name="django-secure-set-cookie", + url="https://semgrep.dev/playground/r/python.django.security.audit.secure-cookies.django-secure-set-cookie", + ) + ], ), references=[], ), diff --git a/src/core_codemods/sonar/api.py b/src/core_codemods/sonar/api.py index 0205a0ff..ac3031d1 100644 --- a/src/core_codemods/sonar/api.py +++ b/src/core_codemods/sonar/api.py @@ -1,7 +1,7 @@ from functools import cache from pathlib import Path -from codemodder.codemods.base_codemod import Metadata, Reference, ToolMetadata +from codemodder.codemods.base_codemod import Metadata, Reference, ToolMetadata, ToolRule from codemodder.codemods.base_detector import BaseDetector from codemodder.codemods.base_transformer import BaseTransformerPipeline from codemodder.context import CodemodExecutionContext @@ -15,10 +15,6 @@ class SonarCodemod(SASTCodemod): def origin(self): return "sonar" - @property - def rule_id(self): - return self._metadata.tool.rule_id - @classmethod def from_core_codemod( cls, @@ -41,9 +37,13 @@ def from_core_codemod( + other.description, tool=ToolMetadata( name="Sonar", - rule_id=rule_id, - rule_name=rule_name, - rule_url=rule_url, + rules=[ + ToolRule( + id=rule_id, + name=rule_name, + url=rule_url, + ) + ], ), ), transformer=transformer if transformer else other.transformer, diff --git a/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py b/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py index ae89847a..c222353a 100644 --- a/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py +++ b/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py @@ -33,6 +33,7 @@ def test_yaml_load(self, tmpdir): findings = { "results": [ { + "id": 1, "title": RULE_ID, "file_path": "code.py", "line": 4, @@ -58,6 +59,7 @@ def test_pickle_load(self, adds_dependency, tmpdir): findings = { "results": [ { + "id": 2, "title": RULE_ID, "file_path": "code.py", "line": 4, @@ -88,11 +90,13 @@ def test_pickle_and_yaml(self, adds_dependency, tmpdir): findings = { "results": [ { + "id": 3, "title": RULE_ID, "file_path": "code.py", "line": 5, }, { + "id": 4, "title": RULE_ID, "file_path": "code.py", "line": 6, @@ -108,3 +112,27 @@ def test_pickle_and_yaml(self, adds_dependency, tmpdir): num_changes=2, ) adds_dependency.assert_called_once_with(Fickling) + + @mock.patch("codemodder.codemods.api.FileContext.add_dependency") + def test_pickle_loads(self, adds_dependency, tmpdir): + input_code = ( + expected + ) = """ + import pickle + + result = pickle.loads("data") + """ + + findings = { + "results": [ + { + "id": 5, + "title": RULE_ID, + "file_path": "code.py", + "line": 4, + }, + ] + } + + self.run_and_assert(tmpdir, input_code, expected, results=json.dumps(findings)) + adds_dependency.assert_not_called() diff --git a/tests/codemods/defectdojo/semgrep/test_django_secure_set_cookie.py b/tests/codemods/defectdojo/semgrep/test_django_secure_set_cookie.py index 4c036335..72509307 100644 --- a/tests/codemods/defectdojo/semgrep/test_django_secure_set_cookie.py +++ b/tests/codemods/defectdojo/semgrep/test_django_secure_set_cookie.py @@ -24,6 +24,7 @@ def test_simple(self, tmpdir): findings = { "results": [ { + "id": 1, "title": "python.django.security.audit.secure-cookies.django-secure-set-cookie", "file_path": "code.py", "line": 2, diff --git a/tests/test_codetf.py b/tests/test_codetf.py index 17da7ac2..fc48be6e 100644 --- a/tests/test_codetf.py +++ b/tests/test_codetf.py @@ -2,19 +2,10 @@ from pathlib import Path import jsonschema -import pydantic import pytest import requests -from codemodder.codetf import ( - Change, - ChangeSet, - CodeTF, - DiffSide, - Finding, - Reference, - Result, -) +from codemodder.codetf import Change, ChangeSet, CodeTF, DiffSide, Reference, Result @pytest.fixture(autouse=True) @@ -125,13 +116,3 @@ def test_write_codetf_with_results(tmpdir, mocker, codetf_schema): def test_reference_use_url_for_description(): ref = Reference(url="https://example.com") assert ref.description == "https://example.com" - - -def test_unfixed_finding_with_no_reason(): - with pytest.raises(pydantic.ValidationError): - Finding(id="test", fixed=False) - - -def test_unfixed_finding_with_reason(): - finding = Finding(id="test", fixed=False, reason="test") - assert finding.reason == "test"