Skip to content

Commit

Permalink
Update codetf bindings (#490)
Browse files Browse the repository at this point in the history
* Update CodeTF bindings to reflect new spec

* Update rule metadata to accept multiple rules
  • Loading branch information
drdavella authored Apr 22, 2024
1 parent b7514e1 commit b8a62db
Show file tree
Hide file tree
Showing 16 changed files with 145 additions and 93 deletions.
1 change: 1 addition & 0 deletions src/codemodder/codemods/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Reference,
ReviewGuidance,
ToolMetadata,
ToolRule,
)
from codemodder.codemods.libcst_transformer import (
LibcstResultTransformer,
Expand Down
22 changes: 13 additions & 9 deletions src/codemodder/codemods/base_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from codemodder.code_directory import file_line_patterns
from codemodder.codemods.base_detector import BaseDetector
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codetf import DetectionTool, Reference, Rule
from codemodder.codetf import DetectionTool, Reference
from codemodder.context import CodemodExecutionContext
from codemodder.file_context import FileContext
from codemodder.logging import logger
Expand All @@ -37,12 +37,21 @@ class Metadata:
language: str = "python"


@dataclass
class ToolRule:
id: str
name: str
url: str | None = None


@dataclass
class ToolMetadata:
name: str
rule_id: str
rule_name: str
rule_url: str | None = None
rules: list[ToolRule]

@property
def rule_ids(self):
return [rule.id for rule in self.rules]


class BaseCodemod(metaclass=ABCMeta):
Expand Down Expand Up @@ -115,11 +124,6 @@ def detection_tool(self) -> DetectionTool | None:

return DetectionTool(
name=self._metadata.tool.name,
rule=Rule(
id=self._metadata.tool.rule_id,
name=self._metadata.tool.rule_name,
url=self._metadata.tool.rule_url,
),
)

@cached_property
Expand Down
34 changes: 24 additions & 10 deletions src/codemodder/codemods/base_visitor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import ClassVar, Collection
from functools import cache
from typing import ClassVar, Collection, cast

import libcst as cst
from libcst import MetadataDependent
Expand All @@ -23,13 +24,22 @@ def __init__(
self.line_exclude = line_exclude
self.line_include = line_include

def filter_by_result(self, node):
def filter_by_result(self, node: cst.CSTNode) -> bool:
# Codemods with detectors will only run their transformations if there are results.
return self.results is None or any(self.results_for_node(node))

@cache
def results_for_node(self, node: cst.CSTNode) -> list[Result]:
pos_to_match = self.node_position(node)
if self.results is None:
# Returning True here means codemods without detectors (and results)
# will still run their transformations.
return True
return any(result.match_location(pos_to_match, node) for result in self.results)
return (
[
result
for result in self.results
if result.match_location(pos_to_match, node)
]
if self.results
else []
)

def filter_by_path_includes_or_excludes(self, pos_to_match):
"""
Expand All @@ -55,13 +65,17 @@ def node_position(self, node):
# By default a function's position includes the entire
# function definition. Instead, we will only use the first line
# of the function definition.
params_end = self.get_metadata(PositionProvider, node.params).end
params_end = cast(
CodeRange, self.get_metadata(PositionProvider, node.params)
).end
return CodeRange(
start=self.get_metadata(PositionProvider, node).start,
start=cast(
CodeRange, self.get_metadata(PositionProvider, node)
).start,
end=CodePosition(params_end.line, params_end.column + 1),
)
case _:
return self.get_metadata(PositionProvider, node)
return cast(CodeRange, self.get_metadata(PositionProvider, node))

def lineno_for_node(self, node):
return self.node_position(node).start.line
Expand Down
6 changes: 6 additions & 0 deletions src/codemodder/codemods/libcst_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,15 @@ def report_change(self, original_node, description: str | None = None):
Change(
lineNumber=line_number,
description=description or self.change_description,
# TODO: add fixed findings
)
)

def report_unfixed(self, original_node: cst.CSTNode, reason: str):
pass
# results = self.results_for_node(original_node)
# self.file_context.unfixed_findings.extend(results)

def remove_unused_import(self, original_node):
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)

Expand Down
16 changes: 3 additions & 13 deletions src/codemodder/codemods/test/integration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,9 @@ def check_sonar_issues(cls):
)

assert (
cls.codemod.rule_id in sonar_results
cls.codemod.requested_rules[-1] in sonar_results
), f"Make sure to add a sonar issue/hotspot for {cls.codemod.rule_id} in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}"
results_for_codemod = sonar_results[cls.codemod.rule_id]
results_for_codemod = sonar_results[cls.codemod.requested_rules[-1]]
file_path = pathlib.Path(cls.code_filename)
assert (
file_path in results_for_codemod
Expand All @@ -300,19 +300,9 @@ def _assert_sonar_fields(self, result):
assert self.codemod_instance._metadata.tool is not None
assert (
result["references"][-1]["description"]
== self.codemod_instance._metadata.tool.rule_name
== self.codemod_instance._metadata.tool.rules[0].name
)
assert result["detectionTool"]["name"] == "Sonar"
assert (
result["detectionTool"]["rule"]["id"]
== self.codemod_instance._metadata.tool.rule_id
)
assert (
result["detectionTool"]["rule"]["name"]
== self.codemod_instance._metadata.tool.rule_name
)
# TODO: empty array until we add findings metadata
assert result["detectionTool"]["findings"] == []


def original_and_expected_from_code_path(code_path, replacements):
Expand Down
16 changes: 8 additions & 8 deletions src/codemodder/codetf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class Change(BaseModel):
diffSide: DiffSide = DiffSide.RIGHT
properties: Optional[dict] = None
packageActions: Optional[list[PackageAction]] = None
finding: Optional[Finding] = None


class ChangeSet(BaseModel):
Expand Down Expand Up @@ -80,19 +81,17 @@ class Rule(BaseModel):

class Finding(BaseModel):
id: str
fixed: bool
reason: Optional[str] = None
rule: Rule

@model_validator(mode="after")
def validate_reason(self):
assert self.fixed or self.reason, "reason is required if fixed is False"
return self

class UnfixedFinding(Finding):
path: str
lineNumber: Optional[int] = None
reason: str


class DetectionTool(BaseModel):
name: str
rule: Rule
findings: list[Finding] = []


class Result(BaseModel):
Expand All @@ -104,6 +103,7 @@ class Result(BaseModel):
properties: Optional[dict] = None
failedFiles: Optional[list[str]] = None
changeset: list[ChangeSet]
unfixedFindings: Optional[list[UnfixedFinding]] = None


class Sarif(BaseModel):
Expand Down
3 changes: 2 additions & 1 deletion src/codemodder/file_context.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass, field
from pathlib import Path

from codemodder.codetf import Change, ChangeSet
from codemodder.codetf import Change, ChangeSet, UnfixedFinding
from codemodder.dependency import Dependency
from codemodder.result import Result
from codemodder.utils.timer import Timer
Expand All @@ -20,6 +20,7 @@ class FileContext:
findings: list[Result] | None = field(default_factory=list)
dependencies: set[Dependency] = field(default_factory=set)
codemod_changes: list[Change] = field(default_factory=list)
unfixed_findings: list[UnfixedFinding] = field(default_factory=list)
results: list[ChangeSet] = field(default_factory=list)
failures: list[Path] = field(default_factory=list)
timer: Timer = field(default_factory=Timer)
Expand Down
7 changes: 6 additions & 1 deletion src/codemodder/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Location(ABCDataclass):
end: LineInfo


@dataclass
@dataclass(kw_only=True)
class Result(ABCDataclass):
rule_id: str
locations: list[Location]
Expand All @@ -47,6 +47,11 @@ def match_location(self, pos: CodeRange, node: cst.CSTNode) -> bool:
)


@dataclass(kw_only=True)
class SASTResult(Result):
finding_id: str


def same_line(pos: CodeRange, location: Location) -> bool:
return pos.start.line == location.start.line and pos.end.line == location.end.line

Expand Down
14 changes: 9 additions & 5 deletions src/core_codemods/defectdojo/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from typing_extensions import override

from codemodder.codemods.api import Metadata, Reference, ToolMetadata
from codemodder.codemods.api import Metadata, Reference, ToolMetadata, ToolRule
from codemodder.codemods.base_detector import BaseDetector
from codemodder.context import CodemodExecutionContext
from codemodder.result import ResultSet
Expand Down Expand Up @@ -62,9 +62,13 @@ def from_core_codemod(
+ other.description,
tool=ToolMetadata(
name="DefectDojo",
rule_id=rule_id,
rule_name=rule_name,
rule_url=rule_url,
rules=[
ToolRule(
id=rule_id,
name=rule_name,
url=rule_url,
)
],
),
),
transformer=other.transformer,
Expand All @@ -82,5 +86,5 @@ def apply(
context,
files_to_analyze,
# We know this has a tool because we created it with `from_core_codemod`
[cast(ToolMetadata, self._metadata.tool).rule_id],
cast(ToolMetadata, self._metadata.tool).rule_ids,
)
5 changes: 3 additions & 2 deletions src/core_codemods/defectdojo/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from libcst._position import CodeRange
from typing_extensions import Self, override

from codemodder.result import LineInfo, Location, Result, ResultSet
from codemodder.result import LineInfo, Location, ResultSet, SASTResult


class DefectDojoLocation(Location):
Expand All @@ -20,10 +20,11 @@ def from_finding(cls, finding: dict) -> Self:
)


class DefectDojoResult(Result):
class DefectDojoResult(SASTResult):
@classmethod
def from_finding(cls, finding: dict) -> Self:
return cls(
finding_id=finding["id"],
rule_id=finding["title"],
locations=[DefectDojoLocation.from_finding(finding)],
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import libcst as cst

from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata
from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata, ToolRule
from codemodder.codemods.libcst_transformer import (
LibcstResultTransformer,
LibcstTransformerPipeline,
Expand All @@ -20,14 +20,22 @@ class AvoidInsecureDeserializationTransformer(
def leave_Call(
self, original_node: cst.Call, updated_node: cst.Call
) -> cst.Call | None:
if (
self.node_is_selected(original_node)
and self.find_base_name(original_node) == "yaml.load"
):
self.report_change(
original_node, description="Use SafeLoader in pyyaml.load calls"
)
return self.update_call(original_node, updated_node)
if not self.node_is_selected(original_node):
return updated_node

match self.find_base_name(original_node):
case "pickle.loads":
self.report_unfixed(
original_node,
reason="`fickling` does not yet support `pickle.loads`",
)
return updated_node
case "yaml.load":
self.report_change(
original_node,
description="Use SafeLoader in pyyaml.load calls",
)
return self.update_call(original_node, updated_node)

return updated_node

Expand All @@ -39,9 +47,13 @@ def leave_Call(
review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
tool=ToolMetadata(
name="DefectDojo",
rule_id="python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization",
rule_name="avoid-insecure-deserialization",
rule_url="https://semgrep.dev/playground/r/python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization",
rules=[
ToolRule(
id="python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization",
name="avoid-insecure-deserialization",
url="https://semgrep.dev/playground/r/python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization",
)
],
),
references=[],
),
Expand Down
12 changes: 8 additions & 4 deletions src/core_codemods/defectdojo/semgrep/django_secure_set_cookie.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import libcst as cst

from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata
from codemodder.codemods.api import Metadata, ReviewGuidance, ToolMetadata, ToolRule
from codemodder.codemods.libcst_transformer import (
LibcstResultTransformer,
LibcstTransformerPipeline,
Expand Down Expand Up @@ -41,9 +41,13 @@ def leave_Call(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Cal
review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
tool=ToolMetadata(
name="DefectDojo",
rule_id="python.django.security.audit.secure-cookies.django-secure-set-cookie",
rule_name="django-secure-set-cookie",
rule_url="https://semgrep.dev/playground/r/python.django.security.audit.secure-cookies.django-secure-set-cookie",
rules=[
ToolRule(
id="python.django.security.audit.secure-cookies.django-secure-set-cookie",
name="django-secure-set-cookie",
url="https://semgrep.dev/playground/r/python.django.security.audit.secure-cookies.django-secure-set-cookie",
)
],
),
references=[],
),
Expand Down
Loading

0 comments on commit b8a62db

Please sign in to comment.