Skip to content

Commit

Permalink
Semgrep defused xml codemod (#705)
Browse files Browse the repository at this point in the history
* do not short circuit on entire tree

* new semgrep use defusedxml

* update docs

* fix description
  • Loading branch information
clavedeluna authored Jul 10, 2024
1 parent 0105c7d commit c5b471c
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 6 deletions.
3 changes: 0 additions & 3 deletions src/codemodder/codemods/import_modifier_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ def mapping(self) -> Mapping[str, str]:
pass

def transform_module_impl(self, tree: cst.Module) -> cst.Module:
if not self.node_is_selected(tree):
return tree

visitor = MappingImportedCallModifier(
self.context,
self.file_context,
Expand Down
1 change: 1 addition & 0 deletions src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ class DocMetadata:
# Names of the codemods documented under the Semgrep tool.
# NOTE(review): presumably these are the names iterated to build
# SEMGREP_CODEMODS below — the comprehension is cut off here; confirm.
SEMGREP_CODEMOD_NAMES = [
"enable-jinja2-autoescape",
"jwt-decode-verify",
"use-defusedxml",
]
SEMGREP_CODEMODS = {
name: DocMetadata(
Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from .secure_random import SecureRandom
from .semgrep.semgrep_enable_jinja2_autoescape import SemgrepEnableJinja2Autoescape
from .semgrep.semgrep_jwt_decode_verify import SemgrepJwtDecodeVerify
from .semgrep.semgrep_use_defused_xml import SemgrepUseDefusedXml
from .sonar.sonar_break_or_continue_out_of_loop import SonarBreakOrContinueOutOfLoop
from .sonar.sonar_disable_graphql_introspection import SonarDisableGraphQLIntrospection
from .sonar.sonar_django_json_response_type import SonarDjangoJsonResponseType
Expand Down Expand Up @@ -200,5 +201,6 @@
codemods=[
SemgrepEnableJinja2Autoescape,
SemgrepJwtDecodeVerify,
SemgrepUseDefusedXml,
],
)
55 changes: 52 additions & 3 deletions src/core_codemods/semgrep/api.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from codemodder.codemods.api import SimpleCodemod
from codemodder.codemods.base_codemod import Metadata, Reference, ToolMetadata, ToolRule
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codemods.libcst_transformer import LibcstTransformerPipeline
from codemodder.codemods.semgrep import SemgrepSarifFileDetector
from core_codemods.api.core_codemod import CoreCodemod, SASTCodemod


def semgrep_url_from_id(rule_id: str) -> str:
    """Return the semgrep.dev URL for the rule identified by ``rule_id``.

    Args:
        rule_id: Semgrep rule identifier; it is inserted verbatim as the
            value of the ``q`` query parameter (no escaping is applied).

    Returns:
        The string ``https://semgrep.dev/r?q=<rule_id>``.
    """
    return f"https://semgrep.dev/r?q={rule_id}"


class SemgrepCodemod(SASTCodemod):
@property
def origin(self):
Expand All @@ -18,14 +24,18 @@ def from_core_codemod(
rule_name: str,
transformer: BaseTransformerPipeline | None = None,
):
rule_url = f"https://semgrep.dev/r?q={rule_id}"
return SemgrepCodemod(
metadata=Metadata(
name=name,
summary=other.summary,
review_guidance=other._metadata.review_guidance,
references=(
other.references + [Reference(url=rule_url, description=rule_name)]
other.references
+ [
Reference(
url=semgrep_url_from_id(rule_id), description=rule_name
)
]
),
description=other.description,
tool=ToolMetadata(
Expand All @@ -34,7 +44,7 @@ def from_core_codemod(
ToolRule(
id=rule_id,
name=rule_name,
url=rule_url,
url=semgrep_url_from_id(rule_id),
)
],
),
Expand All @@ -43,3 +53,42 @@ def from_core_codemod(
detector=SemgrepSarifFileDetector(),
requested_rules=[rule_id],
)

@classmethod
def from_import_modifier_codemod(
    cls,
    name: str,
    other: type[SimpleCodemod],
    rule_id: str,
    rule_name: str,
):
    """Build a ``SemgrepCodemod`` from an existing ``SimpleCodemod`` class.

    The new codemod reuses the summary, review guidance and references of
    ``other``'s metadata, appends a reference to the Semgrep rule, and runs
    ``other`` through a ``LibcstTransformerPipeline`` on results detected
    by ``SemgrepSarifFileDetector``.

    Args:
        name: Name given to the resulting codemod.
        other: The ``SimpleCodemod`` class whose metadata and transformation
            are reused.
        rule_id: Semgrep rule identifier; also the only requested rule.
        rule_name: Human-readable rule name, used as the description of the
            added reference and as the tool rule name.

    Returns:
        A ``SemgrepCodemod`` instance configured for ``rule_id``.
    """
    metadata = other.metadata
    # Built once: the same URL is used for the reference and the tool rule.
    rule_url = semgrep_url_from_id(rule_id)
    return SemgrepCodemod(
        metadata=Metadata(
            name=name,
            summary=metadata.summary,
            review_guidance=metadata.review_guidance,
            references=(
                metadata.references
                + [Reference(url=rule_url, description=rule_name)]
            ),
            # The description is read from the codemod class itself
            # (``change_description``), not from its metadata object.
            description=other.change_description,
            tool=ToolMetadata(
                name="Semgrep",
                rules=[
                    ToolRule(
                        id=rule_id,
                        name=rule_name,
                        url=rule_url,
                    )
                ],
            ),
        ),
        transformer=LibcstTransformerPipeline(other),
        detector=SemgrepSarifFileDetector(),
        requested_rules=[rule_id],
    )
9 changes: 9 additions & 0 deletions src/core_codemods/semgrep/semgrep_use_defused_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from core_codemods.semgrep.api import SemgrepCodemod
from core_codemods.use_defused_xml import UseDefusedXml

# Semgrep-driven variant of the UseDefusedXml codemod: built from the
# UseDefusedXml class and tied to the Semgrep rule id given below.
SemgrepUseDefusedXml = SemgrepCodemod.from_import_modifier_codemod(
name="use-defusedxml",
other=UseDefusedXml,
rule_id="python.lang.security.use-defused-xml-parse.use-defused-xml-parse",
rule_name="use-defused-xml-parse",
)
72 changes: 72 additions & 0 deletions tests/codemods/semgrep/test_semgrep_use_defused_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import json

import mock

from codemodder.codemods.test import BaseSASTCodemodTest
from codemodder.dependency import DefusedXML
from core_codemods.semgrep.semgrep_use_defused_xml import SemgrepUseDefusedXml


class TestSemgrepUseDefusedXml(BaseSASTCodemodTest):
    """Tests for the Semgrep-driven ``use-defusedxml`` codemod."""

    codemod = SemgrepUseDefusedXml
    tool = "semgrep"

    def test_name(self):
        """The codemod is registered under the expected name."""
        assert self.codemod.name == "use-defusedxml"

    @mock.patch("codemodder.codemods.api.FileContext.add_dependency")
    def test_etree_parse(self, add_dependency, tmpdir):
        """A flagged ``parse`` call is rewritten to ``defusedxml.ElementTree.parse``.

        Also checks that the ``DefusedXML`` dependency is added exactly once.
        """
        # The blank line after the import keeps the `parse(...)` call on
        # line 3, columns 6-23, which is the region reported in the SARIF
        # fixture below.
        original_code = """\
from xml.etree.ElementTree import parse

et = parse(user_input)
"""

        new_code = """\
import defusedxml.ElementTree

et = defusedxml.ElementTree.parse(user_input)
"""

        # Minimal SARIF-shaped payload with a single result for the rule.
        results = {
            "runs": [
                {
                    "results": [
                        {
                            "fingerprints": {"matchBasedId/v1": "123"},
                            "locations": [
                                {
                                    "physicalLocation": {
                                        "artifactLocation": {
                                            "uri": "code.py",
                                            "uriBaseId": "%SRCROOT%",
                                        },
                                        "region": {
                                            "endColumn": 23,
                                            "endLine": 3,
                                            "snippet": {
                                                "text": "et = parse(user_input)"
                                            },
                                            "startColumn": 6,
                                            "startLine": 3,
                                        },
                                    }
                                }
                            ],
                            "message": {
                                "text": 'The native Python `xml` library is vulnerable to XML External Entity (XXE) attacks. These attacks can leak confidential data and "XML bombs" can cause denial of service. Do not use this library to parse untrusted input. Instead the Python documentation recommends using `defusedxml`.'
                            },
                            "ruleId": "python.lang.security.use-defused-xml-parse.use-defused-xml-parse",
                        }
                    ]
                }
            ]
        }

        self.run_and_assert(
            tmpdir,
            original_code,
            new_code,
            results=json.dumps(results),
        )
        add_dependency.assert_called_once_with(DefusedXML)

0 comments on commit c5b471c

Please sign in to comment.