Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable use-defusedxml codemod #95

Merged
merged 4 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[run]
source = codemodder
omit =
*/codemodder/scripts/*

[paths]
codemodder =
Expand Down
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ repos:
- id: trailing-whitespace
exclude: |
(?x)^(
src/core_codemods/docs/.*
src/core_codemods/docs/.*|
integration_tests/.*
)$
- id: check-added-large-files
- repo: https://github.com/psf/black
Expand Down
1 change: 1 addition & 0 deletions ci_tests/test_pygoat_findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"pixee:python/harden-pyyaml",
"pixee:python/django-debug-flag-on",
"pixee:python/url-sandbox",
"pixee:python/use-defusedxml",
"pixee:python/use-walrus-if",
]

Expand Down
2 changes: 1 addition & 1 deletion integration_tests/test_process_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ class TestProcessSandbox(BaseIntegrationTest):

requirements_path = "tests/samples/requirements.txt"
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
expected_new_reqs = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity~=1.2.0"
expected_new_reqs = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity~=1.2.0\n"
2 changes: 1 addition & 1 deletion integration_tests/test_url_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ class TestUrlSandbox(BaseIntegrationTest):

requirements_path = "tests/samples/requirements.txt"
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
expected_new_reqs = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity~=1.2.0"
expected_new_reqs = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity~=1.2.0\n"
42 changes: 42 additions & 0 deletions integration_tests/test_use_defusedxml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from core_codemods.use_defused_xml import UseDefusedXml
from integration_tests.base_test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)


class TestUseDefusedXml(BaseIntegrationTest):
codemod = UseDefusedXml
code_path = "tests/samples/use_defusedxml.py"

original_code, _ = original_and_expected_from_code_path(code_path, [])
expected_new_code = """\
from io import StringIO
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can now indent?

from xml.etree import ElementInclude # pylint: disable=unused-import
import defusedxml.ElementTree

xml = StringIO("<root>Hello XML</root>")
et = defusedxml.ElementTree.parse(xml)
"""

num_changed_files = 2
expected_diff = """\
---
+++
@@ -1,5 +1,6 @@
from io import StringIO
-from xml.etree import ElementTree, ElementInclude # pylint: disable=unused-import
+from xml.etree import ElementInclude # pylint: disable=unused-import
+import defusedxml.ElementTree

xml = StringIO("<root>Hello XML</root>")
-et = ElementTree.parse(xml)
+et = defusedxml.ElementTree.parse(xml)
"""

expected_line_change = "5"
change_description = UseDefusedXml.CHANGE_DESCRIPTION

requirements_path = "tests/samples/requirements.txt"
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
expected_new_reqs = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\ndefusedxml~=0.7.1\n"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [

[project.scripts]
codemodder = "codemodder.codemodder:main"
generate-docs = 'scripts.generate_docs:main'
generate-docs = 'codemodder.scripts.generate_docs:main'


[project.entry-points.codemods]
Expand Down
14 changes: 8 additions & 6 deletions src/codemodder/dependency_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def new_requirements(self) -> list[str]:
def add(self, dependencies: list[Dependency]):
"""add any number of dependencies to the end of list of dependencies."""
for dep in dependencies:
if dep not in self.dependencies:
self.dependencies.update({dep.requirement: None})
if dep.requirement.name not in self.dependencies:
self.dependencies.update({dep.requirement.name: dep.requirement})
self._new_requirements.append(dep)

def write(self, dry_run: bool = False) -> Optional[ChangeSet]:
Expand Down Expand Up @@ -58,7 +58,7 @@ def write(self, dry_run: bool = False) -> Optional[ChangeSet]:
if not dry_run:
with open(self.dependency_file, "w", encoding="utf-8") as f:
f.writelines(self._lines)
f.writelines(self.new_requirements)
f.writelines([f"{line}\n" for line in self.new_requirements])

self.dependency_file_changed = True
return ChangeSet(str(self.dependency_file), diff, changes=changes)
Expand All @@ -77,7 +77,7 @@ def dependency_file(self) -> Optional[Path]:
return None

@cached_property
def dependencies(self) -> dict[Requirement, None]:
def dependencies(self) -> dict[str, Requirement]:
"""
Extract list of dependencies from requirements.txt file.
Same order of requirements is maintained, no alphabetical sorting is done.
Expand All @@ -89,8 +89,10 @@ def dependencies(self) -> dict[Requirement, None]:
self._lines = f.readlines()

return {
Requirement(line): None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the intention here was order preservation? not sure but change LGTM

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that makes sense. We actually don't need that anymore since we preserve the original lines, but we still want to check for the presence of existing dependencies, and we want to do it on the basis of name only (and not version).

requirement.name: requirement
for x in self._lines
# Skip empty lines and comments
if (line := x.strip()) and not line.startswith("#")
if (line := x.strip())
and not line.startswith("#")
and (requirement := Requirement(line))
}
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ class DocMetadata:
+resp = safe_requests.get.get(url, allowed_protocols=("ftp",))
```""",
),
"use-defusedxml": DocMetadata(
importance="High",
guidance_explained="We believe this change is safe and effective and guards against serious XML vulnerabilities. You should review this code before merging to make sure the dependency has been properly added to your project.",
),
"use-walrus-if": DocMetadata(
importance="Low",
guidance_explained="We believe that using the walrus operator is an improvement in terms of clarity and readability. However, this change is only compatible with codebases that support Python 3.8 and later, so it requires quick validation before merging.",
Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .upgrade_sslcontext_minimum_version import UpgradeSSLContextMinimumVersion
from .upgrade_sslcontext_tls import UpgradeSSLContextTLS
from .url_sandbox import UrlSandbox
from .use_defused_xml import UseDefusedXml
from .use_walrus_if import UseWalrusIf
from .with_threading_lock import WithThreadingLock

Expand Down Expand Up @@ -51,6 +52,7 @@
UpgradeSSLContextMinimumVersion,
UpgradeSSLContextTLS,
UrlSandbox,
UseDefusedXml,
UseWalrusIf,
WithThreadingLock,
SQLQueryParameterization,
Expand Down
24 changes: 24 additions & 0 deletions src/core_codemods/docs/pixee_python_use-defusedxml.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
You might be surprised to learn that Python's standard library XML libraries are
[considered insecure](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
against various kinds of attacks.

In fact, the [Python documentation
itself](https://docs.python.org/3/library/xml.html#the-defusedxml-package)
recommends the use of [defusedxml](https://pypi.org/project/defusedxml/) for
parsing untrusted XML data. `defusedxml` is an
[open-source](https://github.com/tiran/defusedxml), permissively licensed
project that is intended as a drop-in replacement for Python's standard library
XML parsers.

This codemod updates all relevant uses of the standard library parsers with
safe versions from `defusedxml`. It also adds the `defusedxml` dependency to
your project where possible.

The changes from this codemod look like this:
```diff
- from xml.etree.ElementTree import parse
+ import defusedxml.ElementTree

- et = parse('data.xml')
+ et = defusedxml.ElementTree.parse('data.xml')
```
5 changes: 5 additions & 0 deletions tests/samples/use_defusedxml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from io import StringIO
from xml.etree import ElementTree, ElementInclude # pylint: disable=unused-import

xml = StringIO("<root>Hello XML</root>")
et = ElementTree.parse(xml)
2 changes: 2 additions & 0 deletions tests/test_codemod_docs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from codemodder.registry import load_registered_codemods
from codemodder.scripts.generate_docs import METADATA


def pytest_generate_tests(metafunc):
Expand All @@ -18,3 +19,4 @@ def test_load_codemod_docs_info(codemod):
"Merge After Cursory Review",
"Merge Without Review",
)
assert codemod.name in METADATA
45 changes: 42 additions & 3 deletions tests/test_dependency_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from codemodder.dependency import DefusedXML
from codemodder.dependency import DefusedXML, Security
from codemodder.dependency_manager import DependencyManager, Requirement


Expand All @@ -19,7 +19,7 @@ def test_read_dependency_file(self, tmpdir):
dependency_file.write_text("requests\n", encoding="utf-8")

dm = DependencyManager(Path(tmpdir))
assert dm.dependencies == {Requirement("requests"): None}
assert dm.dependencies == {"requests": Requirement("requests")}

@pytest.mark.parametrize("dry_run", [True, False])
def test_add_dependency_preserve_comments(self, tmpdir, dry_run):
Expand All @@ -32,7 +32,7 @@ def test_add_dependency_preserve_comments(self, tmpdir, dry_run):
changeset = dm.write(dry_run=dry_run)

assert dependency_file.read_text(encoding="utf-8") == (
contents if dry_run else "# comment\n\nrequests\ndefusedxml~=0.7.1"
contents if dry_run else "# comment\n\nrequests\ndefusedxml~=0.7.1\n"
)

assert changeset is not None
Expand All @@ -50,3 +50,42 @@ def test_add_dependency_preserve_comments(self, tmpdir, dry_run):
assert changeset.changes[0].lineNumber == 4
assert changeset.changes[0].description == DefusedXML.build_description()
assert changeset.changes[0].properties == {"contextual_description": True}

def test_add_multiple_dependencies(self, tmpdir):
dependency_file = Path(tmpdir) / "requirements.txt"
dependency_file.write_text("requests\n", encoding="utf-8")

for dep in [DefusedXML, Security]:
dm = DependencyManager(Path(tmpdir))
dm.add([dep])
dm.write()

assert dependency_file.read_text(encoding="utf-8") == (
"requests\ndefusedxml~=0.7.1\nsecurity~=1.2.0\n"
)

def test_add_same_dependency_only_once(self, tmpdir):
dependency_file = Path(tmpdir) / "requirements.txt"
dependency_file.write_text("requests\n", encoding="utf-8")

for dep in [Security, Security]:
dm = DependencyManager(Path(tmpdir))
dm.add([dep])
dm.write()

assert dependency_file.read_text(encoding="utf-8") == (
"requests\nsecurity~=1.2.0\n"
)

@pytest.mark.parametrize("version", ["1.2.0", "1.0.1"])
def test_dont_add_existing_dependency(self, version, tmpdir):
dependency_file = Path(tmpdir) / "requirements.txt"
dependency_file.write_text(f"requests\nsecurity~={version}\n", encoding="utf-8")

dm = DependencyManager(Path(tmpdir))
dm.add([Security])
dm.write()

assert dependency_file.read_text(encoding="utf-8") == (
f"requests\nsecurity~={version}\n"
)