Skip to content

Commit

Permalink
Add codemod harden-pickle-load (#332)
Browse files Browse the repository at this point in the history
* Implement `harden-pickle-load` codemod

* Refactor to share code for import modifier codemods
  • Loading branch information
drdavella authored Mar 6, 2024
1 parent aa607c9 commit 018ce52
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 54 deletions.
51 changes: 51 additions & 0 deletions integration_tests/test_harden_pickle_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from codemodder.codemods.test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)
from codemodder.dependency import Fickling
from core_codemods.harden_pickle_load import HardenPickleLoad


class TestHardenPickleLoad(BaseIntegrationTest):
codemod = HardenPickleLoad
code_path = "tests/samples/harden_pickle.py"

original_code, _ = original_and_expected_from_code_path(code_path, [])
expected_new_code = """
import fickling
try:
data = fickling.load(open("some.pickle", "rb"))
except FileNotFoundError:
data = None
""".lstrip()

expected_diff = """
---
+++
@@ -1,6 +1,6 @@
-import pickle
+import fickling
try:
- data = pickle.load(open("some.pickle", "rb"))
+ data = fickling.load(open("some.pickle", "rb"))
except FileNotFoundError:
data = None
""".lstrip()

num_changed_files = 2
change_description = HardenPickleLoad.change_description
expected_line_change = 4

requirements_path = "tests/samples/requirements.txt"
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
expected_new_reqs = (
f"# file used to test dependency management\n"
"requests==2.31.0\n"
"black==23.7.*\n"
"mypy~=1.4\n"
"pylint>1\n"
f"{Fickling.requirement} \\\n"
f"{Fickling.build_hashes()}"
)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ test = [
"django>=4,<6",
"numpy~=1.26.0",
"flask_wtf~=1.2.0",
"fickling~=0.1.0",
]
complexity = [
"radon==6.0.*",
Expand Down
70 changes: 70 additions & 0 deletions src/codemodder/codemods/import_modifier_codemod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from abc import ABCMeta, abstractmethod
from typing import Mapping

import libcst as cst
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor

from codemodder.codemods.api import SimpleCodemod
from codemodder.codemods.imported_call_modifier import ImportedCallModifier
from codemodder.dependency import Dependency


class MappingImportedCallModifier(ImportedCallModifier[Mapping[str, str]]):
def update_attribute(self, true_name, original_node, updated_node, new_args):
if not self.node_is_selected(original_node):
return updated_node

import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)

def update_simple_name(self, true_name, original_node, updated_node, new_args):
if not self.node_is_selected(original_node):
return updated_node

import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)


class ImportModifierCodemod(SimpleCodemod, metaclass=ABCMeta):
@property
def dependency(self) -> Dependency | None:
return None

@property
@abstractmethod
def mapping(self) -> Mapping[str, str]:
pass

def transform_module_impl(self, tree: cst.Module) -> cst.Module:
if not self.node_is_selected(tree):
return tree

visitor = MappingImportedCallModifier(
self.context,
self.file_context,
self.mapping,
self.change_description,
self.results,
)
result_tree = visitor.transform_module(tree)
self.file_context.codemod_changes.extend(visitor.changes_in_file)
if visitor.changes_in_file and (dependency := self.dependency):
self.add_dependency(dependency)

return result_tree
7 changes: 6 additions & 1 deletion src/codemodder/codemods/imported_call_modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from libcst.metadata import PositionProvider

from codemodder.change import Change
from codemodder.codemods.base_visitor import UtilsMixin
from codemodder.codemods.utils_mixin import NameResolutionMixin
from codemodder.file_context import FileContext
from codemodder.result import Result

# It seems to me like we actually want two separate bounds instead of a Union but this is what mypy wants
FunctionMatchType = TypeVar("FunctionMatchType", bound=Union[Mapping, Set])
Expand All @@ -18,6 +20,7 @@ class ImportedCallModifier(
Generic[FunctionMatchType],
VisitorBasedCodemodCommand,
NameResolutionMixin,
UtilsMixin,
metaclass=abc.ABCMeta,
):
METADATA_DEPENDENCIES = (PositionProvider,)
Expand All @@ -28,13 +31,15 @@ def __init__(
file_context: FileContext,
matching_functions: FunctionMatchType,
change_description: str,
results: list[Result] | None = None,
):
super().__init__(codemod_context)
VisitorBasedCodemodCommand.__init__(self, codemod_context)
self.line_exclude = file_context.line_exclude
self.line_include = file_context.line_include
self.matching_functions: FunctionMatchType = matching_functions
self.change_description = change_description
self.changes_in_file: list[Change] = []
self.results = results

def updated_args(self, original_args: Sequence[cst.Arg]):
return original_args
Expand Down
15 changes: 15 additions & 0 deletions src/codemodder/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,21 @@ def __hash__(self):
package_link="https://pypi.org/project/security/",
)

Fickling = Dependency(
Requirement("fickling~=0.1.0"),
hashes=[
"a5bb5982e2c0e86d41fceaf9576929f0e7bfeef53998248f69c885224cf45739",
"1d74a9ef84e56ecd3114563907166bfa65e17e3a00190157c1514fff08e086b4",
],
description="""This package provides analysis of pickled data to help identify potential security vulnerabilities.""",
_license=License(
"LGPL-3.0",
"https://opensource.org/license/LGPL-3.0/",
),
oss_link="https://github.com/trailofbits/fickling",
package_link="https://pypi.org/project/fickling/",
)


DEPENDENCY_NOTIFICATION = """
## Dependency Updates
Expand Down
4 changes: 4 additions & 0 deletions src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ class DocMetadata:
importance="Low",
guidance_explained="We believe this change is safe because using `callable` is a more reliable way to check if an object is a callable.",
),
"harden-pickle-load": DocMetadata(
importance="High",
guidance_explained="This change may impact performance in some cases, but it is recommended when handling untrusted data.",
),
}

METADATA = CORE_METADATA | {
Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .fix_mutable_params import FixMutableParams
from .flask_enable_csrf_protection import FlaskEnableCSRFProtection
from .flask_json_response_type import FlaskJsonResponseType
from .harden_pickle_load import HardenPickleLoad
from .harden_pyyaml import HardenPyyaml
from .harden_ruamel import HardenRuamel
from .https_connection import HTTPSConnection
Expand Down Expand Up @@ -75,6 +76,7 @@
EnableJinja2Autoescape,
FixDeprecatedAbstractproperty,
FixMutableParams,
HardenPickleLoad,
HardenPyyaml,
HardenRuamel,
HTTPSConnection,
Expand Down
2 changes: 1 addition & 1 deletion src/core_codemods/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# ruff: noqa: F401
from codemodder.codemods.api import Metadata, Reference, ReviewGuidance

from .core_codemod import CoreCodemod, SimpleCodemod
from .core_codemod import CoreCodemod, ImportModifierCodemod, SimpleCodemod
7 changes: 7 additions & 0 deletions src/core_codemods/api/core_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from codemodder.codemods.base_codemod import Metadata
from codemodder.codemods.base_detector import BaseDetector
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codemods.import_modifier_codemod import (
ImportModifierCodemod as _ImportModifierCodemod,
)
from codemodder.context import CodemodExecutionContext


Expand Down Expand Up @@ -52,3 +55,7 @@ class SimpleCodemod(_SimpleCodemod):
"""

codemod_base = CoreCodemod


class ImportModifierCodemod(SimpleCodemod, _ImportModifierCodemod):
pass
14 changes: 14 additions & 0 deletions src/core_codemods/docs/pixee_python_harden-pickle-load.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Python's `pickle` module is notoriouly insecure. While it is very useful for serializing and deserializing Python objects, it is not safe to use `pickle` to load data from untrusted sources. This is because `pickle` can execute arbitrary code when loading data. This can be exploited by an attacker to execute arbitrary code on your system. Unlike `yaml` there is no concept of a "safe" loader in `pickle`. Therefore, it is recommended to avoid `pickle` and to use a different serialization format such as `json` or `yaml` when working with untrusted data.

However, if you must use `pickle` to load data from an untrusted source, we recommend using the open-source `fickling` library. `fickling` is a drop-in replacement for `pickle` that validates the data before loading it and checks for the possibility of code execution. This makes it much safer (although still not entirely safe) to use `pickle` to load data from untrusted sources.

This codemod replaces calls to `pickle.load` with `fickling.load` in Python code. It also adds an import statement for `fickling` if it is not already present.

The changes look like the following:
```diff
- import pickle
+ import fickling

- data = pickle.load(file)
+ data = fickling.load(file)
```
37 changes: 37 additions & 0 deletions src/core_codemods/harden_pickle_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Mapping

from codemodder.dependency import Dependency, Fickling
from core_codemods.api import ImportModifierCodemod, Metadata, Reference, ReviewGuidance


class HardenPickleLoad(ImportModifierCodemod):
metadata = Metadata(
name="harden-pickle-load",
summary="Harden `pickle.load()` against deserialization attacks",
review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
references=[
Reference(url="https://docs.python.org/3/library/pickle.html"),
Reference(
url="https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data"
),
Reference(
url="https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html#clear-box-review_1"
),
Reference(
url="https://github.com/trailofbits/fickling",
),
],
)

change_description = "Harden `pickle.load()` against deserialization attacks"

@property
def dependency(self) -> Dependency:
return Fickling

@property
def mapping(self) -> Mapping[str, str]:
# NOTE: the fickling api doesn't seem to support `loads` yet
return {
"pickle.load": "fickling",
}
59 changes: 7 additions & 52 deletions src/core_codemods/use_defused_xml.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,15 @@
from functools import cached_property
from typing import Mapping

import libcst as cst
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor

from codemodder.codemods.imported_call_modifier import ImportedCallModifier
from codemodder.dependency import DefusedXML
from core_codemods.api import Metadata, Reference, ReviewGuidance, SimpleCodemod


class DefusedXmlModifier(ImportedCallModifier[Mapping[str, str]]):
def update_attribute(self, true_name, original_node, updated_node, new_args):
import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)

def update_simple_name(self, true_name, original_node, updated_node, new_args):
import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)

from codemodder.dependency import DefusedXML, Dependency
from core_codemods.api import ImportModifierCodemod, Metadata, Reference, ReviewGuidance

ETREE_METHODS = ["parse", "fromstring", "iterparse", "XMLParser"]
SAX_METHODS = ["parse", "make_parser", "parseString"]
DOM_METHODS = ["parse", "parseString"]
# TODO: add expat methods?


class UseDefusedXml(SimpleCodemod):
class UseDefusedXml(ImportModifierCodemod):
metadata = Metadata(
name="use-defusedxml",
summary="Use `defusedxml` for Parsing XML",
Expand All @@ -63,7 +31,7 @@ class UseDefusedXml(SimpleCodemod):
change_description = "Replace builtin XML method with safe `defusedxml` method"

@cached_property
def matching_functions(self) -> dict[str, str]:
def mapping(self) -> dict[str, str]:
"""Build a mapping of functions to their defusedxml imports"""
_matching_functions: dict[str, str] = {}
for module, defusedxml, methods in [
Expand All @@ -78,19 +46,6 @@ def matching_functions(self) -> dict[str, str]:
)
return _matching_functions

def transform_module_impl(self, tree: cst.Module) -> cst.Module:
if not self.node_is_selected(tree):
return tree

visitor = DefusedXmlModifier(
self.context,
self.file_context,
self.matching_functions,
self.change_description,
)
result_tree = visitor.transform_module(tree)
self.file_context.codemod_changes.extend(visitor.changes_in_file)
if visitor.changes_in_file:
self.add_dependency(DefusedXML)

return result_tree
@property
def dependency(self) -> Dependency:
return DefusedXML
Loading

0 comments on commit 018ce52

Please sign in to comment.