Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add codemod harden-pickle-load #332

Merged
merged 2 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions integration_tests/test_harden_pickle_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from codemodder.codemods.test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)
from codemodder.dependency import Fickling
from core_codemods.harden_pickle_load import HardenPickleLoad


class TestHardenPickleLoad(BaseIntegrationTest):
codemod = HardenPickleLoad
code_path = "tests/samples/harden_pickle.py"

original_code, _ = original_and_expected_from_code_path(code_path, [])
expected_new_code = """
import fickling

try:
data = fickling.load(open("some.pickle", "rb"))
except FileNotFoundError:
data = None
""".lstrip()

expected_diff = """
---
+++
@@ -1,6 +1,6 @@
-import pickle
+import fickling

try:
- data = pickle.load(open("some.pickle", "rb"))
+ data = fickling.load(open("some.pickle", "rb"))
except FileNotFoundError:
data = None
""".lstrip()

num_changed_files = 2
change_description = HardenPickleLoad.change_description
expected_line_change = 4

requirements_path = "tests/samples/requirements.txt"
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
expected_new_reqs = (
f"# file used to test dependency management\n"
"requests==2.31.0\n"
"black==23.7.*\n"
"mypy~=1.4\n"
"pylint>1\n"
f"{Fickling.requirement} \\\n"
f"{Fickling.build_hashes()}"
)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ test = [
"django>=4,<6",
"numpy~=1.26.0",
"flask_wtf~=1.2.0",
"fickling~=0.1.0",
]
complexity = [
"radon==6.0.*",
Expand Down
70 changes: 70 additions & 0 deletions src/codemodder/codemods/import_modifier_codemod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from abc import ABCMeta, abstractmethod
from typing import Mapping

import libcst as cst
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor

from codemodder.codemods.api import SimpleCodemod
from codemodder.codemods.imported_call_modifier import ImportedCallModifier
from codemodder.dependency import Dependency


class MappingImportedCallModifier(ImportedCallModifier[Mapping[str, str]]):
def update_attribute(self, true_name, original_node, updated_node, new_args):
if not self.node_is_selected(original_node):
return updated_node

import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)

def update_simple_name(self, true_name, original_node, updated_node, new_args):
if not self.node_is_selected(original_node):
return updated_node

import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)


class ImportModifierCodemod(SimpleCodemod, metaclass=ABCMeta):
@property
def dependency(self) -> Dependency | None:
return None

@property
@abstractmethod
def mapping(self) -> Mapping[str, str]:
pass

def transform_module_impl(self, tree: cst.Module) -> cst.Module:
if not self.node_is_selected(tree):
return tree

visitor = MappingImportedCallModifier(
self.context,
self.file_context,
self.mapping,
self.change_description,
self.results,
)
result_tree = visitor.transform_module(tree)
self.file_context.codemod_changes.extend(visitor.changes_in_file)
if visitor.changes_in_file and (dependency := self.dependency):
self.add_dependency(dependency)

return result_tree
7 changes: 6 additions & 1 deletion src/codemodder/codemods/imported_call_modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from libcst.metadata import PositionProvider

from codemodder.change import Change
from codemodder.codemods.base_visitor import UtilsMixin
from codemodder.codemods.utils_mixin import NameResolutionMixin
from codemodder.file_context import FileContext
from codemodder.result import Result

# It seems to me like we actually want two separate bounds instead of a Union but this is what mypy wants
FunctionMatchType = TypeVar("FunctionMatchType", bound=Union[Mapping, Set])
Expand All @@ -18,6 +20,7 @@ class ImportedCallModifier(
Generic[FunctionMatchType],
VisitorBasedCodemodCommand,
NameResolutionMixin,
UtilsMixin,
metaclass=abc.ABCMeta,
):
METADATA_DEPENDENCIES = (PositionProvider,)
Expand All @@ -28,13 +31,15 @@ def __init__(
file_context: FileContext,
matching_functions: FunctionMatchType,
change_description: str,
results: list[Result] | None = None,
):
super().__init__(codemod_context)
VisitorBasedCodemodCommand.__init__(self, codemod_context)
self.line_exclude = file_context.line_exclude
self.line_include = file_context.line_include
self.matching_functions: FunctionMatchType = matching_functions
self.change_description = change_description
self.changes_in_file: list[Change] = []
self.results = results

def updated_args(self, original_args: Sequence[cst.Arg]):
return original_args
Expand Down
15 changes: 15 additions & 0 deletions src/codemodder/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,21 @@ def __hash__(self):
package_link="https://pypi.org/project/security/",
)

Fickling = Dependency(
Requirement("fickling~=0.1.0"),
hashes=[
"a5bb5982e2c0e86d41fceaf9576929f0e7bfeef53998248f69c885224cf45739",
"1d74a9ef84e56ecd3114563907166bfa65e17e3a00190157c1514fff08e086b4",
],
description="""This package provides analysis of pickled data to help identify potential security vulnerabilities.""",
_license=License(
"LGPL-3.0",
"https://opensource.org/license/LGPL-3.0/",
),
oss_link="https://github.com/trailofbits/fickling",
package_link="https://pypi.org/project/fickling/",
)


DEPENDENCY_NOTIFICATION = """
## Dependency Updates
Expand Down
4 changes: 4 additions & 0 deletions src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ class DocMetadata:
importance="Low",
guidance_explained="We believe this change is safe because using `callable` is a more reliable way to check if an object is a callable.",
),
"harden-pickle-load": DocMetadata(
importance="High",
guidance_explained="This change may impact performance in some cases, but it is recommended when handling untrusted data.",
),
}

METADATA = CORE_METADATA | {
Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .fix_mutable_params import FixMutableParams
from .flask_enable_csrf_protection import FlaskEnableCSRFProtection
from .flask_json_response_type import FlaskJsonResponseType
from .harden_pickle_load import HardenPickleLoad
from .harden_pyyaml import HardenPyyaml
from .harden_ruamel import HardenRuamel
from .https_connection import HTTPSConnection
Expand Down Expand Up @@ -75,6 +76,7 @@
EnableJinja2Autoescape,
FixDeprecatedAbstractproperty,
FixMutableParams,
HardenPickleLoad,
HardenPyyaml,
HardenRuamel,
HTTPSConnection,
Expand Down
2 changes: 1 addition & 1 deletion src/core_codemods/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# ruff: noqa: F401
from codemodder.codemods.api import Metadata, Reference, ReviewGuidance

from .core_codemod import CoreCodemod, SimpleCodemod
from .core_codemod import CoreCodemod, ImportModifierCodemod, SimpleCodemod
7 changes: 7 additions & 0 deletions src/core_codemods/api/core_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from codemodder.codemods.base_codemod import Metadata
from codemodder.codemods.base_detector import BaseDetector
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codemods.import_modifier_codemod import (
ImportModifierCodemod as _ImportModifierCodemod,
)
from codemodder.context import CodemodExecutionContext


Expand Down Expand Up @@ -52,3 +55,7 @@ class SimpleCodemod(_SimpleCodemod):
"""

codemod_base = CoreCodemod


class ImportModifierCodemod(SimpleCodemod, _ImportModifierCodemod):
pass
14 changes: 14 additions & 0 deletions src/core_codemods/docs/pixee_python_harden-pickle-load.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Python's `pickle` module is notoriouly insecure. While it is very useful for serializing and deserializing Python objects, it is not safe to use `pickle` to load data from untrusted sources. This is because `pickle` can execute arbitrary code when loading data. This can be exploited by an attacker to execute arbitrary code on your system. Unlike `yaml` there is no concept of a "safe" loader in `pickle`. Therefore, it is recommended to avoid `pickle` and to use a different serialization format such as `json` or `yaml` when working with untrusted data.

However, if you must use `pickle` to load data from an untrusted source, we recommend using the open-source `fickling` library. `fickling` is a drop-in replacement for `pickle` that validates the data before loading it and checks for the possibility of code execution. This makes it much safer (although still not entirely safe) to use `pickle` to load data from untrusted sources.

This codemod replaces calls to `pickle.load` with `fickling.load` in Python code. It also adds an import statement for `fickling` if it is not already present.

The changes look like the following:
```diff
- import pickle
+ import fickling

- data = pickle.load(file)
+ data = fickling.load(file)
```
37 changes: 37 additions & 0 deletions src/core_codemods/harden_pickle_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Mapping

from codemodder.dependency import Dependency, Fickling
from core_codemods.api import ImportModifierCodemod, Metadata, Reference, ReviewGuidance


class HardenPickleLoad(ImportModifierCodemod):
metadata = Metadata(
name="harden-pickle-load",
summary="Harden `pickle.load()` against deserialization attacks",
review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
references=[
Reference(url="https://docs.python.org/3/library/pickle.html"),
Reference(
url="https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data"
),
Reference(
url="https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html#clear-box-review_1"
),
Reference(
url="https://github.com/trailofbits/fickling",
),
],
)

change_description = "Harden `pickle.load()` against deserialization attacks"

@property
def dependency(self) -> Dependency:
return Fickling

@property
def mapping(self) -> Mapping[str, str]:
# NOTE: the fickling api doesn't seem to support `loads` yet
return {
"pickle.load": "fickling",
}
59 changes: 7 additions & 52 deletions src/core_codemods/use_defused_xml.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,15 @@
from functools import cached_property
from typing import Mapping

import libcst as cst
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor

from codemodder.codemods.imported_call_modifier import ImportedCallModifier
from codemodder.dependency import DefusedXML
from core_codemods.api import Metadata, Reference, ReviewGuidance, SimpleCodemod


class DefusedXmlModifier(ImportedCallModifier[Mapping[str, str]]):
def update_attribute(self, true_name, original_node, updated_node, new_args):
import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)

def update_simple_name(self, true_name, original_node, updated_node, new_args):
import_name = self.matching_functions[true_name]
AddImportsVisitor.add_needed_import(self.context, import_name)
RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)
return updated_node.with_changes(
args=new_args,
func=cst.Attribute(
value=cst.parse_expression(import_name),
attr=cst.Name(value=true_name.split(".")[-1]),
),
)

from codemodder.dependency import DefusedXML, Dependency
from core_codemods.api import ImportModifierCodemod, Metadata, Reference, ReviewGuidance

ETREE_METHODS = ["parse", "fromstring", "iterparse", "XMLParser"]
SAX_METHODS = ["parse", "make_parser", "parseString"]
DOM_METHODS = ["parse", "parseString"]
# TODO: add expat methods?


class UseDefusedXml(SimpleCodemod):
class UseDefusedXml(ImportModifierCodemod):
metadata = Metadata(
name="use-defusedxml",
summary="Use `defusedxml` for Parsing XML",
Expand All @@ -63,7 +31,7 @@ class UseDefusedXml(SimpleCodemod):
change_description = "Replace builtin XML method with safe `defusedxml` method"

@cached_property
def matching_functions(self) -> dict[str, str]:
def mapping(self) -> dict[str, str]:
"""Build a mapping of functions to their defusedxml imports"""
_matching_functions: dict[str, str] = {}
for module, defusedxml, methods in [
Expand All @@ -78,19 +46,6 @@ def matching_functions(self) -> dict[str, str]:
)
return _matching_functions

def transform_module_impl(self, tree: cst.Module) -> cst.Module:
if not self.node_is_selected(tree):
return tree

visitor = DefusedXmlModifier(
self.context,
self.file_context,
self.matching_functions,
self.change_description,
)
result_tree = visitor.transform_module(tree)
self.file_context.codemod_changes.extend(visitor.changes_in_file)
if visitor.changes_in_file:
self.add_dependency(DefusedXML)

return result_tree
@property
def dependency(self) -> Dependency:
return DefusedXML
Loading
Loading