-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add codemod
harden-pickle-load
(#332)
* Implement `harden-pickle-load` codemod * Refactor to share code for import modifier codemods
- Loading branch information
Showing
14 changed files
with
315 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from codemodder.codemods.test import ( | ||
BaseIntegrationTest, | ||
original_and_expected_from_code_path, | ||
) | ||
from codemodder.dependency import Fickling | ||
from core_codemods.harden_pickle_load import HardenPickleLoad | ||
|
||
|
||
class TestHardenPickleLoad(BaseIntegrationTest): | ||
codemod = HardenPickleLoad | ||
code_path = "tests/samples/harden_pickle.py" | ||
|
||
original_code, _ = original_and_expected_from_code_path(code_path, []) | ||
expected_new_code = """ | ||
import fickling | ||
try: | ||
data = fickling.load(open("some.pickle", "rb")) | ||
except FileNotFoundError: | ||
data = None | ||
""".lstrip() | ||
|
||
expected_diff = """ | ||
--- | ||
+++ | ||
@@ -1,6 +1,6 @@ | ||
-import pickle | ||
+import fickling | ||
try: | ||
- data = pickle.load(open("some.pickle", "rb")) | ||
+ data = fickling.load(open("some.pickle", "rb")) | ||
except FileNotFoundError: | ||
data = None | ||
""".lstrip() | ||
|
||
num_changed_files = 2 | ||
change_description = HardenPickleLoad.change_description | ||
expected_line_change = 4 | ||
|
||
requirements_path = "tests/samples/requirements.txt" | ||
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n" | ||
expected_new_reqs = ( | ||
f"# file used to test dependency management\n" | ||
"requests==2.31.0\n" | ||
"black==23.7.*\n" | ||
"mypy~=1.4\n" | ||
"pylint>1\n" | ||
f"{Fickling.requirement} \\\n" | ||
f"{Fickling.build_hashes()}" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
from abc import ABCMeta, abstractmethod | ||
from typing import Mapping | ||
|
||
import libcst as cst | ||
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor | ||
|
||
from codemodder.codemods.api import SimpleCodemod | ||
from codemodder.codemods.imported_call_modifier import ImportedCallModifier | ||
from codemodder.dependency import Dependency | ||
|
||
|
||
class MappingImportedCallModifier(ImportedCallModifier[Mapping[str, str]]):
    """Re-point matched calls at a replacement module.

    ``self.matching_functions`` is indexed by the fully qualified name of the
    matched call and yields the name of the module the call should be made
    from instead (presumably this is the mapping handed to the constructor,
    e.g. ``{"pickle.load": "fickling"}`` -- confirm against
    ``ImportedCallModifier``).
    """

    def update_attribute(self, true_name, original_node, updated_node, new_args):
        """Rewrite a matched call; shares its logic with ``update_simple_name``."""
        return self._call_via_mapped_module(
            true_name, original_node, updated_node, new_args
        )

    def update_simple_name(self, true_name, original_node, updated_node, new_args):
        """Rewrite a matched call; shares its logic with ``update_attribute``."""
        return self._call_via_mapped_module(
            true_name, original_node, updated_node, new_args
        )

    def _call_via_mapped_module(
        self, true_name, original_node, updated_node, new_args
    ):
        """Return ``updated_node`` rewritten as ``<mapped module>.<function>(...)``.

        Nodes that are not selected are returned unchanged. Otherwise the
        replacement module is scheduled for import, the import backing the
        original call is scheduled for removal if it ends up unused, and the
        call's ``func`` and ``args`` are replaced.
        """
        if not self.node_is_selected(original_node):
            return updated_node

        import_name = self.matching_functions[true_name]
        AddImportsVisitor.add_needed_import(self.context, import_name)
        RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)

        # Keep only the last dotted component of the original name, so
        # "pickle.load" becomes the attribute "load" on the new module.
        function_name = true_name.split(".")[-1]
        return updated_node.with_changes(
            args=new_args,
            func=cst.Attribute(
                value=cst.parse_expression(import_name),
                attr=cst.Name(value=function_name),
            ),
        )
|
||
|
||
class ImportModifierCodemod(SimpleCodemod, metaclass=ABCMeta):
    """Base class for codemods driven by a ``mapping`` of call names.

    Subclasses must provide ``mapping``; they may override ``dependency`` to
    have a dependency registered whenever the codemod changes a file.
    """

    @property
    def dependency(self) -> Dependency | None:
        """Dependency to add when changes are made; ``None`` by default."""
        return None

    @property
    @abstractmethod
    def mapping(self) -> Mapping[str, str]:
        """Mapping passed to ``MappingImportedCallModifier``."""

    def transform_module_impl(self, tree: cst.Module) -> cst.Module:
        """Run ``MappingImportedCallModifier`` over ``tree`` and record changes.

        Returns ``tree`` untouched when it is not selected; otherwise returns
        the transformed module.
        """
        if not self.node_is_selected(tree):
            return tree

        modifier = MappingImportedCallModifier(
            self.context,
            self.file_context,
            self.mapping,
            self.change_description,
            self.results,
        )
        transformed = modifier.transform_module(tree)
        self.file_context.codemod_changes.extend(modifier.changes_in_file)

        # Only consult (and register) the dependency when something changed.
        if modifier.changes_in_file:
            dependency = self.dependency
            if dependency:
                self.add_dependency(dependency)

        return transformed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# ruff: noqa: F401 | ||
from codemodder.codemods.api import Metadata, Reference, ReviewGuidance | ||
|
||
from .core_codemod import CoreCodemod, SimpleCodemod | ||
from .core_codemod import CoreCodemod, ImportModifierCodemod, SimpleCodemod |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Python's `pickle` module is notoriously insecure. While it is very useful for serializing and deserializing Python objects, it is not safe to use `pickle` to load data from untrusted sources. This is because `pickle` can execute arbitrary code when loading data. This can be exploited by an attacker to execute arbitrary code on your system. Unlike `yaml`, there is no concept of a "safe" loader in `pickle`. Therefore, it is recommended to avoid `pickle` and to use a different serialization format such as `json` or `yaml` when working with untrusted data. | ||
|
||
However, if you must use `pickle` to load data from an untrusted source, we recommend using the open-source `fickling` library. `fickling` is a drop-in replacement for `pickle` that validates the data before loading it and checks for the possibility of code execution. This makes it much safer (although still not entirely safe) to use `pickle` to load data from untrusted sources. | ||
|
||
This codemod replaces calls to `pickle.load` with `fickling.load` in Python code. It also adds an import statement for `fickling` if it is not already present. | ||
|
||
The changes look like the following: | ||
```diff | ||
- import pickle | ||
+ import fickling | ||
|
||
- data = pickle.load(file) | ||
+ data = fickling.load(file) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from typing import Mapping | ||
|
||
from codemodder.dependency import Dependency, Fickling | ||
from core_codemods.api import ImportModifierCodemod, Metadata, Reference, ReviewGuidance | ||
|
||
|
||
class HardenPickleLoad(ImportModifierCodemod):
    """Codemod that maps ``pickle.load`` calls onto the ``fickling`` module."""

    metadata = Metadata(
        name="harden-pickle-load",
        summary="Harden `pickle.load()` against deserialization attacks",
        review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
        references=[
            Reference(url=reference_url)
            for reference_url in (
                "https://docs.python.org/3/library/pickle.html",
                "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data",
                "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html#clear-box-review_1",
                "https://github.com/trailofbits/fickling",
            )
        ],
    )

    change_description = "Harden `pickle.load()` against deserialization attacks"

    @property
    def dependency(self) -> Dependency:
        """The ``Fickling`` dependency associated with this codemod."""
        return Fickling

    @property
    def mapping(self) -> Mapping[str, str]:
        """Map the fully qualified call name to its replacement module."""
        # NOTE: the fickling api doesn't seem to support `loads` yet
        return {"pickle.load": "fickling"}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.