Skip to content

Commit

Permalink
Fix harden-pyyaml to handle bad default
Browse files Browse the repository at this point in the history
  • Loading branch information
drdavella committed Feb 29, 2024
1 parent 2d1d4e3 commit 38240d2
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 8 deletions.
10 changes: 5 additions & 5 deletions src/core_codemods/docs/pixee_python_harden-pyyaml.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
This codemod hardens all [`yaml.load()`](https://pyyaml.org/wiki/PyYAMLDocumentation) calls against attacks that could result from deserializing untrusted data.
The default loaders in PyYAML are not safe to use with untrusted data. They potentially make your application vulnerable to arbitrary code execution attacks. If you open a YAML file from an untrusted source, and the file is loaded with the default loader, an attacker could execute arbitrary code on your machine.

The fix uses a safety check that already exists in the `yaml` module, replacing unsafe loader class with `SafeLoader`.
The changes from this codemod look like this:
This codemod hardens all [`yaml.load()`](https://pyyaml.org/wiki/PyYAMLDocumentation) calls against such attacks by replacing the default loader with `yaml.SafeLoader`. This is the recommended loader for loading untrusted data. For most use cases it functions as a drop-in replacement for the default loader.

Calling `yaml.load()` without an explicit loader argument is equivalent to calling it with `Loader=yaml.Loader`, which is unsafe. This usage [has been deprecated](https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input\)-Deprecation) since PyYAML 5.1. This codemod will add an explicit `SafeLoader` argument to all `yaml.load()` calls that don't use an explicit loader.

The changes from this codemod look like the following:
```diff
import yaml
data = b'!!python/object/apply:subprocess.Popen \\n- ls'
- deserialized_data = yaml.load(data, yaml.Loader)
+ deserialized_data = yaml.load(data, Loader=yaml.SafeLoader)
```
The codemod will also catch if you pass in the loader argument as a kwarg and if you use any loader other than `SafeLoader`,
including `FullLoader` and `UnsafeLoader`.
30 changes: 27 additions & 3 deletions src/core_codemods/harden_pyyaml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Union
from typing import Union, cast
import libcst as cst
from codemodder.codemods.utils_mixin import NameResolutionMixin
from core_codemods.api import (
Expand All @@ -18,6 +18,9 @@ class HardenPyyaml(SimpleCodemod, NameResolutionMixin):
Reference(
url="https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data"
),
Reference(
url="https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation"
),
],
)
change_description = "Replace unsafe `pyyaml` loader with `SafeLoader` in calls to `yaml.load` or custom loader classes."
Expand All @@ -26,6 +29,12 @@ class HardenPyyaml(SimpleCodemod, NameResolutionMixin):
detector_pattern = """
rules:
- pattern-either:
- patterns:
- pattern: yaml.load(...)
- pattern-inside: |
import yaml
...
yaml.load($ARG)
- patterns:
- pattern: yaml.load(...)
- pattern-inside: |
Expand Down Expand Up @@ -81,10 +90,25 @@ def on_result_found(
)
if (maybe_name := maybe_name or self._module_name) == self._module_name:
self.add_needed_import(self._module_name)
updated_node = cast(cst.Call, updated_node) # satisfy the type checker
new_args = [
*updated_node.args[:1],
updated_node.args[1].with_changes(
value=self.parse_expression(f"{maybe_name}.SafeLoader")
# This is the case where the arg is present but a bad value
(
updated_node.args[1].with_changes(
value=self.parse_expression(f"{maybe_name}.SafeLoader")
)
if len(updated_node.args) > 1
# This is the case where the arg is not present
# Note that this case is deprecated in PyYAML 5.1 since the default is unsafe
else cst.Arg(
keyword=cst.Name("Loader"),
value=self.parse_expression(f"{maybe_name}.SafeLoader"),
equal=cst.AssignEqual(
whitespace_before=cst.SimpleWhitespace(""),
whitespace_after=cst.SimpleWhitespace(""),
),
)
),
]
return self.update_arg_target(updated_node, new_args)
Expand Down
13 changes: 13 additions & 0 deletions tests/codemods/test_harden_pyyaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,19 @@ def test_preserve_custom_loader_kwarg(self, tmpdir):

self.run_and_assert(tmpdir, input_code, expected)

def test_default_loader_unsafe(self, tmpdir):
input_code = """
import yaml
yaml.load(data)
"""
expected = """
import yaml
yaml.load(data, Loader=yaml.SafeLoader)
"""
self.run_and_assert(tmpdir, input_code, expected)


class TestHardenPyyamlClassInherit(BaseSemgrepCodemodTest):
codemod = HardenPyyaml
Expand Down

0 comments on commit 38240d2

Please sign in to comment.