Skip to content

Commit

Permalink
initial impl and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
clavedeluna committed Jan 5, 2024
1 parent 03c3782 commit dcf4660
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 16 deletions.
5 changes: 5 additions & 0 deletions src/codemodder/codemods/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,8 @@ def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):

def leave_Assign(self, original_node, updated_node):
return self._new_or_updated_node(original_node, updated_node)

def leave_ClassDef(
self, original_node: cst.ClassDef, updated_node: cst.ClassDef
) -> cst.ClassDef:
return self._new_or_updated_node(original_node, updated_node)
69 changes: 55 additions & 14 deletions src/core_codemods/harden_pyyaml.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from typing import Union
import libcst as cst
from libcst import matchers
from codemodder.codemods.base_codemod import ReviewGuidance
from codemodder.codemods.api import SemgrepCodemod
from codemodder.codemods.utils_mixin import NameResolutionMixin
Expand All @@ -6,8 +9,8 @@
class HardenPyyaml(SemgrepCodemod, NameResolutionMixin):
NAME = "harden-pyyaml"
REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW
SUMMARY = "Use SafeLoader in `yaml.load()` Calls"
DESCRIPTION = "Ensures all calls to yaml.load use `SafeLoader`."
SUMMARY = "Replace unsafe `pyyaml` loader with `SafeLoader`"
DESCRIPTION = "Replace unsafe `pyyaml` loader with `SafeLoader` in calls to `yaml.load` or custom loader classes."
REFERENCES = [
{
"url": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data",
Expand Down Expand Up @@ -50,18 +53,56 @@ def rule(cls):
- pattern: yaml.BaseLoader
- pattern: yaml.FullLoader
- pattern: yaml.UnsafeLoader
- patterns:
- pattern: |
class $X(...,$LOADER, ...):
...
- metavariable-pattern:
metavariable: $LOADER
patterns:
- pattern-either:
- pattern: yaml.Loader
- pattern: yaml.BaseLoader
- pattern: yaml.FullLoader
- pattern: yaml.UnsafeLoader
"""

def on_result_found(self, original_node, updated_node):
maybe_name = self.get_aliased_prefix_name(original_node, self._module_name)
maybe_name = maybe_name or self._module_name
if maybe_name == self._module_name:
self.add_needed_import(self._module_name)
new_args = [
*updated_node.args[:1],
updated_node.args[1].with_changes(
value=self.parse_expression(f"{maybe_name}.SafeLoader")
),
]
return self.update_arg_target(updated_node, new_args)
def on_result_found(
self,
original_node: Union[cst.Call, cst.ClassDef],
updated_node: Union[cst.Call, cst.ClassDef],
):
match original_node:
case cst.Call():
maybe_name = self.get_aliased_prefix_name(
original_node, self._module_name
)
maybe_name = maybe_name or self._module_name
if maybe_name == self._module_name:
self.add_needed_import(self._module_name)
new_args = [
*updated_node.args[:1],
updated_node.args[1].with_changes(
value=self.parse_expression(f"{maybe_name}.SafeLoader")
),
]
return self.update_arg_target(updated_node, new_args)
case cst.ClassDef():
# todo: maybe name above
# todo: generalize method to update_bases
new = []
for base_arg in original_node.bases:
if matchers.matches(
base_arg.value,
matchers.Attribute(value=matchers.Name(value="yaml")),
):
base_arg = base_arg.with_changes(
value=base_arg.value.with_changes(
attr=cst.Name("SafeLoader")
)
)
new.append(base_arg)

return updated_node.with_changes(bases=new)
return updated_node
58 changes: 56 additions & 2 deletions tests/codemods/test_harden_pyyaml.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from textwrap import dedent
import pytest
import yaml
from core_codemods.harden_pyyaml import HardenPyyaml
from tests.codemods.base_codemod_test import BaseSemgrepCodemodTest

UNSAFE_LOADERS = yaml.loader.__all__.copy() # type: ignore
UNSAFE_LOADERS.remove("SafeLoader")
loaders = pytest.mark.parametrize("loader", UNSAFE_LOADERS)


class TestHardenPyyaml(BaseSemgrepCodemodTest):
Expand All @@ -20,7 +22,7 @@ def test_safe_loader(self, tmpdir):
"""
self.run_and_assert(tmpdir, input_code, input_code)

@pytest.mark.parametrize("loader", UNSAFE_LOADERS)
@loaders
def test_all_unsafe_loaders_arg(self, tmpdir, loader):
input_code = f"""import yaml
data = b'!!python/object/apply:subprocess.Popen \\n- ls'
Expand All @@ -33,7 +35,7 @@ def test_all_unsafe_loaders_arg(self, tmpdir, loader):
"""
self.run_and_assert(tmpdir, input_code, expected)

@pytest.mark.parametrize("loader", UNSAFE_LOADERS)
@loaders
def test_all_unsafe_loaders_kwarg(self, tmpdir, loader):
input_code = f"""import yaml
data = b'!!python/object/apply:subprocess.Popen \\n- ls'
Expand Down Expand Up @@ -80,3 +82,55 @@ def test_preserve_custom_loader_kwarg(self, tmpdir):
"""

self.run_and_assert(tmpdir, input_code, expected)


class TestHardenPyyamlClassInherit(BaseSemgrepCodemodTest):
codemod = HardenPyyaml

def test_safe_loader(self, tmpdir):
input_code = """\
import yaml
class MyCustomLoader(yaml.SafeLoader):
def __init__(self, *args, **kwargs):
super(MyCustomLoader, self).__init__(*args, **kwargs)
"""

self.run_and_assert(tmpdir, dedent(input_code), dedent(input_code))

@loaders
def test_unsafe_loaders(self, tmpdir, loader):
input_code = f"""\
import yaml
class MyCustomLoader(yaml.{loader}):
def __init__(self, *args, **kwargs):
super(MyCustomLoader, self).__init__(*args, **kwargs)
"""
expected = f"""\
import yaml
class MyCustomLoader(yaml.SafeLoader):
def __init__(self, *args, **kwargs):
super(MyCustomLoader, self).__init__(*args, **kwargs)
"""
self.run_and_assert(tmpdir, dedent(input_code), dedent(expected))

def test_import_alias(self, tmpdir):
input_code = """import yaml as yam
from yaml import Loader
data = b'!!python/object/apply:subprocess.Popen \\n- ls'
deserialized_data = yam.load(data, Loader=Loader)
"""
expected = """import yaml as yam
from yaml import Loader
data = b'!!python/object/apply:subprocess.Popen \\n- ls'
deserialized_data = yam.load(data, Loader=yam.SafeLoader)
"""
self.run_and_assert(tmpdir, input_code, expected)

# todo test_import_alias
# todo: multiple class inheritance

0 comments on commit dcf4660

Please sign in to comment.