diff --git a/MANIFEST.in b/MANIFEST.in index aa3fd3321..63a8e0161 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,3 +2,4 @@ include README.md include LICENSE recursive-include src/codemodder/codemods/semgrep *.yaml +recursive-include src/codemodder/codemods/docs *.md diff --git a/pyproject.toml b/pyproject.toml index 524d50309..ae906c6d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ codemodder = "codemodder.__main__:main" [tool.setuptools.package-data] "codemodder.codemods.semgrep" = ["src/codemodder/codemods/semgrep/*.yaml"] +"codemodder.codemods.docs" = ["src/codemodder/codemods/docs/*.md"] [tool.pytest.ini_options] # Ignore integration tests and ci tests by default diff --git a/src/codemodder/__main__.py b/src/codemodder/__main__.py index 3de1b0c05..f68d5a935 100644 --- a/src/codemodder/__main__.py +++ b/src/codemodder/__main__.py @@ -2,6 +2,8 @@ import difflib import os import sys +from importlib.resources import files + from pathlib import Path import libcst as cst from libcst.codemod import CodemodContext @@ -18,6 +20,9 @@ from codemodder.sarifs import parse_sarif_files +CODEMODDER_DOCS_MODULE = files("codemodder.codemods.docs") + + def update_code(file_path, new_code): """ Write the `new_code` to the `file_path` @@ -107,17 +112,33 @@ def analyze_files( ) +# TODO: this logic should eventually belong to CodemodRegistry +def load_codemod_description(name) -> str: + """ + Load the description of the codemod from the docs module. + """ + # TODO: this prefix should not be hardcoded + filename = f"pixee_python_{name}.md" + return CODEMODDER_DOCS_MODULE.joinpath(filename).read_text() + + def compile_results(execution_context: CodemodExecutionContext, codemods): results = [] for name, codemod_kls in codemods.items(): if not (changeset := execution_context.results_by_codemod.get(name)): continue + try: + description = load_codemod_description(name) + except FileNotFoundError: + logger.warning("No docs found for codemod %s", name) + description = codemod_kls.METADATA.DESCRIPTION + data = { # TODO: this prefix should not be hardcoded "codemod": f"pixee:python/{name}", "summary": codemod_kls.SUMMARY, - "description": codemod_kls.METADATA.DESCRIPTION, + "description": description, "references": [], "properties": {}, "failedFiles": [], diff --git a/src/codemodder/codemods/docs/pixee_python_django-debug-flag-on.md b/src/codemodder/codemods/docs/pixee_python_django-debug-flag-on.md new file mode 100644 index 000000000..2e00be759 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_django-debug-flag-on.md @@ -0,0 +1,8 @@ +This codemod will flip django's `DEBUG` flag to `False` if it's `True` on the `settings.py` file within django's default directory structure. + +Having the debug flag on may result in sensitive information exposure. When an exception occurs while the `DEBUG` flag in on, it will dump metadata of your environment, including the settings module. The attacker can purposefully request a non-existing url to trigger an exception and gather information about your system. + +```diff +- DEBUG = True ++ DEBUG = False +``` diff --git a/src/codemodder/codemods/docs/pixee_python_django-session-cookie-secure-off.md b/src/codemodder/codemods/docs/pixee_python_django-session-cookie-secure-off.md new file mode 100644 index 000000000..970876c3d --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_django-session-cookie-secure-off.md @@ -0,0 +1,7 @@ +This codemod will set django's `SESSION_COOKIE_SECURE` flag to `True` if it's `False` or missing on the `settings.py` file within django's default directory structure. + +```diff ++ SESSION_COOKIE_SECURE = True +``` + +Setting this flag on ensures that the session cookies are only sent under an HTTPS connection. Leaving this flag off may enable an attacker to use a sniffer to capture the unencrypted session cookie and hijack the user's session. diff --git a/src/codemodder/codemods/docs/pixee_python_harden-pyyaml.md b/src/codemodder/codemods/docs/pixee_python_harden-pyyaml.md new file mode 100644 index 000000000..49dacb7dc --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_harden-pyyaml.md @@ -0,0 +1,13 @@ +This codemod hardens all [`yaml.load()`](https://pyyaml.org/wiki/PyYAMLDocumentation) calls against attacks that could result from deserializing untrusted data. + +The fix uses a safety check that already exists in the `yaml` module, replacing unsafe loader class with `SafeLoader`. +The changes from this codemod look like this: + +```diff + import yaml + data = b'!!python/object/apply:subprocess.Popen \\n- ls' +- deserialized_data = yaml.load(data, yaml.Loader) ++ deserialized_data = yaml.load(data, Loader=yaml.SafeLoader) +``` +The codemod will also catch if you pass in the loader argument as a kwarg and if you use any loader other than `SafeLoader`, +including `FullLoader` and `UnsafeLoader`. diff --git a/src/codemodder/codemods/docs/pixee_python_harden-ruamel.md b/src/codemodder/codemods/docs/pixee_python_harden-ruamel.md new file mode 100644 index 000000000..718e79643 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_harden-ruamel.md @@ -0,0 +1,12 @@ +This codemod hardens any unsafe [`ruamel.yaml.YAML()`](https://yaml.readthedocs.io/en/latest/) calls against attacks that could result from deserializing untrusted data. + +The fix uses a safety check that already exists in the `ruamel` module, replacing an unsafe `typ` argument with `typ="safe"`. +The changes from this codemod look like this: + +```diff + from ruamel.yaml import YAML +- serializer = YAML(typ="unsafe") +- serializer = YAML(typ="base") ++ serializer = YAML(typ="safe") ++ serializer = YAML(typ="safe") +``` diff --git a/src/codemodder/codemods/docs/pixee_python_https-connection.md b/src/codemodder/codemods/docs/pixee_python_https-connection.md new file mode 100644 index 000000000..c26fcbaa1 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_https-connection.md @@ -0,0 +1,9 @@ +This codemod replaces calls to `urllib3.connectionpool.HTTPConnectionPool` and `urllib3.HTTPConnectionPool` with their secure variant (`HTTPSConnectionPool`). + +Programmers should opt to use HTTPS over HTTP for secure encrypted communication whenever possible. + +```diff +import urllib3 +- urllib3.HTTPConnectionPool("www.example.com","80") ++ urllib3.HTTPSConnectionPool("www.example.com","80") +``` diff --git a/src/codemodder/codemods/docs/pixee_python_limit-readline.md b/src/codemodder/codemods/docs/pixee_python_limit-readline.md new file mode 100644 index 000000000..236bcbf02 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_limit-readline.md @@ -0,0 +1,10 @@ +This codemod hardens all [`readline()`](https://docs.python.org/3/library/io.html#io.IOBase.readline) calls from file objects returned from an `open()` call, `StringIO` and `BytesIO` against denial of service attacks. A stream influenced by an attacker could keep providing bytes until the system runs out of memory, causing a crash. + +Fixing it is straightforward by providing adding a size argument to any `readline()` calls. +The changes from this codemod look like this: + +```diff + file = open('some_file.txt') +- file.readline() ++ file.readline(5_000_000) +``` diff --git a/src/codemodder/codemods/docs/pixee_python_remove-unnecessary-f-str.md b/src/codemodder/codemods/docs/pixee_python_remove-unnecessary-f-str.md new file mode 100644 index 000000000..659ed9711 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_remove-unnecessary-f-str.md @@ -0,0 +1,7 @@ +This codemod converts any f-strings without interpolation to regular strings. + +```diff +- var = f"hello" ++ var = "hello" + ... +``` diff --git a/src/codemodder/codemods/docs/pixee_python_requests-verify.md b/src/codemodder/codemods/docs/pixee_python_requests-verify.md new file mode 100644 index 000000000..36837b593 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_requests-verify.md @@ -0,0 +1,16 @@ +This codemod checks that calls to the `requests` module API use `verify=True` or a path to a CA bundle to ensure TLS certificate validation. + +The [requests documentation](https://requests.readthedocs.io/en/latest/api/) warns that the `verify` flag +> When set to False, requests will accept any TLS certificate presented by the server, and will ignore hostname mismatches and/or expired certificates, which will make your application vulnerable to man-in-the-middle (MitM) attacks. Setting verify to False may be useful during local development or testing. + +The changes from this codemod look like this: + + +```diff + import requests + +- requests.get("www.google.com", ...,verify=False) ++ requests.get("www.google.com", ...,verify=True) +``` + +This codemod also checks other methods in the `requests` module that accept a `verify` flag (e.g. `requests.post`, etc.) diff --git a/src/codemodder/codemods/docs/pixee_python_sandbox-process-creation.md b/src/codemodder/codemods/docs/pixee_python_sandbox-process-creation.md new file mode 100644 index 000000000..bf32e9408 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_sandbox-process-creation.md @@ -0,0 +1,22 @@ +This codemod sandboxes all instances of [subprocess.run](https://docs.python.org/3/library/subprocess.html#subprocess.run) and [subprocess.call](https://docs.python.org/3/library/subprocess.html#subprocess.call) to offer protection against attack. + +Left unchecked, `subprocess.run` and `subprocess.call` can execute any arbitrary system command. If an attacker can control part of the strings used as program paths or arguments, they could execute arbitrary programs, install malware, and anything else they could do if they had a shell open on the application host. + +Our change introduces a sandbox which protects the application: + +```diff + import subprocess ++ from security import safe_command + ... +- subprocess.run("echo 'hi'", shell=True) ++ safe_command.run(subprocess.run, "echo 'hi'", shell=True) + ... +- subprocess.call(["ls", "-l"]) ++ safe_command.call(subprocess.call, ["ls", "-l"]) +``` + +The default `safe_command` restrictions applied are the following: +* **Prevent command chaining**. Many exploits work by injecting command separators and causing the shell to interpret a second, malicious command. The `safe_command` functions attempt to parse the given command, and throw a `SecurityException` if multiple commands are present. +* **Prevent arguments targeting sensitive files.** There is little reason for custom code to target sensitive system files like `/etc/passwd`, so the sandbox prevents arguments that point to these files that may be targets for exfiltration. + +There are [more options for sandboxing](https://github.com/pixee/python-security/blob/main/src/security/safe_command/api.py#L5) if you are interested in locking down system commands even more. diff --git a/src/codemodder/codemods/docs/pixee_python_secure-random.md b/src/codemodder/codemods/docs/pixee_python_secure-random.md new file mode 100644 index 000000000..7affa3413 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_secure-random.md @@ -0,0 +1,15 @@ +This codemod replaces all instances of functions in the `random` module (e.g. `random.random()` with their, much more secure, equivalents from the `secrets` module (e.g. `secrets.SystemRandom().random()`). + +There is significant algorithmic complexity in getting computers to generate genuinely unguessable random bits. The `random.random()` function uses a method of pseudo-random number generation that unfortunately emits fairly predictable numbers. + +If the numbers it emits are predictable, then it's obviously not safe to use in cryptographic operations, file name creation, token construction, password generation, and anything else that's related to security. In fact, it may affect security even if it's not directly obvious. + +Switching to a more secure version is simple and the changes look something like this: + +```diff +- import random ++ import secrets + ... +- random.random() ++ secrets.SystemRandom().random() +``` diff --git a/src/codemodder/codemods/docs/pixee_python_secure-tempfile.md b/src/codemodder/codemods/docs/pixee_python_secure-tempfile.md new file mode 100644 index 000000000..1269e0548 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_secure-tempfile.md @@ -0,0 +1,12 @@ +This codemod replaces all `tempfile.mktemp` calls to the more secure `tempfile.mkstemp`. + +The Python [tempfile documentation](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) is explicit +that `tempfile.mktemp` should be deprecated to avoid an unsafe and unexpected race condition. +The changes from this codemod look like this: + + +```diff + import tempfile +- tempfile.mktemp(...) ++ tempfile.mkstemp(...) +``` diff --git a/src/codemodder/codemods/docs/pixee_python_unused-imports.md b/src/codemodder/codemods/docs/pixee_python_unused-imports.md new file mode 100644 index 000000000..d416423df --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_unused-imports.md @@ -0,0 +1,10 @@ +Removes unused imports from a module. Imports involving the `__future__` module are ignored. + +```diff +- import a +import b + +b.function() +``` + +If you have feedback on this codemod, [please let us know](mailto:feedback@pixee.ai)! diff --git a/src/codemodder/codemods/docs/pixee_python_upgrade-sslcontext-minimum-version.md b/src/codemodder/codemods/docs/pixee_python_upgrade-sslcontext-minimum-version.md new file mode 100644 index 000000000..f0272a0a4 --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_upgrade-sslcontext-minimum-version.md @@ -0,0 +1,16 @@ +This codemod replaces all unsafe and/or deprecated SSL/TLS versions when used +to set the `ssl.SSLContext.minimum_version` attribute. It uses +`ssl.TLSVersion.TLSv1_2` instead, which ensures a safe default minimum TLS +version. + +Our change involves modifying the `minimum_version` attribute of +`ssl.SSLContext` instances to use `ssl.TLSVersion.TLSv1_2`. + +```diff + import ssl + context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT) +- context.minimum_version = ssl.TLSVersion.SSLv3 ++ context.minimum_version = ssl.TLSVersion.TLSv1_2 +``` + +There is no functional difference between the unsafe and safe versions, and all modern servers offer TLSv1.2. diff --git a/src/codemodder/codemods/docs/pixee_python_upgrade-sslcontext-tls.md b/src/codemodder/codemods/docs/pixee_python_upgrade-sslcontext-tls.md new file mode 100644 index 000000000..11212b8dc --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_upgrade-sslcontext-tls.md @@ -0,0 +1,20 @@ +This codemod replaces the use of all unsafe and/or deprecated SSL/TLS versions +in the `ssl.SSLContext` constructor. It uses `PROTOCOL_TLS_CLIENT` instead, +which ensures a safe default TLS version. + +Our change involves modifying the argument to `ssl.SSLContext()` to +use `PROTOCOL_TLS_CLIENT`. + +```diff + import ssl +- context = ssl.SSLContext(protocol=PROTOCOL_SSLv3) ++ context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT) +``` + +There is no functional difference between the unsafe and safe versions, and all modern servers offer TLSv1.2. + +The use of explicit TLS versions (even safe ones) is deprecated by the `ssl` +module, so it is necessary to choose either `PROTOCOL_TLS_CLIENT` or +`PROTOCOL_TLS_SERVER`. Using `PROTOCOL_TLS_CLIENT` is expected to be the +correct choice for most applications but in some cases it will be necessary to +use `PROTOCOL_TLS_SERVER` instead. diff --git a/src/codemodder/codemods/docs/pixee_python_url-sandbox.md b/src/codemodder/codemods/docs/pixee_python_url-sandbox.md new file mode 100644 index 000000000..f80b75d8c --- /dev/null +++ b/src/codemodder/codemods/docs/pixee_python_url-sandbox.md @@ -0,0 +1,63 @@ +This codemod sandboxes calls to [`requests.get`](https://requests.readthedocs.io/en/latest/api/#requests.get) to be more resistant to Server-Side Request Forgery (SSRF) attacks. + +Most of the time when you make a `GET` request to a URL, you're intending to reference an HTTP endpoint, like an internal microservice. However, URLs can point to local file system files, a Gopher stream in your local network, a JAR file on a remote Internet site, and all kinds of other unexpected and undesirable outcomes. When the URL values are influenced by attackers, they can trick your application into fetching internal resources, running malicious code, or otherwise harming the system. +Consider the following code for a Flask app: + +```python +from flask import Flask, request +import requests + +app = Flask(__name__) + +@app.route("/request-url") +def request_url(): + url = request.args["loc"] + resp = requests.get(url) + ... +``` + +In this case, an attacker could supply a value like `"http://169.254.169.254/user-data/"` and attempt to access user information. + +Our changes introduce sandboxing around URL creation that force developers to specify some boundaries on the types of URLs they expect to create: + +```diff + from flask import Flask, request +- import requests ++ from security import safe_requests + + app = Flask(__name__) + + @app.route("/request-url") + def request_url(): + url = request.args["loc"] +- resp = requests.get(url) ++ resp = safe_requests.get.get(url) + ... +``` + +This change alone reduces attack surface significantly because the default behavior of `safe_requests.get` raises a `SecurityException` if +a user attempts to access a known infrastructure location, unless specifically disabled. + + +If you have feedback on this codemod, [please let us know](mailto:feedback@pixee.ai)! + +## F.A.Q. + +### Why does this codemod require a Pixee dependency? + +We always prefer to use built-in Python functions or one from a well-known and trusted community dependency. However, we cannot find any such control. If you know of one, [please let us know](https://ask.pixee.ai/feedback). + +### Why is this codemod marked as Merge After Cursory Review? + +By default, the protection only weaves in 2 checks, which we believe will not cause any issues with the vast majority of code: +1. The given URL must be HTTP/HTTPS. +2. The given URL must not point to a "well-known infrastructure target", which includes things like AWS Metadata Service endpoints, and internal routers (e.g., 192.168.1.1) which are common targets of attacks. + +However, on rare occasions an application may use a URL protocol like "file://" or "ftp://" in backend or middleware code. + +If you want to allow those protocols, change the incoming PR to look more like this and get the best security possible: + +```diff +-resp = requests.get(url) ++resp = safe_requests.get.get(url, allowed_protocols=("ftp",)) +``` diff --git a/tests/test_codemod_docs.py b/tests/test_codemod_docs.py new file mode 100644 index 000000000..0d0b35e21 --- /dev/null +++ b/tests/test_codemod_docs.py @@ -0,0 +1,16 @@ +import pytest + +from codemodder.__main__ import load_codemod_description +from codemodder.codemods import DEFAULT_CODEMODS + + +@pytest.fixture(params=DEFAULT_CODEMODS) +def codemod_name(request): + name = request.param.name() + if name in ["order-imports", "use-walrus-if"]: + pytest.skip("No docs yet") + return name + + +def test_load_codemod_description(codemod_name): # pylint: disable=redefined-outer-name + load_codemod_description(codemod_name)