From 8128ee617143d9537c98cd76b862f08abd9b92a2 Mon Sep 17 00:00:00 2001 From: Dani Alcala <112832187+clavedeluna@users.noreply.github.com> Date: Mon, 16 Oct 2023 17:07:48 -0300 Subject: [PATCH] Script to automate codemod docs (#73) * script * working doc gen script * add references to codemods * make description = url automated * update all codemod summary fields * test docs * update codemod metadata * fix descriptions --- .pre-commit-config.yaml | 6 +- integration_tests/base_test.py | 6 + pyproject.toml | 2 + src/codemodder/codemods/api/__init__.py | 1 + src/codemodder/codemods/base_codemod.py | 18 +- src/codemodder/context.py | 2 +- src/codemodder/executor.py | 8 + src/codemodder/registry.py | 2 +- src/core_codemods/django_debug_flag_on.py | 15 +- .../django_session_cookie_secure_off.py | 14 +- .../pixee_python_enable-jinja2-autoescape.md | 3 +- .../docs/pixee_python_jwt-decode-verify.md | 8 +- .../docs/pixee_python_unused-imports.md | 2 - src/core_codemods/enable_jinja2_autoescape.py | 13 +- src/core_codemods/fix_mutable_params.py | 4 +- src/core_codemods/harden_pyyaml.py | 8 +- src/core_codemods/harden_ruamel.py | 8 +- src/core_codemods/https_connection.py | 18 +- src/core_codemods/jwt_decode_verify.py | 11 +- src/core_codemods/limit_readline.py | 5 +- .../lxml_safe_parser_defaults.py | 18 +- src/core_codemods/lxml_safe_parsing.py | 18 +- src/core_codemods/order_imports.py | 5 +- src/core_codemods/process_creation_sandbox.py | 12 +- src/core_codemods/remove_unnecessary_f_str.py | 8 +- src/core_codemods/remove_unused_imports.py | 3 +- src/core_codemods/requests_verify.py | 11 +- src/core_codemods/secure_random.py | 11 +- src/core_codemods/tempfile_mktemp.py | 8 +- .../upgrade_sslcontext_minimum_version.py | 15 +- src/core_codemods/upgrade_sslcontext_tls.py | 15 +- src/core_codemods/url_sandbox.py | 21 +- src/core_codemods/use_walrus_if.py | 10 +- src/core_codemods/with_threading_lock.py | 16 +- src/scripts/__init__.py | 0 src/scripts/generate_docs.py | 181 ++++++++++++++++++ tests/test_codemod_docs.py | 7 +- 37 files changed, 459 insertions(+), 54 deletions(-) create mode 100644 src/scripts/__init__.py create mode 100644 src/scripts/generate_docs.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5801c840..104b1faf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,11 @@ repos: rev: 23.9.1 hooks: - id: black - exclude: samples/ + exclude: | + (?x)^( + samples/.*| + core_codemods/docs/.* + )$ - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.5.1 hooks: diff --git a/integration_tests/base_test.py b/integration_tests/base_test.py index c894ab6a..e3886e72 100644 --- a/integration_tests/base_test.py +++ b/integration_tests/base_test.py @@ -83,6 +83,12 @@ def _assert_results_fields(self, results, output_path): assert len(results) == 1 result = results[0] assert result["codemod"] == self.codemod_wrapper.id + assert result["references"] == self.codemod_wrapper.references + + # TODO: once we add description for each url. + for reference in result["references"]: + assert reference["url"] == reference["description"] + assert len(result["changeset"]) == self.num_changed_files # A codemod may change multiple files. For now we will diff --git a/pyproject.toml b/pyproject.toml index cf41b185..4d21ce91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ dependencies = [ [project.scripts] codemodder = "codemodder.codemodder:main" +generate-docs = 'scripts.generate_docs:main' + [project.entry-points.codemods] core = "core_codemods:registry" diff --git a/src/codemodder/codemods/api/__init__.py b/src/codemodder/codemods/api/__init__.py index accbef43..c8a9a661 100644 --- a/src/codemodder/codemods/api/__init__.py +++ b/src/codemodder/codemods/api/__init__.py @@ -69,6 +69,7 @@ def __init_subclass__(cls): cls.DESCRIPTION, # pylint: disable=no-member cls.NAME, # pylint: disable=no-member cls.REVIEW_GUIDANCE, # pylint: disable=no-member + cls.REFERENCES, # pylint: disable=no-member ) # This is a little bit hacky, but it also feels like the right solution? diff --git a/src/codemodder/codemods/base_codemod.py b/src/codemodder/codemods/base_codemod.py index 65390c6a..d2a4fe4a 100644 --- a/src/codemodder/codemods/base_codemod.py +++ b/src/codemodder/codemods/base_codemod.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum from typing import List, ClassVar @@ -21,6 +21,22 @@ class CodemodMetadata: DESCRIPTION: str # TODO: this field should be optional NAME: str REVIEW_GUIDANCE: ReviewGuidance + REFERENCES: list = field(default_factory=list) + + # TODO: remove post_init update_references once we add description for each url. + def __post_init__(self): + object.__setattr__(self, "REFERENCES", self.update_references(self.REFERENCES)) + + @staticmethod + def update_references(references): + updated_references = [] + for reference in references: + updated_reference = dict( + reference + ) # Create a copy to avoid modifying the original dict + updated_reference["description"] = updated_reference["url"] + updated_references.append(updated_reference) + return updated_references class BaseCodemod: diff --git a/src/codemodder/context.py b/src/codemodder/context.py index 26eeace9..70ac466f 100644 --- a/src/codemodder/context.py +++ b/src/codemodder/context.py @@ -82,7 +82,7 @@ def compile_results(self, codemods: list[CodemodExecutorWrapper]): "codemod": codemod.id, "summary": codemod.summary, "description": codemod.description, - "references": [], + "references": codemod.references, "properties": {}, "failedFiles": [], "changeset": [change.to_json() for change in changeset], diff --git a/src/codemodder/executor.py b/src/codemodder/executor.py index 6bc166de..47910b9f 100644 --- a/src/codemodder/executor.py +++ b/src/codemodder/executor.py @@ -58,6 +58,14 @@ def description(self): # TODO: temporary workaround return self.METADATA.DESCRIPTION + @property + def review_guidance(self): + return self.METADATA.REVIEW_GUIDANCE.name.replace("_", " ").title() + + @property + def references(self): + return self.METADATA.REFERENCES + @property def yaml_files(self): return [ diff --git a/src/codemodder/registry.py b/src/codemodder/registry.py index a903a8e4..4a06997c 100644 --- a/src/codemodder/registry.py +++ b/src/codemodder/registry.py @@ -61,7 +61,7 @@ def _validate_codemod(self, codemod): for k, v in asdict(codemod.METADATA).items(): if v is NotImplemented: raise NotImplementedError(f"METADATA.{k} not defined for {codemod}") - if not v: + if k != "REFERENCES" and not v: raise NotImplementedError( f"METADATA.{k} should not be None or empty for {codemod}" ) diff --git a/src/core_codemods/django_debug_flag_on.py b/src/core_codemods/django_debug_flag_on.py index de8766b8..e435f9ee 100644 --- a/src/core_codemods/django_debug_flag_on.py +++ b/src/core_codemods/django_debug_flag_on.py @@ -15,11 +15,22 @@ class DjangoDebugFlagOn(SemgrepCodemod, Codemod): METADATA = CodemodMetadata( - DESCRIPTION=("Flips django's debug flag if on."), + DESCRIPTION="Flip `Django` debug flag to off.", NAME="django-debug-flag-on", REVIEW_GUIDANCE=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, + REFERENCES=[ + { + "url": "https://owasp.org/www-project-top-ten/2017/A3_2017-Sensitive_Data_Exposure", + "description": "", + }, + { + "url": "https://docs.djangoproject.com/en/4.2/ref/settings/#std-setting-DEBUG", + "description": "", + }, + ], ) - SUMMARY = CHANGE_DESCRIPTION = "Flip Django debug flag to off" + SUMMARY = "Disable Django Debug Mode" + CHANGE_DESCRIPTION = METADATA.DESCRIPTION YAML_FILES = [ "django-debug-flag-on.yaml", ] diff --git a/src/core_codemods/django_session_cookie_secure_off.py b/src/core_codemods/django_session_cookie_secure_off.py index acfaa4ee..26e5f9c6 100644 --- a/src/core_codemods/django_session_cookie_secure_off.py +++ b/src/core_codemods/django_session_cookie_secure_off.py @@ -17,9 +17,19 @@ class DjangoSessionCookieSecureOff(SemgrepCodemod, Codemod): METADATA = CodemodMetadata( DESCRIPTION=("Sets Django's `SESSION_COOKIE_SECURE` flag if off or missing."), NAME="django-session-cookie-secure-off", - REVIEW_GUIDANCE=ReviewGuidance.MERGE_AFTER_REVIEW, + REVIEW_GUIDANCE=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, + REFERENCES=[ + { + "url": "https://owasp.org/www-community/controls/SecureCookieAttribute", + "description": "", + }, + { + "url": "https://docs.djangoproject.com/en/4.2/ref/settings/#session-cookie-secure", + "description": "", + }, + ], ) - SUMMARY = "Secure setting for Django `SESSION_COOKIE_SECURE` flag" + SUMMARY = "Secure Setting for Django `SESSION_COOKIE_SECURE` flag" CHANGE_DESCRIPTION = METADATA.DESCRIPTION YAML_FILES = [ "detect-django-settings.yaml", diff --git a/src/core_codemods/docs/pixee_python_enable-jinja2-autoescape.md b/src/core_codemods/docs/pixee_python_enable-jinja2-autoescape.md index 8e49775c..f0b77d4e 100644 --- a/src/core_codemods/docs/pixee_python_enable-jinja2-autoescape.md +++ b/src/core_codemods/docs/pixee_python_enable-jinja2-autoescape.md @@ -1,6 +1,6 @@ This codemod enables autoescaping of HTML content in `jinja2`. Unfortunately, the jinja2 default behavior is to not autoescape when rendering templates, which makes your applications -vulnerable to Cross-Site Scripting (XSS) attacks. +potentially vulnerable to Cross-Site Scripting (XSS) attacks. Our codemod checks if you forgot to enable autoescape or if you explicitly disabled it. The change looks as follows: @@ -13,4 +13,3 @@ Our codemod checks if you forgot to enable autoescape or if you explicitly disab + env = Environment(autoescape=True, loader=some_loader) ... ``` - diff --git a/src/core_codemods/docs/pixee_python_jwt-decode-verify.md b/src/core_codemods/docs/pixee_python_jwt-decode-verify.md index e900d150..d736cc11 100644 --- a/src/core_codemods/docs/pixee_python_jwt-decode-verify.md +++ b/src/core_codemods/docs/pixee_python_jwt-decode-verify.md @@ -9,10 +9,8 @@ Our change looks as follows: - decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False) + decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True) ... -- decoded_payload = jwt.decode( - encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False, "verify_exp": False}) -+ decoded_payload = jwt.decode( - encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True, "verify_exp": True}) +- decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False, "verify_exp": False}) ++ decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True, "verify_exp": True}) ``` -Any `verify` parameter not listed relies on the secure `True` default value. \ No newline at end of file +Any `verify` parameter not listed relies on the secure `True` default value. diff --git a/src/core_codemods/docs/pixee_python_unused-imports.md b/src/core_codemods/docs/pixee_python_unused-imports.md index d416423d..cb3137a2 100644 --- a/src/core_codemods/docs/pixee_python_unused-imports.md +++ b/src/core_codemods/docs/pixee_python_unused-imports.md @@ -6,5 +6,3 @@ import b b.function() ``` - -If you have feedback on this codemod, [please let us know](mailto:feedback@pixee.ai)! diff --git a/src/core_codemods/enable_jinja2_autoescape.py b/src/core_codemods/enable_jinja2_autoescape.py index 8e8fe798..7f649bc7 100644 --- a/src/core_codemods/enable_jinja2_autoescape.py +++ b/src/core_codemods/enable_jinja2_autoescape.py @@ -5,9 +5,16 @@ class EnableJinja2Autoescape(SemgrepCodemod): NAME = "enable-jinja2-autoescape" - REVIEW_GUIDANCE = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW - SUMMARY = "Enable jinja2 autoescape" - DESCRIPTION = "Makes the `autoescape` parameter to jinja2.Environment be `True`." + REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW + SUMMARY = "Enable Jinja2 Autoescape" + DESCRIPTION = "Sets the `autoescape` parameter in jinja2.Environment to `True`." + REFERENCES = [ + {"url": "https://owasp.org/www-community/attacks/xss/", "description": ""}, + { + "url": "https://jinja.palletsprojects.com/en/3.1.x/api/#autoescaping", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/fix_mutable_params.py b/src/core_codemods/fix_mutable_params.py index bd5cd5cd..f6ab3dd4 100644 --- a/src/core_codemods/fix_mutable_params.py +++ b/src/core_codemods/fix_mutable_params.py @@ -9,8 +9,8 @@ class FixMutableParams(BaseCodemod): NAME = "fix-mutable-params" SUMMARY = "Replace Mutable Default Parameters" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - DESCRIPTION = "Replace mutable parameter with None" - + DESCRIPTION = "Replace mutable parameter with `None`." + REFERENCES: list = [] _BUILTIN_TO_LITERAL = { "list": cst.List(elements=[]), "dict": cst.Dict(elements=[]), diff --git a/src/core_codemods/harden_pyyaml.py b/src/core_codemods/harden_pyyaml.py index 0fde850c..3e099190 100644 --- a/src/core_codemods/harden_pyyaml.py +++ b/src/core_codemods/harden_pyyaml.py @@ -5,8 +5,14 @@ class HardenPyyaml(SemgrepCodemod): NAME = "harden-pyyaml" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Use SafeLoader when loading YAML" + SUMMARY = "Use SafeLoader in `yaml.load()` Calls" DESCRIPTION = "Ensures all calls to yaml.load use `SafeLoader`." + REFERENCES = [ + { + "url": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data", + "description": "", + } + ] @classmethod def rule(cls): diff --git a/src/core_codemods/harden_ruamel.py b/src/core_codemods/harden_ruamel.py index 86c9a7c3..8eb0a12b 100644 --- a/src/core_codemods/harden_ruamel.py +++ b/src/core_codemods/harden_ruamel.py @@ -6,8 +6,14 @@ class HardenRuamel(SemgrepCodemod): NAME = "harden-ruamel" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Use safe YAML loading in ruamel.yaml" + SUMMARY = "Use `typ='safe'` in ruamel.yaml() Calls" DESCRIPTION = "Ensures all unsafe calls to ruamel.yaml.YAML use `typ='safe'`." + REFERENCES = [ + { + "url": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data", + "description": "", + } + ] @classmethod def rule(cls): diff --git a/src/core_codemods/https_connection.py b/src/core_codemods/https_connection.py index 2ba92ee2..0ca170c8 100644 --- a/src/core_codemods/https_connection.py +++ b/src/core_codemods/https_connection.py @@ -22,12 +22,24 @@ class HTTPSConnection(BaseCodemod, Codemod): METADATA = CodemodMetadata( - DESCRIPTION=("Enforce HTTPS connection"), + DESCRIPTION="Enforce HTTPS connection for `urllib3`.", NAME="https-connection", REVIEW_GUIDANCE=ReviewGuidance.MERGE_WITHOUT_REVIEW, + REFERENCES=[ + { + "url": "https://owasp.org/www-community/vulnerabilities/Insecure_Transport", + "description": "", + }, + { + "url": "https://urllib3.readthedocs.io/en/stable/reference/urllib3.connectionpool.html#urllib3.HTTPConnectionPool", + "description": "", + }, + ], + ) + CHANGE_DESCRIPTION = METADATA.DESCRIPTION + SUMMARY = ( + "Changes HTTPConnectionPool to HTTPSConnectionPool to Enforce Secure Connection" ) - CHANGE_DESCRIPTION = "Enforce HTTPS connection" - SUMMARY = "Changes HTTPConnectionPool to HTTPSConnectionPool to enforce secure connection." METADATA_DEPENDENCIES = (PositionProvider,) diff --git a/src/core_codemods/jwt_decode_verify.py b/src/core_codemods/jwt_decode_verify.py index f1e4f145..343ac94a 100644 --- a/src/core_codemods/jwt_decode_verify.py +++ b/src/core_codemods/jwt_decode_verify.py @@ -8,8 +8,15 @@ class JwtDecodeVerify(SemgrepCodemod): NAME = "jwt-decode-verify" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Enable all verifications in `jwt.decode` call." - DESCRIPTION = "Makes any of the multiple `verify` parameters to a `jwt.decode` call be `True`." + SUMMARY = "Verify JWT Decode" + DESCRIPTION = "Enable all verifications in `jwt.decode` call." + REFERENCES = [ + {"url": "https://pyjwt.readthedocs.io/en/stable/api.html", "description": ""}, + { + "url": "https://owasp.org/www-project-web-security-testing-guide/latest/4-Web_Application_Security_Testing/06-Session_Management_Testing/10-Testing_JSON_Web_Tokens", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/limit_readline.py b/src/core_codemods/limit_readline.py index e0dc77dc..694780b0 100644 --- a/src/core_codemods/limit_readline.py +++ b/src/core_codemods/limit_readline.py @@ -9,8 +9,11 @@ class LimitReadline(SemgrepCodemod): NAME = "limit-readline" REVIEW_GUIDANCE = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW - SUMMARY = "Limit the size of readline() calls" + SUMMARY = "Limit readline()" DESCRIPTION = "Adds a size limit argument to readline() calls." + REFERENCES = [ + {"url": "https://cwe.mitre.org/data/definitions/400.html", "description": ""} + ] @classmethod def rule(cls): diff --git a/src/core_codemods/lxml_safe_parser_defaults.py b/src/core_codemods/lxml_safe_parser_defaults.py index 7e621599..20b47b84 100644 --- a/src/core_codemods/lxml_safe_parser_defaults.py +++ b/src/core_codemods/lxml_safe_parser_defaults.py @@ -6,8 +6,22 @@ class LxmlSafeParserDefaults(SemgrepCodemod): NAME = "safe-lxml-parser-defaults" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Use safe defaults for lxml parsers" - DESCRIPTION = "Replace lxml parser parameters with safe defaults" + SUMMARY = "Use Safe Defaults for `lxml` Parsers" + DESCRIPTION = "Replace `lxml` parser parameters with safe defaults." + REFERENCES = [ + { + "url": "https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser", + "description": "", + }, + { + "url": "https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing", + "description": "", + }, + { + "url": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/lxml_safe_parsing.py b/src/core_codemods/lxml_safe_parsing.py index 6ddd817b..a7a98101 100644 --- a/src/core_codemods/lxml_safe_parsing.py +++ b/src/core_codemods/lxml_safe_parsing.py @@ -6,10 +6,24 @@ class LxmlSafeParsing(SemgrepCodemod): NAME = "safe-lxml-parsing" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Use safe parsers in lxml parsing functions" + SUMMARY = "Use Safe Parsers in `lxml` Parsing Functions" DESCRIPTION = ( - "Call `lxml.etree.parse` and `lxml.etree.fromstring` with a safe parser" + "Call `lxml.etree.parse` and `lxml.etree.fromstring` with a safe parser." ) + REFERENCES = [ + { + "url": "https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser", + "description": "", + }, + { + "url": "https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing", + "description": "", + }, + { + "url": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/order_imports.py b/src/core_codemods/order_imports.py index c0ddfa94..df34326a 100644 --- a/src/core_codemods/order_imports.py +++ b/src/core_codemods/order_imports.py @@ -15,11 +15,12 @@ class OrderImports(BaseCodemod, Codemod): METADATA = CodemodMetadata( - DESCRIPTION=("Formats and order imports by categories"), + DESCRIPTION=("Formats and orders imports by categories."), NAME="order-imports", REVIEW_GUIDANCE=ReviewGuidance.MERGE_WITHOUT_REVIEW, + REFERENCES=[], ) - SUMMARY = "Order imports by categories" + SUMMARY = "Order Imports" CHANGE_DESCRIPTION = "Ordered and formatted import block below this line" METADATA_DEPENDENCIES = (PositionProvider,) diff --git a/src/core_codemods/process_creation_sandbox.py b/src/core_codemods/process_creation_sandbox.py index 84d46ffd..11d99e57 100644 --- a/src/core_codemods/process_creation_sandbox.py +++ b/src/core_codemods/process_creation_sandbox.py @@ -6,10 +6,20 @@ class ProcessSandbox(SemgrepCodemod): NAME = "sandbox-process-creation" REVIEW_GUIDANCE = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW - SUMMARY = "Sandbox process creation" + SUMMARY = "Sandbox Process Creation" DESCRIPTION = ( "Replaces subprocess.{func} with more secure safe_command library functions." ) + REFERENCES = [ + { + "url": "https://github.com/pixee/python-security/blob/main/src/security/safe_command/api.py", + "description": "", + }, + { + "url": "https://cheatsheetseries.owasp.org/cheatsheets/OS_Command_Injection_Defense_Cheat_Sheet.html", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/remove_unnecessary_f_str.py b/src/core_codemods/remove_unnecessary_f_str.py index 58fc65e4..d45d3f04 100644 --- a/src/core_codemods/remove_unnecessary_f_str.py +++ b/src/core_codemods/remove_unnecessary_f_str.py @@ -11,8 +11,14 @@ class RemoveUnnecessaryFStr(BaseCodemod, UnnecessaryFormatString): NAME = "remove-unnecessary-f-str" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Remove unnecessary f-strings" + SUMMARY = "Remove Unnecessary F-strings" DESCRIPTION = UnnecessaryFormatString.DESCRIPTION + REFERENCES = [ + { + "url": "https://github.com/Instagram/LibCST/blob/main/libcst/codemod/commands/unnecessary_format_string.py", + "description": "", + } + ] def __init__(self, codemod_context: CodemodContext, *codemod_args): UnnecessaryFormatString.__init__(self, codemod_context) diff --git a/src/core_codemods/remove_unused_imports.py b/src/core_codemods/remove_unused_imports.py index 5744043d..61414836 100644 --- a/src/core_codemods/remove_unused_imports.py +++ b/src/core_codemods/remove_unused_imports.py @@ -28,8 +28,9 @@ class RemoveUnusedImports(BaseCodemod, Codemod): DESCRIPTION=("Remove unused imports from a module."), NAME="unused-imports", REVIEW_GUIDANCE=ReviewGuidance.MERGE_WITHOUT_REVIEW, + REFERENCES=[], ) - SUMMARY = "Remove unused imports from a module" + SUMMARY = "Remove Unused Imports" CHANGE_DESCRIPTION = "Unused import." METADATA_DEPENDENCIES = ( diff --git a/src/core_codemods/requests_verify.py b/src/core_codemods/requests_verify.py index c01d4594..08671265 100644 --- a/src/core_codemods/requests_verify.py +++ b/src/core_codemods/requests_verify.py @@ -6,10 +6,17 @@ class RequestsVerify(SemgrepCodemod): NAME = "requests-verify" REVIEW_GUIDANCE = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW - SUMMARY = "Verify SSL certificates when making requests." + SUMMARY = "Verify SSL Certificates for Requests." DESCRIPTION = ( - "Makes any calls to requests.{func} with `verify=False` to `verify=True`" + "Makes any calls to requests.{func} with `verify=False` to `verify=True`." ) + REFERENCES = [ + {"url": "https://requests.readthedocs.io/en/latest/api/", "description": ""}, + { + "url": "https://owasp.org/www-community/attacks/Manipulator-in-the-middle_attack", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/secure_random.py b/src/core_codemods/secure_random.py index 7aebf862..b1b6944c 100644 --- a/src/core_codemods/secure_random.py +++ b/src/core_codemods/secure_random.py @@ -4,9 +4,16 @@ class SecureRandom(SemgrepCodemod): NAME = "secure-random" - REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Use secrets.SystemRandom() instead of random" + REVIEW_GUIDANCE = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW + SUMMARY = "Secure Source of Randomness" DESCRIPTION = "Replaces random.{func} with more secure secrets library functions." + REFERENCES = [ + { + "url": "https://owasp.org/www-community/vulnerabilities/Insecure_Randomness", + "description": "", + }, + {"url": "https://docs.python.org/3/library/random.html", "description": ""}, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/tempfile_mktemp.py b/src/core_codemods/tempfile_mktemp.py index 26d92d5e..cb8ea15d 100644 --- a/src/core_codemods/tempfile_mktemp.py +++ b/src/core_codemods/tempfile_mktemp.py @@ -5,8 +5,14 @@ class TempfileMktemp(SemgrepCodemod): NAME = "secure-tempfile" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Use `tempfile.mkstemp` instead of `tempfile.mktemp`" + SUMMARY = "Upgrade and Secure Temp File Creation" DESCRIPTION = "Replaces `tempfile.mktemp` with `tempfile.mkstemp`." + REFERENCES = [ + { + "url": "https://docs.python.org/3/library/tempfile.html#tempfile.mktemp", + "description": "", + } + ] @classmethod def rule(cls): diff --git a/src/core_codemods/upgrade_sslcontext_minimum_version.py b/src/core_codemods/upgrade_sslcontext_minimum_version.py index cd8763df..29142bb6 100644 --- a/src/core_codemods/upgrade_sslcontext_minimum_version.py +++ b/src/core_codemods/upgrade_sslcontext_minimum_version.py @@ -5,8 +5,19 @@ class UpgradeSSLContextMinimumVersion(SemgrepCodemod): NAME = "upgrade-sslcontext-minimum-version" REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW - SUMMARY = "Upgrade minimum SSL/TLS version for SSLContext" - DESCRIPTION = "Replaces minimum SSL/TLS version for SSLContext" + SUMMARY = "Upgrade SSLContext Minimum Version" + DESCRIPTION = "Replaces minimum SSL/TLS version for SSLContext." + REFERENCES = [ + { + "url": "https://docs.python.org/3/library/ssl.html#security-considerations", + "description": "", + }, + {"url": "https://datatracker.ietf.org/doc/rfc8996/", "description": ""}, + { + "url": "https://www.digicert.com/blog/depreciating-tls-1-0-and-1-1", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/core_codemods/upgrade_sslcontext_tls.py b/src/core_codemods/upgrade_sslcontext_tls.py index a2e42182..a51a430a 100644 --- a/src/core_codemods/upgrade_sslcontext_tls.py +++ b/src/core_codemods/upgrade_sslcontext_tls.py @@ -13,11 +13,22 @@ class UpgradeSSLContextTLS(SemgrepCodemod, BaseTransformer): METADATA = CodemodMetadata( - DESCRIPTION="Replaces known insecure TLS/SSL protocol versions in SSLContext with secure ones", + DESCRIPTION="Replaces known insecure TLS/SSL protocol versions in SSLContext with secure ones.", NAME="upgrade-sslcontext-tls", REVIEW_GUIDANCE=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, + REFERENCES=[ + { + "url": "https://docs.python.org/3/library/ssl.html#security-considerations", + "description": "", + }, + {"url": "https://datatracker.ietf.org/doc/rfc8996/", "description": ""}, + { + "url": "https://www.digicert.com/blog/depreciating-tls-1-0-and-1-1", + "description": "", + }, + ], ) - SUMMARY = "Replace known insecure TLS/SSL protocol versions in SSLContext with secure ones" + SUMMARY = "Upgrade TLS Version In SSLContext" CHANGE_DESCRIPTION = "Upgrade to use a safe version of TLS in SSLContext" YAML_FILES = [ "upgrade_sslcontext_tls.yaml", diff --git a/src/core_codemods/url_sandbox.py b/src/core_codemods/url_sandbox.py index c7f7c8f1..e068200e 100644 --- a/src/core_codemods/url_sandbox.py +++ b/src/core_codemods/url_sandbox.py @@ -29,8 +29,27 @@ class UrlSandbox(SemgrepCodemod, Codemod): ), NAME="url-sandbox", REVIEW_GUIDANCE=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, + REFERENCES=[ + { + "url": "https://github.com/pixee/python-security/blob/main/src/security/safe_requests/api.py", + "description": "", + }, + {"url": "https://portswigger.net/web-security/ssrf", "description": ""}, + { + "url": "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html", + "description": "", + }, + { + "url": "https://www.rapid7.com/blog/post/2021/11/23/owasp-top-10-deep-dive-defending-against-server-side-request-forgery/", + "description": "", + }, + { + "url": "https://blog.assetnote.io/2021/01/13/blind-ssrf-chains/", + "description": "", + }, + ], ) - SUMMARY = "Ensure that requests are made safely." + SUMMARY = "Sandbox URL Creation" CHANGE_DESCRIPTION = "Switch use of requests for security.safe_requests" YAML_FILES = [ "sandbox_url_creation.yaml", diff --git a/src/core_codemods/use_walrus_if.py b/src/core_codemods/use_walrus_if.py index 6508d277..65e30e2c 100644 --- a/src/core_codemods/use_walrus_if.py +++ b/src/core_codemods/use_walrus_if.py @@ -15,11 +15,17 @@ class UseWalrusIf(SemgrepCodemod): ScopeProvider, ) NAME = "use-walrus-if" - SUMMARY = "Use Assignment Expression in Conditional" + SUMMARY = "Use Assignment Expression (Walrus) In Conditional" REVIEW_GUIDANCE = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW DESCRIPTION = ( - "Replaces multiple expressions involving `if` operator with 'walrus' operator" + "Replaces multiple expressions involving `if` operator with 'walrus' operator." ) + REFERENCES = [ + { + "url": "https://docs.python.org/3/whatsnew/3.8.html#assignment-expressions", + "description": "", + } + ] @classmethod def rule(cls): diff --git a/src/core_codemods/with_threading_lock.py b/src/core_codemods/with_threading_lock.py index 349bb48f..b7ae9705 100644 --- a/src/core_codemods/with_threading_lock.py +++ b/src/core_codemods/with_threading_lock.py @@ -5,9 +5,21 @@ class WithThreadingLock(SemgrepCodemod): NAME = "bad-lock-with-statement" - SUMMARY = "Replace deprecated usage of threading lock classes as context managers" + SUMMARY = "Separate Lock Instantiation from `with` Call" + DESCRIPTION = ( + "Replace deprecated usage of threading lock classes as context managers." + ) REVIEW_GUIDANCE = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW - DESCRIPTION = "Separates threading lock instantiation and call with `with` statement into two steps." + REFERENCES = [ + { + "url": "https://pylint.pycqa.org/en/latest/user_guide/messages/warning/useless-with-lock.", + "description": "", + }, + { + "url": "https://docs.python.org/3/library/threading.html#using-locks-conditions-and-semaphores-in-the-with-statement", + "description": "", + }, + ] @classmethod def rule(cls): diff --git a/src/scripts/__init__.py b/src/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/scripts/generate_docs.py b/src/scripts/generate_docs.py new file mode 100644 index 00000000..1f68c3cc --- /dev/null +++ b/src/scripts/generate_docs.py @@ -0,0 +1,181 @@ +import argparse +from dataclasses import dataclass +from codemodder.registry import load_registered_codemods +from pathlib import Path + + +@dataclass +class DocMetadata: + """Codemod specific metadata only for documentation""" + + importance: str + guidance_explained: str + need_sarif: str = "No" + + +# codemod-specific metadata that's used only for docs, not for codemod API +METADATA = { + "django-debug-flag-on": DocMetadata( + importance="Medium", + guidance_explained="Django's `DEBUG` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the `DEBUG` flag may intentionally be left on as a development aid.", + ), + "django-session-cookie-secure-off": DocMetadata( + importance="Medium", + guidance_explained="Django's `SESSION_COOKIE_SECURE` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the flag may intentionally be left off or missing. Also some applications may still want to support pure http. This is often the case for legacy apps.", + ), + "enable-jinja2-autoescape": DocMetadata( + importance="High", + guidance_explained="This codemod protects your applications against XSS attacks. We believe using the default behavior is unsafe.", + ), + "fix-mutable-params": DocMetadata( + importance="Medium", + guidance_explained="We believe that this codemod fixes an unsafe practice and that the changes themselves are safe and reliable.", + ), + "harden-pyyaml": DocMetadata( + importance="Medium", + guidance_explained="This codemod replaces any unsafe loaders with the `SafeLoader`, which is already the recommended replacement suggested in `yaml` documentation. We believe this replacement is safe and should not result in any issues.", + ), + "harden-ruamel": DocMetadata( + importance="Medium", + guidance_explained="This codemod replaces any unsafe `typ` argument with `typ='safe'`, which makes safety explicit and is one of the recommended uses suggested in `ruamel` documentation. We believe this replacement is safe and should not result in any issues.", + ), + "https-connection": DocMetadata( + importance="High", + guidance_explained="Support for HTTPS is widespread which, save in some legacy applications, makes this change safe.", + ), + "jwt-decode-verify": DocMetadata( + importance="High", + guidance_explained="This codemod ensures your code uses all available validations when calling `jwt.decode`. We believe this replacement is safe and should not result in any issues.", + ), + "limit-readline": DocMetadata( + importance="Medium", + guidance_explained="This codemod sets a maximum of 5MB allowed per line read by default. It is unlikely but possible that your code may receive lines that are greater than 5MB _and_ you'd still be interested in reading them, so there is some nominal risk of exceptional cases.", + ), + "safe-lxml-parser-defaults": DocMetadata( + importance="High", + guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.", + ), + "safe-lxml-parsing": DocMetadata( + importance="High", + guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.", + ), + "order-imports": DocMetadata( + importance="Low", + guidance_explained="TODO SKIP FOR NOW", + ), + "sandbox-process-creation": DocMetadata( + importance="High", + guidance_explained="We believe this change is safe and effective. The behavior of sandboxing `subprocess.run` and `subprocess.call` calls will only throw `SecurityException` if they see behavior involved in malicious code execution, which is extremely unlikely to happen in normal operation.", + ), + "remove-unnecessary-f-str": DocMetadata( + importance="Low", + guidance_explained="We believe this codemod is safe and will not cause any issues.", + ), + "unused-imports": DocMetadata( + importance="Low", + guidance_explained="We believe this codemod is safe and will not cause any issues. It is important to note that importing modules may have side-effects that alter the behavior, even if unused, but we believe those cases are rare enough to be safe.", + ), + "requests-verify": DocMetadata( + importance="High", + guidance_explained="There may be times when setting `verify=False` is useful for testing though we discourage it. \nYou may also decide to set `verify=/path/to/ca/bundle`. This codemod will not attempt to modify the `verify` value if you do set it to a path.", + ), + "secure-random": DocMetadata( + importance="High", + guidance_explained="While most of the functions in the `random` module aren't cryptographically secure, there are still valid use cases for `random.random()` such as for simulations or games.", + ), + "secure-tempfile": DocMetadata( + importance="High", + guidance_explained="We believe this codemod is safe and will cause no unexpected errors.", + ), + "upgrade-sslcontext-minimum-version": DocMetadata( + importance="High", + guidance_explained="This codemod updates the minimum supported version of TLS. Since this is an important security fix and since all modern servers offer TLSv1.2, we believe this change can be safely merged without review.", + ), + "upgrade-sslcontext-tls": DocMetadata( + importance="High", + guidance_explained="This codemod updates the minimum supported version of TLS. Since this is an important security fix and since all modern servers offer TLSv1.2, we believe this change can be safely merged without review.", + ), + "url-sandbox": DocMetadata( + importance="High", + guidance_explained="""By default, the protection only weaves in 2 checks, which we believe will not cause any issues with the vast majority of code: +1. The given URL must be HTTP/HTTPS. +2. The given URL must not point to a "well-known infrastructure target", which includes things like AWS Metadata Service endpoints, and internal routers (e.g., 192.168.1.1) which are common targets of attacks. + +However, on rare occasions an application may use a URL protocol like "file://" or "ftp://" in backend or middleware code. + +If you want to allow those protocols, change the incoming PR to look more like this and get the best security possible: + +```diff +-resp = requests.get(url) ++resp = safe_requests.get.get(url, allowed_protocols=("ftp",)) +```""", + ), + "use-walrus-if": DocMetadata( + importance="Low", + guidance_explained="We believe that using the walrus operator is an improvement in terms of clarity and readability. However, this change is only compatible with codebases that support Python 3.8 and later, so it requires quick validation before merging.", + ), + "bad-lock-with-statement": DocMetadata( + importance="Low", guidance_explained="TODO AFTER PR MERGE" + ), +} + + +def generate_docs(codemod): + try: + codemod_data = METADATA[codemod.name] + except KeyError as exc: + raise KeyError(f"Must add {codemod.name} to METADATA") from exc + + formatted_references = [ + f"* [{ref['description']}]({ref['url']})" for ref in codemod.references + ] + markdown_references = "\n".join(formatted_references) or "N/A" + + output = f"""--- +title: {codemod.summary} +sidebar_position: 1 +--- + +## {codemod.id} + +| Importance | Review Guidance | Requires SARIF Tool | +|------------|----------------------------|---------------------| +| {codemod_data.importance} | {codemod.review_guidance} | {codemod_data.need_sarif} | + +{codemod.description} +If you have feedback on this codemod, [please let us know](mailto:feedback@pixee.ai)! + +## F.A.Q. + +### Why is this codemod marked as {codemod.review_guidance}? + +{codemod_data.guidance_explained} + +## Codemod Settings + +N/A + +## References + +{markdown_references} +""" + return output + + +def main(): + parser = argparse.ArgumentParser( + description="Generate public docs for registered codemods." + ) + + parser.add_argument( + "directory", type=str, help="directory path where to create doc files" + ) + argv = parser.parse_args() + parent_dir = Path(argv.directory) + + registry = load_registered_codemods() + for codemod in registry.codemods: + doc = generate_docs(codemod) + codemod_doc_name = f"{codemod.id.replace(':', '_').replace('/', '_')}.md" + with open(parent_dir / codemod_doc_name, "w", encoding="utf-8") as f: + f.write(doc) diff --git a/tests/test_codemod_docs.py b/tests/test_codemod_docs.py index 57cc95f0..2c717416 100644 --- a/tests/test_codemod_docs.py +++ b/tests/test_codemod_docs.py @@ -9,7 +9,12 @@ def pytest_generate_tests(metafunc): metafunc.parametrize("codemod", registry.codemods) -def test_load_codemod_description(codemod): +def test_load_codemod_docs_info(codemod): if codemod.name in ["order-imports"]: pytest.xfail(reason="order-imports has no description") assert codemod._get_description() is not None # pylint: disable=protected-access + assert codemod.review_guidance in ( + "Merge After Review", + "Merge After Cursory Review", + "Merge Without Review", + )