working doc gen script

pixee · Oct 13, 2023 · 1e10304 · 1e10304
1 parent d908de3
commit 1e10304
Show file tree

Hide file tree

Showing 3 changed files with 156 additions and 26 deletions.
diff --git a/src/codemodder/executor.py b/src/codemodder/executor.py
@@ -58,6 +58,10 @@ def description(self):
             # TODO: temporary workaround
             return self.METADATA.DESCRIPTION
 
+    @property
+    def review_guidance(self):
+        return self.METADATA.REVIEW_GUIDANCE.name.replace("_", " ").title()
+
     @property
     def yaml_files(self):
         return [

diff --git a/src/core_codemods/lxml_safe_parser_defaults.py b/src/core_codemods/lxml_safe_parser_defaults.py
@@ -6,7 +6,7 @@
 class LxmlSafeParserDefaults(SemgrepCodemod):
     NAME = "safe-lxml-parser-defaults"
     REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW
-    SUMMARY = "Use safe defaults for lxml parsers"
+    SUMMARY = "Use Safe Defaults for lxml Parsers"
     DESCRIPTION = "Replace lxml parser parameters with safe defaults"
 
     @classmethod

diff --git a/src/scripts/generate_docs.py b/src/scripts/generate_docs.py
@@ -1,51 +1,177 @@
+import argparse
+from dataclasses import dataclass
 from codemodder.registry import load_registered_codemods
+from pathlib import Path
+
+
+@dataclass
+class DocMetadata:
+    """Codemod specific metadata only for documentation"""
+
+    importance: str
+    guidance_explained: str
+    need_sarif: str = "No"
+
+
+# codemod-specific metadata that's used only for docs, not for codemod API
+METADATA = {
+    "django-debug-flag-on": DocMetadata(
+        importance="Medium",
+        guidance_explained="Django's `DEBUG` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the `DEBUG` flag may intentionally be left on as a development aid.",
+    ),
+    "django-session-cookie-secure-off": DocMetadata(
+        importance="Medium",
+        guidance_explained="Django's `SESSION_COOKIE_SECURE` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the flag may intentionally be left off or missing. Also some applications may still want to support pure http. This is often the case for legacy apps.",
+    ),
+    "enable-jinja2-autoescape": DocMetadata(
+        importance="High",
+        guidance_explained="This codemod protects your applications against XSS attacks. We believe using the default behavior is unsafe.",
+    ),
+    "fix-mutable-params": DocMetadata(
+        importance="Medium",
+        guidance_explained="We believe that this codemod fixes an unsafe practice and that the changes themselves are safe and reliable.",
+    ),
+    "harden-pyyaml": DocMetadata(
+        importance="Medium",
+        guidance_explained="This codemod replaces any unsafe loaders with the `SafeLoader`, which is already the recommended replacement suggested in `yaml` documentation. We believe this replacement is safe and should not result in any issues.",
+    ),
+    "harden-ruamel": DocMetadata(
+        importance="Medium",
+        guidance_explained="This codemod replaces any unsafe `typ` argument with `typ='safe'`, which makes safety explicit and is one of the recommended uses suggested in `ruamel` documentation. We believe this replacement is safe and should not result in any issues.",
+    ),
+    "https-connection": DocMetadata(
+        importance="High",
+        guidance_explained="Support for HTTPS is widespread which, save in some legacy applications, makes this change safe.",
+    ),
+    "jwt-decode-verify": DocMetadata(
+        importance="SOMETHING", guidance_explained="SOMETHING"
+    ),
+    "limit-readline": DocMetadata(
+        importance="Medium",
+        guidance_explained="This codemod sets a maximum of 5MB allowed per line read by default. It is unlikely but possible that your code may receive lines that are greater than 5MB _and_ you'd still be interested in reading them, so there is some nominal risk of exceptional cases.",
+    ),
+    "safe-lxml-parser-defaults": DocMetadata(
+        importance="High",
+        guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.",
+    ),
+    "safe-lxml-parsing": DocMetadata(
+        importance="High",
+        guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.",
+    ),
+    "order-imports": DocMetadata(
+        importance="Low",
+        guidance_explained="We believe this codemod is safe and will not cause any issues. It is important to note that importing modules may have side-effects that alter the behavior, even if unused, but we believe those cases are rare enough to be safe.",
+    ),
+    "sandbox-process-creation": DocMetadata(
+        importance="High",
+        guidance_explained="We believe this change is safe and effective. The behavior of sandboxing `subprocess.run` and `subprocess.call` calls will only throw `SecurityException` if they see behavior involved in malicious code execution, which is extremely unlikely to happen in normal operation.",
+    ),
+    "remove-unnecessary-f-str": DocMetadata(
+        importance="Low",
+        guidance_explained="We believe this codemod is safe and will not cause any issues.",
+    ),
+    "unused-imports": DocMetadata(
+        importance="Low",
+        guidance_explained="We believe this codemod is safe and will not cause any issues. It is important to note that importing modules may have side-effects that alter the behavior, even if unused, but we believe those cases are rare enough to be safe.",
+    ),
+    "requests-verify": DocMetadata(
+        importance="High",
+        guidance_explained="There may be times when setting `verify=False` is useful for testing though we discourage it. \nYou may also decide to set `verify=/path/to/ca/bundle`. This codemod will not attempt to modify the `verify` value if you do set it to a path.",
+    ),
+    "secure-random": DocMetadata(
+        importance="High",
+        guidance_explained="While most of the functions in the `random` module aren't cryptographically secure, there are still valid use cases for `random.random()` such as for simulations or games.",
+    ),
+    "secure-tempfile": DocMetadata(
+        importance="High",
+        guidance_explained="We believe this codemod is safe and will cause no unexpected errors.",
+    ),
+    "upgrade-sslcontext-minimum-version": DocMetadata(
+        importance="SOMETHING", guidance_explained="SOMETHING"
+    ),
+    "upgrade-sslcontext-tls": DocMetadata(
+        importance="High",
+        guidance_explained="This codemod updates the minimum supported version of TLS. Since this is an important security fix and since all modern servers offer TLSv1.2, we believe this change can be safely merged without review.",
+    ),
+    "url-sandbox": DocMetadata(
+        importance="High",
+        guidance_explained="""By default, the protection only weaves in 2 checks, which we believe will not cause any issues with the vast majority of code:
+1. The given URL must be HTTP/HTTPS.
+2. The given URL must not point to a "well-known infrastructure target", which includes things like AWS Metadata Service endpoints, and internal routers (e.g., 192.168.1.1) which are common targets of attacks.
+
+However, on rare occasions an application may use a URL protocol like "file://" or "ftp://" in backend or middleware code.
+
+If you want to allow those protocols, change the incoming PR to look more like this and get the best security possible:
+
+```diff
+-resp = requests.get(url)
++resp = safe_requests.get.get(url, allowed_protocols=("ftp",))
+```""",
+    ),
+    "use-walrus-if": DocMetadata(
+        importance="Low",
+        guidance_explained="We believe that using the walrus operator is an improvement in terms of clarity and readability. However, this change is only compatible with codebases that support Python 3.8 and later, so it requires quick validation before merging.",
+    ),
+    # "bad-lock-with-statement": DocMetadata(
+    #     importance="Low", guidance_explained="TODO AFTER PR MERGE"
+    # ),
+}
+
 
 def generate_docs(codemod):
-    breakpoint()
-    output = f"""
----
-title: Safe lxml Parsing
+    try:
+        codemod_data = METADATA[codemod.name]
+    except KeyError as exc:
+        raise KeyError(f"Must add {codemod.name} to METADATA") from exc
+
+    output = f"""---
+title: {codemod.summary}
 sidebar_position: 1
 ---
 
 ## {codemod.id}
 
 | Importance | Review Guidance      | Requires SARIF Tool |
 |------------|----------------------|---------------------|
- | High       | Merge Without Review | No                  |
+ | {codemod_data.importance}       | {codemod.review_guidance} | {codemod_data.need_sarif}                  |
 
-This codemod sets the `parser` parameter in calls to  `lxml.etree.parse`  and `lxml.etree.fromstring`
-if omitted or set to `None` (the default value). Unfortunately, the default `parser=None` means `lxml`
-will rely on an unsafe parser, making your code potentially vulnerable to entity expansion
-attacks and external entity (XXE) attacks.
+{codemod.description}
 
-The changes look as follows:
+If you have feedback on this codemod, [please let us know](mailto:[email protected])!
 
-```diff
-  import lxml.etree
-- lxml.etree.parse("path_to_file")
-- lxml.etree.fromstring("xml_str")
-+ lxml.etree.parse("path_to_file", parser=lxml.etree.XMLParser(resolve_entities=False))
-+ lxml.etree.fromstring("xml_str", parser=lxml.etree.XMLParser(resolve_entities=False))
-```
+## F.A.Q.
 
-If you have feedback on this codemod, [please let us know](mailto:[email protected])!
+### Why is this codemod marked as {codemod.review_guidance}?
 
-## F.A.Q. 
+{codemod_data.guidance_explained}
 
-### Why is this codemod marked as Merge Without Review?
+## Codemod Settings
 
-We believe this change is safe, effective, and protects your code against very serious security attacks.
+N/A
 
 ## References
 * [https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser](https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser)
 * [https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing](https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing)
-* [https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html](https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html)    
+* [https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html](https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html)
+#codemod.references
 """
-    codemod._get_description()
+    return output
+
 
 def main():
-    # todo: get dir path
+    parser = argparse.ArgumentParser(
+        description="Generate public docs for registered codemods."
+    )
+
+    parser.add_argument(
+        "directory", type=str, help="directory path where to create doc files"
+    )
+    argv = parser.parse_args()
+    parent_dir = Path(argv.directory)
+
     registry = load_registered_codemods()
     for codemod in registry.codemods:
-        generate_docs(codemod)
+        doc = generate_docs(codemod)
+        codemod_doc_name = f"{codemod.id.replace(':', '_').replace('/', '_')}.md"
+        with open(parent_dir / codemod_doc_name, "w", encoding="utf-8") as f:
+            f.write(doc)