Merge pull request #16657 from joefarebrother/python-partial-ssrf-fp

Python: Add additional sanitizers to SSRF
github · Jun 11, 2024 · f441c68 · f441c68
2 parents e9bd85e + 93f10fc
commit f441c68
Show file tree

Hide file tree

Showing 3 changed files with 90 additions and 1 deletion.
diff --git a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll
@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
 private import semmle.python.Concepts
 private import semmle.python.dataflow.new.RemoteFlowSources
 private import semmle.python.dataflow.new.BarrierGuards
+private import semmle.python.ApiGraphs
 
 /**
  * Provides default sources, sinks and sanitizers for detecting
@@ -137,4 +138,34 @@ module ServerSideRequestForgery {
       )
     }
   }
+
+  /** A validation that a string does not contain certain characters, considered as a sanitizer. */
+  private class StringRestrictionSanitizerGuard extends Sanitizer {
+    StringRestrictionSanitizerGuard() {
+      this = DataFlow::BarrierGuard<stringRestriction/3>::getABarrierNode()
+    }
+  }
+
+  private predicate stringRestriction(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
+    exists(DataFlow::MethodCallNode call, DataFlow::Node strNode |
+      call.asCfgNode() = g and strNode.asCfgNode() = node
+    |
+      branch = true and
+      call.calls(strNode,
+        ["isalnum", "isalpha", "isdecimal", "isdigit", "isidentifier", "isnumeric", "isspace"])
+      or
+      branch = true and
+      call = API::moduleImport("re").getMember(["match", "fullmatch"]).getACall() and
+      strNode = [call.getArg(1), call.getArgByName("string")]
+      or
+      branch = true and
+      call =
+        API::moduleImport("re")
+            .getMember("compile")
+            .getReturn()
+            .getMember(["match", "fullmatch"])
+            .getACall() and
+      strNode = [call.getArg(0), call.getArgByName("string")]
+    )
+  }
 }
diff --git a/python/ql/src/change-notes/2024-06-04-ssrf-sanitizers.md b/python/ql/src/change-notes/2024-06-04-ssrf-sanitizers.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Additional sanitizers have been added to the `py/full-ssrf` and `py/partial-ssrf` queries for methods that verify a string contains only a certain set of characters, such as `.isalnum()` as well as regular expression tests.
diff --git a/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py b/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py
@@ -1,7 +1,7 @@
 from flask import request
 
 import requests
-
+import re
 
 def full_ssrf():
     user_input = request.args['untrusted_input']
@@ -120,3 +120,57 @@ def partial_ssrf_6():
 
     url = f"https://example.com/foo#{user_input}"
     requests.get(url) # NOT OK -- user only controlled fragment
+
+def partial_ssrf_7():
+    user_input = request.args['untrusted_input']
+
+    if user_input.isalnum():
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain alphanumerical characters 
+
+    if user_input.isalpha():
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain alphabetical characters 
+
+    if user_input.isdecimal():
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain decimal characters 
+
+    if user_input.isdigit():
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain digits
+
+    if user_input.isnumeric():
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain numeric characters
+
+    if user_input.isspace():
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain whitespace characters 
+
+    if re.fullmatch(r'[a-zA-Z0-9]+', user_input):
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain alphanumerical characters
+
+    if re.fullmatch(r'.*[a-zA-Z0-9]+.*', user_input):
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary characters 
+
+
+    if re.match(r'^[a-zA-Z0-9]+$', user_input):
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # OK - user input can only contain alphanumerical characters
+
+    if re.match(r'[a-zA-Z0-9]+', user_input):
+        url = f"https://example.com/foo#{user_input}"
+        requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix.
+
+    reg = re.compile(r'^[a-zA-Z0-9]+$')
+
+    if reg.match(user_input):
+       url = f"https://example.com/foo#{user_input}"
+       requests.get(url) # OK - user input can only contain alphanumerical characters
+
+    if reg.fullmatch(user_input):
+       url = f"https://example.com/foo#{user_input}"
+       requests.get(url) # OK - user input can only contain alphanumerical characters