From 6ac46b8436c2112a1db593fd061a69845d16f818 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 3 Jun 2024 23:21:35 +0100 Subject: [PATCH 1/4] Add additional sanitizers to SSRF for methods that restrict the contents of a string. --- ...ServerSideRequestForgeryCustomizations.qll | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll index a462752b3194..9d4350cb8a13 100644 --- a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.BarrierGuards +private import semmle.python.ApiGraphs /** * Provides default sources, sinks and sanitizers for detecting @@ -137,4 +138,25 @@ module ServerSideRequestForgery { ) } } + + /** A validation that a string does not contain certain characters, considered as a sanitizer. */ + private class StringRestrictionSanitizerGuard extends Sanitizer { + StringRestrictionSanitizerGuard() { + this = DataFlow::BarrierGuard::getABarrierNode() + } + } + + private predicate stringRestriction(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) { + exists(DataFlow::MethodCallNode call, DataFlow::Node strNode | + call.asCfgNode() = g and strNode.asCfgNode() = node + | + branch = true and + call.calls(strNode, + ["isalnum", "isalpha", "isdecimal", "isdigit", "isidentifier", "isnumeric", "isspace"]) + or + branch = true and + call = API::moduleImport("re").getMember(["match", "fullmatch"]).getACall() and + strNode = [call.getArg(1), call.getArgByName("string")] + ) + } } From 9331c2c33a8252a9852b95f34a187c23297ec5dd Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Tue, 4 Jun 2024 09:39:37 +0100 Subject: [PATCH 2/4] Add tests --- .../full_partial_test.py | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py b/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py index 618f06061a35..4f9151042726 100644 --- a/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py +++ b/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py @@ -1,7 +1,7 @@ from flask import request import requests - +import re def full_ssrf(): user_input = request.args['untrusted_input'] @@ -120,3 +120,47 @@ def partial_ssrf_6(): url = f"https://example.com/foo#{user_input}" requests.get(url) # NOT OK -- user only controlled fragment + +def partial_ssrf_7(): + user_input = request.args['untrusted_input'] + + if user_input.isalnum(): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain alphanumerical characters + + if user_input.isalpha(): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain alphabetical characters + + if user_input.isdecimal(): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain decimal characters + + if user_input.isdigit(): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain digits + + if user_input.isnumeric(): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain numeric characters + + if user_input.isspace(): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain whitespace characters + + if re.fullmatch(r'[a-zA-Z0-9]+', user_input): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain alphanumerical characters + + if re.fullmatch(r'.*[a-zA-Z0-9]+.*', user_input): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary characters + + + if re.match(r'^[a-zA-Z0-9]+$', user_input): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain alphanumerical characters + + if re.match(r'[a-zA-Z0-9]+', user_input): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix. From 6ff7fb2a7041f14be3285b038749615fd0ab9641 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Tue, 4 Jun 2024 09:52:57 +0100 Subject: [PATCH 3/4] Add change note --- python/ql/src/change-notes/2024-06-04-ssrf-sanitizers.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/src/change-notes/2024-06-04-ssrf-sanitizers.md diff --git a/python/ql/src/change-notes/2024-06-04-ssrf-sanitizers.md b/python/ql/src/change-notes/2024-06-04-ssrf-sanitizers.md new file mode 100644 index 000000000000..c55da2513775 --- /dev/null +++ b/python/ql/src/change-notes/2024-06-04-ssrf-sanitizers.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Additional sanitizers have been added to the `py/full-ssrf` and `py/partial-ssrf` queries for methods that verify a string contains only a certain set of characters, such as `.isalnum()` as well as regular expression tests. \ No newline at end of file From 93f10fcf14591826b5d3ae7dbee2e6b5748edea2 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Tue, 11 Jun 2024 15:44:16 +0100 Subject: [PATCH 4/4] Add sanitizers for compiled regexes --- .../ServerSideRequestForgeryCustomizations.qll | 9 +++++++++ .../full_partial_test.py | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll index 9d4350cb8a13..a4e3ecc9ee18 100644 --- a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll @@ -157,6 +157,15 @@ module ServerSideRequestForgery { branch = true and call = API::moduleImport("re").getMember(["match", "fullmatch"]).getACall() and strNode = [call.getArg(1), call.getArgByName("string")] + or + branch = true and + call = + API::moduleImport("re") + .getMember("compile") + .getReturn() + .getMember(["match", "fullmatch"]) + .getACall() and + strNode = [call.getArg(0), call.getArgByName("string")] ) } } diff --git a/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py b/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py index 4f9151042726..95ff9d64944e 100644 --- a/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py +++ b/python/ql/test/query-tests/Security/CWE-918-ServerSideRequestForgery/full_partial_test.py @@ -164,3 +164,13 @@ def partial_ssrf_7(): if re.match(r'[a-zA-Z0-9]+', user_input): url = f"https://example.com/foo#{user_input}" requests.get(url) # NOT OK, but NOT FOUND - user input can contain arbitrary character as a suffix. + + reg = re.compile(r'^[a-zA-Z0-9]+$') + + if reg.match(user_input): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain alphanumerical characters + + if reg.fullmatch(user_input): + url = f"https://example.com/foo#{user_input}" + requests.get(url) # OK - user input can only contain alphanumerical characters