Skip to content

Commit

Permalink
Python: model that finditer returns iterable of re.Match objects
Browse files Browse the repository at this point in the history
  • Loading branch information
yoff committed Oct 9, 2024
1 parent 494b8bd commit 0ac4a10
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 22 deletions.
52 changes: 32 additions & 20 deletions python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3284,6 +3284,18 @@ module StdlibPrivate {
}
}

/**
 * Gets an API node on which the regular expression functions can be looked up.
 *
 * When `compiled` is `false`, the result is the `re` module itself.
 * When `compiled` is `true`, the result is the value returned by a call
 * described by `RePatternSummary`, i.e. a compiled regular expression.
 */
private API::Node re(boolean compiled) {
  compiled = false and
  result = API::moduleImport("re")
  or
  compiled = true and
  exists(RePatternSummary pattern | result = pattern.getACall().(API::CallNode).getReturn())
}

/**
* A flow summary for methods returning a `re.Match` object
*
Expand All @@ -3293,17 +3305,18 @@ module StdlibPrivate {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }

override DataFlow::CallCfgNode getACall() {
this = "re.Match" and
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
or
this = "compiled re.Match" and
result =
any(RePatternSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(["match", "search", "fullmatch"])
.getACall()
exists(API::Node re, boolean compiled |
re = re(compiled) and
(
compiled = false and
this = "re.Match"
or
compiled = true and
this = "compiled re.Match"
)
|
result = re.getMember(["match", "search", "fullmatch"]).getACall()
)
}

override DataFlow::ArgumentNode getACallback() { none() }
Expand Down Expand Up @@ -3340,6 +3353,13 @@ module StdlibPrivate {
}
}

/**
 * Gets an API node for a `re.Match` object.
 *
 * Such a node is either a subscript of the value returned by `finditer`
 * (called on the `re` module or on a compiled regular expression), or the
 * value returned by a call described by `ReMatchSummary`.
 */
private API::Node match() {
  result = re(_).getMember("finditer").getReturn().getASubscript()
  or
  exists(ReMatchSummary summary | result = summary.getACall().(API::CallNode).getReturn())
}

/**
* A flow summary for methods on a `re.Match` object
*
Expand All @@ -3353,15 +3373,7 @@ module StdlibPrivate {
methodName in ["expand", "group", "groups", "groupdict"]
}

override DataFlow::CallCfgNode getACall() {
result =
any(ReMatchSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(methodName)
.getACall()
}
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }

override DataFlow::ArgumentNode getACallback() { none() }

Expand Down
4 changes: 2 additions & 2 deletions python/ql/test/library-tests/frameworks/stdlib/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
list(re.finditer(pat, ts))[0].string, # $ tainted
[m.string for m in re.finditer(pat, ts)], # $ tainted

list(re.finditer(pat, ts))[0].groups()[0], # $ MISSING: tainted
[m.groups()[0] for m in re.finditer(pat, ts)], # $ MISSING: tainted
list(re.finditer(pat, ts))[0].groups()[0], # $ MISSING: tainted // this requires list content in type tracking
[m.groups()[0] for m in re.finditer(pat, ts)], # $ tainted
)
ensure_not_tainted(
safe_match.expand("Hello \1"),
Expand Down

0 comments on commit 0ac4a10

Please sign in to comment.