Skip to content

Commit

Permalink
Merge pull request #18089 from Napalys/napalys/regexp-unknown-flags
Browse files Browse the repository at this point in the history
JS: RegExp unknown flags support and enhanced compatibility with RegExp objects
  • Loading branch information
Napalys authored Dec 3, 2024
2 parents 6b7522f + 08ef0dc commit 1e1674a
Show file tree
Hide file tree
Showing 36 changed files with 761 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
category: majorAnalysis
---
* The `js/incomplete-sanitization` query now also checks regular expressions constructed using `new RegExp(..)`. Previously it only checked regular expression literals.
* Regular expression-based sanitisers implemented with `new RegExp(..)` are now detected in more cases.
* Regular expression related queries now account for unknown flags.
6 changes: 6 additions & 0 deletions javascript/ql/lib/semmle/javascript/StandardLibrary.qll
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ class StringReplaceCall extends DataFlow::MethodCallNode {
*/
predicate isGlobal() { this.getRegExp().isGlobal() or this.getMethodName() = "replaceAll" }

/**
* Holds if this is a global replacement, that is, the first argument is a regular expression
* with the `g` flag or unknown flags, or this is a call to `.replaceAll()`.
*/
predicate maybeGlobal() { this.getRegExp().maybeGlobal() or this.getMethodName() = "replaceAll" }

/**
* Holds if this call to `replace` replaces `old` with `new`.
*/
Expand Down
3 changes: 3 additions & 0 deletions javascript/ql/lib/semmle/javascript/dataflow/Nodes.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1685,6 +1685,9 @@ class RegExpCreationNode extends DataFlow::SourceNode {
/** Holds if the constructed predicate has the `g` flag. */
predicate isGlobal() { RegExp::isGlobal(this.getFlags()) }

/** Holds if the constructed predicate has the `g` flag or unknown flags. */
predicate maybeGlobal() { RegExp::maybeGlobal(this.tryGetFlags()) }

/** Gets a data flow node referring to this regular expression. */
private DataFlow::SourceNode getAReference(DataFlow::TypeTracker t) {
t.start() and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ private StringReplaceCall getAStringReplaceMethodCall(StringReplaceCall n) {
module HtmlSanitization {
private predicate fixedGlobalReplacement(StringReplaceCallSequence chain) {
forall(StringReplaceCall member | member = chain.getAMember() |
member.isGlobal() and member.getArgument(0) instanceof DataFlow::RegExpLiteralNode
member.maybeGlobal() and member.getArgument(0) instanceof DataFlow::RegExpCreationNode
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,12 @@ module CleartextLogging {
*/
class MaskingReplacer extends Barrier, StringReplaceCall {
MaskingReplacer() {
this.isGlobal() and
this.maybeGlobal() and
exists(this.getRawReplacement().getStringValue()) and
any(RegExpDot term).getLiteral() = this.getRegExp().asExpr()
exists(DataFlow::RegExpCreationNode regexpObj |
this.(StringReplaceCall).getRegExp() = regexpObj and
regexpObj.getRoot() = any(RegExpDot term).getRootTerm()
)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class Configuration extends TaintTracking::Configuration {
// Replacing with "_" is likely to be exploitable
not replace.getRawReplacement().getStringValue() = "_" and
(
replace.isGlobal()
replace.maybeGlobal()
or
// Non-global replace with a non-empty string can also prevent __proto__ by
// inserting a chunk of text that doesn't fit anywhere in __proto__
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ module RegExpInjection {
*/
class MetacharEscapeSanitizer extends Sanitizer, StringReplaceCall {
MetacharEscapeSanitizer() {
this.isGlobal() and
this.maybeGlobal() and
(
RegExp::alwaysMatchesMetaCharacter(this.getRegExp().getRoot(), ["{", "[", "+"])
or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ module TaintedPath {
this instanceof StringReplaceCall and
input = this.getReceiver() and
output = this and
not exists(RegExpLiteral literal, RegExpTerm term |
this.(StringReplaceCall).getRegExp().asExpr() = literal and
this.(StringReplaceCall).isGlobal() and
literal.getRoot() = term
not exists(DataFlow::RegExpCreationNode regexp, RegExpTerm term |
this.(StringReplaceCall).getRegExp() = regexp and
this.(StringReplaceCall).maybeGlobal() and
regexp.getRoot() = term
|
term.getAMatchedString() = "/" or
term.getAMatchedString() = "." or
Expand Down Expand Up @@ -305,9 +305,9 @@ module TaintedPath {
input = this.getReceiver() and
output = this and
this.isGlobal() and
exists(RegExpLiteral literal, RegExpTerm term |
this.getRegExp().asExpr() = literal and
literal.getRoot() = term and
exists(DataFlow::RegExpCreationNode regexp, RegExpTerm term |
this.getRegExp() = regexp and
regexp.getRoot() = term and
not term.getAMatchedString() = "/"
|
term.getAMatchedString() = "." or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ module UnsafeShellCommandConstruction {
class ReplaceQuotesSanitizer extends Sanitizer, StringReplaceCall {
ReplaceQuotesSanitizer() {
this.getAReplacedString() = "'" and
this.isGlobal() and
this.maybeGlobal() and
this.getRawReplacement().mayHaveStringValue(["'\\''", ""])
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ module Shared {
*/
class MetacharEscapeSanitizer extends Sanitizer, StringReplaceCall {
MetacharEscapeSanitizer() {
this.isGlobal() and
this.maybeGlobal() and
(
RegExp::alwaysMatchesMetaCharacter(this.getRegExp().getRoot(), ["<", "'", "\""])
or
Expand Down
21 changes: 15 additions & 6 deletions javascript/ql/src/Security/CWE-116/IncompleteSanitization.ql
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ string metachar() { result = "'\"\\&<>\n\r\t*|{}[]%$".charAt(_) }

/** Gets a string matched by `e` in a `replace` call. */
string getAMatchedString(DataFlow::Node e) {
result = e.(DataFlow::RegExpLiteralNode).getRoot().getAMatchedString()
result = e.(DataFlow::RegExpCreationNode).getRoot().getAMatchedString()
or
result = e.getStringValue()
}
Expand Down Expand Up @@ -52,8 +52,8 @@ predicate isSimpleAlt(RegExpAlt t) { forall(RegExpTerm ch | ch = t.getAChild() |
* Holds if `mce` is of the form `x.replace(re, new)`, where `re` is a global
* regular expression and `new` prefixes the matched string with a backslash.
*/
predicate isBackslashEscape(StringReplaceCall mce, DataFlow::RegExpLiteralNode re) {
mce.isGlobal() and
predicate isBackslashEscape(StringReplaceCall mce, DataFlow::RegExpCreationNode re) {
mce.maybeGlobal() and
re = mce.getRegExp() and
(
// replacement with `\$&`, `\$1` or similar
Expand All @@ -72,7 +72,7 @@ predicate allBackslashesEscaped(DataFlow::Node nd) {
nd instanceof JsonStringifyCall
or
// check whether `nd` itself escapes backslashes
exists(DataFlow::RegExpLiteralNode rel | isBackslashEscape(nd, rel) |
exists(DataFlow::RegExpCreationNode rel | isBackslashEscape(nd, rel) |
// if it's a complex regexp, we conservatively assume that it probably escapes backslashes
not isSimple(rel.getRoot()) or
getAMatchedString(rel) = "\\"
Expand Down Expand Up @@ -143,12 +143,21 @@ predicate whitelistedRemoval(StringReplaceCall repl) {
)
}

/**
* Gets a nice string representation of the pattern or value of the node.
*/
string getPatternOrValueString(DataFlow::Node node) {
if node instanceof DataFlow::RegExpConstructorInvokeNode
then result = "/" + node.(DataFlow::RegExpConstructorInvokeNode).getRoot() + "/"
else result = node.toString()
}

from StringReplaceCall repl, DataFlow::Node old, string msg
where
(old = repl.getArgument(0) or old = repl.getRegExp()) and
(
not repl.isGlobal() and
msg = "This replaces only the first occurrence of " + old + "." and
not repl.maybeGlobal() and
msg = "This replaces only the first occurrence of " + getPatternOrValueString(old) + "." and
// only flag if this is likely to be a sanitizer or URL encoder or decoder
exists(string m | m = getAMatchedString(old) |
// sanitizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ predicate isCaseSensitiveMiddleware(
arg = call.getArgument(0) and
regexp.getAReference().flowsTo(arg) and
exists(string flags |
flags = regexp.getFlags() and
not RegExp::isIgnoreCase(flags)
flags = regexp.tryGetFlags() and
not RegExp::maybeIgnoreCase(flags)
)
)
}
Expand Down
Loading

0 comments on commit 1e1674a

Please sign in to comment.