Skip to content

Commit

Permalink
JS: do fewer regexp matches in SensitiveActions
Browse files Browse the repository at this point in the history
  • Loading branch information
nickrolfe committed Apr 23, 2024
1 parent bea7b94 commit 003d208
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 27 deletions.
52 changes: 25 additions & 27 deletions javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll
Original file line number Diff line number Diff line change
Expand Up @@ -86,39 +86,37 @@ private predicate writesProperty(DataFlow::Node node, string name) {

/** A write to a variable or property that might contain sensitive data. */
private class BasicSensitiveWrite extends SensitiveWrite {
SensitiveDataClassification classification;
string name;

BasicSensitiveWrite() {
exists(string name |
/*
* PERFORMANCE OPTIMISATION:
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
* To carry out a regex match, we must first compute the Cartesian product
* of all possible `name`s and regexes, then match.
* To keep this product as small as possible,
* we want to filter `name` as much as possible before the product.
*
* Do this by factoring out a helper predicate containing the filtering
* logic that restricts `name`. This helper predicate will get picked first
* in the join order, since it is the only call here that binds `name`.
*/

writesProperty(this, name) and
nameIndicatesSensitiveData(name, classification)
)
/*
* PERFORMANCE OPTIMISATION:
* `nameIndicatesSensitiveData` performs a `regexpMatch` on `name`.
* To carry out a regex match, we must first compute the Cartesian product
* of all possible `name`s and regexes, then match.
* To keep this product as small as possible,
* we want to filter `name` as much as possible before the product.
*
* Do this by factoring out a helper predicate containing the filtering
* logic that restricts `name`. This helper predicate will get picked first
* in the join order, since it is the only call here that binds `name`.
*/

writesProperty(this, name) and
nameIndicatesSensitiveData(name)
}

/** Gets a classification of the kind of sensitive data the write might handle. */
SensitiveDataClassification getClassification() { result = classification }
SensitiveDataClassification getClassification() { nameIndicatesSensitiveData(name, result) }
}

/** An access to a variable or property that might contain sensitive data. */
private class BasicSensitiveVariableAccess extends SensitiveVariableAccess {
SensitiveDataClassification classification;

BasicSensitiveVariableAccess() { nameIndicatesSensitiveData(name, classification) }
BasicSensitiveVariableAccess() { nameIndicatesSensitiveData(name) }

override SensitiveDataClassification getClassification() { result = classification }
override SensitiveDataClassification getClassification() {
nameIndicatesSensitiveData(name, result)
}
}

/** A function name that suggests it may be sensitive. */
Expand All @@ -138,11 +136,11 @@ abstract class SensitiveDataFunctionName extends SensitiveFunctionName {

/** A method that might return sensitive data, based on the name. */
class CredentialsFunctionName extends SensitiveDataFunctionName {
SensitiveDataClassification classification;

CredentialsFunctionName() { nameIndicatesSensitiveData(this, classification) }
CredentialsFunctionName() { nameIndicatesSensitiveData(this) }

override SensitiveDataClassification getClassification() { result = classification }
override SensitiveDataClassification getClassification() {
nameIndicatesSensitiveData(this, result)
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}

/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}

/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
Expand All @@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}

/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}

/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
Expand All @@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}

/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}

/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
Expand All @@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ module HeuristicNames {
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|certain|concert|secretar|accountant|accountab).*"
}

/**
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
*
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the given
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name) {
exists(string combinedRegexp |
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
combinedRegexp =
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
name.regexpMatch(combinedRegexp)
) and
not name.regexpMatch(notSensitiveRegexp())
}

/**
* Holds if `name` may indicate the presence of sensitive data, and
* `name` does not indicate that the data is in fact non-sensitive (for example since
Expand All @@ -115,6 +134,10 @@ module HeuristicNames {
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
* given classification), and none of the regexps from `notSensitiveRegexp` matches
* `name`.
*
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
* pass, since that combines all the regexps into one, and should be faster. Then call this
* predicate to get the classification(s).
*/
bindingset[name]
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
Expand Down

0 comments on commit 003d208

Please sign in to comment.