Skip to content

Commit

Permalink
Merge pull request #15599 from aschackmull/dataflow/fieldflowbranchli…
Browse files Browse the repository at this point in the history
…mit-v2

Dataflow: update fieldFlowBranchLimit semantics
  • Loading branch information
aschackmull authored Apr 23, 2024
2 parents 19974f0 + 5950149 commit b2f0994
Show file tree
Hide file tree
Showing 19 changed files with 302 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ predicate knownSourceModel(Node source, string model) { none() }

predicate knownSinkModel(Node sink, string model) { none() }

class DataFlowSecondLevelScope = Unit;

/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ module CppDataFlow implements InputSig<Location> {

predicate getAdditionalFlowIntoCallNodeTerm = Private::getAdditionalFlowIntoCallNodeTerm/2;

predicate getSecondLevelScope = Private::getSecondLevelScope/1;

predicate validParameterAliasStep = Private::validParameterAliasStep/2;

predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1583,3 +1583,74 @@ predicate validParameterAliasStep(Node node1, Node node2) {
)
)
}

private predicate isTopLevel(Cpp::Stmt s) { any(Function f).getBlock().getAStmt() = s }

private Cpp::Stmt getAChainedBranch(Cpp::IfStmt s) {
result = s.getThen()
or
exists(Cpp::Stmt elseBranch | s.getElse() = elseBranch |
result = getAChainedBranch(elseBranch)
or
result = elseBranch and not elseBranch instanceof Cpp::IfStmt
)
}

private Instruction getInstruction(Node n) {
result = n.asInstruction() or
result = n.asOperand().getUse() or
result = n.(SsaPhiNode).getPhiNode().getBasicBlock().getFirstInstruction() or
n.(IndirectInstruction).hasInstructionAndIndirectionIndex(result, _) or
result = getInstruction(n.(PostUpdateNode).getPreUpdateNode())
}

private newtype TDataFlowSecondLevelScope =
TTopLevelIfBranch(Cpp::Stmt s) {
exists(Cpp::IfStmt ifstmt | s = getAChainedBranch(ifstmt) and isTopLevel(ifstmt))
} or
TTopLevelSwitchCase(Cpp::SwitchCase s) {
exists(Cpp::SwitchStmt switchstmt | s = switchstmt.getASwitchCase() and isTopLevel(switchstmt))
}

/**
* A second-level control-flow scope in a `switch` or a chained `if` statement.
*
* This is a `switch` case or a branch of a chained `if` statement, given that
* the `switch` or `if` statement is top level, that is, it is not nested inside
* other CFG constructs.
*/
class DataFlowSecondLevelScope extends TDataFlowSecondLevelScope {
/** Gets a textual representation of this element. */
string toString() {
exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s.toString())
or
exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.toString())
}

/** Gets the primary location of this element. */
Cpp::Location getLocation() {
exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s.getLocation())
or
exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.getLocation())
}

/**
* Gets a statement directly contained in this scope. For an `if` branch, this
* is the branch itself, and for a `switch case`, this is one the statements
* of that case branch.
*/
private Cpp::Stmt getAStmt() {
exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s)
or
exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.getAStmt())
}

/** Gets a data-flow node nested within this scope. */
Node getANode() {
getInstruction(result).getAst().(Cpp::ControlFlowNode).getEnclosingStmt().getParentStmt*() =
this.getAStmt()
}
}

/** Gets the second-level scope containing the node `n`, if any. */
DataFlowSecondLevelScope getSecondLevelScope(Node n) { result.getANode() = n }
2 changes: 1 addition & 1 deletion cpp/ql/test/library-tests/dataflow/taint-tests/swap2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void test_copy_assignment_operator()

swap(z1, z2);

sink(z2.data1); // $ ir MISSING: ast
sink(z2.data1); // $ ir ast
sink(z1.data1); // $ SPURIOUS: ir ast=81:27 ast=82:16
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2882,6 +2882,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo

predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) }

class DataFlowSecondLevelScope = Unit;

/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ module ArrayFlowConfig implements DataFlow::ConfigSig {
mc.getAnArgument() = sink.asExpr()
)
}

int fieldFlowBranchLimit() { result = 100 }
}

module ArrayFlow = DataFlow::Global<ArrayFlowConfig>;
Expand Down
2 changes: 0 additions & 2 deletions csharp/ql/test/library-tests/dataflow/types/Types.ql
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ module TypesConfig implements DataFlow::ConfigSig {
mc.getAnArgument() = sink.asExpr()
)
}

int fieldFlowBranchLimit() { result = 1000 }
}

import ValueFlowTest<TypesConfig>
Expand Down
2 changes: 2 additions & 0 deletions go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo

predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) }

class DataFlowSecondLevelScope = Unit;

/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ module JavaDataFlow implements InputSig<Location> {

Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }

predicate getSecondLevelScope = Private::getSecondLevelScope/1;

predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;

predicate viableImplInCallContext = Private::viableImplInCallContext/2;
Expand Down
75 changes: 75 additions & 0 deletions java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,81 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo

predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) }

private predicate isTopLevel(Stmt s) {
any(Callable c).getBody() = s
or
exists(BlockStmt b | s = b.getAStmt() and isTopLevel(b))
}

private Stmt getAChainedBranch(IfStmt s) {
result = s.getThen()
or
exists(Stmt elseBranch | s.getElse() = elseBranch |
result = getAChainedBranch(elseBranch)
or
result = elseBranch and not elseBranch instanceof IfStmt
)
}

private newtype TDataFlowSecondLevelScope =
TTopLevelIfBranch(Stmt s) {
exists(IfStmt ifstmt | s = getAChainedBranch(ifstmt) and isTopLevel(ifstmt))
} or
TTopLevelSwitchCase(SwitchCase s) {
exists(SwitchStmt switchstmt | s = switchstmt.getACase() and isTopLevel(switchstmt))
}

private SwitchCase getPrecedingCase(Stmt s) {
result = s
or
exists(SwitchStmt switch, int i |
s = switch.getStmt(i) and
not s instanceof SwitchCase and
result = getPrecedingCase(switch.getStmt(i - 1))
)
}

/**
* A second-level control-flow scope in a `switch` or a chained `if` statement.
*
* This is a `switch` case or a branch of a chained `if` statement, given that
* the `switch` or `if` statement is top level, that is, it is not nested inside
* other CFG constructs.
*/
class DataFlowSecondLevelScope extends TDataFlowSecondLevelScope {
/** Gets a textual representation of this element. */
string toString() {
exists(Stmt s | this = TTopLevelIfBranch(s) | result = s.toString())
or
exists(SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.toString())
}

/**
* Gets a statement directly contained in this scope. For an `if` branch, this
* is the branch itself, and for a `switch case`, this is one the statements
* of that case branch.
*/
private Stmt getAStmt() {
exists(Stmt s | this = TTopLevelIfBranch(s) | result = s)
or
exists(SwitchCase s | this = TTopLevelSwitchCase(s) |
result = s.getRuleStatement() or
s = getPrecedingCase(result)
)
}

/** Gets a data-flow node nested within this scope. */
Node getANode() { getRelatedExpr(result).getAnEnclosingStmt() = this.getAStmt() }
}

private Expr getRelatedExpr(Node n) {
n.asExpr() = result or
n.(PostUpdateNode).getPreUpdateNode().asExpr() = result
}

/** Gets the second-level scope containing the node `n`, if any. */
DataFlowSecondLevelScope getSecondLevelScope(Node n) { result.getANode() = n }

/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ module ValueFlowConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node n) {
exists(MethodCall ma | ma.getMethod().hasName("sink") | n.asExpr() = ma.getAnArgument())
}

int fieldFlowBranchLimit() { result = 100 }
}

module ValueFlow = DataFlow::Global<ValueFlowConfig>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,8 @@ predicate knownSinkModel(Node sink, string model) {
sink = ModelOutput::getASinkNode(_, model).asSink()
}

class DataFlowSecondLevelScope = Unit;

/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
Expand Down
2 changes: 2 additions & 0 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -2254,6 +2254,8 @@ predicate knownSinkModel(Node sink, string model) {
sink = ModelOutput::getASinkNode(_, model).asSink()
}

class DataFlowSecondLevelScope = Unit;

/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: majorAnalysis
---
* The data flow library performs heuristic filtering of code paths that have a high degree of control-flow uncertainty for improved performance in cases that are deemed unlikely to yield true positive flow paths. This filtering can be controlled with the `fieldFlowBranchLimit` predicate in configurations. Two bugs have been fixed in relation to this: Some cases of high uncertainty were not being correctly identified. This fix improves performance in certain scenarios. Another group of cases of low uncertainty were also being misidentified, which led to false negatives. Taken together, we generally expect some additional query results with more true positives and fewer false positives.
18 changes: 18 additions & 0 deletions shared/dataflow/codeql/dataflow/DataFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,24 @@ signature module InputSig<LocationSig Location> {
*/
default int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() }

/**
* A second-level control-flow scope in a callable.
*
* This is used to provide a more fine-grained separation of a callable
* context for the purpose of identifying uncertain control flow. For most
* languages, this is not needed, as this separation is handled through
* virtual dispatch, but for some cases (for example, C++) this can be used to
* identify, for example, large top-level switch statements acting like
* virtual dispatch.
*/
class DataFlowSecondLevelScope {
/** Gets a textual representation of this element. */
string toString();
}

/** Gets the second-level scope containing the node `n`, if any. */
default DataFlowSecondLevelScope getSecondLevelScope(Node n) { none() }

bindingset[call, p, arg]
default predicate golangSpecificParamArgFilter(
DataFlowCall call, ParameterNode p, ArgumentNode arg
Expand Down
Loading

0 comments on commit b2f0994

Please sign in to comment.