From 82afbbc17b63df59a9dfa0579be8bbeff9515491 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Tue, 13 Feb 2024 14:09:22 +0100 Subject: [PATCH 1/8] Dataflow: Adjust fieldFlowBranchLimit count (block less) and adjust return edge condition (block more) --- .../codeql/dataflow/internal/DataFlowImpl.qll | 132 +++++++++++++----- 1 file changed, 99 insertions(+), 33 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index d445fa6237ee..d5af828babbf 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -722,7 +722,7 @@ module MakeImpl Lang> { * the enclosing callable in order to reach a sink. */ pragma[nomagic] - private predicate revFlow(NodeEx node, boolean toReturn) { + additional predicate revFlow(NodeEx node, boolean toReturn) { revFlow0(node, toReturn) and fwdFlow(node) } @@ -1113,6 +1113,43 @@ module MakeImpl Lang> { result = getAdditionalFlowIntoCallNodeTerm(arg.projectToNode(), p.projectToNode()) } + pragma[nomagic] + private predicate returnCallEdge1(DataFlowCallable c, DataFlowCall call, NodeEx out) { + exists(RetNodeEx ret | + flowOutOfCallNodeCand1(call, ret, _, out) and c = ret.getEnclosingCallable() + ) + } + + private int simpleDispatchFanoutOnReturn(DataFlowCall call, NodeEx out) { + result = strictcount(DataFlowCallable c | returnCallEdge1(c, call, out)) + } + + private int ctxDispatchFanoutOnReturn(NodeEx out, DataFlowCall ctx) { + exists(DataFlowCall call, DataFlowCallable c | + simpleDispatchFanoutOnReturn(call, out) > 1 and + not Stage1::revFlow(out, false) and + call.getEnclosingCallable() = c and + returnCallEdge1(c, ctx, _) and + mayBenefitFromCallContextExt(call, _) and + result = + count(DataFlowCallable tgt | + tgt = viableImplInCallContextExt(call, ctx) and + returnCallEdge1(tgt, call, out) + ) + ) + } + + private int ctxDispatchFanoutOnReturn(NodeEx out) { + result = max(DataFlowCall ctx | | ctxDispatchFanoutOnReturn(out, ctx)) + } + + private int dispatchFanoutOnReturn(NodeEx out) { + result = ctxDispatchFanoutOnReturn(out) + or + not exists(ctxDispatchFanoutOnReturn(out)) and + result = simpleDispatchFanoutOnReturn(_, out) + } + /** * Gets the amount of forward branching on the origin of a cross-call path * edge in the graph of paths between sources and sinks that ignores call @@ -1121,8 +1158,12 @@ module MakeImpl Lang> { pragma[nomagic] private int branch(NodeEx n1) { result = - strictcount(NodeEx n | - flowOutOfCallNodeCand1(_, n1, _, n) or flowIntoCallNodeCand1(_, n1, n) + strictcount(DataFlowCallable c | + exists(NodeEx n | + flowOutOfCallNodeCand1(_, n1, _, n) or flowIntoCallNodeCand1(_, n1, n) + | + c = n.getEnclosingCallable() + ) ) + sum(ParamNodeEx p1 | | getLanguageSpecificFlowIntoCallNodeCand1(n1, p1)) } @@ -1134,8 +1175,12 @@ module MakeImpl Lang> { pragma[nomagic] private int join(NodeEx n2) { result = - strictcount(NodeEx n | - flowOutOfCallNodeCand1(_, n, _, n2) or flowIntoCallNodeCand1(_, n, n2) + strictcount(DataFlowCallable c | + exists(NodeEx n | + flowOutOfCallNodeCand1(_, n, _, n2) or flowIntoCallNodeCand1(_, n, n2) + | + c = n.getEnclosingCallable() + ) ) + sum(ArgNodeEx arg2 | | getLanguageSpecificFlowIntoCallNodeCand1(arg2, n2)) } @@ -1151,17 +1196,25 @@ module MakeImpl Lang> { DataFlowCall call, RetNodeEx ret, ReturnKindExt kind, NodeEx out, boolean allowsFieldFlow ) { flowOutOfCallNodeCand1(call, ret, kind, out) and - exists(int b, int j | - b = branch(ret) and - j = join(out) and + exists(int j | + j = dispatchFanoutOnReturn(out) and + j > 0 and if - b.minimum(j) <= Config::fieldFlowBranchLimit() or + j <= Config::fieldFlowBranchLimit() or ignoreFieldFlowBranchLimit(ret.getEnclosingCallable()) then allowsFieldFlow = true else allowsFieldFlow = false ) } + pragma[nomagic] + private predicate allowsFieldFlowThrough(DataFlowCall call, DataFlowCallable c) { + exists(RetNodeEx ret | + flowOutOfCallNodeCand1(call, ret, _, _, true) and + c = ret.getEnclosingCallable() + ) + } + /** * Holds if data can flow into `call` and that this step is part of a * path from a source to a sink. The `allowsFieldFlow` flag indicates whether @@ -1412,14 +1465,16 @@ module MakeImpl Lang> { ) or // flow into a callable - fwdFlowIn(node, apa, state, cc, t, ap) and - if PrevStage::parameterMayFlowThrough(node, apa) - then ( - summaryCtx = TParamNodeSome(node.asNode()) and - argT = TypOption::some(t) and - argAp = apSome(ap) - ) else ( - summaryCtx = TParamNodeNone() and argT instanceof TypOption::None and argAp = apNone() + exists(boolean allowsFlowThrough | + fwdFlowIn(node, apa, state, cc, t, ap, allowsFlowThrough) and + if allowsFlowThrough = true + then ( + summaryCtx = TParamNodeSome(node.asNode()) and + argT = TypOption::some(t) and + argAp = apSome(ap) + ) else ( + summaryCtx = TParamNodeNone() and argT instanceof TypOption::None and argAp = apNone() + ) ) or // flow out of a callable @@ -1604,7 +1659,7 @@ module MakeImpl Lang> { private predicate fwdFlowInCand( DataFlowCall call, ArgNodeEx arg, FlowState state, Cc outercc, DataFlowCallable inner, ParamNodeEx p, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, - boolean emptyAp, ApApprox apa, boolean cc + boolean emptyAp, ApApprox apa, boolean cc, boolean allowsFlowThrough ) { exists(boolean allowsFieldFlow | fwdFlowIntoArg(arg, state, outercc, summaryCtx, argT, argAp, t, ap, emptyAp, apa, cc) and @@ -1614,7 +1669,10 @@ module MakeImpl Lang> { viableImplArgNotCallContextReduced(call, arg, outercc) ) and callEdgeArgParamRestrictedInlineLate(call, inner, arg, p, allowsFieldFlow, apa) and - if allowsFieldFlow = false then emptyAp = true else any() + (if allowsFieldFlow = false then emptyAp = true else any()) and + if allowsFieldFlowThrough(call, inner) + then allowsFlowThrough = true + else allowsFlowThrough = emptyAp ) } @@ -1622,20 +1680,21 @@ module MakeImpl Lang> { private predicate fwdFlowInCandTypeFlowDisabled( DataFlowCall call, ArgNodeEx arg, FlowState state, Cc outercc, DataFlowCallable inner, ParamNodeEx p, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, - ApApprox apa, boolean cc + ApApprox apa, boolean cc, boolean allowsFlowThrough ) { not enableTypeFlow() and fwdFlowInCand(call, arg, state, outercc, inner, p, summaryCtx, argT, argAp, t, ap, _, - apa, cc) + apa, cc, allowsFlowThrough) } pragma[nomagic] private predicate fwdFlowInCandTypeFlowEnabled( DataFlowCall call, ArgNodeEx arg, Cc outercc, DataFlowCallable inner, ParamNodeEx p, - boolean emptyAp, ApApprox apa, boolean cc + boolean emptyAp, ApApprox apa, boolean cc, boolean allowsFlowThrough ) { enableTypeFlow() and - fwdFlowInCand(call, arg, _, outercc, inner, p, _, _, _, _, _, emptyAp, apa, cc) + fwdFlowInCand(call, arg, _, outercc, inner, p, _, _, _, _, _, emptyAp, apa, cc, + allowsFlowThrough) } pragma[nomagic] @@ -1650,9 +1709,10 @@ module MakeImpl Lang> { pragma[nomagic] private predicate fwdFlowInValidEdgeTypeFlowEnabled( DataFlowCall call, ArgNodeEx arg, Cc outercc, DataFlowCallable inner, ParamNodeEx p, - CcCall innercc, boolean emptyAp, ApApprox apa, boolean cc + CcCall innercc, boolean emptyAp, ApApprox apa, boolean cc, boolean allowsFlowThrough ) { - fwdFlowInCandTypeFlowEnabled(call, arg, outercc, inner, p, emptyAp, apa, cc) and + fwdFlowInCandTypeFlowEnabled(call, arg, outercc, inner, p, emptyAp, apa, cc, + allowsFlowThrough) and FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and innercc = getCallContextCall(call, inner) } @@ -1661,19 +1721,19 @@ module MakeImpl Lang> { predicate fwdFlowIn( DataFlowCall call, DataFlowCallable inner, ParamNodeEx p, FlowState state, Cc outercc, CcCall innercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, - Ap ap, ApApprox apa, boolean cc + Ap ap, ApApprox apa, boolean cc, boolean allowsFlowThrough ) { exists(ArgNodeEx arg | // type flow disabled: linear recursion fwdFlowInCandTypeFlowDisabled(call, arg, state, outercc, inner, p, summaryCtx, argT, - argAp, t, ap, apa, cc) and + argAp, t, ap, apa, cc, allowsFlowThrough) and fwdFlowInValidEdgeTypeFlowDisabled(call, inner, innercc, pragma[only_bind_into](cc)) or // type flow enabled: non-linear recursion exists(boolean emptyAp | fwdFlowIntoArg(arg, state, outercc, summaryCtx, argT, argAp, t, ap, emptyAp, apa, cc) and fwdFlowInValidEdgeTypeFlowEnabled(call, arg, outercc, inner, p, innercc, emptyAp, - apa, cc) + apa, cc, allowsFlowThrough) ) ) } @@ -1683,10 +1743,16 @@ module MakeImpl Lang> { pragma[nomagic] private predicate fwdFlowIn( - ParamNodeEx p, ApApprox apa, FlowState state, CcCall innercc, Typ t, Ap ap + ParamNodeEx p, ApApprox apa, FlowState state, CcCall innercc, Typ t, Ap ap, + boolean allowsFlowThrough ) { - FwdFlowIn::fwdFlowIn(_, _, p, state, _, innercc, _, _, _, t, ap, - apa, _) + exists(boolean allowsFlowThrough0 | + FwdFlowIn::fwdFlowIn(_, _, p, state, _, innercc, _, _, _, t, ap, + apa, _, allowsFlowThrough0) and + if PrevStage::parameterMayFlowThrough(p, apa) + then allowsFlowThrough = allowsFlowThrough0 + else allowsFlowThrough = false + ) } pragma[nomagic] @@ -1784,7 +1850,7 @@ module MakeImpl Lang> { Typ t, Ap ap, boolean cc ) { FwdFlowIn::fwdFlowIn(call, c, p, state, _, innercc, _, _, _, t, - ap, _, cc) + ap, _, cc, _) } pragma[nomagic] @@ -1903,7 +1969,7 @@ module MakeImpl Lang> { ApOption argAp, ParamNodeEx p, Typ t, Ap ap ) { FwdFlowIn::fwdFlowIn(call, _, p, _, cc, innerCc, summaryCtx, - argT, argAp, t, ap, _, _) + argT, argAp, t, ap, _, _, true) } pragma[nomagic] From f945687a93ab43d4166e2000f8bfa150b5f932f7 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 26 Feb 2024 11:03:31 +0100 Subject: [PATCH 2/8] Dataflow: Simplify branch and join. --- .../dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index d5af828babbf..0e4c1bb7823a 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1156,12 +1156,11 @@ module MakeImpl Lang> { * contexts. */ pragma[nomagic] - private int branch(NodeEx n1) { + private int branch(ArgNodeEx n1) { result = strictcount(DataFlowCallable c | exists(NodeEx n | - flowOutOfCallNodeCand1(_, n1, _, n) or flowIntoCallNodeCand1(_, n1, n) - | + flowIntoCallNodeCand1(_, n1, n) and c = n.getEnclosingCallable() ) ) + sum(ParamNodeEx p1 | | getLanguageSpecificFlowIntoCallNodeCand1(n1, p1)) @@ -1173,12 +1172,11 @@ module MakeImpl Lang> { * contexts. */ pragma[nomagic] - private int join(NodeEx n2) { + private int join(ParamNodeEx n2) { result = strictcount(DataFlowCallable c | exists(NodeEx n | - flowOutOfCallNodeCand1(_, n, _, n2) or flowIntoCallNodeCand1(_, n, n2) - | + flowIntoCallNodeCand1(_, n, n2) and c = n.getEnclosingCallable() ) ) + sum(ArgNodeEx arg2 | | getLanguageSpecificFlowIntoCallNodeCand1(arg2, n2)) From b87b8329a06a9e08a2877a085f0104a14f7a798c Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Wed, 21 Feb 2024 12:09:30 +0100 Subject: [PATCH 3/8] Dataflow: Use default fieldFlowBranchLimit in qltests. --- .../test/library-tests/dataflow/collections/CollectionFlow.ql | 2 -- csharp/ql/test/library-tests/dataflow/types/Types.ql | 2 -- java/ql/test/library-tests/frameworks/guava/handwritten/flow.ql | 2 -- shared/dataflow/codeql/dataflow/test/InlineFlowTest.qll | 2 -- swift/ql/test/TestUtilities/InlineFlowTest.qll | 2 -- 5 files changed, 10 deletions(-) diff --git a/csharp/ql/test/library-tests/dataflow/collections/CollectionFlow.ql b/csharp/ql/test/library-tests/dataflow/collections/CollectionFlow.ql index abeb61cf3fc4..cb80bc98938c 100644 --- a/csharp/ql/test/library-tests/dataflow/collections/CollectionFlow.ql +++ b/csharp/ql/test/library-tests/dataflow/collections/CollectionFlow.ql @@ -14,8 +14,6 @@ module ArrayFlowConfig implements DataFlow::ConfigSig { mc.getAnArgument() = sink.asExpr() ) } - - int fieldFlowBranchLimit() { result = 100 } } module ArrayFlow = DataFlow::Global; diff --git a/csharp/ql/test/library-tests/dataflow/types/Types.ql b/csharp/ql/test/library-tests/dataflow/types/Types.ql index 1a8728df5a33..be631788642d 100644 --- a/csharp/ql/test/library-tests/dataflow/types/Types.ql +++ b/csharp/ql/test/library-tests/dataflow/types/Types.ql @@ -18,8 +18,6 @@ module TypesConfig implements DataFlow::ConfigSig { mc.getAnArgument() = sink.asExpr() ) } - - int fieldFlowBranchLimit() { result = 1000 } } import ValueFlowTest diff --git a/java/ql/test/library-tests/frameworks/guava/handwritten/flow.ql b/java/ql/test/library-tests/frameworks/guava/handwritten/flow.ql index 3e008f4856d4..bb4592b0dba0 100644 --- a/java/ql/test/library-tests/frameworks/guava/handwritten/flow.ql +++ b/java/ql/test/library-tests/frameworks/guava/handwritten/flow.ql @@ -18,8 +18,6 @@ module ValueFlowConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node n) { exists(MethodCall ma | ma.getMethod().hasName("sink") | n.asExpr() = ma.getAnArgument()) } - - int fieldFlowBranchLimit() { result = 100 } } module ValueFlow = DataFlow::Global; diff --git a/shared/dataflow/codeql/dataflow/test/InlineFlowTest.qll b/shared/dataflow/codeql/dataflow/test/InlineFlowTest.qll index e35d1332bca9..bced15203f18 100644 --- a/shared/dataflow/codeql/dataflow/test/InlineFlowTest.qll +++ b/shared/dataflow/codeql/dataflow/test/InlineFlowTest.qll @@ -55,8 +55,6 @@ module InlineFlowTestMake< predicate isSource(DataFlowLang::Node source) { Impl::defaultSource(source) } predicate isSink(DataFlowLang::Node sink) { Impl::defaultSink(sink) } - - int fieldFlowBranchLimit() { result = 1000 } } private module NoFlowConfig implements DataFlow::ConfigSig { diff --git a/swift/ql/test/TestUtilities/InlineFlowTest.qll b/swift/ql/test/TestUtilities/InlineFlowTest.qll index cbf74d76d56b..214d28cac042 100644 --- a/swift/ql/test/TestUtilities/InlineFlowTest.qll +++ b/swift/ql/test/TestUtilities/InlineFlowTest.qll @@ -61,8 +61,6 @@ module DefaultFlowConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { defaultSource(source) } predicate isSink(DataFlow::Node sink) { defaultSink(sink) } - - int fieldFlowBranchLimit() { result = 1000 } } module NoFlowConfig implements DataFlow::ConfigSig { From 9e39be5aeaa1f8a0f4972e8c9c437ef722698828 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 26 Feb 2024 14:36:21 +0100 Subject: [PATCH 4/8] C++: Update qltest. --- cpp/ql/test/library-tests/dataflow/taint-tests/swap2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/swap2.cpp b/cpp/ql/test/library-tests/dataflow/taint-tests/swap2.cpp index efb309a45792..509d96535a3d 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/swap2.cpp +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/swap2.cpp @@ -84,7 +84,7 @@ void test_copy_assignment_operator() swap(z1, z2); - sink(z2.data1); // $ ir MISSING: ast + sink(z2.data1); // $ ir ast sink(z1.data1); // $ SPURIOUS: ir ast=81:27 ast=82:16 } From db6d27bd2be27a58718065979532c05e8f0d522c Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 4 Mar 2024 11:41:33 +0100 Subject: [PATCH 5/8] C++: Count return dispatch based on 2nd level scopes. --- .../internal/DataFlowImplSpecific.qll | 2 + .../ir/dataflow/internal/DataFlowPrivate.qll | 71 +++++++++++++++++++ shared/dataflow/codeql/dataflow/DataFlow.qll | 18 +++++ .../codeql/dataflow/internal/DataFlowImpl.qll | 30 ++++++-- 4 files changed, 115 insertions(+), 6 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll index aeb136c761e2..755a20ebe53a 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll @@ -22,6 +22,8 @@ module CppDataFlow implements InputSig { predicate getAdditionalFlowIntoCallNodeTerm = Private::getAdditionalFlowIntoCallNodeTerm/2; + predicate getSecondLevelScope = Private::getSecondLevelScope/1; + predicate validParameterAliasStep = Private::validParameterAliasStep/2; predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1; diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 3c853c431f2a..005667d38851 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -1263,3 +1263,74 @@ predicate validParameterAliasStep(Node node1, Node node2) { ) ) } + +private predicate isTopLevel(Cpp::Stmt s) { any(Function f).getBlock().getAStmt() = s } + +private Cpp::Stmt getAChainedBranch(Cpp::IfStmt s) { + result = s.getThen() + or + exists(Cpp::Stmt elseBranch | s.getElse() = elseBranch | + result = getAChainedBranch(elseBranch) + or + result = elseBranch and not elseBranch instanceof Cpp::IfStmt + ) +} + +private Instruction getInstruction(Node n) { + result = n.asInstruction() or + result = n.asOperand().getUse() or + result = n.(SsaPhiNode).getPhiNode().getBasicBlock().getFirstInstruction() or + n.(IndirectInstruction).hasInstructionAndIndirectionIndex(result, _) or + result = getInstruction(n.(PostUpdateNode).getPreUpdateNode()) +} + +private newtype TDataFlowSecondLevelScope = + TTopLevelIfBranch(Cpp::Stmt s) { + exists(Cpp::IfStmt ifstmt | s = getAChainedBranch(ifstmt) and isTopLevel(ifstmt)) + } or + TTopLevelSwitchCase(Cpp::SwitchCase s) { + exists(Cpp::SwitchStmt switchstmt | s = switchstmt.getASwitchCase() and isTopLevel(switchstmt)) + } + +/** + * A second-level control-flow scope in a `switch` or a chained `if` statement. + * + * This is a `switch` case or a branch of a chained `if` statement, given that + * the `switch` or `if` statement is top level, that is, it is not nested inside + * other CFG constructs. + */ +class DataFlowSecondLevelScope extends TDataFlowSecondLevelScope { + /** Gets a textual representation of this element. */ + string toString() { + exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s.toString()) + or + exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.toString()) + } + + /** Gets the primary location of this element. */ + Cpp::Location getLocation() { + exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s.getLocation()) + or + exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.getLocation()) + } + + /** + * Gets a statement directly contained in this scope. For an `if` branch, this + * is the branch itself, and for a `switch case`, this is one the statements + * of that case branch. + */ + private Cpp::Stmt getAStmt() { + exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s) + or + exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.getAStmt()) + } + + /** Gets a data-flow node nested within this scope. */ + Node getANode() { + getInstruction(result).getAst().(Cpp::ControlFlowNode).getEnclosingStmt().getParentStmt*() = + this.getAStmt() + } +} + +/** Gets the second-level scope containing the node `n`, if any. */ +DataFlowSecondLevelScope getSecondLevelScope(Node n) { result.getANode() = n } diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 4943fc9790dd..1ff18d5803fa 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -308,6 +308,24 @@ signature module InputSig { */ default int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() } + /** + * A second-level control-flow scope in a callable. + * + * This is used to provide a more fine-grained separation of a callable + * context for the purpose of identifying uncertain control flow. For most + * languages, this is not needed, as this separation is handled through + * virtual dispatch, but for some cases (for example, C++) this can be used to + * identify, for example, large top-level switch statements acting like + * virtual dispatch. + */ + class DataFlowSecondLevelScope { + /** Gets a textual representation of this element. */ + string toString(); + } + + /** Gets the second-level scope containing the node `n`, if any. */ + default DataFlowSecondLevelScope getSecondLevelScope(Node n) { none() } + bindingset[call, p, arg] default predicate golangSpecificParamArgFilter( DataFlowCall call, ParameterNode p, ArgumentNode arg diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 0e4c1bb7823a..7948274b6dd2 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1113,15 +1113,33 @@ module MakeImpl Lang> { result = getAdditionalFlowIntoCallNodeTerm(arg.projectToNode(), p.projectToNode()) } + private module SndLevelScopeOption = Option; + + private class SndLevelScopeOption = SndLevelScopeOption::Option; + + pragma[nomagic] + private SndLevelScopeOption getScope(RetNodeEx ret) { + result = SndLevelScopeOption::some(getSecondLevelScope(ret.asNode())) + or + result instanceof SndLevelScopeOption::None and not exists(getSecondLevelScope(ret.asNode())) + } + pragma[nomagic] - private predicate returnCallEdge1(DataFlowCallable c, DataFlowCall call, NodeEx out) { + private predicate returnCallEdge1( + DataFlowCallable c, SndLevelScopeOption scope, DataFlowCall call, NodeEx out + ) { exists(RetNodeEx ret | - flowOutOfCallNodeCand1(call, ret, _, out) and c = ret.getEnclosingCallable() + flowOutOfCallNodeCand1(call, ret, _, out) and + c = ret.getEnclosingCallable() and + scope = getScope(ret) ) } private int simpleDispatchFanoutOnReturn(DataFlowCall call, NodeEx out) { - result = strictcount(DataFlowCallable c | returnCallEdge1(c, call, out)) + result = + strictcount(DataFlowCallable c, SndLevelScopeOption scope | + returnCallEdge1(c, scope, call, out) + ) } private int ctxDispatchFanoutOnReturn(NodeEx out, DataFlowCall ctx) { @@ -1129,12 +1147,12 @@ module MakeImpl Lang> { simpleDispatchFanoutOnReturn(call, out) > 1 and not Stage1::revFlow(out, false) and call.getEnclosingCallable() = c and - returnCallEdge1(c, ctx, _) and + returnCallEdge1(c, _, ctx, _) and mayBenefitFromCallContextExt(call, _) and result = - count(DataFlowCallable tgt | + count(DataFlowCallable tgt, SndLevelScopeOption scope | tgt = viableImplInCallContextExt(call, ctx) and - returnCallEdge1(tgt, call, out) + returnCallEdge1(tgt, scope, call, out) ) ) } From 2f0987e980f1b64e5725b5ce76327f2b9af6a383 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Tue, 5 Mar 2024 14:08:57 +0100 Subject: [PATCH 6/8] Dataflow: Add dummy DataFlowSecondLevelScope implementations. These could be an empty type, but Unit was available and it probably doesn't matter. --- .../lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll | 2 ++ .../semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll | 2 ++ go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll | 2 ++ .../lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll | 2 ++ .../lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 2 ++ ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll | 2 ++ swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll | 2 ++ 7 files changed, 14 insertions(+) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll index 1af2a9028dac..da95ac075e12 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll @@ -290,6 +290,8 @@ predicate knownSourceModel(Node source, string model) { none() } predicate knownSinkModel(Node sink, string model) { none() } +class DataFlowSecondLevelScope = Unit; + /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a * side-effect, resulting in a summary from `p` to itself. diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll index 0e3821103ef5..8c25ac5b186a 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll @@ -2882,6 +2882,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) } +class DataFlowSecondLevelScope = Unit; + /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a * side-effect, resulting in a summary from `p` to itself. diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll index ebfc08a797bf..d9bfa96abd8c 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -415,6 +415,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) } +class DataFlowSecondLevelScope = Unit; + /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a * side-effect, resulting in a summary from `p` to itself. diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll index 7b151c8b13e6..91737dc036e0 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll @@ -581,6 +581,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) } +class DataFlowSecondLevelScope = Unit; + /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a * side-effect, resulting in a summary from `p` to itself. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 9d7b1d5aa84b..307fb74fe194 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -1087,6 +1087,8 @@ predicate knownSinkModel(Node sink, string model) { sink = ModelOutput::getASinkNode(_, model).asSink() } +class DataFlowSecondLevelScope = Unit; + /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a * side-effect, resulting in a summary from `p` to itself. diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll index 2760d3ee8fef..b50d04ed9f4c 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll @@ -2254,6 +2254,8 @@ predicate knownSinkModel(Node sink, string model) { sink = ModelOutput::getASinkNode(_, model).asSink() } +class DataFlowSecondLevelScope = Unit; + /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a * side-effect, resulting in a summary from `p` to itself. diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll index acdd9bc7185c..730dfe370730 100644 --- a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll +++ b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll @@ -1404,6 +1404,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) } +class DataFlowSecondLevelScope = Unit; + /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a * side-effect, resulting in a summary from `p` to itself. From 3c69f8f607f4ecfdefbd4ce5aa8cbe518d3676e7 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Wed, 10 Apr 2024 14:57:51 +0200 Subject: [PATCH 7/8] Java: Count second level scopes for fieldFlowBranchLimit. --- .../internal/DataFlowImplSpecific.qll | 2 + .../dataflow/internal/DataFlowPrivate.qll | 75 ++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll index 84cdf19ed518..5693e2deaef3 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll @@ -20,6 +20,8 @@ module JavaDataFlow implements InputSig { Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) } + predicate getSecondLevelScope = Private::getSecondLevelScope/1; + predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1; predicate viableImplInCallContext = Private::viableImplInCallContext/2; diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll index 91737dc036e0..e6f223c195cb 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll @@ -581,7 +581,80 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) } -class DataFlowSecondLevelScope = Unit; +private predicate isTopLevel(Stmt s) { + any(Callable c).getBody() = s + or + exists(BlockStmt b | s = b.getAStmt() and isTopLevel(b)) +} + +private Stmt getAChainedBranch(IfStmt s) { + result = s.getThen() + or + exists(Stmt elseBranch | s.getElse() = elseBranch | + result = getAChainedBranch(elseBranch) + or + result = elseBranch and not elseBranch instanceof IfStmt + ) +} + +private newtype TDataFlowSecondLevelScope = + TTopLevelIfBranch(Stmt s) { + exists(IfStmt ifstmt | s = getAChainedBranch(ifstmt) and isTopLevel(ifstmt)) + } or + TTopLevelSwitchCase(SwitchCase s) { + exists(SwitchStmt switchstmt | s = switchstmt.getACase() and isTopLevel(switchstmt)) + } + +private SwitchCase getPrecedingCase(Stmt s) { + result = s + or + exists(SwitchStmt switch, int i | + s = switch.getStmt(i) and + not s instanceof SwitchCase and + result = getPrecedingCase(switch.getStmt(i - 1)) + ) +} + +/** + * A second-level control-flow scope in a `switch` or a chained `if` statement. + * + * This is a `switch` case or a branch of a chained `if` statement, given that + * the `switch` or `if` statement is top level, that is, it is not nested inside + * other CFG constructs. + */ +class DataFlowSecondLevelScope extends TDataFlowSecondLevelScope { + /** Gets a textual representation of this element. */ + string toString() { + exists(Stmt s | this = TTopLevelIfBranch(s) | result = s.toString()) + or + exists(SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.toString()) + } + + /** + * Gets a statement directly contained in this scope. For an `if` branch, this + * is the branch itself, and for a `switch case`, this is one the statements + * of that case branch. + */ + private Stmt getAStmt() { + exists(Stmt s | this = TTopLevelIfBranch(s) | result = s) + or + exists(SwitchCase s | this = TTopLevelSwitchCase(s) | + result = s.getRuleStatement() or + s = getPrecedingCase(result) + ) + } + + /** Gets a data-flow node nested within this scope. */ + Node getANode() { getRelatedExpr(result).getAnEnclosingStmt() = this.getAStmt() } +} + +private Expr getRelatedExpr(Node n) { + n.asExpr() = result or + n.(PostUpdateNode).getPreUpdateNode().asExpr() = result +} + +/** Gets the second-level scope containing the node `n`, if any. */ +DataFlowSecondLevelScope getSecondLevelScope(Node n) { result.getANode() = n } /** * Holds if flow is allowed to pass from parameter `p` and back to itself as a From 595014966a3012917bfd7070cebd2e2ab73ee33f Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Fri, 19 Apr 2024 08:46:04 +0200 Subject: [PATCH 8/8] Dataflow: Add change note. --- .../dataflow/change-notes/2024-04-19-fieldflowbranchlimit.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 shared/dataflow/change-notes/2024-04-19-fieldflowbranchlimit.md diff --git a/shared/dataflow/change-notes/2024-04-19-fieldflowbranchlimit.md b/shared/dataflow/change-notes/2024-04-19-fieldflowbranchlimit.md new file mode 100644 index 000000000000..887d17f28664 --- /dev/null +++ b/shared/dataflow/change-notes/2024-04-19-fieldflowbranchlimit.md @@ -0,0 +1,4 @@ +--- +category: majorAnalysis +--- +* The data flow library performs heuristic filtering of code paths that have a high degree of control-flow uncertainty for improved performance in cases that are deemed unlikely to yield true positive flow paths. This filtering can be controlled with the `fieldFlowBranchLimit` predicate in configurations. Two bugs have been fixed in relation to this: Some cases of high uncertainty were not being correctly identified. This fix improves performance in certain scenarios. Another group of cases of low uncertainty were also being misidentified, which led to false negatives. Taken together, we generally expect some additional query results with more true positives and fewer false positives.