From f4002280371c9493531e82b8016c46d6235d8505 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 5 Mar 2024 12:55:55 -0800 Subject: [PATCH] C++: Remove the pruning stage from SSA. --- .../cpp/ir/dataflow/internal/SsaInternals.qll | 50 +-- .../dataflow/internal/ssa0/SsaInternals.qll | 347 ------------------ 2 files changed, 13 insertions(+), 384 deletions(-) delete mode 100644 cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll index 69ace9f890e4..5f254ee12b76 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -10,13 +10,12 @@ private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as FIO private import semmle.code.cpp.ir.internal.IRCppLanguage private import semmle.code.cpp.ir.dataflow.internal.ModelUtil private import DataFlowPrivate -private import ssa0.SsaInternals as SsaInternals0 import SsaInternalsCommon private module SourceVariables { cached private newtype TSourceVariable = - TMkSourceVariable(SsaInternals0::SourceVariable base, int ind) { + TMkSourceVariable(BaseSourceVariable base, int ind) { ind = [0 .. countIndirectionsForCppType(base.getLanguageType()) + 1] } @@ -30,7 +29,7 @@ private module SourceVariables { } class SourceVariable extends TSourceVariable { - SsaInternals0::SourceVariable base; + BaseSourceVariable base; int ind; SourceVariable() { this = TMkSourceVariable(base, ind) } @@ -42,7 +41,7 @@ private module SourceVariables { * Gets the base source variable (i.e., the variable without any * indirections) of this source variable. */ - SsaInternals0::SourceVariable getBaseVariable() { result = base } + BaseSourceVariable getBaseVariable() { result = base } /** Gets a textual representation of this element. */ string toString() { result = repeatStars(this.getIndirection()) + base.toString() } @@ -105,16 +104,7 @@ predicate hasRawIndirectInstruction(Instruction instr, int indirectionIndex) { cached private newtype TDefOrUseImpl = TDefImpl(BaseSourceVariableInstruction base, Operand address, int indirectionIndex) { - isDef(_, _, address, base, _, indirectionIndex) and - ( - // We only include the definition if the SSA pruning stage - // concluded that the definition is live after the write. - any(SsaInternals0::Def def).getAddressOperand() = address - or - // Since the pruning stage doesn't know about global variables we can't use the above check to - // rule out dead assignments to globals. - base.(VariableAddressInstruction).getAstVariable() instanceof GlobalLikeVariable - ) + isDef(_, _, address, base, _, indirectionIndex) } or TUseImpl(BaseSourceVariableInstruction base, Operand operand, int indirectionIndex) { isUse(_, operand, base, _, indirectionIndex) and @@ -133,8 +123,7 @@ private newtype TDefOrUseImpl = TIteratorDef( Operand iteratorDerefAddress, BaseSourceVariableInstruction container, int indirectionIndex ) { - isIteratorDef(container, iteratorDerefAddress, _, _, indirectionIndex) and - any(SsaInternals0::Def def | def.isIteratorDef()).getAddressOperand() = iteratorDerefAddress + isIteratorDef(container, iteratorDerefAddress, _, _, indirectionIndex) } or TIteratorUse( Operand iteratorAddress, BaseSourceVariableInstruction container, int indirectionIndex @@ -984,17 +973,6 @@ predicate fromPhiNode(SsaPhiNode nodeFrom, Node nodeTo) { ) } -/** - * Holds if there is a write at index `i` in basic block `bb` to variable `v` that's - * subsequently read (as determined by the SSA pruning stage). - */ -private predicate variableWriteCand(IRBlock bb, int i, SourceVariable v) { - exists(SsaInternals0::Def def, SsaInternals0::SourceVariable v0 | - def.asDefOrUse().hasIndexInBlock(bb, i, v0) and - v0 = v.getBaseVariable() - ) -} - private predicate sourceVariableIsGlobal( SourceVariable sv, GlobalLikeVariable global, IRFunction func, int indirectionIndex ) { @@ -1018,16 +996,14 @@ private module SsaInput implements SsaImplCommon::InputSig { predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) { DataFlowImplCommon::forceCachingInSameStage() and ( - variableWriteCand(bb, i, v) or - sourceVariableIsGlobal(v, _, _, _) - ) and - exists(DefImpl def | def.hasIndexInBlock(bb, i, v) | - if def.isCertain() then certain = true else certain = false - ) - or - exists(GlobalDefImpl global | - global.hasIndexInBlock(bb, i, v) and - certain = true + exists(DefImpl def | def.hasIndexInBlock(bb, i, v) | + if def.isCertain() then certain = true else certain = false + ) + or + exists(GlobalDefImpl global | + global.hasIndexInBlock(bb, i, v) and + certain = true + ) ) } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll deleted file mode 100644 index ce53005470d7..000000000000 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll +++ /dev/null @@ -1,347 +0,0 @@ -/** - * This module defines an initial SSA pruning stage that doesn't take - * indirections into account. - */ - -private import codeql.ssa.Ssa as SsaImplCommon -private import semmle.code.cpp.ir.IR -private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon -private import semmle.code.cpp.models.interfaces.Allocation as Alloc -private import semmle.code.cpp.models.interfaces.DataFlow as DataFlow -private import semmle.code.cpp.ir.implementation.raw.internal.SideEffects as SideEffects -private import semmle.code.cpp.ir.internal.IRCppLanguage -private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate -private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil -private import semmle.code.cpp.ir.dataflow.internal.SsaInternalsCommon - -private module SourceVariables { - class SourceVariable extends BaseSourceVariable { - /** - * Gets the base source variable of this `SourceVariable`. - */ - BaseSourceVariable getBaseVariable() { result = this } - } -} - -import SourceVariables - -private newtype TDefOrUseImpl = - TDefImpl(Operand address) { isDef(_, _, address, _, _, _) } or - TUseImpl(Operand operand) { - isUse(_, operand, _, _, _) and - not isDef(true, _, operand, _, _, _) - } or - TIteratorDef(BaseSourceVariableInstruction container, Operand iteratorAddress) { - isIteratorDef(container, iteratorAddress, _, _, _) - } or - TIteratorUse(BaseSourceVariableInstruction container, Operand iteratorAddress) { - isIteratorUse(container, iteratorAddress, _, _) - } or - TFinalParameterUse(Parameter p) { - any(Indirection indirection).getType() = p.getUnspecifiedType() - } - -abstract private class DefOrUseImpl extends TDefOrUseImpl { - /** Gets a textual representation of this element. */ - abstract string toString(); - - /** Gets the block of this definition or use. */ - final IRBlock getBlock() { this.hasIndexInBlock(result, _) } - - /** Holds if this definition or use has index `index` in block `block`. */ - abstract predicate hasIndexInBlock(IRBlock block, int index); - - final predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) { - this.hasIndexInBlock(block, index) and - sv = this.getSourceVariable() - } - - /** Gets the location of this element. */ - abstract Cpp::Location getLocation(); - - abstract BaseSourceVariableInstruction getBase(); - - final BaseSourceVariable getBaseSourceVariable() { - result = this.getBase().getBaseSourceVariable() - } - - /** Gets the variable that is defined or used. */ - final SourceVariable getSourceVariable() { - result.getBaseVariable() = this.getBaseSourceVariable() - } - - abstract predicate isCertain(); -} - -abstract class DefImpl extends DefOrUseImpl { - Operand address; - - Operand getAddressOperand() { result = address } - - abstract Node0Impl getValue(); - - override string toString() { result = address.toString() } - - override Cpp::Location getLocation() { result = this.getAddressOperand().getLocation() } - - final override predicate hasIndexInBlock(IRBlock block, int index) { - this.getAddressOperand().getUse() = block.getInstruction(index) - } -} - -private class DirectDef extends DefImpl, TDefImpl { - DirectDef() { this = TDefImpl(address) } - - override BaseSourceVariableInstruction getBase() { isDef(_, _, address, result, _, _) } - - override Node0Impl getValue() { isDef(_, result, address, _, _, _) } - - override predicate isCertain() { isDef(true, _, address, _, _, _) } -} - -private class IteratorDef extends DefImpl, TIteratorDef { - BaseSourceVariableInstruction container; - - IteratorDef() { this = TIteratorDef(container, address) } - - override BaseSourceVariableInstruction getBase() { result = container } - - override Node0Impl getValue() { isIteratorDef(_, address, result, _, _) } - - override predicate isCertain() { none() } -} - -abstract class UseImpl extends DefOrUseImpl { } - -abstract private class OperandBasedUse extends UseImpl { - Operand operand; - - override string toString() { result = operand.toString() } - - final override predicate hasIndexInBlock(IRBlock block, int index) { - // Ideally, this would just be implemented as: - // ``` - // operand.getUse() = block.getInstruction(index) - // ``` - // but because the IR generated for a snippet such as - // ``` - // int x = *p++; - // ``` - // looks like - // ``` - // r1(glval) = VariableAddress[x] : - // r2(glval) = VariableAddress[p] : - // r3(int *) = Load[p] : &:r2, m1 - // r4(int) = Constant[1] : - // r5(int *) = PointerAdd[4] : r3, r4 - // m3(int *) = Store[p] : &:r2, r5 - // r6(int *) = CopyValue : r3 - // r7(int) = Load[?] : &:r6, ~m2 - // m2(int) = Store[x] : &:r1, r7 - // ``` - // we need to ensure that the `r3` operand of the `CopyValue` instruction isn't seen as a fresh use - // of `p` that happens after the increment. So if the base instruction of this use comes from a - // post-fix crement operation we set the index of the SSA use that wraps the `r3` operand at the - // `CopyValue` instruction to be the same index as the `r3` operand at the `PointerAdd` instruction. - // This ensures that the SSA library doesn't create flow from the `PointerAdd` to `r6`. - exists(BaseSourceVariableInstruction base | base = this.getBase() | - if base.getAst() = any(Cpp::PostfixCrementOperation c).getOperand() - then - exists(Operand op | - op = - min(Operand cand, int i | - isUse(_, cand, base, _, _) and - block.getInstruction(i) = cand.getUse() - | - cand order by i - ) and - block.getInstruction(index) = op.getUse() - ) - else operand.getUse() = block.getInstruction(index) - ) - } - - final override Cpp::Location getLocation() { result = operand.getLocation() } -} - -private class DirectUse extends OperandBasedUse, TUseImpl { - DirectUse() { this = TUseImpl(operand) } - - override BaseSourceVariableInstruction getBase() { isUse(_, operand, result, _, _) } - - override predicate isCertain() { isUse(true, operand, _, _, _) } -} - -private class IteratorUse extends OperandBasedUse, TIteratorUse { - BaseSourceVariableInstruction container; - - IteratorUse() { this = TIteratorUse(container, operand) } - - override BaseSourceVariableInstruction getBase() { result = container } - - override predicate isCertain() { none() } -} - -private class FinalParameterUse extends UseImpl, TFinalParameterUse { - Parameter p; - - FinalParameterUse() { this = TFinalParameterUse(p) } - - override string toString() { result = p.toString() } - - final override predicate hasIndexInBlock(IRBlock block, int index) { - // Ideally, this should always be a `ReturnInstruction`, but if - // someone forgets to write a `return` statement in a function - // with a non-void return type we generate an `UnreachedInstruction`. - // In this case we still want to generate flow out of such functions - // if they write to a parameter. So we pick the index of the - // `UnreachedInstruction` as the index of this use. - // Note that a function may have both a `ReturnInstruction` and an - // `UnreachedInstruction`. If that's the case this predicate will - // return multiple results. I don't think this is detrimental to - // performance, however. - exists(Instruction return | - return instanceof ReturnInstruction or - return instanceof UnreachedInstruction - | - block.getInstruction(index) = return and - return.getEnclosingFunction() = p.getFunction() - ) - } - - final override Cpp::Location getLocation() { - // Parameters can have multiple locations. When there's a unique location we use - // that one, but if multiple locations exist we default to an unknown location. - result = unique( | | p.getLocation()) - or - not exists(unique( | | p.getLocation())) and - result instanceof UnknownDefaultLocation - } - - override BaseSourceVariableInstruction getBase() { - exists(InitializeParameterInstruction init | - init.getParameter() = p and - // This is always a `VariableAddressInstruction` - result = init.getAnOperand().getDef() - ) - } - - override predicate isCertain() { any() } -} - -private module SsaInput implements SsaImplCommon::InputSig { - import InputSigCommon - import SourceVariables - - /** - * Holds if the `i`'th write in block `bb` writes to the variable `v`. - * `certain` is `true` if the write is guaranteed to overwrite the entire variable. - */ - predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) { - DataFlowImplCommon::forceCachingInSameStage() and - exists(DefImpl def | def.hasIndexInBlock(bb, i, v) | - if def.isCertain() then certain = true else certain = false - ) - } - - /** - * Holds if the `i`'th read in block `bb` reads to the variable `v`. - * `certain` is `true` if the read is guaranteed. - */ - predicate variableRead(IRBlock bb, int i, SourceVariable v, boolean certain) { - exists(UseImpl use | use.hasIndexInBlock(bb, i, v) | - if use.isCertain() then certain = true else certain = false - ) - } -} - -private newtype TSsaDefOrUse = - TDefOrUse(DefOrUseImpl defOrUse) { - defOrUse instanceof UseImpl - or - // If `defOrUse` is a definition we only include it if the - // SSA library concludes that it's live after the write. - exists(DefinitionExt def, SourceVariable sv, IRBlock bb, int i | - def.definesAt(sv, bb, i, _) and - defOrUse.(DefImpl).hasIndexInBlock(bb, i, sv) - ) - } or - TPhi(PhiNode phi) - -abstract private class SsaDefOrUse extends TSsaDefOrUse { - string toString() { result = "SsaDefOrUse" } - - DefOrUseImpl asDefOrUse() { none() } - - PhiNode asPhi() { none() } - - abstract Location getLocation(); -} - -class DefOrUse extends TDefOrUse, SsaDefOrUse { - DefOrUseImpl defOrUse; - - DefOrUse() { this = TDefOrUse(defOrUse) } - - final override DefOrUseImpl asDefOrUse() { result = defOrUse } - - final override Location getLocation() { result = defOrUse.getLocation() } - - final SourceVariable getSourceVariable() { result = defOrUse.getSourceVariable() } -} - -class Phi extends TPhi, SsaDefOrUse { - PhiNode phi; - - Phi() { this = TPhi(phi) } - - final override PhiNode asPhi() { result = phi } - - final override Location getLocation() { result = phi.getBasicBlock().getLocation() } -} - -class UseOrPhi extends SsaDefOrUse { - UseOrPhi() { - this.asDefOrUse() instanceof UseImpl - or - this instanceof Phi - } - - final override Location getLocation() { - result = this.asDefOrUse().getLocation() or result = this.(Phi).getLocation() - } - - override string toString() { - result = this.asDefOrUse().toString() - or - this instanceof Phi and - result = "Phi" - } -} - -class Def extends DefOrUse { - override DefImpl defOrUse; - - Operand getAddressOperand() { result = defOrUse.getAddressOperand() } - - Instruction getAddress() { result = this.getAddressOperand().getDef() } - - Node0Impl getValue() { result = defOrUse.getValue() } - - override string toString() { result = this.asDefOrUse().toString() } - - BaseSourceVariableInstruction getBase() { result = defOrUse.getBase() } - - predicate isIteratorDef() { defOrUse instanceof IteratorDef } -} - -private module SsaImpl = SsaImplCommon::Make; - -class PhiNode extends SsaImpl::DefinitionExt { - PhiNode() { - this instanceof SsaImpl::PhiNode or - this instanceof SsaImpl::PhiReadNode - } -} - -class DefinitionExt = SsaImpl::DefinitionExt;