Skip to content

Commit

Permalink
Merge pull request #16151 from MathiasVP/use-shared-typeflow-lib
Browse files Browse the repository at this point in the history
C++: Use the shared typeflow library
  • Loading branch information
MathiasVP authored Apr 10, 2024
2 parents 996f535 + a53ef49 commit 59936c8
Show file tree
Hide file tree
Showing 6 changed files with 313 additions and 6 deletions.
1 change: 1 addition & 0 deletions cpp/ql/lib/qlpack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dependencies:
codeql/dataflow: ${workspace}
codeql/rangeanalysis: ${workspace}
codeql/ssa: ${workspace}
codeql/typeflow: ${workspace}
codeql/tutorial: ${workspace}
codeql/util: ${workspace}
codeql/xml: ${workspace}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ private import DataFlowImplCommon as DataFlowImplCommon
private import DataFlowUtil
private import semmle.code.cpp.models.interfaces.PointerWrapper
private import DataFlowPrivate
private import TypeFlow
private import semmle.code.cpp.ir.ValueNumbering

/**
Expand Down Expand Up @@ -955,11 +956,7 @@ private module Cached {
* Holds if the address computed by `operand` is guaranteed to write
* to a specific address.
*/
private predicate isCertainAddress(Operand operand) {
valueNumberOfOperand(operand).getAnInstruction() instanceof VariableAddressInstruction
or
operand.getType() instanceof Cpp::ReferenceType
}
private predicate isCertainAddress(Operand operand) { isPointerToSingleObject(operand.getDef()) }

/**
* Holds if `address` is a use of an SSA variable rooted at `base`, and the
Expand Down
278 changes: 278 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TypeFlow.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
private import cpp
private import semmle.code.cpp.ir.IR
private import codeql.typeflow.TypeFlow

private module Input implements TypeFlowInput<Location> {
/** Holds if `alloc` dynamically allocates a single object. */
private predicate isSingleObjectAllocation(AllocationExpr alloc) {
// i.e., `new int`;
alloc instanceof NewExpr
or
// i.e., `malloc(sizeof(int))`
exists(SizeofTypeOperator sizeOf | sizeOf = alloc.getSizeExpr() |
not sizeOf.getTypeOperand().getUnspecifiedType() instanceof ArrayType
)
}

/**
* Holds if `i` is the result of a dynamic allocation.
*
* `isObject` is `true` if the allocation allocated a single object,
* and `false` otherwise.
*/
private predicate isAllocation(Instruction i, boolean isObject) {
exists(AllocationExpr alloc | alloc = i.getUnconvertedResultExpression() |
if isSingleObjectAllocation(alloc) then isObject = true else isObject = false
)
}

private predicate hasExactSingleType(Instruction i) {
// The address of a variable is always a single object
i instanceof VariableAddressInstruction
or
// A reference always points to a single object
i.getResultLanguageType().hasUnspecifiedType(any(ReferenceType rt), false)
or
// `this` is never an array
i instanceof InitializeThisInstruction
or
// An allocation of a non-array object
isAllocation(i, true)
}

private predicate hasExactBufferType(Instruction i) {
// Anything with an array type is a buffer
i.getResultLanguageType().hasUnspecifiedType(any(ArrayType at), false)
or
// An allocation expression that we couldn't conclude allocated a single
// expression is assigned a buffer type.
isAllocation(i, false)
}

private newtype TTypeFlowNode =
TInstructionNode(Instruction i) or
TFunctionNode(IRFunction func)

abstract class TypeFlowNode extends TTypeFlowNode {
/** Gets a textual representation of this node. */
abstract string toString();

/**
* Gets the type of this node. This type may not be the most precise
* possible type, but will be used as a starting point of the analysis.
*/
abstract Type getType();

/** Gets the location of this node. */
abstract Location getLocation();

/** Gets the underlying `Instruction` of this node, if any. */
Instruction asInstruction() { none() }

/** Gets the underlying `IRFunction` of this node, if any. */
IRFunction asFunction() { none() }

/** Holds if the value of this node is always null. */
abstract predicate isNullValue();
}

private class InstructionNode extends TypeFlowNode, TInstructionNode {
Instruction instr;

InstructionNode() { this = TInstructionNode(instr) }

override string toString() { result = instr.toString() }

override Type getType() {
if hasExactSingleType(instr) then result.isSingle() else result.isBuffer()
}

override Location getLocation() { result = instr.getLocation() }

override Instruction asInstruction() { result = instr }

override predicate isNullValue() {
instr.(ConstantInstruction).getValue() = "0" and
instr.getResultIRType() instanceof IRAddressType
}
}

/** Gets the `TypeFlowNode` corresponding to `i`. */
additional InstructionNode instructionNode(Instruction i) { result.asInstruction() = i }

private class FunctionNode extends TypeFlowNode, TFunctionNode {
IRFunction func;

FunctionNode() { this = TFunctionNode(func) }

override string toString() { result = func.toString() }

Instruction getReturnValueInstruction() {
result = func.getReturnInstruction().(ReturnValueInstruction).getReturnValue()
}

override Type getType() { result = instructionNode(this.getReturnValueInstruction()).getType() }

override Location getLocation() { result = func.getLocation() }

override IRFunction asFunction() { result = func }

override predicate isNullValue() {
instructionNode(this.getReturnValueInstruction()).isNullValue()
}
}

/**
* Gets an ultimiate definition of `phi`. That is, an input to `phi` that is
* not itself a `PhiInstruction`.
*/
private Instruction getAnUltimateLocalDefinition(PhiInstruction phi) {
result = phi.getAnInput*() and not result instanceof PhiInstruction
}

/**
* Holds if this function is private (i.e., cannot be accessed outside its
* compilation unit). This means we can use a closed-world assumption about
* calls to this function.
*/
private predicate isPrivate(Function func) {
// static functions have internal linkage
func.isStatic()
or
// anonymous namespaces have internal linkage
func.getNamespace().getParentNamespace*().isAnonymous()
or
// private member functions are only called internally from inside the class
func.(MemberFunction).isPrivate()
}

/**
* Holds if `arg` is an argument for the parameter `p` in a private callable.
*/
pragma[nomagic]
private predicate privateParamArg(InitializeParameterInstruction p, Instruction arg) {
exists(CallInstruction call, int i, Function func |
call.getArgument(pragma[only_bind_into](i)) = arg and
func = call.getStaticCallTarget() and
func.getParameter(pragma[only_bind_into](i)) = p.getParameter() and
isPrivate(func)
)
}

predicate joinStep(TypeFlowNode n1, TypeFlowNode n2) {
// instruction -> phi
getAnUltimateLocalDefinition(n2.asInstruction()) = n1.asInstruction()
or
// return value -> function
n2.(FunctionNode).getReturnValueInstruction() = n1.asInstruction()
or
// function -> call
exists(Function func | func = n1.asFunction().getFunction() |
not func.isVirtual() and
n2.asInstruction().(CallInstruction).getStaticCallTarget() = func
)
or
// Argument -> parameter where the parameter's enclosing function
// is "private".
exists(Instruction arg, Instruction p |
privateParamArg(p, arg) and
n1.asInstruction() = arg and
n2.asInstruction() = p
)
}

/**
* Holds if knowing whether `i1` points to a single object or buffer implies
* knowing whether `i2` points to a single object or buffer.
*/
private predicate instructionStep(Instruction i1, Instruction i2) {
i2.(CopyInstruction).getSourceValue() = i1
or
i2.(CopyValueInstruction).getSourceValue() = i1
or
i2.(ConvertInstruction).getUnary() = i1
or
i2.(CheckedConvertOrNullInstruction).getUnary() = i1
or
i2.(InheritanceConversionInstruction).getUnary() = i1
or
i2.(PointerArithmeticInstruction).getLeft() = i1
}

predicate step(TypeFlowNode n1, TypeFlowNode n2) {
instructionStep(n1.asInstruction(), n2.asInstruction())
}

predicate isNullValue(TypeFlowNode n) { n.isNullValue() }

private newtype TType =
TSingle() or
TBuffer()

class Type extends TType {
string toString() {
this.isSingle() and
result = "Single"
or
this.isBuffer() and
result = "Buffer"
}

/** Holds if this type is the type that represents a single object. */
predicate isSingle() { this = TSingle() }

/** Holds if this type is the type that represents a buffer. */
predicate isBuffer() { this = TBuffer() }

/**
* Gets a super type of this type, if any.
*
* The type relation is `Single <: Buffer`.
*/
Type getASupertype() {
this.isSingle() and
result.isBuffer()
}
}

predicate exactTypeBase(TypeFlowNode n, Type t) {
exists(Instruction instr | instr = n.asInstruction() |
hasExactSingleType(instr) and t.isSingle()
or
hasExactBufferType(instr) and t.isBuffer()
)
}

pragma[nomagic]
private predicate upcastCand(TypeFlowNode n, Type t1, Type t2) {
exists(TypeFlowNode next |
step(n, next)
or
joinStep(n, next)
|
n.getType() = t1 and
next.getType() = t2 and
t1 != t2
)
}

private predicate upcast(TypeFlowNode n, Type t1) {
exists(Type t2 | upcastCand(n, t1, t2) |
// No need for transitive closure since the subtyping relation is just `Single <: Buffer`
t1.getASupertype() = t2
)
}

predicate typeFlowBaseCand(TypeFlowNode n, Type t) { upcast(n, t) }
}

private module TypeFlow = Make<Location, Input>;

/**
* Holds if `i` is an instruction that computes an address that points to a
* single object (as opposed to pointing into a buffer).
*/
pragma[nomagic]
predicate isPointerToSingleObject(Instruction i) {
TypeFlow::bestTypeFlow(Input::instructionNode(i), any(Input::Type t | t.isSingle()), _)
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ argHasPostUpdate
| test.cpp:67:29:67:35 | source1 | ArgumentNode is missing PostUpdateNode. |
| test.cpp:813:19:813:35 | * ... | ArgumentNode is missing PostUpdateNode. |
| test.cpp:848:23:848:25 | rpx | ArgumentNode is missing PostUpdateNode. |
| test.cpp:1057:19:1057:21 | * ... | ArgumentNode is missing PostUpdateNode. |
postWithInFlow
| BarrierGuard.cpp:49:6:49:6 | x [post update] | PostUpdateNode should not be the target of local flow. |
| BarrierGuard.cpp:60:7:60:7 | x [post update] | PostUpdateNode should not be the target of local flow. |
Expand Down Expand Up @@ -168,6 +169,13 @@ postWithInFlow
| test.cpp:1045:9:1045:11 | ref arg buf | PostUpdateNode should not be the target of local flow. |
| test.cpp:1051:5:1051:11 | content [post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1052:9:1052:9 | a [inner post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1056:5:1056:7 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1056:6:1056:7 | pp [inner post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1062:53:1062:53 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1072:3:1072:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1072:4:1072:4 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1073:3:1073:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
| test.cpp:1073:4:1073:4 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
viableImplInCallContextTooLarge
uniqueParameterNodeAtPosition
uniqueParameterNodePosition
Expand Down
24 changes: 23 additions & 1 deletion cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1050,4 +1050,26 @@ void flow_out_of_address_with_local_flow() {
MyStruct a;
a.content = nullptr;
sink(&a); // $ SPURIOUS: ast
}
}

static void static_func_that_reassigns_pointer_before_sink(char** pp) { // $ ast-def=pp ir-def=*pp ir-def=**pp
*pp = "";
indirect_sink(*pp); // clean
}

void test_static_func_that_reassigns_pointer_before_sink() {
char* p = (char*)indirect_source();
static_func_that_reassigns_pointer_before_sink(&p);
}

void single_object_in_both_cases(bool b, int x, int y) {
int* p;
if(b) {
p = &x;
} else {
p = &y;
}
*p = source();
*p = 0;
sink(*p); // clean
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ incorrectBaseType
| test.cpp:848:23:848:25 | (reference dereference) | Expected 'Node.getType()' to be int, but it was int * |
| test.cpp:854:10:854:36 | * ... | Expected 'Node.getType()' to be const int, but it was int |
| test.cpp:867:10:867:30 | * ... | Expected 'Node.getType()' to be const int, but it was int |
| test.cpp:1062:52:1062:53 | *& ... | Expected 'Node.getType()' to be char, but it was char * |
failures

0 comments on commit 59936c8

Please sign in to comment.