Skip to content

Commit

Permalink
Java: Prototype implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelnebel committed Sep 5, 2024
1 parent cb14cc8 commit 8473b8c
Show file tree
Hide file tree
Showing 5 changed files with 262 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ import internal.CaptureModels

from DataFlowSummaryTargetApi api, string flow
where flow = captureContentFlow(api)
select flow order by flow
select api, flow order by flow
219 changes: 212 additions & 7 deletions java/ql/src/utils/modelgenerator/internal/CaptureModels.qll
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
api = returnNodeEnclosingCallable(ret) and
isOwnInstanceAccessNode(ret)
) and
result = Printing::asValueModel(api, qualifierString(), "ReturnValue")
result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue")
}

private int accessPathLimit0() { result = 2 }
Expand Down Expand Up @@ -237,7 +237,7 @@ string captureThroughFlow0(
input = parameterNodeAsInput(p) and
output = getOutput(returnNodeExt) and
input != output and
result = Printing::asTaintModel(api, input, output)
result = Printing::asLiftedTaintModel(api, input, output)
)
}

Expand Down Expand Up @@ -279,6 +279,9 @@ private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig

private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;

/**
* Gets a MaD string representation of a store step access path.
*/
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
not exists(ap.getHead()) and result = ""
or
Expand All @@ -289,6 +292,9 @@ private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
)
}

/**
* Gets a MaD string representation of a read step access path.
*/
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
not exists(ap.getHead()) and result = ""
or
Expand All @@ -299,18 +305,217 @@ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
)
}

string captureContentFlow(DataFlowSummaryTargetApi api) {
/**
* Holds if the access path `ap` contains a field or synthetic field access.
*/
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail() and
(mentionsField(tail) or isField(head))
)
}

pragma[nomagic]
private predicate apiContentFlow(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.getEnclosingCallable() = api
}

/**
* A class of APIs relevant for modeling using content flow.
* The following heuristic is applied:
* Content flow is only relevant for an API, if
* #content flow <= 2 * #parameters + 3
* If an API produces more content flow, it is likely that
* 1. Types are not sufficiently constrained leading to combinatorial
* explosion in dispatch and thus in the generated summaries.
* 2. It is a reasonable approximation to use the non-content based flow
* detection instead, as reads and stores would use a significant
* part of the object internal state.
*/
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
ContentDataFlowSummaryTargetApi() {
count(DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores |
apiContentFlow(this, p, reads, returnNodeExt, stores, _)
) <= 2 * this.getNumberOfParameters() + 3
}
}

/**
* Holds if any of the content sets in `path` translates into a synthetic field.
*/
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
exists(PropagateContentFlow::AccessPath tail, ContentSet head |
head = path.getHead() and
tail = path.getTail() and
(
exists(getSyntheticName(head)) or
hasSyntheticContent(tail)
)
)
}

/**
* A module containing predicates for validating access paths containing synthetic fields
* to be used for generated summary models.
*/
private module AccessPathSyntheticValidation {
/**
* Holds if there exist an API within the type `t` that has
* content flow from `read` to `store`.
*/
private predicate step(
Type t, PropagateContentFlow::AccessPath read, PropagateContentFlow::AccessPath store
) {
exists(ContentDataFlowSummaryTargetApi api |
api.getDeclaringType() = t and
apiContentFlow(api, _, read, _, store, _)
)
}

/**
* Holds if there is an API in type `t` that has content flow from `read` to `store`,
* where `read` does not have synthetic content and `store` does.
*
* Step A -> Synth.
*/
private predicate synthPathEntry(
Type t, PropagateContentFlow::AccessPath read, PropagateContentFlow::AccessPath store
) {
not hasSyntheticContent(read) and
hasSyntheticContent(store) and
step(t, read, store)
}

/**
* Holds if there is an API in type `t` that has content flow from `read` to `store`,
* where `read` has synthetic content and `store` does not.
*
* Step Synth -> A.
*/
private predicate synthPathExit(
Type t, PropagateContentFlow::AccessPath read, PropagateContentFlow::AccessPath store
) {
hasSyntheticContent(read) and
not hasSyntheticContent(store) and
step(t, read, store)
}

/**
* Takes zero or more synthetic steps.
* Synth ->* Synth
*/
private predicate synthPathStepRec(
Type t, PropagateContentFlow::AccessPath read, PropagateContentFlow::AccessPath store
) {
hasSyntheticContent(read) and
hasSyntheticContent(store) and
(
read = store
or
exists(PropagateContentFlow::AccessPath mid |
step(t, read, mid) and synthPathStepRec(t, mid, store)
)

Check warning

Code scanning / CodeQL

Var only used in one side of disjunct. Warning

The
variable t
is only used in one side of disjunct.
)
}

/**
* Holds if there exists a path of steps from `ap1` to an exit.
* ap1 ->* Synth -> A
*/
private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath ap1) {
exists(PropagateContentFlow::AccessPath mid |
synthPathStepRec(t, ap1, mid) and synthPathExit(t, mid, _)
)
}

/**
* Holds if there exists a path of steps from an entry to `ap1`.
*
* A -> Synth ->* ap1
*/
private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath ap1) {
exists(PropagateContentFlow::AccessPath mid |
synthPathEntry(t, _, mid) and synthPathStepRec(t, mid, ap1)
)
}

/**
* Holds if at least one of the access paths `read` and `store` contain content
* that will be translated into a synthetic field, when being used in
* a MaD summary model, and if the type `t` contains a range of APIs, such that
* when chaining their flow access paths, there exists access paths `A` and `B` where
* A ->* read -> store ->* B and where `A` and `B` do not contain content that will
* be translated into a synthetic field.
*
* This is needed because we don't want to include summaries that reads from or
* stores into a "dead" synthetic field.
*
* Example:
* Assume we have a type `t` with methods `getX` and `setX`, which gets and sets
* a private field `X` on `t`. This would lead to the following content flows
* getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
* setX : Argument[0] -> Argument[this].SyntheticField[t.X]
* As the reads and stores are on synthetic fields we should only make summaries
* if both of these methods exist.
*/
pragma[nomagic]
predicate acceptReadStore(
Type t, PropagateContentFlow::AccessPath read, PropagateContentFlow::AccessPath store
) {
synthPathEntry(t, read, store) and reachesSynthExit(t, store)
or
synthEntryReaches(t, read) and synthPathExit(t, read, store)
or
synthEntryReaches(t, read) and step(t, read, store) and reachesSynthExit(t, store)
}
}

/**
* Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
* Flow is considered relevant,
* 1. If `read` or `store` do not contain content that translates into synthetic fields.
* 2. If `read` or `store` contain content that translates into synthetic fields, and
* the synthetic content is "live" on the declaring type of `api`.
*/
private predicate apiRelevantContentFlow(
ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath store, boolean preservesValue
) {
apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
(
not hasSyntheticContent(read) and not hasSyntheticContent(store)
or
AccessPathSyntheticValidation::acceptReadStore(api.getDeclaringType(), read, store)
)
}

/**
* Gets the content based summary model(s) of `api`, if there is flow from a parameter to
* the return value or a parameter.
*
* Models are lifted to the best type in case the read and store access paths do not
* contain a field or synthetic field access.
*/
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
exists(
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output,
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores,
boolean preservesValue
boolean preservesValue, boolean lift
|
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
result = Printing::asModel(api, input, output, preservesValue)
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) and
result = Printing::asModel(api, input, output, preservesValue, lift)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,17 +339,31 @@ predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2
)
}

/** Holds if the contentset `c` is a field or synthetic field. */
predicate isField(ContentSet c) {
c instanceof DataFlowUtil::FieldContent or
c instanceof DataFlowUtil::SyntheticFieldContent
}

/**
* Gets the MaD string representation of the contentset `c`.
* Gets the MaD synthetic name string representation for the contentset `c`, if any.
*/
string getSyntheticName(DataFlow::ContentSet c) {
exists(Field f |
not f.isPublic() and
f = c.(DataFlowUtil::FieldContent).getField() and
result = f.getQualifiedName()
)
or
result = c.(DataFlowUtil::SyntheticFieldContent).getField()
}

string printContent(ContentSet c) {
exists(Field f, string name |
f = c.(DataFlowUtil::FieldContent).getField() and name = f.getQualifiedName()
|
if f.isPublic() then result = "Field[" + name + "]" else result = "SyntheticField[" + name + "]"
exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() |
result = "Field[" + f.getQualifiedName() + "]"
)
or
result = "SyntheticField[" + c.(DataFlowUtil::SyntheticFieldContent).getField() + "]"
result = "SyntheticField[" + getSyntheticName(c) + "]"
or
c instanceof DataFlowUtil::CollectionContent and result = "Element"
or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi {
output(this, tv, output) and
input != output
|
result = Printing::asValueModel(this, input, output)
result = Printing::asLiftedValueModel(this, input, output)
)
}
}
Expand Down
43 changes: 28 additions & 15 deletions shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,23 @@ module ModelPrintingImpl<ModelPrintingLangSig Lang> {

/**
* Gets the summary model for `api` with `input`, `output` and `kind`.
* The model is lifted in case `lift` is true.
*/
bindingset[input, output, kind]
private string asSummaryModel(Printing::SummaryApi api, string input, string output, string kind) {
result =
asPartialModel(api.lift()) + input + ";" //
+ output + ";" //
+ kind + ";" //
+ Printing::getProvenance()
private string asSummaryModel(
Printing::SummaryApi api, string input, string output, string kind, boolean lift
) {
exists(Lang::Callable c |
lift = true and c = api.lift()
or
lift = false and c = api
|
result =
asPartialModel(c) + input + ";" //
+ output + ";" //
+ kind + ";" //
+ Printing::getProvenance()
)
}

string asNeutralSummaryModel(Printing::SummaryApi api) {
Expand All @@ -82,31 +91,35 @@ module ModelPrintingImpl<ModelPrintingLangSig Lang> {
}

/**
* Gets the value summary model for `api` with `input` and `output`.
* Gets the lifted value summary model for `api` with `input` and `output`.
*/
bindingset[input, output]
string asValueModel(Printing::SummaryApi api, string input, string output) {
result = asSummaryModel(api, input, output, "value")
string asLiftedValueModel(Printing::SummaryApi api, string input, string output) {
result = asModel(api, input, output, true, true)
}

/**
* Gets the taint summary model for `api` with `input` and `output`.
* Gets the lifted taint summary model for `api` with `input` and `output`.
*/
bindingset[input, output]
string asTaintModel(Printing::SummaryApi api, string input, string output) {
result = asSummaryModel(api, input, output, "taint")
string asLiftedTaintModel(Printing::SummaryApi api, string input, string output) {
result = asModel(api, input, output, false, true)
}

/**
* Gets the summary model for `api` with `input` and `output`.
* (1) If `preservesValue` is true a "value" model is created.
* (2) If `lift` is true the model is lifted to the best possible type.
*/
bindingset[input, output, preservesValue]
string asModel(Printing::SummaryApi api, string input, string output, boolean preservesValue) {
string asModel(
Printing::SummaryApi api, string input, string output, boolean preservesValue, boolean lift
) {
preservesValue = true and
result = asValueModel(api, input, output)
result = asSummaryModel(api, input, output, "value", lift)
or
preservesValue = false and
result = asTaintModel(api, input, output)
result = asSummaryModel(api, input, output, "taint", lift)
}

/**
Expand Down

0 comments on commit 8473b8c

Please sign in to comment.