Skip to content

Commit

Permalink
Java: More improvements.
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelnebel committed Sep 3, 2024
1 parent 02eb15e commit 76b9cdd
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 34 deletions.
81 changes: 63 additions & 18 deletions java/ql/src/utils/modelgenerator/internal/CaptureModels.qll
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
api = returnNodeEnclosingCallable(ret) and
isOwnInstanceAccessNode(ret)
) and
result = Printing::asValueModel(api, qualifierString(), "ReturnValue")
result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue")
}

private int accessPathLimit0() { result = 2 }
Expand Down Expand Up @@ -237,7 +237,7 @@ string captureThroughFlow0(
input = parameterNodeAsInput(p) and
output = getOutput(returnNodeExt) and
input != output and
result = Printing::asTaintModel(api, input, output)
result = Printing::asLiftedTaintModel(api, input, output)
)
}

Expand Down Expand Up @@ -279,6 +279,9 @@ private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig

private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;

/**
* Gets a MaD string representation of a store step access path.
*/
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
not exists(ap.getHead()) and result = ""
or
Expand All @@ -289,6 +292,9 @@ private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
)
}

/**
* Gets a MaD string representation of a read step access path.
*/
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
not exists(ap.getHead()) and result = ""
or
Expand All @@ -299,8 +305,19 @@ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
)
}

/**
* Holds if the access path `ap` contains a field or synthetic field access.
*/
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail() and
(mentionsField(tail) or isField(head))
)
}

pragma[nomagic]
predicate apiContentFlow(
private predicate apiContentFlow(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
Expand All @@ -309,14 +326,30 @@ predicate apiContentFlow(
p.getEnclosingCallable() = api
}

/**
* A class of APIs relevant for modelling using content flow.
* The following heuristic is applied:
* Content flow is only relevant for an API, if
* #content flow <= 2 * #parameters + 3
* If an API produces more content flow, it is likely that
* 1. Types are not sufficiently constrained leading to combinatorial
* explosion in dispatch and thus in the generated summaries.
* 2. It is a reasonable approximation to use the non-content based flow
* detection instead, as reads and stores would use a significant
* part of the object internal state.
*/

Check warning

Code scanning / CodeQL

Misspelling Warning

This comment contains the non-US spelling 'modelling', which should instead be 'modeling'.
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
ContentDataFlowSummaryTargetApi() {
count(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
apiContentFlow(this, p, _, returnNodeExt, _, _)
count(DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores |
apiContentFlow(this, p, reads, returnNodeExt, stores, _)
) <= 2 * this.getNumberOfParameters() + 3
}
}

/**
* Gets the synthetic names on the access path `path`, if any.
*/
private string getSyntheticNames(PropagateContentFlow::AccessPath path) {
exists(PropagateContentFlow::AccessPath tail, ContentSet head |
head = path.getHead() and
Expand All @@ -329,16 +362,16 @@ private string getSyntheticNames(PropagateContentFlow::AccessPath path) {
}

/**
* This class models pr type the relevant synthetic names.
* This class models the relevant synthetic names for a type.
* For a synthetic name to relavant for a type, the following must hold:
* 1. The synthetic name must be used both in a read and store.
* 2. The read and store should be in different flows.
*/
private class RelevantSynthetics extends Type {
private class SyntheticsType extends Type {
private PropagateContentFlow::AccessPath reads;
private PropagateContentFlow::AccessPath stores;

RelevantSynthetics() {
SyntheticsType() {
exists(
ContentDataFlowSummaryTargetApi api1, DataFlow::ParameterNode p1,
ContentDataFlowSummaryTargetApi api2, DataFlow::ParameterNode p2
Expand All @@ -353,35 +386,47 @@ private class RelevantSynthetics extends Type {

/**
* Gets the relevant synthetic names for this type, if any.
* This is the intersection of the synthetic names used in the
* reads and stores.
*/
string getRelevantSynthetics() {
result = getSyntheticNames(reads) and result = getSyntheticNames(stores)
}
}

/**
* Holds if all the potential synthetic names in the access path `path` are
* contained in the intersection of all the synthetic reads and and of all
* the synthetic stores of all apis with relevant content flow within the type `type`.
*/
pragma[inline]
private predicate syntheticsMatched(
ContentDataFlowSummaryTargetApi api, PropagateContentFlow::AccessPath path
) {
private predicate syntheticsMatched(SyntheticsType type, PropagateContentFlow::AccessPath path) {
forall(string synthetic | synthetic = getSyntheticNames(path) |
synthetic = api.getDeclaringType().(RelevantSynthetics).getRelevantSynthetics()
synthetic = type.getRelevantSynthetics()
)
}

/**
* Gets the content based summary model(s) of `api`, if there is flow from a parameter to
* the return value or a parameter.
*
* Models are lifted to the best type in case the read and store access paths do not
* contain a field or synthetic field access.
*/
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
exists(
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output,
PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores,
boolean preservesValue
boolean preservesValue, boolean lift
|
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
apiContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
syntheticsMatched(api, reads) and
syntheticsMatched(api, stores) and
input != output and
result = Printing::asModel(api, input, output, preservesValue)
syntheticsMatched(api.getDeclaringType(), reads) and
syntheticsMatched(api.getDeclaringType(), stores) and
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) and
result = Printing::asModel(api, input, output, preservesValue, lift)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,12 @@ predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2
)
}

/** Holds if the contentset `c` is a field or synthetic field. */
predicate isField(ContentSet c) {
c instanceof DataFlowUtil::FieldContent or
c instanceof DataFlowUtil::SyntheticFieldContent
}

/**
* Gets the MaD synthetic name string representation for the contentset `c`, if any.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi {
output(this, tv, output) and
input != output
|
result = Printing::asValueModel(this, input, output)
result = Printing::asLiftedValueModel(this, input, output)
)
}
}
Expand Down
43 changes: 28 additions & 15 deletions shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,23 @@ module ModelPrintingImpl<ModelPrintingLangSig Lang> {

/**
* Gets the summary model for `api` with `input`, `output` and `kind`.
* The model is lifted in case `lift` is true.
*/
bindingset[input, output, kind]
private string asSummaryModel(Printing::SummaryApi api, string input, string output, string kind) {
result =
asPartialModel(api.lift()) + input + ";" //
+ output + ";" //
+ kind + ";" //
+ Printing::getProvenance()
private string asSummaryModel(
Printing::SummaryApi api, string input, string output, string kind, boolean lift
) {
exists(Lang::Callable c |
lift = true and c = api.lift()
or
lift = false and c = api
|
result =
asPartialModel(c) + input + ";" //
+ output + ";" //
+ kind + ";" //
+ Printing::getProvenance()
)
}

string asNeutralSummaryModel(Printing::SummaryApi api) {
Expand All @@ -82,31 +91,35 @@ module ModelPrintingImpl<ModelPrintingLangSig Lang> {
}

/**
* Gets the value summary model for `api` with `input` and `output`.
* Gets the lifted value summary model for `api` with `input` and `output`.
*/
bindingset[input, output]
string asValueModel(Printing::SummaryApi api, string input, string output) {
result = asSummaryModel(api, input, output, "value")
string asLiftedValueModel(Printing::SummaryApi api, string input, string output) {
result = asModel(api, input, output, true, true)
}

/**
* Gets the taint summary model for `api` with `input` and `output`.
* Gets the lifted taint summary model for `api` with `input` and `output`.
*/
bindingset[input, output]
string asTaintModel(Printing::SummaryApi api, string input, string output) {
result = asSummaryModel(api, input, output, "taint")
string asLiftedTaintModel(Printing::SummaryApi api, string input, string output) {
result = asModel(api, input, output, false, true)
}

/**
* Gets the summary model for `api` with `input` and `output`.
* (1) If `preservesValue` is true a "value" model is created.
* (2) If `lift` is true the model is lifted to the best possible type.
*/
bindingset[input, output, preservesValue]
string asModel(Printing::SummaryApi api, string input, string output, boolean preservesValue) {
string asModel(
Printing::SummaryApi api, string input, string output, boolean preservesValue, boolean lift
) {
preservesValue = true and
result = asValueModel(api, input, output)
result = asSummaryModel(api, input, output, "value", lift)
or
preservesValue = false and
result = asTaintModel(api, input, output)
result = asSummaryModel(api, input, output, "taint", lift)
}

/**
Expand Down

0 comments on commit 76b9cdd

Please sign in to comment.