diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 9ae9e2bb3f2d5..afbf947206bfb 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) { api = returnNodeEnclosingCallable(ret) and isOwnInstanceAccessNode(ret) ) and - result = Printing::asValueModel(api, qualifierString(), "ReturnValue") + result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue") } private int accessPathLimit0() { result = 2 } @@ -237,7 +237,7 @@ string captureThroughFlow0( input = parameterNodeAsInput(p) and output = getOutput(returnNodeExt) and input != output and - result = Printing::asTaintModel(api, input, output) + result = Printing::asLiftedTaintModel(api, input, output) ) } @@ -279,6 +279,9 @@ private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig private module PropagateContentFlow = ContentDataFlow::Global; +/** + * Gets a MaD string representation of a store step access path. + */ private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { not exists(ap.getHead()) and result = "" or @@ -289,6 +292,9 @@ private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { ) } +/** + * Gets a MaD string representation of a read step access path. + */ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { not exists(ap.getHead()) and result = "" or @@ -299,8 +305,19 @@ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { ) } +/** + * Holds if the access path `ap` contains a field or synthetic field access. + */ +private predicate mentionsField(PropagateContentFlow::AccessPath ap) { + exists(ContentSet head, PropagateContentFlow::AccessPath tail | + head = ap.getHead() and + tail = ap.getTail() and + (mentionsField(tail) or isField(head)) + ) +} + pragma[nomagic] -predicate apiContentFlow( +private predicate apiContentFlow( DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue ) { @@ -309,14 +326,30 @@ predicate apiContentFlow( p.getEnclosingCallable() = api } +/** + * A class of APIs relevant for modelling using content flow. + * The following heuristic is applied: + * Content flow is only relevant for an API, if + * #content flow <= 2 * #parameters + 3 + * If an API produces more content flow, it is likely that + * 1. Types are not sufficiently constrained leading to combinatorial + * explosion in dispatch and thus in the generated summaries. + * 2. It is a reasonable approximation to use the non-content based flow + * detection instead, as reads and stores would use a significant + * part of the object internal state. + */ private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { ContentDataFlowSummaryTargetApi() { - count(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - apiContentFlow(this, p, _, returnNodeExt, _, _) + count(DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, + ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores | + apiContentFlow(this, p, reads, returnNodeExt, stores, _) ) <= 2 * this.getNumberOfParameters() + 3 } } +/** + * Gets the synthetic names on the access path `path`, if any. + */ private string getSyntheticNames(PropagateContentFlow::AccessPath path) { exists(PropagateContentFlow::AccessPath tail, ContentSet head | head = path.getHead() and @@ -329,16 +362,16 @@ private string getSyntheticNames(PropagateContentFlow::AccessPath path) { } /** - * This class models pr type the relevant synthetic names. + * This class models the relevant synthetic names for a type. * For a synthetic name to relavant for a type, the following must hold: * 1. The synthetic name must be used both in a read and store. * 2. The read and store should be in different flows. */ -private class RelevantSynthetics extends Type { +private class SyntheticsType extends Type { private PropagateContentFlow::AccessPath reads; private PropagateContentFlow::AccessPath stores; - RelevantSynthetics() { + SyntheticsType() { exists( ContentDataFlowSummaryTargetApi api1, DataFlow::ParameterNode p1, ContentDataFlowSummaryTargetApi api2, DataFlow::ParameterNode p2 @@ -353,35 +386,47 @@ private class RelevantSynthetics extends Type { /** * Gets the relevant synthetic names for this type, if any. + * This is the intersection of the synthetic names used in the + * reads and stores. */ string getRelevantSynthetics() { result = getSyntheticNames(reads) and result = getSyntheticNames(stores) } } +/** + * Holds if all the potential synthetic names in the access path `path` are + * contained in the intersection of all the synthetic reads and and of all + * the synthetic stores of all apis with relevant content flow within the type `type`. + */ pragma[inline] -private predicate syntheticsMatched( - ContentDataFlowSummaryTargetApi api, PropagateContentFlow::AccessPath path -) { +private predicate syntheticsMatched(SyntheticsType type, PropagateContentFlow::AccessPath path) { forall(string synthetic | synthetic = getSyntheticNames(path) | - synthetic = api.getDeclaringType().(RelevantSynthetics).getRelevantSynthetics() + synthetic = type.getRelevantSynthetics() ) } +/** + * Gets the content based summary model(s) of `api`, if there is flow from a parameter to + * the return value or a parameter. + * + * Models are lifted to the best type in case the read and store access paths do not + * contain a field or synthetic field access. + */ string captureContentFlow(ContentDataFlowSummaryTargetApi api) { exists( DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output, PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores, - boolean preservesValue + boolean preservesValue, boolean lift | - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and + apiContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and - syntheticsMatched(api, reads) and - syntheticsMatched(api, stores) and input != output and - result = Printing::asModel(api, input, output, preservesValue) + syntheticsMatched(api.getDeclaringType(), reads) and + syntheticsMatched(api.getDeclaringType(), stores) and + (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) and + result = Printing::asModel(api, input, output, preservesValue, lift) ) } diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll index 051e0d5d1b7c6..e75a6caefbcb0 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll @@ -339,6 +339,12 @@ predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2 ) } +/** Holds if the contentset `c` is a field or synthetic field. */ +predicate isField(ContentSet c) { + c instanceof DataFlowUtil::FieldContent or + c instanceof DataFlowUtil::SyntheticFieldContent +} + /** * Gets the MaD synthetic name string representation for the contentset `c`, if any. */ diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 34e53bad2d7fe..3d56dff50726b 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -329,7 +329,7 @@ class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi { output(this, tv, output) and input != output | - result = Printing::asValueModel(this, input, output) + result = Printing::asLiftedValueModel(this, input, output) ) } } diff --git a/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll b/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll index 2867d927984d1..4f5fa59d5377b 100644 --- a/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll +++ b/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll @@ -64,14 +64,23 @@ module ModelPrintingImpl { /** * Gets the summary model for `api` with `input`, `output` and `kind`. + * The model is lifted in case `lift` is true. */ bindingset[input, output, kind] - private string asSummaryModel(Printing::SummaryApi api, string input, string output, string kind) { - result = - asPartialModel(api.lift()) + input + ";" // - + output + ";" // - + kind + ";" // - + Printing::getProvenance() + private string asSummaryModel( + Printing::SummaryApi api, string input, string output, string kind, boolean lift + ) { + exists(Lang::Callable c | + lift = true and c = api.lift() + or + lift = false and c = api + | + result = + asPartialModel(c) + input + ";" // + + output + ";" // + + kind + ";" // + + Printing::getProvenance() + ) } string asNeutralSummaryModel(Printing::SummaryApi api) { @@ -82,31 +91,35 @@ module ModelPrintingImpl { } /** - * Gets the value summary model for `api` with `input` and `output`. + * Gets the lifted value summary model for `api` with `input` and `output`. */ bindingset[input, output] - string asValueModel(Printing::SummaryApi api, string input, string output) { - result = asSummaryModel(api, input, output, "value") + string asLiftedValueModel(Printing::SummaryApi api, string input, string output) { + result = asModel(api, input, output, true, true) } /** - * Gets the taint summary model for `api` with `input` and `output`. + * Gets the lifted taint summary model for `api` with `input` and `output`. */ bindingset[input, output] - string asTaintModel(Printing::SummaryApi api, string input, string output) { - result = asSummaryModel(api, input, output, "taint") + string asLiftedTaintModel(Printing::SummaryApi api, string input, string output) { + result = asModel(api, input, output, false, true) } /** * Gets the summary model for `api` with `input` and `output`. + * (1) If `preservesValue` is true a "value" model is created. + * (2) If `lift` is true the model is lifted to the best possible type. */ bindingset[input, output, preservesValue] - string asModel(Printing::SummaryApi api, string input, string output, boolean preservesValue) { + string asModel( + Printing::SummaryApi api, string input, string output, boolean preservesValue, boolean lift + ) { preservesValue = true and - result = asValueModel(api, input, output) + result = asSummaryModel(api, input, output, "value", lift) or preservesValue = false and - result = asTaintModel(api, input, output) + result = asSummaryModel(api, input, output, "taint", lift) } /**