From 7c0101ad06da0a41580de020c76ac4bcb15d8163 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Fri, 6 Sep 2024 10:58:18 +0200 Subject: [PATCH 01/10] Shared: Add some helper predicates to the AccessPath class in content flow. --- .../dataflow/internal/ContentDataFlowImpl.qll | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll index f413c6dd7ad0..b972fc533ce4 100644 --- a/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/ContentDataFlowImpl.qll @@ -271,6 +271,38 @@ module MakeImplContentDataFlow Lang> { result = head + "." + tail ) } + + private ContentSet getAtIndex(int i) { + i = 0 and + result = this.getHead() + or + i > 0 and + result = this.getTail().getAtIndex(i - 1) + } + + private AccessPath reverse0(int i) { + i = -1 and result = TAccessPathNil() + or + i >= 0 and + result = TAccessPathCons(this.getAtIndex(i), this.reverse0(i - 1)) + } + + /** + * Gets the length of this access path. + */ + private int length() { + result = 0 and this = TAccessPathNil() + or + result = 1 + this.getTail().length() + } + + /** + * Gets the reversed access path, if any. + * + * Note that not all access paths have a reverse as these are not + * included by default in the IPA type. + */ + AccessPath reverse() { result = this.reverse0(this.length() - 1) } } /** From d2c98c86dcb4aecefe67f6767f78ea9ede629d4b Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Tue, 3 Sep 2024 10:57:00 +0200 Subject: [PATCH 02/10] Java: Improve content based model generation. --- .../modelgenerator/internal/CaptureModels.qll | 247 +++++++++++++++++- .../internal/CaptureModelsSpecific.qll | 31 ++- .../CaptureTypeBasedSummaryModels.qll | 2 +- .../mad/modelgenerator/ModelPrinting.qll | 43 +-- 4 files changed, 294 insertions(+), 29 deletions(-) diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 0f24bab005eb..9373382941a8 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) { api = returnNodeEnclosingCallable(ret) and isOwnInstanceAccessNode(ret) ) and - result = Printing::asValueModel(api, qualifierString(), "ReturnValue") + result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue") } private int accessPathLimit0() { result = 2 } @@ -237,7 +237,7 @@ string captureThroughFlow0( input = parameterNodeAsInput(p) and output = getOutput(returnNodeExt) and input != output and - result = Printing::asTaintModel(api, input, output) + result = Printing::asLiftedTaintModel(api, input, output) ) } @@ -291,26 +291,259 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) { ) } +/** + * Gets the MaD string representation of a store step access path. + */ private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { result = concat(int i | | getContent(ap, i), "" order by i) } +/** + * Gets the MaD string representation of a read step access path. + */ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { result = concat(int i | | getContent(ap, i), "" order by i desc) } -string captureContentFlow(DataFlowSummaryTargetApi api) { +/** + * Holds if the access path `ap` contains a field or synthetic field access. + */ +private predicate mentionsField(PropagateContentFlow::AccessPath ap) { + exists(ContentSet head, PropagateContentFlow::AccessPath tail | + head = ap.getHead() and + tail = ap.getTail() and + (mentionsField(tail) or isField(head)) + ) +} + +private predicate apiFlow( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, + ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue +) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + returnNodeExt.getEnclosingCallable() = api and + p.getEnclosingCallable() = api +} + +/** + * A class of APIs relevant for modeling using content flow. + * The following heuristic is applied: + * Content flow is only relevant for an API, if + * #content flow <= 2 * #parameters + 3 + * If an API produces more content flow, it is likely that + * 1. Types are not sufficiently constrained leading to a combinatorial + * explosion in dispatch and thus in the generated summaries. + * 2. It is a reasonable approximation to use the non-content based flow + * detection instead, as reads and stores would use a significant + * part of an objects internal state. + */ +private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { + ContentDataFlowSummaryTargetApi() { + count(string input, string output | + exists( + DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, + ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores + | + apiFlow(this, p, reads, returnNodeExt, stores, _) and + input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) + ) + ) <= 2 * this.getNumberOfParameters() + 3 + } +} + +pragma[nomagic] +private predicate apiContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue +) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + returnNodeExt.getEnclosingCallable() = api and + p.getEnclosingCallable() = api +} + +/** + * Holds if any of the content sets in `path` translates into a synthetic field. + */ +private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { + exists(PropagateContentFlow::AccessPath tail, ContentSet head | + head = path.getHead() and + tail = path.getTail() and + ( + exists(getSyntheticName(head)) or + hasSyntheticContent(tail) + ) + ) +} + +/** + * A module containing predicates for validating access paths containing content sets + * that translates into synthetic fields, when used for generated summary models. + */ +private module AccessPathSyntheticValidation { + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`). + */ + private predicate step( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + p.getType() = t1 and + returnNodeExt.getType() = t2 and + apiContentFlow(_, p, read, returnNodeExt, store, _) + ) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. + * + * Step A -> Synth. + */ + private predicate synthPathEntry( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + not hasSyntheticContent(read) and + hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` has synthetic content + * and `store` does not. + * + * Step Synth -> A. + */ + private predicate synthPathExit( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + hasSyntheticContent(read) and + not hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Takes one or more synthetic steps. + * Synth ->+ Synth + */ + private predicate synthPathStepRec( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + hasSyntheticContent(read) and + hasSyntheticContent(store) and + ( + step(t1, read, t2, store) + or + exists(PropagateContentFlow::AccessPath mid, Type midType | + step(t1, read, midType, mid) and synthPathStepRec(midType, mid.reverse(), t2, store) + ) + ) + } + + /** + * Holds if there exists a path of steps from `read` to an exit. + * + * read ->* Synth -> A + */ + private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { + synthPathExit(t, read, _, _) + or + exists(PropagateContentFlow::AccessPath mid, Type midType | + synthPathStepRec(t, read, midType, mid) and synthPathExit(midType, mid.reverse(), _, _) + ) + } + + /** + * Holds if there exists a path of steps from an entry to `store`. + * + * A -> Synth ->* store + */ + private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { + synthPathEntry(_, _, t, store) + or + exists(PropagateContentFlow::AccessPath mid, Type midType | + synthPathEntry(_, _, midType, mid) and synthPathStepRec(midType, mid.reverse(), t, store) + ) + } + + /** + * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) + * contain content that will be translated into a synthetic field, when being used in + * a MaD summary model, and if there is a range of APIs, such that + * when chaining their flow access paths, there exists access paths `A` and `B` where + * A ->* read -> store ->* B and where `A` and `B` do not contain content that will + * be translated into a synthetic field. + * + * This is needed because we don't want to include summaries that reads from or + * stores into a "dead" synthetic field. + * + * Example: + * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and + * `setX`, which gets and sets a private field `X` on `t`. + * This would lead to the following content flows + * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. + * setX : Argument[0] -> Argument[this].SyntheticField[t.X] + * As the reads and stores are on synthetic fields we should only make summaries + * if both of these methods exist. + */ + pragma[nomagic] + predicate acceptReadStore( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse()) + or + exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read | + synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) + or + synthEntryReaches(t1, store0) and + step(t1, read, t2, store) and + reachesSynthExit(t2, store.reverse()) + ) + } +} + +/** + * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. + * Flow is considered relevant, + * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. + * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if + * the synthetic content is "live" on the relevant declaring type. + */ +private predicate apiRelevantContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath store, boolean preservesValue +) { + apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and + ( + not hasSyntheticContent(read) and not hasSyntheticContent(store) + or + AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store) + ) +} + +/** + * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to + * the return value or a parameter). + * + * Models are lifted to the best type in case the read and store access paths do not + * contain a field or synthetic field access. + */ +string captureContentFlow(ContentDataFlowSummaryTargetApi api) { exists( DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output, PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores, - boolean preservesValue + boolean preservesValue, boolean lift | - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and + apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and input != output and - result = Printing::asModel(api, input, output, preservesValue) + (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) and + result = Printing::asModel(api, input, output, preservesValue, lift) ) } diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll index 2be162d5f9b4..363b81293ddb 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll @@ -340,16 +340,35 @@ predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2 } /** - * Gets the MaD string representation of the contentset `c`. + * Holds if the content set `c` is a field or a synthetic field. + */ +predicate isField(ContentSet c) { + c instanceof DataFlowUtil::FieldContent or + c instanceof DataFlowUtil::SyntheticFieldContent +} + +/** + * Gets the MaD synthetic name string representation for the content set `c`, if any. + */ +string getSyntheticName(DataFlow::ContentSet c) { + exists(Field f | + not f.isPublic() and + f = c.(DataFlowUtil::FieldContent).getField() and + result = f.getQualifiedName() + ) + or + result = c.(DataFlowUtil::SyntheticFieldContent).getField() +} + +/** + * Gets the MaD string representation of the content set `c`. */ string printContent(ContentSet c) { - exists(Field f, string name | - f = c.(DataFlowUtil::FieldContent).getField() and name = f.getQualifiedName() - | - if f.isPublic() then result = "Field[" + name + "]" else result = "SyntheticField[" + name + "]" + exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() | + result = "Field[" + f.getQualifiedName() + "]" ) or - result = "SyntheticField[" + c.(DataFlowUtil::SyntheticFieldContent).getField() + "]" + result = "SyntheticField[" + getSyntheticName(c) + "]" or c instanceof DataFlowUtil::CollectionContent and result = "Element" or diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 34e53bad2d7f..3d56dff50726 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -329,7 +329,7 @@ class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi { output(this, tv, output) and input != output | - result = Printing::asValueModel(this, input, output) + result = Printing::asLiftedValueModel(this, input, output) ) } } diff --git a/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll b/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll index 2867d927984d..4f5fa59d5377 100644 --- a/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll +++ b/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll @@ -64,14 +64,23 @@ module ModelPrintingImpl { /** * Gets the summary model for `api` with `input`, `output` and `kind`. + * The model is lifted in case `lift` is true. */ bindingset[input, output, kind] - private string asSummaryModel(Printing::SummaryApi api, string input, string output, string kind) { - result = - asPartialModel(api.lift()) + input + ";" // - + output + ";" // - + kind + ";" // - + Printing::getProvenance() + private string asSummaryModel( + Printing::SummaryApi api, string input, string output, string kind, boolean lift + ) { + exists(Lang::Callable c | + lift = true and c = api.lift() + or + lift = false and c = api + | + result = + asPartialModel(c) + input + ";" // + + output + ";" // + + kind + ";" // + + Printing::getProvenance() + ) } string asNeutralSummaryModel(Printing::SummaryApi api) { @@ -82,31 +91,35 @@ module ModelPrintingImpl { } /** - * Gets the value summary model for `api` with `input` and `output`. + * Gets the lifted value summary model for `api` with `input` and `output`. */ bindingset[input, output] - string asValueModel(Printing::SummaryApi api, string input, string output) { - result = asSummaryModel(api, input, output, "value") + string asLiftedValueModel(Printing::SummaryApi api, string input, string output) { + result = asModel(api, input, output, true, true) } /** - * Gets the taint summary model for `api` with `input` and `output`. + * Gets the lifted taint summary model for `api` with `input` and `output`. */ bindingset[input, output] - string asTaintModel(Printing::SummaryApi api, string input, string output) { - result = asSummaryModel(api, input, output, "taint") + string asLiftedTaintModel(Printing::SummaryApi api, string input, string output) { + result = asModel(api, input, output, false, true) } /** * Gets the summary model for `api` with `input` and `output`. + * (1) If `preservesValue` is true a "value" model is created. + * (2) If `lift` is true the model is lifted to the best possible type. */ bindingset[input, output, preservesValue] - string asModel(Printing::SummaryApi api, string input, string output, boolean preservesValue) { + string asModel( + Printing::SummaryApi api, string input, string output, boolean preservesValue, boolean lift + ) { preservesValue = true and - result = asValueModel(api, input, output) + result = asSummaryModel(api, input, output, "value", lift) or preservesValue = false and - result = asTaintModel(api, input, output) + result = asSummaryModel(api, input, output, "taint", lift) } /** From d7e61d07d1be9f96ae6c5be04747637408cc25ba Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Thu, 5 Sep 2024 16:45:14 +0200 Subject: [PATCH 03/10] Java: Update some model generator test cases. --- .../dataflow/p/InnerHolder.java | 8 ++- .../modelgenerator/dataflow/p/Joiner.java | 16 +++-- .../dataflow/p/MultipleImpls.java | 10 ++- .../utils/modelgenerator/dataflow/p/Pojo.java | 63 ++++++++++++++++++- .../p/PrivateFlowViaPublicInterface.java | 10 ++- 5 files changed, 85 insertions(+), 22 deletions(-) diff --git a/java/ql/test/utils/modelgenerator/dataflow/p/InnerHolder.java b/java/ql/test/utils/modelgenerator/dataflow/p/InnerHolder.java index 5e8a050a428c..b44c397bb1a5 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/p/InnerHolder.java +++ b/java/ql/test/utils/modelgenerator/dataflow/p/InnerHolder.java @@ -37,8 +37,14 @@ public void append(String value) { } // summary=p;InnerHolder;false;getValue;();;Argument[this];ReturnValue;taint;df-generated - // contentbased-summary=p;InnerHolder;false;getValue;();;Argument[this].SyntheticField[p.InnerHolder.context].SyntheticField[p.InnerHolder$Context.value];ReturnValue;value;df-generated + // contentbased-summary=p;InnerHolder;false;getValue;();;Argument[this].SyntheticField[p.InnerHolder.sb];ReturnValue;taint;df-generated public String getValue() { + return sb.toString(); + } + + // summary=p;InnerHolder;false;getContextValue;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=p;InnerHolder;false;getContextValue;();;Argument[this].SyntheticField[p.InnerHolder.context].SyntheticField[p.InnerHolder$Context.value];ReturnValue;value;df-generated + public String getContextValue() { return context.getValue(); } } diff --git a/java/ql/test/utils/modelgenerator/dataflow/p/Joiner.java b/java/ql/test/utils/modelgenerator/dataflow/p/Joiner.java index cf7626eba619..5998af55f61e 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/p/Joiner.java +++ b/java/ql/test/utils/modelgenerator/dataflow/p/Joiner.java @@ -22,8 +22,7 @@ public Joiner(CharSequence delimiter) { // summary=p;Joiner;false;Joiner;(CharSequence,CharSequence,CharSequence);;Argument[1];Argument[this];taint;df-generated // summary=p;Joiner;false;Joiner;(CharSequence,CharSequence,CharSequence);;Argument[2];Argument[this];taint;df-generated // contentbased-summary=p;Joiner;false;Joiner;(CharSequence,CharSequence,CharSequence);;Argument[0];Argument[this].SyntheticField[p.Joiner.delimiter];taint;df-generated - // contentbased-summary=p;Joiner;false;Joiner;(CharSequence,CharSequence,CharSequence);;Argument[1];Argument[this].SyntheticField[p.Joiner.prefix];taint;df-generated - // contentbased-summary=p;Joiner;false;Joiner;(CharSequence,CharSequence,CharSequence);;Argument[2];Argument[this].SyntheticField[p.Joiner.suffix];taint;df-generated + // No content based summaries for prefix and suffix as they are "dead" synthetic fields. public Joiner(CharSequence delimiter, CharSequence prefix, CharSequence suffix) { Objects.requireNonNull(prefix, "The prefix must not be null"); Objects.requireNonNull(delimiter, "The delimiter must not be null"); @@ -36,8 +35,7 @@ public Joiner(CharSequence delimiter, CharSequence prefix, CharSequence suffix) // summary=p;Joiner;false;setEmptyValue;(CharSequence);;Argument[0];Argument[this];taint;df-generated // summary=p;Joiner;false;setEmptyValue;(CharSequence);;Argument[this];ReturnValue;value;df-generated - // contentbased-summary=p;Joiner;false;setEmptyValue;(CharSequence);;Argument[0];Argument[this].SyntheticField[p.Joiner.emptyValue];taint;df-generated - // contentbased-summary=p;Joiner;false;setEmptyValue;(CharSequence);;Argument[0];ReturnValue.SyntheticField[p.Joiner.emptyValue];taint;df-generated + // No content based summary as emptyValue is "dead" (synthetic)field. // contentbased-summary=p;Joiner;false;setEmptyValue;(CharSequence);;Argument[this];ReturnValue;value;df-generated public Joiner setEmptyValue(CharSequence emptyValue) { this.emptyValue = @@ -45,6 +43,12 @@ public Joiner setEmptyValue(CharSequence emptyValue) { return this; } + // summary=p;Joiner;false;getDelimiter;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=p;Joiner;false;getDelimiter;();;Argument[this].SyntheticField[p.Joiner.delimiter];ReturnValue;value;df-generated + public String getDelimiter() { + return delimiter; + } + private static int getChars(String s, char[] chars, int start) { int len = s.length(); s.getChars(0, len, chars, start); @@ -78,8 +82,8 @@ public String toString() { } // summary=p;Joiner;false;add;(CharSequence);;Argument[this];ReturnValue;value;df-generated - // contentbased-summary=p;Joiner;false;add;(CharSequence);;Argument[this].SyntheticField[p.Joiner.elts].ArrayElement;ReturnValue.SyntheticField[p.Joiner.elts].ArrayElement;value;df-generated // contentbased-summary=p;Joiner;false;add;(CharSequence);;Argument[this];ReturnValue;value;df-generated + // MISSING content based summaries for "elts". This could be a synthetic field. public Joiner add(CharSequence newElement) { final String elt = String.valueOf(newElement); if (elts == null) { @@ -103,8 +107,8 @@ private int checkAddLength(int oldLen, int inc) { } // summary=p;Joiner;false;merge;(Joiner);;Argument[this];ReturnValue;value;df-generated - // contentbased-summary=p;Joiner;false;merge;(Joiner);;Argument[this].SyntheticField[p.Joiner.elts].ArrayElement;ReturnValue.SyntheticField[p.Joiner.elts].ArrayElement;value;df-generated // contentbased-summary=p;Joiner;false;merge;(Joiner);;Argument[this];ReturnValue;value;df-generated + // MISSING content based summaries for "elts". This could be a synthetic field. public Joiner merge(Joiner other) { Objects.requireNonNull(other); if (other.elts == null) { diff --git a/java/ql/test/utils/modelgenerator/dataflow/p/MultipleImpls.java b/java/ql/test/utils/modelgenerator/dataflow/p/MultipleImpls.java index 164f55ae7327..e854327d0325 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/p/MultipleImpls.java +++ b/java/ql/test/utils/modelgenerator/dataflow/p/MultipleImpls.java @@ -30,18 +30,16 @@ public static class Strat2 implements Strategy { private String foo; // summary=p;MultipleImpls$Strategy;true;doSomething;(String);;Argument[0];Argument[this];taint;df-generated - // A field based model should not be lifted if the field pertains to the concrete - // implementation. - // SPURIOUS-contentbased-summary=p;MultipleImpls$Strategy;true;doSomething;(String);;Argument[0];Argument[this].SyntheticField[p.MultipleImpls$Strat2.foo];value;df-generated + // The content based summary is not lifted as it pertains to a (synthetic)field. + // contentbased-summary=p;MultipleImpls$Strat2;true;doSomething;(String);;Argument[0];Argument[this].SyntheticField[p.MultipleImpls$Strat2.foo];value;df-generated public String doSomething(String value) { this.foo = value; return "none"; } // summary=p;MultipleImpls$Strat2;true;getValue;();;Argument[this];ReturnValue;taint;df-generated - // A field based model should not be lifted if the field pertains to the concrete - // implementation. - // SPURIOUS-contentbased-summary=p;MultipleImpls$Strat2;true;getValue;();;Argument[this].SyntheticField[p.MultipleImpls$Strat2.foo];ReturnValue;value;df-generated + // The content based summary is not lifted as it pertains to a (synthetic)field. + // contentbased-summary=p;MultipleImpls$Strat2;true;getValue;();;Argument[this].SyntheticField[p.MultipleImpls$Strat2.foo];ReturnValue;value;df-generated public String getValue() { return this.foo; } diff --git a/java/ql/test/utils/modelgenerator/dataflow/p/Pojo.java b/java/ql/test/utils/modelgenerator/dataflow/p/Pojo.java index e6fb581aac2b..3e83c7150c1e 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/p/Pojo.java +++ b/java/ql/test/utils/modelgenerator/dataflow/p/Pojo.java @@ -26,9 +26,20 @@ int length() { public byte[] byteArray = new byte[] {1, 2, 3}; private float[] floatArray = new float[] {1, 2, 3}; - private char[] charArray = new char[] {'a', 'b', 'c'}; private List charList = Arrays.asList('a', 'b', 'c'); - private Byte[] byteObjectArray = new Byte[] {1, 2, 3}; + private char[] charArray; + private Byte[] byteObjectArray; + private String stringValue1; + private String stringValue2; + + // summary=p;Pojo;false;Pojo;(Byte[],char[]);;Argument[0];Argument[this];taint;df-generated + // summary=p;Pojo;false;Pojo;(Byte[],char[]);;Argument[1];Argument[this];taint;df-generated + // contentbased-summary=p;Pojo;false;Pojo;(Byte[],char[]);;Argument[0];Argument[this].SyntheticField[p.Pojo.byteObjectArray];value;df-generated + // contentbased-summary=p;Pojo;false;Pojo;(Byte[],char[]);;Argument[1];Argument[this].SyntheticField[p.Pojo.charArray];value;df-generated + public Pojo(Byte[] byteObjectArray, char[] charArray) { + this.byteObjectArray = byteObjectArray; + this.charArray = charArray; + } // summary=p;Pojo;false;getValue;();;Argument[this];ReturnValue;taint;df-generated // contentbased-summary=p;Pojo;false;getValue;();;Argument[this].SyntheticField[p.Pojo.value];ReturnValue;value;df-generated @@ -75,6 +86,12 @@ public byte[] getByteArray() { return byteArray; } + // summary=p;Pojo;false;setByteArray;(byte[]);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=p;Pojo;false;setByteArray;(byte[]);;Argument[0];Argument[this].Field[p.Pojo.byteArray];value;df-generated + public void setByteArray(byte[] value) { + byteArray = value; + } + // neutral=p;Pojo;getFloatArray;();summary;df-generated public float[] getFloatArray() { return floatArray; @@ -91,7 +108,7 @@ public Collection getBoxedCollection() { } // summary=p;Pojo;false;getBoxedChars;();;Argument[this];ReturnValue;taint;df-generated - // contentbased-summary=p;Pojo;false;getBoxedChars;();;Argument[this].SyntheticField[p.Pojo.charList];ReturnValue;value;df-generated + // No content based summary as charList is a "dead" (synthetic)field. public List getBoxedChars() { return charList; } @@ -117,4 +134,44 @@ public BigDecimal getBigDecimal() { public void fillIn(List target) { target.add(value); } + + // summary=p;Pojo;false;setStringValue1;(String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=p;Pojo;false;setStringValue1;(String);;Argument[0];Argument[this].SyntheticField[p.Pojo.stringValue1];value;df-generated + public void setStringValue1(String value) { + this.stringValue1 = value; + } + + // neutral=p;Pojo;copyStringValue;();summary;df-generated + // contentbased-summary=p;Pojo;false;copyStringValue;();;Argument[this].SyntheticField[p.Pojo.stringValue1];Argument[this].SyntheticField[p.Pojo.stringValue2];value;df-generated + public void copyStringValue() { + this.stringValue2 = this.stringValue1; + } + + // summary=p;Pojo;false;getStringValue2;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=p;Pojo;false;getStringValue2;();;Argument[this].SyntheticField[p.Pojo.stringValue2];ReturnValue;value;df-generated + public String getStringValue2() { + return this.stringValue2; + } + + public class InnerPojo { + private String value; + + // summary=p;Pojo$InnerPojo;true;InnerPojo;(String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=p;Pojo$InnerPojo;true;InnerPojo;(String);;Argument[0];Argument[this].SyntheticField[p.Pojo$InnerPojo.value];value;df-generated + public InnerPojo(String value) { + this.value = value; + } + + // summary=p;Pojo$InnerPojo;true;getValue;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=p;Pojo$InnerPojo;true;getValue;();;Argument[this].SyntheticField[p.Pojo$InnerPojo.value];ReturnValue;value;df-generated + public String getValue() { + return value; + } + } + + // summary=p;Pojo;false;makeInnerPojo;(String);;Argument[0];ReturnValue;taint;df-generated + // contentbased-summary=p;Pojo;false;makeInnerPojo;(String);;Argument[0];ReturnValue.SyntheticField[p.Pojo$InnerPojo.value];value;df-generated + public InnerPojo makeInnerPojo(String value) { + return new InnerPojo(value); + } } diff --git a/java/ql/test/utils/modelgenerator/dataflow/p/PrivateFlowViaPublicInterface.java b/java/ql/test/utils/modelgenerator/dataflow/p/PrivateFlowViaPublicInterface.java index b5a1de27d4a9..8b9397fdb3ca 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/p/PrivateFlowViaPublicInterface.java +++ b/java/ql/test/utils/modelgenerator/dataflow/p/PrivateFlowViaPublicInterface.java @@ -29,9 +29,9 @@ public PrivateImplWithSink(File file) { } // summary=p;PrivateFlowViaPublicInterface$SPI;true;openStream;();;Argument[this];ReturnValue;taint;df-generated - // A field based model should not be lifted if the field pertains to the concrete - // implementation. - // SPURIOUS-contentbased-summary=p;PrivateFlowViaPublicInterface$SPI;true;openStream;();;Argument[this].SyntheticField[p.PrivateFlowViaPublicInterface$PrivateImplWithSink.file];ReturnValue;taint;df-generated + // This summary shouldn't be created because the method is private. + // This is most likely because the lifting logic hasn't been properly adapted. + // SPURIOUS-contentbased-summary=p;PrivateFlowViaPublicInterface$PrivateImplWithSink;false;openStream;();;Argument[this].SyntheticField[p.PrivateFlowViaPublicInterface$PrivateImplWithSink.file];ReturnValue;taint;df-generated @Override public OutputStream openStream() throws IOException { return new FileOutputStream(file); @@ -54,9 +54,7 @@ public OutputStream openStreamNone() throws IOException { } // summary=p;PrivateFlowViaPublicInterface;true;createAnSPI;(File);;Argument[0];ReturnValue;taint;df-generated - // A field based model should not be lifted if the field pertains to the concrete - // implementation. - // SPURIOUS-contentbased-summary=p;PrivateFlowViaPublicInterface;true;createAnSPI;(File);;Argument[0];ReturnValue.SyntheticField[p.PrivateFlowViaPublicInterface$PrivateImplWithSink.file];value;df-generated + // contentbased-summary=p;PrivateFlowViaPublicInterface;true;createAnSPI;(File);;Argument[0];ReturnValue.SyntheticField[p.PrivateFlowViaPublicInterface$PrivateImplWithSink.file];value;df-generated public static SPI createAnSPI(File file) { return new PrivateImplWithSink(file); } From 9149a17d7937e9094c1ac03b0ad7f76dbb87b355 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Mon, 9 Sep 2024 11:26:13 +0200 Subject: [PATCH 04/10] Java: Only keep the best generated model in terms of taint/value. --- .../modelgenerator/internal/CaptureModels.qll | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 9373382941a8..359a01dde4d0 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -525,6 +525,23 @@ private predicate apiRelevantContentFlow( ) } +pragma[nomagic] +private predicate captureContentFlow0( + ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, + boolean lift +) { + exists( + DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads, + PropagateContentFlow::AccessPath stores + | + apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and + input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and + input != output and + (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) + ) +} + /** * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to * the return value or a parameter). @@ -533,16 +550,9 @@ private predicate apiRelevantContentFlow( * contain a field or synthetic field access. */ string captureContentFlow(ContentDataFlowSummaryTargetApi api) { - exists( - DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output, - PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores, - boolean preservesValue, boolean lift - | - apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and - input != output and - (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) and + exists(string input, string output, boolean lift, boolean preservesValue | + captureContentFlow0(api, input, output, _, lift) and + preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and result = Printing::asModel(api, input, output, preservesValue, lift) ) } From 0fbeca14ada61483465b0b3528485bc816a4a929 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Mon, 9 Sep 2024 10:31:44 +0200 Subject: [PATCH 05/10] Java: Add content based example with multiple paths. --- .../utils/modelgenerator/dataflow/p/MultiPaths.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 java/ql/test/utils/modelgenerator/dataflow/p/MultiPaths.java diff --git a/java/ql/test/utils/modelgenerator/dataflow/p/MultiPaths.java b/java/ql/test/utils/modelgenerator/dataflow/p/MultiPaths.java new file mode 100644 index 000000000000..d36d99c20c20 --- /dev/null +++ b/java/ql/test/utils/modelgenerator/dataflow/p/MultiPaths.java @@ -0,0 +1,13 @@ +package p; + +public class MultiPaths { + + // summary=p;MultiPaths;true;cond;(String,String);;Argument[0];ReturnValue;taint;df-generated + // contentbased-summary=p;MultiPaths;true;cond;(String,String);;Argument[0];ReturnValue;value;df-generated + public String cond(String x, String other) { + if (x == other) { + return x.substring(0, 100); + } + return x; + } +} From e94890280a0191e4cb657f42b97e0722cd5ba8ce Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Tue, 3 Sep 2024 09:50:26 +0200 Subject: [PATCH 06/10] C#: Sync changes and make language specific parts. --- .../dataflow/internal/DataFlowPublic.qll | 5 +- .../modelgenerator/internal/CaptureModels.qll | 261 +++++++++++++++++- .../internal/CaptureModelsSpecific.qll | 38 ++- .../CaptureTypeBasedSummaryModels.qll | 2 +- 4 files changed, 288 insertions(+), 18 deletions(-) diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll index df876aae455e..034a50269dd7 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll @@ -293,9 +293,12 @@ class ContentSet extends TContentSet { */ predicate isProperty(Property p) { this = TPropertyContentSet(p) } - /** Holds if this content set represent the field `f`. */ + /** Holds if this content set represents the field `f`. */ predicate isField(Field f) { this.isSingleton(TFieldContent(f)) } + /** Holds if this content set represents the synthetic field `s`. */ + predicate isSyntheticField(string s) { this.isSingleton(TSyntheticFieldContent(s)) } + /** Holds if this content set represents an element in a collection. */ predicate isElement() { this.isSingleton(TElementContent()) } diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 0f24bab005eb..359a01dde4d0 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) { api = returnNodeEnclosingCallable(ret) and isOwnInstanceAccessNode(ret) ) and - result = Printing::asValueModel(api, qualifierString(), "ReturnValue") + result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue") } private int accessPathLimit0() { result = 2 } @@ -237,7 +237,7 @@ string captureThroughFlow0( input = parameterNodeAsInput(p) and output = getOutput(returnNodeExt) and input != output and - result = Printing::asTaintModel(api, input, output) + result = Printing::asLiftedTaintModel(api, input, output) ) } @@ -291,26 +291,269 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) { ) } +/** + * Gets the MaD string representation of a store step access path. + */ private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { result = concat(int i | | getContent(ap, i), "" order by i) } +/** + * Gets the MaD string representation of a read step access path. + */ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { result = concat(int i | | getContent(ap, i), "" order by i desc) } -string captureContentFlow(DataFlowSummaryTargetApi api) { +/** + * Holds if the access path `ap` contains a field or synthetic field access. + */ +private predicate mentionsField(PropagateContentFlow::AccessPath ap) { + exists(ContentSet head, PropagateContentFlow::AccessPath tail | + head = ap.getHead() and + tail = ap.getTail() and + (mentionsField(tail) or isField(head)) + ) +} + +private predicate apiFlow( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, + ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue +) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + returnNodeExt.getEnclosingCallable() = api and + p.getEnclosingCallable() = api +} + +/** + * A class of APIs relevant for modeling using content flow. + * The following heuristic is applied: + * Content flow is only relevant for an API, if + * #content flow <= 2 * #parameters + 3 + * If an API produces more content flow, it is likely that + * 1. Types are not sufficiently constrained leading to a combinatorial + * explosion in dispatch and thus in the generated summaries. + * 2. It is a reasonable approximation to use the non-content based flow + * detection instead, as reads and stores would use a significant + * part of an objects internal state. + */ +private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { + ContentDataFlowSummaryTargetApi() { + count(string input, string output | + exists( + DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, + ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores + | + apiFlow(this, p, reads, returnNodeExt, stores, _) and + input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) + ) + ) <= 2 * this.getNumberOfParameters() + 3 + } +} + +pragma[nomagic] +private predicate apiContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue +) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + returnNodeExt.getEnclosingCallable() = api and + p.getEnclosingCallable() = api +} + +/** + * Holds if any of the content sets in `path` translates into a synthetic field. + */ +private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { + exists(PropagateContentFlow::AccessPath tail, ContentSet head | + head = path.getHead() and + tail = path.getTail() and + ( + exists(getSyntheticName(head)) or + hasSyntheticContent(tail) + ) + ) +} + +/** + * A module containing predicates for validating access paths containing content sets + * that translates into synthetic fields, when used for generated summary models. + */ +private module AccessPathSyntheticValidation { + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`). + */ + private predicate step( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + p.getType() = t1 and + returnNodeExt.getType() = t2 and + apiContentFlow(_, p, read, returnNodeExt, store, _) + ) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. + * + * Step A -> Synth. + */ + private predicate synthPathEntry( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + not hasSyntheticContent(read) and + hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` has synthetic content + * and `store` does not. + * + * Step Synth -> A. + */ + private predicate synthPathExit( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + hasSyntheticContent(read) and + not hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Takes one or more synthetic steps. + * Synth ->+ Synth + */ + private predicate synthPathStepRec( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + hasSyntheticContent(read) and + hasSyntheticContent(store) and + ( + step(t1, read, t2, store) + or + exists(PropagateContentFlow::AccessPath mid, Type midType | + step(t1, read, midType, mid) and synthPathStepRec(midType, mid.reverse(), t2, store) + ) + ) + } + + /** + * Holds if there exists a path of steps from `read` to an exit. + * + * read ->* Synth -> A + */ + private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { + synthPathExit(t, read, _, _) + or + exists(PropagateContentFlow::AccessPath mid, Type midType | + synthPathStepRec(t, read, midType, mid) and synthPathExit(midType, mid.reverse(), _, _) + ) + } + + /** + * Holds if there exists a path of steps from an entry to `store`. + * + * A -> Synth ->* store + */ + private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { + synthPathEntry(_, _, t, store) + or + exists(PropagateContentFlow::AccessPath mid, Type midType | + synthPathEntry(_, _, midType, mid) and synthPathStepRec(midType, mid.reverse(), t, store) + ) + } + + /** + * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) + * contain content that will be translated into a synthetic field, when being used in + * a MaD summary model, and if there is a range of APIs, such that + * when chaining their flow access paths, there exists access paths `A` and `B` where + * A ->* read -> store ->* B and where `A` and `B` do not contain content that will + * be translated into a synthetic field. + * + * This is needed because we don't want to include summaries that reads from or + * stores into a "dead" synthetic field. + * + * Example: + * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and + * `setX`, which gets and sets a private field `X` on `t`. + * This would lead to the following content flows + * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. + * setX : Argument[0] -> Argument[this].SyntheticField[t.X] + * As the reads and stores are on synthetic fields we should only make summaries + * if both of these methods exist. + */ + pragma[nomagic] + predicate acceptReadStore( + Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store + ) { + synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse()) + or + exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read | + synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) + or + synthEntryReaches(t1, store0) and + step(t1, read, t2, store) and + reachesSynthExit(t2, store.reverse()) + ) + } +} + +/** + * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. + * Flow is considered relevant, + * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. + * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if + * the synthetic content is "live" on the relevant declaring type. + */ +private predicate apiRelevantContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath store, boolean preservesValue +) { + apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and + ( + not hasSyntheticContent(read) and not hasSyntheticContent(store) + or + AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store) + ) +} + +pragma[nomagic] +private predicate captureContentFlow0( + ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, + boolean lift +) { exists( - DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, string input, string output, - PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores, - boolean preservesValue + DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads, + PropagateContentFlow::AccessPath stores | - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and + apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and input != output and - result = Printing::asModel(api, input, output, preservesValue) + (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) + ) +} + +/** + * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to + * the return value or a parameter). + * + * Models are lifted to the best type in case the read and store access paths do not + * contain a field or synthetic field access. + */ +string captureContentFlow(ContentDataFlowSummaryTargetApi api) { + exists(string input, string output, boolean lift, boolean preservesValue | + captureContentFlow0(api, input, output, _, lift) and + preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and + result = Printing::asModel(api, input, output, preservesValue, lift) ) } diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll index 2f2a1ef2761b..8ac07342d559 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll @@ -390,23 +390,47 @@ private string getFullyQualifiedName(Declaration d) { } /** - * Gets the MaD string representation of the contentset `c`. + * Holds if the content set `c` is a field, property or synthetic field. + */ +predicate isField(ContentSet c) { c.isField(_) or c.isSyntheticField(_) or c.isProperty(_) } + +/** + * Gets the MaD synthetic name string representation for the content set `c`, if any. + */ +string getSyntheticName(DataFlow::ContentSet c) { + exists(CS::Field f | + not f.isEffectivelyPublic() and + c.isField(f) and + result = getFullyQualifiedName(f) + ) + or + exists(CS::Property p | + not p.isEffectivelyPublic() and + c.isProperty(p) and + result = getFullyQualifiedName(p) + ) + or + c.isSyntheticField(result) +} + +/** + * Gets the MaD string representation of the content set `c`. */ string printContent(DataFlow::ContentSet c) { exists(CS::Field f, string name | name = getFullyQualifiedName(f) | c.isField(f) and - if f.isEffectivelyPublic() - then result = "Field[" + name + "]" - else result = "SyntheticField[" + name + "]" + f.isEffectivelyPublic() and + result = "Field[" + name + "]" ) or exists(CS::Property p, string name | name = getFullyQualifiedName(p) | c.isProperty(p) and - if p.isEffectivelyPublic() - then result = "Property[" + name + "]" - else result = "SyntheticField[" + name + "]" + p.isEffectivelyPublic() and + result = "Property[" + name + "]" ) or + result = "SyntheticField[" + getSyntheticName(c) + "]" + or c.isElement() and result = "Element" } diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 8efdc6100cda..1a0a3d2ca420 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -223,7 +223,7 @@ class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi { output(this, tp, output) and input != output | - result = Printing::asValueModel(this, input, output) + result = Printing::asLiftedValueModel(this, input, output) ) } } From da012a7a44b2b5f03af45d2a876c472e19f6b997 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Mon, 9 Sep 2024 12:24:28 +0200 Subject: [PATCH 07/10] C#: Add the capture content summary models query. --- .../modelgenerator/CaptureContentSummaryModels.ql | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql diff --git a/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql new file mode 100644 index 000000000000..5a653867572e --- /dev/null +++ b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -0,0 +1,13 @@ +/** + * @name Capture content based summary models. + * @description Finds applicable content based summary models to be used by other queries. + * @kind diagnostic + * @id cs/utils/modelgenerator/contentbased-summary-models + * @tags modelgenerator + */ + +import internal.CaptureModels + +from DataFlowSummaryTargetApi api, string flow +where flow = captureContentFlow(api) +select flow order by flow From b94940b6d9a85e32c98cc0150f96594354b0cde1 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Mon, 9 Sep 2024 13:11:13 +0200 Subject: [PATCH 08/10] C#: Adjust existing model generator tests and update expected output. --- .../modelgenerator/dataflow/Summaries.cs | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs b/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs index 994eaf7c3788..281b75b0168c 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs +++ b/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs @@ -66,7 +66,14 @@ public string ReturnField() public class CollectionFlow { - private string tainted; + private readonly string tainted; + + // summary=Models;CollectionFlow;false;CollectionFlow;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;CollectionFlow;false;CollectionFlow;(System.String);;Argument[0];Argument[this].SyntheticField[Models.CollectionFlow.tainted];value;df-generated + public CollectionFlow(string s) + { + tainted = s; + } // summary=Models;CollectionFlow;false;ReturnArrayElement;(System.Object[]);;Argument[0].Element;ReturnValue;taint;df-generated // contentbased-summary=Models;CollectionFlow;false;ReturnArrayElement;(System.Object[]);;Argument[0].Element;ReturnValue;value;df-generated @@ -177,7 +184,14 @@ public Dictionary ReturnSimpleTypeDictionary(Dictionary a) public class IEnumerableFlow { - private string tainted; + private readonly string tainted; + + // summary=Models;IEnumerableFlow;false;IEnumerableFlow;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;IEnumerableFlow;false;IEnumerableFlow;(System.String);;Argument[0];Argument[this].SyntheticField[Models.IEnumerableFlow.tainted];value;df-generated + public IEnumerableFlow(string s) + { + tainted = s; + } // SPURIOUS-summary=Models;IEnumerableFlow;false;ReturnIEnumerable;(System.Collections.Generic.IEnumerable);;Argument[0].Element;ReturnValue;taint;df-generated // contentbased-summary=Models;IEnumerableFlow;false;ReturnIEnumerable;(System.Collections.Generic.IEnumerable);;Argument[0];ReturnValue;value;df-generated @@ -611,10 +625,17 @@ public abstract class D : IPublic3 public class DImpl : D { - private string tainted; + private readonly string tainted; + + // summary=Models;Inheritance+DImpl;false;DImpl;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;Inheritance+DImpl;false;DImpl;(System.String);;Argument[0];Argument[this].SyntheticField[Models.Inheritance+DImpl.tainted];value;df-generated + public DImpl(string s) + { + tainted = s; + } // summary=Models;Inheritance+IPublic3;true;get_Prop;();;Argument[this];ReturnValue;taint;df-generated - // contentbased-summary=Models;Inheritance+IPublic3;true;get_Prop;();;Argument[this].SyntheticField[Models.Inheritance+DImpl.tainted];ReturnValue;value;df-generated + // contentbased-summary=Models;Inheritance+DImpl;true;get_Prop;();;Argument[this].SyntheticField[Models.Inheritance+DImpl.tainted];ReturnValue;value;df-generated public override string Prop { get { return tainted; } } } } From 0abc08c7731f73125a5ace385c45534101931398 Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Mon, 9 Sep 2024 14:05:29 +0200 Subject: [PATCH 09/10] C#: Add some synthetic field content based examples. --- .../modelgenerator/dataflow/Summaries.cs | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs b/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs index 281b75b0168c..5a110fc02aac 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs +++ b/csharp/ql/test/utils/modelgenerator/dataflow/Summaries.cs @@ -701,3 +701,140 @@ public NestedFieldFlow ReverseFields() return new NestedFieldFlow() { FieldA = x }; } } + +public class SyntheticFields +{ + private string value1; + private string value2; + private string value3; + + private string chainBegin; + private string chainEnd; + + private string brokenChainBegin; + private string brokenChainEnd; + + // summary=Models;SyntheticFields;false;SyntheticFields;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;SyntheticFields;false;SyntheticFields;(System.String);;Argument[0];Argument[this].SyntheticField[Models.SyntheticFields.value1];value;df-generated + public SyntheticFields(string v1) + { + value1 = v1; + } + + // summary=Models;SyntheticFields;false;GetValue1;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=Models;SyntheticFields;false;GetValue1;();;Argument[this].SyntheticField[Models.SyntheticFields.value1];ReturnValue;value;df-generated + public string GetValue1() + { + return value1; + } + + // summary=Models;SyntheticFields;false;GetValue2;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=Models;SyntheticFields;false;GetValue2;();;Argument[this].SyntheticField[Models.SyntheticFields.value2];ReturnValue;value;df-generated + public string GetValue2() + { + return value2; + } + + // summary=Models;SyntheticFields;false;SetValue2;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;SyntheticFields;false;SetValue2;(System.String);;Argument[0];Argument[this].SyntheticField[Models.SyntheticFields.value2];value;df-generated + public void SetValue2(string v2) + { + value2 = v2; + } + + // summary=Models;SyntheticFields;false;SetValue3;(System.String);;Argument[0];Argument[this];taint;df-generated + // No content based summary as value3 is a dead synthetic field. + public void SetValue3(string v3) + { + value3 = v3; + } + + // summary=Models;SyntheticFields;false;SetChainBegin;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;SyntheticFields;false;SetChainBegin;(System.String);;Argument[0];Argument[this].SyntheticField[Models.SyntheticFields.chainBegin];value;df-generated + public void SetChainBegin(string v) + { + chainBegin = v; + } + + // neutral=Models;SyntheticFields;CopyChainValue;();summary;df-generated + // contentbased-summary=Models;SyntheticFields;false;CopyChainValue;();;Argument[this].SyntheticField[Models.SyntheticFields.chainBegin];Argument[this].SyntheticField[Models.SyntheticFields.chainEnd];value;df-generated + public void CopyChainValue() + { + chainEnd = chainBegin; + } + + // summary=Models;SyntheticFields;false;GetChainEnd;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=Models;SyntheticFields;false;GetChainEnd;();;Argument[this].SyntheticField[Models.SyntheticFields.chainEnd];ReturnValue;value;df-generated + public string GetChainEnd() + { + return chainEnd; + } + + // summary=Models;SyntheticFields;false;SetBrokenChainBegin;(System.String);;Argument[0];Argument[this];taint;df-generated + // No content based summary as brokenChainBegin is a dead synthetic field. + public void SetBrokenChainBegin(string v) + { + brokenChainBegin = v; + } + + // summary=Models;SyntheticFields;false;GetBrokenChainEnd;();;Argument[this];ReturnValue;taint;df-generated + // No content based summary as brokenChainEnd is a dead synthetic field. + public string GetBrokenChainEnd() + { + return brokenChainEnd; + } + + public class InnerSyntheticFields + { + private readonly string value; + + // summary=Models;SyntheticFields+InnerSyntheticFields;false;InnerSyntheticFields;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;SyntheticFields+InnerSyntheticFields;false;InnerSyntheticFields;(System.String);;Argument[0];Argument[this].SyntheticField[Models.SyntheticFields+InnerSyntheticFields.value];value;df-generated + public InnerSyntheticFields(string v) + { + value = v; + } + + // summary=Models;SyntheticFields+InnerSyntheticFields;false;GetValue;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=Models;SyntheticFields+InnerSyntheticFields;false;GetValue;();;Argument[this].SyntheticField[Models.SyntheticFields+InnerSyntheticFields.value];ReturnValue;value;df-generated + public string GetValue() + { + return value; + } + } + + // summary=Models;SyntheticFields;false;MakeInner;(System.String);;Argument[0];ReturnValue;taint;df-generated + // contentbased-summary=Models;SyntheticFields;false;MakeInner;(System.String);;Argument[0];ReturnValue.SyntheticField[Models.SyntheticFields+InnerSyntheticFields.value];value;df-generated + public InnerSyntheticFields MakeInner(string v) + { + return new InnerSyntheticFields(v); + } +} + +public class SyntheticProperties +{ + private string Prop1 { get; set; } + + private string Prop2 { get; set; } + + // summary=Models;SyntheticProperties;false;SyntheticProperties;(System.String);;Argument[0];Argument[this];taint;df-generated + // contentbased-summary=Models;SyntheticProperties;false;SyntheticProperties;(System.String);;Argument[0];Argument[this].SyntheticField[Models.SyntheticProperties.Prop1];value;df-generated + public SyntheticProperties(string v1) + { + Prop1 = v1; + } + + // summary=Models;SyntheticProperties;false;GetProp1;();;Argument[this];ReturnValue;taint;df-generated + // contentbased-summary=Models;SyntheticProperties;false;GetProp1;();;Argument[this].SyntheticField[Models.SyntheticProperties.Prop1];ReturnValue;value;df-generated + public string GetProp1() + { + return Prop1; + } + + // summary=Models;SyntheticProperties;false;SetProp2;(System.String);;Argument[0];Argument[this];taint;df-generated + // No content based summary as Prop2 is a dead synthetic field. + public void SetProp2(string v) + { + Prop2 = v; + } +} From 68165bbce40a3677db8b23eb118641dce0532daa Mon Sep 17 00:00:00 2001 From: Michael Nebel Date: Tue, 17 Sep 2024 16:08:37 +0200 Subject: [PATCH 10/10] C#/Java: Address review comments. --- .../modelgenerator/internal/CaptureModels.qll | 42 +++++++------------ .../modelgenerator/internal/CaptureModels.qll | 42 +++++++------------ 2 files changed, 30 insertions(+), 54 deletions(-) diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 359a01dde4d0..ab5de0d01979 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -311,8 +311,9 @@ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { private predicate mentionsField(PropagateContentFlow::AccessPath ap) { exists(ContentSet head, PropagateContentFlow::AccessPath tail | head = ap.getHead() and - tail = ap.getTail() and - (mentionsField(tail) or isField(head)) + tail = ap.getTail() + | + mentionsField(tail) or isField(head) ) } @@ -369,11 +370,10 @@ private predicate apiContentFlow( private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { exists(PropagateContentFlow::AccessPath tail, ContentSet head | head = path.getHead() and - tail = path.getTail() and - ( - exists(getSyntheticName(head)) or - hasSyntheticContent(tail) - ) + tail = path.getTail() + | + exists(getSyntheticName(head)) or + hasSyntheticContent(tail) ) } @@ -425,24 +425,6 @@ private module AccessPathSyntheticValidation { step(t1, read, t2, store) } - /** - * Takes one or more synthetic steps. - * Synth ->+ Synth - */ - private predicate synthPathStepRec( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - hasSyntheticContent(read) and - hasSyntheticContent(store) and - ( - step(t1, read, t2, store) - or - exists(PropagateContentFlow::AccessPath mid, Type midType | - step(t1, read, midType, mid) and synthPathStepRec(midType, mid.reverse(), t2, store) - ) - ) - } - /** * Holds if there exists a path of steps from `read` to an exit. * @@ -451,8 +433,11 @@ private module AccessPathSyntheticValidation { private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { synthPathExit(t, read, _, _) or + hasSyntheticContent(read) and exists(PropagateContentFlow::AccessPath mid, Type midType | - synthPathStepRec(t, read, midType, mid) and synthPathExit(midType, mid.reverse(), _, _) + hasSyntheticContent(mid) and + step(t, read, midType, mid) and + reachesSynthExit(midType, mid.reverse()) ) } @@ -464,8 +449,11 @@ private module AccessPathSyntheticValidation { private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { synthPathEntry(_, _, t, store) or + hasSyntheticContent(store) and exists(PropagateContentFlow::AccessPath mid, Type midType | - synthPathEntry(_, _, midType, mid) and synthPathStepRec(midType, mid.reverse(), t, store) + hasSyntheticContent(mid) and + step(midType, mid, t, store) and + synthEntryReaches(midType, mid.reverse()) ) } diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 359a01dde4d0..ab5de0d01979 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -311,8 +311,9 @@ private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { private predicate mentionsField(PropagateContentFlow::AccessPath ap) { exists(ContentSet head, PropagateContentFlow::AccessPath tail | head = ap.getHead() and - tail = ap.getTail() and - (mentionsField(tail) or isField(head)) + tail = ap.getTail() + | + mentionsField(tail) or isField(head) ) } @@ -369,11 +370,10 @@ private predicate apiContentFlow( private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { exists(PropagateContentFlow::AccessPath tail, ContentSet head | head = path.getHead() and - tail = path.getTail() and - ( - exists(getSyntheticName(head)) or - hasSyntheticContent(tail) - ) + tail = path.getTail() + | + exists(getSyntheticName(head)) or + hasSyntheticContent(tail) ) } @@ -425,24 +425,6 @@ private module AccessPathSyntheticValidation { step(t1, read, t2, store) } - /** - * Takes one or more synthetic steps. - * Synth ->+ Synth - */ - private predicate synthPathStepRec( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - hasSyntheticContent(read) and - hasSyntheticContent(store) and - ( - step(t1, read, t2, store) - or - exists(PropagateContentFlow::AccessPath mid, Type midType | - step(t1, read, midType, mid) and synthPathStepRec(midType, mid.reverse(), t2, store) - ) - ) - } - /** * Holds if there exists a path of steps from `read` to an exit. * @@ -451,8 +433,11 @@ private module AccessPathSyntheticValidation { private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { synthPathExit(t, read, _, _) or + hasSyntheticContent(read) and exists(PropagateContentFlow::AccessPath mid, Type midType | - synthPathStepRec(t, read, midType, mid) and synthPathExit(midType, mid.reverse(), _, _) + hasSyntheticContent(mid) and + step(t, read, midType, mid) and + reachesSynthExit(midType, mid.reverse()) ) } @@ -464,8 +449,11 @@ private module AccessPathSyntheticValidation { private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { synthPathEntry(_, _, t, store) or + hasSyntheticContent(store) and exists(PropagateContentFlow::AccessPath mid, Type midType | - synthPathEntry(_, _, midType, mid) and synthPathStepRec(midType, mid.reverse(), t, store) + hasSyntheticContent(mid) and + step(midType, mid, t, store) and + synthEntryReaches(midType, mid.reverse()) ) }