From baf186fed78eeda71e4b9e1aaa4aade46a09194c Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Wed, 11 Dec 2024 10:06:47 +0100 Subject: [PATCH] Address review comments --- .../rust/dataflow/internal/ModelsAsData.qll | 48 +++++++++++++++++-- .../dataflow/models/models.expected | 29 ++++++----- .../dataflow/models/models.ext.yml | 1 - .../library-tests/dataflow/models/models.ql | 15 ++++++ 4 files changed, 74 insertions(+), 19 deletions(-) diff --git a/rust/ql/lib/codeql/rust/dataflow/internal/ModelsAsData.qll b/rust/ql/lib/codeql/rust/dataflow/internal/ModelsAsData.qll index ebffe41a185d..244cd6f300f0 100644 --- a/rust/ql/lib/codeql/rust/dataflow/internal/ModelsAsData.qll +++ b/rust/ql/lib/codeql/rust/dataflow/internal/ModelsAsData.qll @@ -1,5 +1,48 @@ /** * Defines extensible predicates for contributing library models from data extensions. + * + * The extensible relations have the following columns: + * + * - Sources: + * `crate; path; output; kind; provenance` + * - Sinks: + * `crate; path; input; kind; provenance` + * - Summaries: + * `crate; path; input; output; kind; provenance` + * + * The interpretation of a row is similar to API-graphs with a left-to-right + * reading. + * + * 1. The `crate` column selects a crate. + * 2. The `path` column selects a function with the given canonical path within + * the crate. + * 3. The `input` column specifies how data enters the element selected by the + * first 2 columns, and the `output` column specifies how data leaves the + * element selected by the first 2 columns. Both `input` and `output` are + * `.`-separated lists of "access path tokens" to resolve, starting at the + * selected function. + * + * The following tokens are supported: + * - `Argument[n]`: the `n`-th argument to a call. May be a range of form `x..y` (inclusive) + * and/or a comma-separated list. + * - `Parameter[n]`: the `n`-th parameter of a callback. May be a range of form `x..y` (inclusive) + * and/or a comma-separated list. + * - `ReturnValue`: the value returned by a function call. + * - `ArrayElement`: an element of an array. + * - `Variant[v::f]`: field `f` of the variant with canonical path `v`, for example + * `Variant[crate::ihex::Record::Data::value]`. + * - `Variant[v(i)]`: position `i` inside the variant with canonical path `v`, for example + * `Variant[crate::option::Option::Some(0)]`. + * - `Struct[s::f]`: field `f` of the struct with canonical path `v`, for example + * `Struct[crate::process::Child::stdin]`. + * - `Tuple[i]`: the `i`th element of a tuple. + * 4. The `kind` column is a tag that can be referenced from QL to determine to + * which classes the interpreted elements should be added. For example, for + * sources `"remote"` indicates a default remote flow source, and for summaries + * `"taint"` indicates a default additional taint step and `"value"` indicates a + * globally applicable value-preserving step. + * 5. The `provenance` column is mainly used internally, and should be set to `"manual"` for + * all custom models. */ private import rust @@ -12,9 +55,8 @@ private import codeql.rust.dataflow.FlowSummary * * `output = "ReturnValue"` simply means the result of the call itself. * - * The following kinds are supported: - * - * - `remote`: a general remote flow source. + * For more information on the `kind` parameter, see + * https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst. */ extensible predicate sourceModel( string crate, string path, string output, string kind, string provenance, diff --git a/rust/ql/test/library-tests/dataflow/models/models.expected b/rust/ql/test/library-tests/dataflow/models/models.expected index 5b0d5c1588e5..8c5ce17f8c83 100644 --- a/rust/ql/test/library-tests/dataflow/models/models.expected +++ b/rust/ql/test/library-tests/dataflow/models/models.expected @@ -5,16 +5,15 @@ models | 4 | Summary: repo::test; crate::get_tuple_element; Argument[0].Tuple[0]; ReturnValue; value | | 5 | Summary: repo::test; crate::get_var_field; Argument[0].Variant[crate::MyFieldEnum::C::field_c]; ReturnValue; value | | 6 | Summary: repo::test; crate::get_var_pos; Argument[0].Variant[crate::MyPosEnum::A(0)]; ReturnValue; value | -| 7 | Summary: repo::test; crate::identity; Argument[0]; ReturnValue; value | -| 8 | Summary: repo::test; crate::set_array_element; Argument[0]; ReturnValue.ArrayElement; value | -| 9 | Summary: repo::test; crate::set_tuple_element; Argument[0]; ReturnValue.Tuple[1]; value | -| 10 | Summary: repo::test; crate::set_var_field; Argument[0]; ReturnValue.Variant[crate::MyFieldEnum::D::field_d]; value | -| 11 | Summary: repo::test; crate::set_var_pos; Argument[0]; ReturnValue.Variant[crate::MyPosEnum::B(0)]; value | +| 7 | Summary: repo::test; crate::set_array_element; Argument[0]; ReturnValue.ArrayElement; value | +| 8 | Summary: repo::test; crate::set_tuple_element; Argument[0]; ReturnValue.Tuple[1]; value | +| 9 | Summary: repo::test; crate::set_var_field; Argument[0]; ReturnValue.Variant[crate::MyFieldEnum::D::field_d]; value | +| 10 | Summary: repo::test; crate::set_var_pos; Argument[0]; ReturnValue.Variant[crate::MyPosEnum::B(0)]; value | edges | main.rs:15:13:15:21 | source(...) | main.rs:16:19:16:19 | s | provenance | | | main.rs:15:13:15:21 | source(...) | main.rs:16:19:16:19 | s | provenance | | -| main.rs:16:19:16:19 | s | main.rs:16:10:16:20 | identity(...) | provenance | MaD:7 | -| main.rs:16:19:16:19 | s | main.rs:16:10:16:20 | identity(...) | provenance | MaD:7 | +| main.rs:16:19:16:19 | s | main.rs:16:10:16:20 | identity(...) | provenance | QL | +| main.rs:16:19:16:19 | s | main.rs:16:10:16:20 | identity(...) | provenance | QL | | main.rs:25:13:25:22 | source(...) | main.rs:26:17:26:17 | s | provenance | | | main.rs:26:17:26:17 | s | main.rs:26:10:26:18 | coerce(...) | provenance | MaD:1 | | main.rs:40:13:40:21 | source(...) | main.rs:41:27:41:27 | s | provenance | | @@ -29,8 +28,8 @@ edges | main.rs:53:13:53:21 | source(...) | main.rs:54:26:54:26 | s | provenance | | | main.rs:54:14:54:27 | set_var_pos(...) [B] | main.rs:57:9:57:23 | ...::B(...) [B] | provenance | | | main.rs:54:14:54:27 | set_var_pos(...) [B] | main.rs:57:9:57:23 | ...::B(...) [B] | provenance | | -| main.rs:54:26:54:26 | s | main.rs:54:14:54:27 | set_var_pos(...) [B] | provenance | MaD:11 | -| main.rs:54:26:54:26 | s | main.rs:54:14:54:27 | set_var_pos(...) [B] | provenance | MaD:11 | +| main.rs:54:26:54:26 | s | main.rs:54:14:54:27 | set_var_pos(...) [B] | provenance | MaD:10 | +| main.rs:54:26:54:26 | s | main.rs:54:14:54:27 | set_var_pos(...) [B] | provenance | MaD:10 | | main.rs:57:9:57:23 | ...::B(...) [B] | main.rs:57:22:57:22 | i | provenance | | | main.rs:57:9:57:23 | ...::B(...) [B] | main.rs:57:22:57:22 | i | provenance | | | main.rs:57:22:57:22 | i | main.rs:57:33:57:33 | i | provenance | | @@ -47,8 +46,8 @@ edges | main.rs:85:13:85:21 | source(...) | main.rs:86:28:86:28 | s | provenance | | | main.rs:86:14:86:29 | set_var_field(...) [D] | main.rs:89:9:89:37 | ...::D {...} [D] | provenance | | | main.rs:86:14:86:29 | set_var_field(...) [D] | main.rs:89:9:89:37 | ...::D {...} [D] | provenance | | -| main.rs:86:28:86:28 | s | main.rs:86:14:86:29 | set_var_field(...) [D] | provenance | MaD:10 | -| main.rs:86:28:86:28 | s | main.rs:86:14:86:29 | set_var_field(...) [D] | provenance | MaD:10 | +| main.rs:86:28:86:28 | s | main.rs:86:14:86:29 | set_var_field(...) [D] | provenance | MaD:9 | +| main.rs:86:28:86:28 | s | main.rs:86:14:86:29 | set_var_field(...) [D] | provenance | MaD:9 | | main.rs:89:9:89:37 | ...::D {...} [D] | main.rs:89:35:89:35 | i | provenance | | | main.rs:89:9:89:37 | ...::D {...} [D] | main.rs:89:35:89:35 | i | provenance | | | main.rs:89:35:89:35 | i | main.rs:89:47:89:47 | i | provenance | | @@ -71,8 +70,8 @@ edges | main.rs:148:13:148:21 | source(...) | main.rs:149:33:149:33 | s | provenance | | | main.rs:149:15:149:34 | set_array_element(...) [array[]] | main.rs:150:10:150:12 | arr [array[]] | provenance | | | main.rs:149:15:149:34 | set_array_element(...) [array[]] | main.rs:150:10:150:12 | arr [array[]] | provenance | | -| main.rs:149:33:149:33 | s | main.rs:149:15:149:34 | set_array_element(...) [array[]] | provenance | MaD:8 | -| main.rs:149:33:149:33 | s | main.rs:149:15:149:34 | set_array_element(...) [array[]] | provenance | MaD:8 | +| main.rs:149:33:149:33 | s | main.rs:149:15:149:34 | set_array_element(...) [array[]] | provenance | MaD:7 | +| main.rs:149:33:149:33 | s | main.rs:149:15:149:34 | set_array_element(...) [array[]] | provenance | MaD:7 | | main.rs:150:10:150:12 | arr [array[]] | main.rs:150:10:150:15 | arr[0] | provenance | | | main.rs:150:10:150:12 | arr [array[]] | main.rs:150:10:150:15 | arr[0] | provenance | | | main.rs:159:13:159:22 | source(...) | main.rs:160:14:160:14 | s | provenance | | @@ -87,8 +86,8 @@ edges | main.rs:172:13:172:22 | source(...) | main.rs:173:31:173:31 | s | provenance | | | main.rs:173:13:173:32 | set_tuple_element(...) [tuple.1] | main.rs:175:10:175:10 | t [tuple.1] | provenance | | | main.rs:173:13:173:32 | set_tuple_element(...) [tuple.1] | main.rs:175:10:175:10 | t [tuple.1] | provenance | | -| main.rs:173:31:173:31 | s | main.rs:173:13:173:32 | set_tuple_element(...) [tuple.1] | provenance | MaD:9 | -| main.rs:173:31:173:31 | s | main.rs:173:13:173:32 | set_tuple_element(...) [tuple.1] | provenance | MaD:9 | +| main.rs:173:31:173:31 | s | main.rs:173:13:173:32 | set_tuple_element(...) [tuple.1] | provenance | MaD:8 | +| main.rs:173:31:173:31 | s | main.rs:173:13:173:32 | set_tuple_element(...) [tuple.1] | provenance | MaD:8 | | main.rs:175:10:175:10 | t [tuple.1] | main.rs:175:10:175:12 | t.1 | provenance | | | main.rs:175:10:175:10 | t [tuple.1] | main.rs:175:10:175:12 | t.1 | provenance | | nodes diff --git a/rust/ql/test/library-tests/dataflow/models/models.ext.yml b/rust/ql/test/library-tests/dataflow/models/models.ext.yml index 80a19b3e8dac..d06fc442e2a4 100644 --- a/rust/ql/test/library-tests/dataflow/models/models.ext.yml +++ b/rust/ql/test/library-tests/dataflow/models/models.ext.yml @@ -3,7 +3,6 @@ extensions: pack: codeql/rust-all extensible: summaryModel data: - - ["repo::test", "crate::identity", "Argument[0]", "ReturnValue", "value", "manual"] - ["repo::test", "crate::coerce", "Argument[0]", "ReturnValue", "taint", "manual"] - ["repo::test", "crate::get_var_pos", "Argument[0].Variant[crate::MyPosEnum::A(0)]", "ReturnValue", "value", "manual"] - ["repo::test", "crate::set_var_pos", "Argument[0]", "ReturnValue.Variant[crate::MyPosEnum::B(0)]", "value", "manual"] diff --git a/rust/ql/test/library-tests/dataflow/models/models.ql b/rust/ql/test/library-tests/dataflow/models/models.ql index 10d2f9f91b70..e456d6d1c1c1 100644 --- a/rust/ql/test/library-tests/dataflow/models/models.ql +++ b/rust/ql/test/library-tests/dataflow/models/models.ql @@ -15,6 +15,21 @@ query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) Private::External::invalidSpecComponent(s, c) } +// not defined in `models.ext.yml`, in order to test that we can also define +// models directly in QL +private class SummarizedCallableIdentity extends SummarizedCallable::Range { + SummarizedCallableIdentity() { this = "repo::test::_::crate::identity" } + + override predicate propagatesFlow( + string input, string output, boolean preservesValue, string provenance + ) { + input = "Argument[0]" and + output = "ReturnValue" and + preservesValue = true and + provenance = "QL" + } +} + module CustomConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { DefaultFlowConfig::isSource(source) }