Skip to content

Commit

Permalink
Merge pull request #18231 from hvitved/rust/models-as-data-summaries
Browse files Browse the repository at this point in the history
Rust: Models-as-data for flow summaries
  • Loading branch information
hvitved authored Dec 11, 2024
2 parents 34aefb4 + baf186f commit 22aaf74
Show file tree
Hide file tree
Showing 16 changed files with 439 additions and 79 deletions.
14 changes: 2 additions & 12 deletions rust/ql/lib/codeql/rust/dataflow/FlowSummary.qll
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,7 @@ private import codeql.rust.elements.internal.CallExprBaseImpl::Impl as CallExprB
// import all instances below
private module Summaries {
private import codeql.rust.Frameworks

// TODO: Use models-as-data when it's available
private class UnwrapSummary extends SummarizedCallable::Range {
UnwrapSummary() { this = "lang:core::_::<crate::option::Option>::unwrap" }

override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input = "Argument[self].Variant[crate::option::Option::Some(0)]" and
output = "ReturnValue" and
preservesValue = true
}
}
private import codeql.rust.dataflow.internal.ModelsAsData
}

/** Provides the `Range` class used to define the extent of `LibraryCallable`. */
Expand Down Expand Up @@ -62,7 +52,7 @@ module SummarizedCallable {
*
* `preservesValue` indicates whether this is a value-preserving step or a taint-step.
*/
abstract predicate propagatesFlow(string input, string output, boolean preservesValue);
predicate propagatesFlow(string input, string output, boolean preservesValue) { none() }
}
}

Expand Down
4 changes: 3 additions & 1 deletion rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,7 @@ private class VariantFieldContent extends VariantContent, TVariantFieldContent {
}

/** A canonical path pointing to a struct. */
private class StructCanonicalPath extends MkStructCanonicalPath {
class StructCanonicalPath extends MkStructCanonicalPath {
CrateOriginOption crate;
string path;

Expand All @@ -606,6 +606,8 @@ private class StructCanonicalPath extends MkStructCanonicalPath {
/** Gets the underlying struct. */
Struct getStruct() { hasExtendedCanonicalPath(result, crate, path) }

string getExtendedCanonicalPath() { result = path }

string toString() { result = this.getStruct().getName().getText() }

Location getLocation() { result = this.getStruct().getLocation() }
Expand Down
16 changes: 16 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/internal/FlowSummaryImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,22 @@ module Input implements InputSig<Location, RustDataFlow> {
arg = v.getExtendedCanonicalPath() + "::" + field
)
)
or
exists(StructCanonicalPath s, string field |
result = "Struct" and
c = TStructFieldContent(s, field) and
arg = s.getExtendedCanonicalPath() + "::" + field
)
or
result = "ArrayElement" and
c = TArrayElement() and
arg = ""
or
exists(int pos |
result = "Tuple" and
c = TTuplePositionContent(pos) and
arg = pos.toString()
)
)
}

Expand Down
140 changes: 140 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/internal/ModelsAsData.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/**
* Defines extensible predicates for contributing library models from data extensions.
*
* The extensible relations have the following columns:
*
* - Sources:
* `crate; path; output; kind; provenance`
* - Sinks:
* `crate; path; input; kind; provenance`
* - Summaries:
* `crate; path; input; output; kind; provenance`
*
* The interpretation of a row is similar to API-graphs with a left-to-right
* reading.
*
* 1. The `crate` column selects a crate.
* 2. The `path` column selects a function with the given canonical path within
* the crate.
* 3. The `input` column specifies how data enters the element selected by the
* first 2 columns, and the `output` column specifies how data leaves the
* element selected by the first 2 columns. Both `input` and `output` are
* `.`-separated lists of "access path tokens" to resolve, starting at the
* selected function.
*
* The following tokens are supported:
* - `Argument[n]`: the `n`-th argument to a call. May be a range of form `x..y` (inclusive)
* and/or a comma-separated list.
* - `Parameter[n]`: the `n`-th parameter of a callback. May be a range of form `x..y` (inclusive)
* and/or a comma-separated list.
* - `ReturnValue`: the value returned by a function call.
* - `ArrayElement`: an element of an array.
* - `Variant[v::f]`: field `f` of the variant with canonical path `v`, for example
* `Variant[crate::ihex::Record::Data::value]`.
* - `Variant[v(i)]`: position `i` inside the variant with canonical path `v`, for example
* `Variant[crate::option::Option::Some(0)]`.
* - `Struct[s::f]`: field `f` of the struct with canonical path `s`, for example
* `Struct[crate::process::Child::stdin]`.
* - `Tuple[i]`: the `i`th element of a tuple.
* 4. The `kind` column is a tag that can be referenced from QL to determine to
* which classes the interpreted elements should be added. For example, for
* sources `"remote"` indicates a default remote flow source, and for summaries
* `"taint"` indicates a default additional taint step and `"value"` indicates a
* globally applicable value-preserving step.
* 5. The `provenance` column is mainly used internally, and should be set to `"manual"` for
* all custom models.
*/

private import rust
private import codeql.rust.dataflow.FlowSummary

/**
* Holds if in a call to the function with canonical path `path`, defined in the
* crate `crate`, the value referred to by `output` is a flow source of the given
* `kind`.
*
* `output = "ReturnValue"` simply means the result of the call itself.
*
* For more information on the `kind` parameter, see
* https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst.
*/
extensible predicate sourceModel(
string crate, string path, string output, string kind, string provenance,
QlBuiltins::ExtensionId madId
);

/**
* Holds if in a call to the function with canonical path `path`, defined in the
* crate `crate`, the value referred to by `input` is a flow sink of the given
* `kind`.
*
* For example, `input = "Argument[0]"` means the first argument of the call.
*
* The following kinds are supported:
*
* - `sql-injection`: a flow sink for SQL injection.
*/
extensible predicate sinkModel(
string crate, string path, string input, string kind, string provenance,
QlBuiltins::ExtensionId madId
);

/**
* Holds if in a call to the function with canonical path `path`, defined in the
* crate `crate`, the value referred to by `input` can flow to the value referred
* to by `output`.
*
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving
* steps, respectively.
*/
extensible predicate summaryModel(
string crate, string path, string input, string output, string kind, string provenance,
QlBuiltins::ExtensionId madId
);

/**
 * Holds if the given extension tuple `madId` should pretty-print as `model`.
 *
 * The columns are printed in the order in which the extensible predicates
 * declare them: `crate; path; output; kind` for sources,
 * `crate; path; input; kind` for sinks, and
 * `crate; path; input; output; kind` for summaries. The `provenance` column
 * is not printed.
 *
 * This predicate should only be used in tests.
 */
predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) {
  exists(string crate, string path, string output, string kind |
    // Arguments are positional: `output` precedes `kind` in `sourceModel`.
    sourceModel(crate, path, output, kind, _, madId) and
    model = "Source: " + crate + "; " + path + "; " + output + "; " + kind
  )
  or
  exists(string crate, string path, string input, string kind |
    // Arguments are positional: `input` precedes `kind` in `sinkModel`.
    sinkModel(crate, path, input, kind, _, madId) and
    model = "Sink: " + crate + "; " + path + "; " + input + "; " + kind
  )
  or
  exists(string crate, string path, string input, string output, string kind |
    summaryModel(crate, path, input, output, kind, _, madId) and
    model = "Summary: " + crate + "; " + path + "; " + input + "; " + output + "; " + kind
  )
}

/**
 * A summarized callable whose flow summaries are taken from rows of the
 * `summaryModel` extensible predicate. The callable is identified by the
 * string `crate + "::_::" + path` built from the row's first two columns.
 */
private class SummarizedCallableFromModel extends SummarizedCallable::Range {
  private string crateName;
  private string canonicalPath;

  SummarizedCallableFromModel() {
    this = crateName + "::_::" + canonicalPath and
    summaryModel(crateName, canonicalPath, _, _, _, _, _)
  }

  /**
   * Holds if a `summaryModel` row for this callable describes flow from
   * `input` to `output`. `preservesValue` is `true` for rows of kind
   * `"value"` and `false` for rows of kind `"taint"`; `model` is `"MaD:"`
   * followed by the string form of the row's extension id.
   */
  override predicate propagatesFlow(
    string input, string output, boolean preservesValue, string model
  ) {
    exists(string kind, QlBuiltins::ExtensionId madId |
      summaryModel(crateName, canonicalPath, input, output, kind, _, madId) and
      model = "MaD:" + madId.toString() and
      (
        kind = "value" and preservesValue = true
        or
        kind = "taint" and preservesValue = false
      )
    )
  }
}
17 changes: 17 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/internal/empty.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
extensions:
# Make sure that the extensible model predicates have at least one definition
# to avoid errors about undefined extensionals.
- addsTo:
pack: codeql/rust-all
extensible: sourceModel
data: []

- addsTo:
pack: codeql/rust-all
extensible: sinkModel
data: []

- addsTo:
pack: codeql/rust-all
extensible: summaryModel
data: []
6 changes: 6 additions & 0 deletions rust/ql/lib/codeql/rust/frameworks/stdlib/lang-core.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extensions:
- addsTo:
pack: codeql/rust-all
extensible: summaryModel
data:
- ["lang:core", "<crate::option::Option>::unwrap", "Argument[self].Variant[crate::option::Option::Some(0)]", "ReturnValue", "value", "manual"]
2 changes: 2 additions & 0 deletions rust/ql/lib/qlpack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@ dependencies:
codeql/ssa: ${workspace}
codeql/tutorial: ${workspace}
codeql/util: ${workspace}
dataExtensions:
- /**/*.model.yml
warnOnImplicitThis: true
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
models
| 1 | Summary: lang:core; <crate::option::Option>::unwrap; Argument[self].Variant[crate::option::Option::Some(0)]; ReturnValue; value |
edges
| main.rs:19:13:19:21 | source(...) | main.rs:20:10:20:10 | s | provenance | |
| main.rs:24:13:24:21 | source(...) | main.rs:27:10:27:10 | c | provenance | |
Expand Down Expand Up @@ -35,7 +36,7 @@ edges
| main.rs:214:14:214:14 | n | main.rs:214:25:214:25 | n | provenance | |
| main.rs:224:14:224:29 | Some(...) [Some] | main.rs:225:10:225:11 | s1 [Some] | provenance | |
| main.rs:224:19:224:28 | source(...) | main.rs:224:14:224:29 | Some(...) [Some] | provenance | |
| main.rs:225:10:225:11 | s1 [Some] | main.rs:225:10:225:20 | s1.unwrap(...) | provenance | |
| main.rs:225:10:225:11 | s1 [Some] | main.rs:225:10:225:20 | s1.unwrap(...) | provenance | MaD:1 |
| main.rs:229:14:229:29 | Some(...) [Some] | main.rs:231:14:231:15 | s1 [Some] | provenance | |
| main.rs:229:19:229:28 | source(...) | main.rs:229:14:229:29 | Some(...) [Some] | provenance | |
| main.rs:231:14:231:15 | s1 [Some] | main.rs:231:14:231:16 | TryExpr | provenance | |
Expand Down
91 changes: 91 additions & 0 deletions rust/ql/test/library-tests/dataflow/models/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,102 @@ fn test_set_var_field() {
}
}

struct MyStruct {
field1: i64,
field2: i64,
}

// has a flow model
fn get_struct_field(s: MyStruct) -> i64 {
0
}

fn test_get_struct_field() {
let s = source(6);
let my_struct = MyStruct {
field1: s,
field2: 0,
};
sink(get_struct_field(my_struct)); // $ hasValueFlow=6
let my_struct2 = MyStruct {
field1: 0,
field2: s,
};
sink(get_struct_field(my_struct2));
}

// has a flow model
fn set_struct_field(i: i64) -> MyStruct {
MyStruct {
field1: 0,
field2: 1,
}
}

fn test_set_struct_field() {
let s = source(7);
let my_struct = set_struct_field(s);
sink(my_struct.field1);
sink(my_struct.field2); // $ MISSING: hasValueFlow=7
}

// has a flow model
fn get_array_element(a: [i64; 1]) -> i64 {
0
}

fn test_get_array_element() {
let s = source(8);
sink(get_array_element([s])); // $ hasValueFlow=8
}

// has a flow model
fn set_array_element(i: i64) -> [i64; 1] {
[0]
}

fn test_set_array_element() {
let s = source(9);
let arr = set_array_element(s);
sink(arr[0]); // $ hasValueFlow=9
}

// has a flow model
fn get_tuple_element(a: (i64, i64)) -> i64 {
0
}

fn test_get_tuple_element() {
let s = source(10);
let t = (s, 0);
sink(get_tuple_element(t)); // $ hasValueFlow=10
let t = (0, s);
sink(get_tuple_element(t));
}

// has a flow model
fn set_tuple_element(i: i64) -> (i64, i64) {
(0, 1)
}

fn test_set_tuple_element() {
let s = source(11);
let t = set_tuple_element(s);
sink(t.0);
sink(t.1); // $ hasValueFlow=11
}

fn main() {
test_identify();
test_get_var_pos();
test_set_var_pos();
test_get_var_field();
test_set_var_field();
test_get_struct_field();
test_set_struct_field();
test_get_array_element();
test_set_array_element();
test_get_tuple_element();
test_set_tuple_element();
let dummy = Some(0); // ensure that the `lang:core` crate is extracted
}
Loading

0 comments on commit 22aaf74

Please sign in to comment.