Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JS: Add library for exporting graphs as type models #15386

Merged
merged 27 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
acef9b7
Dynamic/JS: Add library for exporting models
asgerf Apr 4, 2024
c55e03c
Dynamic/JS: Add support for re-exporting type models
asgerf Apr 4, 2024
348c95e
JS: Add a test case with fluent flow
asgerf Apr 5, 2024
946f0b4
JS: Add test for class with aliases
asgerf Apr 5, 2024
f4e05cc
JS: Add tests with semi-internal class problem
asgerf Apr 5, 2024
ab3c03d
JS: Add test where root export object is a function
asgerf Apr 5, 2024
3022c59
JS: Add access path alias test
asgerf Apr 5, 2024
ef7767b
JS: Add partial test for subclassing
asgerf Apr 5, 2024
9313564
JS: Add subclassing test and fix lack of subclassing handling
asgerf Apr 5, 2024
f2ea88a
JS: Add test showing missing re-export of base class relationship
asgerf Apr 5, 2024
56ebe6c
JS: More re-export logic to handle subclass export
asgerf Apr 5, 2024
29a6145
JS: Add test case showing problem with chains going through internal …
asgerf Apr 5, 2024
81b96a8
JS: Ensure MkClassInstance exists for base classes
asgerf Apr 5, 2024
8cb80d6
JS: Switch from hasLocationInfo to Location
asgerf Apr 8, 2024
8210143
Dynamic: Add hasPrettyName()
asgerf Apr 8, 2024
f5355cf
Dynamic: Sync ApiGraphModels.qll
asgerf Apr 9, 2024
15eabb4
JS: Address review comments
asgerf Apr 12, 2024
330229c
Update javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsDa…
asgerf Apr 12, 2024
3949ae4
Update shared/mad/codeql/mad/dynamic/GraphExport.qll
asgerf Apr 12, 2024
844b29b
Update shared/mad/codeql/mad/dynamic/GraphExport.qll
asgerf Apr 16, 2024
ee5cb6f
Update shared/mad/codeql/mad/dynamic/GraphExport.qll
asgerf Apr 16, 2024
be64daf
Merge branch 'main' into js/graph-export
asgerf Apr 16, 2024
c0db40d
Merge branch 'js/graph-export' of github.com:asgerf/codeql into js/gr…
asgerf Apr 16, 2024
3335d48
Sync files
asgerf Apr 16, 2024
93a9c62
Merge branch 'main' into js/graph-export
asgerf Apr 17, 2024
5e7026c
JS: Use AccessPath as parameter type
asgerf Apr 17, 2024
3c885f3
JS: Fix bug in MkClassInstance use-nodes
asgerf Apr 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions javascript/ql/lib/semmle/javascript/ApiGraphs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -501,16 +501,25 @@ module API {
}

/**
* Gets the location of this API node, if it corresponds to a program element with a source location.
*/
final Location getLocation() { result = this.getInducingNode().getLocation() }

/**
* DEPRECATED: Use `getLocation().hasLocationInfo()` instead.
*
* Holds if this node is located in file `path` between line `startline`, column `startcol`,
* and line `endline`, column `endcol`.
*
* For nodes that do not have a meaningful location, `path` is the empty string and all other
* parameters are zero.
*/
predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
this.getInducingNode().hasLocationInfo(path, startline, startcol, endline, endcol)
deprecated predicate hasLocationInfo(
string path, int startline, int startcol, int endline, int endcol
) {
this.getLocation().hasLocationInfo(path, startline, startcol, endline, endcol)
or
not exists(this.getInducingNode()) and
not exists(this.getLocation()) and
path = "" and
startline = 0 and
startcol = 0 and
Expand Down Expand Up @@ -696,14 +705,7 @@ module API {
or
any(Type t).hasUnderlyingType(m, _)
} or
MkClassInstance(DataFlow::ClassNode cls) {
hasSemantics(cls) and
(
cls = trackDefNode(_)
or
cls.getAnInstanceReference() = trackDefNode(_)
)
} or
MkClassInstance(DataFlow::ClassNode cls) { needsDefNode(cls) } or
MkDef(DataFlow::Node nd) { rhs(_, _, nd) } or
MkUse(DataFlow::Node nd) { use(_, _, nd) } or
/** A use of a TypeScript type. */
Expand All @@ -716,6 +718,17 @@ module API {
trackUseNode(src, true, bound, "").flowsTo(nd.getCalleeNode())
}

private predicate needsDefNode(DataFlow::ClassNode cls) {
hasSemantics(cls) and
(
cls = trackDefNode(_)
or
cls.getAnInstanceReference() = trackDefNode(_)
or
needsDefNode(cls.getADirectSubClass())
)
}

class TDef = MkModuleDef or TNonModuleDef;

class TNonModuleDef = MkModuleExport or MkClassInstance or MkDef or MkSyntheticCallbackArg;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,11 @@ private predicate isPrivateAssignment(DataFlow::Node node) {
)
}

private predicate isPrivateLike(API::Node node) { isPrivateAssignment(node.asSink()) }
/**
* Holds if `node` is the sink node corresponding to the right-hand side of a private declaration,
* like a private field (`#field`) or class member with the `private` modifier.
*/
predicate isPrivateLike(API::Node node) { isPrivateAssignment(node.asSink()) }

bindingset[name]
private int getNameBadness(string name) {
Expand Down
104 changes: 104 additions & 0 deletions javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
private import javascript
private import internal.ApiGraphModels as Shared
private import internal.ApiGraphModelsSpecific as Specific
private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming
import Shared::ModelInput as ModelInput
import Shared::ModelOutput as ModelOutput

Expand Down Expand Up @@ -55,3 +56,106 @@ private class TaintStepFromSummary extends TaintTracking::SharedTaintStep {
summaryStepNodes(pred, succ, "taint")
}
}

/**
* Specifies which parts of the API graph to export in `ModelExport`.
*/
signature module ModelExportSig {
/**
* Holds if the exported model should contain `node`, if it is publicly accessible.
*
* This ensures that all ways to access `node` will be exported in type models.
*/
predicate shouldContain(API::Node node);

/**
* Holds if `node` must be named if it is part of the exported graph.
*/
default predicate mustBeNamed(API::Node node) { none() }

/**
* Holds if the exported model should preserve all paths leading to an instance of `type`,
* including partial ones. It does not need to be closed transitively, `ModelExport` will
* extend this to include type models from which `type` can be derived.
*/
default predicate shouldContainType(string type) { none() }
}

/**
* Module for exporting type models for a given set of nodes in the API graph.
*/
module ModelExport<ModelExportSig S> {
private import codeql.mad.dynamic.GraphExport
private import internal.ApiGraphModelsExport

private module GraphExportConfig implements GraphExportSig<Location, API::Node> {
predicate edge = Specific::apiGraphHasEdge/3;

predicate shouldContain = S::shouldContain/1;

predicate shouldNotContain(API::Node node) {
EndpointNaming::isPrivateLike(node)
or
node instanceof API::Use
}

predicate mustBeNamed(API::Node node) {
node.getAValueReachingSink() instanceof DataFlow::ClassNode
or
node = API::Internal::getClassInstance(_)
or
S::mustBeNamed(node)
}

predicate exposedName(API::Node node, string type, string path) {
node = API::moduleExport(type) and path = ""
}

predicate suggestedName(API::Node node, string type) {
exists(string package, string name |
(
EndpointNaming::sinkHasPrimaryName(node, package, name) and
not EndpointNaming::aliasDefinition(_, _, _, _, node)
or
EndpointNaming::aliasDefinition(_, _, package, name, node)
) and
type = EndpointNaming::renderName(package, name)
)
}

bindingset[host]
predicate hasTypeSummary(API::Node host, string path) {
exists(string methodName |
functionReturnsReceiver(host.getMember(methodName).getAValueReachingSink()) and
path = "Member[" + methodName + "].ReturnValue"
)
}

pragma[nomagic]
private predicate functionReturnsReceiver(DataFlow::FunctionNode func) {
getAReceiverRef(func).flowsTo(func.getReturnNode())
}

pragma[nomagic]
private DataFlow::MethodCallNode getAReceiverCall(DataFlow::FunctionNode func) {
result = getAReceiverRef(func).getAMethodCall()
}

pragma[nomagic]
private predicate callReturnsReceiver(DataFlow::MethodCallNode call) {
functionReturnsReceiver(call.getACallee().flow())
}

pragma[nomagic]
private DataFlow::SourceNode getAReceiverRef(DataFlow::FunctionNode func) {
result = func.getReceiver()
or
result = getAReceiverCall(func) and
callReturnsReceiver(result)
}
}

private module ExportedGraph = TypeGraphExport<GraphExportConfig, S::shouldContainType/1>;

import ExportedGraph
}
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ private predicate summaryModel(string type, string path, string input, string ou
}

/** Holds if a type model exists for the given parameters. */
private predicate typeModel(string type1, string type2, string path) {
predicate typeModel(string type1, string type2, string path) {
exists(string row |
typeModel(row) and
row.splitAt(";", 0) = type1 and
Expand Down Expand Up @@ -435,7 +435,7 @@ private API::Node getNodeFromType(string type) {
* Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple.
*/
pragma[nomagic]
private API::Node getNodeFromPath(string type, AccessPath path, int n) {
API::Node getNodeFromPath(string type, AccessPath path, int n) {
isRelevantFullPath(type, path) and
(
n = 0 and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
* Contains an extension of `GraphExport` that relies on API graph specific functionality.
*/

private import ApiGraphModels as Shared
private import codeql.mad.dynamic.GraphExport
private import ApiGraphModelsSpecific as Specific

private module API = Specific::API;

private import Shared

/**
* Holds if some proper prefix of `(type, path)` evaluated to `node`, where `remainingPath`
* is bound to the suffix of `path` that was not evaluated yet.
*
* See concrete examples in `TypeGraphExport`.
*/
bindingset[type, path]
private predicate partiallyEvaluatedModel(
string type, string path, API::Node node, string remainingPath
) {
exists(int n, AccessPath accessPath |
accessPath = path and
asgerf marked this conversation as resolved.
Show resolved Hide resolved
getNodeFromPath(type, accessPath, n) = node and
n > 0 and
// Note that `n < accessPath.getNumToken()` is implied by the use of strictconcat()
remainingPath =
strictconcat(int k |
k = [n .. accessPath.getNumToken() - 1]
|
accessPath.getToken(k), "." order by k
)
)
}

/**
* Holds if `type` and all types leading to `type` should be re-exported.
*/
signature predicate shouldContainTypeSig(string type);

/**
* Wrapper around `GraphExport` that also exports information about re-exported types.
*
* ### JavaScript example 1
* For example, suppose `shouldContainType("foo")` holds, and the following is the entry point for a package `bar`:
* ```js
* // bar.js
* module.exports.xxx = require('foo');
* ```
* then this would generate the following type model:
* ```
* foo; bar; Member[xxx]
* ```
*
* ### JavaScript example 2
* For a more complex case, suppose the following type model exists:
* ```
* foo.XYZ; foo; Member[x].Member[y].Member[z]
* ```
* And the package exports something that matches a prefix of the access path above:
* ```js
* module.exports.blah = require('foo').x.y;
* ```
* This would result in the following type model:
* ```
* foo.XYZ; bar; Member[blah].Member[z]
* ```
* Notice that the access path `Member[blah].Member[z]` consists of an access path generated from the API
* graph, with pieces of the access path from the original type model appended to it.
*/
module TypeGraphExport<
GraphExportSig<Specific::Location, API::Node> S, shouldContainTypeSig/1 shouldContainType>
{
/** Like `shouldContainType` but includes types that lead to `type` via type models. */
private predicate shouldContainTypeEx(string type) {
shouldContainType(type)
or
exists(string prevType |
shouldContainType(prevType) and
Shared::typeModel(prevType, type, _)
)
}

private module Config implements GraphExportSig<Specific::Location, API::Node> {
import S

predicate shouldContain(API::Node node) {
S::shouldContain(node)
or
exists(string type1 | shouldContainTypeEx(type1) |
ModelOutput::getATypeNode(type1).getAValueReachableFromSource() = node.asSink()
or
exists(string type2, string path |
Shared::typeModel(type1, type2, path) and
getNodeFromPath(type2, path, _).getAValueReachableFromSource() = node.asSink()
)
)
}
}

private module ExportedGraph = GraphExport<Specific::Location, API::Node, Config>;

import ExportedGraph

/**
* Holds if `type1, type2, path` should be emitted as a type model, that is `(type2, path)` leads to an instance of `type1`.
*/
predicate typeModel(string type1, string type2, string path) {
ExportedGraph::typeModel(type1, type2, path)
or
shouldContainTypeEx(type1) and
exists(API::Node node |
// A relevant type is exported directly
Specific::sourceFlowsToSink(ModelOutput::getATypeNode(type1), node) and
ExportedGraph::pathToNode(type2, path, node)
or
// Something that leads to a relevant type, but didn't finish its access path, is exported
exists(string midType, string midPath, string remainingPath, string prefix, API::Node source |
Shared::typeModel(type1, midType, midPath) and
partiallyEvaluatedModel(midType, midPath, source, remainingPath) and
Specific::sourceFlowsToSink(source, node) and
ExportedGraph::pathToNode(type2, prefix, node) and
path = join(prefix, remainingPath)
)
)
}
}
Loading
Loading