Merge pull request #17256 from RasmusWL/js-threat-models

JS: Add support for threat models
github · Nov 4, 2024 · 8f80c24 · 8f80c24
2 parents 34e8ea1 + c0ad9ba
commit 8f80c24
Show file tree

Hide file tree

Showing 45 changed files with 2,361 additions and 185 deletions.
diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-javascript.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-javascript.rst
@@ -506,7 +506,7 @@ Kinds
 Source kinds
 ~~~~~~~~~~~~
 
-- **remote**: A generic source of remote flow. Most taint-tracking queries will use such a source. Currently this is the only supported source kind.
+See documentation below for :ref:`Threat models <threat-models-javascript>`.
 
 Sink kinds
 ~~~~~~~~~~
@@ -529,3 +529,10 @@ Summary kinds
 
 - **taint**: A summary that propagates taint. This means the output is not necessarily equal to the input, but it was derived from the input in an unrestrictive way. An attacker who controls the input will have significant control over the output as well.
 - **value**: A summary that preserves the value of the input or creates a copy of the input such that all of its object properties are preserved.
+
+.. _threat-models-javascript:
+
+Threat models
+-------------
+
+.. include:: ../reusables/threat-model-description.rst
diff --git a/docs/codeql/reusables/beta-note-threat-models.rst b/docs/codeql/reusables/beta-note-threat-models.rst
@@ -2,4 +2,4 @@
 
     Note
 
-    Threat models are currently in beta and subject to change. During the beta, threat models are supported only by Java, C# and Python analysis.
+    Threat models are currently in beta and subject to change. During the beta, threat models are supported only by Java, C#, Python and JavaScript/TypeScript analysis.
diff --git a/javascript/ql/lib/change-notes/2024-10-29-threat-models.md b/javascript/ql/lib/change-notes/2024-10-29-threat-models.md
@@ -0,0 +1,4 @@
+---
+category: feature
+---
+* Added support for custom threat-models, which can be used in most of our taint-tracking queries, see our [documentation](https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#extending-codeql-coverage-with-threat-models) for more details.
diff --git a/javascript/ql/lib/ext/default-threat-models-fixup.model.yml b/javascript/ql/lib/ext/default-threat-models-fixup.model.yml
@@ -0,0 +1,8 @@
+extensions:
+  - addsTo:
+      pack: codeql/threat-models
+      extensible: threatModelConfiguration
+    data:
+    # Since responses are enabled by default in the shared threat-models configuration,
+    # we need to disable it here to keep existing behavior for the javascript analysis.
+      - ["response", false, -2147483647]
diff --git a/javascript/ql/lib/javascript.qll b/javascript/ql/lib/javascript.qll
@@ -81,6 +81,7 @@ import semmle.javascript.frameworks.Classnames
 import semmle.javascript.frameworks.ClassValidator
 import semmle.javascript.frameworks.ClientRequests
 import semmle.javascript.frameworks.ClosureLibrary
+import semmle.javascript.frameworks.CommandLineArguments
 import semmle.javascript.frameworks.CookieLibraries
 import semmle.javascript.frameworks.Credentials
 import semmle.javascript.frameworks.CryptoLibraries

diff --git a/javascript/ql/lib/qlpack.yml b/javascript/ql/lib/qlpack.yml
@@ -9,6 +9,7 @@ dependencies:
   codeql/dataflow: ${workspace}
   codeql/mad: ${workspace}
   codeql/regex: ${workspace}
+  codeql/threat-models: ${workspace}
   codeql/tutorial: ${workspace}
   codeql/util: ${workspace}
   codeql/xml: ${workspace}
@@ -17,4 +18,5 @@ dataExtensions:
   - semmle/javascript/frameworks/**/model.yml
   - semmle/javascript/frameworks/**/*.model.yml
   - semmle/javascript/security/domains/**/*.model.yml
+  - ext/*.model.yml
 warnOnImplicitThis: true
diff --git a/javascript/ql/lib/semmle/javascript/Concepts.qll b/javascript/ql/lib/semmle/javascript/Concepts.qll
@@ -5,6 +5,63 @@
  */
 
 import javascript
+private import codeql.threatmodels.ThreatModels
+
+/**
+ * A data flow source, for a specific threat-model.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `ThreatModelSource::Range` instead.
+ */
+class ThreatModelSource extends DataFlow::Node instanceof ThreatModelSource::Range {
+  /**
+   * Gets a string that represents the source kind with respect to threat modeling.
+   *
+   *
+   * See
+   * - https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst
+   * - https://github.com/github/codeql/blob/main/shared/threat-models/ext/threat-model-grouping.model.yml
+   */
+  string getThreatModel() { result = super.getThreatModel() }
+
+  /** Gets a string that describes the type of this threat-model source. */
+  string getSourceType() { result = super.getSourceType() }
+}
+
+/** Provides a class for modeling new sources for specific threat-models. */
+module ThreatModelSource {
+  /**
+   * A data flow source, for a specific threat-model.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `ThreatModelSource` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /**
+     * Gets a string that represents the source kind with respect to threat modeling.
+     *
+     * See
+     * - https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst
+     * - https://github.com/github/codeql/blob/main/shared/threat-models/ext/threat-model-grouping.model.yml
+     */
+    abstract string getThreatModel();
+
+    /** Gets a string that describes the type of this threat-model source. */
+    abstract string getSourceType();
+  }
+}
+
+/**
+ * A data flow source that is enabled in the current threat model configuration.
+ */
+class ActiveThreatModelSource extends ThreatModelSource {
+  ActiveThreatModelSource() {
+    exists(string kind |
+      currentThreatModel(kind) and
+      this.getThreatModel() = kind
+    )
+  }
+}
 
 /**
  * A data flow node that executes an operating system command,
@@ -65,6 +122,19 @@ abstract class FileSystemReadAccess extends FileSystemAccess {
   abstract DataFlow::Node getADataNode();
 }
 
+/**
+ * A FileSystemReadAccess seen as a ThreatModelSource.
+ */
+private class FileSystemReadAccessAsThreatModelSource extends ThreatModelSource::Range {
+  FileSystemReadAccessAsThreatModelSource() {
+    this = any(FileSystemReadAccess access).getADataNode()
+  }
+
+  override string getThreatModel() { result = "file" }
+
+  override string getSourceType() { result = "FileSystemReadAccess" }
+}
+
 /**
  * A data flow node that writes data to the file system.
  */
@@ -91,6 +161,17 @@ abstract class DatabaseAccess extends DataFlow::Node {
   }
 }
 
+/**
+ * A DatabaseAccess seen as a ThreatModelSource.
+ */
+private class DatabaseAccessAsThreatModelSource extends ThreatModelSource::Range {
+  DatabaseAccessAsThreatModelSource() { this = any(DatabaseAccess access).getAResult() }
+
+  override string getThreatModel() { result = "database" }
+
+  override string getSourceType() { result = "DatabaseAccess" }
+}
+
 /**
  * A data flow node that reads persistent data.
  */

diff --git a/javascript/ql/lib/semmle/javascript/frameworks/CommandLineArguments.qll b/javascript/ql/lib/semmle/javascript/frameworks/CommandLineArguments.qll
@@ -0,0 +1,144 @@
+/** Provides modeling for parsed command line arguments. */
+
+import javascript
+
+/**
+ * An object containing command-line arguments, potentially parsed by a library.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `CommandLineArguments::Range` instead.
+ */
+class CommandLineArguments extends ThreatModelSource instanceof CommandLineArguments::Range { }
+
+/** Provides a class for modeling new sources of remote user input. */
+module CommandLineArguments {
+  /**
+   * An object containing command-line arguments, potentially parsed by a library.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `CommandLineArguments` instead.
+   */
+  abstract class Range extends ThreatModelSource::Range {
+    override string getThreatModel() { result = "commandargs" }
+
+    override string getSourceType() { result = "CommandLineArguments" }
+  }
+}
+
+/** A read of `process.argv`, considered as a threat-model source. */
+private class ProcessArgv extends CommandLineArguments::Range {
+  ProcessArgv() {
+    // `process.argv[0]` and `process.argv[1]` are paths to `node` and `main`, and
+    // therefore should not be considered a threat-source... However, we don't have an
+    // easy way to exclude them, so we need to allow them.
+    this = NodeJSLib::process().getAPropertyRead("argv")
+  }
+
+  override string getSourceType() { result = "process.argv" }
+}
+
+private class DefaultModels extends CommandLineArguments::Range {
+  DefaultModels() {
+    // `require('get-them-args')(...)` => `{ unknown: [], a: ... b: ... }`
+    this = DataFlow::moduleImport("get-them-args").getACall()
+    or
+    // `require('optimist').argv` => `{ _: [], a: ... b: ... }`
+    this = DataFlow::moduleMember("optimist", "argv")
+    or
+    // `require("arg")({...spec})` => `{_: [], a: ..., b: ...}`
+    this = DataFlow::moduleImport("arg").getACall()
+    or
+    // `(new (require(argparse)).ArgumentParser({...spec})).parse_args()` => `{a: ..., b: ...}`
+    this =
+      API::moduleImport("argparse")
+          .getMember("ArgumentParser")
+          .getInstance()
+          .getMember("parse_args")
+          .getACall()
+    or
+    // `require('command-line-args')({...spec})` => `{a: ..., b: ...}`
+    this = DataFlow::moduleImport("command-line-args").getACall()
+    or
+    // `require('meow')(help, {...spec})` => `{a: ..., b: ....}`
+    this = DataFlow::moduleImport("meow").getACall()
+    or
+    // `require("dashdash").createParser(...spec)` => `{a: ..., b: ...}`
+    this =
+      [
+        API::moduleImport("dashdash"),
+        API::moduleImport("dashdash").getMember("createParser").getReturn()
+      ].getMember("parse").getACall()
+    or
+    // `require('commander').myCmdArgumentName`
+    this = commander().getAMember().asSource()
+    or
+    // `require('commander').opt()` => `{a: ..., b: ...}`
+    this = commander().getMember("opts").getACall()
+    or
+    this = API::moduleImport("yargs/yargs").getReturn().getMember("argv").asSource()
+  }
+}
+
+/**
+ * A step for propagating taint through command line parsing,
+ * such as `var succ = require("minimist")(pred)`.
+ */
+private class ArgsParseStep extends TaintTracking::SharedTaintStep {
+  override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
+    exists(DataFlow::CallNode call |
+      call = DataFlow::moduleMember("args", "parse").getACall() or
+      call = DataFlow::moduleImport(["yargs-parser", "minimist", "subarg"]).getACall()
+    |
+      succ = call and
+      pred = call.getArgument(0)
+    )
+  }
+}
+
+/**
+ * Gets a Command instance from the `commander` library.
+ */
+private API::Node commander() {
+  result = API::moduleImport("commander")
+  or
+  // `require("commander").program === require("commander")`
+  result = commander().getMember("program")
+  or
+  result = commander().getMember("Command").getInstance()
+  or
+  // lots of chainable methods
+  result = commander().getAMember().getReturn()
+}
+
+/**
+ * Gets an instance of `yargs`.
+ * Either directly imported as a module, or through some chained method call.
+ */
+private DataFlow::SourceNode yargs() {
+  result = DataFlow::moduleImport("yargs")
+  or
+  // script used to generate list of chained methods: https://gist.github.com/erik-krogh/f8afe952c0577f4b563a993e613269ba
+  exists(string method |
+    not method =
+      // the methods that does not return a chained `yargs` object.
+      [
+        "getContext", "getDemandedOptions", "getDemandedCommands", "getDeprecatedOptions",
+        "_getParseContext", "getOptions", "getGroups", "getStrict", "getStrictCommands",
+        "getExitProcess", "locale", "getUsageInstance", "getCommandInstance"
+      ]
+  |
+    result = yargs().getAMethodCall(method)
+  )
+}
+
+/**
+ * An array of command line arguments (`argv`) parsed by the `yargs` library.
+ */
+private class YargsArgv extends CommandLineArguments::Range {
+  YargsArgv() {
+    this = yargs().getAPropertyRead("argv")
+    or
+    this = yargs().getAMethodCall("parse") and
+    this.(DataFlow::MethodCallNode).getNumArgument() = 0
+  }
+}
diff --git a/javascript/ql/lib/semmle/javascript/frameworks/NodeJSLib.model.yml b/javascript/ql/lib/semmle/javascript/frameworks/NodeJSLib.model.yml
@@ -0,0 +1,10 @@
+extensions:
+  - addsTo:
+      pack: codeql/javascript-all
+      extensible: sourceModel
+    data:
+      - ['fs', 'Member[promises].Member[readFile].ReturnValue.Member[then].Argument[0].Parameter[0]', 'file']
+      - ['global', 'Member[process].Member[stdin].Member[read].ReturnValue', 'stdin']
+      - ['global', 'Member[process].Member[stdin].Member[on,addListener].WithStringArgument[0=data].Argument[1].Parameter[0]', 'stdin']
+      - ['readline', 'Member[createInterface].ReturnValue.Member[question].Argument[1].Parameter[0]', 'stdin']
+      - ['readline', 'Member[createInterface].ReturnValue.Member[on,addListener].WithStringArgument[0=line].Argument[1].Parameter[0]', 'stdin']
diff --git a/javascript/ql/lib/semmle/javascript/frameworks/NodeJSLib.qll b/javascript/ql/lib/semmle/javascript/frameworks/NodeJSLib.qll
@@ -1244,4 +1244,13 @@ module NodeJSLib {
       result = moduleImport().getAPropertyRead(member)
     }
   }
+
+  /** A read of `process.env`, considered as a threat-model source. */
+  private class ProcessEnvThreatSource extends ThreatModelSource::Range {
+    ProcessEnvThreatSource() { this = NodeJSLib::process().getAPropertyRead("env") }
+
+    override string getThreatModel() { result = "environment" }
+
+    override string getSourceType() { result = "process.env" }
+  }
 }
diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll
@@ -32,6 +32,19 @@ private class RemoteFlowSourceFromMaD extends RemoteFlowSource {
   override string getSourceType() { result = "Remote flow" }
 }
 
+/**
+ * A threat-model flow source originating from a data extension.
+ */
+private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Range {
+  ThreatModelSourceFromDataExtension() { this = ModelOutput::getASourceNode(_).asSource() }
+
+  override string getThreatModel() { this = ModelOutput::getASourceNode(result).asSource() }
+
+  override string getSourceType() {
+    result = "Source node (" + this.getThreatModel() + ") [from data-extension]"
+  }
+}
+
 /**
  * Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes.
  */

diff --git a/...script/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectCustomizations.qll b/...script/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectCustomizations.qll
@@ -38,9 +38,16 @@ module ClientSideUrlRedirect {
     DocumentUrl() { this = "document.url" }
   }
 
-  /** A source of remote user input, considered as a flow source for unvalidated URL redirects. */
-  class RemoteFlowSourceAsSource extends Source instanceof RemoteFlowSource {
-    RemoteFlowSourceAsSource() { not this.(ClientSideRemoteFlowSource).getKind().isPath() }
+  /**
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
+   */
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source instanceof ActiveThreatModelSource {
+    ActiveThreatModelSourceAsSource() { not this.(ClientSideRemoteFlowSource).getKind().isPath() }
 
     override DataFlow::FlowLabel getAFlowLabel() {
       if this.(ClientSideRemoteFlowSource).getKind().isUrl()

diff --git a/javascript/ql/lib/semmle/javascript/security/dataflow/CodeInjectionCustomizations.qll b/javascript/ql/lib/semmle/javascript/security/dataflow/CodeInjectionCustomizations.qll
@@ -27,8 +27,15 @@ module CodeInjection {
    */
   abstract class Sanitizer extends DataFlow::Node { }
 
-  /** A source of remote user input, considered as a flow source for code injection. */
-  class RemoteFlowSourceAsSource extends Source instanceof RemoteFlowSource { }
+  /**
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
+   */
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }
 
   /**
    * An expression which may be interpreted as an AngularJS expression.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,4 +2,4 @@

		Note

		Threat models are currently in beta and subject to change. During the beta, threat models are supported only by Java, C# and Python analysis.
		Threat models are currently in beta and subject to change. During the beta, threat models are supported only by Java, C#, Python and JavaScript/TypeScript analysis.