From f7ac78cb9cefab8264d2dc8915c1928126478aa2 Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim Date: Tue, 23 Jan 2024 16:03:22 +0100 Subject: [PATCH 01/11] Remove obsolete bits from the spec. --- index.bs | 950 ------------------------------------------------------- 1 file changed, 950 deletions(-) diff --git a/index.bs b/index.bs index 1d55498..f4265ae 100644 --- a/index.bs +++ b/index.bs @@ -37,18 +37,6 @@ text: template contents; type: dfn; url: https://html.spec.whatwg.org/#template- "href": "https://cure53.de/fp170.pdf", "title": "mXSS Attacks: Attacking well-secured Web-Applications by using innerHTML Mutations", "publisher": "Ruhr-Universität Bochum" - }, - "MXSS1": { - "href": "https://research.securitum.com/mutation-xss-via-mathml-mutation-dompurify-2-0-17-bypass/", - "title": "Mutation XSS via namespace confusion" - }, - "MXSS2": { - "href": "https://www.checkmarx.com/blog/technical-blog/vulnerabilities-discovered-in-mozilla-bleach/", - "title": "CVE-2020-6802 Write-up" - }, - "DEFAULTS": { - "href": "https://github.com/WICG/sanitizer-api/blob/main/resources/defaults-derivation.html", - "title": "Sanitizer API Defaults" } } @@ -96,857 +84,10 @@ API which aims to do just that. ## API Summary ## {#api-summary} -
-```js -let s = new Sanitizer(); - -// Case: The input data is available as a tree of DOM nodes. -let userControlledTree = ...; -element.replaceChildren(s.sanitize(userControlledTree)); - -// Case: The input is available as a string, and we know the element to insert -// it into: -let userControlledInput = "<img src=x onerror=alert(1)//>"; -element.setHTML(userControlledInput, {sanitizer: s}); - -// Case: The input is available as a string, and we know which type of element -// we will eventually insert it to, but can't or don't want to perform the -// insertion now: -let forDiv = s.sanitizeFor("div", userControlledInput); -// Later: -document.querySelector(\`${forDiv.localName}#target\`).replaceChildren(...forDiv.childNodes); -``` -
- -## The Trouble With Strings ## {#strings} - -Many HTML sanitizer libraries are based on string-to-string APIs, while this -API does not offer such a method. This sub-section explains the reasons and -implications for the Sanitizer API. - -To convert a string into a tree of nodes (or a fragment), it needs to be parsed. -The [HTML parsing algorithm](https://html.spec.whatwg.org/multipage/parsing.html#parsing) -carefully specifies how parsing HTML works. This parsing algorithm is dependent -on the current node as its parsing context. That is, the same string parsed -in the context of different HTML nodes will yield different parse trees. - -
-The string `bla` in `
` and `` ⇨ `` -
- -
-A table cell in `` and non-table (`
`) context. - * `
text
` ⇨ `
text
` - * `
text
` ⇨ `
text
` -
- -These differences can allow bugs to creep into a site's sanitization strategy, -which can (and have been) exploited by a class of XSS-style attacks called mXSS. -These attacks ultimately depend on confusions of the parsing context, for -example when a developer will sanitize a string in one (parsing) context, -while then applying the resulting string in a different context, where it will -be interpreted differently. - -
- Two mXSS-style examples in real-world libraries can be found in - [[MXSS1]] and [[MXSS2]]. We'd like to stress that we picked these reports - for their ease of reading. There are similar reports for pretty much every - other tool that deals with HTML parsing. -
- -Since this attack class depends on a particular usage of the string *after* -the sanitization has occurred, the API itself has only limited capability -to protect its users. As a result, the Sanitizer API follows the following -principle: - -Whenever the Sanitizer API parses or unparses a DOM (sub-)tree to or from a -string, it will either do so in a fashion where the correct parse context is -implied by the operation; or it will require a parse context to be supplied by -the developer and will retain the given context in the resulting argument. -In other words, the Sanitizer API will never assume a parsing context, or -discard a parsing context that has been supplied earlier. - -### Case 1: Sanitizing With Nodes, Only. ### {#string-context-case-1} - -If the user data in question is already available as DOM nodes - for example -a {{Document}} instance in a frame - then the Sanitizer can be easily used: - -
-```js -const sanitizer = new Sanitizer( ... ); // Our Sanitizer; - -// There is an iframe with id "userFrame" whose content we are interested in. -const user_tree = document.getElementById("userFrame").contentWindow.document; -const sanitized = sanitizer.sanitize(user_tree); -``` -
- -Note: Parsing an HTML string can have various side-effects, like network - requests or executing scripts. Naively parsing these, e.g. by assigning a - string to `.innerHTML` of an unconnected element, will not reliably prevent - these. Therefore, if the user data to be sanitized is originally - in string form, we recommend to go with one of the following cases. - -### Case 2: Sanitizing a String with Implied Context. ### {#string-context-case-2} - -If the user data is available in string form and we wish to -directly insert the sanitized subtree into the DOM, we can do so as follows: - -
-```js -const user_string = "..."; // The user string. -const sanitizer = new Sanitizer( ... ); // Our Sanitizer; - -// We want to insert the HTML in user_string into a target element with id -// target. That is, we want the equivalent of target.innerHTML = value, except -// without the XSS risks. -document.getElementById("target").setHTML(user_string, {sanitizer: sanitizer}); -``` -
- -### Case 3: Sanitizing a String with a Given Context. ### {#string-context-case-3} - -If the user data is available in string form and the developer wishes to -sanitize it now, but apply the result to the DOM later, then the Sanitizer -must be informed about the context in which it will be used. To prevent context -confusion the result is wrapped in a container that contains both the -result and also the parse context. Conveniently, this container already -exists, and it is the node itself! - -
-```js -// A certain piece of user input is meant to be used repeatedly, to insert -// it in multiple elements on the page. All these elements will be
-// elements. -const user_string = "..."; // The user string. -const sanitizer = new Sanitizer( ... ); // Our Sanitizer. - -const sanitized = sanitizer.sanitizeFor("div", user_string); -sanitized instanceof HTMLDivElement // true. The Sanitizer has given us a node. - -// ... later, in the same program ... -for (let elem = ... of ...) { - // All of our "elem" instances should be of the same type used in the - // .sanitizeFor call above. With an assertion library, this could look as - // follows: - assert_true(elem instanceof sanitized.constructor); // Assuming assert_true, like in WPT tests. - elem.replaceChildren(...sanitized.childNodes); -} - -// Instead of: -elem.replaceChildren(...sanitized.childNodes); -// one could write: -elem.innerHTML = sanitized.innerHTML; -// This should have the same effect, except be slower, since this will trigger -// un-parsing and then re-parsing the node tree which we already have -// available as a node tree. So we recommend to stick with the former version. -``` -
- -### The Other Case ### {#string-context-case-other} - -What if neither of these cases works with a given application structure, and a -string-to-string operation is required? In this case, the developer is free to -take the sanitization result and remove it from its context. In this case, the -responsibility to prevent mXSS-class attacks that stem from mis-applying those -strings in an inappropriate context remains with the developer. - -
-```js -const user_string = "..."; // The user string. -const sanitizer = new Sanitizer( ... ); // Our Sanitizer. - -// The developer plans to insert this string into a
element, but has to -// keep this around as a string (instead of an element). It's important that -// the developer remembers the parsing context and MUST NOT use this in a -// different parsing context in order to prevent mXSS attacks. -const sanitized_for_div = sanitizer.sanitizeFor("div", user_string).innerHTML; -``` -
- # Framework # {#framework} -## Sanitizer API ## {#sanitizer-api} - -The core API is the `Sanitizer` object and the sanitize method. Sanitizers can -be instantiated using an optional `SanitizerConfig` dictionary for options. -The most common use-case - preventing XSS - is handled by default, -so that creating a Sanitizer with a custom config is necessary only to -handle additional, application-specific use cases. - -
-  [
-    Exposed=(Window),
-    SecureContext
-  ] interface Sanitizer {
-    constructor(optional SanitizerConfig config = {});
-
-    DocumentFragment sanitize((Document or DocumentFragment) input);
-    Element? sanitizeFor(DOMString element, DOMString input);
-
-    SanitizerConfig getConfiguration();
-    static SanitizerConfig getDefaultConfiguration();
-  };
-
- -* The - new Sanitizer(config) constructor steps - are to run the [=create a sanitizer=] algorithm steps on [=this=] with - |config| as parameter. -* The sanitize(input) - method steps are to return the result of running the [=sanitize=] - algorithm on |input|, -* The sanitizeFor(element, input) - method steps are to return the result of running [=sanitizeFor=] - algorithm on |element| and |input|. -* The getConfiguration() method steps are - to return the result of running the [=query the sanitizer config=] - algorithm. It essentially returns a copy of the Sanitizer's - [=configuration dictionary=], with some degree of normalization. -* The value of the static - getDefaultConfiguration() method steps - are to return the value of the [=default configuration=] object. - -The {{Element}} interface gains an additional method, `setHTML` which -applies a string using a `Sanitizer` directly to an existing element node. - -
-  dictionary SetHTMLOptions {
-    Sanitizer sanitizer;
-  };
-  [SecureContext]
-  partial interface Element {
-    undefined setHTML(DOMString input, optional SetHTMLOptions options = {});
-  };
-
- -* The setHTML(input, options) - method steps are to run the [=sanitizeAndSet=] algorithm on [=this=], |input|, and - |options|. - - -sanitizer-secure-context.https.html -sanitizer-insecure-context.html -idlharness.https.window.js - - -Issue: Is this how we specify a method on an existing class "owned" by a different spec? - -
-```js - // To make our examples easy to follow, we'll need a way create DOM nodes. - // The following is hacky way to accomplish this, for illustration only, - // that you shall pretty please not use in practice. This parsing method can - // cause side-effects based on the string being parsed, which is insecure. - // In fact, this very API exists for the sole purpose of preventing the - // problems that this approach has. - // - // But... for our examples we'll need something that is quick and easy, since - // we cannot use our own Sanitizer API to explain our own Sanitizer API. - const to_node = str => document.createRange().createContextualFragment(str); - - // The core API of the Sanitizer is the .sanitize method: - let untrusted_input = to_node("Hello!"); - const sanitizer = new Sanitizer(); - sanitizer.sanitize(untrusted_input); // DocumentFragment w/ a text node, "Hello!" - - // Probably we want to put this somewhere in our DOM: - element.replaceChildren(sanitizer.sanitize(untrusted_input)); - - // If our input contains markup it'll be mostly preserved, except for - // script-y markup: - untrusted_input = to_node("Hello!"); - sanitizer.sanitize(untrusted_input); // Hello! - element.replaceChildren(sanitizer.sanitize(untrusted_input)); // No alert! - - // The .sanitize method is the primary API, and returns a DocumentFragment. - // The .sanitizeFor method accepts and parses a string and returns an HTML - // element node. - const hello = to_node("hello"); - (sanitizer.sanitize(hello)) instanceof DocumentFragment; // true - (sanitizer.sanitizeFor("template", "hello")) instanceof HTMLTemplateElement; // true -``` -
- -## String Handling ## {#api-string-handling} - -Parsing (and unparsing) strings to (or from) HTML requires a context element. -Thus, the `sanitizeFor` method requires us to pass in a context, which the -implementation can then hand over to the HTML Parser. - -Additionally, the {{Element}} interface gains a `setHTML` method, which -always knows the correct context, because it is applied to a given {{Element}} -instance. This {{Element}} is the correct context for both parsing and -unparsing its own content. - -One way to conceptualize this is to view string sanitization as a three step -operation: 1, parsing the string; 2, sanitizing the resulting node tree; -and 3, grafting the resulting subtree onto our live DOM. -`Sanitizer.sanitize` is the middle step. -`Sanitizer.sanitizeFor` performs the first and second steps, but leaves the -third to the developer. `Element.setHTML` does all three. Which to use -depends on the structure of your application, whether you can do all three -steps simultaneously, or whether maybe the sanitization is removed (in either -code structure or point in time) from the eventual modification of the DOM. - -
-```js - // If the markup to be sanitized is already available as a tree, for example - // from an embedded frame, one can use sanitize: - document.getElementById("target").replaceChildren( - sanitizer.sanitize( - document.querySelector("iframe#myframe").contentWindow.document)); - - // If the markup to be sanitized is present in string form, but we already - // have the element we want to insert in available: - const untrusted_input = "...."; - document.getElementById("someelement").setHTML( - untrusted_input, {sanitizer: sanitizer}); - - // Same as above, but using the default Sanitizer configuration: - document.getElementById("somelement").setHTML(untrusted_input); - - // If the markup to be sanitized is present in string form, but we don't want - // to do the DOM insertion now: - let no_xss = sanitizer.sanitizeFor("div", untrusted_input); - // ... much later ... - document.querySelector("div#targetdiv").replaceChildren(...no_xss.childNodes); - - // Note that parsing HTML depends on the current context in many ways, some - // subtle, some not so much. Supplying a different context than what the - // result will eventually be used in has both security and functional risks. - // It's up to the developer to handle this safely. - // - // Example: Most, many parsing contexts disallow table data () without - // an enclosing table. - sanitizer.sanitizeFor("div", "data").innerHTML // "data" - sanitizer.sanitizeFor("table", "data").innerHTML // "data" -``` -
- -
-Note: Sanitizing a string will use the [=HTML Parser=] to parse the input, -which will perform some degree of normalization. So even -if no sanitization steps are taken on a particular input, it cannot be -guaranteed that the output of `.sanitizeFor` will be character-for-character -identical to the input. - -
- ```js - sanitizer.sanitizeFor("div", "Straße") // Straße - sanitizer.sanitizeFor("div", "") // - ``` -
-
- -
-Note: `Sanitizer.sanitizeFor` and `Element.setHTML` can replace the - respective other. Both are provided since they support different use cases. - -
- ```js - // sanitizeFor, based on SetInnerHTML. - function sanitizeFor(element, input) { - const elem = document.createElement(element); - elem.setHTML(input, {sanitizer: this}); - return elem; - } - - // setHTML, based on sanitizeFor. - function setHTML(input, options) { - const sanitizer = options?.sanitizer ?? new Sanitizer(); - this.replaceChildren(...sanitizer.sanitizeFor(this.localName, input).childNodes); - } - ``` -
-
- -## The Configuration Dictionary ## {#config} - -The Sanitizer's configuration dictionary is a dictionary which -describes modifications to the sanitize operation. If a Sanitizer has -not received an explicit configuration, for example when being -constructed without any parameters, then the [=default configuration=] value -is used as the configuration dictionary. - -
-  dictionary SanitizerConfig {
-    sequence<DOMString> allowElements;
-    sequence<DOMString> blockElements;
-    sequence<DOMString> dropElements;
-    AttributeMatchList allowAttributes;
-    AttributeMatchList dropAttributes;
-    boolean allowCustomElements;
-    boolean allowUnknownMarkup;
-    boolean allowComments;
-  };
-
- -: allowElements -:: The element allow list is a sequence of strings with - elements that the sanitizer should retain in the input. -: blockElements -:: The element block list is a sequence of strings with elements - where the sanitizer should remove the elements from the input, but retain - their children. -: dropElements -:: The element drop list is a sequence of strings with elements - that the sanitizer should remove from the input, including their children. -: allowAttributes -:: The attribute allow list is an [=attribute match list=], which - determines whether an attribute (on a given element) should be allowed. -: dropAttributes -:: The attribute drop list is an [=attribute match list=], which - determines whether an attribute (on a given element) should be dropped. -: allowCustomElements -:: The {{SanitizerConfig/allowCustomElements|allow custom elements}} option - determines whether - [=custom elements=] are to be considered. The default is to drop them. - If this option is true, custom elements will still be checked against all - other built-in or configured checks. -: allowUnknownMarkup -:: The {{SanitizerConfig/allowUnknownMarkup|allow unknown markup}} option - determines whether unknown HTML elements are to be considered. The default - is to drop them. - If this option is true, unknown HTML elements will still be checked against - all other built-in or configured checks. -: allowComments -:: The allow comments option determines whether HTML comments are - allowed. - -Note: `allowElements` creates a sanitizer that defaults to dropping elements, - while `blockElements` and `dropElements` default to keeping unknown - elements. Using both types is possible, but is probably of little practical - use. The same applies to `allowAttributes` and `dropAttributes`. - -
-```js - const sample = to_node("Some text with tags."); - const script_sample = to_node("abc def"); - - // Some text with text tags. - new Sanitizer({allowElements: [ "b" ]}).sanitize(sample); - - // Some text with tags. - new Sanitizer({blockElements: [ "b" ]}).sanitize(sample); - - // Some text tags. - new Sanitizer({dropElements: [ "b" ]}).sanitize(sample); - - // Note: The default configuration handles XSS-relevant input: - - // Non-scripting input will be passed through: - new Sanitizer().sanitize(sample); // Will output sample unmodified. - - // Scripts will be blocked: "abc alert(1) def" - new Sanitizer().sanitize(script_sample); -``` -
- -In addition to allow and block lists for elements and attributes, there are -also options to configure some node or element types. - -Examples: -```js - // Comments will be dropped by default. - const comment = to_node("Hello World!"); - new Sanitizer().sanitize(comment); // "Hello World!" - new Sanitizer({allowComments: true}).sanitize(comment); // Same as comment. -``` - -A sanitizer's configuration can be queried using the -[=query the sanitizer config=] method. - -
-```js - // Does the default config allow script elements? - Sanitizer.getDefaultConfiguration().allowElements.includes("script") // false - - // We found a Sanitizer instance. Does it have an allow-list configured? - const a_sanitizer = ...; - !!a_sanitizer.getConfiguration().allowElements // true, if an allowElements list is configured - - // If it does have an allow elements list, does it include the
element? - a_sanitizer.getConfiguration().allowElements.includes("div") // true, if "div" is in allowElements. - - // Note that the getConfiguration method might do some normalization. E.g., it won't - // contain key/value pairs that are not declare in the IDL. - Object.keys(new Sanitizer({madeUpDictionaryKey: "Hello"}).getConfiguration()) // [] - - // As a Sanitizer's config describes its operation, a new sanitizer with - // another instance's configuration should behave identically. - // (For illustration purposes only. It would make more sense to just use a directly.) - const a = /* ... a Sanitizer we found somewhere ... */; - const b = new Sanitizer(a.getConfiguration()); // b should behave the same as a. - - // getDefaultConfiguration() and new Sanitizer().getConfiguration should be the same. - // (For illustration purposes only. There are better ways of implementing - // object equality in JavaScript.) - JSON.stringify(Sanitizer.getDefaultConfiguration()) == JSON.stringify(new Sanitizer().getConfiguration()); // true -``` -
- -### Attribute Match Lists ### {#attr-match-list} - -An attribute match list is a map of attributes to elements, -where the special name "*" stands for all attributes or elements. -A given |attribute| belonging to an |element| matches an -[=attribute match list=], if the |attribute| is a key in the match list, -and |element| or `"*"` are found in the |attribute|'s value list. - -Element names are interpreted as names in the [[HTML namespace]] and -non-namespaced attributes - i.e., what one may think of as normal [[HTML]] -elements and attributes. Elements are named by their [=Element/local name=], and -[=Attr/local name|attributes, too=]. - -
-  typedef record<DOMString, sequence<DOMString>> AttributeMatchList;
-
- -
-Examples for attributes and attribute match lists: -```js - const sample = to_node("hello"); - - // Allow only : ... - new Sanitizer({allowAttributes: {"style": ["span"]}}).sanitize(sample); - - // Allow style, but not on span: ... - new Sanitizer({allowAttributes: {"style": ["div"]}}).sanitize(sample); - - // Allow style on any elements: ... - new Sanitizer({allowAttributes: {"style": ["*"]}}).sanitize(sample); - - // Drop : ... - new Sanitizer({dropAttributes: {"id": ["span"]}}).sanitize(sample); - - // Drop id, everywhere: ... - new Sanitizer({dropAttributes: {"id": ["*"]}}).sanitize(sample); -``` -
- # Algorithms # {#algorithms} -## API Implementation ## {#api-algorithms} - -
-To create a Sanitizer with an optional |config| parameter, run -these steps: - 1. Create a copy of |config|. - 1. Set |config| as [=this=]'s [=configuration dictionary=]. - - Issue(148): This should explicitly state the config's properties in which element names are found and modify the config with map operations. -
- -Note: The configuration object contains element names in the - [=element allow list=], [=element block list=], and [=element drop list=], and - in the mapped values in the [=attribute allow list=] and [=attribute drop list=]. - -
-To sanitize a given |input| of type `Document or DocumentFragment` -run these steps: - 1. Let |fragment| be the result of running the [=create a document fragment=] - algorithm on |input|. - 1. Run the [=sanitize a document fragment=] algorithm on |fragment|. - 1. Return |fragment|. - - sanitizer-sanitize.https.tentative.html - -
- -Issue(149): The sanitize algorithm does not need to run "create a document fragment". - -
-To sanitize for an |element name| of type -|DOMString| and a given |input| of type |DOMString| run these steps: - 1. Let |element| be an HTML element created by running the steps - of the [=creating an element=] algorithm with the current document, - |element name|, the [=HTML namespace=], and no optional parameters. - 1. If the [=element kind=] of |element| is `regular` and if the - [=baseline element allow list=] does not contain |element name|, - then return `null`. - 1. Let |fragment| be the result of invoking the - [html fragment parsing algorithm](https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm), - with |element| as the `context element` and |input| as `markup`. - 1. Run the steps of the [=sanitize a document fragment=] algorithm on |fragment|. - 1. [=Replace all=] with |fragment| as the `node` and |element| as the - `parent`. - 1. Return |element|. - - sanitizer-sanitizeFor.https.tentative.html - -
- -Issue(140): Does the `.sanitizeFor` element name require namespace-related processing? - -
-To sanitize and set a |value| using an -{{SetHTMLOptions}} |options| dictionary on an {{Element}} node [=this=], -run these steps: - 1. If the [=element kind=] of [=this=] is `regular` and [=this=]'s [=Element/local name=] does not - [=element matches an element name|match=] any name in the - [=baseline element allow list=], then throw a {{TypeError}} and return. - 1. If the {{sanitizer}} member [=map/exists=] in the |options| - {{SetHTMLOptions}} dictionary, - 1. then let |sanitizer| be [=map/get|the value=] of the {{sanitizer}} member - of the |options| {{SetHTMLOptions}} dictionary, - 1. otherwise let |sanitizer| be the result of the [=create a Sanitizer=] - algorithm without a `config` parameter. - 1. Let |fragment| be the result of invoking the - [html fragment parsing algorithm](https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm) - with [=this=] as the `context node` and |value| as `markup`. - 1. Run the steps of the [=sanitize a document fragment=] algorithm - on |fragment|, using |sanitizer| as the current {{Sanitizer}} instance. - 1. [=Replace all=] with |fragment| as the `node` and [=this=] as the `parent`. - - element-set-sanitized-html.https.html - -
- -
-To query the sanitizer config of a given sanitizer instance, -run these steps: - 1. Let |sanitizer| be the current Sanitizer. - 1. Let |config| be |sanitizer|'s [=configuration dictionary=], or the - [=default configuration=] if no [=configuration dictionary=] was given. - 1. Let |result| be a newly constructed {{SanitizerConfig}} dictionary. - 1. For any non-empty member of |config| whose key is declared in - {{SanitizerConfig}}, copy the value to |result|. - 1. Return |result|. - - sanitizer-config.https.html - sanitizer-query-config.https.html - -
- -Issue(150): IDL is taking care of most steps in "query the sanitizer config". Clean up. - -## Helper Definitions ## {#helper-algorithms} - -
-To create a document fragment named |fragment| from an -|input| of type `Document or DocumentFragment`, run these steps: - - 1. Let |node| be null. - 1. Switch based on |input|'s type: - 1. If |input| is of type {{DocumentFragment}}, then: - 1. Set |node| to |input|. - 1. If |input| is of type {{Document}}, then: - 1. Set |node| to |input|'s `documentElement`. - 1. Let |clone| be the result of running [=clone a node=] on |node| with the - clone children flag set. - 1. Let |fragment| be a new {{DocumentFragment}} whose [=Node/node document=] is |node|'s [=Node/node document=]. - 1. [=/Append=] the node |clone| to |fragment|. - 1. Return |fragment|. -
- -## Sanitization Algorithms ## {#sanitizer-algorithms} - -
-To sanitize a document fragment named |fragment| with a {{Sanitizer}} |sanitizer| run these steps: - 1. Let |m| be a map that maps nodes to a [=sanitize action=]. - 1. Let |nodes| be a list containing the [=inclusive descendants=] of - |fragment|, in [=tree order=]. - 1. [=list/iterate|For each=] |node| in |nodes|: - 1. Let |action| be the result of running the [=sanitize a node=] algorithm - on |node| with |sanitizer|. - 1. [=map/Set=] |m|[|node|] to |action|. - 1. [=list/iterate|For each=] |node| in |nodes|: - 1. If |m|[|node|] is `drop`, [=/remove=] |node|. - 1. If |m|[|node|] is `block`, create a {{DocumentFragment}} |fragment|, - [=/append=] all of |node|'s [=tree/children=] to |fragment|, and - [=/replace=] |node| within |node|'s [=tree/parent=] with |fragment|. - 1. If |m|[|node|] is `keep`, do nothing. -
- -Issue(156): The step above needs to explicitly iterate over the children and insert into parent. It could collect them in a variable or do things in place, but this is a bit too imprecise. - -
-To sanitize a node named |node| with |sanitizer| run these steps: - 1. [=Assert=]: |node| is not a {{Document}} or {{DocumentFragment}} or {{Attr}} or {{DocumentType}} [=/node=]. - 1. If |node| is an element node: - 1. Let |element| be |node|. - 1. [=list/iterate|For each=] |attr| in |element|'s - [=Element/attribute list=]: - 1. Let |attr action| be the result of running the - [=sanitize action for an attribute=] algorithm on |attr| and |element|. - 1. If |attr action| is different from `keep`, [=remove an attribute=] supplying |attr|. - 1. Run the steps to [=handle funky elements=] on |element|. - 1. Let |action| be the result of running the - [=sanitize action for an element=] on |element|. - 1. Return |action|. - 1. If |node| is a {{Comment}} [=node=]: - 1. Let |config| be |sanitizer|'s [=configuration dictionary=], or the - [=default configuration=] if no [=configuration dictionary=] was given. - 1. If |config|'s [=allow comments option=] [=map/exists=] and `|config|[allowComments]` is `true`: Return `keep`. - 1. Return `drop`. - 1. If |node| is a {{Text}} [=node=]: Return `keep`. - 1. [=Assert=]: |node| is a {{ProcessingInstruction}} - 1. Return `drop`. -
- -Issue(151): The [=sanitize action for an attribute=] algorithm parameters do not match. -Issue(153): consider creating an effective sanitizer config. Also, IDL guarantees that a config is ALWAYS given. The question is really whether the members exist. - -Some HTML elements require special treatment in a way that can't be easily -expressed in terms of configuration options or other algorithms. The following -algorithm collects these in one place. - -
-To handle funky elements on a given |element|, run these steps: - - 1. If |element|'s [=Element/namespace=] [=is=] [=HTML namespace|HTML=] and - the [=Element/local name=] [=is=] `"template"`: - 1. Run the steps of the [=sanitize a document fragment=] algorithm on - |element|'s [=template contents=] attribute. - 1. Drop all child nodes of |element|. - 1. If |element|'s [=Element/namespace=] [=is=] [=HTML namespace|HTML=] and - the [=Element/local name=] [=is=] one of `"a"` or `"area"`, - and if |element|'s `protocol` property is "javascript:": - 1. Remove the `href` attribute from |element|. - 1. If |element|'s [=Element/namespace=] [=is=] [=HTML namespace|HTML=] and - the [=Element/local name=] [=is=] `"form"` - and if |element|'s `action` attribute is a [[URL]] with `javascript:` - protocol: - 1. Remove the `action` attribute from |element|. - 1. If |element|'s [=Element/namespace=] [=is=] [=HTML namespace|HTML=] and - the [=Element/local name=] [=is=] `"input"` or `"button"`, - and if |element|'s `formaction` attribute is a [[URL]] with `javascript:` protocol - 1. Remove the `formaction` attribute from |element|. -
- -Issue(154): Export and refer funky element properties more precisely. - - -## Matching Against The Configuration ## {#configuration} - -A sanitize action is `keep`, `drop`, or `block`. - -
-To determine the sanitize action for an |element|, given a -{{SanitizerConfig}} |config|, run these steps: - - 1. Let |kind| be |element|'s [=element kind=]. - 1. If |kind| is `regular` and |element| does not - [=element matches an element name|match=] any name in the - [=baseline element allow list=]: Return `drop`. - 1. If |kind| is `custom` and if |config|["{{SanitizerConfig/allowCustomElements}}"] does not [=map/exist=] or if - |config|["{{SanitizerConfig/allowCustomElements}}"] is `false`: Return `drop`. - 1. If |kind| is `unknown` and if |config|["{{SanitizerConfig/allowUnknownMarkup}}"] - does not [=map/exist=] or if |config|["{{SanitizerConfig/allowUnknownMarkup}}"] - is `false`: Return `drop`. - 1. If |element| [=element matches an element name|matches=] any name - in |config|["{{SanitizerConfig/dropElements}}"]: Return `drop`. - 1. If |element| [=element matches an element name|matches=] any name - in |config|["{{SanitizerConfig/blockElements}}"]: Return `block`. - 1. Let |allow list| be null. - 1. If "{{SanitizerConfig/allowElements}}" [=map/exists=] in |config|: - 1. Then: Set |allow list| to |config|["{{SanitizerConfig/allowElements}}"]. - 1. Otherwise: Set |allow list| to the [=default configuration=]'s - [=element allow list=]. - 1. If |element| does not [=element matches an element name|match=] any name - in |allow list|: Return `block`. - 1. Return `keep`. - -sanitizer-unknown.https.html - -
- -
-To determine whether an |element| matches an element |name|, -run these steps: - - 1. If |element| is in the [=HTML namespace=] - and if |element|'s [=Element/local name=] is - [=identical to=] |name|: Return `true`. - 1. Return `false`. -
- -Issue(146): Whitespaces or colons? - -
-To determine whether an |attribute| matches an [=attribute match -list=] |list|, run these steps: - - 1. If |attribute|'s [=Attr/namespace=] is not `null`: Return `false`. - 1. If |attribute|'s [=Attr/local name=] does not match the - [=attribute match list=] |list|'s - [key](https://webidl.spec.whatwg.org/#idl-record) and if the key is - not `"*"`: Return `false`. - 1. Let |element| be the |attribute|'s {{Element}}. - 1. Let |element name| be |element|'s [=Element/local name=]. - 1. If |list|'s [value](https://webidl.spec.whatwg.org/#idl-record) does not - contain |element name| and value is not `["*"]`: Return `false`. - 1. Return `true`. - -
-To determine the sanitize action for an |attribute| given a Sanitizer -configuration dictionary |config|, run these steps: - - 1. Let |kind| be |attribute|'s [=attribute kind=]. - 1. If |kind| is `unknown` and if |config|["{{SanitizerConfig/allowUnknownMarkup}}"] - does not [=map/exist=] or it |config|["{{SanitizerConfig/allowUnknownMarkup}}"] - is `false`: Return `drop`. - 1. If |kind| is `regular` and |attribute|'s [=Attr/local name=] does not match any - name in the [=baseline attribute allow list=]: Return `drop`. - 1. If |attribute| [=attribute matches an attribute match list|matches=] any - [=attribute match list=] in |config|'s [=attribute drop list=]: Return - `drop`. - 1. If [=attribute allow list=] [=map/exists=] in |config|: - 1. Then let |allow list| be `|config|["allowAttributes"]`. - 1. Otherwise: Let |allow list| be the [=default configuration=]'s - [=attribute allow list=]. - 1. If |attribute| does not - [=attribute matches an attribute match list|match=] any - [=attribute match list=] in |allow list|: Return `drop`. - 1. Return `keep`. -
- -
-The element kind of an |element| is one of `regular`, `unknown`, -or `custom`. Let element kind be: - - `custom`, if |element|'s [=Element/local name=] is a - [=valid custom element name=], - - `unknown`, if |element| is not in the [[HTML]] namespace or if |element|'s - [=Element/local name=] denotes an unknown element — that is, if the - [=element interface=] the [[HTML]] specification assigns to it would - be {{HTMLUnknownElement}}, - -Issue(147): We do not want to use the interface (e.g., "applet" and "blink" are HTMLUnknownElement) - - - `regular`, otherwise. -
- -
-Similarly, the attribute kind of an |attribute| is one of `regular` -or `unknown`. Let attribute kind be: - - `unknown`, if the [[HTML]] specification does not assign any meaning to - |attribute|'s name. - - Issue(147): Again, this needs to be more specific. Historical, obsolete, conforming, non-conforming (e.g. bgcolor). It is desirable we make a sanitizer-specific list. - - - `regular`, otherwise. -
- - -## Baseline and Defaults ## {#defaults} - -Issue: The sanitizer baseline and defaults need to be carefully vetted, and - are still under discussion. The values below are for illustrative - purposes only. - -The sanitizer has a built-in [=default configuration=], which is stricter than -the baseline and aims to eliminate any script-injection possibility, as well -as legacy or unusual constructs. - -The defaults and baseline are defined by three JSON constants, -[=baseline element allow list=], [=baseline attribute allow list=], -[=default configuration=]. For better readability, these have been moved to -an appendix A. - - # Security Considerations # {#security-considerations} The Sanitizer API is intended to prevent DOM-based Cross-Site Scripting @@ -1031,95 +172,4 @@ A more complete treatement of mXSS can be found in [[MXSS]]. Cure53's [[DOMPURIFY]] is a clear inspiration for the API this document describes, as is Internet Explorer's {{window.toStaticHTML()}}. -# Appendix A: Built-in Constants # {#constants} - -This appendix is normative, except where explicitly noted otherwise. -These constants define core behaviour of the Sanitizer algorithm. - -## Built-ins Justification ## {#builtins-justification} - -This subsection is super duper non-normative. - -Note: The normative values of these constants are found below. The derivation - of these are explained here, with an implementation in the [[DEFAULTS]] - script. It is expected that these values will change before this - specification is finalized. Also, we expect these - to be updated to include additional HTML elements as they are - introduced in user agents. - -For the purpose of this Sanitizer API, [[HTML]] constructs fall into one of -four classes, where the first defines the baseline, and the first, second, -plus the third define the default: - -1. Elements and attributes that (directly) execute script. - In other words, elements and attributes that are unconditionally script-ish. -1. 
Legacy and "difficult" elements and attributes. - Examples are the `` `<xmp>` and elements, which have special - parsing rules attached to them. These are not dangerous _per se_, but they - have contributed to existing vulnerability. -1. Elements and attributes that we feel rarely make sense in user-supplied - content. -1. All the rest. - -Specifically: - -1. Script-ish constructs: - - The {{HTMLScriptElement}}, which proudly executes script as its sole purpose. - - All [event handler attributes](https://html.spec.whatwg.org/#event-handler-attributes), - since these also execute script. - - {{HTMLIFrameElement}}, which loads arbitrary HTML content and therefor also script. - - The legacy {{HTMLObjectElement}} and {{HTMLEmbedElement}}, which load - non-HTML active content. Also, `<object>`'s side-kick {{HTMLParamElement}}. - - The [no-longer conforming](https://html.spec.whatwg.org/#non-conforming-features) - `<frame>`, `<frameset>`, and `<applet>` tags, which are - outdated versions companions of several elements listed above. - - The `<noscript>`, `<noframes>`, `<noembed>`, and `<nolayer>` elements. - These, by themselves, are arguably not script-ish, but they are companions - to elements listed above, and make no sense on their own. - - Also, the {{HTMLBaseElement}}, as this effectively modifies interpretation - of other URLs. - -1. Legacy and "difficult" elements. - - Special parsing behaviour. This is not dangerous in its own right, but has - contributed to mXSS-style attacks. This includes: - - `<plaintext>` (Which parses in [=PLAINTEXT state=].) - - `<title>` and `<textarea>` (Which parse in [=RCDATA state=].) - - The non-conforming [`<xmp>`](https://html.spec.whatwg.org/#xmp) element. - - Legacy elements: - - `<image>` ([which is parsed as `<img>`](https://html.spec.whatwg.org/#parsing-main-inbody)). - - `<basefont>` - -1. 
Constructs unlikely to be beneficial in user-supplied content: - - The {{HTMLTemplateElement}}, which introduces a new template to be used - by JavaScript, and its {{HTMLSlotElement}} accomplice. - - The frame-like [HTMLPortalElement](https://wicg.github.io/portals/). - - {{HTMLDataElement}}, - - The (deprecated) [allowpaymentrequest](https://www.w3.org/TR/payment-request/) attribute. - -## The Baseline Element Allow List ## {#baseline-elements} - -The built-in <dfn>baseline element allow list</dfn> has the following value: - -<pre class=include-code> -path: resources/baseline-element-allow-list.json -highlight: js -</pre> - -## The Baseline Attribute Allow List ## {#baseline-attributes} - -The <dfn>baseline attribute allow list</dfn> has the following value: - -<pre class=include-code> -path: resources/baseline-attribute-allow-list.json -highlight: js -</pre> - -## The Default Configuration Dictionary ## {#default-configuration-dictionary} - -The built-in <dfn>default configuration</dfn> has the following value: - -<pre class=include-code> -path: resources/default-configuration.json -highlight: js -</pre> From 2ce9d433e644a38fca56e1065313c9ba83e992b1 Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Tue, 23 Jan 2024 16:04:29 +0100 Subject: [PATCH 02/11] First draft; Jan 10 --- index.bs | 315 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 315 insertions(+) diff --git a/index.bs b/index.bs index f4265ae..d6dd426 100644 --- a/index.bs +++ b/index.bs @@ -84,10 +84,325 @@ API which aims to do just that. ## API Summary ## {#api-summary} +The Sanitizer API offers functionality to parse a string containing HTML into +a DOM tree, and to filter the resulting tree according to a user-supplied +configuration. The methods come in two by two flavours: + +* Safe and unsafe: The "safe" methods will not generate any markup that executes + script. That is, they should be safe from XSS. 
The "unsafe" methods will parse + and filter whatever they're supposed to. +* Context: Methods are defined on {{Element}} and {{ShadowRoot}} and will + replace these {{Node}}'s children, and are largely analogous to {{innerHTML}}. + There are also static methods on the {{Document}}, which parse an entire + document and are largely analogous to {{DOMParser}}.{{parseFromString()}}. + + # Framework # {#framework} +## Sanitizer API ## {#sanitizer-api} + +The {{Element}} interface defines two methods, {{Element/setHTML()}} and +{{Element/setHTMLUnsafe()}}. Both of these take a {{DOMString}} with HTML +markup, and an optional configuration. + +<pre class=idl> +partial interface Element { + [CEReactions] undefined setHTMLUnsafe(DOMString html, optional SanitizerConfig config); + [CEReactions] undefined setHTML(DOMString html, optional SanitizerConfig config); +}; +</pre> + +<div algorithm="DOM-Element-setHTMLUnsafe" export> +{{Element}}'s <dfn for="DOM/Element">setHTMLUnsafe</dfn>(|html|, |options|?) method steps are: + +1. Let |target| be |this|'s [=template contents=] if [=this=] is a {{HTMLTemplateElement|template}} element; otherwise |this|. +1. [=Unsafely set HTML=] given |target|, [=this=], |html|, and |options|. + +</div> + +<div algorithm="DOM-Element-setHTML" export> +{{Element}}'s <dfn for="DOM/Element">setHTML</dfn>(|html|, |options|?) method steps are: + +1. Let |target| be |this|'s [=template contents=] if [=this=] is a + {{HTMLTemplateElement|template}}; otherwise |this|. +1. [=Safely set HTML=] given |target|, [=this=], |html|, and |options|. 
+ +</div> + +<pre class=idl> +partial interface ShadowRoot { + [CEReactions] undefined setHTMLUnsafe(DOMString html, optional SanitizerConfig config); + [CEReactions] undefined setHTML(DOMString html, optional SanitizerConfig config); +}; +</pre> + +These methods are mirrored on the {{ShadowRoot}}: + +<div algorithm="ShadowRoot-setHTMLUnsafe" export> +{{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTMLUnsafe</dfn>(|html|, |options|?) method steps are: + +1. [=Unsafely set HTML=] using [=this=], |html|, and |options|. + +</div> + +<div algorithm="ShadowRoot-setHTML" export> +{{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTML</dfn>(|html|, |options|?) method steps are: + +1. [=Safely set HTML=] using [=this=], |html|, and |options|. + +</div> + +The {{Document}} interface gains two new methods which parse an entire {{Document}}: + +<pre class=idl> +partial interface Document { + static Document parseHTMLUnsafe(DOMString html, optional SanitizerConfig config); + static Document parseHTML(DOMString html, optional SanitizerConfig config); +}; +</pre> + +<div algorithm="parseHTMLUnsafe" export> +The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|?) method steps are: + +1. Let |document| be a new {{Document}}, whose [=content type=] is "text/html". + Note: Since document does not have a browsing context, scripting is disabled. +1. Set |document|'s [=allow declarative shadow roots=] to `true`. +1. [=Parse HTML=] from a string given |document| and |html|. +1. If |options| is set: + 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |options|. +1. Return |document|. + +</div> + + +<div algorithm="parseHTML" export> +The <dfn for="DOM/Document">parseHTML</dfn>(|html|, |options|?) method steps are: + +1. Let |document| be a new {{Document}}, whose [=content type=] is "text/html". + Note: Since document does not have a browsing context, scripting is disabled. +1. Set |document|'s [=allow declarative shadow roots=] to `true`. +1. 
[=Parse HTML=] from a string given |document| and |html|. +1. Run [=sanitize=] on |document|'s [=tree/root|root node=] with |options|. +1. Run [=sanitize=] on |document|'s [=tree/root|root node=] using the + [=built-in default config=], and with `allow-unknown` set to `true`. +1. Return |document|. + +NOTE: An actual implementation would presumably merge the two [=sanitize=] calls. +</div> + + + +## The Configuration Dictionary ## {#config} + +<pre class=idl> +dictionary SanitizerElementNamespace { + required DOMString name; + DOMString? _namespace = "http://www.w3.org/1999/xhtml"; +}; + +// Used by "elements" +dictionary SanitizerElementNamespaceWithAttributes : SanitizerElementNamespace { + sequence&lt;SanitizerAttribute> attributes; + sequence&lt;SanitizerAttribute> removeAttributes; +}; + +typedef (DOMString or SanitizerElementNamespace) SanitizerElement; +typedef (DOMString or SanitizerElementNamespaceWithAttributes) SanitizerElementWithAttributes; + +dictionary SanitizerAttributeNamespace { + required DOMString name; + DOMString? _namespace = null; +}; +typedef (DOMString or SanitizerAttributeNamespace) SanitizerAttribute; + +dictionary SanitizerConfig { + sequence&lt;SanitizerElementWithAttributes> elements; + sequence&lt;SanitizerElement> removeElements; + sequence&lt;SanitizerElement> replaceWithChildrenElements; + + sequence&lt;SanitizerAttribute> attributes; + sequence&lt;SanitizerAttribute> removeAttributes; + + boolean customElements; + boolean comments; +}; +</pre> + +## Canonical Configuration ## {#config-canonical} + +For the purpose of specifying these algorithms, we define a <dfn>canonical +configuration</dfn>. This canonical configuration removes redundant ways of +expressing the same configuration and resolves the built-in defaults. 
This +allows us to specify the core filtering operations in two steps: Deriving +a [=canonical configuration=] from the user-supplied {{SanitizerConfig}}, +and then the actual filtering algorithms based on the +[=canonical configuration=]. + +<pre class=idl> +dictionary CanonicalConfigName { + DOMString name; + DOMString _namespace; +}; +dictionary CanonicalConfigNameMap { + CanonicalConfigName name; + sequence&lt;CanonicalConfigName> attributes; +}; +// TODO: Should these be sets and a map? +dictionary CanonicalConfig { + sequence&lt;CanonicalConfigName> globalElements; + sequence&lt;CanonicalConfigName> globalReplaceElements; + sequence&lt;CanonicalConfigName> globalAttributes; + sequence&lt;CanonicalConfigNameMap> perElement; + boolean globalAllowComments; + // TODO: globalAllowCustomElements ? +}; +</pre> + # Algorithms # {#algorithms} +<div algorithm="unsafely set HTML"> +To <dfn>unsafely set HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|: + +1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] + given |contextElement|, |html|, and `true`. +1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. +1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] node to |fragment|. +1. If |options| is set: + 1. Run [=sanitize=] on |node| using |options|. +1. [=Replace all=] with |fragment| within |target|. + +</div> + +<div algorithm="safely set HTML"> +To <dfn>safely set HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|: + +1. If |target| is a {{HTMLScriptElement}} or {{SVGScriptElement}}, return. +1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] + given |contextElement|, |html|, and `true`. +1. 
Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. +1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] |node| to |fragment|. +1. Run [=sanitize=] on |fragment| using |options|. +1. Run [=sanitize=] on |fragment| using the [=built-in default config=], with `allow-unknown` set to `true`. +1. [=Replace all=] with |fragment| within |target|. + +Note: An actual implementation would presumably merge the two [=sanitize=] +calls into one. +</div> + +## Sanitization Algorithms ## {#sanitization} + +<div algorithm="sanitize"> +The main <dfn>sanitize</dfn> operation, using a {{ParentNode}} node, a {{SanitizerConfig}} |config|, and an optional boolean |allow-unknown|: + +Note: |allow-unknown| is not exposed to the user. It's merely a specification + tool, so that we can re-use this algorithm for the handling of + default filtering. + +1. Let |cconfig| be the result of running [=canonicalize a configuration=] + on |config|. +1. [=list/iterate|For each=] |child| in |current|'s [=tree/children=]: + 1. [=Assert=]: |child| is none of: + 1. {{ATTRIBUTE_NODE}}, {{DOCUMENT_NODE}}, {{DOCUMENT_TYPE_NODE}}, + {{DOCUMENT_FRAGMENT_NODE}}. + 1. {{CDATA_SECTION_NODE}} or {{PROCESSING_INSTRUCTION_NODE}}. + (These should not occur in a node tree parsed as HTML.) + 1. {{ENTITY_REFERENCE_NODE}}, {{ENTITY_NODE}}, or {{NOTATION_NODE}}. + (These are legacy node types.) + 1. if |child| is a {{TEXT_NODE}}: + 1. do nothing. + 1. else if |child| is a {{COMMENT_NODE}}: + 1. if |cconfig|'s {{globalAllowComments}} is not `true`: + 1. {{Node/removeChild()}} |child| from |current|. + 1. else if |child| is an {{ELEMENT_NODE}}: + 1. Let |element-name| be a {{CanonicalConfigName}} with |child|'s + [=Element/local name=] and [=Element/namespace=]. + 1. if |cconfig|'s {{globalElements}} [=list/contains=] |element-name|, or + if |allow-unknown| is `true` and |child| is not an element defined by + the [[HTML]] specification: + 1. 
[=list/iterate|For each=] |attr| in |current|'s [=Element/attribute list=]: + 1. Let |attr-name| be a {{CanonicalConfigName}} with |attr|'s + [=Attr/local name=] and [=Attr/namespace=]. + 1. Let |per-element-attrs| be |cconfig|'s {{perElement}} entry with + the `name` equals |element-name|. TODO: I don't think this works. + 1. If neither {{globalAttributes}} or |per-element-attrs| [=list/contains=] + contains |attr-name|, then remove |attr| from |child|. + 1. If |child| is a [=Element/shadow host=]: + 1. Call [=sanitize=] on |child|'s [=Element/shadow root=], using + |config| and |allow-unknown|. + 1. else if |cconfig|'s {{globalReplaceElements}} [=list/contains=] |element-name|: + 1. Call [=sanitize=] on |child| with |config| and |allow-unknown|. + 1. Call {{ParentNode/replaceChildren()}} on |child| with |child|'s [=tree/children=] as arguments. + 1. else: + 1. Call {{Node/removeChild()}} on |child|. + 1. else: + 1. Call {{Node/removeChild()}} on |child|. + +TODO: Add "funky elements" / handling of `javascript:`-URLs back in. + +</div> + +## Configuration Processing ## {#configuration-processing} + +<div algorithm> +In order to <dfn>validate</dfn> a |config|, run these steps: + +1. If |config| has {{removeElements}} and either {{elements}} or + {{replaceWithChildrenElements}}, then return `false`. +1. If |config| has {{SanitizerConfig/removeAttributes}} and {{SanitizerConfig/attributes}}, then return `false`. +1. TODO: ... more checks ... +1. Return `true`. + +</div> + + +<div algorithm> +In order to <dfn>canonicalize a configuration</dfn> |config|, run the following steps: + +1. If |config| does not [=validate=], then [=throw=] a {{TypeError}}. +1. Let |cconfig| be a new [=dictionary=]. +1. If |config| has {{SanitizerConfig/removeElements}} set, then: + 1. Set |cconfig|.{{CanonicalConfig/globalElements}} to [=built-in default config=].{{SanitizerConfig/elements}}. + 1. 
[=list/iterate|For each=] item in + |config|.{{SanitizerConfig/removeElements}}, call + [=canonicalize a sanitizer name=], and [=set/remove=] the result from + |cconfig|.{{CanonicalConfig/globalElements}}. +1. If |config| has {{SanitizerConfig/elements}} set, then: + 1. [=list/iterate|For each=] item in + |config|.{{SanitizerConfig/elements}}, call + [=canonicalize a sanitizer name=], and [=list/append=] the result to + |cconfig|.{{CanonicalConfig/globalElements}}. +1. If |config| has {{SanitizerConfig/replaceWithChildrenElements}} set, then: + 1. [=list/iterate|For each=] item in + |config|.{{SanitizerConfig/replaceWithChildrenElements}}, call + [=canonicalize a sanitizer name=], and [=list/append=] the result to + |cconfig|.{{CanonicalConfig/globalReplaceElements}}. +1. TODO: Add all the others. + +</div> + +<div algorithm> +In order to <dfn>canonicalize a sanitizer name</dfn> |name|, run the following +steps: + +1. Let |cname| be an empty dictionary. +1. TODO: Map |name| (DOMString or dictionary) to canonicalized name/namespace dictionary. +1. Return |cname|. + +</div> + +## Defaults ## {#sanitization-defaults} + +The <dfn>built-in default config</dfn> is as follows: +``` +{ + elements: [....], + attributes: [....], + comments: true, + customElements: true +} +``` + + # Security Considerations # {#security-considerations} The Sanitizer API is intended to prevent DOM-based Cross-Site Scripting From a2ab6029e61f2956f45ea34813bd721a0208b158 Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Fri, 19 Jan 2024 18:13:21 +0100 Subject: [PATCH 03/11] Feedback from Jan 10 meeting. 
--- index.bs | 399 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 288 insertions(+), 111 deletions(-) diff --git a/index.bs b/index.bs index d6dd426..669eb9d 100644 --- a/index.bs +++ b/index.bs @@ -164,12 +164,14 @@ partial interface Document { <div algorithm="parseHTMLUnsafe" export> The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|?) method steps are: -1. Let |document| be a new {{Document}}, whose [=content type=] is "text/html". +1. Let |document| be a new {{Document}}, whose [=Document/content type=] is "text/html". Note: Since document does not have a browsing context, scripting is disabled. 1. Set |document|'s [=allow declarative shadow roots=] to `true`. 1. [=Parse HTML=] from a string given |document| and |html|. 1. If |options| is set: - 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |options|. +1. Let |config| be the result of calling [=canonicalize a configuration=] on + |options|[`"sanitizer"`] and `false`. + 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. </div> @@ -178,13 +180,13 @@ The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|?) method ste <div algorithm="parseHTML" export> The <dfn for="DOM/Document">parseHTML</dfn>(|html|, |options|?) method steps are: -1. Let |document| be a new {{Document}}, whose [=content type=] is "text/html". +1. Let |document| be a new {{Document}}, whose [=Document/content type=] is "text/html". Note: Since document does not have a browsing context, scripting is disabled. 1. Set |document|'s [=allow declarative shadow roots=] to `true`. 1. [=Parse HTML=] from a string given |document| and |html|. -1. Run [=sanitize=] on |document|'s [=tree/root|root node=] with |options|. -1. Run [=sanitize=] on |document|'s [=tree/root|root node=] using the - [=built-in default config=], and with `allow-unknown` set to `true`. +1. 
Let |config| be the result of calling [=canonicalize a configuration=] on + |options|[`"sanitizer"`] and `true`. +1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. NOTE: An actual implementation would presumably merge the two [=sanitize=] calls. @@ -223,7 +225,6 @@ dictionary SanitizerConfig { sequence&lt;SanitizerAttribute> attributes; sequence&lt;SanitizerAttribute> removeAttributes; - boolean customElements; boolean comments; }; </pre> @@ -231,111 +232,136 @@ dictionary SanitizerConfig { ## Canonical Configuration ## {#config-canonical} For the purpose of specifying these algorithms, we define a <dfn>canonical -configuration</dfn>. This canonical configuration removes redundant ways of -expressing the same configuration and resolves the built-in defaults. This -allows us to specify the core filtering operations in two steps: Deriving -a [=canonical configuration=] from the user-supplied {{SanitizerConfig}}, -and then the actual filtering algorithms based on the -[=canonical configuration=]. +configuration</dfn>. These [=canonical configurations=] are meant to be a subset +of the allowed configurations that eliminates redundant ways to express the same thing. + +For example, the regular configuration allows an element or attribute to be described +by a string containing its name (in a default namespace); by a dictionary with a +name string and an implied namespace; +or by a dictionary with both name and namespace given explicitly. The canonical +configuration allows only the last form, a dictionary with explicit name and +namespace. + +The [=canonical configuration=] is chiefly a specification tool that users +do not need to concern themselves with. But it allows us to specify the +Sanitizer operation in two steps: First canonicalize the configuration, and then +have a (simpler) algorithm that will do the actual sanitization. 
<pre class=idl> -dictionary CanonicalConfigName { - DOMString name; - DOMString _namespace; +dictionary CanonicalSanitizerName { + required DOMString name; + required DOMString _namespace; }; -dictionary CanonicalConfigNameMap { - CanonicalConfigName name; - sequence&lt;CanonicalConfigName> attributes; +dictionary CanonicalSanitizerNameWithAttributes : CanonicalSanitizerName { + sequence&lt;CanonicalSanitizerName> attributes; + sequence&lt;CanonicalSanitizerName> removeAttributes; }; -// TODO: Should these be sets and a map? -dictionary CanonicalConfig { - sequence&lt;CanonicalConfigName> globalElements; - sequence&lt;CanonicalConfigName> globalReplaceElements; - sequence&lt;CanonicalConfigName> globalAttributes; - sequence&lt;CanonicalConfigNameMap> perElement; - boolean globalAllowComments; - // TODO: globalAllowCustomElements ? +dictionary CanonicalSanitizerConfig { + sequence&lt;CanonicalSanitizerNameWithAttributes> elements; + sequence&lt;CanonicalSanitizerName> removeElements; + sequence&lt;CanonicalSanitizerName> replaceWithChildrenElements; + sequence&lt;CanonicalSanitizerName> attributes; + sequence&lt;CanonicalSanitizerName> removeAttributes; + required boolean comments; }; </pre> # Algorithms # {#algorithms} -<div algorithm="unsafely set HTML"> +<div algorithm> To <dfn>unsafely set HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|: -1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] - given |contextElement|, |html|, and `true`. -1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. -1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] node to |fragment|. -1. If |options| is set: - 1. Run [=sanitize=] on |node| using |options|. -1. [=Replace all=] with |fragment| within |target|. +1. 
Let |config| be the result of calling [=canonicalize a configuration=] on + |options|[`"sanitizer"`] and `false`. +1. Run [=set and filter HTML=] on |target|, |contextElement|, |html|, and |config|. </div> -<div algorithm="safely set HTML"> +<div algorithm> To <dfn>safely set HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|: -1. If |target| is a {{HTMLScriptElement}} or {{SVGScriptElement}}, return. +1. Let |config| be the result of calling [=canonicalize a configuration=] on + |options|[`"sanitizer"`] and `true`. +1. Run [=set and filter HTML=] on |target|, |contextElement|, |html|, and |config|. + +</div> + +<div algorithm> +To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=canonical=] |config|, run these steps: + 1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] given |contextElement|, |html|, and `true`. 1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. 1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] |node| to |fragment|. -1. Run [=sanitize=] on |fragment| using |options|. -1. Run [=sanitize=] on |fragment| using the [=built-in default config=], with `allow-unknown` set to `true`. +1. Run [=sanitize=] on |fragment| using |config|. 1. [=Replace all=] with |fragment| within |target|. -Note: An actual implementation would presumably merge the two [=sanitize=] -calls into one. 
</div> ## Sanitization Algorithms ## {#sanitization} <div algorithm="sanitize"> -The main <dfn>sanitize</dfn> operation, using a {{ParentNode}} node, a {{SanitizerConfig}} |config|, and an optional boolean |allow-unknown|: +For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a +[=canonical=] {{SanitizerConfig}} |config|, run these steps: -Note: |allow-unknown| is not exposed to the user. It's merely a specification - tool, so that we can re-use this algorithm for the handling of - default filtering. - -1. Let |cconfig| be the result of running [=canonicalize a configuration=] - on |config|. +1. [=Assert=]: |config| is [=canonical=]. +1. Initialize |current| with |node|. 1. [=list/iterate|For each=] |child| in |current|'s [=tree/children=]: - 1. [=Assert=]: |child| is none of: - 1. {{ATTRIBUTE_NODE}}, {{DOCUMENT_NODE}}, {{DOCUMENT_TYPE_NODE}}, - {{DOCUMENT_FRAGMENT_NODE}}. - 1. {{CDATA_SECTION_NODE}} or {{PROCESSING_INSTRUCTION_NODE}}. - (These should not occur in a node tree parsed as HTML.) - 1. {{ENTITY_REFERENCE_NODE}}, {{ENTITY_NODE}}, or {{NOTATION_NODE}}. - (These are legacy node types.) - 1. if |child| is a {{TEXT_NODE}}: - 1. do nothing. - 1. else if |child| is a {{COMMENT_NODE}}: - 1. if |cconfig|'s {{globalAllowComments}} is not `true`: + 1. [=Assert=]: |child| [=implements=] {{Text}}, {{Comment}}, or {{Element}}. + + Note: Currently, this algorithm is only called on output of the HTML + parser, for which this assertion should hold. If this is to be + generalized, this algorithm needs to be re-examined. + 1. If |child| [=implements=] {{Text}}: + 1. Do nothing. + 1. else if |child| [=implements=] {{Comment}}: + 1. If |config|'s {{CanonicalSanitizerConfig/comments}} is not `true`: 1. {{Node/removeChild()}} |child| from |current|. - 1. else if |child| is an {{ELEMENT_NODE}}: - 1. Let |element-name| be a {{CanonicalConfigName}} with |child|'s + 1. else if |child| [=implements=] {{Element}}: + 1. 
Let |element-name| be a {{CanonicalSanitizerName}} with |child|'s [=Element/local name=] and [=Element/namespace=]. - 1. if |cconfig|'s {{globalElements}} [=list/contains=] |element-name|, or - if |allow-unknown| is `true` and |child| is not an element defined by - the [[HTML]] specification: - 1. [=list/iterate|For each=] |attr| in |current|'s [=Element/attribute list=]: - 1. Let |attr-name| be a {{CanonicalConfigName}} with |attr|'s - [=Attr/local name=] and [=Attr/namespace=]. - 1. Let |per-element-attrs| be |cconfig|'s {{perElement}} entry with - the `name` equals |element-name|. TODO: I don't think this works. - 1. If neither {{globalAttributes}} or |per-element-attrs| [=list/contains=] - contains |attr-name|, then remove |attr| from |child|. - 1. If |child| is a [=Element/shadow host=]: - 1. Call [=sanitize=] on |child|'s [=Element/shadow root=], using - |config| and |allow-unknown|. - 1. else if |cconfig|'s {{globalReplaceElements}} [=list/contains=] |element-name|: - 1. Call [=sanitize=] on |child| with |config| and |allow-unknown|. - 1. Call {{ParentNode/replaceChildren()}} on |child| with |child|'s [=tree/children=] as arguments. + 1. If |config|[{{CanonicalSanitizerConfig/elements}}] exists and + |config|[{{CanonicalSanitizerConfig/elements}}] does not [=list/contain=] + [|element-name|]: + 1. Call {{Node/removeChild()}} on |child|. + 1. else if |config|[{{CanonicalSanitizerConfig/removeElements}}] exists and + |config|[{{CanonicalSanitizerConfig/removeElements}}] [=list/contains=] + [|element-name|]: + 1. Call {{Node/removeChild()}} on |child|. + 1. If |config|[{{CanonicalSanitizerConfig/replaceWithChildrenElements}}] exists and |config|[{{CanonicalSanitizerConfig/replaceWithChildrenElements}}] [=list/contains=] |element-name|: + 1. Call [=sanitize=] on |child| with |config|. + 1. Call {{ParentNode/replaceChildren()}} on |child| with |child|'s + [=tree/children=] as arguments. + 1. 
[=list/iterate|For each=] |attr| in |current|'s [=Element/attribute list=]: + 1. Let |attr-name| be a {{CanonicalSanitizerName}} with |attr|'s + [=Attr/local name=] and [=Attr/namespace=]. + 1. If |config|[{{CanonicalSanitizerConfig/attributes}}] exists and + |config|[{{CanonicalSanitizerConfig/attributes}}] does not [=list/contain=] + [attr-name|: + 1. Remove |attr| from |child|. + 1. else if |config|[{{CanonicalSanitizerConfig/removeAttributes}}] exists and + |config|[{{CanonicalSanitizerConfig/removeAttributes}}] [=list/contains=] + [attr-name|: + 1. Remove |attr| from |child|. + 1. If |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|] exists, + and if + |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/attributes}}] + exists, and if + |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/attributes}}] + does not [=list/contain=] |attr-name|: + 1. Remove |attr| from |child|. + 1. If |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|] exists, + and if + |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/removeAttributes}}] + exists, and if + |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/removeAttributes}}] + [=list/contains=] |attr-name|: + 1. If |child| is a [=Element/shadow host=]: + 1. Call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. 1. else: 1. Call {{Node/removeChild()}} on |child|. 1. else: - 1. Call {{Node/removeChild()}} on |child|. + 1. [=Assert=]: We shouldn't reach this branch. TODO: Add "funky elements" / handling of `javascript:`-URLs back in. @@ -344,52 +370,204 @@ TODO: Add "funky elements" / handling of `javascript:`-URLs back in. 
## Configuration Processing ## {#configuration-processing} <div algorithm> -In order to <dfn>validate</dfn> a |config|, run these steps: +A |config| is <dfn>valid</dfn> if all these conditions are met: + +1. |config| [=conforms=] to {{SanitizerConfig}}. +1. |config| [=map/keys=] contains either {{SanitizerConfig/elements}} or + {{SanitizerConfig/removeElements}}, or neither of them, but not both. +1. |config| [=map/keys=] contains either {{SanitizerConfig/removeAttributes}} + or {{SanitizerConfig/attributes}}, or neither, but not both. +1. If |config|[{{SanitizerConfig/elements}}] exists, then none of its members' + [=map/keys=] contains both {{SanitizerElementNamespaceWithAttributes/attributes}} + and {{SanitizerElementNamespaceWithAttributes/removeAttributes}}. +1. TODO: check that name dictionaries must contain "name" + +</div> + +<div algorithm> +A |config| is <dfn>canonical</dfn> if all these conditions are met: + +1. |config| is [=valid=]. +1. |config| [=strictly conforms=] to {{CanonicalSanitizerConfig}}. +1. |config|'s [=map/keys|key set=] [=set/equals=] any of: + 1. &laquo; + {{SanitizerConfig/elements}}, + {{SanitizerConfig/attributes}}, + {{SanitizerConfig/comments}} + &raquo; + 1. &laquo; + {{SanitizerConfig/elements}}, + {{SanitizerConfig/replaceWithChildrenElements}}, + {{SanitizerConfig/attributes}}, + {{SanitizerConfig/comments}} + &raquo; + 1. &laquo; + {{SanitizerConfig/removeElements}}, + {{SanitizerConfig/removeAttributes}}, + {{SanitizerConfig/comments}} + &raquo; + 1. &laquo; + {{SanitizerConfig/removeElements}}, + {{SanitizerConfig/removeAttributes}}, + {{SanitizerConfig/replaceWithChildrenElements}}, + {{SanitizerConfig/comments}} + &raquo; +1. TODO: Elements with attributes -1. If |config| has {{removeElements}} and either {{elements}} or - {{replaceWithChildrenElements}}, then return `false`. -1. If |config| has {{SanitizerConfig/removeAttributes}} and {{SanitizerConfig/attributes}}, then return `false`. -1. TODO: ... more checks ... -1. 
Return `true`. +</div> + +<div algorithm> +In order to <dfn>canonicalize a configuration</dfn> |config| with a boolean +parameter |safe|, run the following steps: + +TODO: Handle empty |config|. + +1. If |config| is not [=valid=], then [=throw=] a {{TypeError}}. +1. Let |result| be a new [=dictionary=]. +1. For each |key| of + {{SanitizerConfig/elements}}, + {{SanitizerConfig/removeElements}}, + {{SanitizerConfig/replaceWithChildrenElements}}: + 1. If |config|[|key|] exists, set |result|[|key|] to the result of running + [=canonicalize a sanitizer element list=] on |config|[|key|] with + [=HTML namespace=] as the default namespace. +1. For each |key| of + {{SanitizerConfig/attributes}}, + {{SanitizerConfig/removeAttributes}}: + 1. If |config|[|key|] exists, set |result|[|key|] to the result of running + [=canonicalize a sanitizer element list=] on |config|[|key|] with `""` as + the default namespace. +1. Set |result|[{{SanitizerConfig/comments}}] to + |config|[{{SanitizerConfig/comments}}]. +1. Let |default| be the result of [=canonicalizing a configuration=] for the + [=built-in default config=]. +1. If |safe|: + 1. Let |known elements| be an [=ordered set=] of all elements known to the + [[HTML]] specification, where the set members [=strictly conform=] to + {{CanonicalSanitizerName}}. + 1. Let |known attributes| be an [=ordered set=] of all attributes known to the + [[HTML]] specification, where the set members [=strictly conform=] to + {{CanonicalSanitizerName}}. + 1. If |config|[{{SanitizerConfig/elements}}] [=map/exists=]: + 1. Set |result|[{{SanitizerConfig/elements}}] to the + [=intersection complement=] of |result|[{{SanitizerConfig/elements}}] and + the [=intersection complement=] of |known elements| and + |default|[{{SanitizerConfig/elements}}]. + + Note: This sounds more complicated than it is. 
This the same as the + [=set/intersection=] of |result|[{{SanitizerConfig/elements}}] and + |default|[{{SanitizerConfig/elements}}], except that it also + preserves unknown HTML elements, which a plain [=set/intersection=] + would remove. + 1. If |config|[{{SanitizerConfig/removeElements}}] [=map/exists=]: + 1. Set |result|[{{SanitizerConfig/elements}}] to the + [=intersection complement=] of |default|[{{SanitizerConfig/elements}}] + and |result|[{{SanitizerConfig/removeElements}}]. + 1. [=set/Remove=] {{SanitizerConfig/removeElements}} from |result|. + 1. If neither |config|[{{SanitizerConfig/elements}}] nor + |config|[{{SanitizerConfig/removeElements}}] [=map/exist=]: + 1. Set |result|[{{SanitizerConfig/elements}}] to + |default|[{{SanitizerConfig/elements}}]. + 1. If |config|[{{SanitizerConfig/attributes}}] [=map/exists=]: + 1. Set |result|[{{SanitizerConfig/attributes}}] to the + [=intersection complement=] of |result|[{{SanitizerConfig/attributes}}] and + the [=intersection complement=] attributes |known attributes| and + |default|[{{SanitizerConfig/attributes}}]. + 1. If |config|[{{SanitizerConfig/removeAttributes}}] [=map/exists=]: + 1. Set |result|[{{SanitizerConfig/attributes}}] to the + [=intersection complement=] of |default|[{{SanitizerConfig/attributes}}] + and |result|[{{SanitizerConfig/removeAttributes}}]. + 1. [=set/Remove=] {{SanitizerConfig/removeAttributes}} from |result|. + 1. If neither |config|[{{SanitizerConfig/attributes}}] nor + |config|[{{SanitizerConfig/removeAttributes}}] [=map/exist=]: + 1. Set |result|[{{SanitizerConfig/attributes}}] to + |default|[{{SanitizerConfig/attributes}}]. +1. Else (if not |safe|): + 1. If neither |config|[{{SanitizerConfig/elements}}] nor + |config|[{{SanitizerConfig/removeElements}} [=map/exist=]: + 1. Set |result|[{{SanitizerConfig/elements}}] to + |default|[{{SanitizerConfig/elements}}]. + 1. 
If neither |config|[{{SanitizerConfig/attributes}}] nor + |config|[{{SanitizerConfig/removeAttributes}} [=map/exist=]: + 1. Set |result|[{{SanitizerConfig/attributes}}] to + |default|[{{SanitizerConfig/attributes}}]. +1. [=Assert=]: |result| is [=valid=]. +1. [=Assert=]: |result| is [=canonical=]. +1. Return |result|. + +</div> + +<div algorithm> +In order to <dfn>canonicalize a sanitizer element list</dfn> |list|, with a +default namespace |default namespace|, run the following steps: + +1. Let |result| be a new [=ordered set=]. +2. [=list/iterate|For each=] |name| in |list|, call + [=canonicalize a sanitizer name=] on |name| with |default namespace| and + [=set/append=] to |result|. +3. Return |result|. + +</div> + +<div algorithm> +In order to <dfn>canonicalize a sanitizer name</dfn> |name|, with a default +namespace |default namespace|, run the following steps: + +1. [=Assert=]: |name| is either a {{DOMString}} or a [=dictionary=]. +1. If |name| is a {{DOMString}}: + 1. Return &laquo;[ `"name"` &rightarrow; |name|, `"namespace"` &rightarrow; |default namespace|]&raquo;. +1. [=Assert=]: |name| is a [=dictionary=] and |name|["name"] [=map/exists=]. +1. Return &laquo;[ <br> + `"name"` &rightarrow; |name|["name"], <br> + `"namespace"` &rightarrow; |name|["namespace"] if it [=map/exists=], otherwise |default namespace| <br> + ]&raquo;. </div> +## Supporting Algorithms ## {#alg-support} + +<div algorithm> +The <dfn>intersection complement</dfn> of two [=ordered sets=] |A| and |B|, is +the result of creating a new [=ordered set=] |set| and, [=list/iterate|for each=] +|item| of |A|, if |B| does not [=set/contain=] item, [=set/appending=] |item| to +|set|. + +Note: [=intersection complement=] is the same as [=set/intersection=], but with the + complement of parameter |B|. +</div> <div algorithm> -In order to <dfn>canonicalize a configuration</dfn> |config|, run the following steps: - -1. If |config| does not [=validate=], then [=throw=] a {{TypeError}}. -1. 
Let |cconfig| be a new [=dictionary=]. -1. If |config| has {{SanitizerConfig/removeElements}} set, then: - 1. Set |cconfig|.{{CanonicalConfig/globalElements}} to [=built-in default config=].{{SanitizerConfig/elements}}. - 1. [=list/iterate|For each=] item in - |config|.{{SanitizerConfig/removeElements}}, call - [=canonicalize a sanitizer name=], and [=set/remove=] the result from - |cconfig|.{{CanonicalConfig/globalElements}}. -1. If |config| has {{SanitizerConfig/elements}} set, then: - 1. [=list/iterate|For each=] item in - |config|.{{SanitizerConfig/elements}}, call - [=canonicalize a sanitizer name=], and [=list/append=] the result to - |cconfig|.{{CanonicalConfig/globalElements}}. -1. If |config| has {{SanitizerConfig/replaceWithChildrenElements}} set, then: - 1. [=list/iterate|For each=] item in - |config|.{{SanitizerConfig/replaceWithChildrenElements}}, call - [=canonicalize a sanitizer name=], and [=list/append=] the result to - |cconfig|.{{CanonicalConfig/globalReplaceElements}}. -1. TODO: Add all the others. +[=Ordered sets=] |A| and |B| are <dfn for=set>equal</dfn> if both |A| is a [=superset=] of +|B| and |B| is a [=superset=] of |A|. +Note: Equality for [=ordered sets=] is equality of its members, but without +regard to order. </div> <div algorithm> -In order to <dfn>canonicalize a sanitizer name</dfn> |name|, run the following -steps: +A value |D| <dfn>conforms</dfn> to a +[=dictionary|dictionary definition=] if |D| is a [=map=] and all of |D|'s [=map/entries=] +correspond to [=dictionary members=], as long as those entries have the correct +types, and there are [=map/entries=] present for any [=dictionary member/required=] or +[=dictionary member/default value|defaulted=] dictionary members, and any [=dictionary=]-typed values [=conform=] to their [=dictionary member=]'s type. + +Note: This largely corresponds to language in [=dictionary=], but re-words this +as a predicate. +</div> -1. Let |cname| be an empty dictionary. -1. 
TODO: Map |name| (DOMString or dictionary) to canonicalized name/namespace dictionary. -1. Return |cname|. +<div algorithm> +A value |D| <dfn>strictly conforms</dfn> to a +[=dictionary|dictionary definition=] if + +1. |D| [=conforms=] to the definition, +1. there are no [=map/entries=] present that do not have a corresponding + [=dictionary member=], and +1. [=dictionary=]-valued members [=strictly conform=] to their + [=dictionary member=]'s type. </div> + ## Defaults ## {#sanitization-defaults} The <dfn>built-in default config</dfn> is as follows: @@ -398,7 +576,6 @@ The <dfn>built-in default config</dfn> is as follows: elements: [....], attributes: [....], comments: true, - customElements: true } ``` From 2cf1b2fb0a864426974dbe69c2ce3bcc23cdfcbd Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Thu, 25 Jan 2024 14:26:21 +0100 Subject: [PATCH 04/11] Feedback from Jan 24 meeting: - Use camel case for variable names. - Use list syntax. Mark list members as strings. - Be more explicit about "strictly conform to". - Remove canonical config. - Replace removeChild(). - "set and filter" take a "safe" boolean - Use "set difference" instead of intersection complement. - Introduce "dataAttributes" for data-* attributes. - Add special cases for javascript:-URLs and templates ("funky elements") back in - Minor edits. --- index.bs | 553 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 319 insertions(+), 234 deletions(-) diff --git a/index.bs b/index.bs index 669eb9d..cf7aa07 100644 --- a/index.bs +++ b/index.bs @@ -116,7 +116,7 @@ partial interface Element { {{Element}}'s <dfn for="DOM/Element">setHTMLUnsafe</dfn>(|html|, |options|?) method steps are: 1. Let |target| be |this|'s [=template contents=] if [=this=] is {{HTMLTemplateElement|template}} element; otherwise |this|. -1. [=Unsafely set HTML=] given |target|, [=this=], |html|, and |options|. +1. 
[=Set and filter HTML=] given |target|, [=this=], |html|, |options|, and `safe` set to `false`. </div> @@ -125,7 +125,7 @@ partial interface Element { 1. Let |target| be |this|'s [=template contents=] if [=this=] is a {{HTMLTemplateElement|template}}; otherwise |this|. -1. [=Safely set HTML=] given |target|, [=this=], |html|, and |options|. +1. [=Set and filter HTML=] given |target|, [=this=], |html|, |options|, and `safe` set to `true`. </div> @@ -141,14 +141,18 @@ These methods are mirrored on the {{ShadowRoot}}: <div algorithm="ShadowRoot-setHTMLUnsafe" export> {{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTMLUnsafe</dfn>(|html|, |options|?) method steps are: -1. [=Unsafely set HTML=] using [=this=], |html|, and |options|. +1. [=Set and filter HTML=] using [=this=] (as target), [=this=] (as context element), + |html|, |options|, and `safe` set to `false`. + +TODO: Is this the correct context and target for a shadow root? </div> <div algorithm="ShadowRoot-setHTML" export> {{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTML</dfn>(|html|, |options|?)</dfn> method steps are: -1. [=Safely set HTML=] using [=this=], |html|, and |options|. +1. [=Set and filter HTML=] using [=this=] (as target), [=this=] (as context element), + |html|, |options|, and `safe` set to `true`. </div> @@ -189,11 +193,8 @@ The <dfn for="DOM/Document">parseHTML</dfn>(|html|, |options|?) method steps are 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. -NOTE: An actual implementation would presumably merge the two [=sanitize=] calls. </div> - - ## The Configuration Dictionary ## {#config} <pre class=idl> @@ -226,69 +227,22 @@ dictionary SanitizerConfig { sequence&lt;SanitizerAttribute> removeAttributes; boolean comments; + boolean dataAttributes; }; </pre> -## Canonical Configuration ## {#config-canonical} - -For the purpose of specifying these algorithms, we define a <dfn>canonical -configuration</dfn>. 
These [=canonical configurations=] are meant to be a subset -of allowed configurations, that eliminate redundant ways to express the same thing. - -For example, the regular configuration allows element or attributes to be described -by string containing its name (in a default namespace); by a dictionary with a -name string and an implied namespace, -or by a dictionary with both name and namespace given explicitly. The canonical -configuration allows only the latter form, a dictionary with explicit name and -namespaces. - -The [=canonical configuration=] is chiefly a specification tool that users -do not need to concern themselves with. But it allows us to specify the -Sanitizer operation in two steps: First canonicalize the configuration, and then -have a (simpler) algorithm that will do the actual sanitization. - -<pre class=idl> -dictionary CanonicalSanitizerName { - required DOMString name; - required DOMString _namespace; -}; -dictionary CanonicalSanitizerNameWithAttributes : CanonicalSanitizerName { - sequence&lt;CanonicalSanitizerName> attributes; - sequence&lt;CanonicalSanitizerName> removeAttributes; -}; -dictionary CanonicalSanitizerConfig { - sequence&lt;CanonicalSanitizerNameWithAttributes> elements; - sequence&lt;CanonicalSanitizerName> removeElements; - sequence&lt;CanonicalSanitizerName> replaceWithChildrenElements; - sequence&lt;CanonicalSanitizerName> attributes; - sequence&lt;CanonicalSanitizerName> removeAttributes; - required boolean comments; -}; -</pre> +TODO: The functionality for {{SanitizerConfig/dataAttributes}} was agreed, but +not the name. # Algorithms # {#algorithms} <div algorithm> -To <dfn>unsafely set HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|: - -1. Let |config| be the result of calling [=canonicalize a configuration=] on - |options|[`"sanitizer"`] and `false`. -1. 
Run [=set and filter HTML=] on |target|, |contextElement|, |html|, and |config|. - -</div> - -<div algorithm> -To <dfn>safely set HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|: +To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} +|target|, an {{Element}} |contextElement|, a [=string=] |html|, and a +[=dictionary=] |options|, and a [=boolean=] flag |safe|, run these steps: 1. Let |config| be the result of calling [=canonicalize a configuration=] on - |options|[`"sanitizer"`] and `true`. -1. Run [=set and filter HTML=] on |target|, |contextElement|, |html|, and |config|. - -</div> - -<div algorithm> -To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=canonical=] |config|, run these steps: - + |options|[`"sanitizer"`] and |safe|. 1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] given |contextElement|, |html|, and `true`. 1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. @@ -302,9 +256,9 @@ To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} <div algorithm="sanitize"> For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a -[=canonical=] {{SanitizerConfig}} |config|, run these steps: +[=SanitizerConfig/canonical=] {{SanitizerConfig}} |config|, run these steps: -1. [=Assert=]: |config| is [=canonical=]. +1. [=Assert=]: |config| is [=SanitizerConfig/canonical=]. 1. Initialize |current| with |node|. 1. [=list/iterate|For each=] |child| in |current|'s [=tree/children=]: 1. [=Assert=]: |child| [=implements=] {{Text}}, {{Comment}}, or {{Element}}. @@ -315,195 +269,282 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a 1. If |child| [=implements=] {{Text}}: 1. Do nothing. 1. 
else if |child| [=implements=] {{Comment}}: - 1. If |config|'s {{CanonicalSanitizerConfig/comments}} is not `true`: - 1. {{Node/removeChild()}} |child| from |current|. + 1. If |config|'s {{SanitizerConfig/comments}} is not `true`: + 1. [=/remove=] |child|. 1. else if |child| [=implements=] {{Element}}: - 1. Let |element-name| be a {{CanonicalSanitizerName}} with |child|'s + 1. Let |elementName| be a {{SanitizerElementNamespace}} with |child|'s [=Element/local name=] and [=Element/namespace=]. - 1. If |config|[{{CanonicalSanitizerConfig/elements}}] exists and - |config|[{{CanonicalSanitizerConfig/elements}}] does not [=list/contain=] - [|element-name|]: - 1. Call {{Node/removeChild()}} on |child|. - 1. else if |config|[{{CanonicalSanitizerConfig/removeElements}}] exists and - |config|[{{CanonicalSanitizerConfig/removeElements}}] [=list/contains=] - [|element-name|]: - 1. Call {{Node/removeChild()}} on |child|. - 1. If |config|[{{CanonicalSanitizerConfig/replaceWithChildrenElements}}] exists and |config|[{{CanonicalSanitizerConfig/replaceWithChildrenElements}}] [=list/contains=] |element-name|: + 1. If |config|["{{SanitizerConfig/elements}}"] exists and + |config|["{{SanitizerConfig/elements}}"] does not [=list/contain=] + [|elementName|]: + 1. [=/remove=] |child|. + 1. else if |config|["{{SanitizerConfig/removeElements}}"] exists and + |config|["{{SanitizerConfig/removeElements}}"] [=list/contains=] + [|elementName|]: + 1. [=/remove=] |child|. + 1. If |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] exists and |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] [=list/contains=] |elementName|: 1. Call [=sanitize=] on |child| with |config|. - 1. Call {{ParentNode/replaceChildren()}} on |child| with |child|'s - [=tree/children=] as arguments. + 1. Call [=replace all=] with |child|'s [=tree/children=] within |child|. + 1. 
If |elementName| [=equals=] &laquo;[ `"name"` &rightarrow; `"template"`, + `"namespace"` &rightarrow; [=HTML namespace=] ]&raquo; + 1. Then call [=sanitize=] on |child|'s [=template contents=] with |config|. + 1. If |child| is a [=shadow host=]: + 1. Then call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. 1. [=list/iterate|For each=] |attr| in |current|'s [=Element/attribute list=]: - 1. Let |attr-name| be a {{CanonicalSanitizerName}} with |attr|'s + 1. Let |attrName| be a {{SanitizerAttributeNamespace}} with |attr|'s [=Attr/local name=] and [=Attr/namespace=]. - 1. If |config|[{{CanonicalSanitizerConfig/attributes}}] exists and - |config|[{{CanonicalSanitizerConfig/attributes}}] does not [=list/contain=] - [attr-name|: + 1. If |config|["{{SanitizerConfig/attributes}}"] exists and + |config|["{{SanitizerConfig/attributes}}"] does not [=list/contain=] + |attrName|: + 1. If "data-" is a [=code unit prefix=] of [=Attr/local name=] and + if [=Attr/namespace=] is "" and + if |config|["{{SanitizerConfig/attributes}}"] exists and + if |config|["{{SanitizerConfig/dataAttributes}}"] exists and is `true`: + 1. Do nothing. + 1. Else: + 1. Remove |attr| from |child|. + 1. else if |config|["{{SanitizerConfig/removeAttributes}}"] exists and + |config|["{{SanitizerConfig/removeAttributes}}"] [=list/contains=] + |attrName|: 1. Remove |attr| from |child|. - 1. else if |config|[{{CanonicalSanitizerConfig/removeAttributes}}] exists and - |config|[{{CanonicalSanitizerConfig/removeAttributes}}] [=list/contains=] - [attr-name|: - 1. Remove |attr| from |child|. - 1. If |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|] exists, + 1. 
If |config|["{{SanitizerConfig/elements}}"][|elementName|] exists, and if - |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/attributes}}] + |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/attributes}}"] exists, and if - |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/attributes}}] - does not [=list/contain=] |attr-name|: + |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/attributes}}"] + does not [=list/contain=] |attrName|: 1. Remove |attr| from |child|. - 1. If |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|] exists, + 1. If |config|["{{SanitizerConfig/elements}}"][|elementName|] exists, and if - |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/removeAttributes}}] + |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] exists, and if - |config|[{{CanonicalSanitizerConfig/elements}}][|element-name|][{{CanonicalSanitizerNameWithAttributes/removeAttributes}}] - [=list/contains=] |attr-name|: - 1. If |child| is a [=Element/shadow host=]: + |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] + [=list/contains=] |attrName|: + 1. Remove |attr| from |child|. + 1. If &laquo;[|elementName|, |attrName|]&raquo; matches an entry in the + [=navigating URL attributes list=], and if |attr|'s [=protocol=] is + `"javascript:"`: + 1. Then remove |attr| from |child|. 1. Call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. 1. else: - 1. Call {{Node/removeChild()}} on |child|. + 1. [=/remove=] |child|. 1. else: 1. [=Assert=]: We shouldn't reach this branch. -TODO: Add "funky elements" / handling of `javascript:`-URLs back in. 
- </div> ## Configuration Processing ## {#configuration-processing} <div algorithm> -A |config| is <dfn>valid</dfn> if all these conditions are met: - -1. |config| [=conforms=] to {{SanitizerConfig}}. -1. |config| [=map/keys=] contains either {{SanitizerConfig/elements}} or - {{SanitizerConfig/removeElements}}, or neither of them, but not both. -1. |config| [=map/keys=] contains either {{SanitizerConfig/removeAttributes}} - or {{SanitizerConfig/attributes}}, or neither, but not both. -1. If |config|[{{SanitizerConfig/elements}}] exists, then none of its members' - [=map/keys=] contains both {{SanitizerElementNamespaceWithAttributes/attributes}} - and {{SanitizerElementNamespaceWithAttributes/removeAttributes}}. -1. TODO: check that name dictionaries must contain "name" +A |config| is <dfn for="SanitizerConfig">valid</dfn> if all these conditions are met: + +1. |config| is a [=dictionary=]. +1. |config|'s [=map/keys|key set=] does not contain both + "{{SanitizerConfig/elements}}" and "{{SanitizerConfig/removeElements}}". +1. |config|'s [=map/keys|key set=] does not contain both + "{{SanitizerConfig/removeAttributes}}" and "{{SanitizerConfig/attributes}}". +1. [=list/iterate|For any=] |key| of &laquo;[ + "{{SanitizerConfig/elements}}", + "{{SanitizerConfig/removeElements}}", + "{{SanitizerConfig/replaceWithChildrenElements}}", + "{{SanitizerConfig/attributes}}", + "{{SanitizerConfig/removeAttributes}}" + ]&raquo; where |config|[|key|] [=map/exists=]: + 1. |config|[|key|] is [=SanitizerNameList/valid=]. +1. If |config|["{{SanitizerConfig/elements}}"] exists, then + [=list/iterate|for any=] |element| in |config|["{{SanitizerConfig/elements}}"] that is a [=dictionary=]: + 1. |element| does not contain both + "{{SanitizerElementNamespaceWithAttributes/attributes}}" and + "{{SanitizerElementNamespaceWithAttributes/removeAttributes}}". + 1. 
If either |element|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] + or |element|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] + [=map/exists=], then it is [=SanitizerNameList/valid=]. </div> <div algorithm> -A |config| is <dfn>canonical</dfn> if all these conditions are met: - -1. |config| is [=valid=]. -1. |config| [=strictly conforms=] to {{CanonicalSanitizerConfig}}. -1. |config|'s [=map/keys|key set=] [=set/equals=] any of: - 1. &laquo; - {{SanitizerConfig/elements}}, - {{SanitizerConfig/attributes}}, - {{SanitizerConfig/comments}} - &raquo; - 1. &laquo; - {{SanitizerConfig/elements}}, - {{SanitizerConfig/replaceWithChildrenElements}}, - {{SanitizerConfig/attributes}}, - {{SanitizerConfig/comments}} - &raquo; - 1. &laquo; - {{SanitizerConfig/removeElements}}, - {{SanitizerConfig/removeAttributes}}, - {{SanitizerConfig/comments}} - &raquo; - 1. &laquo; - {{SanitizerConfig/removeElements}}, - {{SanitizerConfig/removeAttributes}}, - {{SanitizerConfig/replaceWithChildrenElements}}, - {{SanitizerConfig/comments}} - &raquo; -1. TODO: Elements with attributes +A |list| of names is <dfn for="SanitizerNameList">valid</dfn> if all these +conditions are met: + +1. |list| is a [=/list=]. +1. [=list/iterate|For all=] of its members |name|: + 1. |name| is a {{string}} or a [=dictionary=]. + 1. If |name| is a [=dictionary=]: + 1. |name|["{{SanitizerElementNamespace/name}}"] [=map/exists=] and is a {{string}}. </div> +<div algorithm> +A |config| is <dfn for="SanitizerConfig">canonical</dfn> if all these conditions are met: + +1. |config| is [=SanitizerConfig/valid=]. +1. |config|'s [=map/keys|key set=] is a [=set/subset=] of + &laquo;[ + "{{SanitizerConfig/elements}}", + "{{SanitizerConfig/removeElements}}", + "{{SanitizerConfig/replaceWithChildrenElements}}", + "{{SanitizerConfig/attributes}}", + "{{SanitizerConfig/removeAttributes}}", + "{{SanitizerConfig/comments}}", + "{{SanitizerConfig/dataAttributes}}", + "safe" + ]&raquo; +1. 
|config|'s [=map/keys|key set=] contains either: + 1. both "{{SanitizerConfig/elements}}" and "{{SanitizerConfig/attributes}}", + but neither of + "{{SanitizerConfig/removeElements}}" or "{{SanitizerConfig/removeAttributes}}". + 1. or both + "{{SanitizerConfig/removeElements}}" and "{{SanitizerConfig/removeAttributes}}", + but neither of + "{{SanitizerConfig/elements}}" or "{{SanitizerConfig/attributes}}". +1. For any |key| of &laquo;[ + "{{SanitizerConfig/replaceWithChildrenElements}}", + "{{SanitizerConfig/removeElements}}", + "{{SanitizerConfig/attributes}}", + "{{SanitizerConfig/removeAttributes}}" + ]&raquo; where |config|[|key|] [=map/exists=]: + 1. |config|[|key|] is [=SanitizerNameList/canonical=]. +1. If |config|["{{SanitizerConfig/elements}}"] [=map/exists=]: + 1. |config|["{{SanitizerConfig/elements}}"] is [=SanitizerNameWithAttributesList/canonical=]. +1. For any |key| of &laquo;[ + "{{SanitizerConfig/comments}}", + "{{SanitizerConfig/dataAttributes}}", + "safe" + ]&raquo;: + 1. if |config|[|key|] [=map/exists=], |config|[|key|] is a {{boolean}}. + +</div> + +<div algorithm> +A |list| of names is <dfn for="SanitizerNameList">canonical</dfn> if all these +conditions are met: + +1. |list| is a [=/list=]. +1. [=list/iterate|For all=] of its members |name|: + 1. |name| is a [=dictionary=]. + 1. |name|'s [=map/keys|key set=] [=set/equals=] &laquo;[ + "{{SanitizerElementNamespace/name}}", "{{SanitizerElementNamespace/namespace}}" + ]&raquo; + 1. |name|'s [=map/values=] are [=string=]s. + +</div> + +<div algorithm> +A |list| of names is <dfn for="SanitizerNameWithAttributesList">canonical</dfn> +if all these conditions are met: + +1. |list| is a [=/list=]. +1. [=list/iterate|For all=] of its members |name|: + 1. |name| is a [=dictionary=]. + 1. |name|'s [=map/keys|key set=] [=set/equals=] one of: + 1. &laquo;[ + "{{SanitizerElementNamespace/name}}", + "{{SanitizerElementNamespace/namespace}}" + ]&raquo; + 1. 
&laquo;[ + "{{SanitizerElementNamespace/name}}", + "{{SanitizerElementNamespace/namespace}}", + "{{SanitizerElementNamespaceWithAttributes/attributes}}" + ]&raquo; + 1. &laquo;[ + "{{SanitizerElementNamespace/name}}", + "{{SanitizerElementNamespace/namespace}}", + "{{SanitizerElementNamespaceWithAttributes/removeAttributes}}" + ]&raquo; + 1. |name|["{{SanitizerElementNamespace/name}}"] and + |name|["{{SanitizerElementNamespace/namespace}}"] are [=string=]s. + 1. |name|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] and + |name|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] + are [=SanitizerNameList/canonical=] if they [=map/exist=]. + +</div> + + <div algorithm> In order to <dfn>canonicalize a configuration</dfn> |config| with a boolean parameter |safe|, run the following steps: TODO: Handle empty |config|. -1. If |config| is not [=valid=], then [=throw=] a {{TypeError}}. +1. If |config| is not [=SanitizerConfig/valid=], then [=throw=] a {{TypeError}}. 1. Let |result| be a new [=dictionary=]. -1. For each |key| of - {{SanitizerConfig/elements}}, - {{SanitizerConfig/removeElements}}, - {{SanitizerConfig/replaceWithChildrenElements}}: +1. For each |key| of &laquo;[ + "{{SanitizerConfig/elements}}", + "{{SanitizerConfig/removeElements}}", + "{{SanitizerConfig/replaceWithChildrenElements}}" ]&raquo;: 1. If |config|[|key|] exists, set |result|[|key|] to the result of running [=canonicalize a sanitizer element list=] on |config|[|key|] with [=HTML namespace=] as the default namespace. -1. For each |key| of - {{SanitizerConfig/attributes}}, - {{SanitizerConfig/removeAttributes}}: +1. For each |key| of &laquo;[ + "{{SanitizerConfig/attributes}}", + "{{SanitizerConfig/removeAttributes}}" ]&raquo;: 1. If |config|[|key|] exists, set |result|[|key|] to the result of running [=canonicalize a sanitizer element list=] on |config|[|key|] with `""` as the default namespace. -1. 
Set |result|[{{SanitizerConfig/comments}}] to - |config|[{{SanitizerConfig/comments}}]. +1. Set |result|["{{SanitizerConfig/comments}}"] to + |config|["{{SanitizerConfig/comments}}"]. 1. Let |default| be the result of [=canonicalizing a configuration=] for the [=built-in default config=]. 1. If |safe|: - 1. Let |known elements| be an [=ordered set=] of all elements known to the - [[HTML]] specification, where the set members [=strictly conform=] to - {{CanonicalSanitizerName}}. - 1. Let |known attributes| be an [=ordered set=] of all attributes known to the - [[HTML]] specification, where the set members [=strictly conform=] to - {{CanonicalSanitizerName}}. - 1. If |config|[{{SanitizerConfig/elements}}] [=map/exists=]: - 1. Set |result|[{{SanitizerConfig/elements}}] to the - [=intersection complement=] of |result|[{{SanitizerConfig/elements}}] and - the [=intersection complement=] of |known elements| and - |default|[{{SanitizerConfig/elements}}]. - - Note: This sounds more complicated than it is. This the same as the - [=set/intersection=] of |result|[{{SanitizerConfig/elements}}] and - |default|[{{SanitizerConfig/elements}}], except that it also - preserves unknown HTML elements, which a plain [=set/intersection=] - would remove. - 1. If |config|[{{SanitizerConfig/removeElements}}] [=map/exists=]: - 1. Set |result|[{{SanitizerConfig/elements}}] to the - [=intersection complement=] of |default|[{{SanitizerConfig/elements}}] - and |result|[{{SanitizerConfig/removeElements}}]. - 1. [=set/Remove=] {{SanitizerConfig/removeElements}} from |result|. - 1. If neither |config|[{{SanitizerConfig/elements}}] nor - |config|[{{SanitizerConfig/removeElements}}] [=map/exist=]: - 1. Set |result|[{{SanitizerConfig/elements}}] to - |default|[{{SanitizerConfig/elements}}]. - 1. If |config|[{{SanitizerConfig/attributes}}] [=map/exists=]: - 1. 
Set |result|[{{SanitizerConfig/attributes}}] to the - [=intersection complement=] of |result|[{{SanitizerConfig/attributes}}] and - the [=intersection complement=] attributes |known attributes| and - |default|[{{SanitizerConfig/attributes}}]. - 1. If |config|[{{SanitizerConfig/removeAttributes}}] [=map/exists=]: - 1. Set |result|[{{SanitizerConfig/attributes}}] to the - [=intersection complement=] of |default|[{{SanitizerConfig/attributes}}] - and |result|[{{SanitizerConfig/removeAttributes}}]. - 1. [=set/Remove=] {{SanitizerConfig/removeAttributes}} from |result|. - 1. If neither |config|[{{SanitizerConfig/attributes}}] nor - |config|[{{SanitizerConfig/removeAttributes}}] [=map/exist=]: - 1. Set |result|[{{SanitizerConfig/attributes}}] to - |default|[{{SanitizerConfig/attributes}}]. + 1. If |config|["{{SanitizerConfig/elements}}"] [=map/exists=]: + 1. Let |elementBlockList| be the [=set/difference=] between + [=known elements=] and |default|["{{SanitizerConfig/elements}}"]. + + Note: The "natural" way to enforce the default element list would be + to intersect with it. But that would also eliminate any unknown + elements (i.e., elements not defined by HTML, like &lt;foo&gt;). So we + construct this helper to be able to use it to subtract any "unsafe" + elements. + 1. Set |result|["{{SanitizerConfig/elements}}"] to the + [=set/difference=] of |result|["{{SanitizerConfig/elements}}"] and + |elementBlockList|. + 1. If |config|["{{SanitizerConfig/removeElements}}"] [=map/exists=]: + 1. Set |result|["{{SanitizerConfig/elements}}"] to the + [=set/difference=] of |default|["{{SanitizerConfig/elements}}"] + and |result|["{{SanitizerConfig/removeElements}}"]. + 1. [=set/Remove=] "{{SanitizerConfig/removeElements}}" from |result|. + 1. If neither |config|["{{SanitizerConfig/elements}}"] nor + |config|["{{SanitizerConfig/removeElements}}"] [=map/exist=]: + 1. Set |result|["{{SanitizerConfig/elements}}"] to + |default|["{{SanitizerConfig/elements}}"]. + 1.
If |config|["{{SanitizerConfig/attributes}}"] [=map/exists=]: + 1. Let |attributeBlockList| be the [=set/difference=] between + [=known attributes=] and |default|["{{SanitizerConfig/attributes}}"]; + 1. Set |result|["{{SanitizerConfig/attributes}}"] to the + [=set/difference=] of |result|["{{SanitizerConfig/attributes}}"] and + |attributeBlockList|. + 1. If |config|["{{SanitizerConfig/removeAttributes}}"] [=map/exists=]: + 1. Set |result|["{{SanitizerConfig/attributes}}"] to the + [=set/difference=] of |default|["{{SanitizerConfig/attributes}}"] + and |result|["{{SanitizerConfig/removeAttributes}}"]. + 1. [=set/Remove=] "{{SanitizerConfig/removeAttributes}}" from |result|. + 1. If neither |config|["{{SanitizerConfig/attributes}}"] nor + |config|["{{SanitizerConfig/removeAttributes}}"] [=map/exist=]: + 1. Set |result|["{{SanitizerConfig/attributes}}"] to + |default|["{{SanitizerConfig/attributes}}"]. 1. Else (if not |safe|): - 1. If neither |config|[{{SanitizerConfig/elements}}] nor - |config|[{{SanitizerConfig/removeElements}} [=map/exist=]: - 1. Set |result|[{{SanitizerConfig/elements}}] to - |default|[{{SanitizerConfig/elements}}]. - 1. If neither |config|[{{SanitizerConfig/attributes}}] nor - |config|[{{SanitizerConfig/removeAttributes}} [=map/exist=]: - 1. Set |result|[{{SanitizerConfig/attributes}}] to - |default|[{{SanitizerConfig/attributes}}]. -1. [=Assert=]: |result| is [=valid=]. -1. [=Assert=]: |result| is [=canonical=]. + 1. If neither |config|["{{SanitizerConfig/elements}}"] nor + |config|["{{SanitizerConfig/removeElements}}"] [=map/exist=]: + 1. Set |result|["{{SanitizerConfig/elements}}"] to + |default|["{{SanitizerConfig/elements}}"]. + 1. If neither |config|["{{SanitizerConfig/attributes}}"] nor + |config|["{{SanitizerConfig/removeAttributes}}"] [=map/exist=]: + 1. Set |result|["{{SanitizerConfig/attributes}}"] to + |default|["{{SanitizerConfig/attributes}}"]. +1. Set |result|["safe"] to |safe|. +1. 
[=Assert=]: |result| is [=SanitizerConfig/valid=]. +1. [=Assert=]: |result| is [=SanitizerConfig/canonical=]. 1. Return |result|. </div> <div algorithm> In order to <dfn>canonicalize a sanitizer element list</dfn> |list|, with a -default namespace |default namespace|, run the following steps: +default namespace |defaultNamespace|, run the following steps: 1. Let |result| be a new [=ordered set=]. 2. [=list/iterate|For each=] |name| in |list|, call - [=canonicalize a sanitizer name=] on |name| with |default namespace| and + [=canonicalize a sanitizer name=] on |name| with |defaultNamespace| and [=set/append=] to |result|. 3. Return |result|. @@ -511,63 +552,43 @@ default namespace |default namespace|, run the following steps: <div algorithm> In order to <dfn>canonicalize a sanitizer name</dfn> |name|, with a default -namespace |default namespace|, run the following steps: +namespace |defaultNamespace|, run the following steps: 1. [=Assert=]: |name| is either a {{DOMString}} or a [=dictionary=]. 1. If |name| is a {{DOMString}}: - 1. Return &laquo;[ `"name"` &rightarrow; |name|, `"namespace"` &rightarrow; |default namespace|]&raquo;. + 1. Return &laquo;[ `"name"` &rightarrow; |name|, `"namespace"` &rightarrow; |defaultNamespace|]&raquo;. 1. [=Assert=]: |name| is a [=dictionary=] and |name|["name"] [=map/exists=]. 1. Return &laquo;[ <br> `"name"` &rightarrow; |name|["name"], <br> - `"namespace"` &rightarrow; |name|["namespace"] if it [=map/exists=], otherwise |default namespace| <br> + `"namespace"` &rightarrow; |name|["namespace"] if it [=map/exists=], otherwise |defaultNamespace| <br> ]&raquo;. </div> ## Supporting Algorithms ## {#alg-support} -<div algorithm> -The <dfn>intersection complement</dfn> of two [=ordered sets=] |A| and |B|, is -the result of creating a new [=ordered set=] |set| and, [=list/iterate|for each=] -|item| of |A|, if |B| does not [=set/contain=] item, [=set/appending=] |item| to -|set|. 
- -Note: [=intersection complement=] is the same as [=set/intersection=], but with the - complement of parameter |B|. -</div> +Set difference (or set subtraction) is a clone of a set A, but with all members +removed that occur in a set B. <div algorithm> -[=Ordered sets=] |A| and |B| are <dfn for=set>equal</dfn> if both |A| is a [=superset=] of -|B| and |B| is a [=superset=] of |A|. +To compute the <dfn for="set">difference</dfn> of two [=ordered sets=] |A| and |B|: -Note: Equality for [=ordered sets=] is equality of its members, but without -regard to order. -</div> +1. Let |set| be a new [=ordered set=]. +1. [=list/iterate|For each=] |item| of |A|: + 1. If |B| does not [=set/contain=] |item|, then [=set/append=] |item| + to |set|. +1. Return |set|. -<div algorithm> -A value |D| <dfn>conforms</dfn> to a -[=dictionary|dictionary definition=] if |D| is a [=map=] and all of |D|'s [=map/entries=] -corrspond to [=dictionary members=], as long as those entries have the correct -types, and there are [=map/entries=] present for any [=dictionary member/required=] or -[=dictionary member/default value|defaulted=] dictionary members, and any [=dictionary=]-typed values [=conform=] to their [=dictionary member=]'s type. - -Note: This largely corresponds to language in [=dictionary=], but re-words this -as a predicate. </div> -<div algorithm> -A value |D| <dfn>strictly conforms</dfn> to a -[=dictionary|dictionary definition=] if - -1. |D| [=conforms=] to the definition, -1. there are no [=map/entries=] present that do not have a corresponding - [=dictionary member=], and -1. [=dictionary=]-valued members [=strictly conform=] to their - [=dictionary member=]'s type. +Equality for [=ordered sets=] is equality of its members, but without +regard to order. +<div algorithm> +[=Ordered sets=] |A| and |B| are <dfn for=set>equal</dfn> if both |A| is a +[=superset=] of |B| and |B| is a [=superset=] of |A|. 
</div> - ## Defaults ## {#sanitization-defaults} The <dfn>built-in default config</dfn> is as follows: @@ -576,9 +597,73 @@ The <dfn>built-in default config</dfn> is as follows: elements: [....], attributes: [....], comments: true, + safe: true, } ``` +The <dfn>known elements</dfn> are as follows: +``` +[ + { name: "div", namespace: "http://www.w3.org/1999/xhtml"" }, + ... +] +``` + +The <dfn>known attributes</dfn> are as follows: +``` +[ + { name: "class", namespace: "" }, + ... +] +``` + +1. [=Assert=]: [=built-in default config=] is [=SanitizerConfig/canonical=] +1. [=Assert=]: [=built-in default config=]["elements"] is a [=subset=] of [=known elements=]. +1. [=Assert=]: [=built-in default config=]["attributes"] is a [=subset=] of [=known attributes=]. +1. [=Assert=]: &laquo;[ + "elements" &rightarrow; [=known elements=], + "attributes" &rightarrow; [=known attributes=], + "safe" &rightarrow; `false`, + ]&raquo; is [=SanitizerConfig/canonical=]. + +Note: The [=known elements=] and [=known attributes=] should be derived from the + HTML5 specification, rather than being explicitly listed here. Currently, + there are no mechanics to do so. 
+ +<div> +The <dfn>navigating URL attributes list</dfn>, for which "`javascript:`" +navigations are unsafe, are as follows: + +&laquo;[ + <br> + [ + { `"name"` &rightarrow; `"a"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, + { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; "" } + ], + <br> + [ + { `"name"` &rightarrow; `"area"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, + { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; "" } + ], + <br> + [ + { `"name"` &rightarrow; `"form"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, + { `"name"` &rightarrow; `"action"`, `"namespace"` &rightarrow; "" } + ], + <br> + [ + { `"name"` &rightarrow; `"input"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, + { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; "" } + ], + <br> + [ + { `"name"` &rightarrow; `"button"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, + { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; "" } + ], + <br> +]&raquo; +</div> + # Security Considerations # {#security-considerations} From 6cf1f81a1066d7b6ad7dd066c5c44a42ebec41e7 Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Tue, 30 Jan 2024 15:49:43 +0100 Subject: [PATCH 05/11] Add validity condition to not specify the same element/attribute in different lists. --- index.bs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/index.bs b/index.bs index cf7aa07..9816940 100644 --- a/index.bs +++ b/index.bs @@ -359,6 +359,40 @@ A |config| is <dfn for="SanitizerConfig">valid</dfn> if all these conditions are 1. If either |element|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] or |element|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] [=map/exists=], then it is [=SanitizerNameList/valid=]. +1. 
Let |tmp| be a [=dictionary=], and for any |key| &laquo;[ + "{{SanitizerConfig/elements}}", + "{{SanitizerConfig/removeElements}}", + "{{SanitizerConfig/replaceWithChildrenElements}}", + "{{SanitizerConfig/attributes}}", + "{{SanitizerConfig/removeAttributes}}" + ]&raquo; |tmp|[|key|] is set to the result of [=canonicalize a sanitizer + element list=] called on |config|[|key|], and [=HTML namespace=] as default + namespace for the element lists, and "" as default namespace for the + attributes lists. + + Given theses canonlicalized name lists, all of the following conditions hold: + + 1. The [=set/intersection=] between + |tmp|["{{SanitizerConfig/elements}}"] and + |tmp|["{{SanitizerConfig/removeElements}}"] + is empty. + 1. The [=set/intersection=] between + |tmp|["{{SanitizerConfig/removeElements}}"] + |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] + is empty. + 1. The [=set/intersection=] between + |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] and + |tmp|["{{SanitizerConfig/elements}}"] + is empty. + 1. The [=set/intersection=] between + |tmp|["{{SanitizerConfig/attributes}}"] and + |tmp|["{{SanitizerConfig/removeAttributes}}"] + is empty. + + Note: The intent here is to detect duplicates, but without regard of + whether the string shortcut syntax or the explicit dictionary + syntax is used. An implementation might well do this without + explicitly canonicalizing the lists at this point. </div> From bee5caa3cbe579b5291b0a9f380aaa2c60d5cc8f Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Tue, 20 Feb 2024 18:03:43 +0100 Subject: [PATCH 06/11] Feedback from Feb 7 meeting (plus fixing indent issues). --- index.bs | 188 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 108 insertions(+), 80 deletions(-) diff --git a/index.bs b/index.bs index 9816940..048956f 100644 --- a/index.bs +++ b/index.bs @@ -175,6 +175,7 @@ The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|?) 
method ste 1. If |options| is set: 1. Let |config| be the result of calling [=canonicalize a configuration=] on |options|[`"sanitizer"`] and `false`. +1. If |config| exists: 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. @@ -241,13 +242,18 @@ To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|, and a [=boolean=] flag |safe|, run these steps: +1. If |safe| and |contextElement|'s [=Element/local name=] is `"script"` and + |contextElement|'s [=Element/namespace=] is the [=HTML namespace=] or the + [=SVG namespace=]: + 1. Return. 1. Let |config| be the result of calling [=canonicalize a configuration=] on |options|[`"sanitizer"`] and |safe|. 1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] given |contextElement|, |html|, and `true`. 1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. 1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] |node| to |fragment|. -1. Run [=sanitize=] on |fragment| using |config|. +1. If |config| exists: + 1. Run [=sanitize=] on |fragment| using |config|. 1. [=Replace all=] with |fragment| within |target|. </div> @@ -264,25 +270,26 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a 1. [=Assert=]: |child| [=implements=] {{Text}}, {{Comment}}, or {{Element}}. Note: Currently, this algorithm is only be called on output of the HTML - parser, for which this assertion should hold. If this is to be - generalized, this algorithm needs to be re-examined. + parser for which this assertion should hold. If in the future + this algorithm will be used in different contexts, this assumption + needs to be re-examined. 1. If |child| [=implements=] {{Text}}: 1. Do nothing. 1. else if |child| [=implements=] {{Comment}}: 1. 
If |config|'s {{SanitizerConfig/comments}} is not `true`: 1. [=/remove=] |child|. - 1. else if |child| [=implements=] {{Element}}: + 1. else: 1. Let |elementName| be a {{SanitizerElementNamespace}} with |child|'s [=Element/local name=] and [=Element/namespace=]. 1. If |config|["{{SanitizerConfig/elements}}"] exists and - |config|["{{SanitizerConfig/elements}}"] does not [=list/contain=] + |config|["{{SanitizerConfig/elements}}"] does not [=SanitizerConfig/contain=] [|elementName|]: 1. [=/remove=] |child|. 1. else if |config|["{{SanitizerConfig/removeElements}}"] exists and - |config|["{{SanitizerConfig/removeElements}}"] [=list/contains=] + |config|["{{SanitizerConfig/removeElements}}"] [=SanitizerConfig/contains=] [|elementName|]: 1. [=/remove=] |child|. - 1. If |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] exists and |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] [=list/contains=] |elementName|: + 1. If |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] exists and |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] [=SanitizerConfig/contains=] |elementName|: 1. Call [=sanitize=] on |child| with |config|. 1. Call [=replace all=] with |child|'s [=tree/children=] within |child|. 1. If |elementName| [=equals=] &laquo;[ `"name"` &rightarrow; `"template"`, @@ -294,17 +301,17 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a 1. Let |attrName| be a {{SanitizerAttributeNamespace}} with |attr|'s [=Attr/local name=] and [=Attr/namespace=]. 1. If |config|["{{SanitizerConfig/attributes}}"] exists and - |config|["{{SanitizerConfig/attributes}}"] does not [=list/contain=] + |config|["{{SanitizerConfig/attributes}}"] does not [=SanitizerConfig/contain=] |attrName|: 1. 
If "data-" is a [=code unit prefix=] of [=Attr/local name=] and - if [=Attr/namespace=] is "" and + if [=Attr/namespace=] is `null` and if |config|["{{SanitizerConfig/attributes}}"] exists and if |config|["{{SanitizerConfig/dataAttributes}}"] exists and is `true`: 1. Do nothing. 1. Else: 1. Remove |attr| from |child|. 1. else if |config|["{{SanitizerConfig/removeAttributes}}"] exists and - |config|["{{SanitizerConfig/removeAttributes}}"] [=list/contains=] + |config|["{{SanitizerConfig/removeAttributes}}"] [=SanitizerConfig/contains=] |attrName|: 1. Remove |attr| from |child|. 1. If |config|["{{SanitizerConfig/elements}}"][|elementName|] exists, @@ -312,14 +319,14 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/attributes}}"] exists, and if |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/attributes}}"] - does not [=list/contain=] |attrName|: + does not [=SanitizerConfig/contain=] |attrName|: 1. Remove |attr| from |child|. 1. If |config|["{{SanitizerConfig/elements}}"][|elementName|] exists, and if |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] exists, and if |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] - [=list/contains=] |attrName|: + [=SanitizerConfig/contains=] |attrName|: 1. Remove |attr| from |child|. 1. If &laquo;[|elementName|, |attrName|]&raquo; matches an entry in the [=navigating URL attributes list=], and if |attr|'s [=protocol=] is @@ -328,8 +335,6 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a 1. Call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. 1. else: 1. [=/remove=] |child|. - 1. else: - 1. [=Assert=]: We shouldn't reach this branch. 
</div> @@ -339,9 +344,9 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a A |config| is <dfn for="SanitizerConfig">valid</dfn> if all these conditions are met: 1. |config| is a [=dictionary=] -1. |config|'s [=map/keys|key set=] does not contain both +1. |config|'s [=map/keys|key set=] does not [=list/contain=] both "{{SanitizerConfig/elements}}" and "{{SanitizerConfig/removeElements}}" -1. |config|'s [=map/keys|key set=] does not contain both +1. |config|'s [=map/keys|key set=] does not [=list/contain=] both "{{SanitizerConfig/removeAttributes}}" and "{{SanitizerConfig/attributes}}". 1. [=list/iterate|For any=] |key| of &laquo;[ "{{SanitizerConfig/elements}}", @@ -353,46 +358,55 @@ A |config| is <dfn for="SanitizerConfig">valid</dfn> if all these conditions are 1. |config|[|key|] is [=SanitizerNameList/valid=]. 1. If |config|["{{SanitizerConfig/elements}}"] exists, then [=list/iterate|for any=] |element| in |config|[|key|] that is a [=dictionary=]: - 1. |element| does not contain both + 1. |element| does not [=list/contain=] both "{{SanitizerElementNamespaceWithAttributes/attributes}}" and "{{SanitizerElementNamespaceWithAttributes/removeAttributes}}". 1. If either |element|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] or |element|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] [=map/exists=], then it is [=SanitizerNameList/valid=]. -1. Let |tmp| be a [=dictionary=], and for any |key| &laquo;[ - "{{SanitizerConfig/elements}}", - "{{SanitizerConfig/removeElements}}", - "{{SanitizerConfig/replaceWithChildrenElements}}", - "{{SanitizerConfig/attributes}}", - "{{SanitizerConfig/removeAttributes}}" - ]&raquo; |tmp|[|key|] is set to the result of [=canonicalize a sanitizer - element list=] called on |config|[|key|], and [=HTML namespace=] as default - namespace for the element lists, and "" as default namespace for the - attributes lists. 
- - Given theses canonlicalized name lists, all of the following conditions hold: - - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/elements}}"] and - |tmp|["{{SanitizerConfig/removeElements}}"] - is empty. - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/removeElements}}"] - |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] - is empty. - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] and - |tmp|["{{SanitizerConfig/elements}}"] - is empty. - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/attributes}}"] and - |tmp|["{{SanitizerConfig/removeAttributes}}"] - is empty. - - Note: The intent here is to detect duplicates, but without regard of - whether the string shortcut syntax or the explicit dictionary - syntax is used. An implementation might well do this without - explicitly canonicalizing the lists at this point. + 1. Let |tmp| be a [=dictionary=], and for any |key| of &laquo;[ + "{{SanitizerConfig/elements}}", + "{{SanitizerConfig/removeElements}}", + "{{SanitizerConfig/replaceWithChildrenElements}}", + "{{SanitizerConfig/attributes}}", + "{{SanitizerConfig/removeAttributes}}" + ]&raquo; |tmp|[|key|] is set to the result of [=canonicalize a sanitizer + element list=] called on |config|[|key|], and [=HTML namespace=] as default + namespace for the element lists, and `null` as default namespace for the + attributes lists. + + Note: The intent here is to assert about list elements, but without regard + of whether the string shortcut syntax or the explicit dictionary + syntax is used. For example, having "img" in `elements` and + `{ name: "img" }` in `removeElements`. An implementation might well + do this without explicitly canonicalizing the lists at this point. + + 1. Given these canonicalized name lists, all of the following conditions hold: + + 1.
The [=set/intersection=] between + |tmp|["{{SanitizerConfig/elements}}"] and + |tmp|["{{SanitizerConfig/removeElements}}"] + is empty. + 1. The [=set/intersection=] between + |tmp|["{{SanitizerConfig/removeElements}}"] and + |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] + is empty. + 1. The [=set/intersection=] between + |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] and + |tmp|["{{SanitizerConfig/elements}}"] + is empty. + 1. The [=set/intersection=] between + |tmp|["{{SanitizerConfig/attributes}}"] and + |tmp|["{{SanitizerConfig/removeAttributes}}"] + is empty. + + 1. Let |tmpattrs| be |tmp|["{{SanitizerConfig/attributes}}"] if it exists, + and otherwise [=built-in default config=]["{{SanitizerConfig/attributes}}"]. + 1. [=list/iterate|For any=] |item| in |tmp|["{{SanitizerConfig/elements}}"]: + 1. If either |item|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] + or |item|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] + exists: + 1. Then the [=set/difference=] between it and |tmpattrs| is empty. </div> @@ -420,10 +434,9 @@ A |config| is <dfn for="SanitizerConfig">canonical</dfn> if all these conditions "{{SanitizerConfig/attributes}}", "{{SanitizerConfig/removeAttributes}}", "{{SanitizerConfig/comments}}", - "{{SanitizerConfig/dataAttributes}}", - "safe" + "{{SanitizerConfig/dataAttributes}}" ]&raquo; -1. |config|'s [=map/keys|key set=] contains either: +1. |config|'s [=map/keys|key set=] [=list/contains=] either: 1. both "{{SanitizerConfig/elements}}" and "{{SanitizerConfig/attributes}}", but neither of "{{SanitizerConfig/removeElements}}" or "{{SanitizerConfig/removeAttributes}}". @@ -442,8 +455,7 @@ A |config| is <dfn for="SanitizerConfig">canonical</dfn> if all these conditions 1. |config|["{{SanitizerConfig/elements}}"] is [=SanitizerNameWithAttributesList/canonical=]. 1.
For any |key| of &laquo;[ "{{SanitizerConfig/comments}}", - "{{SanitizerConfig/dataAttributes}}", - "safe" + "{{SanitizerConfig/dataAttributes}}" ]&raquo;: 1. if |config|[|key|] [=map/exists=], |config|[|key|] is a {{boolean}}. @@ -498,8 +510,20 @@ if all these conditions are met: In order to <dfn>canonicalize a configuration</dfn> |config| with a boolean parameter |safe|, run the following steps: -TODO: Handle empty |config|. +Note: The initial set of [=assert=]s assert properties of the built-in + constants, like the [=built-in default config|defaults=] and + the lists of known [=known elements|elements=] and + [=known attributes|attributes=]. +1. [=Assert=]: [=built-in default config=] is [=SanitizerConfig/canonical=]. +1. [=Assert=]: [=built-in default config=]["elements"] is a [=subset=] of [=known elements=]. +1. [=Assert=]: [=built-in default config=]["attributes"] is a [=subset=] of [=known attributes=]. +1. [=Assert=]: &laquo;[ + "elements" &rightarrow; [=known elements=], + "attributes" &rightarrow; [=known attributes=], + ]&raquo; is [=SanitizerConfig/canonical=]. +1. If |config| is empty and not |safe|: + 1. Return. 1. If |config| is not [=SanitizerConfig/valid=], then [=throw=] a {{TypeError}}. 1. Let |result| be a new [=dictionary=]. 1. For each |key| of &laquo;[ @@ -513,7 +537,7 @@ TODO: Handle empty |config|. "{{SanitizerConfig/attributes}}", "{{SanitizerConfig/removeAttributes}}" ]&raquo;: 1. If |config|[|key|] exists, set |result|[|key|] to the result of running - [=canonicalize a sanitizer element list=] on |config|[|key|] with `""` as + [=canonicalize a sanitizer element list=] on |config|[|key|] with `null` as the default namespace. 1. Set |result|["{{SanitizerConfig/comments}}"] to |config|["{{SanitizerConfig/comments}}"]. @@ -565,7 +589,6 @@ TODO: Handle empty |config|. |config|["{{SanitizerConfig/removeAttributes}}"] [=map/exist=]: 1. Set |result|["{{SanitizerConfig/attributes}}"] to |default|["{{SanitizerConfig/attributes}}"]. -1.
Set |result|["safe"] to |safe|. 1. [=Assert=]: |result| is [=SanitizerConfig/valid=]. 1. [=Assert=]: |result| is [=SanitizerConfig/canonical=]. 1. Return |result|. @@ -601,10 +624,21 @@ namespace |defaultNamespace|, run the following steps: ## Supporting Algorithms ## {#alg-support} -Set difference (or set subtraction) is a clone of a set A, but with all members -removed that occur in a set B. +<div algorithm> +For the [=canonicalize a sanitizer name|canonicalized=] +{{SanitizerElementNamespace|element}} and {{SanitizerAttributeNamespace|attribute name}} lists +used in this spec, list membership is based on matching both `"name"` and `"namespace"` +entries: +A Sanitizer name |list| <dfn for="SanitizerConfig">contains</dfn> an |item| +if there exists an |entry| of |list| that is an [=ordered map=], and where +|item|["name"] [=equals=] |entry|["name"] and +|item|["namespace"] [=equals=] |entry|["namespace"]. + +</div> <div algorithm> +Set difference (or set subtraction) is a clone of a set A, but with all members +removed that occur in a set B: To compute the <dfn for="set">difference</dfn> of two [=ordered sets=] |A| and |B|: 1. Let |set| be a new [=ordered set=]. @@ -615,16 +649,19 @@ To compute the <dfn for="set">difference</dfn> of two [=ordered sets=] |A| and | </div> -Equality for [=ordered sets=] is equality of its members, but without -regard to order. - <div algorithm> +Equality for [=ordered sets=] is equality of its members, but without +regard to order: [=Ordered sets=] |A| and |B| are <dfn for=set>equal</dfn> if both |A| is a [=superset=] of |B| and |B| is a [=superset=] of |A|. + </div> ## Defaults ## {#sanitization-defaults} +Note: The defaults should follow a certain form, which is checked for at the + beginning of [=canonicalize a configuration=]. 
+ The <dfn>built-in default config</dfn> is as follows: ``` { @@ -638,7 +675,7 @@ The <dfn>built-in default config</dfn> is as follows: The <dfn>known elements</dfn> are as follows: ``` [ - { name: "div", namespace: "http://www.w3.org/1999/xhtml"" }, + { name: "div", namespace: "http://www.w3.org/1999/xhtml" }, ... ] ``` @@ -646,20 +683,11 @@ The <dfn>known elements</dfn> are as follows: The <dfn>known attributes</dfn> are as follows: ``` [ - { name: "class", namespace: "" }, + { name: "class", namespace: null }, ... ] ``` -1. [=Assert=]: [=built-in default config=] is [=SanitizerConfig/canonical=] -1. [=Assert=]: [=built-in default config=]["elements"] is a [=subset=] of [=known elements=]. -1. [=Assert=]: [=built-in default config=]["attributes"] is a [=subset=] of [=known attributes=]. -1. [=Assert=]: &laquo;[ - "elements" &rightarrow; [=known elements=], - "attributes" &rightarrow; [=known attributes=], - "safe" &rightarrow; `false`, - ]&raquo; is [=SanitizerConfig/canonical=]. - Note: The [=known elements=] and [=known attributes=] should be derived from the HTML5 specification, rather than being explicitly listed here. Currently, there are no mechanics to do so. 
@@ -672,27 +700,27 @@ navigations are unsafe, are as follows: <br> [ { `"name"` &rightarrow; `"a"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; "" } + { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; `null` } ], <br> [ { `"name"` &rightarrow; `"area"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; "" } + { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; `null` } ], <br> [ { `"name"` &rightarrow; `"form"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"action"`, `"namespace"` &rightarrow; "" } + { `"name"` &rightarrow; `"action"`, `"namespace"` &rightarrow; `null` } ], <br> [ { `"name"` &rightarrow; `"input"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; "" } + { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; `null` } ], <br> [ { `"name"` &rightarrow; `"button"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; "" } + { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; `null` } ], <br> ]&raquo; From e4d3ebeb516b2dfb913ecffb763b9790db9bc15d Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Wed, 21 Feb 2024 16:08:34 +0100 Subject: [PATCH 07/11] Quotes should be outside of the code. --- index.bs | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/index.bs b/index.bs index 048956f..d5e2463 100644 --- a/index.bs +++ b/index.bs @@ -174,7 +174,7 @@ The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|?) method ste 1. [=Parse HTML=] from a string given |document| and |html|. 1. If |options| is set: 1. 
Let |config| be the result of calling [=canonicalize a configuration=] on - |options|[`"sanitizer"`] and `false`. + |options|["`sanitizer`"] and `false`. 1. If |config| exists: 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. @@ -190,7 +190,7 @@ The <dfn for="DOM/Document">parseHTML</dfn>(|html|, |options|?) method steps are 1. Set |document|'s [=allow declarative shadow roots=] to `true`. 1. [=Parse HTML=] from a string given |document| and |html|. 1. Let |config| be the result of calling [=canonicalize a configuration=] on - |options|[`"sanitizer"`] and `true`. + |options|["`sanitizer`"] and `true`. 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. @@ -242,12 +242,12 @@ To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a [=dictionary=] |options|, and a [=boolean=] flag |safe|, run these steps: -1. If |safe| and |contextElement|'s [=Element/local name=] is `"script"` and +1. If |safe| and |contextElement|'s [=Element/local name=] is "`script`" and |contextElement|'s [=Element/namespace=] is the [=HTML namespace=] or the [=SVG namespace=]: 1. Return. 1. Let |config| be the result of calling [=canonicalize a configuration=] on - |options|[`"sanitizer"`] and |safe|. + |options|["`sanitizer`"] and |safe|. 1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] given |contextElement|, |html|, and `true`. 1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. @@ -292,8 +292,8 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a 1. If |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] exists and |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] [=SanitizerConfig/contains=] |elementName|: 1. Call [=sanitize=] on |child| with |config|. 1. 
Call [=replace all=] with |child|'s [=tree/children=] within |child|. - 1. If |elementName| [=equals=] &laquo;[ `"name"` &rightarrow; `"template"`, - `"namespace"` &rightarrow; [=HTML namespace=] ]&raquo; + 1. If |elementName| [=equals=] &laquo;[ "`name`" &rightarrow; "`template`", + "`namespace`" &rightarrow; [=HTML namespace=] ]&raquo; 1. Then call [=sanitize=] on |child|'s [=template contents=] with |config|. 1. If |child| is a [=shadow host=]: 1. Then call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. @@ -330,7 +330,7 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a 1. Remove |attr| from |child|. 1. If &laquo;[|elementName|, |attrName|]&raquo; matches an entry in the [=navigating URL attributes list=], and if |attr|'s [=protocol=] is - `"javascript:"`: + "`javascript:`": 1. Then remove |attr| from |child|. 1. Call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. 1. else: @@ -613,11 +613,11 @@ namespace |defaultNamespace|, run the following steps: 1. [=Assert=]: |name| is either a {{DOMString}} or a [=dictionary=]. 1. If |name| is a {{DOMString}}: - 1. Return &laquo;[ `"name"` &rightarrow; |name|, `"namespace"` &rightarrow; |defaultNamespace|]&raquo;. + 1. Return &laquo;[ "`name`" &rightarrow; |name|, "`namespace`" &rightarrow; |defaultNamespace|]&raquo;. 1. [=Assert=]: |name| is a [=dictionary=] and |name|["name"] [=map/exists=]. 1. Return &laquo;[ <br> - `"name"` &rightarrow; |name|["name"], <br> - `"namespace"` &rightarrow; |name|["namespace"] if it [=map/exists=], otherwise |defaultNamespace| <br> + "`name`" &rightarrow; |name|["name"], <br> + "`namespace`" &rightarrow; |name|["namespace"] if it [=map/exists=], otherwise |defaultNamespace| <br> ]&raquo;. 
</div> @@ -627,7 +627,7 @@ namespace |defaultNamespace|, run the following steps: <div algorithm> For the [=canonicalize a sanitizer name|canonicalized=] {{SanitizerElementNamespace|element}} and {{SanitizerAttributeNamespace|attribute name}} lists -used in this spec, list membership is based on matching both `"name"` and `"namespace"` +used in this spec, list membership is based on matching both "`name`" and "`namespace`" entries: A Sanitizer name |list| <dfn for="SanitizerConfig">contains</dfn> an |item| if there exists an |entry| of |list| that is an [=ordered map=], and where @@ -699,28 +699,28 @@ navigations are unsafe, are as follows: &laquo;[ <br> [ - { `"name"` &rightarrow; `"a"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; `null` } + { "`name`" &rightarrow; "`a`", "`namespace`" &rightarrow; "[=HTML namespace=]" }, + { "`name`" &rightarrow; "`href`", "`namespace`" &rightarrow; `null` } ], <br> [ - { `"name"` &rightarrow; `"area"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"href"`, `"namespace"` &rightarrow; `null` } + { "`name`" &rightarrow; "`area`", "`namespace`" &rightarrow; "[=HTML namespace=]" }, + { "`name`" &rightarrow; "`href`", "`namespace`" &rightarrow; `null` } ], <br> [ - { `"name"` &rightarrow; `"form"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"action"`, `"namespace"` &rightarrow; `null` } + { "`name`" &rightarrow; "`form`", "`namespace`" &rightarrow; "[=HTML namespace=]" }, + { "`name`" &rightarrow; "`action`", "`namespace`" &rightarrow; `null` } ], <br> [ - { `"name"` &rightarrow; `"input"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; `null` } + { "`name`" &rightarrow; "`input`", "`namespace`" &rightarrow; "[=HTML namespace=]" }, + { "`name`" &rightarrow; "`formaction`", "`namespace`" &rightarrow; `null` } ], <br> [ - { 
`"name"` &rightarrow; `"button"`, `"namespace"` &rightarrow; "[=HTML namespace=]" }, - { `"name"` &rightarrow; `"formaction"`, `"namespace"` &rightarrow; `null` } + { "`name`" &rightarrow; "`button`", "`namespace`" &rightarrow; "[=HTML namespace=]" }, + { "`name`" &rightarrow; "`formaction`", "`namespace`" &rightarrow; `null` } ], <br> ]&raquo; From 24e6b7e46ba33499414d306ad86515cd0aa997ee Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Tue, 5 Mar 2024 18:58:47 +0100 Subject: [PATCH 08/11] Minor edit --- index.bs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/index.bs b/index.bs index d5e2463..0f5cdd8 100644 --- a/index.bs +++ b/index.bs @@ -232,9 +232,6 @@ dictionary SanitizerConfig { }; </pre> -TODO: The functionality for {{SanitizerConfig/dataAttributes}} was agreed, but -not the name. - # Algorithms # {#algorithms} <div algorithm> @@ -668,7 +665,6 @@ The <dfn>built-in default config</dfn> is as follows: elements: [....], attributes: [....], comments: true, - safe: true, } ``` From c33f085b8cebb2705c98599fe49ab17e20928c8e Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Fri, 15 Mar 2024 18:15:21 +0100 Subject: [PATCH 09/11] Review comments --- index.bs | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/index.bs b/index.bs index 0f5cdd8..e983127 100644 --- a/index.bs +++ b/index.bs @@ -113,19 +113,19 @@ partial interface Element { </pre> <div algorithm="DOM-Element-setHTMLUnsafe" export> -{{Element}}'s <dfn for="DOM/Element">setHTMLUnsafe</dfn>(|html|, |options|?) method steps are: +{{Element}}'s <dfn for="DOM/Element">setHTMLUnsafe</dfn>(|html|, |options|) method steps are: 1. Let |target| be |this|'s [=template contents=] if [=this=] is {{HTMLTemplateElement|template}} element; otherwise |this|. -1. [=Set and filter HTML=] given |target|, [=this=], |html|, |options|, and `safe` set to `false`. +1. 
[=Set and filter HTML=] given |target|, [=this=], |html|, |options|, and false. </div> <div algorithm="DOM-Element-setHTML" export> -{{Element}}'s <dfn for="DOM/Element">setHTML</dfn>(|html|, |options|?) method steps are: +{{Element}}'s <dfn for="DOM/Element">setHTML</dfn>(|html|, |options|) method steps are: 1. Let |target| be |this|'s [=template contents=] if [=this=] is a {{HTMLTemplateElement|template}}; otherwise |this|. -1. [=Set and filter HTML=] given |target|, [=this=], |html|, |options|, and `safe` set to `true`. +1. [=Set and filter HTML=] given |target|, [=this=], |html|, |options|, and true. </div> @@ -139,20 +139,19 @@ partial interface ShadowRoot { These methods are mirrored on the {{ShadowRoot}}: <div algorithm="ShadowRoot-setHTMLUnsafe" export> -{{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTMLUnsafe</dfn>(|html|, |options|?) method steps are: +{{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTMLUnsafe</dfn>(|html|, |options|) method steps are: -1. [=Set and filter HTML=] using [=this=] (as target), [=this=] (as context element), - |html|, |options|, and `safe` set to `false`. - -TODO: Is this the correct context and target for a shadow root? +1. [=Set and filter HTML=] using [=this=], + [=this=]'s [=shadow host=] (as context element), + |html|, |options|, and false. </div> <div algorithm="ShadowRoot-setHTML" export> -{{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTML</dfn>(|html|, |options|?)</dfn> method steps are: +{{ShadowRoot}}'s <dfn for="DOM/ShadowRoot">setHTML</dfn>(|html|, |options|) method steps are: 1. [=Set and filter HTML=] using [=this=] (as target), [=this=] (as context element), - |html|, |options|, and `safe` set to `true`. + |html|, |options|, and true. </div> @@ -166,15 +165,14 @@ partial interface Document { </pre> <div algorithm="parseHTMLUnsafe" export> -The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|?) 
method steps are: +The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|) method steps are: 1. Let |document| be a new {{Document}}, whose [=Document/content type=] is "text/html". Note: Since document does not have a browsing context, scripting is disabled. -1. Set |document|'s [=allow declarative shadow roots=] to `true`. +1. Set |document|'s [=allow declarative shadow roots=] to true. 1. [=Parse HTML=] from a string given |document| and |html|. -1. If |options| is set: 1. Let |config| be the result of calling [=canonicalize a configuration=] on - |options|["`sanitizer`"] and `false`. + |options|["`sanitizer`"] and false. 1. If |config| exists: 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. @@ -183,14 +181,14 @@ The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|?) method ste <div algorithm="parseHTML" export> -The <dfn for="DOM/Document">parseHTML</dfn>(|html|, |options|?) method steps are: +The <dfn for="DOM/Document">parseHTML</dfn>(|html|, |options|) method steps are: 1. Let |document| be a new {{Document}}, whose [=Document/content type=] is "text/html". Note: Since document does not have a browsing context, scripting is disabled. -1. Set |document|'s [=allow declarative shadow roots=] to `true`. +1. Set |document|'s [=allow declarative shadow roots=] to true. 1. [=Parse HTML=] from a string given |document| and |html|. 1. Let |config| be the result of calling [=canonicalize a configuration=] on - |options|["`sanitizer`"] and `true`. + |options|["`sanitizer`"] and true. 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. @@ -246,7 +244,7 @@ To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} 1. Let |config| be the result of calling [=canonicalize a configuration=] on |options|["`sanitizer`"] and |safe|. 1. 
Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] - given |contextElement|, |html|, and `true`. + given |contextElement|, |html|, and true. 1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. 1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] |node| to |fragment|. 1. If |config| exists: @@ -262,7 +260,7 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a [=SanitizerConfig/canonical=] {{SanitizerConfig}} |config|, run these steps: 1. [=Assert=]: |config| is [=SanitizerConfig/canonical=]. -1. Initialize |current| with |node|. +1. Let |current| be |node|. 1. [=list/iterate|For each=] |child| in |current|'s [=tree/children=]: 1. [=Assert=]: |child| [=implements=] {{Text}}, {{Comment}}, or {{Element}}. @@ -271,9 +269,9 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a this algorithm will be used in different contexts, this assumption needs to be re-examined. 1. If |child| [=implements=] {{Text}}: - 1. Do nothing. + 1. [=continue=]. 1. else if |child| [=implements=] {{Comment}}: - 1. If |config|'s {{SanitizerConfig/comments}} is not `true`: + 1. If |config|'s {{SanitizerConfig/comments}} is not true: 1. [=/remove=] |child|. 1. else: 1. Let |elementName| be a {{SanitizerElementNamespace}} with |child|'s @@ -302,10 +300,7 @@ For the main <dfn>sanitize</dfn> operation, using a {{ParentNode}} |node|, a |attrName|: 1. If "data-" is a [=code unit prefix=] of [=Attr/local name=] and if [=Attr/namespace=] is `null` and - if |config|["{{SanitizerConfig/attributes}}"] exists and - if |config|["{{SanitizerConfig/dataAttributes}}"] exists and is `true`: - 1. Do nothing. - 1. Else: + if |config|["{{SanitizerConfig/dataAttributes}}"] exists and is false: 1. Remove |attr| from |child|. 1. 
else if |config|["{{SanitizerConfig/removeAttributes}}"] exists and |config|["{{SanitizerConfig/removeAttributes}}"] [=SanitizerConfig/contains=] From b945edb14c7bf79c47d7a26eb2bc67ea7b1e4ab8 Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Mon, 18 Mar 2024 16:43:14 +0100 Subject: [PATCH 10/11] Address IDL validation errors. --- index.bs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/index.bs b/index.bs index e983127..8649961 100644 --- a/index.bs +++ b/index.bs @@ -107,8 +107,8 @@ markup, and an optional configuration. <pre class=idl> partial interface Element { - [CEReactions] undefined setHTMLUnsafe(DOMString html, optional SanitizerConfig config); - [CEReactions] undefined setHTML(DOMString html, optional SanitizerConfig config); + [CEReactions] undefined setHTMLUnsafe(DOMString html, optional SanitizerConfig config = {}); + [CEReactions] undefined setHTML(DOMString html, optional SanitizerConfig config = {}); }; </pre> @@ -131,8 +131,8 @@ partial interface Element { <pre class=idl> partial interface ShadowRoot { - [CEReactions] undefined setHTMLUnsafe(DOMString html, optional SanitizerConfig config); - [CEReactions] undefined setHTML(DOMString html, optional SanitizerConfig config); + [CEReactions] undefined setHTMLUnsafe(DOMString html, optional SanitizerConfig config = {}); + [CEReactions] undefined setHTML(DOMString html, optional SanitizerConfig config = {}); }; </pre> @@ -159,8 +159,8 @@ The {{Document}} interface gains two new methods which parse an entire {{Documen <pre class=idl> partial interface Document { - static Document parseHTMLUnsafe(DOMString html, optional SanitizerConfig config); - static Document parseHTML(DOMString html, optional SanitizerConfig config); + static Document parseHTMLUnsafe(DOMString html, optional SanitizerConfig config = {}); + static Document parseHTML(DOMString html, optional SanitizerConfig config = {}); }; </pre> From 1138f3ca52d766aa5bc090df830338fd70d4966a 
Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim <vogelheim@chromium.org> Date: Mon, 18 Mar 2024 18:42:20 +0100 Subject: [PATCH 11/11] More review feedback. --- index.bs | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/index.bs b/index.bs index 8649961..18f2078 100644 --- a/index.bs +++ b/index.bs @@ -168,13 +168,14 @@ partial interface Document { The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|) method steps are: 1. Let |document| be a new {{Document}}, whose [=Document/content type=] is "text/html". - Note: Since document does not have a browsing context, scripting is disabled. + + Note: Since |document| does not have a browsing context, scripting is disabled. 1. Set |document|'s [=allow declarative shadow roots=] to true. 1. [=Parse HTML=] from a string given |document| and |html|. 1. Let |config| be the result of calling [=canonicalize a configuration=] on |options|["`sanitizer`"] and false. -1. If |config| exists: - 1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. +1. If |config| is not [=list/empty=], + then call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. 1. Return |document|. </div> @@ -184,7 +185,8 @@ The <dfn for="DOM/Document">parseHTMLUnsafe</dfn>(|html|, |options|) method step The <dfn for="DOM/Document">parseHTML</dfn>(|html|, |options|) method steps are: 1. Let |document| be a new {{Document}}, whose [=Document/content type=] is "text/html". - Note: Since document does not have a browsing context, scripting is disabled. + + Note: Since |document| does not have a browsing context, scripting is disabled. 1. Set |document|'s [=allow declarative shadow roots=] to true. 1. [=Parse HTML=] from a string given |document| and |html|. 1. 
Let |config| be the result of calling [=canonicalize a configuration=] on @@ -235,20 +237,18 @@ dictionary SanitizerConfig { <div algorithm> To <dfn>set and filter HTML</dfn>, given an {{Element}} or {{DocumentFragment}} |target|, an {{Element}} |contextElement|, a [=string=] |html|, and a -[=dictionary=] |options|, and a [=boolean=] flag |safe|, run these steps: +[=dictionary=] |options|, and a [=boolean=] |safe|: 1. If |safe| and |contextElement|'s [=Element/local name=] is "`script`" and |contextElement|'s [=Element/namespace=] is the [=HTML namespace=] or the - [=SVG namespace=]: - 1. Return. + [=SVG namespace=], then return. 1. Let |config| be the result of calling [=canonicalize a configuration=] on |options|["`sanitizer`"] and |safe|. 1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm=] given |contextElement|, |html|, and true. 1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. 1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] |node| to |fragment|. -1. If |config| exists: - 1. Run [=sanitize=] on |fragment| using |config|. +1. If |config| is not [=list/empty=], then run [=sanitize=] on |fragment| using |config|. 1. [=Replace all=] with |fragment| within |target|. </div> @@ -378,19 +378,19 @@ A |config| is <dfn for="SanitizerConfig">valid</dfn> if all these conditions are 1. The [=set/intersection=] between |tmp|["{{SanitizerConfig/elements}}"] and |tmp|["{{SanitizerConfig/removeElements}}"] - is empty. + is [=set/empty=]. 1. The [=set/intersection=] between |tmp|["{{SanitizerConfig/removeElements}}"] |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] - is empty. + is [=set/empty=]. 1. The [=set/intersection=] between |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] and |tmp|["{{SanitizerConfig/elements}}"] - is empty. + is [=set/empty=]. 1. 
The [=set/intersection=] between |tmp|["{{SanitizerConfig/attributes}}"] and |tmp|["{{SanitizerConfig/removeAttributes}}"] - is empty. + is [=set/empty=]. 1. Let |tmpattrs| be |tmp|["{{SanitizerConfig/attributes}}"] if it exists, and otherwise [=built-in default config=]["{{SanitizerConfig/attributes}}"]. @@ -398,7 +398,7 @@ A |config| is <dfn for="SanitizerConfig">valid</dfn> if all these conditions are 1. If either |item|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] or |item|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] exists: - 1. Then the [=set/difference=] between it and |tmpattrs| is empty. + 1. Then the [=set/difference=] between it and |tmpattrs| is [=set/empty=]. </div> @@ -499,8 +499,7 @@ if all these conditions are met: <div algorithm> -In order to <dfn>canonicalize a configuration</dfn> |config| with a boolean -parameter |safe|, run the following steps: +To <dfn>canonicalize a configuration</dfn> |config| with a [=boolean=] |safe|: Note: The initial set of [=assert=]s assert properties of the built-in constants, like the [=built-in default config|defaults=] and @@ -514,8 +513,7 @@ Note: The initial set of [=assert=]s assert properties of the built-in "elements" &rightarrow; [=known elements=], "attributes" &rightarrow; [=known attributes=], ]&raquo; is [=SanitizerConfig/canonical=]. -1. If |config| is empty is not |safe|: - 1. Return. +1. If |config| is [=list/empty=] and not |safe|, then return &laquo;[]&raquo;. 1. If |config| is not [=SanitizerConfig/valid=], then [=throw=] a {{TypeError}}. 1. Let |result| be a new [=dictionary=]. 1. For each |key| of &laquo;[ @@ -604,8 +602,7 @@ In order to <dfn>canonicalize a sanitizer name</dfn> |name|, with a default namespace |defaultNamespace|, run the following steps: 1. [=Assert=]: |name| is either a {{DOMString}} or a [=dictionary=]. -1. If |name| is a {{DOMString}}: - 1. Return &laquo;[ "`name`" &rightarrow; |name|, "`namespace`" &rightarrow; |defaultNamespace|]&raquo;. +1. 
If |name| is a {{DOMString}}, then return &laquo;[ "`name`" &rightarrow; |name|, "`namespace`" &rightarrow; |defaultNamespace|]&raquo;. 1. [=Assert=]: |name| is a [=dictionary=] and |name|["name"] [=map/exists=]. 1. Return &laquo;[ <br> "`name`" &rightarrow; |name|["name"], <br>