From 52e0a9959c5a18b76533c812de6304cf64a9a248 Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim Date: Tue, 13 Aug 2024 18:42:11 +0200 Subject: [PATCH] Draft new config extension. --- explainer.md | 38 ++++ index.bs | 584 +++++++++++++++++++++------------------------------ 2 files changed, 273 insertions(+), 349 deletions(-) diff --git a/explainer.md b/explainer.md index c306521..8ead847 100644 --- a/explainer.md +++ b/explainer.md @@ -363,6 +363,44 @@ element.setHTML("XXXXXX", {sanitizer: config_comments}); //
XXXXXX
``` +### Modifying Existing Configurations + +The `Sanitizer` object offers multiple methods to easily modify or tailor +an existing configuration. The query methods (`get()` and `getUnsafe()`) can +be used to retrieve a dictionary representation of a Sanitizer, +for introspection, or for use with the Sanitizer constructor to create a new +Sanitizer. Additionally, there are methods that directly manipulate the filter +functionality of the Sanitizer. + +The following methods are offered on the Sanitizer object: + +- `allow(x, options)` + - `options` is an optional dictionary argument. + Supported keys are: `"attributes":` and `"removeAttributes":.` +- `removeElement(x)` +- `replaceWithChildren(x)` +- `allowAttribute(x)` +- `removeAttribute(x)` + +These correspond 1:1 to the keys in the configuration dictionary. + +Adding an element or attribute to any of the allow- or deny-lists will also +remove that element or attribute from the other lists for its type. E.g., +calling `allow(x)` will also remove `x` from the removeElements and +replaceWithChildrenElements lists. + +Any name can be given as either a string, or a dictionary with name or +namespace, just as with the configuration dictionary. + +```js +const s = new Sanitizer({ elements: ["div", "p", "b"] }); +s.element("span"); +s.removeElement("b"); +s.get(); // { elements: ["div", "p", "span"], removeElements: ["b"] } + // Really, all these entries will be dictionaries with name and + // namespace entries. +``` + ### Configuration Errors The configuration allows expressing redundant or even contradictory options. diff --git a/index.bs b/index.bs index ae81ef6..08e53ca 100644 --- a/index.bs +++ b/index.bs @@ -42,6 +42,18 @@ text: parse HTML from a string; type: dfn; url: https://html.spec.whatwg.org/#pa } } + + # Introduction # {#intro} @@ -181,10 +193,9 @@ The parseHTMLUnsafe(|html|, |options|) method s Note: Since |document| does not have a browsing context, scripting is disabled. 1. 
Set |document|'s [=allow declarative shadow roots=] to true. 1. [=Parse HTML from a string=] given |document| and |compliantHTML|. -1. Let |config| be the result of calling [=get a sanitizer config from options=] - with |options| and false. -1. If |config| is not [=list/empty=], - then call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. +1. Let |sanitizer| be the result of calling [=get a sanitizer instance from options=] + with |options|. +1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |sanitizer|. 1. Return |document|. @@ -198,9 +209,9 @@ The parseHTML(|html|, |options|) method steps a Note: Since |document| does not have a browsing context, scripting is disabled. 1. Set |document|'s [=allow declarative shadow roots=] to true. 1. [=Parse HTML from a string=] given |document| and |html|. -1. Let |config| be the result of calling [=get a sanitizer config from options=] - with |options| and true. -1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |config|. +1. Let |sanitizer| be the result of calling [=get a sanitizer instance from options=] + with |options|. +1. Call [=sanitize=] on |document|'s [=tree/root|root node=] with |sanitizer|. 1. Return |document|. @@ -223,7 +234,7 @@ lifetime, and can then be used whenever needed. This allows implementations to pre-process configurations. The configuration object is also query-able and can return -[=SanitizerConfig/canonical=] configuration dictionaries, +configuration dictionaries, in both safe and unsafe variants. This allows a page to query and predict what effect a given configuration will have, or to build a new configuration based on an existing one. @@ -232,8 +243,20 @@ to build a new configuration based on an existing one. 
[Exposed=(Window,Worker)] interface Sanitizer { constructor(optional SanitizerConfig config = {}); + + // Query configurations: SanitizerConfig get(); SanitizerConfig getUnsafe(); + + // Modifying a Sanitizer: + undefined element(SanitizerElementNamespaceWithAttributes element); + undefined removeElement(SanitizerElement element); + undefined replaceWithChildren(SanitizerElement element); + undefined allowAttribute(SanitizerAttribute attribute); + undefined removeAttribute(SanitizerAttribute attribute); + undefined setComment(boolean allow); + undefined setDataAttributes(boolean allow); + undefined setOtherMarkup(boolean allow); }; @@ -241,23 +264,102 @@ interface Sanitizer { The constructor(|config|) method steps are: -1. Store |config| in [=this=]'s [=internal slot=]. +1. [=Set a config|Set=] |config| on [=this=]. + +Issue: This abandons all error handling, because setting a config will + just overwrite contradictory entries. Do we want this?
The get() method steps are: -1. Return the result of [=canonicalize a configuration=] with the value of - [=this=]'s [=internal slot=] and true. +1. Return the result of calling [=safeify=] on the result of + [=Sanitizer/getUnsafe=].
The getUnsafe() method steps are: -1. Return the result of [=canonicalize a configuration=] with the value of - [=this=]'s [=internal slot=] and false. +1. Return the value of [=this=]'s [=internal slot=]. + +
+ +
+The element(|element|) method steps are: + +1. Let |name| be the result of [=canonicalize a sanitizer name=] |element| with [=HTML namespace=] as the default namespace. +1. [=list/Append=] |name| to [=this=]'s [=internal slot=]'s {{SanitizerConfig/elements}} list. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s {{SanitizerConfig/removeElements}}. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s + {{SanitizerConfig/replaceWithChildrenElements}}. + +ISSUE: This does not handle per-element attribute allow/remove lists. +
+ +
+The removeElement(|element|) method steps are: + +1. Let |name| be the result of [=canonicalize a sanitizer name=] |element| with [=HTML namespace=] as the default namespace. +1. [=list/Append=] |name| to [=this=]'s [=internal slot=]'s {{SanitizerConfig/removeElements}}. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s {{SanitizerConfig/elements}} list. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s + {{SanitizerConfig/replaceWithChildrenElements}}. + +
+ + +
+The replaceWithChildren(|element|) method steps are: + +1. Let |name| be the result of [=canonicalize a sanitizer name=] |element| with [=HTML namespace=] as the default namespace. +1. [=list/Append=] |name| to [=this=]'s [=internal slot=]'s + {{SanitizerConfig/replaceWithChildrenElements}}. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s {{SanitizerConfig/removeElements}}. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s {{SanitizerConfig/elements}} list. + +
+ +
+The allowAttribute(|attribute|) method steps are: + +1. Let |name| be the result of [=canonicalize a sanitizer name=] |attribute| with `null` as the default namespace. +1. [=list/Append=] |name| to [=this=]'s [=internal slot=]'s + {{SanitizerConfig/attributes}}. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s {{SanitizerConfig/removeAttributes}}. + +
+ + +
+The removeAttribute(|attribute|) method steps are: + +1. Let |name| be the result of [=canonicalize a sanitizer name=] |attribute| with `null` as the default namespace. +1. [=list/Append=] |name| to [=this=]'s [=internal slot=]'s {{SanitizerConfig/removeAttributes}}. +1. [=list/Remove=] |name| from [=this=]'s [=internal slot=]'s + {{SanitizerConfig/attributes}}. + +
+ +
+The setComment(|allow|) method steps are: + +1. Set [=this=]'s [=internal slot=]'s {{SanitizerConfig/comments}} to |allow|. + +
+ +
+The setDataAttributes(|allow|) method steps are: + +1. Set [=this=]'s [=internal slot=]'s {{SanitizerConfig/dataAttributes}} to |allow|. + +
+ +
+The setOtherMarkup(|allow|) method steps are: + +1. Set [=this=]'s [=internal slot=]'s {{SanitizerConfig/otherMarkup}} to |allow|.
@@ -294,6 +396,7 @@ dictionary SanitizerConfig { boolean comments; boolean dataAttributes; + boolean otherMarkup; }; @@ -308,40 +411,53 @@ To set and filter HTML, given an {{Element}} or {{DocumentFragment}} 1. If |safe| and |contextElement|'s [=Element/local name=] is "`script`" and |contextElement|'s [=Element/namespace=] is the [=HTML namespace=] or the [=SVG namespace=], then return. -1. Let |config| be the result of calling [=get a sanitizer config from options=] - with |options| and |safe|. +1. Let |sanitizer| be the result of calling [=get a sanitizer instance from options=] + with |options|. 1. Let |newChildren| be the result of the HTML [=fragment parsing algorithm steps=] given |contextElement|, |html|, and true. 1. Let |fragment| be a new {{DocumentFragment}} whose [=node document=] is |contextElement|'s [=node document=]. 1. [=list/iterate|For each=] |node| in |newChildren|, [=list/append=] |node| to |fragment|. -1. If |config| is not [=list/empty=], then run [=sanitize=] on |fragment| using |config|. +1. Run [=sanitize=] on |fragment| using |sanitizer| and |safe|. 1. [=Replace all=] with |fragment| within |target|.
-To get a sanitizer config from options for -an options dictionary |options| and a boolean |safe|, do: +To get a sanitizer instance from options for +an options dictionary |options|, do: 1. Assert: |options| is a [=dictionary=]. -1. If |options|["`sanitizer`"] doesn't [=map/exist=], then return undefined. +1. If |options|["`sanitizer`"] doesn't [=map/exist=], + then return new {{Sanitizer}}(). 1. Assert: |options|["`sanitizer`"] is either a {{Sanitizer}} instance or a [=dictionary=]. 1. If |options|["`sanitizer`"] is a {{Sanitizer}} instance: - 1. Then let |config| be the value of |options|["`sanitizer`"]'s [=internal slot=]. - 1. Otherwise let |config| be the value of |options|["`sanitizer`"]. -1. Return the result of calling [=canonicalize a configuration=] on - |config| and |safe|. + Then return |options|["`sanitizer`"]. +1. Assert: |options|["`sanitizer`"] is a [=dictionary=]. +1. Return new {{Sanitizer}}(|options|["`sanitizer`"]).
## Sanitization Algorithms ## {#sanitization} -
+
For the main sanitize operation, using a {{ParentNode}} |node|, a -[=SanitizerConfig/canonical=] {{SanitizerConfig}} |config|, run these steps: +{{Sanitizer}} |sanitizer| and a [=boolean=] |safe|, run these steps: + +1. Let |config| be the value of |sanitizer|'s [=internal slot=]. +1. If |safe|, let |config| be the result of calling [=safeify=] on |config|. +1. Call [=sanitize core=] on |node|, |config|, and |safe| (as value for + handling javascript navigation urls). + +
+ +
+The sanitize core operation, +using a {{ParentNode}} |node|, a {{SanitizerConfig}} |config|, and a +[=boolean=] |handle javascript navigation urls|, iterates over the DOM tree +beginning with |node|, and may recurse to handle some special cases (e.g. +template contents). It consists of these steps: + -1. [=Assert=]: |config| is [=SanitizerConfig/canonical=]. 1. Let |current| be |node|. 1. [=list/iterate|For each=] |child| in |current|'s [=tree/children=]: 1. [=Assert=]: |child| [=implements=] {{Text}}, {{Comment}}, or {{Element}}. @@ -358,324 +474,87 @@ For the main sanitize operation, using a {{ParentNode}} |node|, a 1. else: 1. Let |elementName| be a {{SanitizerElementNamespace}} with |child|'s [=Element/local name=] and [=Element/namespace=]. - 1. If |config|["{{SanitizerConfig/elements}}"] exists and - |config|["{{SanitizerConfig/elements}}"] does not [=SanitizerConfig/contain=] - [|elementName|]: + 1. If |config|["{{SanitizerConfig/removeElements}}"] [=SanitizerConfig/contains=] |elementName|, or if |config|["{{SanitizerConfig/elements}}"] does not [=SanitizerConfig/contain=] |elementName| and |config|["{{SanitizerConfig/otherMarkup}}"] is false: 1. [=/remove=] |child|. - 1. else if |config|["{{SanitizerConfig/removeElements}}"] exists and - |config|["{{SanitizerConfig/removeElements}}"] [=SanitizerConfig/contains=] - [|elementName|]: - 1. [=/remove=] |child|. - 1. If |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] exists and |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] [=SanitizerConfig/contains=] |elementName|: - 1. Call [=sanitize=] on |child| with |config|. + 1. If |config|["{{SanitizerConfig/replaceWithChildrenElements}}"] [=SanitizerConfig/contains=] |elementName|: + 1. Call [=sanitize core=] on |child| with |config| and + |handle javascript navigation urls|. 1. Call [=replace all=] with |child|'s [=tree/children=] within |child|. 1. 
If |elementName| [=equals=] «[ "`name`" → "`template`", "`namespace`" → [=HTML namespace=] ]» - 1. Then call [=sanitize=] on |child|'s [=template contents=] with |config|. + 1. Then call [=sanitize core=] on |child|'s [=template contents=] with + |config| and |handle javascript navigation urls|. 1. If |child| is a [=shadow host=]: - 1. Then call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. - 1. [=list/iterate|For each=] |attr| in |current|'s [=Element/attribute list=]: + 1. Then call [=sanitize core=] on |child|'s [=Element/shadow root=] with + |config| and |handle javascript navigation urls|. + 1. [=list/iterate|For each=] |attr| in |child|'s [=Element/attribute list=]: 1. Let |attrName| be a {{SanitizerAttributeNamespace}} with |attr|'s [=Attr/local name=] and [=Attr/namespace=]. - 1. If |config|["{{SanitizerConfig/attributes}}"] exists and - |config|["{{SanitizerConfig/attributes}}"] does not [=SanitizerConfig/contain=] - |attrName|: - 1. If "data-" is a [=code unit prefix=] of [=Attr/local name=] and - if [=Attr/namespace=] is `null` and - if |config|["{{SanitizerConfig/dataAttributes}}"] exists and is false: - 1. Remove |attr| from |child|. - 1. else if |config|["{{SanitizerConfig/removeAttributes}}"] exists and - |config|["{{SanitizerConfig/removeAttributes}}"] [=SanitizerConfig/contains=] - |attrName|: - 1. Remove |attr| from |child|. - 1. If |config|["{{SanitizerConfig/elements}}"][|elementName|] exists, - and if - |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/attributes}}"] - exists, and if - |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/attributes}}"] - does not [=SanitizerConfig/contain=] |attrName|: + 1. If |config|["{{SanitizerConfig/removeAttributes}}"] + [=SanitizerConfig/contains=] |attrName|: 1. Remove |attr| from |child|. - 1. 
If |config|["{{SanitizerConfig/elements}}"][|elementName|] exists, - and if - |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] - exists, and if - |config|["{{SanitizerConfig/elements}}"][|elementName|]["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] - [=SanitizerConfig/contains=] |attrName|: + 1. If |config|["{{SanitizerConfig/elements}}"]["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] + [=SanitizerConfig/contains=] |attrName|: 1. Remove |attr| from |child|. - 1. If «[|elementName|, |attrName|]» matches an entry in the + + 1. If all of the following are false, then remove |attr| from |child|. + - |config|["{{SanitizerConfig/attributes}}"] + [=SanitizerConfig/contains=] |attrName| + - |config|["{{SanitizerConfig/elements}}"]["{{SanitizerElementNamespaceWithAttributes/attributes}}"] + [=SanitizerConfig/contains=] |attrName| + - "data-" is a [=code unit prefix=] of [=Attr/local name=] and + [=Attr/namespace=] is `null` and + |config|["{{SanitizerConfig/dataAttributes}}"] is true + - |config|["{{SanitizerConfig/otherMarkup}}"] + 1. If |handle javascript navigation urls| and «[|elementName|, |attrName|]» matches an entry in the [=navigating URL attributes list=], and if |attr|'s [=protocol=] is "`javascript:`": 1. Then remove |attr| from |child|. - 1. Call [=sanitize=] on |child|'s [=Element/shadow root=] with |config|. - 1. else: - 1. [=/remove=] |child|.
## Configuration Processing ## {#configuration-processing}
-A |config| is valid if all these conditions are met: - -1. |config| is a [=dictionary=] -1. |config|'s [=map/keys|key set=] does not [=list/contain=] both - "{{SanitizerConfig/elements}}" and "{{SanitizerConfig/removeElements}}" -1. |config|'s [=map/keys|key set=] does not [=list/contain=] both - "{{SanitizerConfig/removeAttributes}}" and "{{SanitizerConfig/attributes}}". -1. [=list/iterate|For any=] |key| of «[ - "{{SanitizerConfig/elements}}", - "{{SanitizerConfig/removeElements}}", - "{{SanitizerConfig/replaceWithChildrenElements}}", - "{{SanitizerConfig/attributes}}", - "{{SanitizerConfig/removeAttributes}}" - ]» where |config|[|key|] [=map/exists=]: - 1. |config|[|key|] is [=SanitizerNameList/valid=]. -1. If |config|["{{SanitizerConfig/elements}}"] exists, then - [=list/iterate|for any=] |element| in |config|[|key|] that is a [=dictionary=]: - 1. |element| does not [=list/contain=] both - "{{SanitizerElementNamespaceWithAttributes/attributes}}" and - "{{SanitizerElementNamespaceWithAttributes/removeAttributes}}". - 1. If either |element|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] - or |element|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] - [=map/exists=], then it is [=SanitizerNameList/valid=]. - 1. Let |tmp| be a [=dictionary=], and for any |key| «[ - "{{SanitizerConfig/elements}}", - "{{SanitizerConfig/removeElements}}", - "{{SanitizerConfig/replaceWithChildrenElements}}", - "{{SanitizerConfig/attributes}}", - "{{SanitizerConfig/removeAttributes}}" - ]» |tmp|[|key|] is set to the result of [=canonicalize a sanitizer - element list=] called on |config|[|key|], and [=HTML namespace=] as default - namespace for the element lists, and `null` as default namespace for the - attributes lists. - - Note: The intent here is to assert about list elements, but without regard - to whether the string shortcut syntax or the explicit dictionary - syntax is used. 
For example, having "img" in `elements` and - `{ name: "img" }` in `removeElements`. An implementation might well - do this without explicitly canonicalizing the lists at this point. - - 1. Given theses canonicalized name lists, all of the following conditions hold: - - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/elements}}"] and - |tmp|["{{SanitizerConfig/removeElements}}"] - is [=set/empty=]. - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/removeElements}}"] - |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] - is [=set/empty=]. - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/replaceWithChildrenElements}}"] and - |tmp|["{{SanitizerConfig/elements}}"] - is [=set/empty=]. - 1. The [=set/intersection=] between - |tmp|["{{SanitizerConfig/attributes}}"] and - |tmp|["{{SanitizerConfig/removeAttributes}}"] - is [=set/empty=]. - - 1. Let |tmpattrs| be |tmp|["{{SanitizerConfig/attributes}}"] if it exists, - and otherwise [=built-in default config=]["{{SanitizerConfig/attributes}}"]. - 1. [=list/iterate|For any=] |item| in |tmp|["{{SanitizerConfig/elements}}"]: - 1. If either |item|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] - or |item|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] - exists: - 1. Then the [=set/difference=] between it and |tmpattrs| is [=set/empty=]. - -
- -
-A |list| of names is valid if all these -conditions are met: - -1. |list| is a [=/list=]. -1. [=list/iterate|For all=] of its members |name|: - 1. |name| is a {{string}} or a [=dictionary=]. - 1. If |name| is a [=dictionary=]: - 1. |name|["{{SanitizerElementNamespace/name}}"] [=map/exists=] and is a {{string}}. - -
- -
-A |config| is canonical if all these conditions are met: - -1. |config| is [=SanitizerConfig/valid=]. -1. |config|'s [=map/keys|key set=] is a [=set/subset=] of - «[ - "{{SanitizerConfig/elements}}", - "{{SanitizerConfig/removeElements}}", - "{{SanitizerConfig/replaceWithChildrenElements}}", - "{{SanitizerConfig/attributes}}", - "{{SanitizerConfig/removeAttributes}}", - "{{SanitizerConfig/comments}}", - "{{SanitizerConfig/dataAttributes}}" - ]» -1. |config|'s [=map/keys|key set=] [=list/contains=] either: - 1. both "{{SanitizerConfig/elements}}" and "{{SanitizerConfig/attributes}}", - but neither of - "{{SanitizerConfig/removeElements}}" or "{{SanitizerConfig/removeAttributes}}". - 1. or both - "{{SanitizerConfig/removeElements}}" and "{{SanitizerConfig/removeAttributes}}", - but neither of - "{{SanitizerConfig/elements}}" or "{{SanitizerConfig/attributes}}". -1. For any |key| of «[ - "{{SanitizerConfig/replaceWithChildrenElements}}", - "{{SanitizerConfig/removeElements}}", - "{{SanitizerConfig/attributes}}", - "{{SanitizerConfig/removeAttributes}}" - ]» where |config|[|key|] [=map/exists=]: - 1. |config|[|key|] is [=SanitizerNameList/canonical=]. -1. If |config|["{{SanitizerConfig/elements}}"] [=map/exists=]: - 1. |config|["{{SanitizerConfig/elements}}"] is [=SanitizerNameWithAttributesList/canonical=]. -1. For any |key| of «[ - "{{SanitizerConfig/comments}}", - "{{SanitizerConfig/dataAttributes}}" - ]»: - 1. if |config|[|key|] [=map/exists=], |config|[|key|] is a {{boolean}}. - -
- -
-A |list| of names is canonical if all these -conditions are met: - -1. |list|[|key|] is a [=/list=]. -1. [=list/iterate|For all=] of its |list|[|key|]'s members |name|: - 1. |name| is a [=dictionary=]. - 1. |name|'s [=map/keys|key set=] [=set/equals=] «[ - "{{SanitizerElementNamespace/name}}", "{{SanitizerElementNamespace/namespace}}" - ]» - 1. |name|'s [=map/values=] are [=string=]s. - -
- -
-A |list| of names is canonical -if all these conditions are met: - -1. |list|[|key|] is a [=/list=]. -1. [=list/iterate|For all=] of its |list|[|key|]'s members |name|: - 1. |name| is a [=dictionary=]. - 1. |name|'s [=map/keys|key set=] [=set/equals=] one of: - 1. «[ - "{{SanitizerElementNamespace/name}}", - "{{SanitizerElementNamespace/namespace}}" - ]» - 1. «[ - "{{SanitizerElementNamespace/name}}", - "{{SanitizerElementNamespace/namespace}}", - "{{SanitizerElementNamespaceWithAttributes/attributes}}" - ]» - 1. «[ - "{{SanitizerElementNamespace/name}}", - "{{SanitizerElementNamespace/namespace}}", - "{{SanitizerElementNamespaceWithAttributes/removeAttributes}}" - ]» - 1. |name|["{{SanitizerElementNamespace/name}}"] and - |name|["{{SanitizerElementNamespace/namespace}}"] are [=string=]s. - 1. |name|["{{SanitizerElementNamespaceWithAttributes/attributes}}"] and - |name|["{{SanitizerElementNamespaceWithAttributes/removeAttributes}}"] - are [=SanitizerNameList/canonical=] if they [=map/exist=]. - -
- - -
-To canonicalize a configuration |config| with a [=boolean=] |safe|: - -Note: The initial set of [=assert=]s assert properties of the built-in - constants, like the [=built-in default config|defaults=] and - the lists of known [=known elements|elements=] and - [=known attributes|attributes=]. - -1. [=Assert=]: [=built-in default config=] is [=SanitizerConfig/canonical=]. -1. [=Assert=]: [=built-in default config=]["elements"] is a [=subset=] of [=known elements=]. -1. [=Assert=]: [=built-in default config=]["attributes"] is a [=subset=] of [=known attributes=]. -1. [=Assert=]: «[ - "elements" → [=known elements=], - "attributes" → [=known attributes=], - ]» is [=SanitizerConfig/canonical=]. -1. If |config| is [=list/empty=] and not |safe|, then return «[]» -1. If |config| is not [=SanitizerConfig/valid=], then [=throw=] a {{TypeError}}. -1. Let |result| be a new [=dictionary=]. -1. For each |key| of «[ - "{{SanitizerConfig/elements}}", - "{{SanitizerConfig/removeElements}}", - "{{SanitizerConfig/replaceWithChildrenElements}}" ]»: - 1. If |config|[|key|] exists, set |result|[|key|] to the result of running - [=canonicalize a sanitizer element list=] on |config|[|key|] with - [=HTML namespace=] as the default namespace. -1. For each |key| of «[ - "{{SanitizerConfig/attributes}}", - "{{SanitizerConfig/removeAttributes}}" ]»: - 1. If |config|[|key|] exists, set |result|[|key|] to the result of running - [=canonicalize a sanitizer element list=] on |config|[|key|] with `null` as - the default namespace. -1. Set |result|["{{SanitizerConfig/comments}}"] to - |config|["{{SanitizerConfig/comments}}"]. -1. Let |default| be the result of [=canonicalizing a configuration=] for the - [=built-in default config=]. -1. If |safe|: - 1. If |config|["{{SanitizerConfig/elements}}"] [=map/exists=]: - 1. Let |elementBlockList| be the [=set/difference=] between - [=known elements=] |default|["{{SanitizerConfig/elements}}"]. 
- - Note: The "natural" way to enforce the default element list would be - to intersect with it. But that would also eliminate any unknown - (i.e., non-HTML supplied element, like <foo>). So we - construct this helper to be able to use it to subtract any "unsafe" - elements. - 1. Set |result|["{{SanitizerConfig/elements}}"] to the - [=set/difference=] of |result|["{{SanitizerConfig/elements}}"] and - |elementBlockList|. - 1. If |config|["{{SanitizerConfig/removeElements}}"] [=map/exists=]: - 1. Set |result|["{{SanitizerConfig/elements}}"] to the - [=set/difference=] of |default|["{{SanitizerConfig/elements}}"] - and |result|["{{SanitizerConfig/removeElements}}"]. - 1. [=set/Remove=] "{{SanitizerConfig/removeElements}}" from |result|. - 1. If neither |config|["{{SanitizerConfig/elements}}"] nor - |config|["{{SanitizerConfig/removeElements}}"] [=map/exist=]: - 1. Set |result|["{{SanitizerConfig/elements}}"] to - |default|["{{SanitizerConfig/elements}}"]. - 1. If |config|["{{SanitizerConfig/attributes}}"] [=map/exists=]: - 1. Let |attributeBlockList| be the [=set/difference=] between - [=known attributes=] and |default|["{{SanitizerConfig/attributes}}"]; - 1. Set |result|["{{SanitizerConfig/attributes}}"] to the - [=set/difference=] of |result|["{{SanitizerConfig/attributes}}"] and - |attributeBlockList|. - 1. If |config|["{{SanitizerConfig/removeAttributes}}"] [=map/exists=]: - 1. Set |result|["{{SanitizerConfig/attributes}}"] to the - [=set/difference=] of |default|["{{SanitizerConfig/attributes}}"] - and |result|["{{SanitizerConfig/removeAttributes}}"]. - 1. [=set/Remove=] "{{SanitizerConfig/removeAttributes}}" from |result|. - 1. If neither |config|["{{SanitizerConfig/attributes}}"] nor - |config|["{{SanitizerConfig/removeAttributes}}"] [=map/exist=]: - 1. Set |result|["{{SanitizerConfig/attributes}}"] to - |default|["{{SanitizerConfig/attributes}}"]. -1. Else (if not |safe|): - 1. 
If neither |config|["{{SanitizerConfig/elements}}"] nor - |config|["{{SanitizerConfig/removeElements}}"] [=map/exist=]: - 1. Set |result|["{{SanitizerConfig/elements}}"] to - |default|["{{SanitizerConfig/elements}}"]. - 1. If neither |config|["{{SanitizerConfig/attributes}}"] nor - |config|["{{SanitizerConfig/removeAttributes}}"] [=map/exist=]: - 1. Set |result|["{{SanitizerConfig/attributes}}"] to - |default|["{{SanitizerConfig/attributes}}"]. -1. [=Assert=]: |result| is [=SanitizerConfig/valid=]. -1. [=Assert=]: |result| is [=SanitizerConfig/canonical=]. +To safeify a |config|, do this: + +1. [=Assert=]: The [=built-in safe baseline config=] has + {{SanitizerConfig/removeElements}} and {{SanitizerConfig/removeAttributes}} + keys set, but not {{SanitizerConfig/elements}}, + {{SanitizerConfig/replaceWithChildrenElements}}, or + {{SanitizerConfig/attributes}}. +1. Let |result| be a copy of |config|. +1. [=list/For each=] |elem| in + [=built-in safe baseline config=][{{SanitizerConfig/removeElements}}]: + 1. Call |result|.removeElement(|elem|) +1. [=list/For each=] |attr| in + [=built-in safe baseline config=][{{SanitizerConfig/removeAttributes}}]: + 1. Call |result|.removeAttribute(|attr|) 1. Return |result|.
-In order to canonicalize a sanitizer element list |list|, with a -default namespace |defaultNamespace|, run the following steps: - -1. Let |result| be a new [=ordered set=]. -2. [=list/iterate|For each=] |name| in |list|, call - [=canonicalize a sanitizer name=] on |name| with |defaultNamespace| and - [=set/append=] to |result|. -3. Return |result|. +To set a config |config| on a {{Sanitizer}} |sanitizer|, do this: + +1. [=Assert=]: |config| is a [=dictionary=]. +1. [=list/iterate|For each=] |item| of |config|[{{SanitizerConfig/elements}}] do: + 1. Call |sanitizer|.element(|item|). +1. [=list/iterate|For each=] |item| of |config|[{{SanitizerConfig/removeElements}}] do: + 1. Call |sanitizer|.removeElement(|item|). +1. [=list/iterate|For each=] |item| of |config|[{{SanitizerConfig/replaceWithChildrenElements}}] do: + 1. Call |sanitizer|.replaceWithChildren(|item|). +1. [=list/iterate|For each=] |item| of |config|[{{SanitizerConfig/attributes}}] do: + 1. Call |sanitizer|.allowAttribute(|item|). +1. [=list/iterate|For each=] |item| of |config|[{{SanitizerConfig/removeAttributes}}] do: + 1. Call |sanitizer|.removeAttribute(|item|). +1. Call |sanitizer|.setComment(|config|[{{SanitizerConfig/comments}}]). +1. Call |sanitizer|.setDataAttributes(|config|[{{SanitizerConfig/dataAttributes}}]). +1. Call |sanitizer|.setOtherMarkup(|config|[{{SanitizerConfig/otherMarkup}}]). + +Note: Previous versions of this spec had elaborate definitions of how to + canonicalize a config. This has now effectively been moved into the method + definitions.
@@ -688,7 +567,7 @@ namespace |defaultNamespace|, run the following steps: 1. [=Assert=]: |name| is a [=dictionary=] and |name|["name"] [=map/exists=]. 1. Return «[
"`name`" → |name|["name"],
- "`namespace`" → |name|["namespace"] if it [=map/exists=], otherwise |defaultNamespace|
+ "`namespace`" → ( |name|["namespace"] if it [=map/exists=], otherwise |defaultNamespace| )
]».
@@ -730,38 +609,45 @@ regard to order: ## Defaults ## {#sanitization-defaults} -Note: The defaults should follow a certain form, which is checked for at the - beginning of [=canonicalize a configuration=]. +There are four builtins: + +* The [=built-in safe default config=], +* the [=built-in unsafe default config=], +* the [=built-in safe baseline config=], and +* the [=navigating URL attributes list=]. -The built-in default config is as follows: +The built-in safe default config is the same as the [=built-in safe baseline config=]. + +ISSUE: Determine if this actually holds. + + +The built-in unsafe default config is meant to allow anything. +It is as follows: ``` { - elements: [....], - attributes: [....], + elements: [], + removeElements: [], + attributes: [], + removeAttributes: [], comments: true, + otherMarkup: true, } ``` -The known elements are as follows: -``` -[ - { name: "div", namespace: "http://www.w3.org/1999/xhtml" }, - ... -] -``` - -The known attributes are as follows: +The built-in safe baseline config is meant to block only +script-content, and nothing else. It is as follows: ``` -[ - { name: "class", namespace: null }, - ... -] +{ + removeElements: [ + { name: "script", namespace: "http://www.w3.org/1999/xhtml" }, + { name: "script", namespace: "http://www.w3.org/2000/svg" } + ], + removeAttributes: [....], + comments: true, + otherMarkup: true +} ``` -Note: The [=known elements=] and [=known attributes=] should be derived from the - HTML5 specification, rather than being explicitly listed here. Currently, - there are no mechanics to do so. -
The navigating URL attributes list, for which "`javascript:`" navigations are unsafe, are as follows: @@ -769,27 +655,27 @@ navigations are unsafe, are as follows: «[
[ - { "`name`" → "`a`", "`namespace`" → "[=HTML namespace=]" }, + { "`name`" → "`a`", "`namespace`" → [=HTML namespace=] }, { "`name`" → "`href`", "`namespace`" → `null` } ],
[ - { "`name`" → "`area`", "`namespace`" → "[=HTML namespace=]" }, + { "`name`" → "`area`", "`namespace`" → [=HTML namespace=] }, { "`name`" → "`href`", "`namespace`" → `null` } ],
[ - { "`name`" → "`form`", "`namespace`" → "[=HTML namespace=]" }, + { "`name`" → "`form`", "`namespace`" → [=HTML namespace=] }, { "`name`" → "`action`", "`namespace`" → `null` } ],
[ - { "`name`" → "`input`", "`namespace`" → "[=HTML namespace=]" }, + { "`name`" → "`input`", "`namespace`" → [=HTML namespace=] }, { "`name`" → "`formaction`", "`namespace`" → `null` } ],
[ - { "`name`" → "`button`", "`namespace`" → "[=HTML namespace=]" }, + { "`name`" → "`button`", "`namespace`" → [=HTML namespace=] }, { "`name`" → "`formaction`", "`namespace`" → `null` } ],