From 8e598d7f2395d65e64716bb34f28f6b800740815 Mon Sep 17 00:00:00 2001 From: adon Date: Mon, 17 Aug 2015 13:51:25 +0800 Subject: [PATCH 1/4] refactored a misspelled var name --- src/html-purify.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/html-purify.js b/src/html-purify.js index 21fe11d..7dfb7e1 100755 --- a/src/html-purify.js +++ b/src/html-purify.js @@ -11,7 +11,7 @@ See the accompanying LICENSE file for terms. derivedState = require('./derived-states.js'), xssFilters = require('xss-filters'), CssParser = require('css-js'), - hrefAttribtues = tagAttList.HrefAttributes, + hrefAttributes = tagAttList.HrefAttributes, voidElements = tagAttList.VoidElements; function Purifier(config) { @@ -116,7 +116,7 @@ See the accompanying LICENSE file for terms. attrValString += ' ' + key; if (value !== null) { - attrValString += '="' + (hrefAttribtues[key] ? xssFilters.uriInDoubleQuotedAttr(decodeURI(value)) : value) + '"'; + attrValString += '="' + (hrefAttributes[key] ? xssFilters.uriInDoubleQuotedAttr(decodeURI(value)) : value) + '"'; } } } From be7e501fded29797cfe962caf51471a11b7ad986 Mon Sep 17 00:00:00 2001 From: adon Date: Mon, 17 Aug 2015 18:19:04 +0800 Subject: [PATCH 2/4] fix tag balancing logics - logics simplified - optional tags are ignored during balancing - ignore self-closing solidus, which is required only for foreign elements --- src/html-purify.js | 67 ++++++++++++++++++++++++++------------- src/tag-attr-list.js | 3 ++ tests/test-vectors.js | 22 +++++++++---- tests/unit/html-purify.js | 6 ++-- 4 files changed, 67 insertions(+), 31 deletions(-) diff --git a/src/html-purify.js b/src/html-purify.js index 7dfb7e1..e48f0e8 100755 --- a/src/html-purify.js +++ b/src/html-purify.js @@ -12,7 +12,8 @@ See the accompanying LICENSE file for terms. xssFilters = require('xss-filters'), CssParser = require('css-js'), hrefAttributes = tagAttList.HrefAttributes, - voidElements = tagAttList.VoidElements; + voidElements = tagAttList.VoidElements, + optionalElements = tagAttList.OptionalElements; function Purifier(config) { var that = this; @@ -41,14 +42,14 @@ See the accompanying LICENSE file for terms. that.cssParser = new CssParser({"ver": "strict", "throwError": false}); } - // TODO: introduce polyfill for Array.indexOf - function contains(arr, element) { - for (var i = 0, len = arr.length; i < len; i++) { + // TODO: introduce polyfill for Array.lastIndexOf + function arrayLastIndexOf(arr, element) { + for (var i = arr.length - 1; i >= 0; i--) { if (arr[i] === element) { - return true; + return i; } } - return false; + return -1; } function processTransition(prevState, nextState, i) { @@ -68,30 +69,40 @@ See the accompanying LICENSE file for terms. idx = parser.getCurrentTagIndex(); tagName = parser.getCurrentTag(idx); - if (contains(this.tagsWhitelist, tagName)) { + if (arrayLastIndexOf(this.tagsWhitelist, tagName) !== -1) { if (idx) { - if (this.config.enableTagBalancing) { - // add closing tags for any opened ones before closing the current one - while((openedTag = this.openedTags.pop()) && openedTag !== tagName) { - this.output += ''; - } - // openedTag is undefined if tagName is never found in all openedTags, no output needed - if (openedTag) { - this.output += ''; + if (this.config.enableTagBalancing && !optionalElements[tagName]) { + // relaxed tag balancing, accept it as long as the tag exists in the stack + idx = arrayLastIndexOf(this.openedTags, tagName); + + if (idx >= 0) { + this.output += ''; + this.openedTags.splice(idx, 1); } + + // // add closing tags for any opened ones before closing the current one + // while((openedTag = this.openedTags.pop()) && openedTag !== tagName) { + // this.output += ''; + // } + // // openedTag is undefined if tagName is never found in all openedTags, no output needed + // if (openedTag) { + // this.output += ''; + // } } else { this.output += ''; } } else { - // - void elements only have a start tag; end tags must not be specified for void elements. - this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName]; + // void elements only have a start tag; end tags must not be specified for void elements. + // this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName]; + this.hasSelfClosing = voidElements[tagName]; // push the tagName into the openedTags stack if not found: // - a self-closing tag or a void element - this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName); + // this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName); + this.config.enableTagBalancing && !this.hasSelfClosing && !optionalElements[tagName] && this.openedTags.push(tagName); if (prevState === 35 || prevState === 36 || @@ -101,7 +112,7 @@ See the accompanying LICENSE file for terms. attrValString = ''; for (key in this.attrVals) { - if (contains(this.attributesWhitelist, key)) { + if (arrayLastIndexOf(this.attributesWhitelist, key) !== -1) { value = this.attrVals[key]; if (key === "style") { // TODO: move style to a const @@ -123,12 +134,13 @@ See the accompanying LICENSE file for terms. // handle self-closing tags this.output += '<' + tagName + attrValString + (this.hasSelfClosing ? ' />' : '>'); + // this.output += '<' + tagName + attrValString + '>'; } } // reinitialize once tag has been written to output this.attrVals = {}; - this.hasSelfClosing = 0; + // this.hasSelfClosing = false; break; case derivedState.TransitionName.ATTR_TO_AFTER_ATTR: @@ -148,7 +160,18 @@ See the accompanying LICENSE file for terms. if (prevState === 35) { this.attrVals[parser.getAttributeName()] = null; } - this.hasSelfClosing = 1; + + /* According to https://html.spec.whatwg.org/multipage/syntax.html#start-tags + * "Then, if the element is one of the void elements, or if the element is a foreign element, then there may be a single U+002F SOLIDUS character (/). + * This character has no effect on void elements, but on foreign elements it marks the start tag as self-closing." + */ + // that means only foreign elements can self-close (self-closing is optional for void elements) + // no foreign elements will be allowed, so the following logic can be commented + // openedTag = parser.getStartTagName(); + // if (openedTag === 'svg' || openedTag === 'math') { // ... + // this.hasSelfClosing = true; + // } + break; } } @@ -159,7 +182,7 @@ See the accompanying LICENSE file for terms. that.output = ''; that.openedTags = []; that.attrVals = {}; - that.hasSelfClosing = 0; + // that.hasSelfClosing = false; that.parser.reset(); that.parser.contextualize(data); diff --git a/src/tag-attr-list.js b/src/tag-attr-list.js index 04cacbb..eac58ec 100644 --- a/src/tag-attr-list.js +++ b/src/tag-attr-list.js @@ -261,3 +261,6 @@ exports.HrefAttributes = { // Void elements only have a start tag; end tags must not be specified for void elements. // https://html.spec.whatwg.org/multipage/syntax.html#void-elements exports.VoidElements = {"area":1, "base":1, "br":1, "col":1, "embed":1, "hr":1, "img":1, "input":1, "keygen":1, "link":1, "menuitem":1, "meta":1, "param":1, "source":1, "track":1, "wbr":1}; + +// https://html.spec.whatwg.org/multipage/syntax.html#optional-tags +exports.OptionalElements = {/*"plaintext":1, "html":1, "head":1, "body":1,*/ "li":1, "dt":1, "dd":1, "p":1, "rt":1, "rp":1, "optgroup":1, "option":1, "colgroup":1, "caption":1, "thead":1, "tbody":1, "tfoot":1, "tr":1, "td":1, "th":1}; diff --git a/tests/test-vectors.js b/tests/test-vectors.js index 7db885a..6d98c65 100644 --- a/tests/test-vectors.js +++ b/tests/test-vectors.js @@ -628,12 +628,12 @@ var generalVectors = [ { id: 10, input: "
normal tag made standalone with bogus trunc attr", - output: "
normal tag made standalone with bogus trunc attr" + output: "
normal tag made standalone with bogus trunc attr" }, { id: 11, input: "
normal tag made standalone with trunc attr bad val", - output: "
normal tag made standalone with trunc attr bad val" + output: "
normal tag made standalone with trunc attr bad val" }, { id: 12, @@ -648,7 +648,7 @@ var generalVectors = [ { id: 14, input: "
normal tag made standalone with value", - output: "
normal tag made standalone with value" + output: "
normal tag made standalone with value" }, //style attribute tests { @@ -779,7 +779,7 @@ var generalVectors = [ { id: 40, input: "