Skip to content
This repository has been archived by the owner on Jul 15, 2019. It is now read-only.

Commit

Permalink
simplified tag balancing logic
Browse files Browse the repository at this point in the history
- ignored balancing optional elements
  • Loading branch information
adon committed Aug 17, 2015
1 parent a3a15a5 commit da0c27c
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 30 deletions.
67 changes: 45 additions & 22 deletions src/html-purify.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ See the accompanying LICENSE file for terms.
xssFilters = require('xss-filters'),
CssParser = require('css-js'),
hrefAttributes = tagAttList.HrefAttributes,
voidElements = tagAttList.VoidElements;
voidElements = tagAttList.VoidElements,
optionalElements = tagAttList.OptionalElements;

function Purifier(config) {
var that = this;
Expand Down Expand Up @@ -43,14 +44,14 @@ See the accompanying LICENSE file for terms.
that.cssParser = new CssParser({"ver": "strict", "throwError": false});
}

// TODO: introduce polyfill for Array.indexOf
function contains(arr, element) {
for (var i = 0, len = arr.length; i < len; i++) {
// TODO: introduce polyfill for Array.lastIndexOf
function arrayLastIndexOf(arr, element) {
for (var i = arr.length - 1; i >= 0; i--) {
if (arr[i] === element) {
return true;
return i;
}
}
return false;
return -1;
}

function processTransition(prevState, nextState, i) {
Expand All @@ -70,30 +71,40 @@ See the accompanying LICENSE file for terms.
idx = parser.getCurrentTagIndex();
tagName = parser.getCurrentTag(idx);

if (contains(this.tagsWhitelist, tagName)) {
if (arrayLastIndexOf(this.tagsWhitelist, tagName) !== -1) {

if (idx) {
if (this.config.enableTagBalancing) {
// add closing tags for any opened ones before closing the current one
while((openedTag = this.openedTags.pop()) && openedTag !== tagName) {
this.output += '</' + openedTag + '>';
}
// openedTag is undefined if tagName is never found in all openedTags, no output needed
if (openedTag) {
this.output += '</' + openedTag + '>';
if (this.config.enableTagBalancing && !optionalElements[tagName]) {
// relaxed tag balancing, accept it as long as the tag exists in the stack
idx = arrayLastIndexOf(this.openedTags, tagName);

if (idx >= 0) {
this.output += '</' + tagName + '>';
this.openedTags.splice(idx, 1);
}

// // add closing tags for any opened ones before closing the current one
// while((openedTag = this.openedTags.pop()) && openedTag !== tagName) {
// this.output += '</' + openedTag + '>';
// }
// // openedTag is undefined if tagName is never found in all openedTags, no output needed
// if (openedTag) {
// this.output += '</' + openedTag + '>';
// }
}
else {
this.output += '</' + tagName + '>';
}
}
else {
// - void elements only have a start tag; end tags must not be specified for void elements.
this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
// void elements only have a start tag; end tags must not be specified for void elements.
// this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
this.hasSelfClosing = voidElements[tagName];

// push the tagName into the openedTags stack if not found:
// - a self-closing tag or a void element
this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName);
// this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName);
this.config.enableTagBalancing && !this.hasSelfClosing && !optionalElements[tagName] && this.openedTags.push(tagName);

if (prevState === 35 ||
prevState === 36 ||
Expand All @@ -103,7 +114,7 @@ See the accompanying LICENSE file for terms.

attrValString = '';
for (key in this.attrVals) {
if (contains(this.attributesWhitelist, key)) {
if (arrayLastIndexOf(this.attributesWhitelist, key) !== -1) {
value = this.attrVals[key];

if (key === "style") { // TODO: move style to a const
Expand All @@ -125,12 +136,13 @@ See the accompanying LICENSE file for terms.

// handle self-closing tags
this.output += '<' + tagName + attrValString + (this.hasSelfClosing ? ' />' : '>');
// this.output += '<' + tagName + attrValString + '>';

}
}
// reinitialize once tag has been written to output
this.attrVals = {};
this.hasSelfClosing = 0;
// this.hasSelfClosing = false;
break;

case derivedState.TransitionName.ATTR_TO_AFTER_ATTR:
Expand All @@ -150,7 +162,18 @@ See the accompanying LICENSE file for terms.
if (prevState === 35) {
this.attrVals[parser.getAttributeName()] = null;
}
this.hasSelfClosing = 1;

/* According to https://html.spec.whatwg.org/multipage/syntax.html#start-tags
* "Then, if the element is one of the void elements, or if the element is a foreign element, then there may be a single U+002F SOLIDUS character (/).
* This character has no effect on void elements, but on foreign elements it marks the start tag as self-closing."
*/
// that means only foreign elements can self-close (self-closing is optional for void elements)
// no foreign elements will be allowed, so the following logic can be commented
// openedTag = parser.getStartTagName();
// if (openedTag === 'svg' || openedTag === 'math') { // ...
// this.hasSelfClosing = true;
// }

break;
}
}
Expand All @@ -161,7 +184,7 @@ See the accompanying LICENSE file for terms.
that.output = '';
that.openedTags = [];
that.attrVals = {};
that.hasSelfClosing = 0;
// that.hasSelfClosing = false;
that.parser.reset();
that.parser.contextualize(data);

Expand Down
3 changes: 3 additions & 0 deletions src/tag-attr-list.js
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,6 @@ exports.HrefAttributes = {
// Void elements only have a start tag; end tags must not be specified for void elements.
// https://html.spec.whatwg.org/multipage/syntax.html#void-elements
exports.VoidElements = {"area":1, "base":1, "br":1, "col":1, "embed":1, "hr":1, "img":1, "input":1, "keygen":1, "link":1, "menuitem":1, "meta":1, "param":1, "source":1, "track":1, "wbr":1};

// https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
exports.OptionalElements = {/*"plaintext":1, "html":1, "head":1, "body":1,*/ "li":1, "dt":1, "dd":1, "p":1, "rt":1, "rp":1, "optgroup":1, "option":1, "colgroup":1, "caption":1, "thead":1, "tbody":1, "tfoot":1, "tr":1, "td":1, "th":1};
10 changes: 5 additions & 5 deletions tests/test-vectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -628,12 +628,12 @@ var generalVectors = [
{
id: 10,
input: "<table><tr bz/></table>normal tag made standalone with bogus trunc attr",
output: "<table><tr /></table>normal tag made standalone with bogus trunc attr"
output: "<table><tr></table>normal tag made standalone with bogus trunc attr"
},
{
id: 11,
input: "<table><tr color/></table>normal tag made standalone with trunc attr bad val",
output: "<table><tr /></table>normal tag made standalone with trunc attr bad val"
output: "<table><tr></table>normal tag made standalone with trunc attr bad val"
},
{
id: 12,
Expand All @@ -648,7 +648,7 @@ var generalVectors = [
{
id: 14,
input: "<table><tr size=\"4\"/></table>normal tag made standalone with value",
output: "<table><tr size=\"4\" /></table>normal tag made standalone with value"
output: "<table><tr size=\"4\"></table>normal tag made standalone with value"
},
//style attribute tests
{
Expand Down Expand Up @@ -779,7 +779,7 @@ var generalVectors = [
{
id: 40,
input: "<option selected />",
output: "<option selected />"
output: "<option selected>"
},
{
id: 41,
Expand All @@ -799,7 +799,7 @@ var generalVectors = [
{
id: 44,
input: "<option selected/>",
output: "<option selected />"
output: "<option selected>"
},
{
id: 45,
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/html-purify.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ Authors: Aditya Mahendrakar <[email protected]>
var html = "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed>123</embed><br /><br/><p>";

// with tag balancing enabled by default
var output = (new Purifier()).purify(html);
assert.equal(output, 'foobar<a href=\"123\">hello<b>world</b></a><embed />123<br /><br /><p></p>');
var output = (new Purifier({enableTagBalancing:true})).purify(html);
assert.equal(output, 'foobar<a href="123">hello<b>world</a><embed />123<br /><br /><p></b>');

// with tag balancing disabled
var output = (new Purifier({enableTagBalancing:false})).purify(html);
assert.equal(output, "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed />123</embed><br /><br /><p>");
assert.equal(output, '</div>foo</h2>bar<a href="123">hello<b>world</a><embed />123</embed><br /><br /><p>');
});

it('should handle all vectors mentioned in https://html5sec.org', function(){
Expand Down

0 comments on commit da0c27c

Please sign in to comment.