Skip to content
This repository has been archived by the owner on Jul 15, 2019. It is now read-only.

preserve quoting in attribute values #21

Closed
wants to merge 2 commits
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/derived-states.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ See the accompanying LICENSE file for terms.
// this will eventually move to context parser and it will not be a sparse matrix
// Transition table based on https://html.spec.whatwg.org/multipage/syntax.html
var DerivedState = {};

/*
// used to generate DerivedState.Transitions
DerivedState.TransitionsSparse = {
1: {1: 1},
10: {1: 2, 43: 6},
Expand All @@ -24,6 +27,7 @@ DerivedState.TransitionsSparse = {
42: {1: 2, 43: 6},
43: {1: 2},
};
*/

DerivedState.TransitionName = {};

Expand Down
62 changes: 50 additions & 12 deletions src/html-purify.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@ See the accompanying LICENSE file for terms.
derivedState = require('./derived-states.js'),
xssFilters = require('xss-filters'),
CssParser = require('css-js'),
hrefAttribtues = tagAttList.HrefAttributes,
voidElements = tagAttList.VoidElements;
hrefAttributes = tagAttList.HrefAttributes,
voidElements = tagAttList.VoidElements,
attrFilter = {
38: xssFilters.uriInDoubleQuotedAttr,
39: xssFilters.uriInSingleQuotedAttr,
40: xssFilters.uriInUnQuotedAttr
},
quote = {38: '"', 39: '\'', 40: ''};

function Purifier(config) {
var that = this;
Expand Down Expand Up @@ -53,11 +59,13 @@ See the accompanying LICENSE file for terms.
return false;
}


function processTransition(prevState, nextState, i) {
/* jshint validthis: true */
/* jshint expr: true */
var parser = this.parser,
idx, tagName, attrValString, openedTag, key, value;
idx, tagName, attrValString, openedTag, key, value,
quoteState, quoteChar, attrValObj;


switch (derivedState.Transitions[prevState][nextState]) {
Expand Down Expand Up @@ -98,13 +106,18 @@ See the accompanying LICENSE file for terms.
if (prevState === 35 ||
prevState === 36 ||
prevState === 40) {
this.attrVals[parser.getAttributeName()] = parser.getAttributeValue();
this.attrVals.push({
'name': parser.getAttributeName(),
'value': parser.getAttributeValue(),
'quoteState': prevState
});
}

attrValString = '';
for (key in this.attrVals) {
for (var j = 0, len = this.attrVals.length ; j < len; j++) {
key = this.attrVals[j].name;
if (contains(this.attributesWhitelist, key)) {
value = this.attrVals[key];
value = this.attrVals[j].value;

if (key === "style") { // TODO: move style to a const
if (value === null) {
Expand All @@ -118,7 +131,9 @@ See the accompanying LICENSE file for terms.

attrValString += ' ' + key;
if (value !== null) {
attrValString += '="' + (hrefAttribtues[key] ? xssFilters.uriInDoubleQuotedAttr(decodeURI(value)) : value) + '"';
quoteState = this.attrVals[j].quoteState;
quoteChar = quote[quoteState];
attrValString += '=' + quoteChar + (hrefAttributes[key] ? attrFilter[quoteState](decodeURI(value)) : value) + quoteChar;
}
}
}
Expand All @@ -129,16 +144,36 @@ See the accompanying LICENSE file for terms.
}
}
// reinitialize once tag has been written to output
this.attrVals = {};
this.attrVals = [];
this.hasSelfClosing = 0;
break;

case derivedState.TransitionName.ATTR_TO_AFTER_ATTR:
this.attrVals[parser.getAttributeName()] = null;
//this.attrVals[parser.getAttributeName()]['value'] = null;
this.attrVals.push({
'name': parser.getAttributeName(),
'value': null
});
break;

case derivedState.TransitionName.ATTR_VAL_TO_AFTER_ATTR_VAL:
this.attrVals[parser.getAttributeName()] = parser.getAttributeValue() || '';
// build the entry for this attribute, recording the quoting state (prevState) its value was parsed in
attrValObj = {
'name': parser.getAttributeName(),
'value': parser.getAttributeValue() || '',
'quoteState': prevState
};

idx = this.attrVals.length - 1;

// if the attribute was added to attrVals in case ATTR_TO_AFTER_ATTR,
// then rewrite its value
if (this.attrVals.length > 0 && this.attrVals[idx].name === parser.getAttributeName() ){
this.attrVals[idx] = attrValObj;

} else {
this.attrVals.push(attrValObj);
}
break;

//case derivedState.TransitionName.TAG_OPEN_TO_MARKUP_OPEN:
Expand All @@ -148,7 +183,10 @@ See the accompanying LICENSE file for terms.
case derivedState.TransitionName.TO_SELF_CLOSING_START:
// boolean attributes may not have a value
if (prevState === 35) {
this.attrVals[parser.getAttributeName()] = null;
this.attrVals.push({
'name': parser.getAttributeName(),
'value': null
});
}
this.hasSelfClosing = 1;
break;
Expand All @@ -160,7 +198,7 @@ See the accompanying LICENSE file for terms.

that.output = '';
that.openedTags = [];
that.attrVals = {};
that.attrVals = [];
that.hasSelfClosing = 0;
that.parser.reset();
that.parser.contextualize(data);
Expand Down
29 changes: 17 additions & 12 deletions tests/test-vectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ var html5secVectors = [
{
id: 10,
input: "<video poster=javascript:alert(1)//></video>",
output: "<video poster=\"x-javascript:alert(1)//\"></video>"
output: "<video poster=x-javascript:alert(1)//></video>"
},
{
id: 11,
Expand All @@ -75,7 +75,7 @@ var html5secVectors = [
{
id: 14,
input: "<input pattern=^((a+.)a)+$ value=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!>",
output: "<input pattern=\"^((a+.)a)+$\" value=\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!\" />"
output: "<input pattern=^((a+.)a)+$ value=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa! />"
},
{
id: 15,
Expand Down Expand Up @@ -120,7 +120,7 @@ var html5secVectors = [
{
id: 23,
input: "<form id=test onforminput=alert(1)><input></form><button form=test onformchange=alert(2)>X</button>",
output: "<form id=\"test\"><input /></form><button form=\"test\">X</button>"
output: "<form id=test><input /></form><button form=test>X</button>"
},
{
id: 24,
Expand Down Expand Up @@ -180,7 +180,7 @@ var html5secVectors = [
{
id: 35,
input: "1<a href=#><line xmlns=urn:schemas-microsoft-com:vml style=behavior:url(#default#vml);position:absolute href=javascript:alert(1) strokecolor=white strokeweight=1000px from=0 to=1000 /></a>",
output: "1<a href=\"#\"></a>"
output: "1<a href=#></a>"
},
{
id: 36,
Expand All @@ -190,7 +190,7 @@ var html5secVectors = [
{
id: 37,
input: "<!--<img src=\"--><img src=x onerror=alert(1)//\">",
output: "<img src=\"x\" />"
output: "<img src=x />"
},
{
id: 38,
Expand All @@ -205,7 +205,7 @@ var html5secVectors = [
{
id: 40,
input: "<style><img src=\"</style><img src=x onerror=alert(1)//\">",
output: "<img src=\"x\" />"
output: "<img src=x />"
},
{
id: 41,
Expand Down Expand Up @@ -490,12 +490,12 @@ var html5secVectors = [
{
id: 98,
input: "<!-- IE 5-9 -->\r\n<div id=d><x xmlns=\"><iframe onload=alert(1)\"></div>\n<script>d.innerHTML+=\'\';</script>\r\n\r\n<!-- IE 10 in IE5-9 Standards mode -->\r\n<div id=d><x xmlns=\'\"><iframe onload=alert(2)//\'></div>\n<script>d.innerHTML+=\'\';</script>",
output: "\n<div id=\"d\"></div>\n\n\n\n<div id=\"d\"></div>\n"
output: "\n<div id=d></div>\n\n\n\n<div id=d></div>\n"
},
{
id: 99,
input: "<div id=d><div style=\"font-family:\'sans\\27\\2F\\2A\\22\\2A\\2F\\3B color\\3Ared\\3B\'\">X</div></div>\n<script>with(document.getElementById(\"d\"))innerHTML=innerHTML</script>",
output: "<div id=\"d\"><div style=\"font-family:\'sans\\27\\2F\\2A\\22\\2A\\2F\\3B color\\3Ared\\3B\'\">X</div></div>\n"
output: "<div id=d><div style=\"font-family:\'sans\\27\\2F\\2A\\22\\2A\\2F\\3B color\\3Ared\\3B\'\">X</div></div>\n"
},
{
id: 100,
Expand Down Expand Up @@ -545,7 +545,7 @@ var html5secVectors = [
{
id: 109,
input: "<!-- IE 5-8 standards mode -->\r\n<a href=http://foo.bar/#x=`y></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\">\r\n\r\n<!-- IE 5-9 standards mode -->\r\n<!a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(2)//\">\r\n<?a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(3)//\">",
output: "\n<a href=\"http://foo.bar/#x&#61;&#96;y\"></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\" />\n\n\n<img alt=\"`><img src=xx:x onerror=alert(2)//\" />\n<img alt=\"`><img src=xx:x onerror=alert(3)//\" />"
output: "\n<a href=http://foo.bar/#x&#61;&#96;y></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\" />\n\n\n<img alt=\"`><img src=xx:x onerror=alert(2)//\" />\n<img alt=\"`><img src=xx:x onerror=alert(3)//\" />"
},
{
id: 110,
Expand Down Expand Up @@ -804,12 +804,12 @@ var generalVectors = [
{
id: 45,
input: "<img id=\'foo\'/>",
output: "<img id=\"foo\" />"
output: "<img id=\'foo\' />"
},
{
id: 46,
input: "<img id=\'foo\' />",
output: "<img id=\"foo\" />"
output: "<img id=\'foo\' />"
},
{
id: 47,
Expand All @@ -819,7 +819,7 @@ var generalVectors = [
{
id: 48,
input: "<img id=\'\' />",
output: "<img id=\"\" />"
output: "<img id=\'\' />"
},
{
id: 49,
Expand All @@ -831,6 +831,11 @@ var generalVectors = [
id: 50,
input: "abc <!-- 123",
output: "abc "
},
{
id: 51,
input: "<img src=\"x\" id=\'\" onerror=\"alert(1)\' />",
output: "<img src=\"x\" id=\'\" onerror=\"alert(1)\' />"
}
];

Expand Down