From 361e71481e008469a403dd356f131a64b6a4619f Mon Sep 17 00:00:00 2001 From: Ashok Argent-Katwala Date: Tue, 26 May 2015 05:16:22 -0400 Subject: [PATCH 1/6] Add basic tests for sanitize in blacklist mode. --- test/script.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/script.js b/test/script.js index 66221b8..f742f65 100644 --- a/test/script.js +++ b/test/script.js @@ -20,6 +20,20 @@ vows.describe('script tests').addBatch({ var HTML = bleach.sanitize(HTML1, {mode: 'white', list:[]}); assert.equal(HTML, HTML3); } + }, + + 'blacklist mode': { + topic: function (){ return HTML1; }, + + 'eliminates script tags but keeps unlisted tags': function (HTML1){ + var HTML = bleach.sanitize(HTML1, {mode: 'black', list:['script']}); + assert.equal(HTML, HTML2); + }, + + 'eliminates all tags when all are blacklisted': function (HTML1){ + var HTML = bleach.sanitize(HTML1, {mode: 'black', list:['a', 'script']}); + assert.equal(HTML, HTML3); + } } }).export(module); From 4c4d8e2e27ac5893e812cb282f8788e858b79427 Mon Sep 17 00:00:00 2001 From: Ashok Argent-Katwala Date: Tue, 26 May 2015 05:28:01 -0400 Subject: [PATCH 2/6] Rename variables to prepare for adding another test string. 
--- test/script.js | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/test/script.js b/test/script.js index f742f65..292787d 100644 --- a/test/script.js +++ b/test/script.js @@ -2,37 +2,37 @@ var vows = require('vows'), assert = require('assert'), bleach = require('../lib/bleach'); -var HTML1 = 'This is HTML with a SCRIPT', - HTML2 = 'This is HTML with a SCRIPT', - HTML3 = 'This is HTML with a SCRIPT'; +var HTML_LINK_SCRIPT = 'This is HTML with a SCRIPT', + HTML_LINK = 'This is HTML with a SCRIPT', + HTML_PLAIN = 'This is HTML with a SCRIPT'; vows.describe('script tests').addBatch({ 'whitelist mode': { - topic: function (){ return HTML1; }, + topic: function (){ return HTML_LINK_SCRIPT; }, - 'eliminates script tags but keeps listed tags': function (HTML1){ - var HTML = bleach.sanitize(HTML1, {mode: 'white', list:['a']}); - assert.equal(HTML, HTML2); + 'eliminates script tags but keeps listed tags': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:['a']}); + assert.equal(HTML, HTML_LINK); }, - 'eliminates all tags when given an empty list': function (HTML1){ - var HTML = bleach.sanitize(HTML1, {mode: 'white', list:[]}); - assert.equal(HTML, HTML3); + 'eliminates all tags when given an empty list': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:[]}); + assert.equal(HTML, HTML_PLAIN); } }, 'blacklist mode': { - topic: function (){ return HTML1; }, + topic: function (){ return HTML_LINK_SCRIPT; }, - 'eliminates script tags but keeps unlisted tags': function (HTML1){ - var HTML = bleach.sanitize(HTML1, {mode: 'black', list:['script']}); - assert.equal(HTML, HTML2); + 'eliminates script tags but keeps unlisted tags': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['script']}); + assert.equal(HTML, HTML_LINK); }, - 'eliminates all tags when all are blacklisted': function (HTML1){ - var HTML = bleach.sanitize(HTML1, {mode: 'black', 
list:['a', 'script']}); - assert.equal(HTML, HTML3); + 'eliminates all tags when all are blacklisted': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['a', 'script']}); + assert.equal(HTML, HTML_PLAIN); } } From df7ac9544dfd6c0825eed4bb7b875ea8fc1ec80d Mon Sep 17 00:00:00 2001 From: Ashok Argent-Katwala Date: Tue, 26 May 2015 05:41:54 -0400 Subject: [PATCH 3/6] Add failing test for mis-nested script tags. --- test/script.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/script.js b/test/script.js index 292787d..4bd8b02 100644 --- a/test/script.js +++ b/test/script.js @@ -3,6 +3,7 @@ var vows = require('vows'), bleach = require('../lib/bleach'); var HTML_LINK_SCRIPT = 'This is HTML with a SCRIPT', + HTML_LINK_MISNESTED_SCRIPT = 'This is HTML with a ipt src="evil.js">SCRIPT', HTML_LINK = 'This is HTML with a SCRIPT', HTML_PLAIN = 'This is HTML with a SCRIPT'; @@ -34,6 +35,20 @@ vows.describe('script tests').addBatch({ var HTML = bleach.sanitize(topic, {mode: 'black', list:['a', 'script']}); assert.equal(HTML, HTML_PLAIN); } + }, + + 'nested malformed tags': { + topic: function (){ return HTML_LINK_MISNESTED_SCRIPT; }, + + 'are eliminated but whitelisted tags are kept': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:['a']}); + assert.equal(HTML, HTML_LINK); + }, + + 'are eliminated when blacklisted': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['script']}); + assert.equal(HTML, HTML_LINK); + }, } }).export(module); From 68573a8c6fe6f9d308595ea86d0d9307143d9028 Mon Sep 17 00:00:00 2001 From: Ashok Argent-Katwala Date: Tue, 26 May 2015 06:19:35 -0400 Subject: [PATCH 4/6] Run sanitization until we hit a fixed-point. This disallows mis-nesting script tags to produce output that still has a script tag in it.
--- lib/bleach.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/bleach.js b/lib/bleach.js index 71fde85..e26a469 100644 --- a/lib/bleach.js +++ b/lib/bleach.js @@ -60,6 +60,21 @@ var bleach = { }, sanitize: function(html, options) { + var previousHtml; + var sanitizedHtml = html; + + // Apply sanitization until the length stabilizes. + // This is guaranteed to terminate if sanitizeOnce never makes the + // string longer. + do { + previousHtml = sanitizedHtml; + sanitizedHtml = this.sanitizeOnce(previousHtml, options); + } while (sanitizedHtml.length != previousHtml.length); + + return sanitizedHtml; + }, + + sanitizeOnce: function(html, options) { html = String(html) || ''; options = options || {}; From 6a657b3860c0ac9234be11c6df1967b087e8a565 Mon Sep 17 00:00:00 2001 From: Ashok Argent-Katwala Date: Tue, 26 May 2015 06:23:37 -0400 Subject: [PATCH 5/6] Add failing test for a closing tag with extra content. In practice, this isn't so bad, as the script (or style) tags are still eliminated. It's odd that the body is still left in the resulting document, though. 
--- test/script.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/script.js b/test/script.js index 4bd8b02..346febd 100644 --- a/test/script.js +++ b/test/script.js @@ -3,6 +3,7 @@ var vows = require('vows'), bleach = require('../lib/bleach'); var HTML_LINK_SCRIPT = 'This is HTML with a SCRIPT', + HTML_LINK_SPACE_CLOSED_SCRIPT = 'This is HTML with a SCRIPT', HTML_LINK_MISNESTED_SCRIPT = 'This is HTML with a ipt src="evil.js">SCRIPT', HTML_LINK = 'This is HTML with a SCRIPT', HTML_PLAIN = 'This is HTML with a SCRIPT'; @@ -49,6 +50,20 @@ vows.describe('script tests').addBatch({ var HTML = bleach.sanitize(topic, {mode: 'black', list:['script']}); assert.equal(HTML, HTML_LINK); }, + }, + + 'oddly closed script tags': { + topic: function (){ return HTML_LINK_SPACE_CLOSED_SCRIPT; }, + + 'are eliminated but whitelisted tags are kept': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:['a']}); + assert.equal(HTML, HTML_LINK); + }, + + 'are eliminated when blacklisted': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['script']}); + assert.equal(HTML, HTML_LINK); + } } }).export(module); From 5e25bc099a35c1f417a8b5acd5dd26accf6bf985 Mon Sep 17 00:00:00 2001 From: Ashok Argent-Katwala Date: Tue, 26 May 2015 06:27:09 -0400 Subject: [PATCH 6/6] Strip script and style tags, including their bodies, even when their closing tag has extra content. 
--- lib/bleach.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bleach.js b/lib/bleach.js index e26a469..34f3ed6 100644 --- a/lib/bleach.js +++ b/lib/bleach.js @@ -85,13 +85,13 @@ var bleach = { if ((mode == 'white' && list.indexOf('script') == -1) || (mode == 'black' && list.indexOf('script') != -1)) { - html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/script>/gim, ''); + html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/script.*?>/gim, ''); } if ((mode == 'white' && list.indexOf('style') == -1) || (mode == 'black' && list.indexOf('style') != -1)) { - html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/style>/gim, ''); + html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/style.*?>/gim, ''); } matches.forEach(function(tag){