diff --git a/lib/bleach.js b/lib/bleach.js index 71fde85..34f3ed6 100644 --- a/lib/bleach.js +++ b/lib/bleach.js @@ -60,6 +60,21 @@ var bleach = { }, sanitize: function(html, options) { + var previousHtml; + var sanitizedHtml = html; + + // Apply sanitization until the length stabilizes. + // This is guaranteed to terminate if sanitizeOnce never makes the + // string longer. + do { + previousHtml = sanitizedHtml; + sanitizedHtml = this.sanitizeOnce(previousHtml, options); + } while (sanitizedHtml.length != previousHtml.length); + + return sanitizedHtml; + }, + + sanitizeOnce: function(html, options) { html = String(html) || ''; options = options || {}; @@ -70,13 +85,13 @@ var bleach = { if ((mode == 'white' && list.indexOf('script') == -1) || (mode == 'black' && list.indexOf('script') != -1)) { - html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/script>/gim, ''); + html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/script.*?>/gim, ''); } if ((mode == 'white' && list.indexOf('style') == -1) || (mode == 'black' && list.indexOf('style') != -1)) { - html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/style>/gim, ''); + html = html.replace(/(.*?[\r\n])*?(.*?)(.*?[\r\n])*?<\/style.*?>/gim, ''); } matches.forEach(function(tag){ diff --git a/test/script.js b/test/script.js index 66221b8..346febd 100644 --- a/test/script.js +++ b/test/script.js @@ -2,23 +2,67 @@ var vows = require('vows'), assert = require('assert'), bleach = require('../lib/bleach'); -var HTML1 = 'This is HTML with a SCRIPT', - HTML2 = 'This is HTML with a SCRIPT', - HTML3 = 'This is HTML with a SCRIPT'; +var HTML_LINK_SCRIPT = 'This is HTML with a SCRIPT', + HTML_LINK_SPACE_CLOSED_SCRIPT = 'This is HTML with a SCRIPT', + HTML_LINK_MISNESTED_SCRIPT = 'This is HTML with a ipt src="evil.js">SCRIPT', + HTML_LINK = 'This is HTML with a SCRIPT', + HTML_PLAIN = 'This is HTML with a SCRIPT'; vows.describe('script tests').addBatch({ 'whitelist mode': { - topic: function (){ return HTML1; }, + topic: function (){ return HTML_LINK_SCRIPT; }, - 'eliminates script tags but keeps listed tags': function (HTML1){ - var HTML = bleach.sanitize(HTML1, {mode: 'white', list:['a']}); - assert.equal(HTML, HTML2); + 'eliminates script tags but keeps listed tags': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:['a']}); + assert.equal(HTML, HTML_LINK); }, - 'eliminates all tags when given an empty list': function (HTML1){ - var HTML = bleach.sanitize(HTML1, {mode: 'white', list:[]}); - assert.equal(HTML, HTML3); + 'eliminates all tags when given an empty list': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:[]}); + assert.equal(HTML, HTML_PLAIN); + } + }, + + 'blacklist mode': { + topic: function (){ return HTML_LINK_SCRIPT; }, + + 'eliminates script tags but keeps unlisted tags': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['script']}); + assert.equal(HTML, HTML_LINK); + }, + + 'eliminates all tags when all are blacklisted': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['a', 'script']}); + assert.equal(HTML, HTML_PLAIN); + } + }, + + 'nested malformed tags': { + topic: function (){ return HTML_LINK_MISNESTED_SCRIPT; }, + + 'are eliminated but whitelisted tags are kept': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:['a']}); + assert.equal(HTML, HTML_LINK); + }, + + 'are eliminated when blacklisted': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['script']}); + assert.equal(HTML, HTML_LINK); + }, + }, + + 'oddly closed script tags': { + topic: function (){ return HTML_LINK_SPACE_CLOSED_SCRIPT; }, + + 'are eliminated but whitelisted tags are kept': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'white', list:['a']}); + assert.equal(HTML, HTML_LINK); + }, + + 'are eliminated when blacklisted': function (topic){ + var HTML = bleach.sanitize(topic, {mode: 'black', list:['script']}); + assert.equal(HTML, HTML_LINK); } }