Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use regex for ALL search: 30% faster #28

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 120 additions & 12 deletions benchmark/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,20 @@
/**
* Globals for benchmark.js
*/
global.escapeHtml = require('..')
var lib = require('..')
global.escapeHtml = lib.escapeHtml
global.escapeHtmlFast = lib.escapeHtmlFast
global.escapeHtmlNoRegex = lib.escapeHtmlNoRegex

/**
* Module dependencies.
*/
var benchmark = require('benchmark')
var benchmarks = require('beautify-benchmark')
var fs = require('fs')
var hugeHTML = fs.readFileSync("mathematica.html").toString()

const MIN_SAMPLES = 3

for (var dep in process.versions) {
console.log(' %s@%s', dep, process.versions[dep])
Expand All @@ -17,28 +24,129 @@ for (var dep in process.versions) {
console.log('')

var suite = new benchmark.Suite()
const fn = function() { escapeHtmlFast(str) }

// suite.add({
// 'name': 'no special characters',
// 'minSamples': MIN_SAMPLES,
// 'fn': function() { escapeHtml(str) },,
// 'setup': function() { str = "Hello, World!" }
// })

// suite.add({
// 'name': 'no special characters (large)',
// 'minSamples': MIN_SAMPLES,
// 'fn': function() { escapeHtml(str) },,
// 'setup': function() { str = "Hello, World!".repeat(1000) }
// })


suite.add({
'name': 'no special characters',
'minSamples': 100,
'fn': 'escapeHtml(str)',
'setup': 'str = "Hello, World!"'
'name': 'Long HTML page',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtml(hugeHTML) },
})

suite.add({
'name': 'Long HTML page REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlFast(hugeHTML) },
})

suite.add({
'name': 'Short HTML page',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtml(hugeHTML.substring(1,30000)) },
})

suite.add({
'name': 'Short HTML page REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlFast(hugeHTML.substring(1,30000)) },
})


suite.add({
'name': 'single special character',
'minSamples': 100,
'fn': 'escapeHtml(str)',
'setup': 'str = "Hello, World&!"'
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtml(str) },
'setup': function() { str = "Hello, World&!" }
})

suite.add({
'name': 'single special character REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlFast(str) },
'setup': function() { str = "Hello, World&!" }
})

suite.add({
'name': 'single special character (large)',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtml(str) },
'setup': function() {
str = "Hello, World!".repeat(500)
+ "&"
+ "Hello, World!".repeat(500)
}
})

suite.add({
'name': 'single special character (large) REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlFast(str) },
'setup': function() {
str = "Hello, World!".repeat(500)
+ "&"
+ "Hello, World!".repeat(500)
}
})


suite.add({
'name': 'many special characters',
'minSamples': 100,
'fn': 'escapeHtml(str)',
'setup': 'str = "\'>\'\\"\\"&>h<e>&<y>"'
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtml(str) },
'setup': function() { str = '\'>\'\\"\\"&>h<e>&<y>"' }
})

suite.add({
'name': 'many special characters REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlFast(str) },
'setup': function() { str = '\'>\'\\"\\"&>h<e>&<y>"' }
})

suite.add({
'name': 'many special characters NO REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlNoRegex(str) },
'setup': function() { str = '\'>\'\\"\\"&>h<e>&<y>"' }
})


suite.add({
'name': 'many special characters (large)',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtml(str) },
'setup': function() { str = '\'>\'\\"\\"&>h<e>&<y>"'.repeat(1000) }
})

suite.add({
'name': 'many special characters (large) REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlFast(str) },
'setup': function() { str = '\'>\'\\"\\"&>h<e>&<y>"'.repeat(1000) }
})

suite.add({
'name': 'many special characters (large) NO REGEX',
'minSamples': MIN_SAMPLES,
'fn': function() { escapeHtmlNoRegex(str) },
'setup': function() { str = '\'>\'\\"\\"&>h<e>&<y>"'.repeat(1000) }
})


suite.on('cycle', function onCycle (event) {
benchmarks.add(event.target)
})
Expand All @@ -47,4 +155,4 @@ suite.on('complete', function onComplete () {
benchmarks.log()
})

suite.run({ 'async': false })
suite.run({ 'async': false, maxTime: 0.001 })
84 changes: 83 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ var matchHtmlRegExp = /["'&<>]/
* @public
*/

module.exports = escapeHtml
module.exports = { escapeHtml, escapeHtmlFast, escapeHtmlNoRegex }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make sure your syntax works on the Node.js versions declared in our package.json; generally ES5. If there are now multiple exports, they of course should all be documented on the README.

I also see there is no longer any default export, definately making this a major version bump, at least.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's why I said:

If we agree on a path forward I can of course clean up the PR to remove the extra code, etc.

I pushed what I had so that anyone could easily test it exactly as I had. (see both results side by side, tweak both, etc) Unless we decide to change the API these changes don't need to happen. I'm just first trying to get agreement on what we're doing here.

generally ES5.

Ah, I'm spoiled by our ES6 codebase, but I can fix. (or again, simply revert)

I also see there is no longer any default export, definately making this a major version bump, at least.

I'd agree if we are going to have multiple functions.


/**
* Escape special characters in the given string of text.
Expand All @@ -30,6 +30,88 @@ module.exports = escapeHtml
* @public
*/

var matchHtmlRegExpFast = /["'&<>]/g
function escapeHtmlFast (string) {
var str = '' + string

var lastIndex = 0
var html = ''
var escape = ''
var match

while (match = matchHtmlRegExpFast.test(str)) {
switch (str.charCodeAt(matchHtmlRegExpFast.lastIndex-1)) {
case 34: // "
escape = '&quot;'
break
case 38: // &
escape = '&amp;'
break
case 39: // '
escape = '&#39;'
break
case 60: // <
escape = '&lt;'
break
case 62: // >
escape = '&gt;'
break
}
// console.log(lastIndex, matchHtmlRegExpFast.lastIndex, str.length)
html += str.substring(lastIndex, matchHtmlRegExpFast.lastIndex - 1)
lastIndex = matchHtmlRegExpFast.lastIndex
html += escape
}
return html + str.substring(lastIndex)
}

function escapeHtmlNoRegex (str) {
// var str = '' + string
// var match = matchHtmlRegExp.exec(str)

// if (!match) {
// return str
// }

var escape
var html = ''
var index = 0
var lastIndex = 0

for (index = 0; index < str.length; index++) {
switch (str.charCodeAt(index)) {
case 34: // "
escape = '&quot;'
break
case 38: // &
escape = '&amp;'
break
case 39: // '
escape = '&#39;'
break
case 60: // <
escape = '&lt;'
break
case 62: // >
escape = '&gt;'
break
default:
continue
}

if (lastIndex !== index) {
html += str.substring(lastIndex, index)
}

lastIndex = index + 1
html += escape
}

return lastIndex !== index
? html + str.substring(lastIndex, index)
: html
}

function escapeHtml (string) {
var str = '' + string
var match = matchHtmlRegExp.exec(str)
Expand Down
Loading