diff --git a/bower.json b/bower.json index 6fa6c82..550bf89 100644 --- a/bower.json +++ b/bower.json @@ -1,6 +1,6 @@ { "name": "context-parser", - "version": "2.0.0", + "version": "2.0.1", "homepage": "https://github.com/yahoo/context-parser", "description": "HTML5 Context Parser", "main": "src/context-parser.js", diff --git a/dist/context-parser.js b/dist/context-parser.js index c679681..36338f3 100644 --- a/dist/context-parser.js +++ b/dist/context-parser.js @@ -8,44 +8,149 @@ Authors: Nera Liu Albert Yu Adonis Fung */ +/*jshint -W030 */ (function() { "use strict"; -var stateMachine = require('./html5-state-machine.js'); +var stateMachine = require('./html5-state-machine.js'), + htmlState = stateMachine.State, + reInputPreProcessing = /(?:\r\n?|[\x01-\x08\x0B\x0E-\x1F\x7F-\x9F\uFDD0-\uFDEF\uFFFE\uFFFF]|[\uD83F\uD87F\uD8BF\uD8FF\uD93F\uD97F\uD9BF\uD9FF\uDA3F\uDA3F\uDA7F\uDABF\uDAFF\uDB3F\uDB7F\uDBBF\uDBFF][\uDFFE\uDFFF])/g; /** * @class FastParser * @constructor FastParser */ -function FastParser() { - this.state = stateMachine.State.STATE_DATA; /* Save the current status */ - this.tagNames = ['', '']; /* Save the current tag name */ - this.tagNameIdx = ''; - this.attributeName = ''; /* Save the current attribute name */ - this.attributeValue = ''; /* Save the current attribute value */ +function FastParser(config) { + var self = this, k; + + // deep copy config to this.config + self.config = {}; + if (config) { + for (k in config) { + self.config[k] = config[k]; + } + } + config = self.config; + + // config enabled by default - no conversion needed + // config.enableInputPreProcessing = (config.enableInputPreProcessing !== false); + + self.listeners = {}; + self.reset(); } /** - * @function FastParser#contextualize + * @function FastParser#reset + * + * @description + * Reset all internal states, as if being created with the new operator + */ + FastParser.prototype.reset = function () { + var self = this; + + self.state = stateMachine.State.STATE_DATA; /* Save 
the current status */ + self.tags = ['', '']; /* Save the current tag name */ + self.tagIdx = 0; + self.attrName = ''; /* Save the current attribute name */ + self.attributeValue = null; /* Save the current attribute value */ + self.input = ''; + self.inputLen = 0; + + return self; + }; + +/** + * @function FastParser#on * - * @param {string} input - The byte stream of the HTML5 web page. - * @returns {integer} The return code of success or failure of parsing. + * @param {string} eventType - the event type + * @param {function} listener - the event listener + * @returns this * * @description - *

The context analyzing function, it analyzes the output context of each character based on - * the HTML5 WHATWG - https://html.spec.whatwg.org/multipage/

+ *

Register the given event listener for the given eventType.

* */ -FastParser.prototype.contextualize = function(input) { - var len = input.length; - - for(var i = 0; i < len; ++i) { - i = this.beforeWalk(i, input); - if ( i >= len ) { break; } - i = this.walk(i, input); - if ( i >= len ) { break; } - this.afterWalk(input[i], i); +FastParser.prototype.on = function (eventType, listener) { + var l = this.listeners[eventType]; + if (listener) { + if (l) { + l.push(listener); + } else { + this.listeners[eventType] = [listener]; + } } + return this; +}; + +/** + * @function FastParser#once + * + * @param {string} eventType - the event type (e.g., preWalk, reWalk, postWalk, ...) + * @param {function} listener - the event listener + * @returns this + * + * @description + *

Register the given event listener for the given eventType; it is removed right before its first invocation, so it fires only once.

+ * + */ +FastParser.prototype.once = function(eventType, listener) { + var self = this, onceListener; + if (listener) { + onceListener = function () { + self.off(eventType, onceListener); + listener.apply(self, arguments); + }; + return this.on(eventType, onceListener); + } + return this; +}; + +/** + * @function FastParser#off + * + * @param {string} eventType - the event type (e.g., preWalk, reWalk, postWalk, ...) + * @param {function} listener - the event listener + * @returns this + * + * @description + *

Remove the given listener so that it is no longer fired when the eventType happens.

+ * + */ +FastParser.prototype.off = function (eventType, listener) { + if (listener) { + var i, len, listeners = this.listeners[eventType]; + if (listeners) { + for (i = 0; listeners[i]; i++) { + if (listeners[i] === listener) { + listeners.splice(i, 1); + break; + } + } + } + } + return this; +}; + +/** + * @function FastParser#emit + * + * @param {string} eventType - the event type (e.g., preWalk, reWalk, postWalk, ...) + * @returns this + * + * @description + *

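Fire the listeners corresponding to the given eventType. Note that the first argument is the listener array itself (e.g., this.listeners.reWalk), and the optional second argument is the array of arguments applied to each listener.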

+ * + */ +FastParser.prototype.emit = function (listeners, args) { + if (listeners) { + var i = -1, len; + if ((len = listeners.length)) { + while (++i < len) { + listeners[i].apply(this, args || []); + } + } + } + return this; }; /* @@ -56,7 +161,7 @@ FastParser.prototype.contextualize = function(input) { * @returns {integer} the new location of the current character. * */ -FastParser.prototype.walk = function(i, input) { +FastParser.prototype.walk = function(i, input, endsWithEOF) { var ch = input[i], symbol = this.lookupChar(ch), @@ -73,7 +178,7 @@ FastParser.prototype.walk = function(i, input) { case 3: this.appendTagName(ch); break; case 4: this.resetEndTag(ch); break; case 6: /* match end tag token with start tag token's tag name */ - if(this.tagNames[0] === this.tagNames[1]) { + if(this.tags[0].toLowerCase() === this.tags[1].toLowerCase()) { reconsume = 0; /* see 12.2.4.13 - switch state for the following case, otherwise, reconsume. */ this.matchEndTagWithStartTag(symbol); } @@ -86,10 +191,7 @@ FastParser.prototype.walk = function(i, input) { } if (reconsume) { /* reconsume the character */ - if( this.states) { - // This is error prone. May need to change the way we walk the stream to avoid this. 
- this.states[i] = this.state; - } + this.listeners.reWalk && this.emit(this.listeners.reWalk, [this.state, i, endsWithEOF]); return this.walk(i, input); } @@ -97,22 +199,22 @@ FastParser.prototype.walk = function(i, input) { }; FastParser.prototype.createStartTag = function (ch) { - this.tagNameIdx = 0; - this.tagNames[0] = ch.toLowerCase(); + this.tagIdx = 0; + this.tags[0] = ch; }; FastParser.prototype.createEndTag = function (ch) { - this.tagNameIdx = 1; - this.tagNames[1] = ch.toLowerCase(); + this.tagIdx = 1; + this.tags[1] = ch; }; FastParser.prototype.appendTagName = function (ch) { - this.tagNames[this.tagNameIdx] += ch.toLowerCase(); + this.tags[this.tagIdx] += ch; }; FastParser.prototype.resetEndTag = function (ch) { - this.tagNameIdx = 1; - this.tagNames[1] = ''; + this.tagIdx = 1; + this.tags[1] = ''; }; FastParser.prototype.matchEndTagWithStartTag = function (symbol) { @@ -124,8 +226,9 @@ FastParser.prototype.matchEndTagWithStartTag = function (symbol) { GREATER-THAN SIGN (>): If the current end tag token is an appropriate end tag token, then switch to the data state and emit the current tag token. Otherwise, treat it as per the 'anything else' entry below. */ - this.tagNames[0] = ''; - this.tagNames[1] = ''; + this.tags[0] = ''; + this.tags[1] = ''; + switch (symbol) { case stateMachine.Symbol.SPACE: /** Whitespaces */ this.state = stateMachine.State.STATE_BEFORE_ATTRIBUTE_NAME; @@ -141,14 +244,14 @@ FastParser.prototype.matchEndTagWithStartTag = function (symbol) { FastParser.prototype.matchEscapedScriptTag = function (ch) { /* switch to the script data double escaped state if we see