diff --git a/scratch.js b/scratch.js index e8d2d9dbd..c309b9c16 100644 --- a/scratch.js +++ b/scratch.js @@ -19,17 +19,23 @@ const fresh = require('./test/unit/lib/freshPrince.js'); // r.contractions().debug(); //===timer -console.time('parse'); -let r = nlp(fresh); -console.timeEnd('parse'); - -console.time('match'); -r.match('#Determiner (story|thing|#Adjective)', true); -console.timeEnd('match'); - -console.time('tag'); -r.tag('#Person'); -console.timeEnd('tag'); - -// r = nlp('6 am').debug(); -// console.log(r.people().data()); +// console.time('parse'); +// let r = nlp(fresh); +// console.timeEnd('parse'); +// +// console.time('match'); +// r.match('#Determiner (story|thing|#Adjective)', true); +// console.timeEnd('match'); +// +// console.time('tag'); +// r.tag('#Person'); +// console.timeEnd('tag'); + +let r = nlp('i\'d contemplate'); +console.log(r.normalize().out('text')); + +r = nlp('i\'d contemplated'); +console.log(r.normalize().out('text')); + +r = nlp('i\'d really contemplated'); +console.log(r.normalize().out('text')); diff --git a/src/result/methods/loops.js b/src/result/methods/loops.js index ac2099303..228e25c58 100644 --- a/src/result/methods/loops.js +++ b/src/result/methods/loops.js @@ -21,7 +21,7 @@ const methods = [ 'tagger', - 'tag', + // 'tag', 'unTag', ]; @@ -34,6 +34,18 @@ const addMethods = (Text) => { return this; }; }); + + //add an extra optimisation for tag method + Text.prototype.tag = function() { + //fail-fast optimisation + if (this.list.length === 0) { + return this; + } + for(let i = 0; i < this.list.length; i++) { + this.list[i].tag.apply(this.list[i], arguments); + } + return this; + }; }; module.exports = addMethods; diff --git a/src/result/methods/match/index.js b/src/result/methods/match/index.js index dc400eb45..ab7dde0e0 100644 --- a/src/result/methods/match/index.js +++ b/src/result/methods/match/index.js @@ -51,7 +51,7 @@ const splitMethods = (Text) => { /** do a regex-like search through terms and return a subset */ 
match: function (reg, verbose) { //fail-fast - if (reg === undefined || reg === null) { + if (this.list.length === 0 || reg === undefined || reg === null) { let parent = this.parent || this; return new Text([], this.lexicon, parent); } diff --git a/src/tagger/contraction/03-easyOnes.js b/src/tagger/contraction/03-easyOnes.js index cf576dabe..8d3e3c1aa 100644 --- a/src/tagger/contraction/03-easyOnes.js +++ b/src/tagger/contraction/03-easyOnes.js @@ -5,7 +5,7 @@ const split = require('./split'); //the formulaic contraction types: const easy_ends = { 'll': 'will', - 'd': 'would', + // 'd': 'would', 've': 'have', 're': 'are', 'm': 'am', @@ -16,6 +16,7 @@ const easy_ends = { }; + //unambiguous contractions, like "'ll" const easyOnes = (ts) => { for(let i = 0; i < ts.terms.length; i++) { @@ -25,6 +26,26 @@ const easyOnes = (ts) => { } let parts = split(ts.terms[i]); if (parts) { + + //handle the ambiguous 'd ending only: i'd -> 'i would' vs 'i had' (other endings fall through to easy_ends below) + if (parts.end === 'd') { + //assume 'would' + let arr = [ + parts.start, + 'would' + ]; + //if next verb is past-tense, choose 'had' + if (ts.terms[i + 1] && ts.terms[i + 1].tags.PastTense) { + arr[1] = 'had'; + } + //also support '#Adverb #PastTense' + if (ts.terms[i + 2] && ts.terms[i + 2].tags.PastTense && ts.terms[i + 1].tags.Adverb) { + arr[1] = 'had'; + } + ts = fixContraction(ts, arr, i); + i += 1; + } + //make sure its an easy one if (easy_ends[parts.end]) { let arr = [ diff --git a/src/terms/match/index.js b/src/terms/match/index.js index 0df650650..ed63b710d 100644 --- a/src/terms/match/index.js +++ b/src/terms/match/index.js @@ -10,7 +10,11 @@ const matchMethods = (Terms) => { //support regex-like whitelist-match match: function (reg, verbose) { - //fail-fast + //fail-fast #1 + if (this.terms.length === 0) { + return new Text([], this.lexicon, this.parent); + } + //fail-fast #2 if (!reg) { return new Text([], this.lexicon, this.parent); } @@ -23,6 +27,10 @@ const matchMethods = (Terms) => { /**return first match */ matchOne: function (str) { + 
//fail-fast + if (this.terms.length === 0) { + return null; + } let regs = syntax(str); for (let t = 0; t < this.terms.length; t++) { //don't loop through if '^' diff --git a/src/terms/match/lib/fastPass.js b/src/terms/match/lib/fastPass.js index d6c0c6c8e..3ed1a5eeb 100644 --- a/src/terms/match/lib/fastPass.js +++ b/src/terms/match/lib/fastPass.js @@ -1,7 +1,7 @@ 'use strict'; // //find easy reasons to skip running the full match on this -const fastPass = (ts, regs, verbose) => { +const fastPass = (ts, regs) => { for(let i = 0; i < regs.length; i++) { let reg = regs[i]; let found = false; @@ -10,9 +10,6 @@ const fastPass = (ts, regs, verbose) => { } //look-for missing term-matches if (reg.normal !== undefined) { - if (verbose) { - console.log(reg); - } for(let o = 0; o < ts.terms.length; o++) { if (ts.terms[o].normal === reg.normal || ts.terms[o].silent_term === reg.normal) { found = true; diff --git a/test/unit/subset/contractions/basic.test.js b/test/unit/subset/contractions/basic.test.js index d9342b727..db34501bf 100644 --- a/test/unit/subset/contractions/basic.test.js +++ b/test/unit/subset/contractions/basic.test.js @@ -91,3 +91,14 @@ test('contracted', function(t) { t.equal(str, `I will`, 'expanded'); t.end(); }); + +test('would-or-did', function(t) { + var r = nlp(`i'd contemplate`); + var str = r.contractions().expand().all().out('text'); + t.equal(str, `i would contemplate`, 'i-would'); + + r = nlp(`i'd contemplated`); + str = r.contractions().expand().all().out('text'); + t.equal(str, `i had contemplated`, 'i-had'); + t.end(); +});