Skip to content

Commit

Permalink
tag optimisation and #352 fix
Browse files Browse the repository at this point in the history
  • Loading branch information
spencermountain committed Apr 2, 2017
1 parent 422e936 commit bcb29bf
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 22 deletions.
34 changes: 20 additions & 14 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,23 @@ const fresh = require('./test/unit/lib/freshPrince.js');
// r.contractions().debug();

//===timer
console.time('parse');
let r = nlp(fresh);
console.timeEnd('parse');

console.time('match');
r.match('#Determiner (story|thing|#Adjective)', true);
console.timeEnd('match');

console.time('tag');
r.tag('#Person');
console.timeEnd('tag');

// r = nlp('6 am').debug();
// console.log(r.people().data());
// console.time('parse');
// let r = nlp(fresh);
// console.timeEnd('parse');
//
// console.time('match');
// r.match('#Determiner (story|thing|#Adjective)', true);
// console.timeEnd('match');
//
// console.time('tag');
// r.tag('#Person');
// console.timeEnd('tag');

let r = nlp('i\'d contemplate');
console.log(r.normalize().out('text'));

r = nlp('i\'d contemplated');
console.log(r.normalize().out('text'));

r = nlp('i\'d really contemplated');
console.log(r.normalize().out('text'));
14 changes: 13 additions & 1 deletion src/result/methods/loops.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const methods = [

'tagger',

'tag',
// 'tag',
'unTag',
];

Expand All @@ -34,6 +34,18 @@ const addMethods = (Text) => {
return this;
};
});

//tag gets its own hand-written method, as an extra optimisation
//applies .tag() (with the caller's arguments) to each item in this.list, and returns this for chaining
Text.prototype.tag = function() {
  const tagArgs = arguments;
  //fail-fast: nothing to tag in an empty result
  if (!this.list.length) {
    return this;
  }
  let index = 0;
  while (index < this.list.length) {
    const ts = this.list[index];
    ts.tag.apply(ts, tagArgs);
    index += 1;
  }
  return this;
};
};

module.exports = addMethods;
2 changes: 1 addition & 1 deletion src/result/methods/match/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const splitMethods = (Text) => {
/** do a regex-like search through terms and return a subset */
match: function (reg, verbose) {
//fail-fast
if (reg === undefined || reg === null) {
if (this.list.length === 0 || reg === undefined || reg === null) {
let parent = this.parent || this;
return new Text([], this.lexicon, parent);
}
Expand Down
23 changes: 22 additions & 1 deletion src/tagger/contraction/03-easyOnes.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const split = require('./split');
//the formulaic contraction types:
const easy_ends = {
'll': 'will',
'd': 'would',
// 'd': 'would',
've': 'have',
're': 'are',
'm': 'am',
Expand All @@ -16,6 +16,7 @@ const easy_ends = {
};



//unambiguous contractions, like "'ll"
const easyOnes = (ts) => {
for(let i = 0; i < ts.terms.length; i++) {
Expand All @@ -25,6 +26,26 @@ const easyOnes = (ts) => {
}
let parts = split(ts.terms[i]);
if (parts) {

//handle i'd -> 'i would' vs 'i had'
//must be a strict comparison: a plain '=' overwrites parts.end with 'd' and is always truthy,
//so every contraction would be expanded by this branch instead of the easy_ends lookup below
if (parts.end === 'd') {
  //assume 'would'
  let arr = [
    parts.start,
    'would'
  ];
  //if next verb is past-tense, choose 'had'
  if (ts.terms[i + 1] && ts.terms[i + 1].tags.PastTense) {
    arr[1] = 'had';
  }
  //also support '#Adverb #PastTense' (terms[i + 1] must exist whenever terms[i + 2] does)
  if (ts.terms[i + 2] && ts.terms[i + 2].tags.PastTense && ts.terms[i + 1].tags.Adverb) {
    arr[1] = 'had';
  }
  ts = fixContraction(ts, arr, i);
  //presumably steps past the extra term added by fixContraction - confirm against its implementation
  i += 1;
}

//make sure its an easy one
if (easy_ends[parts.end]) {
let arr = [
Expand Down
10 changes: 9 additions & 1 deletion src/terms/match/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ const matchMethods = (Terms) => {

//support regex-like whitelist-match
match: function (reg, verbose) {
//fail-fast
//fail-fast #1
if (this.terms.length === 0) {
return new Text([], this.lexicon, this.parent);
}
//fail-fast #2
if (!reg) {
return new Text([], this.lexicon, this.parent);
}
Expand All @@ -23,6 +27,10 @@ const matchMethods = (Terms) => {

/**return first match */
matchOne: function (str) {
//fail-fast
if (this.terms.length === 0) {
return null;
}
let regs = syntax(str);
for (let t = 0; t < this.terms.length; t++) {
//don't loop through if '^'
Expand Down
5 changes: 1 addition & 4 deletions src/terms/match/lib/fastPass.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict';
//
//find easy reasons to skip running the full match on this
const fastPass = (ts, regs, verbose) => {
const fastPass = (ts, regs) => {
for(let i = 0; i < regs.length; i++) {
let reg = regs[i];
let found = false;
Expand All @@ -10,9 +10,6 @@ const fastPass = (ts, regs, verbose) => {
}
//look-for missing term-matches
if (reg.normal !== undefined) {
if (verbose) {
console.log(reg);
}
for(let o = 0; o < ts.terms.length; o++) {
if (ts.terms[o].normal === reg.normal || ts.terms[o].silent_term === reg.normal) {
found = true;
Expand Down
11 changes: 11 additions & 0 deletions test/unit/subset/contractions/basic.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,14 @@ test('contracted', function(t) {
t.equal(str, `I will`, 'expanded');
t.end();
});

//the "'d" contraction is ambiguous: checks that it expands to 'would' or 'had' depending on the verb that follows
//NOTE(review): the test name says 'would-or-did', but the assertions below cover 'would' vs 'had'
test('would-or-did', function(t) {
//followed by 'contemplate' -> expect 'would'
var r = nlp(`i'd contemplate`);
var str = r.contractions().expand().all().out('text');
t.equal(str, `i would contemplate`, 'i-would');

//followed by 'contemplated' -> expect 'had'
r = nlp(`i'd contemplated`);
str = r.contractions().expand().all().out('text');
t.equal(str, `i had contemplated`, 'i-had');
t.end();
});

0 comments on commit bcb29bf

Please sign in to comment.