From b723253b98d734644eb23e58fb1eafa28fb92194 Mon Sep 17 00:00:00 2001 From: Mark Brockington Date: Fri, 9 Jan 2015 07:57:00 -0700 Subject: [PATCH 1/3] First attempt at pattern matching in JS. --- vm/src/lib.js | 62 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/vm/src/lib.js b/vm/src/lib.js index 1cbff01..d4c74b1 100644 --- a/vm/src/lib.js +++ b/vm/src/lib.js @@ -152,6 +152,7 @@ function translatePattern (pattern) { // TODO Add support for balanced character matching (not sure this is easily achieveable). pattern = '' + pattern; + console.log('Initial Pattern:',pattern); for (var i in ROSETTA_STONE) { if (ROSETTA_STONE.hasOwnProperty(i)) { @@ -159,6 +160,48 @@ } } + var n = 0, l, character; + l = pattern.length; + + for (i = 0; i < l; i++) { + character = pattern.substr(i, 1); + + if (character == '\\') { + // Skip the next character since we've escaped this one. + if (i < l) { i++; } + } else if (character == '[' ) { + + // We've started the set, we need to end the set. + + // Get the first character in the set. + i++; character = pattern.substr(i,1); + // The carat immediately allows us to use the next character as the unskippable one. + if (character == '^') { + i++; character = pattern.substr(i,1); + } + // Check if the first character is a ]. If it is, it's special + // (as far as Lua is concerned) and needs to be escaped. + if (character == ']') { + pattern = pattern.substr(0, i) + '\\' + pattern.substr(i++); + l++; + } + + do { + if (i == l) { + // Throw an error. + } + if (character == '\\') { + if (i < l) { i++; } + } + if (i < l) { i++; } + character = pattern.substr(i,1); + } while (character != ']'); + + } + + } + + console.log('Resulting Pattern:',pattern); return pattern; } @@ -1862,10 +1905,25 @@ var matches = s.match(new RegExp(translatePattern (pattern))); - if (!matches) return; - if (!matches[1]) return matches[0]; + console.log('matches:', matches); + + if (!matches) { + console.log('match returns nil'); + return; + } + + console.log('matches[0]:', matches[0]); + + if (!matches[1]) { + console.log('match returns:',matches[0]); + return matches[0]; + } + + console.log('matches[1]:', matches[1]); matches.shift(); + + console.log('match returns:',matches); return matches; }, From ad9fbceab57374d4893ec75f8a2da12f25293f46 Mon Sep 17 00:00:00 2001 From: Mark Brockington Date: Fri, 9 Jan 2015 15:17:16 -0700 Subject: [PATCH 2/3] Bug in test ... you are searching for character patterns by including the square brackets. --- test/scripts/control-structures.lua | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/scripts/control-structures.lua b/test/scripts/control-structures.lua index 10d5b93..3bb8d33 100644 --- a/test/scripts/control-structures.lua +++ b/test/scripts/control-structures.lua @@ -69,11 +69,10 @@ for key, val in pairs(u) do a = a..'['..tostring(key)..'=='..tostring(val)..']' end - -assertTrue (string.find(a, '[6.28==tau]') ~= nil, 'for/pairs iteration should include items with double as key.') -assertTrue (string.find(a, '[@!#==qbert]') ~= nil, 'for/pairs iteration should include items with string as key.') -assertTrue (string.find(a, '[table: 0x%d+==1729]') ~= nil, 'for/pairs iteration should include items with table as key.') -assertTrue (string.find(a, '[function: 0x%d+==test]') ~= nil, 'for/pairs iteration should include items with function as key.') +assertTrue (string.find(a, '6.28==tau') ~= nil, 'for/pairs iteration should include items with double as key.') +assertTrue (string.find(a, '@!#==qbert') ~= nil, 'for/pairs iteration should include items with string as key.') +assertTrue (string.find(a, 'table: 0x%d+==1729') ~= nil, 'for/pairs iteration should include items with table as key.') +assertTrue (string.find(a, 'function: 0x%d+==test') ~= nil, 'for/pairs iteration should include items with function as key.') From afd9b53dd4ceacdedc2d77198b34d4479bfba2ee Mon Sep 17 00:00:00 2001 From: Mark Brockington Date: Fri, 9 Jan 2015 15:18:11 -0700 Subject: [PATCH 3/3] First attempt at changing groups to alternativeGroup regular expressions for matching. --- test/scripts/lib-string.lua | 5 +- vm/src/lib.js | 93 +++++++++++++++++++++++++++++++++++-- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/test/scripts/lib-string.lua b/test/scripts/lib-string.lua index 277eac7..b250e15 100644 --- a/test/scripts/lib-string.lua +++ b/test/scripts/lib-string.lua @@ -545,10 +545,13 @@ assertTrue(l == nil, 'string.match() should handle nested char-sets correctly [2 local a = string.match('[123x456]','[0%x]') local b = string.match('123x456','[0%D]') +local c = string.match('123x456','[0%Dfg%p2]') +local d = string.match('123x456','[0%Dfg%p5]') assertTrue(a == '1', 'string.match() should return the correct values from character classes in char-sets [1]') assertTrue(b == 'x', 'string.match() should return the correct values from character classes in char-sets [2]') - +assertTrue(c == '2', 'string.match() should return the first match from character classes in char-sets [1]') +assertTrue(d == 'x', 'string.match() should return the first match from character classes in char-sets [2]') diff --git a/vm/src/lib.js b/vm/src/lib.js index d4c74b1..5b62443 100644 --- a/vm/src/lib.js +++ b/vm/src/lib.js @@ -43,7 +43,7 @@ '%c': '[\x00-\x1f]', '%C': '[^\x00-\x1f]', '%d': '\\d', - '%D': '[^\d]', + '%D': '[^\\d]', '%l': '[a-z]', '%L': '[^a-z]', '%p': '[\.\,\"\'\?\!\;\:\#\$\%\&\(\)\*\+\-\/\<\>\=\@\[\]\\\^\_\{\}\|\~]', @@ -146,22 +146,109 @@ return ('0' + (Math.floor((dayOfYear - offset) / 7) + 1)).substr(-2); } + function groupToAlternativeGroupMatch(pattern) { + // Translates from [0%Dafd%xwombat] to ([0] | %D | [afd] | %x | [wombat]) + var l = pattern.length; + var alternativeMatch = '('; + var i, character; + for (i = 1; i < (l-1); i++) { + character = pattern.substr(i, 1); + + var characterToEscape = pattern.substr(i+1,1); + var charactersToExpand = "aAcCdDlLpPsSuUwWxX"; + if (character == '%' && charactersToExpand.indexOf(characterToEscape) > -1) { + if (i != 1) { + alternativeMatch += "]|"; + } + + alternativeMatch += pattern.substr(i,2); + i += 1; + + if (i+1 < (l-1)) { + alternativeMatch += "|["; + } else { + alternativeMatch += ')'; + return alternativeMatch; + } + } else { + if (i == 1) { + alternativeMatch += '['; + } + alternativeMatch += character; + } + } + alternativeMatch += '])'; + return alternativeMatch; + } function translatePattern (pattern) { // TODO Add support for balanced character matching (not sure this is easily achieveable). pattern = '' + pattern; console.log('Initial Pattern:',pattern); + var n = 0, l, character; + l = pattern.length; + + for (i = 0; i < l; i++) { + character = pattern.substr(i, 1); + + if (character == '%') { + // Skip the next character since we've escaped this one. + if (i < l) { i++; } + } else if (character == '[' ) { + + // We've started the set, we need to end the set. + var startCharacter = i; + var foundEscape = false; + + // Get the first character in the set. + i++; character = pattern.substr(i,1); + // The carat immediately allows us to use the next character as the unskippable one. + if (character == '^') { + i++; character = pattern.substr(i,1); + } + + do { + if (i == l) { + // Throw an error. + } + if (character == '%') { + if (i < l) { i++; } + character = pattern.substr(i,1); + var charactersToExpand = "aAcCdDlLpPsSuUwWxX"; + if (charactersToExpand.indexOf(character) > -1) { + foundEscape = true; + } + } + if (i < l) { i++; } + character = pattern.substr(i,1); + } while (character != ']'); + + if (foundEscape == true) { + l = pattern.length + var groupPatternLength = i-startCharacter; + var altPattern = groupToAlternativeGroupMatch(pattern.substr(startCharacter, groupPatternLength+1)); + console.log('Pattern Before Concatenation:',pattern); + pattern = pattern.substr(1,startCharacter-1) + altPattern + pattern.substr(i, l-i-1); + console.log('Pattern After Concatenation:',pattern); + i = startCharacter + altPattern.length + 1; character = pattern.substr(i,1); + } + } + + } + + console.log('Pattern (step 1) :',pattern); + for (var i in ROSETTA_STONE) { if (ROSETTA_STONE.hasOwnProperty(i)) { pattern = pattern.replace(new RegExp(i, 'g'), ROSETTA_STONE[i]); } } - var n = 0, l, character; - l = pattern.length; + console.log('Pattern (step 2) :',pattern); + l = pattern.length; for (i = 0; i < l; i++) { character = pattern.substr(i, 1);