-
Notifications
You must be signed in to change notification settings - Fork 0
/
libStringDistance.min.js
26 lines (26 loc) · 4.37 KB
/
libStringDistance.min.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
// ==UserScript==
// ==UserLibrary==
// @name libStringDistance
// @namespace https://openuserjs.org/users/Anakunda
// @version 1.01
// @license GPL-3.0-or-later
// @copyright 2021, Anakunda (https://openuserjs.org/users/Anakunda)
// @description String Similarity Comparison
// @exclude *
// ==/UserScript==
// ==/UserLibrary==
/**
 * Hamming distance between two strings: the number of positions at which
 * the UTF-16 code units differ. When the strings have different lengths,
 * every position that exists in only one of them counts as a difference.
 *
 * @param {string} strA - first string
 * @param {string} strB - second string
 * @returns {number} count of differing positions (0 for identical strings)
 * @throws {string} the string 'Invalid arguments' when either argument is
 *   not a string (a string is thrown, not an Error, so existing callers
 *   that catch it keep working)
 */
function hammingDistance(strA, strB) {
  if (typeof strA !== 'string' || typeof strB !== 'string') throw 'Invalid arguments';
  const sharedLength = Math.min(strA.length, strB.length);
  // Positions past the end of the shorter string cannot match anything.
  let distance = Math.max(strA.length, strB.length) - sharedLength;
  for (let i = 0; i < sharedLength; ++i) {
    if (strA[i] !== strB[i]) ++distance;
  }
  return distance;
}

/**
 * Levenshtein (edit) distance: the minimum number of single-character
 * insertions, deletions and substitutions that turn strA into strB.
 *
 * @param {string} strA - first string
 * @param {string} strB - second string
 * @returns {number} edit distance; the other string's length when one is empty
 * @throws {string} the string 'Invalid arguments' when either argument is
 *   not a string
 */
function levenshteinDistance(strA, strB) {
  if (typeof strA !== 'string' || typeof strB !== 'string') throw 'Invalid arguments';
  if (strA.length === 0) return strB.length;
  if (strB.length === 0) return strA.length;

  // Only the previous row of the edit matrix is ever read, so two rows are
  // enough. Row 0 is the cost of building each prefix of strA from nothing.
  let previousRow = Array.from({ length: strA.length + 1 }, (_, column) => column);
  for (let i = 1; i <= strB.length; ++i) {
    // Column 0 is the cost of deleting the first i characters of strB.
    const currentRow = [i];
    for (let j = 1; j <= strA.length; ++j) {
      currentRow[j] = strB[i - 1] === strA[j - 1]
        ? previousRow[j - 1]
        : 1 + Math.min(previousRow[j - 1], currentRow[j - 1], previousRow[j]);
    }
    previousRow = currentRow;
  }
  return previousRow[strA.length];
}

/**
 * Cosine similarity of the term-frequency vectors of two strings. Terms are
 * obtained by splitting on a single space character, so consecutive spaces
 * produce empty-string terms that are counted like any other term.
 *
 * @param {string} strA - first string
 * @param {string} strB - second string
 * @returns {number} similarity in [0, 1]; 0 when no term is shared
 * @throws {string} the string 'Invalid arguments' when either argument is
 *   not a string
 */
function cosineSimilarity(strA, strB) {
  if (typeof strA !== 'string' || typeof strB !== 'string') throw 'Invalid arguments';

  // A Map keeps words such as "constructor" or "toString" from colliding
  // with members inherited from Object.prototype.
  const countTerms = (str) => {
    const counts = new Map();
    for (const word of str.split(' ')) {
      counts.set(word, (counts.get(word) || 0) + 1);
    }
    return counts;
  };

  const termFreqA = countTerms(strA);
  const termFreqB = countTerms(strB);
  const allTerms = new Set([...termFreqA.keys(), ...termFreqB.keys()]);

  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (const term of allTerms) {
    const freqA = termFreqA.get(term) || 0;
    const freqB = termFreqB.get(term) || 0;
    dotProduct += freqA * freqB;
    squaredNormA += freqA * freqA;
    squaredNormB += freqB * freqB;
  }
  // split(' ') always yields at least one term, so neither norm is zero.
  return dotProduct / (Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB));
}

/**
 * Jaro-Winkler similarity of two strings. The function name keeps its
 * historical spelling because callers reference it.
 *
 * @param {string} strA - first string
 * @param {string} strB - second string
 * @returns {number} similarity in [0, 1]; 1 for identical strings, 0 when
 *   either string is empty or no characters match
 * @throws {string} the string 'Invalid arguments' when either argument is
 *   not a string
 */
function jaroWrinkerSimilarity(strA, strB) {
  if (typeof strA !== 'string' || typeof strB !== 'string') throw 'Invalid arguments';
  // Exit early if either is empty.
  if (strA.length === 0 || strB.length === 0) return 0;
  // Exit early if they are an exact match.
  if (strA === strB) return 1;

  // Characters only count as matching when they are at most `range`
  // positions apart.
  const range = Math.floor(Math.max(strA.length, strB.length) / 2) - 1;
  const matchedA = new Array(strA.length).fill(false);
  const matchedB = new Array(strB.length).fill(false);
  let matches = 0;
  for (let i = 0; i < strA.length; ++i) {
    const low = Math.max(0, i - range);
    // Clamp to the last valid index of strB.
    const high = Math.min(i + range, strB.length - 1);
    for (let j = low; j <= high; ++j) {
      if (!matchedB[j] && strA[i] === strB[j]) {
        matchedA[i] = true;
        matchedB[j] = true;
        ++matches;
        break;
      }
    }
  }
  // Exit early if no matches were found.
  if (matches === 0) return 0;

  // Count the transpositions: walk the matched characters of both strings
  // in order and count the pairs that disagree.
  let transpositions = 0;
  let cursorB = 0;
  for (let i = 0; i < strA.length; ++i) {
    if (!matchedA[i]) continue;
    while (cursorB < strB.length && !matchedB[cursorB]) ++cursorB;
    if (strA[i] !== strB[cursorB]) ++transpositions;
    ++cursorB;
  }

  let weight = (matches / strA.length + matches / strB.length +
    (matches - transpositions / 2) / matches) / 3;
  // Winkler adjustment: reward a common prefix of up to four characters,
  // but only when the strings are already fairly similar.
  if (weight > 0.7) {
    let prefixLength = 0;
    while (prefixLength < 4 && strA[prefixLength] === strB[prefixLength]) ++prefixLength;
    weight += 0.1 * prefixLength * (1 - weight);
  }
  return weight;
}

/**
 * Builds a trigram index over a list of phrases. Each phrase is padded with
 * one space on either side and cut into overlapping three-character slices.
 *
 * The returned object exposes:
 *  - phrases: the distinct, truthy phrases that were indexed, in order;
 *  - trigramIndex: a dictionary from trigram to the list of indices into
 *    `phrases` that contain it;
 *  - index(phrase): adds a phrase (ignored when falsy or already indexed);
 *  - find(phrase): returns [{phrase, matches}] for every indexed phrase that
 *    shares at least one trigram with the query, ordered by descending
 *    `matches` (the number of query trigrams found in that phrase).
 *
 * @param {Array} inputPhrases - phrases to index
 * @returns {?Object} the index, or null when inputPhrases is not an array
 */
function trigramIndex(inputPhrases) {
  if (!Array.isArray(inputPhrases)) return null;

  // Calls callback with every trigram of the space-padded phrase, last first.
  const forEachTrigram = (phrase, callback) => {
    const padded = ` ${phrase} `;
    for (let i = padded.length - 3; i >= 0; --i) {
      callback(padded.slice(i, i + 3));
    }
  };

  const instance = {
    phrases: [],
    // Prototype-less dictionary: trigrams such as "map" or "pop" must not
    // resolve to inherited methods.
    trigramIndex: Object.create(null),

    index(phrase) {
      if (!phrase || this.phrases.includes(phrase)) return;
      const phraseIndex = this.phrases.push(phrase) - 1;
      forEachTrigram(phrase, (trigram) => {
        const bucket = this.trigramIndex[trigram] || (this.trigramIndex[trigram] = []);
        if (!bucket.includes(phraseIndex)) bucket.push(phraseIndex);
      });
    },

    find(phrase) {
      // Sparse array: slot i holds the hit count for this.phrases[i].
      const matchCounts = [];
      forEachTrigram(phrase, (trigram) => {
        const bucket = this.trigramIndex[trigram];
        if (!bucket) return;
        for (const phraseIndex of bucket) {
          matchCounts[phraseIndex] = (matchCounts[phraseIndex] || 0) + 1;
        }
      });

      const result = [];
      // forEach skips the holes left by phrases without any hit.
      matchCounts.forEach((matches, phraseIndex) => {
        result.push({ phrase: this.phrases[phraseIndex], matches: matches });
      });
      return result.sort((a, b) => b.matches - a.matches);
    },
  };

  for (const phrase of inputPhrases) instance.index(phrase);
  return instance;
}

/**
 * Compares two strings by shared trigrams: indexes strA and looks up strB.
 *
 * @param {string} strA - string to index
 * @param {string} strB - string to look up
 * @returns {Array<{phrase: string, matches: number}>} a single entry for strA
 *   carrying the number of trigrams of strB found in it, or an empty array
 *   when nothing is shared or strA is empty
 * @throws {string} the string 'Invalid arguments' when either argument is
 *   not a string
 */
function trigramCompare(strA, strB) {
  if (typeof strA !== 'string' || typeof strB !== 'string') throw 'Invalid arguments';
  return trigramIndex([strA]).find(strB);
}