Skip to content

Commit

Permalink
improve fuzzy search
Browse files Browse the repository at this point in the history
  • Loading branch information
vincerubinetti committed Jan 9, 2025
1 parent 0d52059 commit 105ec95
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 22 deletions.
Binary file modified bun.lockb
Binary file not shown.
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
"react": "^19.0.0",
"react-dom": "^19.0.0",
"tippy.js": "^6.3.7",
"trigram-similarity": "^1.0.7",
"zustand": "^5.0.3"
},
"devDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion src/components/Table.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ const Table = <Datum extends object>({
? col.render(cell, row)
: typeof cell === "number"
? formatNumber(cell, false)
: String(cell).split("_").join(" ")}
: String(cell).replaceAll("_", " ")}
</td>
);
})}
Expand Down
6 changes: 2 additions & 4 deletions src/sections/HeaderBg.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ export default HeaderBg;
color: gray,
alpha: 0,
spin: Math.random() * 360,
radius: particleSize,
radius: 0,
animations: [],
}));

Expand Down Expand Up @@ -196,9 +196,7 @@ export default HeaderBg;
const mouse = point.matrixTransform(ctx.getTransform().inverse());
/** bulge particles */
for (const particle of particles) {
const bulge =
particleSize +
20 * particleSize * 1.01 ** -dist(particle.position, mouse);
const bulge = 20 * particleSize * 1.01 ** -dist(particle.position, mouse);
gsap.to(particle, { radius: bulge });
}
});
Expand Down
6 changes: 0 additions & 6 deletions src/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,3 @@ declare module "*.svg?react" {

export default ReactComponent;
}

/** library doesn't provide type defs */
declare module "trigram-similarity" {
const trigramSimilarity: (input1: string, input2: string) => number;
export default trigramSimilarity;
}
48 changes: 38 additions & 10 deletions src/workers/worker.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { expose } from "comlink";
import trigramSimilarity from "trigram-similarity";

/**
* note: every time you communicate with a web worker, the message content must
Expand All @@ -21,6 +20,10 @@ export const expensiveFunction = () => {
return total;
};

/**
 * normalize strings for comparison: underscores and every whitespace character
 * become a plain space, then everything is lower-cased
 */
const normalize = (string: string) => {
  /** one pass over the string, character class covers both "_" and whitespace */
  const spaced = string.replace(/[_\s]/g, " ");
  return spaced.toLowerCase();
};

/** exact (case-insensitive) search on large list of items */
export const exactSearch = <Entry extends Record<string, unknown>>(
/** array of objects */
Expand All @@ -31,14 +34,15 @@ export const exactSearch = <Entry extends Record<string, unknown>>(
search: string,
) =>
list.filter((entry) =>
keys
.map((key) => String(entry[key] ?? ""))
.join(" ")
.toLowerCase()
.includes(search.toLowerCase()),
normalize(
keys
.map((key) => String(entry[key] ?? ""))
.join(" ")
.toLowerCase(),
).includes(normalize(search)),
);

/** fuzzy (trigram) search on large list of items */
/** fuzzy search on large list of items */
export const fuzzySearch = <Entry extends Record<string, unknown>>(
/** array of objects */
list: readonly Entry[],
Expand All @@ -51,9 +55,9 @@ export const fuzzySearch = <Entry extends Record<string, unknown>>(
): Entry[] =>
list.filter(
(entry) =>
trigramSimilarity(
String(keys.map((key) => String(entry[key] ?? "")).join(" ")),
search,
nGramSimilarity(
normalize(keys.map((key) => String(entry[key] ?? "")).join(" ")),
normalize(search),
) > threshold,
);

Expand All @@ -67,3 +71,27 @@ let progress: OnProgress | undefined;
/** store the given callback in the module-level `progress` slot and hand it back */
export const onProgress = (callback: OnProgress) => {
  progress = callback;
  return callback;
};

expose({ expensiveFunction, exactSearch, fuzzySearch, onProgress });

/**
 * split string into n-grams
 *
 * the value is padded with n - 1 spaces on each side, so the first and last
 * characters each show up in n different grams
 */
const nGrams = (value: string, n = 3) => {
  /** start/end padding */
  const padding = " ".repeat(n - 1);
  const padded = padding + value + padding;
  /** one gram per window position */
  const grams = new Array<string>(padded.length - n + 1);
  /** slide a window of width n across the padded string */
  for (let start = 0; start < grams.length; start++)
    grams[start] = padded.slice(start, start + n);
  return grams;
};

/**
 * calc similarity score https://stackoverflow.com/a/79343803/2180570
 *
 * score is the Jaccard index of the two strings' n-gram sets, i.e. number of
 * distinct n-grams they share divided by number of distinct n-grams in either.
 * returns a number from 0 (nothing shared) to 1 (identical).
 *
 * implemented with plain Set iteration instead of Set.prototype.intersection /
 * union, so it also runs on engines that don't ship the ES2025 set methods.
 */
const nGramSimilarity = (stringA: string, stringB: string, n = 3) => {
  /** identical strings are a perfect match, no need to build sets */
  if (stringA === stringB) return 1;

  const a = new Set(nGrams(stringA, n));
  const b = new Set(nGrams(stringB, n));

  /** count shared n-grams, iterating the smaller set to do fewer lookups */
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];
  let common = 0;
  for (const gram of smaller) if (larger.has(gram)) common++;

  /** size of union = |a| + |b| - |a ∩ b|, no need to materialize the union */
  const total = a.size + b.size - common;

  /** if there are no n-grams at all, divide by Infinity to get 0 instead of NaN */
  return common / (total || Infinity);
};

0 comments on commit 105ec95

Please sign in to comment.