Update regexp with escapes in TextTokenizer

Resolves #49
readium · Sep 12, 2024 · 68e9b60 · 68e9b60
1 parent 753e7d7
commit 68e9b60
Showing 1 changed file with 3 additions and 1 deletion.
diff --git a/shared/src/util/tokenizer/TextTokenizer.ts b/shared/src/util/tokenizer/TextTokenizer.ts
@@ -114,11 +114,13 @@ export class NaiveTextTokenizer {
     }
 }
 
+const trimmedMatcher = new RegExp("[\\p{L}\\p{N}]+", "u");
+
 // Unicode-aware way of checking whether a string contains anything that can be spoken.
 // "Spoken" in this case means at least one Unicode letter or Unicode number character.
 export const speakableToken = (token: string): string | null => {
     const trimmedToken = token.trimEnd();
     if(trimmedToken.length === 0) return null;
-    if(trimmedToken.match(/[\p{L}\p{N}]+/u) === null) return null;
+    if(trimmedToken.match(trimmedMatcher) === null) return null;
     return trimmedToken;
 }