diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py
index 79e2f2ea..1b3a666c 100644
--- a/se/se_epub_lint.py
+++ b/se/se_epub_lint.py
@@ -3614,9 +3614,14 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
# Check and log missing glossary keys
if ebook_flags["has_glossary_search_key_map"] and filename.name not in IGNORED_FILENAMES:
- source_text = dom.xpath("/html/body")[0].inner_text()
+ # Remove all noterefs, as their anchor text will otherwise immediately follow a potential glossary term, defeating the below regex.
+ dom_copy = deepcopy(dom)
+ for node in dom_copy.xpath(".//a[contains(@epub:type, 'noteref')]"):
+ node.remove()
+
+ source_text = dom_copy.xpath("/html/body")[0].inner_text()
if dom.xpath("/html/body//section[contains(@epub:type, 'glossary')]"):
- nodes = dom.xpath("/html/body//dd[contains(@epub:type, 'glossdef')]")
+ nodes = dom_copy.xpath("/html/body//dd[contains(@epub:type, 'glossdef')]")
source_text = " ".join([node.inner_text() for node in nodes])
for glossary_index, glossary_value in enumerate(glossary_usage):
if glossary_value[1] is False and regex.search(r"(?<!\w)\L<val>(?!\w)", source_text, flags=regex.IGNORECASE, val=[glossary_value[0]]):
diff --git a/tests/lint/metadata/m-070/in/src/epub/glossary-search-key-map.xml b/tests/lint/metadata/m-070/in/src/epub/glossary-search-key-map.xml
index d50708e4..266070a0 100644
--- a/tests/lint/metadata/m-070/in/src/epub/glossary-search-key-map.xml
+++ b/tests/lint/metadata/m-070/in/src/epub/glossary-search-key-map.xml
@@ -12,4 +12,7 @@
A common theory was R+L=J.
A ’versal truth.
An unknown M.O.
+Unsiker1 is an unusual term.