Skip to content

Commit

Permalink
Properly handle noterefs in m-070
Browse files Browse the repository at this point in the history
  • Loading branch information
apasel422 committed Jul 15, 2024
1 parent 1f78ebb commit 9545297
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
9 changes: 7 additions & 2 deletions se/se_epub_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3614,9 +3614,14 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N

  # Check and log missing glossary keys
  if ebook_flags["has_glossary_search_key_map"] and filename.name not in IGNORED_FILENAMES:
- source_text = dom.xpath("/html/body")[0].inner_text()
+ # Remove all noterefs, as their anchor text will otherwise immediately follow a potential glossary term, defeating the below regex.
+ dom_copy = deepcopy(dom)
+ for node in dom_copy.xpath(".//a[contains(@epub:type, 'noteref')]"):
+ node.remove()
+
+ source_text = dom_copy.xpath("/html/body")[0].inner_text()
  if dom.xpath("/html/body//section[contains(@epub:type, 'glossary')]"):
- nodes = dom.xpath("/html/body//dd[contains(@epub:type, 'glossdef')]")
+ nodes = dom_copy.xpath("/html/body//dd[contains(@epub:type, 'glossdef')]")
  source_text = " ".join([node.inner_text() for node in nodes])
  for glossary_index, glossary_value in enumerate(glossary_usage):
  if glossary_value[1] is False and regex.search(r"(?<!\w)\L<val>(?!\w)", source_text, flags=regex.IGNORECASE, val=[glossary_value[0]]):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,7 @@
  <search-key-group href="text/glossary.xhtml#abc">
  <match value="’versal"/>
  </search-key-group>
+ <search-key-group href="text/glossary.xhtml#def">
+ <match value="unsiker"/>
+ </search-key-group>
  </search-key-map>
1 change: 1 addition & 0 deletions tests/lint/metadata/m-070/in/src/epub/text/chapter-1.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
  <p>A common theory was R+L=J.</p>
  <p>A ’versal truth.</p>
  <p>An unknown M.O.</p>
+ <p>Unsiker<a href="endnotes.xhtml#note-1" epub:type="noteref">1</a> is an unusual term.</p>
  </section>
</body>
</html>

0 comments on commit 9545297

Please sign in to comment.