diff --git a/se/formatting.py b/se/formatting.py index 6d361daa..9d4d5bab 100644 --- a/se/formatting.py +++ b/se/formatting.py @@ -145,7 +145,7 @@ def semanticate(xhtml: str) -> str: xhtml = regex.sub(r"""([^\p{Letter}>\"])([vxVX])(\b[^\-]|st\b|nd\b|rd\b|th\b)""", r"""\1\2\3""", xhtml) # We can assume a lowercase i is always a Roman numeral unless followed by ’ - xhtml = regex.sub(r"""([^\p{Letter}<>/\"])i\b(?!’)""", r"""\1i""", xhtml) + xhtml = regex.sub(r"""([^\p{Letter}<>/\"])i\b(?!’)(?![^<>]+>)""", r"""\1i""", xhtml) # Fix obscured names starting with I, V, or X xhtml = regex.sub(fr"""([IVX]){se.WORD_JOINER}⸺""", fr"""\1{se.WORD_JOINER}⸺""", xhtml) diff --git a/tests/draft_commands/semanticate/test-1/golden/semanticate.xhtml b/tests/draft_commands/semanticate/test-1/golden/semanticate.xhtml index 00cb212c..3a8986b6 100644 --- a/tests/draft_commands/semanticate/test-1/golden/semanticate.xhtml +++ b/tests/draft_commands/semanticate/test-1/golden/semanticate.xhtml @@ -168,6 +168,9 @@
I gave him an I.O.U.
The picture was almost 3D.
His name was abbreviated Chas.
+ +Edition i. Pages i and ii. Number i’.
+See Appendix.