Skip to content

Commit

Permalink
tests/test_story.py: added test for #3813().
Browse files Browse the repository at this point in the history
This is addressed in mupdf master.
  • Loading branch information
julian-smith-artifex-com committed Oct 31, 2024
1 parent f06f1c7 commit d08937a
Showing 1 changed file with 65 additions and 0 deletions.
65 changes: 65 additions & 0 deletions tests/test_story.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,68 @@ def contentfn(positions):
def test_archive_creation():
    """Smoke test: build a Story with `archive` given as an Archive and as a path."""
    # First with an explicit Archive object wrapping the current directory...
    story = pymupdf.Story(archive=pymupdf.Archive('.'))
    # ...then with the directory path passed directly as `archive`.
    story = pymupdf.Story(archive='.')


def test_3813():
    """Regression test for issue #3813: <ol> numbering after a nested <ul>.

    Renders HTML holding two ordered lists -- one with a nested <ol>, one
    with a nested <ul> -- into a PDF via Story/DocumentWriter, reads back
    the text of the first page and compares it with the expected text for
    the MuPDF version in use.
    """
    import pymupdf

    html_source = """
<p>Count is fine:</p>
<ol>
<li>Lorem
<ol>
<li>Sub Lorem</li>
<li>Sub Lorem</li>
</ol>
</li>
<li>Lorem</li>
<li>Lorem</li>
</ol>
<p>Broken count:</p>
<ol>
<li>Lorem
<ul>
<li>Sub Lorem</li>
<li>Sub Lorem</li>
</ul>
</li>
<li>Lorem</li>
<li>Lorem</li>
</ol>
"""
    page_rect = pymupdf.paper_rect("A4")
    # Shrink the page rectangle by 36 units on every side for the content area.
    content_rect = page_rect + (36, 36, -36, -36)

    story = pymupdf.Story(html=html_source)
    out_path = os.path.normpath(f'{__file__}/../../tests/test_3813_out.pdf')
    writer = pymupdf.DocumentWriter(out_path)

    # Emit at least one page, and keep going while place() reports more content.
    while True:
        device = writer.begin_page(page_rect)
        more, _filled = story.place(content_rect)
        story.draw(device)
        writer.end_page()
        if not more:
            break

    writer.close()

    # Read the text of the first page back from the generated file.
    with pymupdf.open(out_path) as document:
        first_page = document[0]
        text = first_page.get_text()
    text_utf8 = text.encode()

    # Both expectations agree up to the end of the nested bullet list.
    shared_prefix = (
        b'Count is \xef\xac\x81ne:\n1. Lorem\n1. Sub Lorem\n2. Sub Lorem\n2. Lorem\n3. Lorem\n'
        b'Broken count:\n1. Lorem\n\xe2\x80\xa2 Sub Lorem\n\xe2\x80\xa2 Sub Lorem\n'
    )
    if pymupdf.mupdf_version_tuple < (1, 25):
        # MuPDF gets things wrong.
        numbering_tail = b'4. Lorem\n5. Lorem\n'
    else:
        numbering_tail = b'2. Lorem\n3. Lorem\n'
    text_expected_utf8 = shared_prefix + numbering_tail
    text_expected = text_expected_utf8.decode()

    # Diagnostics to make a failing comparison easy to inspect.
    print(f'text_utf8:\n {text_utf8!r}')
    print(f'text_expected_utf8:\n {text_expected_utf8!r}')
    print(f'text:\n {textwrap.indent(text, " ")}')
    print(f'text_expected:\n {textwrap.indent(text_expected, " ")}')

    assert text == text_expected

0 comments on commit d08937a

Please sign in to comment.