Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jules #4231

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open

Jules #4231

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ Using with Pyodide
See :doc:`pyodide`.


.. _installation_ocr:

Enabling Integrated OCR Support
---------------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/recipes-ocr.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ OCR - Optical Character Recognition

|PyMuPDF| has integrated support for OCR (Optical Character Recognition). OCR can be used both for images (via the :ref:`Pixmap` class) and for document pages.

The feature is currently based on Tesseract-OCR which must be installed as a separate application -- see the installation chapter.
The feature is currently based on Tesseract-OCR, which must be installed as a separate application -- see :ref:`installation_ocr`.

How to OCR an Image
--------------------
Expand Down
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def get_fds():
pymupdf._log_items_clear()
pymupdf._log_items_active(True)

JM_annot_id_stem = pymupdf.JM_annot_id_stem

# Run the test.
rep = yield

Expand All @@ -57,6 +59,9 @@ def get_fds():
log_items = pymupdf._log_items()
assert not log_items, f'log() was called; {len(log_items)=}.'

assert pymupdf.JM_annot_id_stem == JM_annot_id_stem, \
f'pymupdf.JM_annot_id_stem has changed from {JM_annot_id_stem!r} to {pymupdf.JM_annot_id_stem!r}'

if platform.system() == 'Linux':
# Show detailed information about leaked fds.
open_fds_after, open_fds_after_l = get_fds()
Expand Down
74 changes: 40 additions & 34 deletions tests/test_annots.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import gentle_compare


pymupdf.TOOLS.set_annot_stem("jorj")

red = (1, 0, 0)
blue = (0, 0, 1)
gold = (1, 1, 0)
Expand Down Expand Up @@ -226,38 +224,46 @@ def test_1645():
'''
Test fix for #1645.
'''
path_in = os.path.abspath( f'{__file__}/../resources/symbol-list.pdf')

if pymupdf.mupdf_version_tuple >= (1, 26):
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.26.pdf')
elif pymupdf.mupdf_version_tuple >= (1, 25):
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.25.pdf')
elif pymupdf.mupdf_version_tuple >= (1, 24, 2):
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.2.pdf')
else:
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.pdf')
path_out = os.path.abspath( f'{__file__}/../test_1645_out.pdf')
doc = pymupdf.open(path_in)
page = doc[0]
page_bounds = page.bound()
annot_loc = pymupdf.Rect(page_bounds.x0, page_bounds.y0, page_bounds.x0 + 75, page_bounds.y0 + 15)
# Check type of page.derotation_matrix - this is #2911.
assert isinstance(page.derotation_matrix, pymupdf.Matrix), \
f'Bad type for page.derotation_matrix: {type(page.derotation_matrix)=} {page.derotation_matrix=}.'
page.add_freetext_annot(
annot_loc * page.derotation_matrix,
"TEST",
fontsize=18,
fill_color=pymupdf.utils.getColor("FIREBRICK1"),
rotate=page.rotation,
)
doc.save(path_out, garbage=1, deflate=True, no_new_id=True)
print(f'Have created {path_out}. comparing with {path_expected}.')
with open( path_out, 'rb') as f:
out = f.read()
with open( path_expected, 'rb') as f:
expected = f.read()
assert out == expected, f'Files differ: {path_out} {path_expected}'
# The expected output files assume annot_stem is 'jorj', so set it for the
# duration of this test. We must always restore the previous value before
# returning (this is checked by conftest.py).
annot_stem = pymupdf.JM_annot_id_stem
pymupdf.TOOLS.set_annot_stem('jorj')
try:
path_in = os.path.abspath( f'{__file__}/../resources/symbol-list.pdf')

if pymupdf.mupdf_version_tuple >= (1, 26):
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.26.pdf')
elif pymupdf.mupdf_version_tuple >= (1, 25):
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.25.pdf')
elif pymupdf.mupdf_version_tuple >= (1, 24, 2):
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.2.pdf')
else:
path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.pdf')
path_out = os.path.abspath( f'{__file__}/../test_1645_out.pdf')
doc = pymupdf.open(path_in)
page = doc[0]
page_bounds = page.bound()
annot_loc = pymupdf.Rect(page_bounds.x0, page_bounds.y0, page_bounds.x0 + 75, page_bounds.y0 + 15)
# Check type of page.derotation_matrix - this is #2911.
assert isinstance(page.derotation_matrix, pymupdf.Matrix), \
f'Bad type for page.derotation_matrix: {type(page.derotation_matrix)=} {page.derotation_matrix=}.'
page.add_freetext_annot(
annot_loc * page.derotation_matrix,
"TEST",
fontsize=18,
fill_color=pymupdf.utils.getColor("FIREBRICK1"),
rotate=page.rotation,
)
doc.save(path_out, garbage=1, deflate=True, no_new_id=True)
print(f'Have created {path_out}. comparing with {path_expected}.')
with open( path_out, 'rb') as f:
out = f.read()
with open( path_expected, 'rb') as f:
expected = f.read()
assert out == expected, f'Files differ: {path_out} {path_expected}'
finally:
# Restore annot_stem.
pymupdf.TOOLS.set_annot_stem(annot_stem)

def test_1824():
'''
Expand Down
10 changes: 5 additions & 5 deletions tests/test_toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,10 @@ def test_3347():
})

links_expected = [
(0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'jorj-L0'}),
(0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'jorj-L1'}),
(1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'jorj-L0'}),
(1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'jorj-L1'}),
(0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
(0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
(1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
(1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
]

path = os.path.normpath(f'{__file__}/../../tests/test_3347_out.pdf')
Expand Down Expand Up @@ -264,7 +264,7 @@ def test_3400():
print(f'Saved to {path=}.')

links_expected = [
(1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'jorj-L0'})
(1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'fitz-L0'})
]

links_actual = list()
Expand Down
Loading