diff --git a/docs/installation.rst b/docs/installation.rst index 59dbc3f88..f4c01d2a5 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -269,6 +269,8 @@ Using with Pyodide See :doc:`pyodide`. +.. _installation_ocr: + Enabling Integrated OCR Support --------------------------------------------------------- diff --git a/docs/recipes-ocr.rst b/docs/recipes-ocr.rst index 32cd50934..fca4455c6 100644 --- a/docs/recipes-ocr.rst +++ b/docs/recipes-ocr.rst @@ -18,7 +18,7 @@ OCR - Optical Character Recognition |PyMuPDF| has integrated support for OCR (Optical Character Recognition). It is possible to use OCR for both, images (via the :ref:`Pixmap` class) and for document pages. -The feature is currently based on Tesseract-OCR which must be installed as a separate application -- see the installation chapter. +The feature is currently based on Tesseract-OCR which must be installed as a separate application -- see :ref:`installation_ocr`. How to OCR an Image -------------------- diff --git a/tests/conftest.py b/tests/conftest.py index e11553859..3f41d9943 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,6 +42,8 @@ def get_fds(): pymupdf._log_items_clear() pymupdf._log_items_active(True) + JM_annot_id_stem = pymupdf.JM_annot_id_stem + # Run the test. rep = yield @@ -57,6 +59,9 @@ def get_fds(): log_items = pymupdf._log_items() assert not log_items, f'log() was called; {len(log_items)=}.' + assert pymupdf.JM_annot_id_stem == JM_annot_id_stem, \ + f'pymupdf.JM_annot_id_stem has changed from {JM_annot_id_stem!r} to {pymupdf.JM_annot_id_stem!r}' + if platform.system() == 'Linux': # Show detailed information about leaked fds. 
open_fds_after, open_fds_after_l = get_fds() diff --git a/tests/test_annots.py b/tests/test_annots.py index 973701b9e..882b26692 100644 --- a/tests/test_annots.py +++ b/tests/test_annots.py @@ -10,8 +10,6 @@ import gentle_compare -pymupdf.TOOLS.set_annot_stem("jorj") - red = (1, 0, 0) blue = (0, 0, 1) gold = (1, 1, 0) @@ -226,38 +224,46 @@ def test_1645(): ''' Test fix for #1645. ''' - path_in = os.path.abspath( f'{__file__}/../resources/symbol-list.pdf') - - if pymupdf.mupdf_version_tuple >= (1, 26): - path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.26.pdf') - elif pymupdf.mupdf_version_tuple >= (1, 25): - path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.25.pdf') - elif pymupdf.mupdf_version_tuple >= (1, 24, 2): - path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.2.pdf') - else: - path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.pdf') - path_out = os.path.abspath( f'{__file__}/../test_1645_out.pdf') - doc = pymupdf.open(path_in) - page = doc[0] - page_bounds = page.bound() - annot_loc = pymupdf.Rect(page_bounds.x0, page_bounds.y0, page_bounds.x0 + 75, page_bounds.y0 + 15) - # Check type of page.derotation_matrix - this is #2911. - assert isinstance(page.derotation_matrix, pymupdf.Matrix), \ - f'Bad type for page.derotation_matrix: {type(page.derotation_matrix)=} {page.derotation_matrix=}.' - page.add_freetext_annot( - annot_loc * page.derotation_matrix, - "TEST", - fontsize=18, - fill_color=pymupdf.utils.getColor("FIREBRICK1"), - rotate=page.rotation, - ) - doc.save(path_out, garbage=1, deflate=True, no_new_id=True) - print(f'Have created {path_out}. 
comparing with {path_expected}.') - with open( path_out, 'rb') as f: - out = f.read() - with open( path_expected, 'rb') as f: - expected = f.read() - assert out == expected, f'Files differ: {path_out} {path_expected}' + # The expected output files assume annot_stem is 'jorj'. We need to always + # restore this before returning (this is checked by conftest.py). + annot_stem = pymupdf.JM_annot_id_stem + pymupdf.TOOLS.set_annot_stem('jorj') + try: + path_in = os.path.abspath( f'{__file__}/../resources/symbol-list.pdf') + + if pymupdf.mupdf_version_tuple >= (1, 26): + path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.26.pdf') + elif pymupdf.mupdf_version_tuple >= (1, 25): + path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.25.pdf') + elif pymupdf.mupdf_version_tuple >= (1, 24, 2): + path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.2.pdf') + else: + path_expected = os.path.abspath( f'{__file__}/../../tests/resources/test_1645_expected_1.24.pdf') + path_out = os.path.abspath( f'{__file__}/../test_1645_out.pdf') + doc = pymupdf.open(path_in) + page = doc[0] + page_bounds = page.bound() + annot_loc = pymupdf.Rect(page_bounds.x0, page_bounds.y0, page_bounds.x0 + 75, page_bounds.y0 + 15) + # Check type of page.derotation_matrix - this is #2911. + assert isinstance(page.derotation_matrix, pymupdf.Matrix), \ + f'Bad type for page.derotation_matrix: {type(page.derotation_matrix)=} {page.derotation_matrix=}.' + page.add_freetext_annot( + annot_loc * page.derotation_matrix, + "TEST", + fontsize=18, + fill_color=pymupdf.utils.getColor("FIREBRICK1"), + rotate=page.rotation, + ) + doc.save(path_out, garbage=1, deflate=True, no_new_id=True) + print(f'Have created {path_out}. 
comparing with {path_expected}.') + with open( path_out, 'rb') as f: + out = f.read() + with open( path_expected, 'rb') as f: + expected = f.read() + assert out == expected, f'Files differ: {path_out} {path_expected}' + finally: + # Restore annot_stem. + pymupdf.TOOLS.set_annot_stem(annot_stem) def test_1824(): ''' diff --git a/tests/test_toc.py b/tests/test_toc.py index 198fe62ee..bad82c7ef 100644 --- a/tests/test_toc.py +++ b/tests/test_toc.py @@ -187,10 +187,10 @@ def test_3347(): }) links_expected = [ - (0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'jorj-L0'}), - (0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'jorj-L1'}), - (1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'jorj-L0'}), - (1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'jorj-L1'}), + (0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'fitz-L0'}), + (0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'fitz-L1'}), + (1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'fitz-L0'}), + (1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'fitz-L1'}), ] path = os.path.normpath(f'{__file__}/../../tests/test_3347_out.pdf') @@ -264,7 +264,7 @@ def test_3400(): print(f'Saved to {path=}.') links_expected = [ - (1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 
'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'jorj-L0'}) + (1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'fitz-L0'}) ] links_actual = list()