From e9c8504180a565258df8526f71c3a49c8c2b363e Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Wed, 30 Oct 2024 11:43:26 +0000 Subject: [PATCH 1/2] scripts/gh_release.py: restrict testing of linux-aarch64 because of github timeout. Testing on linux aarch64 seems to have got a lot slower recently and gets killed by github after 6h. --- scripts/gh_release.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/gh_release.py b/scripts/gh_release.py index b08d42aef..b9453cd5b 100755 --- a/scripts/gh_release.py +++ b/scripts/gh_release.py @@ -389,7 +389,8 @@ def set_cibuild_test(): env_pass('PYMUPDF_SETUP_PY_LIMITED_API') CIBW_BUILD_old = env_extra.get('CIBW_BUILD') assert CIBW_BUILD_old is not None - env_set('CIBW_BUILD', 'cp39*') + cp = cps.split()[0] + env_set('CIBW_BUILD', cp) log(f'Building single wheel.') run( f'cibuildwheel{platform_arg}', env_extra=env_extra) @@ -413,7 +414,16 @@ def set_cibuild_test(): # env_set('CIBW_REPAIR_WHEEL_COMMAND', '') - log(f'Testing on all python versions using wheels in wheelhouse/.') + if platform.system() == 'Linux' and env_extra.get('CIBW_ARCHS_LINUX') == 'aarch64': + log(f'Testing all Python versions on linux-aarch64 is too slow and is killed by github after 6h.') + log(f'Testing on restricted python versions using wheels in wheelhouse/.') + # Testing only on first and last python versions. + cp1 = cps.split()[0] + cp2 = cps.split()[-1] + cp = cp1 if cp1 == cp2 else f'{cp1} {cp2}' + env_set('CIBW_BUILD', cp) + else: + log(f'Testing on all python versions using wheels in wheelhouse/.') run( f'cibuildwheel{platform_arg}', env_extra=env_extra) elif inputs_flavours: From c690d35b4557f5c7f82a8a2bd198e22dccb40d46 Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Wed, 30 Oct 2024 16:34:32 +0000 Subject: [PATCH 2/2] src/__init__.py tests/: address #4004 - avoid segv when trying to get page from annot. 
The fix requires MuPDF >= 1.25, specifically this MuPDF commit: When annotation is deleted from page, remove link from annotation to page. --- src/__init__.py | 42 +++++++++++++++++++++---------- tests/resources/test_4004.pdf | Bin 0 -> 6433 bytes tests/test_annots.py | 19 ++++++++++++++ tests/test_widgets.py | 46 ++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+), 13 deletions(-) create mode 100644 tests/resources/test_4004.pdf diff --git a/src/__init__.py b/src/__init__.py index f1f2ab989..f34751288 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -514,6 +514,21 @@ def _as_pdf_page(page, required=True): assert 0, f'Unrecognised {type(page)=}' +def _pdf_annot_page(annot): + ''' + Wrapper for mupdf.pdf_annot_page() which raises an exception if `annot` + is not bound to a page instead of returning a mupdf.PdfPage with + `.m_internal=None`. + + [Some other MuPDF functions such as `pdf_update_annot()` already raise a + similar exception if a pdf_annot's .page field is null.] + ''' + page = mupdf.pdf_annot_page(annot) + if not page.m_internal: + raise RuntimeError('Annot is not bound to a page') + return page + + # Fixme: we don't support JM_MEMORY=1. 
JM_MEMORY = 0 @@ -599,7 +614,7 @@ def _setAP(self, buffer_, rect=0): try: annot = self.this annot_obj = mupdf.pdf_annot_obj( annot) - page = mupdf.pdf_annot_page( annot) + page = _pdf_annot_page(annot) apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N')) if not apobj.m_internal: raise RuntimeError( MSG_BAD_APN) @@ -619,7 +634,7 @@ def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotat annot = self.this assert annot.m_internal annot_obj = mupdf.pdf_annot_obj( annot) - page = mupdf.pdf_annot_page( annot) + page = _pdf_annot_page(annot) pdf = page.doc() type_ = mupdf.pdf_annot_type( annot) nfcol, fcol = JM_color_FromSequence(fill_color) @@ -830,7 +845,7 @@ def delete_responses(self): CheckParent(self) annot = self.this annot_obj = mupdf.pdf_annot_obj(annot) - page = mupdf.pdf_annot_page(annot) + page = _pdf_annot_page(annot) while 1: irt_annot = JM_find_annot_irt(annot) if not irt_annot.m_internal: @@ -942,7 +957,7 @@ def get_parent(self): try: ret = getattr( self, 'parent') except AttributeError: - page = mupdf.pdf_annot_page(self.this) + page = _pdf_annot_page(self.this) assert isinstance( page, mupdf.PdfPage) document = Document( page.doc()) if page.m_internal else None ret = Page(page, document) @@ -1374,7 +1389,7 @@ def set_irt_xref(self, xref): ''' annot = self.this annot_obj = mupdf.pdf_annot_obj( annot) - page = mupdf.pdf_annot_page( annot) + page = _pdf_annot_page(annot) if xref < 1 or xref >= mupdf.pdf_xref_len( page.doc()): raise ValueError( MSG_BAD_XREF) irt = mupdf.pdf_new_indirect( page.doc(), xref, 0) @@ -1429,7 +1444,7 @@ def set_opacity(self, opacity): return mupdf.pdf_set_annot_opacity(annot, opacity) if opacity < 1.0: - page = mupdf.pdf_annot_page(annot) + page = _pdf_annot_page(annot) page.transparency = 1 def set_open(self, is_open): @@ -1444,7 +1459,7 @@ def set_popup(self, rect): ''' CheckParent(self) annot = self.this - pdfpage = mupdf.pdf_annot_page( annot) + pdfpage = _pdf_annot_page(annot) rot = 
JM_rotate_page_matrix(pdfpage) r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot) mupdf.pdf_set_annot_popup(annot, r) @@ -1454,7 +1469,7 @@ def set_rect(self, rect): CheckParent(self) annot = self.this - pdfpage = mupdf.pdf_annot_page(annot) + pdfpage = _pdf_annot_page(annot) rot = JM_rotate_page_matrix(pdfpage) r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot) if mupdf.fz_is_empty_rect(r) or mupdf.fz_is_infinite_rect(r): @@ -1850,7 +1865,7 @@ def vertices(self): annot = self.this assert isinstance(annot, mupdf.PdfAnnot) annot_obj = mupdf.pdf_annot_obj(annot) - page = mupdf.pdf_annot_page(annot) + page = _pdf_annot_page(annot) page_ctm = mupdf.FzMatrix() # page transformation matrix dummy = mupdf.FzRect() # Out-param for mupdf.pdf_page_transform(). mupdf.pdf_page_transform(page, dummy, page_ctm) @@ -14470,7 +14485,7 @@ def JM_add_annot_id(annot, stem): Append a number to 'stem' such that the result is a unique name. ''' assert isinstance(annot, mupdf.PdfAnnot) - page = mupdf.pdf_annot_page( annot) + page = _pdf_annot_page(annot) annot_obj = mupdf.pdf_annot_obj( annot) names = JM_get_annot_id_list(page) i = 0 @@ -15315,7 +15330,7 @@ def JM_find_annot_irt(annot): annot_obj = mupdf.pdf_annot_obj(annot) found = 0 # loop thru MuPDF's internal annots array - page = mupdf.pdf_annot_page(annot) + page = _pdf_annot_page(annot) irt_annot = mupdf.pdf_first_annot(page) while 1: assert isinstance(irt_annot, mupdf.PdfAnnot) @@ -15781,7 +15796,7 @@ def JM_get_widget_properties(annot, Widget): #log( '{type(annot)=}') annot_obj = mupdf.pdf_annot_obj(annot.this) #log( 'Have called mupdf.pdf_annot_obj()') - page = mupdf.pdf_annot_page(annot.this) + page = _pdf_annot_page(annot.this) pdf = page.doc() tw = annot @@ -17596,7 +17611,8 @@ def JM_set_widget_properties(annot, Widget): if isinstance( annot, Annot): annot = annot.this assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}' - page = mupdf.pdf_annot_page(annot) + page = _pdf_annot_page(annot) + assert 
page.m_internal, 'Annot is not bound to a page' annot_obj = mupdf.pdf_annot_obj(annot) pdf = page.doc() def GETATTR(name): diff --git a/tests/resources/test_4004.pdf b/tests/resources/test_4004.pdf new file mode 100644 index 0000000000000000000000000000000000000000..772f5124d11976aa22e3b5df4696e769f7659955 GIT binary patch literal 6433 zcmeHMX;@U{9%mD1gj6gQ$vlp+U70z{oLPktm_`Xh6^MchhL)LD8sU|)a--!v!!R=jbp6nW`@!=(%=5nI{jb0G z_b%s8c!pBVL;0fM@GDnOoeAckLI{B<-Mrw~SUA;aA+@w|KBhBn_)gT88XZfs>Vau}V~wph^^|a1}1X6$vUyf?6dI zDiak62_jr1RLWzOBB4SgL8JmTs>W4`5{XohC=#OyDg~-i3DuZc0?dL_NlO7!2nkU(no>4V37kby49L8Z%|js!LIDIe zh9{s)4aPhsY#@{%6cP7jA{K-B6G`A~NFafy zLlXRfIP>%KwfXt$2``^r-vzzCq{{o5=C&m2FE~_C4}@c zfsNQBIEg#BL{ICekc|LyxMtyCb7R+8FX_Ni(`Xsm2#D#7m6i5r&xSo>00gCoY?8@? z!Ey%dgR@I?49iN^u#=zZx_KnQxM#eW!(xI$SyMR}WapruiQ*C#DVpTiIGLOQW>MbS> zRVVJA6nCkJZGDAuQGb#`j@vGk$uVgfB zZ-4Y09CXsuIm?I}kN)x=yX4?t<9%SGe-1X5i)8H2K_Hy7^&icoaVp4{5;+0X&F`p!# zvoXhqUgrP|wQt9h~fHL0}jUcY_1WJ3SM z*k$j;VerHxeTj)2C(WQNTDYObauEsoXzuZJq%pZ|SNsSlFD|CO)vIVhVdc2OszWa{ zO|M;R>NvA@)X4L@XLqgZ`7LRc?m7M{RpvjBx70>$RJ}XvNxQu})OS&dVg;pN`^1Zh zuls*}AoN1!!Kml!eSh9?wEN}GGTHFh?{}U^AJfrd*mnKNVJ&f2g^f_xq)X?2BX4TX ztlOQCdFS0D%PySxNBk>x->9q4c7{z`9DjIa%7~5EPq*$!fA9Dc4F@eX;_l}{cGPWN zb&hu=a&+|m=B(lc!FxZN_#oK*!hb3RQuRn$C}2ao!)xuKNIBpL(AS2Puu_I*PqudU)^0be)HJw zvzzNHCj`gUem4Bzw!p_lLpD9zb0hJ}($H%YN}t*ilM@~CfU49mb+=;smZf0dPT*gj46monYlG9|7qVJzPN??hrH~!D>QW^ zsU(zyVXVNo@*^Zk8c9de;+GD;oxkINbaUGTMUqd1_nDy7k;UutPbeg_Kl|XTmzcXh zn?=8Y1s>6KVxb-}h-1Lsj|pHz1fwVq zmC7(w299m#!dYvDZkS9VKZaXrq-eQWl7{tJP5@OdOZXI&wYE#LQNgBa4miNRm%bX1Gji zg~6c+52U0gTsqbg+T;jLPr#&!G_!XCpob2q2QbxWT&e@QGn)t0w0hw2fF2kC?gRrW zY4wE^VRr-hfFgTW(rK9lRwkFCiwq{p-kY#vK6+t_1rf_Ml-_74SAu(p984q5LlGV( z%|V4Sp+JU8d5BnsAhGZtB;8OuY$(8;72K}}uz>s&Y>ui+=&7NZaEN!^96hiK_$*?Jmox0=n0tt+YHPo>Xs;@^BPKcv1qD@bpr#PK4<7r~%hIK-FkG zb^F;(wty?Vg&_OvVnUrnC(#l-ok5TDL?RsJNyHMACm^K;y;P@_2n|G^T@PIkyC4%8 zfJ0wGko2JVJRszu?4+Cw1O@1V#=;N27(lX=q-_8|Iao7p>Asgv`B<1;#U87=cgt|R 
z1$zeAuVB}P<-uOpQvd54?nM52+b)ZboVcKPn8qWx9tI_s%pe}ol;g(66$l2Q`Q=|Vb!UH(|$2%)?M!nh@@XW5=qb_!MgU`EbMME$359ZGc zt-ZxBS1>5ztd9>CO=fR2{1i>hCnrwXl4C$+r49pEcW}vk3&({*T-Yu|a>Yo-`4z7> z!KbaM{HFAjn9RVard$MT6SNC3k_>YLA(*8SZzu&+B literal 0 HcmV?d00001 diff --git a/tests/test_annots.py b/tests/test_annots.py index 8fe7b54d4..6c579cc33 100644 --- a/tests/test_annots.py +++ b/tests/test_annots.py @@ -439,3 +439,22 @@ def test_3758(): page.apply_redactions() wt = pymupdf.TOOLS.mupdf_warnings() assert wt + + +def test_parent(): + """Test invalidating parent on page re-assignment.""" + doc = pymupdf.open() + page = doc.new_page() + a = page.add_highlight_annot(page.rect) # insert annotation on page 0 + page = doc.new_page() # make a new page, should orphanate annotation + try: + print(a) # should raise + except Exception as e: + if pymupdf.mupdf_version_tuple >= (1, 25): + assert isinstance(e, pymupdf.mupdf.FzErrorArgument) + assert str(e) == 'code=4: annotation not bound to any page' + else: + assert isinstance(e, ReferenceError) + assert str(e) == 'weakly-referenced object no longer exists' + else: + assert 0, f'Failed to get expected exception.' diff --git a/tests/test_widgets.py b/tests/test_widgets.py index d14ee8ccb..d1652b23f 100644 --- a/tests/test_widgets.py +++ b/tests/test_widgets.py @@ -333,3 +333,49 @@ def test_3950(): '{{ policy_period_end_date }}', '{{ insurance_line }}', ] + + +def test_4004(): + if pymupdf.mupdf_version_tuple < (1, 25): + print(f'test_4004(): not running because requires MuPDF >= 1.25.') + return + + import collections + + def get_widgets_by_name(doc): + """ + Extracts and returns a dictionary of widgets indexed by their names. 
+ """ + widgets_by_name = collections.defaultdict(list) + for page_num in range(len(doc)): + page = doc.load_page(page_num) + for field in page.widgets(): + widgets_by_name[field.field_name].append({ + "page_num": page_num, + "widget": field + }) + return widgets_by_name + + # Open document and get widgets + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4004.pdf') + doc = pymupdf.open(path) + widgets_by_name = get_widgets_by_name(doc) + + # Print widget information + for name, widgets in widgets_by_name.items(): + print(f"Widget Name: {name}") + for entry in widgets: + widget = entry["widget"] + page_num = entry["page_num"] + print(f" Page: {page_num + 1}, Type: {widget.field_type}, Value: {widget.field_value}, Rect: {widget.rect}") + + # Attempt to update field value + w = widgets_by_name["Text1"][0] + field = w['widget'] + field.value = "1234567890" + try: + field.update() + except Exception as e: + assert str(e) == 'Annot is not bound to a page' + + doc.close()