Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jules misc post 1.23.9 #3017

Merged
merged 5 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Change Log
* **Fixed** `2969 <https://github.com/pymupdf/PyMuPDF/issues/2969>`_: annot.next throws AttributeError
* **Fixed** `2978 <https://github.com/pymupdf/PyMuPDF/issues/2978>`_: 1.23.9rc1: module 'fitz.mupdf' has no attribute 'fz_copy_pixmap_rect'

* **Fixed** `2907 <https://github.com/pymupdf/PyMuPDF/issues/2907>`_: segfault trying to call clean_contents on certain pdfs with python 3.12
* **Fixed** `2905 <https://github.com/pymupdf/PyMuPDF/issues/2905>`_: SystemError: <built-in function TextPage_extractIMGINFO> returned a result with an exception set
* **Fixed** `2742 <https://github.com/pymupdf/PyMuPDF/issues/2742>`_: Segmentation Fault when inserting three (but not two) copies of the same source page into one destination page

* Other:

* Add optional setting of opacity to `Page.insert_htmlbox()`.
Expand Down
232 changes: 75 additions & 157 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5911,31 +5911,6 @@ def xref_xml_metadata(self):
xref = mupdf.pdf_to_num( xml)
return xref

if mupdf_version_tuple < (1, 22):
@property
def has_old_style_xrefs(self):
'''
Check if xref table is old style.
'''
if self.is_closed:
raise ValueError("document closed")
pdf = _as_pdf_document(self)
if pdf.m_internal and pdf.m_internal.has_old_style_xrefs:
return True
return False

@property
def has_xref_streams(self):
'''
Check if xref table is a stream.
'''
if self.is_closed:
raise ValueError("document closed")
pdf = _as_pdf_document(self)
if pdf.m_internal and pdf.m_internal.has_xref_streams:
return True
return False

__slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')

outline = property(lambda self: self._outline)
Expand Down Expand Up @@ -7211,51 +7186,48 @@ def _make_PdfFilterOptions(
filter_.instance_forms = instance_forms
filter_.ascii = ascii

if mupdf_version_tuple >= (1, 22):
filter_.no_update = no_update
if sanitize:
# We want to use a PdfFilterFactory whose `.filter` fn pointer is
# set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
# get access to this raw fn in Python; and on Windows raw MuPDF
# functions are not even available to C++.
#
# So we use SWIG Director to implement our own
# PdfFilterFactory whose `filter()` method calls
# `mupdf.ll_pdf_new_sanitize_filter()`.
if sopts:
assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
else:
sopts = mupdf.PdfSanitizeFilterOptions()
class Factory(mupdf.PdfFilterFactory2):
def __init__(self):
super().__init__()
self.use_virtual_filter()
self.sopts = sopts
def filter(self, ctx, doc, chain, struct_parents, transform, options):
if 0:
log(f'sanitize filter.filter():')
log(f' {self=}')
log(f' {ctx=}')
log(f' {doc=}')
log(f' {chain=}')
log(f' {struct_parents=}')
log(f' {transform=}')
log(f' {options=}')
log(f' {self.sopts.internal()=}')
return mupdf.ll_pdf_new_sanitize_filter(
doc,
chain,
struct_parents,
transform,
options,
self.sopts.internal(),
)
filter_.no_update = no_update
if sanitize:
# We want to use a PdfFilterFactory whose `.filter` fn pointer is
# set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
# get access to this raw fn in Python; and on Windows raw MuPDF
# functions are not even available to C++.
#
# So we use SWIG Director to implement our own
# PdfFilterFactory whose `filter()` method calls
# `mupdf.ll_pdf_new_sanitize_filter()`.
if sopts:
assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
else:
sopts = mupdf.PdfSanitizeFilterOptions()
class Factory(mupdf.PdfFilterFactory2):
def __init__(self):
super().__init__()
self.use_virtual_filter()
self.sopts = sopts
def filter(self, ctx, doc, chain, struct_parents, transform, options):
if 0:
log(f'sanitize filter.filter():')
log(f' {self=}')
log(f' {ctx=}')
log(f' {doc=}')
log(f' {chain=}')
log(f' {struct_parents=}')
log(f' {transform=}')
log(f' {options=}')
log(f' {self.sopts.internal()=}')
return mupdf.ll_pdf_new_sanitize_filter(
doc,
chain,
struct_parents,
transform,
options,
self.sopts.internal(),
)

factory = Factory()
filter_.add_factory(factory.internal())
filter_._factory = factory
else:
filter_.sanitize = sanitize
factory = Factory()
filter_.add_factory(factory.internal())
filter_._factory = factory
return filter_


Expand Down Expand Up @@ -7781,13 +7753,13 @@ def _insert_image(self,
arg_pix,
mupdf.FzColorspace(0),
mupdf.FzColorspace(0),
mupdf.FzDefaultColorspaces(0),
mupdf.FzDefaultColorspaces(None),
mupdf.FzColorParams(),
1,
)
pm.alpha = 0
pm.colorspace = None
mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage(0))
mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
#goto have_image()
do_process_stream = 0
Expand Down Expand Up @@ -9446,7 +9418,7 @@ def __init__(self, *args):
spix,
cs,
mupdf.FzColorspace(0),
mupdf.FzDefaultColorspaces(0),
mupdf.FzDefaultColorspaces(None),
mupdf.FzColorParams(),
1
)
Expand Down Expand Up @@ -15893,8 +15865,7 @@ def JM_image_filter(opaque, ctm, name, image):
assert isinstance(ctm, mupdf.FzMatrix)
r = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
q = mupdf.fz_transform_quad( mupdf.fz_quad_from_rect(r), ctm)
if mupdf_version_tuple >= (1, 22):
q = mupdf.fz_transform_quad( q, g_img_info_matrix)
q = mupdf.fz_transform_quad( q, g_img_info_matrix)
temp = name, JM_py_from_quad(q)
g_img_info.append(temp)

Expand Down Expand Up @@ -15953,92 +15924,38 @@ def JM_image_profile( imagedata, keep_image):
return result


if mupdf_version_tuple >= (1, 22):

def JM_image_reporter(page):
doc = page.doc()
global g_img_info_matrix
g_img_info_matrix = mupdf.FzMatrix()
mediabox = mupdf.FzRect()
mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)

class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2):
def __init__(self):
super().__init__()
self.use_virtual_image_filter()
def image_filter(self, ctx, ctm, name, image):
JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

sanitize_filter_options = SanitizeFilterOptions()

filter_options = _make_PdfFilterOptions(
instance_forms=1,
ascii=1,
no_update=1,
sanitize=1,
sopts=sanitize_filter_options,
)

global g_img_info
g_img_info = []

mupdf.pdf_filter_page_contents( doc, page, filter_options)

rc = tuple(g_img_info)
g_img_info = []
return rc

else:

def JM_filter_content_stream(
doc,
in_stm,
in_res,
transform,
filter_,
struct_parents,
):
'''
Returns (out_buf, out_res).
'''
out_buf = mupdf.FzBuffer( 1024)
proc_buffer = mupdf.pdf_new_buffer_processor( out_buf, filter_.ascii)
if filter_.sanitize:
out_res = mupdf.pdf_new_dict( doc, 1)
proc_filter = mupdf.pdf_new_filter_processor( doc, proc_buffer, in_res, out_res, struct_parents, transform, filter_)
mupdf.pdf_process_contents( proc_filter, doc, in_res, in_stm, mupdf.FzCookie())
mupdf.pdf_close_processor( proc_filter)
else:
out_res = in_res # mupdf.pdf_keep_obj( in_res)
mupdf.pdf_process_contents( proc_buffer, doc, in_res, in_stm, mupdf.FzCookie())
mupdf.pdf_close_processor( proc_buffer)
return out_buf, out_res

def JM_image_reporter(page):
doc = page.doc()
def JM_image_reporter(page):
doc = page.doc()
global g_img_info_matrix
g_img_info_matrix = mupdf.FzMatrix()
mediabox = mupdf.FzRect()
mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)

class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2):
def __init__(self):
super().__init__()
self.use_virtual_image_filter()
def image_filter(self, ctx, ctm, name, image):
JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

sanitize_filter_options = SanitizeFilterOptions()

filter_options = _make_PdfFilterOptions(
instance_forms=1,
ascii=1,
no_update=1,
sanitize=1,
sopts=sanitize_filter_options,
)

filter_ = JM_image_reporter_Filter()
global g_img_info
g_img_info = []

filter_._page = page
filter_.recurse = 0
filter_.instance_forms = 1
filter_.sanitize = 1
filter_.ascii = 1
mupdf.pdf_filter_page_contents( doc, page, filter_options)

ctm = mupdf.FzMatrix()
mupdf.pdf_page_transform( page, mupdf.FzRect(0, 0, 0, 0), ctm)
struct_parents_obj = mupdf.pdf_dict_get( page.obj(), PDF_NAME('StructParents'))
struct_parents = -1
if mupdf.pdf_is_number( struct_parents_obj):
struct_parents = mupdf.pdf_to_int( struct_parents_obj)

contents = mupdf.pdf_page_contents( page)
old_res = mupdf.pdf_page_resources( page)
global g_img_info
g_img_info = []
buffer_, new_res = JM_filter_content_stream( doc, contents, old_res, ctm, filter_, struct_parents)
rc = tuple( g_img_info)
return rc
rc = tuple(g_img_info)
g_img_info = []
return rc


def JM_fitz_config():
Expand Down Expand Up @@ -21735,6 +21652,7 @@ def _atexit():
Document.set_toc_item = utils.set_toc_item
Document.subset_fonts = utils.subset_fonts
Document.tobytes = Document.write
Document.xref_copy = utils.xref_copy

IRect.get_area = utils.get_area

Expand Down
Binary file added tests/resources/test_delete_image.pdf
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/test_extractimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,10 @@ def test_2348():
print(f'jpeg_extension={jpeg_extension!r} png_extension={png_extension!r}')
assert jpeg_extension == 'jpeg'
assert png_extension == 'png'

def test_delete_image():
    """
    Open the bundled `test_delete_image.pdf` and delete the first image
    reported for its first page.

    There are no explicit assertions; the test passes when the calls
    complete without raising.
    """
    # The resource path is built relative to this test file's location.
    pdf_path = os.path.abspath(
        f'{__file__}/../../tests/resources/test_delete_image.pdf'
    )
    document = fitz.open(pdf_path)
    first_page = document[0]
    # The first element of the first `get_images()` entry is used as the
    # xref handed to `delete_image()`.
    images = first_page.get_images()
    first_page.delete_image(images[0][0])
28 changes: 28 additions & 0 deletions tests/test_font.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,31 @@ def test_fontarchive():
'path': None
}
]

def test_load_system_font():
    """
    Install Python font-lookup callbacks and check which one is invoked.

    After registering three callbacks with
    `fitz.mupdf.fz_install_load_system_font_funcs()`, a single call to
    `fitz.mupdf.fz_load_system_font("some-font-name", 0, 0, 0)` is expected
    to record exactly one call, to the first callback, with those same
    arguments.

    The test returns early (after printing a note) when `fitz` has no
    `mupdf` attribute or when `fitz.mupdf_version_tuple` is below (1, 24).
    """
    # Guard clauses: skip when the required bindings / version are absent.
    if not hasattr(fitz, 'mupdf'):
        print(f'test_load_system_font(): Not running on classic.')
        return
    if fitz.mupdf_version_tuple < (1, 24):
        print(f'test_load_system_font(): Not running because mupdf version < 1.24.')
        return

    # Every callback appends its argument tuple here so the calls can be
    # inspected afterwards.
    trace = []

    def font_f(name, bold, italic, needs_exact_metrics):
        # First callback passed to fz_install_load_system_font_funcs().
        trace.append((name, bold, italic, needs_exact_metrics))
        print(f'font_f(): Looking for font: {name=} {bold=} {italic=} {needs_exact_metrics=}.')
        return None

    def f_cjk(name, ordering, serif):
        # Second callback passed to fz_install_load_system_font_funcs().
        trace.append((name, ordering, serif))
        print(f'f_cjk(): Looking for font: {name=} {ordering=} {serif=}.')
        return None

    def f_fallback(script, language, serif, bold, italic):
        # Third callback passed to fz_install_load_system_font_funcs().
        trace.append((script, language, serif, bold, italic))
        print(f'f_fallback(): looking for font: {script=} {language=} {serif=} {bold=} {italic=}.')
        return None

    fitz.mupdf.fz_install_load_system_font_funcs(font_f, f_cjk, f_fallback)
    f = fitz.mupdf.fz_load_system_font("some-font-name", 0, 0, 0)

    # Only `font_f` should have been called, once, with the requested values.
    expected = [
        ('some-font-name', 0, 0, 0),
    ]
    assert trace == expected, f'Incorrect {trace=}.'
    print(f'test_load_system_font(): {f.m_internal=}')

Loading