diff --git a/README.md b/README.md index 8a05961c1..0cfae89b1 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# PyMuPDF 1.19.4 +# PyMuPDF 1.19.5 ![logo](https://github.com/pymupdf/PyMuPDF/blob/master/demo/pymupdf.jpg) -Release date: January 01, 2022 +Release date: February 01, 2022 On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [![Downloads](https://static.pepy.tech/personalized-badge/pymupdf?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads)](https://pepy.tech/project/pymupdf) @@ -11,7 +11,7 @@ On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [![Downloads] # Introduction -PyMuPDF (current version 1.19.4) is a Python binding with support for [MuPDF](https://mupdf.com/) (current version 1.19.*), a lightweight PDF, XPS, and E-book viewer, renderer, and toolkit, which is maintained and developed by Artifex Software, Inc. +PyMuPDF (current version 1.19.5) is a Python binding with support for [MuPDF](https://mupdf.com/) (current version 1.19.*), a lightweight PDF, XPS, and E-book viewer, renderer, and toolkit, which is maintained and developed by Artifex Software, Inc. MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB and FB2 (e-books) formats, and it is known for its top performance and high rendering quality. @@ -70,7 +70,7 @@ The latest changelog can be viewed [here](https://pymupdf.readthedocs.io/en/late PyMuPDF **requires Python 3.6 or later**. 
-Python wheels exist for **Windows** (32bit and 64bit), **Linux** (64bit, Intel and ARM) and **Mac OSX** (64bit, Intel only), so it can be installed from [PyPI](https://pypi.org/search/?q=pymupdf) in the usual way: +For versions 3.7 and up, Python wheels exist for **Windows** (32bit and 64bit), **Linux** (64bit, Intel and ARM) and **Mac OSX** (64bit, Intel only), so it can be installed from [PyPI](https://pypi.org/search/?q=pymupdf) in the usual way: ``` python -m pip install --upgrade pip @@ -87,11 +87,9 @@ There are **no mandatory** external dependencies. However, some **optional featu Older wheels - also with support for older Python versions - can be found [here](https://github.com/pymupdf/PyMuPDF-Optional-Material/tree/master/wheels-upto-Py3.5>) and on PyPI. -> Starting with v1.18.15, to minimize network traffic we no longer redundantly store wheels in this repository's `releases` folder. You can find older versions back to v1.9.2 on [PyPI](https://pypi.org/project/PyMuPDF/#history). Sources for every release continue to be stored in [here](https://github.com/pymupdf/PyMuPDF/releases). - Other platforms **require installation from sources**, follow [these](https://pymupdf.readthedocs.io/en/latest/installation.html) instructions in the documentation. -> **Note:** If `pip` cannot find a wheel that is compatible with your platform, it will automatically start an installation from sources - **_which will fail_** if MuPDF is not installed on your system. +> **Note:** If `pip` cannot find a wheel that is compatible with your platform, it will automatically try an installation from sources - **_which will fail_** if MuPDF (including its sources) is not installed on your system. This repo's folder [installation](https://github.com/pymupdf/PyMuPDF/tree/master/installation) contains several platform-specific source installation scripts contributed by users. 
You may also find the following Wiki pages useful: diff --git a/changes.txt b/changes.txt index 8dc7c8ca0..84c73fae6 100644 --- a/changes.txt +++ b/changes.txt @@ -1,6 +1,31 @@ Change Log =========== +------ + +**Changes in Version 1.19.5** + +* **Fixed** `#1518 <https://github.com/pymupdf/PyMuPDF/issues/1518>`_. A limited "fix": in some cases, rectangles and quads were not correctly encoded to support re-drawing by :ref:`Shape`. + +* **Fixed** `#1521 <https://github.com/pymupdf/PyMuPDF/issues/1521>`_. This had the same root cause as issue #1510. + +* **Fixed** `#1513 <https://github.com/pymupdf/PyMuPDF/issues/1513>`_. Some Optional Content functions did not support non-ASCII characters. + +* **Fixed** `#1510 <https://github.com/pymupdf/PyMuPDF/issues/1510>`_. Support more soft-mask image subtypes. + +* **Fixed** `#1507 <https://github.com/pymupdf/PyMuPDF/issues/1507>`_. Immunize against items in the outlines chain that are ``"null"`` objects. + +* **Fixed** re-opened `#1417 <https://github.com/pymupdf/PyMuPDF/issues/1417>`_ ("too many open files"). This was due to insufficient calls to MuPDF's ``fz_drop_document()``. This also fixes `#1550 <https://github.com/pymupdf/PyMuPDF/issues/1550>`_. + +* **Fixed** several undocumented issues in relation to incorrectly setting the text span origin :data:`point_like`. + +* **Fixed** undocumented error computing the character bbox in method :meth:`Page.get_texttrace` when text is **flipped** (as opposed to just rotated). + +* **Added** items to the dictionary returned by :meth:`image_profile`: ``orientation`` and ``transform`` report the natural image orientation (EXIF data). + +* **Added** method :meth:`Document.xref_copy`. It will make a given target PDF object an exact copy of a source object. 
+ + ------ **Changes in Version 1.19.4** diff --git a/fitz/__init__.py b/fitz/__init__.py index 75e130416..2d6254db3 100644 --- a/fitz/__init__.py +++ b/fitz/__init__.py @@ -77,6 +77,7 @@ fitz.Document.subset_fonts = fitz.utils.subset_fonts fitz.Document.get_oc = fitz.utils.get_oc fitz.Document.set_oc = fitz.utils.set_oc +fitz.Document.xref_copy = fitz.utils.xref_copy # ------------------------------------------------------------------------------ @@ -429,7 +430,7 @@ def deprecated_function(*args, **kw): _alias(fitz, "PaperSize", "paper_size") _alias(fitz, "PaperRect", "paper_rect") _alias(fitz, "paperSizes", "paper_sizes") - _alias(fitz, "ImageProperties", "image_properties") + _alias(fitz, "ImageProperties", "image_profile") _alias(fitz, "planishLine", "planish_line") _alias(fitz, "getTextLength", "get_text_length") _alias(fitz, "getTextlength", "get_text_length") @@ -449,4 +450,5 @@ def deprecated_function(*args, **kw): 64 if sys.maxsize > 2 ** 32 else 32, ) -restore_aliases() +if VersionBind.startswith("1.19"): # don't generate aliases after this + restore_aliases() diff --git a/fitz/fitz.i b/fitz/fitz.i index 3b5f43f3e..428150512 100644 --- a/fitz/fitz.i +++ b/fitz/fitz.i @@ -282,6 +282,9 @@ struct Document { DEBUGMSG1("Document"); fz_document *this_doc = (fz_document *) $self; + while (this_doc->refs > 1) { + fz_drop_document(gctx, this_doc); + } fz_drop_document(gctx, this_doc); DEBUGMSG2; } @@ -764,8 +767,11 @@ struct Document if (!first) goto finished; xrefs = PyList_New(0); // pre-allocate an empty list xrefs = JM_outline_xrefs(gctx, first, xrefs); - Py_ssize_t i, n = PySequence_Size(xrefs); + Py_ssize_t i, n = PySequence_Size(xrefs), m = PySequence_Size(items); if (!n) goto finished; + if (n != m) { + THROWMSG(gctx, "internal error finding outline xrefs"); + } int xref; // update all TOC item dictionaries @@ -3832,9 +3838,9 @@ if basestate: case (2): type = "radiobox"; break; default: type = "label"; break; } - PyObject *item = 
Py_BuildValue("{s:i,s:s,s:i,s:s,s:O,s:O}", + PyObject *item = Py_BuildValue("{s:i,s:N,s:i,s:s,s:N,s:N}", "number", i, - "text", info.text, + "text", JM_EscapeStrFromStr(info.text), "depth", info.depth, "type", type, "on", JM_BOOL(info.selected), @@ -6221,7 +6227,7 @@ def get_oc_items(self) -> list: if rect == None: return self.cropbox mb = self.mediabox - return Rect(rect[0], mb.y1 - rect[1], rect[2], mb.y1 - rect[3]) + return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) @property def trimbox(self): @@ -6230,7 +6236,7 @@ def get_oc_items(self) -> list: if rect == None: return self.cropbox mb = self.mediabox - return Rect(rect[0], mb.y1 - rect[1], rect[2], mb.y1 - rect[3]) + return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) @property def bleedbox(self): @@ -6239,7 +6245,7 @@ def get_oc_items(self) -> list: if rect == None: return self.cropbox mb = self.mediabox - return Rect(rect[0], mb.y1 - rect[1], rect[2], mb.y1 - rect[3]) + return Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1]) def _set_pagebox(self, boxtype, rect): doc = self.parent @@ -6713,7 +6719,18 @@ def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None, del pymupdf_fonts # install the font for the page - val = self._insertFont(fontname, bfname, fontfile, fontbuffer, set_simple, idx, + if fontfile != None: + if type(fontfile) is str: + fontfile_str = fontfile + elif hasattr(fontfile, "absolute"): + fontfile_str = str(fontfile) + elif hasattr(fontfile, "name"): + fontfile_str = fontfile.name + else: + raise ValueError("bad fontfile") + else: + fontfile_str = None + val = self._insertFont(fontname, bfname, fontfile_str, fontbuffer, set_simple, idx, wmode, serif, encoding, CJK_number) if not val: # did not work, error return @@ -7111,6 +7128,9 @@ Pixmap(PDFdoc, xref) - from an image xref in a PDF document. 
fz_pixmap *src_pix = (fz_pixmap *) spix; fz_try(gctx) { fz_irect bbox = JM_irect_from_py(clip); + if (clip != Py_None && (fz_is_infinite_irect(bbox) || fz_is_empty_irect(bbox))) { + THROWMSG(gctx, "bad clip parameter"); + } if (!fz_is_infinite_irect(bbox)) { pm = fz_scale_pixmap(gctx, src_pix, src_pix->x, src_pix->y, w, h, &bbox); } else { @@ -7266,7 +7286,9 @@ Pixmap(PDFdoc, xref) - from an image xref in a PDF document. THROWMSG(gctx, "bad xref"); ref = pdf_new_indirect(gctx, pdf, xref, 0); type = pdf_dict_get(gctx, ref, PDF_NAME(Subtype)); - if (!pdf_name_eq(gctx, type, PDF_NAME(Image))) + if (!pdf_name_eq(gctx, type, PDF_NAME(Image)) && + !pdf_name_eq(gctx, type, PDF_NAME(Alpha)) && + !pdf_name_eq(gctx, type, PDF_NAME(Luminosity))) THROWMSG(gctx, "not an image"); img = pdf_load_image(gctx, pdf, ref); pix = fz_get_pixmap_from_image(gctx, img, NULL, NULL, NULL, NULL); @@ -8159,12 +8181,14 @@ Includes alpha byte if applicable.""") return self def __exit__(self, *args): - self.__swig_destroy__(self) + if getattr(self, "thisown", False): + self.__swig_destroy__(self) def __del__(self): if not type(self) is Pixmap: return - self.__swig_destroy__(self) + if getattr(self, "thisown", False): + self.__swig_destroy__(self) %} } @@ -8440,7 +8464,8 @@ struct Annot ~Annot() { DEBUGMSG1("Annot"); - pdf_drop_annot(gctx, (pdf_annot *) $self); + pdf_annot *this_annot = (pdf_annot *) $self; + pdf_drop_annot(gctx, this_annot); DEBUGMSG2; } //---------------------------------------------------------------- @@ -10313,7 +10338,8 @@ struct Link %extend { ~Link() { DEBUGMSG1("Link"); - fz_drop_link(gctx, (fz_link *) $self); + fz_link *this_link = (fz_link *) $self; + fz_drop_link(gctx, this_link); DEBUGMSG2; } @@ -10530,7 +10556,8 @@ struct DisplayList { { ~DisplayList() { DEBUGMSG1("DisplayList"); - fz_drop_display_list(gctx, (fz_display_list *) $self); + fz_display_list *this_dl = (fz_display_list *) $self; + fz_drop_display_list(gctx, this_dl); DEBUGMSG2; } FITZEXCEPTION(DisplayList, 
!result) @@ -10618,7 +10645,8 @@ struct DisplayList { def __del__(self): if not type(self) is DisplayList: return - self.__swig_destroy__(self) + if getattr(self, "thisown", False): + self.__swig_destroy__(self) %} } }; @@ -10631,7 +10659,8 @@ struct TextPage { ~TextPage() { DEBUGMSG1("TextPage"); - fz_drop_stext_page(gctx, (fz_stext_page *) $self); + fz_stext_page *this_tp = (fz_stext_page *) $self; + fz_drop_stext_page(gctx, this_tp); DEBUGMSG2; } @@ -11146,8 +11175,10 @@ struct TextPage { return val def __del__(self): - if not type(self) is TextPage: return - self.__swig_destroy__(self) + if not type(self) is TextPage: + return + if getattr(self, "thisown", False): + self.__swig_destroy__(self) %} } }; @@ -11162,7 +11193,8 @@ struct Graftmap ~Graftmap() { DEBUGMSG1("Graftmap"); - pdf_drop_graft_map(gctx, (pdf_graft_map *) $self); + pdf_graft_map *this_gm = (pdf_graft_map *) $self; + pdf_drop_graft_map(gctx, this_gm); DEBUGMSG2; } @@ -11183,8 +11215,10 @@ struct Graftmap %pythoncode %{ def __del__(self): - if not type(self) is Graftmap: return - self.__swig_destroy__(self) + if not type(self) is Graftmap: + return + if getattr(self, "thisown", False): + self.__swig_destroy__(self) %} } }; @@ -11199,7 +11233,8 @@ struct TextWriter ~TextWriter() { DEBUGMSG1("TextWriter"); - fz_drop_text(gctx, (fz_text *) $self); + fz_text *this_tw = (fz_text *) $self; + fz_drop_text(gctx, this_tw); DEBUGMSG2; } @@ -11488,8 +11523,10 @@ struct TextWriter } %pythoncode %{ def __del__(self): - if not type(self) is TextWriter: return - self.__swig_destroy__(self) + if not type(self) is TextWriter: + return + if getattr(self, "thisown", False): + self.__swig_destroy__(self) %} } }; @@ -11505,7 +11542,8 @@ struct Font ~Font() { DEBUGMSG1("Font"); - fz_drop_font(gctx, (fz_font *) $self); + fz_font *this_font = (fz_font *) $self; + fz_drop_font(gctx, this_font); DEBUGMSG2; } @@ -11833,8 +11871,10 @@ struct Font return "Font('%s')" % self.name def __del__(self): - if not type(self) is Font: 
return - self.__swig_destroy__(self) + if not type(self) is Font: + return + if getattr(self, "thisown", False): + self.__swig_destroy__(self) %} } }; @@ -12846,8 +12886,10 @@ def _le_rclosedarrow(self, annot, p1, p2, lr, fill_color): return ap def __del__(self): - if not type(self) is Tools: return - self.__swig_destroy__(self) + if not type(self) is Tools: + return + if getattr(self, "thisown", False): + self.__swig_destroy__(self) %} } }; diff --git a/fitz/helper-devices.i b/fitz/helper-devices.i index a9aae8d8d..7ff5bc76f 100644 --- a/fitz/helper-devices.i +++ b/fitz/helper-devices.i @@ -25,6 +25,9 @@ static fz_point dev_lastpoint = {0, 0}; static fz_rect dev_pathrect; static float dev_pathfactor = 0; static int dev_linecount = 0; +static int path_type = 0; +#define FILL_PATH 1 +#define STROKE_PATH 2 static void @@ -103,9 +106,9 @@ jm_checkquad() PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("qu")); /* relationship of float array to quad points: - (0, 1) = ul, (2, 3) = ur, (6, 7) = ll, (4, 5) = lr + (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr */ - fz_quad q = fz_make_quad(f[0], f[1], f[2], f[3], f[6], f[7], f[4], f[5]); + fz_quad q = fz_make_quad(f[0], f[1], f[6], f[7], f[2], f[3], f[4], f[5]); PyTuple_SET_ITEM(rect, 1, JM_py_from_quad(q)); finish:; PyList_SetItem(items, len - 4, rect); // replace item -4 by rect @@ -155,13 +158,13 @@ jm_checkrect() --------------------------------------------------------------------- */ if (ll.y != lr.y) { // not horizontal - goto make_quad; + goto drop_out; } if (lr.x != ur.x) { // not vertical - goto make_quad; + goto drop_out; } if (ur.y != ul.y) { // not horizontal - goto make_quad; + goto drop_out; } // we have a rect, determine orientation if (ll.x < lr.x) { // move left to right @@ -186,16 +189,9 @@ jm_checkrect() PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("re")); PyTuple_SET_ITEM(rect, 1, JM_py_from_rect(r)); PyTuple_SET_ITEM(rect, 2, PyLong_FromLong(orientation)); - goto finish; - - make_quad:; - rect = 
PyTuple_New(2); - PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("qu")); - fz_quad q = fz_make_quad(ul.x, ul.y, ur.x, ur.y, ll.x, ll.y, lr.x, lr.y); - PyTuple_SET_ITEM(rect, 1, JM_py_from_quad(q)); - finish:; PyList_SetItem(items, len - 3, rect); // replace item -3 by rect PyList_SetSlice(items, len - 2, len, NULL); // delete remaining 2 items + drop_out:; return 1; } @@ -235,7 +231,7 @@ trace_lineto(fz_context *ctx, void *dev_, float x, float y) PyObject *items = PyDict_GetItem(dev_pathdict, dictkey_items); LIST_APPEND_DROP(items, list); dev_linecount += 1; // counts consecutive lines - if (dev_linecount >= 4) { // shrink to "re" or "qu" item + if (dev_linecount >= 4 && path_type != FILL_PATH) { // shrink to "re" or "qu" item jm_checkquad(); } } @@ -350,7 +346,7 @@ jm_tracedraw_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path, jm_tracedraw_device *dev = (jm_tracedraw_device *) dev_; PyObject *out = dev->out; trace_device_ctm = ctm; //fz_concat(ctm, trace_device_ptm); - + path_type = FILL_PATH; jm_tracedraw_path(ctx, dev, path); if (!dev_pathdict) { return; @@ -380,6 +376,7 @@ jm_tracedraw_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path, dev_pathfactor = fz_abs(ctm.a); } trace_device_ctm = ctm; // fz_concat(ctm, trace_device_ptm); + path_type = STROKE_PATH; jm_tracedraw_path(ctx, dev, path); if (!dev_pathdict) { @@ -436,14 +433,11 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, float x0, y0, x1, y1; asc = (double) JM_font_ascender(ctx, span->font); dsc = (double) JM_font_descender(ctx, span->font); - if ((asc - dsc) >= 1 && small_glyph_heights == 0) { - ; - } else { - if (asc < 1e-3) { - dsc = -0.1; - asc = 0.9; - } + if (asc < 1e-3) { // probably Tesseract font + dsc = -0.1; + asc = 0.9; } + double ascsize = asc * fsize / (asc - dsc); double dscsize = dsc * fsize / (asc - dsc); int fflags = 0; @@ -468,6 +462,10 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, 
fz_rect span_bbox; dir = fz_normalize_vector(dir); fz_matrix rot = fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0); + if (dir.x == -1) { // left-right flip + rot.d = 1; + } + for (i = 0; i < span->len; i++) { adv = 0; if (span->items[i].gid >= 0) { @@ -486,8 +484,13 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, m1 = fz_concat(m1, fz_make_matrix(1, 0, 0, 1, char_orig.x, char_orig.y)); x0 = char_orig.x; x1 = x0 + adv; - y0 = char_orig.y - ascsize; - y1 = char_orig.y - dscsize; + if (dir.x == 1 && span->trm.d < 0) { // up-down flip + y0 = char_orig.y + dscsize; + y1 = char_orig.y + ascsize; + } else { + y0 = char_orig.y - ascsize; + y1 = char_orig.y - dscsize; + } fz_rect char_bbox = fz_make_rect(x0, y0, x1, y1); char_bbox = fz_transform_rect(char_bbox, m1); PyTuple_SET_ITEM(chars, (Py_ssize_t) i, Py_BuildValue("ii(ff)(ffff)", @@ -518,7 +521,8 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, DICT_SETITEM_DROP(span_dict, dictkey_font, Py_BuildValue("s",fontname)); DICT_SETITEM_DROP(span_dict, dictkey_wmode, PyLong_FromLong((long) span->wmode)); DICT_SETITEM_DROP(span_dict, dictkey_flags, PyLong_FromLong((long) fflags)); - DICT_SETITEMSTR_DROP(span_dict, "bidi", PyLong_FromLong((long) span->bidi_level)); + DICT_SETITEMSTR_DROP(span_dict, "bidi_lvl", PyLong_FromLong((long) span->bidi_level)); + DICT_SETITEMSTR_DROP(span_dict, "bidi_dir", PyLong_FromLong((long) span->markup_dir)); DICT_SETITEM_DROP(span_dict, dictkey_ascender, PyFloat_FromDouble(asc)); DICT_SETITEM_DROP(span_dict, dictkey_descender, PyFloat_FromDouble(dsc)); if (colorspace) { diff --git a/fitz/helper-geo-c.i b/fitz/helper-geo-c.i index 283b9e79f..948d6c01a 100644 --- a/fitz/helper-geo-c.i +++ b/fitz/helper-geo-c.i @@ -98,7 +98,7 @@ JM_py_from_rect(fz_rect r) static fz_irect JM_irect_from_py(PyObject *r) { - if (!PySequence_Check(r) || PySequence_Size(r) != 4) + if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4) return 
fz_infinite_irect; int x[4]; Py_ssize_t i; diff --git a/fitz/helper-other.i b/fitz/helper-other.i index 82a49f1b3..228b7128b 100644 --- a/fitz/helper-other.i +++ b/fitz/helper-other.i @@ -714,16 +714,20 @@ PyObject *JM_outline_xrefs(fz_context *ctx, pdf_obj *obj, PyObject *xrefs) thisobj = obj; while (thisobj) { newxref = PyLong_FromLong((long) pdf_to_num(ctx, thisobj)); - if (PySequence_Contains(xrefs, newxref)) { + if (PySequence_Contains(xrefs, newxref) || + pdf_dict_get(ctx, thisobj, PDF_NAME(Type))) { + // circular ref or top of chain: terminate Py_DECREF(newxref); break; } LIST_APPEND_DROP(xrefs, newxref); first = pdf_dict_get(ctx, thisobj, PDF_NAME(First)); // try go down - if (first) xrefs = JM_outline_xrefs(ctx, first, xrefs); + if (pdf_is_dict(ctx, first)) xrefs = JM_outline_xrefs(ctx, first, xrefs); thisobj = pdf_dict_get(ctx, thisobj, PDF_NAME(Next)); // try go next parent = pdf_dict_get(ctx, thisobj, PDF_NAME(Parent)); // get parent - if (!thisobj) thisobj = parent; // goto parent if no next + if (!pdf_is_dict(ctx, thisobj)) { + thisobj = parent; + } } return xrefs; } diff --git a/fitz/helper-pixmap.i b/fitz/helper-pixmap.i index c5e043d38..9381b8481 100644 --- a/fitz/helper-pixmap.i +++ b/fitz/helper-pixmap.i @@ -178,14 +178,20 @@ PyObject *JM_image_profile(fz_context *ctx, PyObject *imagedata, int keep_image) res = fz_new_buffer_from_shared_data(ctx, c, (size_t) len); } image = fz_new_image_from_buffer(ctx, res); - int xres, yres; + int xres, yres, orientation; + fz_matrix ctm = fz_image_orientation_matrix(ctx, image); fz_image_resolution(image, &xres, &yres); + orientation = (int) fz_image_orientation(ctx, image); const char *cs_name = fz_colorspace_name(ctx, image->colorspace); result = PyDict_New(); DICT_SETITEM_DROP(result, dictkey_width, Py_BuildValue("i", image->w)); DICT_SETITEM_DROP(result, dictkey_height, Py_BuildValue("i", image->h)); + DICT_SETITEMSTR_DROP(result, "orientation", + Py_BuildValue("i", orientation)); + 
DICT_SETITEM_DROP(result, dictkey_matrix, + JM_py_from_matrix(ctm)); DICT_SETITEM_DROP(result, dictkey_xres, Py_BuildValue("i", xres)); DICT_SETITEM_DROP(result, dictkey_yres, diff --git a/fitz/helper-python.i b/fitz/helper-python.i index e0cdecff7..a13aee53f 100644 --- a/fitz/helper-python.i +++ b/fitz/helper-python.i @@ -1088,7 +1088,7 @@ def planish_line(p1: point_like, p2: point_like) -> Matrix: return Matrix(TOOLS._hor_matrix(p1, p2)) -def image_properties(img: typing.ByteString) -> dict: +def image_profile(img: typing.ByteString) -> dict: """ Return basic properties of an image. Args: diff --git a/fitz/helper-stext.i b/fitz/helper-stext.i index 9b922be43..0c5319a60 100644 --- a/fitz/helper-stext.i +++ b/fitz/helper-stext.i @@ -119,7 +119,7 @@ JM_char_quad(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch) dsc = dsc * fsize / asc_dsc; /* ------------------------------ - Re-compute quad with adjusted ascender / descender values: + Re-compute quad with the adjusted ascender / descender values: Move ch->origin to (0,0) and de-rotate quad, then adjust the corners, re-rotate and move back to ch->origin location. 
------------------------------ */ @@ -129,6 +129,10 @@ JM_char_quad(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch) s = line->dir.y; // sine trm1 = fz_make_matrix(c, -s, s, c, 0, 0); // derotate trm2 = fz_make_matrix(c, s, -s, c, 0, 0); // rotate + if (c == -1) { // left-right flip + trm1.d = 1; + trm2.d = 1; + } xlate1 = fz_make_matrix(1, 0, 0, 1, -ch->origin.x, -ch->origin.y); xlate2 = fz_make_matrix(1, 0, 0, 1, ch->origin.x, ch->origin.y); @@ -136,11 +140,17 @@ JM_char_quad(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch) quad = fz_transform_quad(quad, trm1); // de-rotate corners // adjust vertical coordinates - - quad.ll.y = -dsc; - quad.lr.y = -dsc; - quad.ul.y = -asc; - quad.ur.y = -asc; + if (c == 1 && quad.ul.y > 0) { // up-down flip + quad.ul.y = asc; + quad.ur.y = asc; + quad.ll.y = dsc; + quad.lr.y = dsc; + } else { + quad.ul.y = -asc; + quad.ur.y = -asc; + quad.ll.y = -dsc; + quad.lr.y = -dsc; + } // adjust horizontal coordinates that are too crazy: // (1) left x must be >= 0 @@ -574,9 +584,6 @@ JM_make_spanlist(fz_context *ctx, PyObject *line_dict, } span_rect = fz_union_rect(span_rect, r); - if (origin.y > span_origin.y) { - span_origin.y = origin.y; - } if (raw) { // make and append a char dict char_dict = PyDict_New(); diff --git a/fitz/utils.py b/fitz/utils.py index 0cd78aa6e..092c57e05 100644 --- a/fitz/utils.py +++ b/fitz/utils.py @@ -5206,7 +5206,7 @@ def build_subset(buffer, unc_set, gid_set): pass try: # invoke fontTools subsetter fts.main(args) - font = fitz.Font(fontfile=newfont_path) + font = Font(fontfile=newfont_path) new_buffer = font.buffer if len(font.valid_codepoints()) == 0: new_buffer = None @@ -5278,7 +5278,7 @@ def get_fontnames(doc, item): font_ext = f[1] # font file extension basename = f[3] # font basename - if font_ext not in ( # supported by fontTools + if font_ext not in ( # skip if not supported by fontTools "otf", "ttf", "woff", @@ -5298,7 +5298,7 @@ def get_fontnames(doc, item): xref_set.add(font_xref) 
for name in names: name_set.add(name) - font = fitz.Font(fontbuffer=fontbuffer) + font = Font(fontbuffer=fontbuffer) name_set.add(font.name) del font font_buffers[fontbuffer] = (name_set, xref_set, subsets) @@ -5372,3 +5372,43 @@ def find_buffer_by_name(name): new_fontsize += len(new_buffer) return old_fontsize - new_fontsize + + +# ------------------------------------------------------------------- +# Copy XREF object to another XREF +# ------------------------------------------------------------------- +def xref_copy(doc: Document, source: int, target: int, *, keep: list = None) -> None: + """Copy a PDF dictionary object to another one given their xref numbers. + + Args: + doc: PDF document object + source: source xref number + target: target xref number, the xref must already exist + keep: an optional list of 1st level keys in target that should not be + removed before copying. + Notes: + This works similar to the copy() method of dictionaries in Python. The + source may be a stream object. 
+ """ + if doc.xref_is_stream(source): + # read new xref stream, maintaining compression + stream = doc.xref_stream_raw(source) + doc.update_stream( + target, + stream, + compress=False, # keeps source compression + new=True, # in case target is no stream + ) + + # empty the target completely, observe exceptions + if keep is None: + keep = [] + for key in doc.xref_get_keys(target): + if key in keep: + continue + doc.xref_set_key(target, key, "null") + # copy over all source dict items + for key in doc.xref_get_keys(source): + item = doc.xref_get_key(source, key) + doc.xref_set_key(target, key, item[1]) + return None diff --git a/fitz/version.i b/fitz/version.i index 382d30579..22933f357 100644 --- a/fitz/version.i +++ b/fitz/version.i @@ -1,6 +1,6 @@ %pythoncode %{ VersionFitz = "1.19.0" -VersionBind = "1.19.4" -VersionDate = "2022-01-01 00:00:01" -version = (VersionBind, VersionFitz, "20220101000001") +VersionBind = "1.19.5" +VersionDate = "2022-02-01 00:00:01" +version = (VersionBind, VersionFitz, "20220201000001") %} \ No newline at end of file diff --git a/setup.py b/setup.py index 97228507d..8f329f8e0 100644 --- a/setup.py +++ b/setup.py @@ -187,7 +187,7 @@ def load_libraries(): setup( name="PyMuPDF", - version="1.19.4", + version="1.19.5", description="Python bindings for the PDF toolkit and renderer MuPDF", long_description=readme, long_description_content_type="text/markdown",