diff --git a/changes.txt b/changes.txt
index a4ea51dbf..1555e8e02 100644
--- a/changes.txt
+++ b/changes.txt
@@ -22,6 +22,13 @@ Change Log
   * **Fixed** `4004 <https://github.com/pymupdf/PyMuPDF/issues/4004>`_: Segmentation Fault When Updating PDF Form Field Value
   * **Fixed** `3751 <https://github.com/pymupdf/PyMuPDF/issues/3751>`_: apply_redactions causes part of the page content to be hidden / transparent
 
+* Other:
+
+  * New Page method "recolor" which changes the color component count of text, image and vector graphic objects.
+  * New Document method "recolor" invokes the same-named "Page" method for all pages in the PDF.
+  * Image support for "Stamp" annotations.
+  * Accessing the object definition for an (orphaned) cross reference number no longer raises an exception.
+
 
 **Changes in version 1.24.14 (2024-11-19)**
 
diff --git a/docs/document.rst b/docs/document.rst
index 4172a61ca..b248d4889 100644
--- a/docs/document.rst
+++ b/docs/document.rst
@@ -96,6 +96,7 @@ For details on **embedded files** refer to Appendix 3.
 :meth:`Document.pdf_catalog`            PDF only: :data:`xref` of catalog (root)
 :meth:`Document.pdf_trailer`            PDF only: trailer source
 :meth:`Document.prev_location`          return (chapter, pno) of preceding page
+:meth:`Document.recolor`                PDF only: execute :meth:`Page.recolor` for all pages
 :meth:`Document.reload_page`            PDF only: provide a new copy of a page
 :meth:`Document.resolve_names`          PDF only: Convert destination names into a Python dict
 :meth:`Document.save`                   PDF only: save the document
@@ -594,6 +595,16 @@ For details on **embedded files** refer to Appendix 3.
 
      To maintain a consistent API, for document types not supporting a chapter structure (like PDFs), :attr:`Document.chapter_count` is 1, and pages can also be loaded via tuples *(0, pno)*. See this [#f3]_ footnote for comments on performance improvements.
 
+
+  .. method:: recolor(components=1)
+
+    PDF only: Change the color component count of all text, image and vector graphic objects on all pages.
+
+    :arg int components: desired color space indicated by the number of color components: 1 = DeviceGray, 3 = DeviceRGB, 4 = DeviceCMYK.
+
+    The typical use case is 1 (DeviceGray) which converts the PDF to grayscale.
+
+
   .. method:: reload_page(page)
 
     * New in v1.16.10
@@ -924,14 +935,14 @@ For details on **embedded files** refer to Appendix 3.
 
   .. method:: get_page_fonts(pno, full=False)
 
-    PDF only: Return a list of all fonts (directly or indirectly) referenced by the page.
+    PDF only: Return a list of all fonts (directly or indirectly) referenced by the page object definition.
 
     :arg int pno: page number, 0-based, `-∞ < pno < page_count`.
     :arg bool full: whether to also include the referencer's :data:`xref`. If *True*, the returned items are one entry longer. Use this option if you need to know, whether the page directly references the font. In this case the last entry is 0. If the font is referenced by an `/XObject` of the page, you will find its :data:`xref` here.
 
     :rtype: list
 
-    :returns: a list of fonts referenced by this page. Each entry looks like
+    :returns: a list of fonts referenced by the object definition of the page. Each entry looks like
 
     **(xref, ext, type, basefont, name, encoding, referencer)**,
 
@@ -959,7 +970,12 @@ For details on **embedded files** refer to Appendix 3.
 
     .. note::
         * This list has no duplicate entries: the combination of :data:`xref`, *name* and *referencer* is unique.
-        * In general, this is a superset of the fonts actually in use by this page. The PDF creator may e.g. have specified some global list, of which each page only makes partial use.
+        * In general, this is a true superset of the fonts actually in use by this page. The PDF creator may e.g. have specified some global list, of which each page makes only partial use.
+        * Be aware that font names returned by some variants of :meth:`Page.get_text` (respectively :ref:`TextPage` methods) need not (exactly) equal the base font name shown here. Reasons for any differences include:
+
+           - This method always shows any subset prefixes (the pattern ``ABCDEF+``), whereas text extractions do not do this by default.
+           - Text extractions use the base library to access the font name, which has a length cap of 31 bytes and generally interrogates the font file binary to access the name. Method ``get_page_fonts()`` however looks at the PDF definition source.
+           - Text extractions work for all supported document types in exactly the same way -- not just for PDFs. Consequently they do not contain PDF-specifics.
 
   .. method:: get_page_text(pno, output="text", flags=3, textpage=None, sort=False)
 
diff --git a/docs/images/img-imagestamp.png b/docs/images/img-imagestamp.png
new file mode 100644
index 000000000..58fc2cb16
Binary files /dev/null and b/docs/images/img-imagestamp.png differ
diff --git a/docs/page.rst b/docs/page.rst
index 7f3fa7754..cbcadfec5 100644
--- a/docs/page.rst
+++ b/docs/page.rst
@@ -106,6 +106,7 @@ In a nutshell, this is what you can do with PyMuPDF:
 :meth:`Page.load_widget`           PDF only: load a specific field
 :meth:`Page.load_links`            return the first link on a page
 :meth:`Page.new_shape`             PDF only: create a new :ref:`Shape`
+:meth:`Page.recolor`               PDF only: change the colorspace of objects
 :meth:`Page.remove_rotation`       PDF only: set page rotation to 0
 :meth:`Page.replace_image`         PDF only: replace an image
 :meth:`Page.search_for`            search for a string
@@ -491,7 +492,7 @@ In a nutshell, this is what you can do with PyMuPDF:
            * ``bbox``: the bounding box of the table as a tuple `(x0, y0, x1, y1)`.
            * ``cells``: bounding boxes of the table's cells (list of tuples). A cell may also be `None`.
            * ``extract()``: this method returns the text content of each table cell as a list of list of strings.
-           * ``to_markdown()``: this method returns the table as a **string in markdown format** (compatible to Github). Supporting viewers can render the string as a table. This output is optimized for **small token** sizes, which is especially beneficial for LLM/RAG feeds. Pandas DataFrames (see method `to_pandas()` below) offer an equivalent markdown table output which however is better readable for the human eye.
+           * ``to_markdown()``: this method returns the table as a **string in markdown format** compatible with GitHub. Supporting viewers can render the string as a table. This output is optimized for **small token sizes**, which is especially beneficial for LLM/RAG feeds. A pandas DataFrame (see method `to_pandas()` below) also offers markdown output. While easier to read for the human eye, it is generally a larger string than the one produced by the native method.
            * `to_pandas()`: this method returns the table as a `pandas <https://pypi.org/project/pandas/>`_ `DataFrame <https://pandas.pydata.org/docs/reference/frame.html>`_. DataFrames are very versatile objects allowing a plethora of table manipulation methods and outputs to almost 20 well-known formats, among them Excel files, CSV, JSON, markdown-formatted tables and more. `DataFrame.to_markdown()` generates a Github-compatible markdown format optimized for human readability. This method however requires the package `tabulate <https://pypi.org/project/tabulate/>`_ to be installed in addition to pandas itself.
            * ``header``: a `TableHeader` object containing header information of the table.
            * ``col_count``: an integer containing the number of table columns.
@@ -503,11 +504,11 @@ In a nutshell, this is what you can do with PyMuPDF:
            * ``bbox``: the bounding box of the header.
            * `cells`: a list of bounding boxes containing the name of the respective column.
            * `names`: a list of strings containing the text of each of the cell bboxes. They represent the column names -- which are used when exporting the table to pandas DataFrames, markdown, etc.
-           * `external`: a bool indicating whether the header bbox is outside the table body (`True`) or not. Table headers are never identified by the `TableFinder` logic. Therefore, if `external` is true, then the header cells are not part of any cell identified by `TableFinder`. If `external == False`, then the first table row is the header.
+           * `external`: a bool indicating whether the header bbox is outside the table body (`True`) or not. Table headers are never identified by the `TableFinder` logic. Therefore, if `external` is true, then the header cells are not part of any cell identified by `TableFinder`. If `external == False`, then the first original table row is the header.
 
          Please have a look at these `Jupyter notebooks <https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/table-analysis>`_, which cover standard situations like multiple tables on one page or joining table fragments across multiple pages.
 
-         .. caution:: The lifetime of the `TableFinder` object, as well as that of all its tables **equals the lifetime of the page**. If the page object is deleted or reassigned, all tables are no longer valid.
+         .. caution:: The lifetime of the `TableFinder` object, as well as that of all its tables **equals the lifetime of the page**. If the page object is deleted or reassigned, all **table objects are no longer valid.**
          
             The only way to keep table content beyond the page's availability is to **extract it** via methods `Table.to_markdown()`, `Table.to_pandas()` or a copy of `Table.extract()` (e.g. `Table.extract()[:]`).
 
@@ -535,24 +536,33 @@ In a nutshell, this is what you can do with PyMuPDF:
          There is also the `pdf2docx extract tables method`_ which is capable of table extraction if you prefer.
 
 
-   .. method:: add_stamp_annot(rect, stamp=0)
+   .. method:: add_stamp_annot(rect, stamp=0, *, image=None)
 
-      PDF only: Add a "rubber stamp" like annotation to e.g. indicate the document's intended use ("DRAFT", "CONFIDENTIAL", etc.).
+      PDF only: Add a "rubber stamp"-like annotation to e.g. indicate the document's intended use ("DRAFT", "CONFIDENTIAL", etc.). Instead of text, an image may also be shown.
 
       :arg rect_like rect: rectangle where to place the annotation.
-
       :arg int stamp: id number of the stamp text. For available stamps see :ref:`StampIcons`.
+      :arg multiple image: if not ``None``, an image specification is assumed and the ``stamp`` parameter will be ignored. Valid argument types are
+      
+         * a string specifying an image file path,
+         * a ``bytes``, ``bytearray`` or ``io.BytesIO`` object for an image in memory, and
+         * a :ref:`Pixmap`.
+         
+      1. **Text-based stamps**
 
-      .. note::
-
-         * The stamp's text and its border line will automatically be sized and be put horizontally and vertically centered in the given rectangle. :attr:`Annot.rect` is automatically calculated to fit the given **width** and will usually be smaller than this parameter.
+         * :attr:`Annot.rect` is automatically calculated as the largest rectangle with an aspect ratio of ``width/height = 3.8`` that fits in the provided ``rect``. Its position is vertically and horizontally centered.
          * The font chosen is "Times Bold" and the text will be upper case.
-         * The appearance can be changed using :meth:`Annot.set_opacity` and by setting the "stroke" color (no "fill" color supported).
-         * This can be used to create watermark images: on a temporary PDF page create a stamp annotation with a low opacity value, make a pixmap from it with *alpha=True* (and potentially also rotate it), discard the temporary PDF page and use the pixmap with :meth:`insert_image` for your target PDF.
+         * The appearance can be modified using :meth:`Annot.set_opacity` and by setting the "stroke" color. By PDF specification, stamp annotations have no "fill" color.
 
+         .. image:: images/img-stampannot.*
 
-      .. image:: images/img-stampannot.*
-         :scale: 80
+      2. **Image-based stamps**
+
+         * First, a rectangle is computed as for text stamps: vertically and horizontally centered, aspect ratio ``width/height = 3.8``.
+         * Into that rectangle, the image will be inserted aligned left and vertically centered. The resulting image bounding box becomes :attr:`Annot.rect`.
+         * The annotation can be modified via :meth:`Annot.set_opacity`. This makes it possible to display images that have no alpha channel with transparency. Setting colors has no effect on image stamps.
+         
+         .. image:: images/img-imagestamp.*
 
    .. method:: add_widget(widget)
 
@@ -1924,6 +1934,14 @@ In a nutshell, this is what you can do with PyMuPDF:
 
       :arg int rotate: An integer specifying the required rotation in degrees. Must be an integer multiple of 90. Values will be converted to one of 0, 90, 180, 270.
 
+   .. method:: recolor(components=1)
+
+      PDF only: Change the colorspace components of all objects on the page.
+
+      :arg int components: The desired count of color components. Must be one of 1, 3 or 4, which results in color spaces DeviceGray, DeviceRGB or DeviceCMYK respectively. The method affects text, images and vector graphics. For instance, with the default value 1, a page will be converted to gray-scale.
+
+      The changes made are **permanent** and cannot be reverted.
+
    .. method:: remove_rotation()
 
       PDF only: Set page rotation to 0 while maintaining appearance and page content.
diff --git a/docs/pymupdf4llm/api.rst b/docs/pymupdf4llm/api.rst
index 53ca304bb..17b1363c3 100644
--- a/docs/pymupdf4llm/api.rst
+++ b/docs/pymupdf4llm/api.rst
@@ -16,7 +16,7 @@ The |PyMuPDF4LLM| API
 
     Prints the version of the library.
 
-.. method:: to_markdown(doc: pymupdf.Document | str, *, pages: list | range | None = None, hdr_info: Any = None, write_images: bool = False, embed_images: bool = False, dpi: int = 150, image_path="", image_format="png", image_size_limit=0.05, force_text=True, margins=(0, 50, 0, 50), page_chunks: bool = False, page_width: float = 612, page_height: float = None, table_strategy="lines_strict", graphics_limit: int = None, ignore_code: bool = False, extract_words: bool = False, show_progress: bool = True) -> str | list[dict]
+.. method:: to_markdown(doc: pymupdf.Document | str, *, pages: list | range | None = None, filename=None, hdr_info: Any = None, write_images: bool = False, embed_images: bool = False, dpi: int = 150, image_path="", image_format="png", image_size_limit=0.05, force_text=True, margins=(0, 50, 0, 50), page_chunks: bool = False, page_width: float = 612, page_height: float = None, table_strategy="lines_strict", graphics_limit: int = None, ignore_code: bool = False, extract_words: bool = False, show_progress: bool = True) -> str | list[dict]
 
     Read the pages of the file and outputs the text of its pages in |Markdown| format. How this should happen in detail can be influenced by a number of parameters. Please note that there exists **support for building page chunks** from the |Markdown|  text.
 
@@ -24,6 +24,8 @@ The |PyMuPDF4LLM| API
 
     :arg list pages: optional, the pages to consider for output (caution: specify 0-based page numbers). If omitted all pages are processed.
 
+    :arg filename: optional. Use this if you want to provide or override the file name. This may especially be useful when the document is opened from memory streams (which have no name and where thus ``doc.name`` is the empty string). This parameter will be used in all places where normally ``doc.name`` would have been used.
+
     :arg hdr_info: optional. Use this if you want to provide your own header detection logic. This may be a callable or an object having a method named `get_header_id`. It must accept a text span (a span dictionary as contained in :meth:`~.extractDICT`) and a keyword parameter "page" (which is the owning :ref:`Page <page>` object). It must return a string "" or up to 6 "#" characters followed by 1 space. If omitted, a full document scan will be performed to find the most popular font sizes and derive header levels based on them. To completely avoid this behavior specify `hdr_info=lambda s, page=None: ""` or `hdr_info=False`.
 
     :arg bool write_images: when encountering images or vector graphics, images will be created from the respective page area and stored in the specified folder. Markdown references will be generated pointing to these images. Any text contained in these areas will not be included in the text output (but appear as part of the images). Therefore, if for instance your document has text written on full page images, make sure to set this parameter to `False`.
diff --git a/src/__init__.py b/src/__init__.py
index 7ed758abd..a39732c14 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1031,6 +1031,7 @@ def get_textpage(self, clip=None, flags=0):
             ret.parent = p
         else:
             ret.parent = weakref.proxy(p)
+        ret._dev_flags = flags
         return ret
 
     @property
@@ -2782,6 +2783,7 @@ def get_textpage(self, flags=3):
         stext_options.flags = flags
         val = mupdf.FzStextPage(self.this, stext_options)
         val.thisown = True
+        val._dev_flags = flags
         return val
 
     @property
@@ -5434,6 +5436,19 @@ def resolve_link(self, uri=None, chapters=0):
         pno = mupdf.fz_page_number_from_location(self.this, loc)
         return pno, xp, yp
 
+    def recolor(self, components=1):
+        """PDF only: change the color component count on all pages.
+
+        Args:
+            components: (int) desired color component count, one of 1, 3, 4.
+
+        Calls Page.recolor() for each page; raises ValueError if not a PDF.
+        """
+        if not self.is_pdf:
+            raise ValueError("is no PDF")
+        for i in range(self.page_count):
+            self.load_page(i).recolor(components)
+
     def resolve_names(self):
         """Convert the PDF's destination names into a Python dict.
 
@@ -6091,17 +6106,18 @@ def xref_object(self, xref, compressed=0, ascii=0):
         """Get xref object source as a string."""
         if self.is_closed:
             raise ValueError("document closed")
-        if g_use_extra:
-            ret = extra.xref_object( self.this, xref, compressed, ascii)
-            return ret
         pdf = _as_pdf_document(self)
         xreflen = mupdf.pdf_xref_len(pdf)
         if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
             raise ValueError( MSG_BAD_XREF)
-        if xref > 0:
-            obj = mupdf.pdf_load_object(pdf, xref)
-        else:
-            obj = mupdf.pdf_trailer(pdf)
+        try:
+            if xref > 0:
+                obj = mupdf.pdf_load_object(pdf, xref)
+            else:
+                obj = mupdf.pdf_trailer(pdf)
+        except Exception:
+            message(f"No object at {xref=}")
+            return "null"
         res = JM_object_to_buffer(mupdf.pdf_resolve_indirect(obj), compressed, ascii)
         text = JM_EscapeStrFromBuffer(res)
         return text
@@ -6152,7 +6168,7 @@ def xref_stream(self, xref):
         xreflen = mupdf.pdf_xref_len( pdf)
         if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
             raise ValueError( MSG_BAD_XREF)
-        if xref >= 0:
+        if xref > 0:
             obj = mupdf.pdf_new_indirect( pdf, xref, 0)
         else:
             obj = mupdf.pdf_trailer( pdf)
@@ -6170,7 +6186,7 @@ def xref_stream_raw(self, xref):
         xreflen = mupdf.pdf_xref_len( pdf)
         if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
             raise ValueError( MSG_BAD_XREF)
-        if xref >= 0:
+        if xref > 0:
             obj = mupdf.pdf_new_indirect( pdf, xref, 0)
         else:
             obj = mupdf.pdf_trailer( pdf)
@@ -7956,6 +7972,7 @@ def _get_textpage(self, clip=None, flags=0, matrix=None):
         if g_use_extra:
             ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
             tpage = mupdf.FzStextPage(ll_tpage)
+            tpage._dev_flags = flags
             return tpage
         page = self.this
         options = mupdf.FzStextOptions(flags)
@@ -7975,6 +7992,7 @@ def _get_textpage(self, clip=None, flags=0, matrix=None):
             assert 0, f'Unrecognised {type(page)=}'
         mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
         mupdf.fz_close_device(dev)
+        tpage._dev_flags = flags
         return tpage
 
     def _insert_image(self,
@@ -8295,6 +8313,8 @@ def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=
         # 1. insert Xobject in Resources
         #-------------------------------------------------------------
         resources = mupdf.pdf_dict_get_inheritable(tpageref, PDF_NAME('Resources'))
+        if not resources.m_internal:
+            resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'),5)
         subres = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
         if not subres.m_internal:
             subres = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)
@@ -8538,11 +8558,30 @@ def add_squiggly_annot(
             q = CheckMarkerArg(quads)
         return self._add_text_marker(q, mupdf.PDF_ANNOT_SQUIGGLY)
 
-    def add_stamp_annot(self, rect: rect_like, stamp: int =0) -> Annot:
+    def add_stamp_annot(self, rect: rect_like, stamp: int =0, *, image=None) -> Annot:
         """Add a ('rubber') 'Stamp' annotation."""
+        if isinstance(image, Pixmap):
+            buf = image.tobytes()
+        elif isinstance(image, str):
+            buf = pathlib.Path(image).read_bytes()
+        elif isinstance(image, (bytes, bytearray)):
+            buf = image
+        elif isinstance(image, io.BytesIO):
+            buf = image.getvalue()
+        else:
+            buf = None
         old_rotation = annot_preprocess(self)
         try:
             annot = self._add_stamp_annot(rect, stamp)
+            if buf:
+                fzbuff = mupdf.fz_new_buffer_from_copied_data(buf)
+                img = mupdf.fz_new_image_from_buffer(fzbuff)
+                mupdf.pdf_set_annot_stamp_image(annot, img)
+                self.parent.xref_set_key(annot.xref, "Name", "null")
+                self.parent.xref_set_key(
+                    annot.xref, "Contents", "(Image Stamp)")
+                buf = None
+                fzbuff = None
         finally:
             if old_rotation != 0:
                 self.set_rotation(old_rotation)
@@ -8629,6 +8668,19 @@ def annots(self, types=None):
             annot._yielded=True
             yield annot
 
+    def recolor(self, components=1):
+        """Convert the colorspaces of the objects on the page.
+
+        Valid values for 'components' are 1, 3 and 4, else ValueError is raised.
+        """
+        if components not in (1, 3, 4):
+            raise ValueError("components must be one of 1, 3, 4")
+        pdfdoc = _as_pdf_document(self.parent)
+        ropt = mupdf.pdf_recolor_options()
+        ropt.num_comp = components
+        ropts = mupdf.PdfRecolorOptions(ropt)
+        mupdf.pdf_recolor_page(pdfdoc, self.number, ropts)
+
     @property
     def artbox(self):
         """The ArtBox"""
@@ -9096,6 +9148,12 @@ def remove_rotation(self):
         if rot == 0:
             return  Identity # nothing to do
 
+        # save annotation rectangle information before we do anything
+        rot_matrix = self.rotation_matrix  # original rotation matrix
+        annots=[(a.xref,a.rect * rot_matrix) for a in self.annots()]
+        get_links = self.get_links()
+        widgets = [(w.xref,w.rect * rot_matrix) for w in self.widgets()]
+
         # need to derotate the page's content
         mb = self.mediabox  # current mediabox
 
@@ -9126,20 +9184,22 @@ def remove_rotation(self):
         self.set_rotation(0)
         rot = ~mat  # inverse of the derotation matrix
 
-        for annot in self.annots():  # modify rectangles of annotations
-            r = annot.rect * rot
-            # TODO: only try to set rectangle for applicable annot types
-            annot.set_rect(r)
-        for link in self.get_links():  # modify 'from' rectangles of links
-            r = link["from"] * rot
+        for xref, rect in annots:  # modify rectangles of annotations
+            annot = self.load_annot(xref)
+            # TODO: only do this for applicable annot types
+            annot.set_rect(rect)
+            
+        for link in get_links:  # modify 'from' rectangles of links
+            r = link["from"] * rot_matrix
             self.delete_link(link)
             link["from"] = r
             try:  # invalid links remain deleted
                 self.insert_link(link)
             except Exception:
                 pass
-        for widget in self.widgets():  # modify field rectangles
-            r = widget.rect * rot
-            widget.rect = r
+            
+        for xref, rect in widgets:  # modify field rectangles
+            widget = self.load_widget(xref)
+            widget.rect = rect
             widget.update()
         return rot  # the inverse of the generated derotation matrix
@@ -9386,6 +9446,7 @@ def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "
                 self.set_rotation(old_rotation)
         textpage = TextPage(textpage)
         textpage.parent = weakref.proxy(self)
+        textpage._dev_flags = flags
         return textpage
 
     def get_texttrace(self):
@@ -13426,6 +13487,13 @@ def width(self):
 TEXT_OUTPUT_XML = 3
 TEXT_OUTPUT_XHTML = 4
 
+TEXT_STRIKEOUT = 1
+TEXT_UNDERLINE = 2
+TEXT_SYNTHETIC = 4
+TEXT_BOLD = 8
+TEXT_FILLED = 16
+TEXT_STROKED = 32
+
 TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
 TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
 TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
@@ -13446,6 +13514,7 @@ def width(self):
     TEXT_COLLECT_VECTORS = 1024
     TEXT_IGNORE_ACTUALTEXT = 2048
     TEXT_STEXT_SEGMENT = 4096
+TEXT_COLLECT_FLAGS = 32768  # mupdf.FZ_STEXT_COLLECT_FLAGS
 
 TEXTFLAGS_WORDS = (0
         | TEXT_PRESERVE_LIGATURES
@@ -16394,9 +16463,13 @@ def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
     mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), mupdf.PDF_ENUM_NAME_DA, buf)
 
 
-def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
+def JM_make_spanlist(line_dict, line, raw, buff, tp_rect, dev_flags):
     if g_use_extra:
-        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
+        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect, dev_flags)
+    # relevant MuPDF versions
+    MUPDF1250 = (1, 25, 0)
+    MUPDF1251 = (1, 25, 1)
+    THIS_MUPDF = mupdf_version_tuple
     char_list = None
     span_list = []
     mupdf.fz_clear_buffer(buff)
@@ -16407,18 +16480,24 @@ class char_style:
         def __init__(self, rhs=None):
             if rhs:
                 self.size = rhs.size
+                self.font_flags = rhs.font_flags
                 self.flags = rhs.flags
                 self.font = rhs.font
                 self.color = rhs.color
                 self.asc = rhs.asc
                 self.desc = rhs.desc
+                self.bidi = rhs.bidi
+                self.opacity = rhs.opacity
             else:
                 self.size = -1
-                self.flags = -1
-                self.font = ''
-                self.color = -1
+                self.font_flags = 0
+                self.flags = 0
+                self.font = ""
+                self.color = 0
                 self.asc = 0
                 self.desc = 0
+                self.bidi = 0
+                self.opacity = 1
         def __str__(self):
             return f'{self.size} {self.flags} {self.font} {self.color} {self.asc} {self.desc}'
 
@@ -16435,24 +16514,32 @@ def __str__(self):
                 ):
             continue
 
-        flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
+        font_flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
         origin = mupdf.FzPoint(ch.m_internal.origin)
         style.size = ch.m_internal.size
-        style.flags = flags
+        style.font_flags = font_flags
+        style.flags = ch.m_internal.flags
         style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
-        if mupdf_version_tuple >= (1, 25):
-            style.color = ch.m_internal.argb
+        if THIS_MUPDF >= MUPDF1250:
+            style.opacity = (ch.m_internal.argb >> 24) / 255
+            style.color = ch.m_internal.argb & ~0xff000000
         else:
             style.color = ch.m_internal.color
         style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
         style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
 
-        if (style.size != old_style.size
-                or style.flags != old_style.flags
-                or style.color != old_style.color
-                or style.font != old_style.font
-                ):
+        if (
+            0
+            or style.size != old_style.size
+            or style.bidi != old_style.bidi
+            or style.font_flags != old_style.font_flags
+            # compare flags w/o synthetic property
+            or (style.flags & ~4) != (old_style.flags & ~4)
+            or style.color != old_style.color
+            or style.opacity != old_style.opacity
+            or style.font != old_style.font
+        ):
             if old_style.size >= 0:
                 # not first one, output previous
                 if raw:
                     # put character list in the span
@@ -16460,13 +16547,13 @@ def __str__(self):
                     char_list = None
                 else:
                     # put text string in the span
-                    span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
+                    span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
                     mupdf.fz_clear_buffer(buff)
 
                 span[dictkey_origin] = JM_py_from_point(span_origin)
                 span[dictkey_bbox] = JM_py_from_rect(span_rect)
                 line_rect = mupdf.fz_union_rect(line_rect, span_rect)
-                span_list.append( span)
+                span_list.append(span)
                 span = None
 
             span = dict()
@@ -16476,12 +16563,33 @@ def __str__(self):
                 asc = 0.9
                 desc = -0.1
 
+            span["bidi"] = style.bidi
             span[dictkey_size] = style.size
-            span[dictkey_flags] = style.flags
+            span[dictkey_flags] = style.font_flags
             span[dictkey_font] = JM_EscapeStrFromStr(style.font)
             span[dictkey_color] = style.color
             span["ascender"] = asc
             span["descender"] = desc
+            span["opacity"] = style.opacity
+            # add more keys depending on MuPDF version
+            if THIS_MUPDF >= MUPDF1250:  # separate if because not flags-dependent
+                span["opacity"] = style.opacity
+                # rest of keys only make sense for FZ_STEXT_COLLECT_FLAGS
+                if dev_flags & TEXT_COLLECT_FLAGS:
+                    span["underline"] = bool(style.flags & TEXT_UNDERLINE)
+                    span["strikeout"] = bool(style.flags & TEXT_STRIKEOUT)
+                else:
+                    span["underline"] = None
+                    span["strikeout"] = None
+
+            if THIS_MUPDF > MUPDF1251:
+                if dev_flags & TEXT_COLLECT_FLAGS:
+                    span["bold"] = bool(style.flags & TEXT_BOLD)
+                else:
+                    span["bold"] = None
+                span["filled"] = bool(style.flags & TEXT_FILLED)
+                span["stroked"] = bool(style.flags & TEXT_STROKED)
+                span["clipped"] = bool(style.flags & 64)  # 64 = MuPDF FZ_STEXT_CLIPPED; no TEXT_CLIPPED constant is added by this patch
 
             # Need to be careful here - doing 'old_style=style' does a shallow
             # copy, but we need to keep old_style as a distinct instance.
@@ -16495,6 +16603,8 @@ def __str__(self):
             char_dict = dict()
             char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
             char_dict[dictkey_bbox] = JM_py_from_rect(r)
+            if THIS_MUPDF >= MUPDF1250:
+                char_dict["synthetic"] = bool(ch.m_internal.flags & TEXT_SYNTHETIC)
             char_dict[dictkey_c] = chr(ch.m_internal.c)
 
             if char_list is None:
@@ -16558,9 +16668,9 @@ def JM_make_image_block(block, block_dict):
     block_dict[ dictkey_image] = bytes_
 
 
-def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
+def JM_make_text_block(block, block_dict, raw, buff, tp_rect, dev_flags):
     if g_use_extra:
-        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
+        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal, dev_flags)
     line_list = []
     block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
     #log(f'{block=}')
@@ -16571,7 +16681,7 @@ def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
                 ):
             continue
         line_dict = dict()
-        line_rect = JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
+        line_rect = JM_make_spanlist(line_dict, line, raw, buff, tp_rect, dev_flags)
         block_rect = mupdf.fz_union_rect(block_rect, line_rect)
         line_dict[dictkey_wmode] = line.m_internal.wmode
         line_dict[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
@@ -16583,7 +16693,7 @@ def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
 
 def JM_make_textpage_dict(tp, page_dict, raw):
     if g_use_extra:
-        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
+        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw, tp._dev_flags)
     text_buffer = mupdf.fz_new_buffer(128)
     block_list = []
     tp_rect = mupdf.FzRect(tp.m_internal.mediabox)
@@ -16608,7 +16718,7 @@ def JM_make_textpage_dict(tp, page_dict, raw):
             block_dict[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
             JM_make_image_block(block, block_dict)
         else:
-            JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect)
+            JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect, tp._dev_flags)
 
         block_list.append(block_dict)
     page_dict[dictkey_blocks] = block_list
@@ -16722,6 +16832,8 @@ def JM_merge_resources( page, temp_res):
     '''
     # page objects /Resources, /Resources/ExtGState, /Resources/Font
     resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
+    if not resources.m_internal:
+        resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'),5)
     main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
     main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
 
@@ -21118,14 +21230,12 @@ def get_text(
         pages=None,
         method='single',
         concurrency=None,
-        
         option='text',
         clip=None,
         flags=None,
         textpage=None,
         sort=False,
         delimiters=None,
-        
         _stats=False,
         ):
     '''
diff --git a/src/extra.i b/src/extra.i
index f8f88a6c5..ce214b2bb 100644
--- a/src/extra.i
+++ b/src/extra.i
@@ -3014,9 +3014,16 @@ mupdf::FzRect JM_make_spanlist(
         mupdf::FzStextLine& line,
         int raw,
         mupdf::FzBuffer& buff,
-        mupdf::FzRect& tp_rect
+        mupdf::FzRect& tp_rect,
+        int dev_flags
         )
 {
+    // relevant MuPDF versions as integers (parenthesized: safe in any expression)
+    #define MUPDF1250 ((1 << 16) + (25 << 8) + 0)   // 1.25.0
+    #define MUPDF1251 (MUPDF1250 + 1)               // 1.25.1
+    // current MuPDF version as an integer
+    #define THIS_MUPDF ((FZ_VERSION_MAJOR << 16) + (FZ_VERSION_MINOR << 8) + (FZ_VERSION_PATCH))
+
     PyObject *span = NULL, *char_list = NULL, *char_dict;
     PyObject *span_list = PyList_New(0);
     mupdf::fz_clear_buffer(buff);
@@ -3026,11 +3033,14 @@ mupdf::FzRect JM_make_spanlist(
     struct char_style
     {
         float size = -1;
-        int flags = -1;
+        int font_flags = 0;
+        uint16_t flags = 0;
         const char *font = "";
-        unsigned int color = -1;
+        uint32_t color = 0;
+        float opacity = 1;
         float asc = 0;
         float desc = 0;
+        uint16_t bidi = 0;
     };
     char_style old_style;
     char_style style;
@@ -3042,27 +3052,36 @@ mupdf::FzRect JM_make_spanlist(
         {
             continue;
         }
-        int flags = JM_char_font_flags( ch.m_internal->font, line.m_internal, ch.m_internal);
+        int font_flags = JM_char_font_flags(ch.m_internal->font, line.m_internal, ch.m_internal);
         fz_point origin = ch.m_internal->origin;
         style.size = ch.m_internal->size;
-        style.flags = flags;
+        style.font_flags = font_flags;
+        style.flags = ch.m_internal->flags;
         style.font = JM_font_name(ch.m_internal->font);
-        #if (FZ_VERSION_MAJOR > 1 || (FZ_VERSION_MAJOR == 1 && FZ_VERSION_MINOR >= 25))
-            style.color = ch.m_internal->argb;
+        style.bidi = ch.m_internal->bidi;
+        #if (THIS_MUPDF >= MUPDF1250)
+            style.opacity = ((float) (ch.m_internal->argb >> 24)) / 255;
+            style.color = (ch.m_internal->argb << 8) >> 8;
         #else
-            style.color = ch.m_internal->color;
+            style.color = (uint32_t) ch.m_internal->color;
         #endif
         style.asc = JM_font_ascender(ch.m_internal->font);
         style.desc = JM_font_descender(ch.m_internal->font);
 
+        // any change in character style causes a span break
+        // except: synthetic spaces
         if (0
-                || style.size != old_style.size
-                || style.flags != old_style.flags
-                || style.color != old_style.color
-                || strcmp(style.font, old_style.font) != 0
-                )
+            || style.bidi != old_style.bidi
+            || style.size != old_style.size
+            || style.font_flags != old_style.font_flags
+            // compare flags ignoring the synthetic bit (mask 4)
+            || (style.flags & ~4) != (old_style.flags & ~4)
+            || style.color != old_style.color
+            || style.opacity != old_style.opacity
+            || strcmp(style.font, old_style.font) != 0
+            )
         {
-            if (old_style.size >= 0)
+            if (old_style.size > 0)
             {
                 // not first one, output previous
                 if (raw)
@@ -3093,13 +3112,45 @@ mupdf::FzRect JM_make_spanlist(
                 desc = -0.1f;
             }
 
+            DICT_SETITEMSTR_DROP(span, "bidi", Py_BuildValue("i", style.bidi));
             DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size));
-            DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("i", style.flags));
+            DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("i", style.font_flags));
             DICT_SETITEM_DROP(span, dictkey_font, JM_EscapeStrFromStr(style.font));
-            DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("i", style.color));
+            DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("k", style.color));
             DICT_SETITEMSTR_DROP(span, "ascender", Py_BuildValue("f", asc));
             DICT_SETITEMSTR_DROP(span, "descender", Py_BuildValue("f", desc));
 
+            // depending on MuPDF version, add supported keys
+            #if (THIS_MUPDF >= MUPDF1250)  // separate #if because not flags-dependent
+                DICT_SETITEMSTR_DROP(span, "opacity", Py_BuildValue("f", style.opacity));
+            #endif
+
+            // rest of keys only make sense if FZ_STEXT_COLLECT_FLAGS (32768) was set
+            #if (THIS_MUPDF >= MUPDF1250)
+                if (dev_flags & 32768)
+                {
+                    DICT_SETITEMSTR_DROP(span, "underline", JM_BOOL(style.flags & FZ_STEXT_UNDERLINE));
+                    DICT_SETITEMSTR_DROP(span, "strikeout", JM_BOOL(style.flags & FZ_STEXT_STRIKEOUT));
+                }
+                else
+                {
+                    DICT_SETITEMSTR_DROP(span, "underline", Py_BuildValue("s", NULL));
+                    DICT_SETITEMSTR_DROP(span, "strikeout", Py_BuildValue("s", NULL));
+                }
+            #endif
+            #if (THIS_MUPDF > MUPDF1251)
+                if (dev_flags & 32768) // FZ_STEXT_COLLECT_FLAGS = 32768
+                {
+                    DICT_SETITEMSTR_DROP(span, "bold", JM_BOOL(style.flags & 8)); // FZ_STEXT_BOLD = 8
+                }
+                else
+                {
+                    DICT_SETITEMSTR_DROP(span, "bold", Py_BuildValue("s", NULL));
+                }
+                DICT_SETITEMSTR_DROP(span, "filled", JM_BOOL(style.flags & FZ_STEXT_FILLED));
+                DICT_SETITEMSTR_DROP(span, "stroked", JM_BOOL(style.flags & FZ_STEXT_STROKED));
+                DICT_SETITEMSTR_DROP(span, "clipped", JM_BOOL(style.flags & FZ_STEXT_CLIPPED));
+            #endif
             old_style = style;
             span_rect = r;
             span_origin = origin;
@@ -3114,7 +3165,9 @@ mupdf::FzRect JM_make_spanlist(
             DICT_SETITEM_DROP(char_dict, dictkey_origin, JM_py_from_point(ch.m_internal->origin));
 
             DICT_SETITEM_DROP(char_dict, dictkey_bbox, JM_py_from_rect(r));
-
+            #if (THIS_MUPDF >= MUPDF1250)
+                DICT_SETITEMSTR_DROP(char_dict, "synthetic", JM_BOOL(ch.m_internal->flags & FZ_STEXT_SYNTHETIC));
+            #endif
             DICT_SETITEM_DROP(char_dict, dictkey_c, Py_BuildValue("C", ch.m_internal->c));
 
             if (!char_list)
@@ -3514,7 +3567,7 @@ void JM_make_image_block(fz_stext_block *block, PyObject *block_dict)
     return;
 }
 
-static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect)
+static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int raw, fz_buffer *buff, fz_rect tp_rect, int dev_flags)
 {
     fz_stext_line *line;
     PyObject *line_list = PyList_New(0), *line_dict;
@@ -3533,7 +3586,8 @@ static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int
                 line2,
                 raw,
                 buff2,
-                tp_rect2
+                tp_rect2,
+                dev_flags
                 );
         fz_rect& line_rect = *line_rect2.internal();
         block_rect = fz_union_rect(block_rect, line_rect);
@@ -3549,7 +3603,7 @@ static void JM_make_text_block(fz_stext_block *block, PyObject *block_dict, int
     return;
 }
 
-void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw)
+void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw, int dev_flags)
 {
     fz_context* ctx = mupdf::internal_context_get();
     fz_stext_block *block;
@@ -3576,7 +3630,7 @@ void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw)
             DICT_SETITEM_DROP(block_dict, dictkey_bbox, JM_py_from_rect(block->bbox));
             JM_make_image_block(block, block_dict);
         } else {
-            JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect);
+            JM_make_text_block(block, block_dict, raw, text_buffer, tp_rect, dev_flags);
         }
 
         LIST_APPEND_DROP(block_list, block_dict);
@@ -4056,7 +4110,8 @@ mupdf::FzRect JM_make_spanlist(
         mupdf::FzStextLine& line,
         int raw,
         mupdf::FzBuffer& buff,
-        mupdf::FzRect& tp_rect
+        mupdf::FzRect& tp_rect,
+        int dev_flags
         );
 
 PyObject* extractWORDS(mupdf::FzStextPage& this_tpage, PyObject *delimiters);
@@ -4071,7 +4126,7 @@ fz_stext_page* page_get_textpage(
         PyObject* matrix
         );
 
-void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw);
+void JM_make_textpage_dict(fz_stext_page *tp, PyObject *page_dict, int raw, int dev_flags);
 PyObject *pixmap_pixel(fz_pixmap* pm, int x, int y);
 int pixmap_n(mupdf::FzPixmap& pixmap);
 
diff --git a/src/table.py b/src/table.py
index b9301e65a..871ded7b5 100644
--- a/src/table.py
+++ b/src/table.py
@@ -79,6 +79,7 @@
 from collections.abc import Sequence
 from dataclasses import dataclass
 from operator import itemgetter
+import weakref
 
 # -------------------------------------------------------------------
 # Start of PyMuPDF interface code
@@ -1367,33 +1368,57 @@ def char_in_bbox(char, bbox) -> bool:
 
         return table_arr
 
-    def to_markdown(self, clean=True):
+    def to_markdown(self, clean=False, fill_empty=True):
         """Output table content as a string in Github-markdown format.
 
-        If clean is true, markdown syntax is removed from cell content."""
+        If "clean" then markdown syntax is removed from cell content.
+        If "fill_empty" then cell content None is replaced by the values
+        above (columns) or left (rows) in an effort to approximate row and
+        columns spans.
+
+        """
         output = "|"
+        rows = self.row_count
+        cols = self.col_count
+        cells = [row[:] for row in self.extract()]  # copy rows too: they are modified below
+
+        if fill_empty:  # fill "None" cells where possible
 
-        # generate header string and MD underline
+            # for rows, copy content from left to right
+            for j in range(rows):
+                for i in range(cols - 1):
+                    if cells[j][i + 1] is None:
+                        cells[j][i + 1] = cells[j][i]
+
+            # for columns, copy top to bottom
+            for i in range(cols):
+                for j in range(rows - 1):
+                    if cells[j + 1][i] is None:
+                        cells[j + 1][i] = cells[j][i]
+
+        # generate header string and MD separator
         for i, name in enumerate(self.header.names):
-            if name is None or name == "":  # generate a name if empty
+            if not name:  # generate a name if empty
                 name = f"Col{i+1}"
-            name = name.replace("\n", " ")  # remove any line breaks
+            name = name.replace("\n", "<br>")  # use HTML line breaks
             if clean:  # remove sensitive syntax
                 name = html.escape(name.replace("-", "&#45;"))
             output += name + "|"
 
         output += "\n"
+        # insert GitHub header line separator
         output += "|" + "|".join("---" for i in range(self.col_count)) + "|\n"
 
         # skip first row in details if header is part of the table
         j = 0 if self.header.external else 1
 
         # iterate over detail rows
-        for row in self.extract()[j:]:
+        for row in cells[j:]:
             line = "|"
             for i, cell in enumerate(row):
-                # output None cells with empty string
-                cell = "" if cell is None else cell.replace("\n", " ")
+                # replace None cells with empty string
+                # use HTML line break tag
+                cell = "" if not cell else cell.replace("\n", "<br>")
                 if clean:  # remove sensitive syntax
                     cell = html.escape(cell.replace("-", "&#45;"))
                 line += cell + "|"
@@ -1462,22 +1487,19 @@ def _get_header(self, y_tolerance=3):
         page = self.page
         y_delta = y_tolerance
 
-        def top_row_is_bold(bbox):
-            """Check if row 0 has bold text anywhere.
-
-            If this is true, then any non-bold text in lines above disqualify
-            these lines as header.
+        def row_has_bold(bbox):
+            """Check if a row contains some bold text.
 
-            bbox is the (potentially repaired) row 0 bbox.
+            If e.g. true for the top row, then it will be used as (internal)
+            column header row if any of the following is true:
+            * the previous (above) text line has no bold span
+            * the second table row text has no bold span
 
-            Returns True or False
+            Returns True if any spans are bold else False.
             """
-            for b in page.get_text("dict", flags=TEXTFLAGS_TEXT, clip=bbox)["blocks"]:
-                for l in b["lines"]:
-                    for s in l["spans"]:
-                        if s["flags"] & 16:
-                            return True
-            return False
+            blocks = page.get_text("dict", flags=TEXTFLAGS_TEXT, clip=bbox)["blocks"]
+            spans = [s for b in blocks for l in b["lines"] for s in l["spans"]]
+            return any(s["flags"] & 16 for s in spans)  # bit 16 of "flags" = bold
 
         try:
             row = self.rows[0]
@@ -1489,44 +1511,54 @@ def top_row_is_bold(bbox):
         # return this if we determine that the top row is the header
         header_top_row = TableHeader(bbox, cells, self.extract()[0], False)
 
-        # one-line tables have no extra header
+        # 1-line tables have no extra header
         if len(self.rows) < 2:
             return header_top_row
 
-        # x-ccordinates of columns between x0 and x1 of the table
+        # 1-column tables have no extra header
         if len(cells) < 2:
             return header_top_row
 
-        col_x = [
-            c[2] if c is not None else None for c in cells[:-1]
-        ]  # column (x) coordinates
+        # assume top row is the header if second row is empty
+        row2 = self.rows[1]  # second row
+        if all(c is None for c in row2.cells):
+            return header_top_row
 
         # Special check: is top row bold?
-        # If first line above table is not bold, but top-left table cell is bold,
-        # we take first table row as header
-        top_row_bold = top_row_is_bold(bbox)
+        top_row_bold = row_has_bold(bbox)
+
+        # assume top row is header if it is bold and the
+        # 2nd row contains no bold text at all
+        if top_row_bold and not row_has_bold(row2.bbox):
+            return header_top_row
 
-        # clip = area above table
+        # column coordinates (x1 values) in top row
+        col_x = [c[2] if c is not None else None for c in cells[:-1]]
+
+        # clip = page area above the table
         # We will inspect this area for text qualifying as column header.
         clip = +bbox  # take row 0 bbox
         clip.y0 = 0  # start at top of page
         clip.y1 = bbox.y0  # end at top of table
 
-        spans = []  # the text spans inside clip
-        for b in page.get_text("dict", clip=clip, flags=TEXTFLAGS_TEXT)["blocks"]:
-            for l in b["lines"]:
-                for s in l["spans"]:
-                    if (
-                        not s["flags"] & 1 and s["text"].strip()
-                    ):  # ignore superscripts and empty text
-                        spans.append(s)
+        blocks = page.get_text("dict", clip=clip, flags=TEXTFLAGS_TEXT)["blocks"]
+        # non-empty, non-superscript spans above table, sorted descending by y1
+        spans = sorted(
+            [
+                s
+                for b in blocks
+                for l in b["lines"]
+                for s in l["spans"]
+                if not (white_spaces.issuperset(s["text"]) or s["flags"] & 1)
+            ],
+            key=lambda s: s["bbox"][3],
+            reverse=True,
+        )
 
         select = []  # y1 coordinates above, sorted descending
         line_heights = []  # line heights above, sorted descending
         line_bolds = []  # bold indicator per line above, same sorting
 
-        # spans sorted descending
-        spans.sort(key=lambda s: s["bbox"][3], reverse=True)
         # walk through the spans and fill above 3 lists
         for i in range(len(spans)):
             s = spans[i]
@@ -1541,7 +1573,7 @@ def top_row_is_bold(bbox):
                 line_bolds.append(bold)
                 continue
 
-            # get last items from the 3 lists
+            # get previous items from the 3 lists
             y0 = select[-1]
             h0 = line_heights[-1]
             bold0 = line_bolds[-1]
@@ -1565,13 +1597,13 @@ def top_row_is_bold(bbox):
         if select == []:  # nothing above the table?
             return header_top_row
 
-        select = select[:5]  # only accept up to 5 lines in any header
+        select = select[:5]  # accept up to 5 lines for an external header
 
-        # take top row as header if text above table is too far apart
+        # assume top row as header if text above is too far away
         if bbox.y0 - select[0] >= line_heights[0]:
             return header_top_row
 
-        # if top table row is bold, but line above is not:
+        # accept top row as header if bold, but line above is not
         if top_row_bold and not line_bolds[0]:
             return header_top_row
 
@@ -1738,7 +1770,7 @@ class TableFinder:
     """
 
     def __init__(self, page, settings=None):
-        self.page = page
+        self.page = weakref.proxy(page)
         self.settings = TableSettings.resolve(settings)
         self.edges = self.get_edges()
         self.intersections = edges_to_intersections(
@@ -1942,7 +1974,7 @@ def make_chars(page, clip=None):
 # We are ignoring Bézier curves completely and are converting everything
 # else to lines.
 # ------------------------------------------------------------------------
-def make_edges(page, clip=None, tset=None, add_lines=None):
+def make_edges(page, clip=None, tset=None, paths=None, add_lines=None, add_boxes=None):
     global EDGES
     snap_x = tset.snap_x_tolerance
     snap_y = tset.snap_y_tolerance
@@ -1995,16 +2027,20 @@ def are_neighbors(r1, r2):
             return True
         return False
 
-    def clean_graphics():
+    def clean_graphics(npaths=None):
         """Detect and join rectangles of "connected" vector graphics."""
-
-        paths = []  # paths relevant for table detection
-        for p in page.get_drawings():
-            # ignore fill-only graphics if they do not simulate lines,
-            # which means one of width or height are small.
+        if npaths is None:
+            allpaths = page.get_drawings()
+        else:
+            allpaths = npaths[:]  # paths relevant for table detection
+        paths = []
+        for p in allpaths:
+            # If only looking at lines, we ignore fill-only path
+            # except when simulating lines, i.e. width or height
+            # are small.
             if (
-                p["type"] == "f"
-                and lines_strict
+                lines_strict
+                and p["type"] == "f"
                 and p["rect"].width > snap_x
                 and p["rect"].height > snap_y
             ):
@@ -2039,7 +2075,7 @@ def clean_graphics():
 
         return new_rects, paths
 
-    bboxes, paths = clean_graphics()
+    bboxes, paths = clean_graphics(npaths=paths)
 
     def is_parallel(p1, p2):
         """Check if line is roughly axis-parallel."""
@@ -2210,6 +2246,25 @@ def make_line(p, p1, p2, clip):
         if line_dict:
             EDGES.append(line_to_edge(line_dict))
 
+    if add_boxes is not None:  # add user-specified rectangles
+        assert isinstance(add_boxes, (tuple, list))
+    else:
+        add_boxes = []
+    for box in add_boxes:
+        r = Rect(box)
+        line_dict = make_line(path, r.tl, r.bl, clip)
+        if line_dict:
+            EDGES.append(line_to_edge(line_dict))
+        line_dict = make_line(path, r.bl, r.br, clip)
+        if line_dict:
+            EDGES.append(line_to_edge(line_dict))
+        line_dict = make_line(path, r.br, r.tr, clip)
+        if line_dict:
+            EDGES.append(line_to_edge(line_dict))
+        line_dict = make_line(path, r.tr, r.tl, clip)
+        if line_dict:
+            EDGES.append(line_to_edge(line_dict))
+
 
 def page_rotation_set0(page):
     """Nullify page rotation.
@@ -2291,7 +2346,9 @@ def find_tables(
     text_x_tolerance=3,
     text_y_tolerance=3,
     strategy=None,  # offer abbreviation
-    add_lines=None,  # optional user-specified lines
+    add_lines=None,  # user-specified lines
+    add_boxes=None,  # user-specified rectangles
+    paths=None,  # accept vector graphics as parameter
 ):
     global CHARS, EDGES
     CHARS = []
@@ -2345,7 +2402,12 @@ def find_tables(
 
     make_chars(page, clip=clip)  # create character list of page
     make_edges(
-        page, clip=clip, tset=tset, add_lines=add_lines
+        page,
+        clip=clip,
+        tset=tset,
+        paths=paths,
+        add_lines=add_lines,
+        add_boxes=add_boxes,
     )  # create lines and curves
     tables = TableFinder(page, settings=tset)
 
diff --git a/tests/resources/test_4141.pdf b/tests/resources/test_4141.pdf
new file mode 100644
index 000000000..a8fc73e23
Binary files /dev/null and b/tests/resources/test_4141.pdf differ
diff --git a/tests/test_4141.py b/tests/test_4141.py
new file mode 100644
index 000000000..bd13988f6
--- /dev/null
+++ b/tests/test_4141.py
@@ -0,0 +1,19 @@
+import pymupdf
+
+import os.path
+
+
+def test_4141():
+    """Survive a missing /Resources object in a number of cases."""
+    path = os.path.abspath(f"{__file__}/../../tests/resources/test_4141.pdf")
+    doc = pymupdf.open(path)
+    page = doc[0]
+    # make sure this is the right test file: the page has no /Resources
+    assert doc.xref_get_key(page.xref, "Resources") == ("null", "null")
+    page.insert_htmlbox((100, 100, 200, 200), "Hallo")  # will fail without the fix
+    doc.close()
+    doc = pymupdf.open(path)  # reopen from disk; nothing was saved above
+    page = doc[0]
+    tw = pymupdf.TextWriter(page.rect)
+    tw.append((100, 100), "Hallo")
+    tw.write_text(page)  # will fail without the fix
diff --git a/tests/test_tables.py b/tests/test_tables.py
index ca8aef74c..29ef6a9d2 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -288,10 +288,10 @@ def test_markdown():
     text = (
         "|Header1|Header2|Header3|\n"
         "|---|---|---|\n"
-        "|Col11 Col12|Col21 Col22|Col31 Col32 Col33|\n"
-        "|Col13|Col23|Col34 Col35|\n"
+        "|Col11<br>Col12|Col21<br>Col22|Col31<br>Col32<br>Col33|\n"
+        "|Col13|Col23|Col34<br>Col35|\n"
         "|Col14|Col24|Col36|\n"
-        "|Col15|Col25 Col26||\n\n"
+        "|Col15|Col25<br>Col26||\n\n"
     )
     assert tab.to_markdown() == text