diff --git a/README.md b/README.md
index 39a1a32d..28d1b1c9 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ There's a hosted API for marker available [here](https://www.datalab.to/):
 
 - Supports PDFs, word documents, and powerpoints 
 - 1/4th the price of leading cloud-based competitors
-- High uptime (99.99%), quality, and speed (.25s/page for 50 page doc)
+- High uptime (99.99%), quality, and speed (around 15 seconds to convert a 250 page PDF)
 
 # Community
 
diff --git a/benchmarks/overall.py b/benchmarks/overall.py
index 6564b256..f6fb9591 100644
--- a/benchmarks/overall.py
+++ b/benchmarks/overall.py
@@ -16,6 +16,8 @@
 import subprocess
 import shutil
 from tabulate import tabulate
+
+from marker.settings import settings
 from scoring import score_text
 
 configure_logging()
@@ -53,7 +55,7 @@ def main(in_folder: str, reference_folder: str, out_file: str, nougat: bool, md_
         md_filename = fname.rsplit(".", 1)[0] + ".md"
 
         reference_filename = os.path.join(reference_folder, md_filename)
-        with open(reference_filename, "r", encoding="utf-8") as f:
+        with open(reference_filename, "r") as f:
             reference = f.read()
 
         pdf_filename = os.path.join(in_folder, fname)
diff --git a/data/images/overall.png b/data/images/overall.png
index 0b7f5318..0946421a 100644
Binary files a/data/images/overall.png and b/data/images/overall.png differ
diff --git a/data/images/per_doc.png b/data/images/per_doc.png
index 6c864a57..ed26cfb9 100644
Binary files a/data/images/per_doc.png and b/data/images/per_doc.png differ
diff --git a/marker/builders/document.py b/marker/builders/document.py
index d9729beb..60c42749 100644
--- a/marker/builders/document.py
+++ b/marker/builders/document.py
@@ -33,13 +33,15 @@ def __call__(self, provider: PdfProvider, layout_builder: LayoutBuilder, ocr_bui
 
     def build_document(self, provider: PdfProvider):
         PageGroupClass: PageGroup = get_block_class(BlockTypes.Page)
+        lowres_images = provider.get_images(provider.page_range, self.lowres_image_dpi)
+        highres_images = provider.get_images(provider.page_range, self.highres_image_dpi)
         initial_pages = [
             PageGroupClass(
-                page_id=i,
-                lowres_image=provider.get_image(i, self.lowres_image_dpi),
-                highres_image=provider.get_image(i, self.highres_image_dpi),
-                polygon=provider.get_page_bbox(i)
-            ) for i in provider.page_range
+                page_id=p,
+                lowres_image=lowres_images[i],
+                highres_image=highres_images[i],
+                polygon=provider.get_page_bbox(p)
+            ) for i, p in enumerate(provider.page_range)
         ]
         DocumentClass: Document = get_block_class(BlockTypes.Document)
         return DocumentClass(filepath=provider.filepath, pages=initial_pages)
diff --git a/marker/builders/layout.py b/marker/builders/layout.py
index afacf5a4..8a17bdda 100644
--- a/marker/builders/layout.py
+++ b/marker/builders/layout.py
@@ -82,6 +82,10 @@ def add_blocks_to_pages(self, pages: List[PageGroup], layout_results: List[Layou
                 layout_block.polygon = layout_block.polygon.rescale(layout_page_size, provider_page_size)
                 page.add_structure(layout_block)
 
+            # Ensure page.structure is a list (possibly empty) rather than None
+            if page.structure is None:
+                page.structure = []
+
     def merge_blocks(self, document_pages: List[PageGroup], provider_page_lines: ProviderPageLines):
         good_pages = []
         for document_page in document_pages:
diff --git a/marker/builders/ocr.py b/marker/builders/ocr.py
index 93c5ca19..5a9fb537 100644
--- a/marker/builders/ocr.py
+++ b/marker/builders/ocr.py
@@ -1,10 +1,10 @@
 from typing import List
 
+from ftfy import fix_text
 from surya.model.detection.model import EfficientViTForSemanticSegmentation
 from surya.model.recognition.encoderdecoder import OCREncoderDecoderModel
 from surya.ocr import run_ocr
 
-from marker.settings import settings
 from marker.builders import BaseBuilder
 from marker.providers import ProviderOutput, ProviderPageLines
 from marker.providers.pdf import PdfProvider
@@ -14,6 +14,7 @@
 from marker.schema.registry import get_block_class
 from marker.schema.text.line import Line
 from marker.schema.text.span import Span
+from marker.settings import settings
 
 
 class OcrBuilder(BaseBuilder):
@@ -96,13 +97,13 @@ def ocr_extraction(self, document: Document, provider: PdfProvider) -> ProviderP
                 )
                 spans = [
                     SpanClass(
-                        text=ocr_line.text + "\n",
+                        text=fix_text(ocr_line.text) + "\n",
                         formats=['plain'],
                         page_id=page_id,
                         polygon=polygon,
                         minimum_position=0,
                         maximum_position=0,
-                        font='',
+                        font='Unknown',
                         font_weight=0,
                         font_size=0,
                     )
diff --git a/marker/config/parser.py b/marker/config/parser.py
index 11601ac7..7a1ee9f5 100644
--- a/marker/config/parser.py
+++ b/marker/config/parser.py
@@ -32,6 +32,8 @@ def common_options(fn):
                           help="Path to JSON file with additional configuration.")(fn)
         fn = click.option("--languages", type=str, default=None, help="Comma separated list of languages to use for OCR.")(fn)
         fn = click.option("--disable_multiprocessing", is_flag=True, default=False, help="Disable multiprocessing.")(fn)
+        fn = click.option("--paginate_output", is_flag=True, default=False, help="Paginate output.")(fn)
+        fn = click.option("--disable_image_extraction", is_flag=True, default=False, help="Disable image extraction.")(fn)
         return fn
 
     def generate_config_dict(self) -> Dict[str, any]:
@@ -61,6 +63,12 @@ def generate_config_dict(self) -> Dict[str, any]:
                 case "disable_multiprocessing":
                     if v:
                         config["pdftext_workers"] = 1
+                case "paginate_output":
+                    if v:
+                        config["paginate_output"] = True
+                case "disable_image_extraction":
+                    if v:
+                        config["extract_images"] = False
         return config
 
     def get_renderer(self):
diff --git a/marker/converters/pdf.py b/marker/converters/pdf.py
index 978a8651..b77eb1e8 100644
--- a/marker/converters/pdf.py
+++ b/marker/converters/pdf.py
@@ -10,6 +10,7 @@
 from marker.builders.ocr import OcrBuilder
 from marker.builders.structure import StructureBuilder
 from marker.converters import BaseConverter
+from marker.processors.blockquote import BlockquoteProcessor
 from marker.processors.code import CodeProcessor
 from marker.processors.debug import DebugProcessor
 from marker.processors.document_toc import DocumentTOCProcessor
@@ -17,6 +18,7 @@
 from marker.processors.footnote import FootnoteProcessor
 from marker.processors.ignoretext import IgnoreTextProcessor
 from marker.processors.line_numbers import LineNumbersProcessor
+from marker.processors.list import ListProcessor
 from marker.processors.page_header import PageHeaderProcessor
 from marker.processors.sectionheader import SectionHeaderProcessor
 from marker.processors.table import TableProcessor
@@ -52,16 +54,18 @@ def __init__(self, artifact_dict: Dict[str, Any], processor_list: List[str] | No
             processor_list = strings_to_classes(processor_list)
         else:
             processor_list = [
-                FootnoteProcessor,
-                PageHeaderProcessor,
-                EquationProcessor,
-                TableProcessor,
-                SectionHeaderProcessor,
-                TextProcessor,
+                BlockquoteProcessor,
                 CodeProcessor,
                 DocumentTOCProcessor,
+                EquationProcessor,
+                FootnoteProcessor,
                 IgnoreTextProcessor,
                 LineNumbersProcessor,
+                ListProcessor,
+                PageHeaderProcessor,
+                SectionHeaderProcessor,
+                TableProcessor,
+                TextProcessor,
                 DebugProcessor,
             ]
 
diff --git a/marker/output.py b/marker/output.py
index e47c861f..ce209afa 100644
--- a/marker/output.py
+++ b/marker/output.py
@@ -6,6 +6,7 @@
 from marker.renderers.html import HTMLOutput
 from marker.renderers.json import JSONOutput
 from marker.renderers.markdown import MarkdownOutput
+from marker.settings import settings
 
 
 def output_exists(output_dir: str, fname_base: str):
@@ -29,11 +30,12 @@ def text_from_rendered(rendered: BaseModel):
 
 def save_output(rendered: BaseModel, output_dir: str, fname_base: str):
     text, ext, images = text_from_rendered(rendered)
+    text = text.encode(settings.OUTPUT_ENCODING, errors='replace').decode(settings.OUTPUT_ENCODING)
 
-    with open(os.path.join(output_dir, f"{fname_base}.{ext}"), "w+") as f:
+    with open(os.path.join(output_dir, f"{fname_base}.{ext}"), "w+", encoding=settings.OUTPUT_ENCODING) as f:
         f.write(text)
-    with open(os.path.join(output_dir, f"{fname_base}_meta.json"), "w+") as f:
+    with open(os.path.join(output_dir, f"{fname_base}_meta.json"), "w+", encoding=settings.OUTPUT_ENCODING) as f:
         f.write(json.dumps(rendered.metadata, indent=2))
 
     for img_name, img in images.items():
-        img.save(os.path.join(output_dir, img_name), "PNG")
+        img.save(os.path.join(output_dir, img_name), "PNG", optimize=False, compress_level=3)
diff --git a/marker/processors/blockquote.py b/marker/processors/blockquote.py
new file mode 100644
index 00000000..cc71e3ab
--- /dev/null
+++ b/marker/processors/blockquote.py
@@ -0,0 +1,49 @@
+from marker.processors import BaseProcessor
+from marker.schema import BlockTypes
+from marker.schema.document import Document
+
+
+class BlockquoteProcessor(BaseProcessor):
+    """Tag blockquotes by comparing each text block's polygon with that of the next block on the page.
+    Sets `blockquote` and `blockquote_level` on the next block; the thresholds are the class attributes below.
+    """
+    block_types = (BlockTypes.Text, BlockTypes.TextInlineMath)
+    min_x_indent = 0.05  # fraction of the current block's width (0.05 = 5%) the next block must be indented by
+    x_start_tolerance = 0.01  # fraction of block width within which the two left edges count as aligned
+    x_end_tolerance = 0.01  # fraction of block width within which the two right edges count as aligned
+
+    def __init__(self, config):
+        super().__init__(config)
+
+    def __call__(self, document: Document):
+        for page in document.pages:
+            for block in page.contained_blocks(document, self.block_types):
+                if block.structure is None:
+                    continue
+                # Only blocks with at least two structure entries are used as the reference block
+                if not len(block.structure) >= 2:
+                    continue
+                # Candidate is whatever page.get_next_block returns; it must be a non-ignored block of the same types with a structure
+                next_block = page.get_next_block(block)
+                if next_block is None:
+                    continue
+                if next_block.block_type not in self.block_types:
+                    continue
+                if next_block.structure is None:
+                    continue
+                if next_block.ignore_for_output:
+                    continue
+                # Edge alignment / indentation tests scale with the current block's width; y_indent means the next block starts below this block's bottom edge
+                matching_x_end = abs(next_block.polygon.x_end - block.polygon.x_end) < self.x_end_tolerance * block.polygon.width
+                matching_x_start = abs(next_block.polygon.x_start - block.polygon.x_start) < self.x_start_tolerance * block.polygon.width
+                x_indent = next_block.polygon.x_start > block.polygon.x_start + (self.min_x_indent * block.polygon.width)
+                y_indent = next_block.polygon.y_start > block.polygon.y_end
+                # A block already tagged as a quote passes its state and level on; otherwise an indented multi-entry next block starts a quote at level 1
+                if block.blockquote:
+                    next_block.blockquote = (matching_x_end and matching_x_start) or (x_indent and y_indent)
+                    next_block.blockquote_level = block.blockquote_level
+                    if (x_indent and y_indent):
+                        next_block.blockquote_level += 1  # indented further than the quoting block: one level deeper
+                elif len(next_block.structure) >= 2 and (x_indent and y_indent):
+                    next_block.blockquote = True
+                    next_block.blockquote_level = 1
diff --git a/marker/processors/debug.py b/marker/processors/debug.py
index 05f85b58..3d46b046 100644
--- a/marker/processors/debug.py
+++ b/marker/processors/debug.py
@@ -69,7 +69,7 @@ def __call__(self, document: Document):
             print(f"Dumped block debug data to {self.debug_data_folder}")
 
     def draw_pdf_debug_images(self, document: Document):
-        for idx, page in enumerate(document.pages):
+        for page in document.pages:
             png_image = page.highres_image.copy()
 
             line_bboxes = []
@@ -87,12 +87,12 @@ def draw_pdf_debug_images(self, document: Document):
 
             png_image = self.render_layout_boxes(page, png_image)
 
-            debug_file = os.path.join(self.debug_folder, f"pdf_page_{idx}.png")
+            debug_file = os.path.join(self.debug_folder, f"pdf_page_{page.page_id}.png")
             png_image.save(debug_file)
 
 
     def draw_layout_debug_images(self, document: Document, pdf_mode=False):
-        for idx, page in enumerate(document.pages):
+        for page in document.pages:
             img_size = page.highres_image.size
             png_image = Image.new("RGB", img_size, color="white")
 
@@ -110,7 +110,7 @@ def draw_layout_debug_images(self, document: Document, pdf_mode=False):
 
             png_image = self.render_layout_boxes(page, png_image)
 
-            debug_file = os.path.join(self.debug_folder, f"layout_page_{idx}.png")
+            debug_file = os.path.join(self.debug_folder, f"layout_page_{page.page_id}.png")
             png_image.save(debug_file)
 
 
@@ -143,7 +143,7 @@ def render_layout_boxes(self, page, png_image):
     def dump_block_debug_data(self, document: Document):
         debug_file = os.path.join(self.debug_folder, f"blocks.json")
         debug_data = []
-        for idx, page in enumerate(document.pages):
+        for page in document.pages:
             page_data = page.model_dump(exclude=["lowres_image", "highres_image"])
             debug_data.append(page_data)
 
diff --git a/marker/processors/equation.py b/marker/processors/equation.py
index 5a990d8c..5da8436d 100644
--- a/marker/processors/equation.py
+++ b/marker/processors/equation.py
@@ -29,7 +29,7 @@ class EquationProcessor(BaseProcessor):
     """
     block_types = (BlockTypes.Equation, )
     model_max_length = 384
-    batch_size = None
+    texify_batch_size = None
     token_buffer = 256
 
     def __init__(self, texify_model: GenerateVisionEncoderDecoderModel, config=None):
@@ -68,8 +68,8 @@ def __call__(self, document: Document):
             block.latex = prediction
 
     def get_batch_size(self):
-        if self.batch_size is not None:
-            return self.batch_size
+        if self.texify_batch_size is not None:
+            return self.texify_batch_size
         elif settings.TORCH_DEVICE_MODEL == "cuda":
             return 6
         elif settings.TORCH_DEVICE_MODEL == "mps":
diff --git a/marker/processors/list.py b/marker/processors/list.py
new file mode 100644
index 00000000..ff394a4a
--- /dev/null
+++ b/marker/processors/list.py
@@ -0,0 +1,90 @@
+from typing import List
+
+from marker.processors import BaseProcessor
+from marker.schema import BlockTypes
+from marker.schema.blocks import ListItem
+from marker.schema.document import Document
+
+
+class ListProcessor(BaseProcessor):
+    """Flag ListGroup blocks that continue past a column or page break and nest their items by indentation.
+    Sets `has_continuation` on list groups and `list_indent_level` on list items, re-parenting indented items.
+    """
+    block_types = (BlockTypes.ListGroup,)
+    ignored_block_types = (BlockTypes.PageHeader, BlockTypes.PageFooter)
+    min_x_indent = 0.01  # fraction of page width (0.01 = 1%) that counts as one indentation step
+
+    def __init__(self, config):
+        super().__init__(config)
+
+    def __call__(self, document: Document):
+        self.list_group_continuation(document)
+        self.list_group_indentation(document)
+
+    def list_group_continuation(self, document: Document):  # sets has_continuation on ListGroups followed by another usable ListGroup
+        for page in document.pages:
+            for block in page.contained_blocks(document, self.block_types):
+                next_block = document.get_next_block(block, self.ignored_block_types)  # document-level lookup, given the header/footer types to ignore
+                if next_block is None:
+                    continue
+                if next_block.block_type not in self.block_types:
+                    continue
+                if next_block.structure is None:
+                    continue
+                if next_block.ignore_for_output:
+                    continue
+
+                column_break, page_break = False, False
+                next_block_in_first_quadrant = False
+
+                if next_block.page_id == block.page_id:  # block on the same page
+                    # column break: the next list starts at or above this list's bottom edge
+                    column_break = next_block.polygon.y_start <= block.polygon.y_end
+                else:
+                    page_break = True  # the next list lives on a different page
+                    next_page = document.get_page(next_block.page_id)
+                    next_block_in_first_quadrant = (next_block.polygon.x_start < next_page.polygon.width // 2) and \
+                        (next_block.polygon.y_start < next_page.polygon.height // 2)
+                # A page break only counts when the next list starts in the top-left quadrant of its page
+                block.has_continuation = column_break or (page_break and next_block_in_first_quadrant)
+
+    def list_group_indentation(self, document: Document):  # two passes: assign list_indent_level, then nest items under parents
+        for page in document.pages:
+            for block in page.contained_blocks(document, self.block_types):
+                if block.structure is None:
+                    continue
+                if block.ignore_for_output:
+                    continue
+                # First pass: derive list_indent_level from x offsets. NOTE(review): the seed is presumably the group's first item - confirm against ListGroup.get_next_block
+                stack: List[ListItem] = [block.get_next_block(page, None)]
+                for list_item_id in block.structure:
+                    list_item_block: ListItem = page.get_block(list_item_id)
+                    # Drop stack entries the current item is not indented past (by more than min_x_indent of the page width)
+                    while stack and list_item_block.polygon.x_start <= stack[-1].polygon.x_start + (self.min_x_indent * page.polygon.width):
+                        stack.pop()
+                    # An item below the remaining stack top inherits its level, plus one step when indented past it
+                    if stack and list_item_block.polygon.y_start > stack[-1].polygon.y_start:
+                        list_item_block.list_indent_level = stack[-1].list_indent_level
+                        if list_item_block.polygon.x_start > stack[-1].polygon.x_start + (self.min_x_indent * page.polygon.width):  # already implied by the pop loop's exit condition
+                            list_item_block.list_indent_level += 1
+
+                    next_list_item_block = block.get_next_block(page, list_item_block)
+                    if next_list_item_block is not None and next_list_item_block.polygon.x_start > list_item_block.polygon.x_end:
+                        stack = [next_list_item_block]  # reset stack on column breaks
+                    else:
+                        stack.append(list_item_block)
+                # Second pass: nest each item under the nearest preceding item that has a lower indent level
+                stack: List[ListItem] = [block.get_next_block(page, None)]
+                for list_item_id in block.structure.copy():  # iterate a copy: block's structure is modified inside the loop
+                    list_item_block: ListItem = page.get_block(list_item_id)
+
+                    while stack and list_item_block.list_indent_level <= stack[-1].list_indent_level:
+                        stack.pop()
+                    # Whatever remains on top has a strictly lower level and becomes the parent
+                    if stack:
+                        current_parent = stack[-1]
+                        current_parent.add_structure(list_item_block)
+                        current_parent.polygon = current_parent.polygon.merge([list_item_block.polygon])
+                        # The item now belongs to its parent, so it is removed from the group's own structure
+                        block.remove_structure_items([list_item_id])
+                    stack.append(list_item_block)
diff --git a/marker/processors/page_header.py b/marker/processors/page_header.py
index 0972084a..989f9d50 100644
--- a/marker/processors/page_header.py
+++ b/marker/processors/page_header.py
@@ -8,7 +8,7 @@ class PageHeaderProcessor(BaseProcessor):
     """
     A processor for moving PageHeaders to the top
     """
-    block_types = (BlockTypes.PageHeader, BlockTypes.PageFooter)
+    block_types = (BlockTypes.PageHeader,)  # trailing comma required: without it this is a bare enum member, not a tuple
 
     def __call__(self, document: Document):
         for page in document.pages:
diff --git a/marker/processors/table.py b/marker/processors/table.py
index 163a7465..853f1205 100644
--- a/marker/processors/table.py
+++ b/marker/processors/table.py
@@ -1,4 +1,5 @@
 
+from ftfy import fix_text
 from surya.input.pdflines import get_page_text_lines
 from surya.model.detection.model import EfficientViTForSemanticSegmentation
 from surya.model.recognition.encoderdecoder import OCREncoderDecoderModel
@@ -100,6 +101,8 @@ def __call__(self, document: Document):
         for table_d, table_res in zip(table_data, tables):
             block = document.get_block(table_d["block_id"])
             cells = assign_rows_columns(table_res, table_d["img_size"])
+            for cell in cells:
+                cell.text = fix_text(cell.text)
             block.cells = cells
 
     def get_detector_batch_size(self):
diff --git a/marker/processors/text.py b/marker/processors/text.py
index 5294659e..e13d699b 100644
--- a/marker/processors/text.py
+++ b/marker/processors/text.py
@@ -19,6 +19,7 @@ class TextProcessor(BaseProcessor):
             Default is 0.02.
     """
     block_types = (BlockTypes.Text, BlockTypes.TextInlineMath)
+    ignored_block_types = (BlockTypes.PageHeader, BlockTypes.PageFooter)
     column_gap_ratio = 0.02  # column gaps are atleast 2% of the current column width
 
     def __init__(self, config):
@@ -32,64 +33,42 @@ def __call__(self, document: Document):
 
                 if not len(block.structure) >= 2:  # Skip single lines
                     continue
-                
+
+                next_block = document.get_next_block(block, self.ignored_block_types)
+                if next_block is None: # we've reached the end of the document
+                    continue
+                if next_block.block_type not in self.block_types:
+                    continue # we found a non-text block
+                if next_block.structure is None:
+                    continue  # This is odd though, why do we have text blocks with no structure?
+                if next_block.ignore_for_output:
+                    continue # skip ignored blocks
+
                 column_gap = block.polygon.width * self.column_gap_ratio
 
                 column_break, page_break = False, False
-                next_block = page.get_next_block(block)
+                next_block_starts_indented = True
+                next_block_in_first_quadrant = False
+                last_line_is_full_width = False
+                last_line_is_hyphentated = False
+                new_block_lines = []
 
-                if  next_block is not None: # next block exists
+                if next_block.page_id == block.page_id: # block on the same page
                     # we check for a column break
                     column_break = (
-                        math.floor(next_block.polygon.y_start) <= math.floor(block.polygon.y_start) and
+                        math.floor(next_block.polygon.y_start) <= math.ceil(block.polygon.y_start) and
                         next_block.polygon.x_start > (block.polygon.x_end + column_gap)
                     )
-                else:  # It's a page break since we don't have a next block in the page
+                else:
                     page_break = True
+                    next_page = document.get_page(next_block.page_id)
+                    next_block_in_first_quadrant = (next_block.polygon.x_start < next_page.polygon.width // 2) and \
+                                        (next_block.polygon.y_start < next_page.polygon.height // 2)
 
                 if not (column_break or page_break):
                     continue
-
-                next_block_starts_indented = True
-                next_block_in_first_quadrant = False
-                last_line_is_full_width = False
-                last_line_is_hyphentated = False
-                new_block_lines = []
-
-                if column_break:
-                    if next_block.block_type not in self.block_types:
-                        continue
-                    if next_block.structure is None:  # This is odd though, why do we have text blocks with no structure?
-                        continue
-
-                    new_block_lines = next_block.structure_blocks(document)
-                else:  # page break
-                    next_page = document.get_next_page(page)
-                    if next_page is None:
-                        continue  # we're on the last page, so we don't worry about merging
-
-                    # Go through the next page only
-                    for next_page_block_id in next_page.structure:
-                        if next_page_block_id.block_type in [BlockTypes.PageHeader, BlockTypes.PageFooter]:
-                            continue  # skip headers and footers
-
-                        # we have our block
-                        next_page_block = next_page.get_block(next_page_block_id)
-                        if next_page_block.ignore_for_output:
-                            continue # skip ignored blocks
-
-                        if not (next_page_block.structure is not None and \
-                            next_page_block.block_type in self.block_types): 
-                            # we found a non-text block or an empty text block, so we can stop looking
-                            break
-
-                        new_block_lines = next_page_block.structure_blocks(document)
-
-                        next_block_in_first_quadrant = (next_page_block.polygon.x_start < next_page.polygon.width // 2) and \
-                            (next_page_block.polygon.y_start < next_page.polygon.height // 2)
-                        break
-                    else:
-                        continue  # we didn't break anywhere so we continue
+    
+                new_block_lines = next_block.structure_blocks(document)
 
                 # we check for next_block indentation
                 if len(new_block_lines):
diff --git a/marker/providers/__init__.py b/marker/providers/__init__.py
index 02007853..6b389065 100644
--- a/marker/providers/__init__.py
+++ b/marker/providers/__init__.py
@@ -1,5 +1,6 @@
 from typing import List, Optional, Dict
 
+from PIL import Image
 from pydantic import BaseModel
 
 from marker.schema.text import Span
@@ -11,6 +12,10 @@ class ProviderOutput(BaseModel):
     line: Line
     spans: List[Span]
 
+    @property
+    def raw_text(self):
+        return "".join(span.text for span in self.spans)
+
 ProviderPageLines = Dict[int, List[ProviderOutput]]
 
 class BaseProvider:
@@ -21,7 +26,7 @@ def __init__(self, filepath: str, config: Optional[BaseModel | dict] = None):
     def __len__(self):
         pass
 
-    def get_image(self, idx: int, dpi: int):
+    def get_images(self, idxs: List[int], dpi: int) -> List[Image.Image]:
         pass
 
     def get_page_bbox(self, idx: int) -> List[float]:
diff --git a/marker/providers/pdf.py b/marker/providers/pdf.py
index f5fbbe79..30b3f835 100644
--- a/marker/providers/pdf.py
+++ b/marker/providers/pdf.py
@@ -1,6 +1,10 @@
 import atexit
 import re
+from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures.process import ProcessPoolExecutor
+from itertools import repeat
 from typing import List, Set
+import multiprocessing as mp
 
 import pypdfium2 as pdfium
 from ftfy import fix_text
@@ -195,12 +199,17 @@ def detect_bad_ocr(self, text):
 
         return False
 
-    def get_image(self, idx: int, dpi: int) -> Image.Image:
-        page = self.doc[idx]
+    @staticmethod
+    def _render_image(pdf: pdfium.PdfDocument, idx: int, dpi: int) -> Image.Image:
+        page = pdf[idx]
         image = page.render(scale=dpi / 72, draw_annots=False).to_pil()
         image = image.convert("RGB")
         return image
 
+    def get_images(self, idxs: List[int], dpi: int) -> List[Image.Image]:
+        images = [self._render_image(self.doc, idx, dpi) for idx in idxs]
+        return images
+
     def get_page_bbox(self, idx: int) -> PolygonBox | None:
         bbox = self.page_bboxes.get(idx)
         if bbox:
diff --git a/marker/renderers/__init__.py b/marker/renderers/__init__.py
index d2358188..c8bf79da 100644
--- a/marker/renderers/__init__.py
+++ b/marker/renderers/__init__.py
@@ -8,13 +8,16 @@
 from pydantic import BaseModel
 
 from marker.schema import BlockTypes
-from marker.schema.blocks.base import BlockOutput, BlockId
+from marker.schema.blocks.base import BlockId, BlockOutput
+from marker.schema.document import Document
+from marker.settings import settings
 from marker.util import assign_config
 
 
 class BaseRenderer:
     remove_blocks: list = [BlockTypes.PageHeader, BlockTypes.PageFooter]
     image_blocks: list = [BlockTypes.Picture, BlockTypes.Figure]
+    extract_images: bool = True
 
     def __init__(self, config: Optional[BaseModel | dict] = None):
         assign_config(self, config)
@@ -24,7 +27,7 @@ def __call__(self, document):
         raise NotImplementedError
 
     @staticmethod
-    def extract_image(document, image_id, to_base64=False):
+    def extract_image(document: Document, image_id, to_base64=False):
         image_block = document.get_block(image_id)
         page = document.get_page(image_block.page_id)
         page_img = page.highres_image
@@ -33,7 +36,7 @@ def extract_image(document, image_id, to_base64=False):
         if to_base64:
             image_buffer = io.BytesIO()
             cropped.save(image_buffer, format='PNG')
-            cropped = base64.b64encode(image_buffer.getvalue()).decode('utf-8')
+            cropped = base64.b64encode(image_buffer.getvalue()).decode(settings.OUTPUT_ENCODING)
         return cropped
 
     @staticmethod
@@ -54,7 +57,7 @@ def replace_whitespace(match):
 
         return html
 
-    def generate_page_stats(self, document, document_output):
+    def generate_page_stats(self, document: Document, document_output):
         page_stats = []
         for page in document.pages:
             block_counts = Counter([str(block.block_type) for block in page.children]).most_common()
@@ -65,7 +68,7 @@ def generate_page_stats(self, document, document_output):
             })
         return page_stats
 
-    def generate_document_metadata(self, document, document_output):
+    def generate_document_metadata(self, document: Document, document_output):
         metadata =  {
             "table_of_contents": document.table_of_contents,
             "page_stats": self.generate_page_stats(document, document_output),
@@ -75,7 +78,7 @@ def generate_document_metadata(self, document, document_output):
 
         return metadata
 
-    def extract_block_html(self, document, block_output):
+    def extract_block_html(self, document: Document, block_output: BlockOutput):
         soup = BeautifulSoup(block_output.html, 'html.parser')
 
         content_refs = soup.find_all('content-ref')
@@ -91,14 +94,13 @@ def extract_block_html(self, document, block_output):
                     ref_block_id: BlockId = item.id
                     break
 
-            if ref_block_id.block_type in self.image_blocks:
+            if ref_block_id.block_type in self.image_blocks and self.extract_images:
                 images[ref_block_id] = self.extract_image(document, ref_block_id, to_base64=True)
             else:
                 images.update(sub_images)
                 ref.replace_with(BeautifulSoup(content, 'html.parser'))
 
-        if block_output.id.block_type in self.image_blocks:
+        if block_output.id.block_type in self.image_blocks and self.extract_images:
             images[block_output.id] = self.extract_image(document, block_output.id, to_base64=True)
 
         return str(soup), images
-
diff --git a/marker/renderers/html.py b/marker/renderers/html.py
index 9d7bfa18..29ca6be3 100644
--- a/marker/renderers/html.py
+++ b/marker/renderers/html.py
@@ -1,4 +1,5 @@
 import re
+from typing import Literal
 
 from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 from pydantic import BaseModel
@@ -21,11 +22,12 @@ class HTMLOutput(BaseModel):
 class HTMLRenderer(BaseRenderer):
     page_blocks: list = [BlockTypes.Page]
     paginate_output: bool = False
+    image_extraction_mode: Literal["lowres", "highres"] = "highres"
 
     def extract_image(self, document, image_id):
         image_block = document.get_block(image_id)
         page = document.get_page(image_block.page_id)
-        page_img = page.highres_image
+        page_img = page.lowres_image if self.image_extraction_mode == "lowres" else page.highres_image
         image_box = image_block.polygon.rescale(page.polygon.size, page_img.size)
         cropped = page_img.crop(image_box.bbox)
         return cropped
@@ -49,10 +51,13 @@ def extract_html(self, document, document_output, level=0):
             if ref_block_id.block_type in self.remove_blocks:
                 ref.replace_with('')
             elif ref_block_id.block_type in self.image_blocks:
-                image = self.extract_image(document, ref_block_id)
-                image_name = f"{ref_block_id.to_path()}.png"
-                images[image_name] = image
-                ref.replace_with(BeautifulSoup(f"<p><img src='{image_name}'></p>", 'html.parser'))
+                if self.extract_images:
+                    image = self.extract_image(document, ref_block_id)
+                    image_name = f"{ref_block_id.to_path()}.png"
+                    images[image_name] = image
+                    ref.replace_with(BeautifulSoup(f"<p><img src='{image_name}'></p>", 'html.parser'))
+                else:
+                    ref.replace_with('')
             elif ref_block_id.block_type in self.page_blocks:
                 images.update(sub_images)
                 if self.paginate_output:
diff --git a/marker/renderers/json.py b/marker/renderers/json.py
index 0388717d..de2a16d4 100644
--- a/marker/renderers/json.py
+++ b/marker/renderers/json.py
@@ -1,12 +1,13 @@
 from __future__ import annotations
 
-from typing import List, Dict
+from typing import Dict, List
 
 from pydantic import BaseModel
 
-from marker.schema.blocks import Block
 from marker.renderers import BaseRenderer
 from marker.schema import BlockTypes
+from marker.schema.blocks import Block, BlockOutput
+from marker.schema.document import Document
 from marker.schema.registry import get_block_class
 
 
@@ -37,7 +38,7 @@ class JSONRenderer(BaseRenderer):
     image_blocks: list = [BlockTypes.Picture, BlockTypes.Figure]
     page_blocks: list = [BlockTypes.Page]
 
-    def extract_json(self, document, block_output):
+    def extract_json(self, document: Document, block_output: BlockOutput):
         cls = get_block_class(block_output.id.block_type)
         if cls.__base__ == Block:
             html, images = self.extract_block_html(document, block_output)
@@ -64,7 +65,7 @@ def extract_json(self, document, block_output):
                 section_hierarchy=reformat_section_hierarchy(block_output.section_hierarchy)
             )
 
-    def __call__(self, document) -> JSONOutput:
+    def __call__(self, document: Document) -> JSONOutput:
         document_output = document.render()
         json_output = []
         for page_output in document_output.children:
diff --git a/marker/renderers/markdown.py b/marker/renderers/markdown.py
index 0e0f2f80..0cadaf16 100644
--- a/marker/renderers/markdown.py
+++ b/marker/renderers/markdown.py
@@ -5,8 +5,10 @@
 from pydantic import BaseModel
 
 from marker.renderers.html import HTMLRenderer
+from marker.schema import BlockTypes
 from marker.schema.document import Document
 
+
 def cleanup_text(full_text):
     full_text = re.sub(r'\n{3,}', '\n\n', full_text)
     full_text = re.sub(r'(\n\s){3,}', '\n\n', full_text)
@@ -32,9 +34,13 @@ def convert_p(self, el, text, *args):
         hyphens = r'-—¬'
         has_continuation = el.has_attr('class') and 'has-continuation' in el['class']
         if has_continuation:
-            if regex.compile(rf'.*[\p{{Ll}}|\d][{hyphens}]\s?$', regex.DOTALL).match(text):  # handle hypenation across pages
-                return regex.split(rf"[{hyphens}]\s?$", text)[0]
-            return f"{text} "
+            block_type = BlockTypes[el['block-type']]
+            if block_type in [BlockTypes.TextInlineMath, BlockTypes.Text]:
+                if regex.compile(rf'.*[\p{{Ll}}|\d][{hyphens}]\s?$', regex.DOTALL).match(text):  # handle hyphenation across pages
+                    return regex.split(rf"[{hyphens}]\s?$", text)[0]
+                return f"{text} "
+            if block_type == BlockTypes.ListGroup:
+                return f"{text}"
         return f"{text}\n\n" if text else ""  # default convert_p behavior
 
 
diff --git a/marker/schema/__init__.py b/marker/schema/__init__.py
index 7312c0f1..1957639c 100644
--- a/marker/schema/__init__.py
+++ b/marker/schema/__init__.py
@@ -1,7 +1,7 @@
-from enum import auto, IntEnum
+from enum import auto, StrEnum
 
 
-class BlockTypes(IntEnum):
+class BlockTypes(StrEnum):
     Line = auto()
     Span = auto()
     FigureGroup = auto()
diff --git a/marker/schema/blocks/base.py b/marker/schema/blocks/base.py
index c4a7a41b..fb295031 100644
--- a/marker/schema/blocks/base.py
+++ b/marker/schema/blocks/base.py
@@ -64,7 +64,7 @@ class Block(BaseModel):
     page_id: Optional[int] = None
     text_extraction_method: Optional[Literal['pdftext', 'surya']] = None
     structure: List[BlockId] | None = None  # The top-level page structure, which is the block ids in order
-    ignore_for_output: bool = False # Whether this block should be ignored in output
+    ignore_for_output: bool = False  # Whether this block should be ignored in output
     source: Literal['layout', 'heuristics', 'processor'] = 'layout'
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -87,6 +87,32 @@ def structure_blocks(self, document_page: Document | PageGroup) -> List[Block]:
             return []
         return [document_page.get_block(block_id) for block_id in self.structure]
 
+    def get_prev_block(self, document_page: Document | PageGroup, block: Block, ignored_block_types: Optional[List[BlockTypes]] = None):
+        if ignored_block_types is None:
+            ignored_block_types = []
+        
+        structure_idx = self.structure.index(block.id)
+        if structure_idx == 0:
+            return None
+        
+        for prev_block_id in reversed(self.structure[:structure_idx]):
+            if prev_block_id.block_type not in ignored_block_types:
+                return document_page.get_block(prev_block_id)
+
+    def get_next_block(self, document_page: Document | PageGroup, block: Optional[Block] = None, ignored_block_types: Optional[List[BlockTypes]] = None):
+        if ignored_block_types is None:
+            ignored_block_types = []
+
+        structure_idx = 0
+        if block is not None:
+            structure_idx = self.structure.index(block.id) + 1
+
+        for next_block_id in self.structure[structure_idx:]:
+            if next_block_id.block_type not in ignored_block_types:
+                return document_page.get_block(next_block_id)
+
+        return None  # No valid next block found
+
     def add_structure(self, block: Block):
         if self.structure is None:
             self.structure = [block.id]
@@ -170,7 +196,7 @@ def render(self, document: Document, parent_structure: Optional[List[str]], sect
             for block_id in self.structure:
                 block = document.get_block(block_id)
                 rendered = block.render(document, self.structure, section_hierarchy)
-                section_hierarchy = rendered.section_hierarchy  # Update the section hierarchy from the peer blocks
+                section_hierarchy = rendered.section_hierarchy.copy()  # Update the section hierarchy from the peer blocks
                 child_content.append(rendered)
 
         return BlockOutput(
diff --git a/marker/schema/blocks/inlinemath.py b/marker/schema/blocks/inlinemath.py
index 99f46759..1b446ae7 100644
--- a/marker/schema/blocks/inlinemath.py
+++ b/marker/schema/blocks/inlinemath.py
@@ -5,6 +5,8 @@
 class InlineMath(Block):
     block_type: BlockTypes = BlockTypes.TextInlineMath
     has_continuation: bool = False
+    blockquote: bool = False
+    blockquote_level: int = 0
 
     def assemble_html(self, child_blocks, parent_structure):
         if self.ignore_for_output:
@@ -13,7 +15,14 @@ def assemble_html(self, child_blocks, parent_structure):
         template = super().assemble_html(child_blocks, parent_structure)
         template = template.replace("\n", " ")
 
-        class_attr = ""
+        el_attr = f" block-type='{self.block_type}'"
         if self.has_continuation:
-            class_attr = " class='has-continuation'"
-        return f"<p{class_attr}>{template}</p>"
+            el_attr += " class='has-continuation'"
+
+        if self.blockquote:
+            # Wrap the paragraph in one nested <blockquote> per blockquote level
+            blockquote_prefix = "<blockquote>" * self.blockquote_level
+            blockquote_suffix = "</blockquote>" * self.blockquote_level
+            return f"{blockquote_prefix}<p{el_attr}>{template}</p>{blockquote_suffix}"
+        else:
+            return f"<p{el_attr}>{template}</p>"
diff --git a/marker/schema/blocks/listitem.py b/marker/schema/blocks/listitem.py
index 51ab0839..fef515a4 100644
--- a/marker/schema/blocks/listitem.py
+++ b/marker/schema/blocks/listitem.py
@@ -12,16 +12,21 @@ def replace_bullets(child_blocks):
         child_blocks = first_block.children
 
     if first_block is not None and first_block.id.block_type == BlockTypes.Line:
-        bullet_pattern = r"(^|[\n ]|<[^>]*>)[•●○■▪▫–—-]( )"
+        bullet_pattern = r"(^|[\n ]|<[^>]*>)[•●○ഠ ം◦■▪▫–—-]( )"
         first_block.html = re.sub(bullet_pattern, r"\1\2", first_block.html)
 
 
 class ListItem(Block):
     block_type: BlockTypes = BlockTypes.ListItem
+    list_indent_level: int = 0
 
     def assemble_html(self, child_blocks, parent_structure):
         template = super().assemble_html(child_blocks, parent_structure)
         template = template.replace("\n", " ")
         # Remove the first bullet character
         replace_bullets(child_blocks)
-        return f"<li>{template}</li>"
+
+        el_attr = f" block-type='{self.block_type}'"
+        if self.list_indent_level:
+            return f"<ul><li{el_attr} class='list-indent-{self.list_indent_level}'>{template}</li></ul>"
+        return f"<li{el_attr}>{template}</li>"
diff --git a/marker/schema/blocks/pageheader.py b/marker/schema/blocks/pageheader.py
index 4414b1b0..d304490e 100644
--- a/marker/schema/blocks/pageheader.py
+++ b/marker/schema/blocks/pageheader.py
@@ -3,7 +3,7 @@
 
 
 class PageHeader(Block):
-    block_type: str = BlockTypes.PageHeader
+    block_type: BlockTypes = BlockTypes.PageHeader
 
     def assemble_html(self, child_blocks, parent_structure):
         if self.ignore_for_output:
diff --git a/marker/schema/blocks/text.py b/marker/schema/blocks/text.py
index 89fba932..4c2dea86 100644
--- a/marker/schema/blocks/text.py
+++ b/marker/schema/blocks/text.py
@@ -5,6 +5,8 @@
 class Text(Block):
     block_type: BlockTypes = BlockTypes.Text
     has_continuation: bool = False
+    blockquote: bool = False
+    blockquote_level: int = 0
 
     def assemble_html(self, child_blocks, parent_structure):
         if self.ignore_for_output:
@@ -13,7 +15,13 @@ def assemble_html(self, child_blocks, parent_structure):
         template = super().assemble_html(child_blocks, parent_structure)
         template = template.replace("\n", " ")
 
-        class_attr = ""
+        el_attr = f" block-type='{self.block_type}'"
         if self.has_continuation:
-            class_attr += " class='has-continuation'"
-        return f"<p{class_attr}>{template}</p>"
+            el_attr += " class='has-continuation'"
+
+        if self.blockquote:
+            blockquote_prefix = "<blockquote>" * self.blockquote_level
+            blockquote_suffix = "</blockquote>" * self.blockquote_level
+            return f"{blockquote_prefix}<p{el_attr}>{template}</p>{blockquote_suffix}"
+        else:
+            return f"<p{el_attr}>{template}</p>"
diff --git a/marker/schema/document.py b/marker/schema/document.py
index 19ca34a4..d7ca4c73 100644
--- a/marker/schema/document.py
+++ b/marker/schema/document.py
@@ -42,15 +42,23 @@ def get_page(self, page_id):
                 return page
         return None
 
-    def get_next_block(self, block: Block):
+    def get_next_block(self, block: Block, ignored_block_types: List[BlockTypes] = None):
+        if ignored_block_types is None:
+            ignored_block_types = []
+        next_block = None
+
+        # Try to find the next block in the current page
         page = self.get_page(block.page_id)
-        next_block = page.get_next_block(block)
+        next_block = page.get_next_block(block, ignored_block_types)
         if next_block:
             return next_block
-        next_page = self.get_next_page(page)
-        if not next_page:
-            return None
-        return next_page.get_block(next_page.structure[0])
+
+        # If no block found, search subsequent pages
+        for page in self.pages[self.pages.index(page) + 1:]:
+            next_block = page.get_next_block(None, ignored_block_types)
+            if next_block:
+                return next_block
+        return None
 
     def get_next_page(self, page: PageGroup):
         page_idx = self.pages.index(page)
@@ -85,7 +93,7 @@ def render(self):
         section_hierarchy = None
         for page in self.pages:
             rendered = page.render(self, None, section_hierarchy)
-            section_hierarchy = rendered.section_hierarchy
+            section_hierarchy = rendered.section_hierarchy.copy()
             child_content.append(rendered)
 
         return DocumentOutput(
diff --git a/marker/schema/groups/list.py b/marker/schema/groups/list.py
index 0149211f..8e8ee3ab 100644
--- a/marker/schema/groups/list.py
+++ b/marker/schema/groups/list.py
@@ -4,7 +4,12 @@
 
 class ListGroup(Group):
     block_type: BlockTypes = BlockTypes.ListGroup
+    has_continuation: bool = False
 
     def assemble_html(self, child_blocks, parent_structure):
         template = super().assemble_html(child_blocks, parent_structure)
-        return f"<p><ul>{template}</ul></p>"
+
+        el_attr = f" block-type='{self.block_type}'"
+        if self.has_continuation:
+            el_attr += " class='has-continuation'"
+        return f"<p{el_attr}><ul>{template}</ul></p>"
diff --git a/marker/schema/groups/page.py b/marker/schema/groups/page.py
index 1f216612..c00af1f5 100644
--- a/marker/schema/groups/page.py
+++ b/marker/schema/groups/page.py
@@ -1,7 +1,6 @@
 from collections import defaultdict
-from typing import Dict, List, TYPE_CHECKING, Sequence, Tuple
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
-import numpy as np
 from PIL import Image
 
 from marker.providers import ProviderOutput
@@ -18,7 +17,7 @@ class PageGroup(Group):
     block_type: BlockTypes = BlockTypes.Page
     lowres_image: Image.Image | None = None
     highres_image: Image.Image | None = None
-    children: List[Block] | None = None
+    children: List[Union[Any, Block]] | None = None
     layout_sliced: bool = False # Whether the layout model had to slice the image (order may be wrong)
     excluded_block_types: Sequence[BlockTypes] = (BlockTypes.Line, BlockTypes.Span,)
     maximum_assignment_distance: float = 20 # pixels
@@ -35,11 +34,20 @@ def add_child(self, block: Block):
         else:
             self.children.append(block)
 
-    def get_next_block(self, block: Block):
-        block_idx = self.structure.index(block.id)
-        if block_idx + 1 < len(self.structure):
-            return self.get_block(self.structure[block_idx + 1])
-        return None
+    def get_next_block(self, block: Optional[Block] = None, ignored_block_types: Optional[List[BlockTypes]] = None):
+        if ignored_block_types is None:
+            ignored_block_types = []
+        
+        structure_idx = 0
+        if block is not None:
+            structure_idx = self.structure.index(block.id) + 1
+
+        # Iterate over blocks following the given block (or from the first block when none is given)
+        for next_block_id in self.structure[structure_idx:]:
+            if next_block_id.block_type not in ignored_block_types:
+                return self.get_block(next_block_id)
+
+        return None  # No valid next block found
 
     def get_prev_block(self, block: Block):
         block_idx = self.structure.index(block.id)
@@ -123,6 +131,11 @@ def identify_missing_blocks(
             if line_idx in assigned_line_idxs:
                 continue
 
+            # If the unassociated line is just a newline character with minimal area, we can skip it
+            if provider_outputs[line_idx].line.polygon.area <= 1 and \
+                provider_outputs[line_idx].raw_text == "\n":
+                continue
+
             if new_block is None:
                 new_block = [(line_idx, provider_outputs[line_idx])]
             elif all([
diff --git a/marker/settings.py b/marker/settings.py
index 40a739d1..0a510568 100644
--- a/marker/settings.py
+++ b/marker/settings.py
@@ -14,6 +14,9 @@ class Settings(BaseSettings):
     FONT_DIR: str = os.path.join(BASE_DIR, "static", "fonts")
     DEBUG_DATA_FOLDER: str = os.path.join(BASE_DIR, "debug_data")
 
+    # General
+    OUTPUT_ENCODING: str = "utf-8"
+
     # General models
     TORCH_DEVICE: Optional[str] = None # Note: MPS device does not work for text detection, and will default to CPU
 
diff --git a/marker_app.py b/marker_app.py
index 32579ee9..208ddbe8 100644
--- a/marker_app.py
+++ b/marker_app.py
@@ -25,8 +25,7 @@ def load_models():
     return create_model_dict()
 
 
-def convert_pdf(fname: str, **kwargs) -> (str, Dict[str, Any], dict):
-    config_parser = ConfigParser(kwargs)
+def convert_pdf(fname: str, config_parser: ConfigParser) -> (str, Dict[str, Any], dict):
     config_dict = config_parser.generate_config_dict()
     config_dict["pdftext_workers"] = 1
     converter = PdfConverter(
@@ -122,18 +121,24 @@ def page_count(pdf_file):
     st.stop()
 
 # Run Marker
-with tempfile.NamedTemporaryFile(suffix=".pdf") as temp_pdf:
+with tempfile.NamedTemporaryFile(suffix=".pdf", mode="wb+") as temp_pdf:
     temp_pdf.write(in_file.getvalue())
     temp_pdf.seek(0)
     filename = temp_pdf.name
+    cli_options = {
+        "output_format": output_format,
+        "page_range": page_range,
+        "force_ocr": force_ocr,
+        "debug": debug,
+        "output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
+    }
+    config_parser = ConfigParser(cli_options)
     rendered = convert_pdf(
         filename,
-        page_range=page_range,
-        force_ocr=force_ocr,
-        output_format=output_format,
-        output_dir=settings.DEBUG_DATA_FOLDER if debug else None,
-        debug=debug
+        config_parser
     )
+    page_range = config_parser.generate_config_dict()["page_range"]
+    first_page = page_range[0] if page_range else 0
 
 text, ext, images = text_from_rendered(rendered)
 with col2:
@@ -149,10 +154,10 @@ def page_count(pdf_file):
     with col1:
         debug_data_path = rendered.metadata.get("debug_data_path")
         if debug_data_path:
-            pdf_image_path = os.path.join(debug_data_path, f"pdf_page_0.png")
+            pdf_image_path = os.path.join(debug_data_path, f"pdf_page_{first_page}.png")
             img = Image.open(pdf_image_path)
             st.image(img, caption="PDF debug image", use_container_width=True)
-            layout_image_path = os.path.join(debug_data_path, f"layout_page_0.png")
+            layout_image_path = os.path.join(debug_data_path, f"layout_page_{first_page}.png")
             img = Image.open(layout_image_path)
             st.image(img, caption="Layout debug image", use_container_width=True)
 
diff --git a/marker_server.py b/marker_server.py
index aa5a3178..2f8bbe21 100644
--- a/marker_server.py
+++ b/marker_server.py
@@ -18,6 +18,7 @@
 from fastapi import FastAPI, Form, File, UploadFile
 from marker.converters.pdf import PdfConverter
 from marker.models import create_model_dict
+from marker.settings import settings
 
 app_data = {}
 
@@ -110,7 +111,7 @@ async def _convert_pdf(params: CommonParams):
     for k, v in images.items():
         byte_stream = io.BytesIO()
         v.save(byte_stream, format="PNG")
-        encoded[k] = base64.b64encode(byte_stream.getvalue()).decode("utf-8")
+        encoded[k] = base64.b64encode(byte_stream.getvalue()).decode(settings.OUTPUT_ENCODING)
 
     return {
         "format": params.output_format,
@@ -140,7 +141,7 @@ async def convert_pdf_upload(
     ),
 ):
     upload_path = os.path.join(UPLOAD_DIRECTORY, file.filename)
-    with open(upload_path, "wb") as upload_file:
+    with open(upload_path, "wb+") as upload_file:
         file_contents = await file.read()
         upload_file.write(file_contents)
 
diff --git a/pyproject.toml b/pyproject.toml
index 61bdc89f..830d0d7d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "marker-pdf"
-version = "1.0.0"
+version = "1.0.1"
 description = "Convert PDF to markdown with high speed and accuracy."
 authors = ["Vik Paruchuri <github@vikas.sh>"]
 readme = "README.md"
diff --git a/tests/builders/test_rotated_bboxes.py b/tests/builders/test_rotated_bboxes.py
index 90632551..d62d0438 100644
--- a/tests/builders/test_rotated_bboxes.py
+++ b/tests/builders/test_rotated_bboxes.py
@@ -11,7 +11,7 @@ def test_rotated_bboxes(pdf_document):
     # Ensure we match all text lines up properly
     text_lines = first_page.contained_blocks(pdf_document, (BlockTypes.Line,))
     text_blocks = first_page.contained_blocks(pdf_document, (BlockTypes.Text,))
-    assert len(text_lines) == 97
+    assert len(text_lines) == 95
 
     # Ensure the bbox sizes match up
     max_line_position = max([line.polygon.x_end for line in text_lines])
diff --git a/tests/providers/test_pdf_provider.py b/tests/providers/test_pdf_provider.py
index a399b06e..40b96b90 100644
--- a/tests/providers/test_pdf_provider.py
+++ b/tests/providers/test_pdf_provider.py
@@ -4,8 +4,8 @@
 @pytest.mark.config({"page_range": [0]})
 def test_pdf_provider(pdf_provider):
     assert len(pdf_provider) == 12
-    assert pdf_provider.get_image(0, 72).size == (612, 792)
-    assert pdf_provider.get_image(0, 96).size == (816, 1056)
+    assert pdf_provider.get_images([0], 72)[0].size == (612, 792)
+    assert pdf_provider.get_images([0], 96)[0].size == (816, 1056)
 
     page_lines = pdf_provider.get_page_lines(0)
     assert len(page_lines) == 93
diff --git a/tests/renderers/test_markdown_renderer.py b/tests/renderers/test_markdown_renderer.py
index 8f82de4c..0752b100 100644
--- a/tests/renderers/test_markdown_renderer.py
+++ b/tests/renderers/test_markdown_renderer.py
@@ -25,4 +25,13 @@ def test_markdown_renderer_pagination(pdf_document):
 def test_markdown_renderer_metadata(pdf_document):
     renderer = MarkdownRenderer({"paginate_output": True})
     metadata = renderer(pdf_document).metadata
-    assert "table_of_contents" in metadata
\ No newline at end of file
+    assert "table_of_contents" in metadata
+
+
+@pytest.mark.config({"page_range": [0, 1]})
+def test_markdown_renderer_images(pdf_document):
+    renderer = MarkdownRenderer({"extract_images": False})
+    markdown_output = renderer(pdf_document)
+    
+    assert len(markdown_output.images) == 0
+    assert '![](' not in markdown_output.markdown