diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..3aee82de --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,21 @@ +name: Benchmark +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install Hatch + uses: pypa/hatch@install + - name: Run benchmarks + run: | + hatch run bench:all diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index caa625e6..500b4f9a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: Run all tests +name: Test on: push: branches: [ "main" ] @@ -17,4 +17,4 @@ jobs: - name: Install Hatch uses: pypa/hatch@install - name: Run tests - run: hatch test + run: hatch test --cover diff --git a/README.md b/README.md index b36cd396..e34972c0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# PLAYA Ain't a LAYout Analyzer 🏖️ +# **P**LAYA ain't a **LAY**out **A**nalyzer 🏖️ ## About @@ -28,7 +28,110 @@ Notably this does *not* include the largely undocumented heuristic to understand due to a Java-damaged API based on deeply nested class hierarchies, and because layout analysis is best done probabilistically/visually. Also, pdfplumber does its own, much -nicer, layout analysis. +nicer, layout analysis. Also, if you just want to extract text from a +PDF, there are a lot of better and faster tools and libraries out +there, see [benchmarks]() for a summary (TL;DR pypdfium2 is probably +what you want, but pdfplumber does a nice job of converting PDF to +ASCII art). + +## Usage + +Do you want to get stuff out of a PDF? You have come to the right +place! 
Let's open up a PDF and see what's in it: + +```python +pdf = playa.open("my_awesome_document.pdf") +raw_byte_stream = pdf.buffer +a_bunch_of_tokens = list(pdf.tokens) +a_bunch_of_objects = list(pdf) +a_particular_indirect_object = pdf[42] +``` + +The raw PDF tokens and objects are probably not terribly useful to +you, but you might find them interesting. + +It probably has some pages. How many? What are their numbers/labels? +(they could be things like "xviii", "a", or "42", for instance) + +```python +npages = len(pdf.pages) +page_numbers = [page.label for page in pdf.pages] +``` + +What's in the table of contents? + +```python +for entry in pdf.outlines: + ... +``` + +If you are lucky it has a "logical structure tree". The elements here +might even be referenced from the table of contents! (or, they might +not... with PDF you never know) + +```python +structure = pdf.structtree +for element in structure: + for child in element: + ... +``` + +Now perhaps we want to look at a specific page. Okay! +```python +page = pdf.pages[0] # they are numbered from 0 +page = pdf.pages["xviii"] # but you can get them by label +page = pdf.pages["42"] # or "logical" page number (also a label) +a_few_content_streams = list(page.contents) +raw_bytes = b"".join(stream.buffer for stream in page.contents) +``` + +This page probably has text, graphics, etc, etc, in it. Remember that +**P**LAYA ain't a **LAY**out **A**nalyzer! You can either look at the +stream of tokens or mysterious PDF objects: +```python +for token in page.tokens: + ... +for object in page: + ... +``` + +Or you can access individual characters, lines, curves, and rectangles +(if you wanted to, for instance, do layout analysis): +```python +for item in page.layout: + ... +``` + +Do we make you spelunk in a dank class hierarchy to know what these +items are? No, we do not! They are just NamedTuples with a very +helpful field *telling* you what they are, as a string. 
+ +In particular you can also extract all these items into a dataframe +using the library of your choosing (I like [Polars](https://pola.rs/)) and I dunno do +some Artifishul Intelligents or something with them: +```python +``` + +Or just write them to a CSV file: +```python +``` + +Note again that PLAYA doesn't guarantee that these characters come at +you in anything other than the order they occur in the file (but it +does guarantee that). It does, however, put them in (hopefully) the +right absolute positions on the page, and keep track of the clipping +path and the graphics state, so yeah, you *could* "render" them like +`pdfminer.six` pretended to do. + +Certain PDF tools and/or authors are notorious for using "whiteout" +(set the color to the background color) or "scissors" (the clipping +path) to hide arbitrary text that maybe *you* don't want to see +either. PLAYA gives you some rudimentary tools to detect this: +```python +``` + +For everything else, there's pdfplumber, pdfium2, pikepdf, pypdf, +borb, pydyf, etc, etc, etc. 
## Acknowledgement diff --git a/playa/__init__.py b/playa/__init__.py index 20f87e01..1020e0d8 100644 --- a/playa/__init__.py +++ b/playa/__init__.py @@ -10,7 +10,7 @@ from os import PathLike from typing import Union -from playa.pdfdocument import PDFDocument +from playa.document import PDFDocument __version__ = "0.0.1" diff --git a/playa/cmapdb.py b/playa/cmapdb.py index bde0dde3..bd23e428 100644 --- a/playa/cmapdb.py +++ b/playa/cmapdb.py @@ -32,8 +32,8 @@ ) from playa.encodingdb import name2unicode -from playa.exceptions import PSEOF, PDFException, PDFTypeError, PSSyntaxError -from playa.psparser import KWD, PSKeyword, PSLiteral, PSStackParser, literal_name +from playa.exceptions import PDFException, PDFTypeError, PSSyntaxError +from playa.parser import KWD, Parser, PSKeyword, PSLiteral, literal_name from playa.utils import choplist, nunpack log = logging.getLogger(__name__) @@ -275,7 +275,7 @@ def get_unicode_map(cls, name: str, vertical: bool = False) -> UnicodeMap: return cls._umap_cache[name][vertical] -class CMapParser(PSStackParser[PSKeyword]): +class CMapParser(Parser[PSKeyword]): def __init__(self, cmap: CMapBase, data: bytes) -> None: super().__init__(data) self.cmap = cmap @@ -284,10 +284,7 @@ def __init__(self, cmap: CMapBase, data: bytes) -> None: self._warnings: Set[str] = set() def run(self) -> None: - try: - self.nextobject() - except PSEOF: - pass + next(self, None) KEYWORD_BEGINCMAP = KWD(b"begincmap") KEYWORD_ENDCMAP = KWD(b"endcmap") diff --git a/playa/pdfcolor.py b/playa/color.py similarity index 96% rename from playa/pdfcolor.py rename to playa/color.py index b4c2021f..1bc1bcb4 100644 --- a/playa/pdfcolor.py +++ b/playa/color.py @@ -1,7 +1,7 @@ import collections from typing import Dict -from playa.psparser import LIT +from playa.parser import LIT LITERAL_DEVICE_GRAY = LIT("DeviceGray") LITERAL_DEVICE_RGB = LIT("DeviceRGB") diff --git a/playa/pdfdocument.py b/playa/document.py similarity index 91% rename from playa/pdfdocument.py rename to 
playa/document.py index ed609a5c..27a5fab3 100644 --- a/playa/pdfdocument.py +++ b/playa/document.py @@ -1,6 +1,7 @@ import io import itertools import logging +import mmap import re import struct from collections import deque @@ -28,10 +29,8 @@ from playa import settings from playa.arcfour import Arcfour -from playa.cmapdb import CMap, CMapBase, CMapDB from playa.data_structures import NameTree, NumberTree from playa.exceptions import ( - PSEOF, PDFEncryptionError, PDFException, PDFFontError, @@ -45,19 +44,23 @@ PDFTypeError, PSException, ) -from playa.pdffont import ( - PDFCIDFont, - PDFFont, - PDFTrueTypeFont, - PDFType1Font, - PDFType3Font, +from playa.font import PDFCIDFont, PDFFont, PDFTrueTypeFont, PDFType1Font, PDFType3Font +from playa.page import Page +from playa.parser import ( + KEYWORD_OBJ, + KEYWORD_TRAILER, + KEYWORD_XREF, + LIT, + ContentStreamParser, + PDFParser, + PSBaseParserToken, + PSLiteral, + literal_name, ) -from playa.pdfpage import PDFPage -from playa.pdfparser import KEYWORD_XREF, PDFParser, PDFStreamParser from playa.pdftypes import ( + ContentStream, DecipherCallable, - PDFObjRef, - PDFStream, + ObjRef, decipher_all, dict_value, int_value, @@ -67,7 +70,6 @@ stream_value, uint_value, ) -from playa.psparser import KWD, LIT, PSLiteral, literal_name from playa.utils import ( choplist, decode_text, @@ -89,7 +91,6 @@ LITERAL_CATALOG = LIT("Catalog") LITERAL_PAGE = LIT("Page") LITERAL_PAGES = LIT("Pages") -KEYWORD_OBJ = KWD(b"obj") INHERITABLE_PAGE_ATTRS = {"Resources", "MediaBox", "CropBox", "Rotate"} @@ -116,14 +117,11 @@ def __init__(self, parser: PDFParser) -> None: self._load(parser) def _load(self, parser: PDFParser) -> None: - while True: - try: - (pos, line) = parser.nextline() - line = line.strip() - if not line: - continue - except PSEOF: - raise PDFNoValidXRef("Unexpected EOF - file corrupted?") + lines = parser.iter_lines() + for pos, line in lines: + line = line.strip() + if not line: + continue if line.startswith(b"trailer"): 
parser.seek(pos) break @@ -137,11 +135,8 @@ def _load(self, parser: PDFParser) -> None: error_msg = f"Invalid line: {parser!r}: line={line!r}" raise PDFNoValidXRef(error_msg) for objid in range(start, start + nobjs): - try: - (_, line) = parser.nextline() - line = line.strip() - except PSEOF: - raise PDFNoValidXRef("Unexpected EOF - file corrupted?") + _, line = next(lines) + line = line.strip() f = line.split(b" ") if len(f) != 3: error_msg = f"Invalid XRef format: {parser!r}, line={line!r}" @@ -156,9 +151,16 @@ def _load(self, parser: PDFParser) -> None: def _load_trailer(self, parser: PDFParser) -> None: try: (_, kwd) = parser.nexttoken() - assert kwd is KWD(b"trailer"), str(kwd) - (_, dic) = parser.nextobject() - except PSEOF: + if kwd is not KEYWORD_TRAILER: + raise PDFSyntaxError( + "Expected %r, got %r" + % ( + KEYWORD_TRAILER, + kwd, + ) + ) + (_, dic) = next(parser) + except StopIteration: x = parser.pop(1) if not x: raise PDFNoValidXRef("Unexpected EOF - file corrupted") @@ -190,11 +192,7 @@ def __repr__(self) -> str: def _load(self, parser: PDFParser) -> None: parser.seek(0) - while 1: - try: - (pos, line_bytes) = parser.nextline() - except PSEOF: - break + for pos, line_bytes in parser.iter_lines(): if line_bytes.startswith(b"trailer"): parser.seek(pos) self._load_trailer(parser) @@ -210,8 +208,8 @@ def _load(self, parser: PDFParser) -> None: self.offsets[objid] = (None, pos, genno) # expand ObjStm. 
parser.seek(pos) - (_, obj) = parser.nextobject() - if isinstance(obj, PDFStream) and obj.get("Type") is LITERAL_OBJSTM: + (_, obj) = next(parser) + if isinstance(obj, ContentStream) and obj.get("Type") is LITERAL_OBJSTM: stream = stream_value(obj) try: n = stream["N"] @@ -222,14 +220,9 @@ def _load(self, parser: PDFParser) -> None: doc = parser.doc() if doc is None: raise RuntimeError("Document no longer exists!") - parser1 = PDFStreamParser(stream.get_data(), doc) - objs: List[int] = [] - try: - while 1: - (_, obj) = parser1.nextobject() - objs.append(cast(int, obj)) - except PSEOF: - pass + parser1 = ContentStreamParser(stream.get_data(), doc) + objs: List = [obj for _, obj in parser1] + # FIXME: This is choplist n = min(n, len(objs) // 2) for index in range(n): objid1 = objs[index * 2] @@ -255,9 +248,12 @@ def _load(self, parser: PDFParser) -> None: (_, objid) = parser.nexttoken() # ignored (_, genno) = parser.nexttoken() # ignored (_, kwd) = parser.nexttoken() - (_, stream) = parser.nextobject() - if not isinstance(stream, PDFStream) or stream.get("Type") is not LITERAL_XREF: - raise PDFNoValidXRef("Invalid PDF stream spec.") + (_, stream) = next(parser) + if ( + not isinstance(stream, ContentStream) + or stream.get("Type") is not LITERAL_XREF + ): + raise PDFNoValidXRef(f"Invalid PDF stream spec {stream!r}") size = stream["Size"] index_array = stream.get("Index", (0, size)) if len(index_array) % 2 != 0: @@ -704,79 +700,7 @@ class OutlineItem(NamedTuple): # FIXME: Create Destination and Action types dest: Union[PSLiteral, bytes, list, None] action: Union[dict, None] - se: Union[PDFObjRef, None] - - -class PDFResourceManager: - """Repository of shared resources. - - ResourceManager facilitates reuse of shared resources - such as fonts and images so that large objects are not - allocated multiple times. 
- """ - - def __init__(self, caching: bool = True) -> None: - self.caching = caching - self._cached_fonts: Dict[object, PDFFont] = {} - - def get_procset(self, procs: Sequence[object]) -> None: - for proc in procs: - if proc is LITERAL_PDF or proc is LITERAL_TEXT: - pass - else: - pass - - def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase: - try: - return CMapDB.get_cmap(cmapname) - except CMapDB.CMapNotFound: - if strict: - raise - return CMap() - - def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont: - if objid and objid in self._cached_fonts: - font = self._cached_fonts[objid] - else: - log.debug("get_font: create: objid=%r, spec=%r", objid, spec) - if settings.STRICT: - if spec["Type"] is not LITERAL_FONT: - raise PDFFontError("Type is not /Font") - # Create a Font object. - if "Subtype" in spec: - subtype = literal_name(spec["Subtype"]) - else: - if settings.STRICT: - raise PDFFontError("Font Subtype is not specified.") - subtype = "Type1" - if subtype in ("Type1", "MMType1"): - # Type1 Font - font = PDFType1Font(spec) - elif subtype == "TrueType": - # TrueType Font - font = PDFTrueTypeFont(spec) - elif subtype == "Type3": - # Type3 Font - font = PDFType3Font(spec) - elif subtype in ("CIDFontType0", "CIDFontType2"): - # CID Font - font = PDFCIDFont(spec) - elif subtype == "Type0": - # Type0 Font - dfonts = list_value(spec["DescendantFonts"]) - assert dfonts - subspec = dict_value(dfonts[0]).copy() - for k in ("Encoding", "ToUnicode"): - if k in spec: - subspec[k] = resolve1(spec[k]) - font = self.get_font(None, subspec) - else: - if settings.STRICT: - raise PDFFontError("Invalid Font spec: %r" % spec) - font = PDFType1Font(spec) # FIXME: this is so wrong! 
- if objid and self.caching: - self._cached_fonts[objid] = font - return font + se: Union[ObjRef, None] class PDFDocument: @@ -800,7 +724,7 @@ class PDFDocument: """ _fp: Union[BinaryIO, None] = None - _pages: Union[List[PDFPage], None] = None + _pages: Union[List[Page], None] = None def __enter__(self) -> "PDFDocument": return self @@ -823,10 +747,26 @@ def __init__( self.decipher: Optional[DecipherCallable] = None self._cached_objs: Dict[int, Tuple[object, int]] = {} self._parsed_objs: Dict[int, Tuple[List[object], int]] = {} + self._cached_fonts: Dict[object, PDFFont] = {} if isinstance(fp, io.TextIOBase): raise PSException("fp is not a binary file") - self.pdf_version = read_header(fp) - self.parser = PDFParser(fp, self) + # The header is frequently mangled, in which case we will try to read the + # file anyway. + try: + self.pdf_version = read_header(fp) + except PDFSyntaxError: + log.warning("PDF header not found, will try to read the file anyway") + self.pdf_version = "UNKNOWN" + try: + self.buffer: Union[bytes, mmap.mmap] = mmap.mmap( + fp.fileno(), 0, access=mmap.ACCESS_READ + ) + except io.UnsupportedOperation: + log.warning("mmap not supported on %r, reading document into memory", fp) + self.buffer = fp.read() + except ValueError as e: + raise PSException from e + self.parser = PDFParser(self.buffer, self) self.is_printable = self.is_modifiable = self.is_extractable = True # Getting the XRef table and trailer is done non-lazily # because they contain encryption information among other @@ -868,8 +808,6 @@ def __init__( if self.catalog.get("Type") is not LITERAL_CATALOG: if settings.STRICT: raise PDFSyntaxError("Catalog not found!") - # NOTE: This does nearly nothing at all - self.rsrcmgr = PDFResourceManager(True) def _initialize_password(self, password: str = "") -> None: """Initialize the decryption handler with a given password, if any. 
@@ -899,7 +837,20 @@ def _initialize_password(self, password: str = "") -> None: assert self.parser is not None self.parser.fallback = False # need to read streams with exact length - def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> object: + def __iter__(self) -> Iterator[Tuple[int, object]]: + """Iterate over (position, object) tuples, raising StopIteration at EOF.""" + # FIXME: Should create a new parser + self.parser.seek(0) + return self.parser + + @property + def tokens(self) -> Iterator[Tuple[int, PSBaseParserToken]]: + """Iterate over (position, token) tuples, raising StopIteration at EOF.""" + # FIXME: Should create a new parser + self.parser.seek(0) + return self.parser.tokens + + def _getobj_objstm(self, stream: ContentStream, index: int, objid: int) -> object: if stream.objid in self._parsed_objs: (objs, n) = self._parsed_objs[stream.objid] else: @@ -913,7 +864,7 @@ def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> object: raise PDFSyntaxError("index too big: %r" % index) return obj - def _get_objects(self, stream: PDFStream) -> Tuple[List[object], int]: + def _get_objects(self, stream: ContentStream) -> Tuple[List[object], int]: if stream.get("Type") is not LITERAL_OBJSTM: if settings.STRICT: raise PDFSyntaxError("Not a stream object: %r" % stream) @@ -923,14 +874,8 @@ def _get_objects(self, stream: PDFStream) -> Tuple[List[object], int]: if settings.STRICT: raise PDFSyntaxError("N is not defined: %r" % stream) n = 0 - parser = PDFStreamParser(stream.get_data(), self) - objs: List[object] = [] - try: - while 1: - (_, obj) = parser.nextobject() - objs.append(obj) - except PSEOF: - pass + parser = ContentStreamParser(stream.get_data(), self) + objs: List[object] = [obj for _, obj in parser] return (objs, n) def _getobj_parse(self, pos: int, objid: int) -> object: @@ -951,7 +896,7 @@ def _getobj_parse(self, pos: int, objid: int) -> object: while True: try: (_, token) = self.parser.nexttoken() - except PSEOF: + 
except StopIteration: raise PDFSyntaxError( f"object {objid!r} not found at or after position {pos}" ) @@ -966,7 +911,7 @@ def _getobj_parse(self, pos: int, objid: int) -> object: (_, kwd) = self.parser.nexttoken() if kwd != KEYWORD_OBJ: raise PDFSyntaxError("Invalid object spec: offset=%r" % pos) - (_, obj) = self.parser.nextobject() + (_, obj) = next(self.parser) return obj def __getitem__(self, objid: int) -> object: @@ -996,10 +941,10 @@ def __getitem__(self, objid: int) -> object: if self.decipher: obj = decipher_all(self.decipher, objid, genno, obj) - if isinstance(obj, PDFStream): + if isinstance(obj, ContentStream): obj.set_objid(objid, genno) break - except (PSEOF, PDFSyntaxError): + except (StopIteration, PDFSyntaxError): continue if obj is None: raise IndexError(f"Object with ID {objid} not found") @@ -1007,6 +952,50 @@ def __getitem__(self, objid: int) -> object: self._cached_objs[objid] = (obj, genno) return obj + def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont: + if objid and objid in self._cached_fonts: + font = self._cached_fonts[objid] + else: + log.debug("get_font: create: objid=%r, spec=%r", objid, spec) + if settings.STRICT: + if spec["Type"] is not LITERAL_FONT: + raise PDFFontError("Type is not /Font") + # Create a Font object. 
+ if "Subtype" in spec: + subtype = literal_name(spec["Subtype"]) + else: + if settings.STRICT: + raise PDFFontError("Font Subtype is not specified.") + subtype = "Type1" + if subtype in ("Type1", "MMType1"): + # Type1 Font + font = PDFType1Font(spec) + elif subtype == "TrueType": + # TrueType Font + font = PDFTrueTypeFont(spec) + elif subtype == "Type3": + # Type3 Font + font = PDFType3Font(spec) + elif subtype in ("CIDFontType0", "CIDFontType2"): + # CID Font + font = PDFCIDFont(spec) + elif subtype == "Type0": + # Type0 Font + dfonts = list_value(spec["DescendantFonts"]) + assert dfonts + subspec = dict_value(dfonts[0]).copy() + for k in ("Encoding", "ToUnicode"): + if k in spec: + subspec[k] = resolve1(spec[k]) + font = self.get_font(None, subspec) + else: + if settings.STRICT: + raise PDFFontError("Invalid Font spec: %r" % spec) + font = PDFType1Font(spec) # FIXME: this is so wrong! + if objid: + self._cached_fonts[objid] = font + return font + @property def outlines(self) -> Iterator[OutlineItem]: if "Outlines" not in self.catalog: @@ -1080,11 +1069,11 @@ def get_page_objects(self) -> Iterator[Tuple[int, PageType]]: visited = set() while stack: (obj, parent) = stack.pop() - if isinstance(obj, PDFObjRef): + if isinstance(obj, ObjRef): # The PDF specification *requires* both the Pages # element of the catalog and the entries in Kids in # the page tree to be indirect references. - object_id = obj.objid + object_id = int(obj.objid) elif isinstance(obj, int): # Should not happen in a valid PDF, but probably does? 
log.warning("Page tree contains bare integer: %r in %r", obj, parent) @@ -1118,23 +1107,28 @@ def get_page_objects(self) -> Iterator[Tuple[int, PageType]]: log.debug("Page: %r", object_properties) yield object_id, object_properties + # FIXME: Make an object that can be indexed by int or str @property - def pages(self) -> List[PDFPage]: + def pages(self) -> List[Page]: if self._pages is None: try: page_labels: Iterator[Optional[str]] = self.page_labels except PDFNoPageLabels: page_labels = itertools.repeat(None) try: - self._pages = [PDFPage(self, objid, properties, label, page_number + 1) - for page_number, ((objid, properties), label) in enumerate( - zip(self.get_page_objects(), page_labels) - )] + self._pages = [ + Page(self, objid, properties, label, page_idx) + for page_idx, ((objid, properties), label) in enumerate( + zip(self.get_page_objects(), page_labels) + ) + ] except PDFNoPageTree: - self._pages = [PDFPage(self, objid, properties, label, page_number + 1) - for page_number, ((objid, properties), label) in enumerate( - zip(self.get_pages_from_xrefs(), page_labels) - )] + self._pages = [ + Page(self, objid, properties, label, page_idx) + for page_idx, ((objid, properties), label) in enumerate( + zip(self.get_pages_from_xrefs(), page_labels) + ) + ] return self._pages @property @@ -1184,7 +1178,8 @@ def find_xref(self) -> int: prev = b"" # FIXME: This will scan *the whole file* looking for an xref # table, it should maybe give up sooner? 
- for line in self.parser.revreadlines(): + self.parser.seek(self.parser.end) + for line in self.parser.reverse_iter_lines(): line = line.strip() log.debug("find_xref: %r", line) if line == b"startxref": @@ -1210,7 +1205,7 @@ def read_xref_from( self.parser.reset() try: (pos, token) = self.parser.nexttoken() - except PSEOF: + except StopIteration: raise PDFNoValidXRef("Unexpected EOF at {start}") log.debug("read_xref_from: start=%d, token=%r", start, token) if isinstance(token, int): @@ -1220,7 +1215,7 @@ def read_xref_from( xref: PDFXRef = PDFXRefStream(self.parser) else: if token is KEYWORD_XREF: - self.parser.nextline() + next(self.parser.iter_lines()) xref = PDFXRefTable(self.parser) xrefs.append(xref) trailer = xref.trailer diff --git a/playa/encodingdb.py b/playa/encodingdb.py index c44a2742..259f1f14 100644 --- a/playa/encodingdb.py +++ b/playa/encodingdb.py @@ -5,7 +5,7 @@ from playa.exceptions import PDFKeyError from playa.glyphlist import glyphname2unicode from playa.latin_enc import ENCODING -from playa.psparser import PSLiteral +from playa.parser import PSLiteral HEXADECIMAL = re.compile(r"[0-9a-fA-F]+") diff --git a/playa/exceptions.py b/playa/exceptions.py index a886bbf6..7bbf8e78 100644 --- a/playa/exceptions.py +++ b/playa/exceptions.py @@ -7,10 +7,6 @@ class PSException(Exception): pass -class PSEOF(PSException): - pass - - class PSSyntaxError(PSException): pass diff --git a/playa/pdffont.py b/playa/font.py similarity index 98% rename from playa/pdffont.py rename to playa/font.py index d326e21e..c35c5277 100644 --- a/playa/pdffont.py +++ b/playa/font.py @@ -27,7 +27,6 @@ ) from playa.encodingdb import EncodingDB, name2unicode from playa.exceptions import ( - PSEOF, PDFException, PDFFontError, PDFKeyError, @@ -35,8 +34,16 @@ PDFValueError, ) from playa.fontmetrics import FONT_METRICS +from playa.parser import ( + KWD, + LIT, + Parser, + PSKeyword, + PSLiteral, + literal_name, +) from playa.pdftypes import ( - PDFStream, + ContentStream, dict_value, 
int_value, list_value, @@ -45,14 +52,6 @@ resolve_all, stream_value, ) -from playa.psparser import ( - KWD, - LIT, - PSKeyword, - PSLiteral, - PSStackParser, - literal_name, -) from playa.utils import Matrix, Point, Rect, apply_matrix_norm, choplist, nunpack log = logging.getLogger(__name__) @@ -106,8 +105,8 @@ def get_metrics(cls, fontname: str) -> Tuple[Dict[str, object], Dict[str, int]]: return FONT_METRICS[fontname] -# int here means that we're not extending PSStackParser with additional types. -class Type1FontHeaderParser(PSStackParser[int]): +# int here means that we're not extending Parser with additional types. +class Type1FontHeaderParser(Parser[int]): KEYWORD_BEGIN = KWD(b"begin") KEYWORD_END = KWD(b"end") KEYWORD_DEF = KWD(b"def") @@ -137,8 +136,8 @@ def get_encoding(self) -> Dict[int, str]: """ while 1: try: - (cid, name) = self.nextobject() - except PSEOF: + (cid, name) = next(self) + except StopIteration: break try: self._cid2unicode[cid] = name2unicode(cast(str, name)) @@ -1070,7 +1069,7 @@ def __init__( ttf = TrueTypeFont(self.basefont, BytesIO(self.fontfile.get_data())) self.unicode_map: Optional[UnicodeMap] = None if "ToUnicode" in spec: - if isinstance(spec["ToUnicode"], PDFStream): + if isinstance(spec["ToUnicode"], ContentStream): strm = stream_value(spec["ToUnicode"]) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, strm.get_data()).run() @@ -1148,8 +1147,8 @@ def _get_cmap_name(spec: Mapping[str, Any], strict: bool) -> str: if strict: raise PDFFontError("Encoding is unspecified") - if type(cmap_name) is PDFStream: # type: ignore[comparison-overlap] - cmap_name_stream: PDFStream = cast(PDFStream, cmap_name) + if type(cmap_name) is ContentStream: # type: ignore[comparison-overlap] + cmap_name_stream: ContentStream = cast(ContentStream, cmap_name) if "CMapName" in cmap_name_stream: cmap_name = cmap_name_stream.get("CMapName").name elif strict: diff --git a/playa/image.py b/playa/image.py index 714bd2af..5c35cfa4 100644 --- 
a/playa/image.py +++ b/playa/image.py @@ -4,16 +4,16 @@ from io import BytesIO from typing import BinaryIO, Literal, Tuple -from playa.exceptions import PDFValueError -from playa.jbig2 import JBIG2StreamReader, JBIG2StreamWriter -from playa.layout import LTImage -from playa.pdfcolor import ( +from playa.color import ( LITERAL_DEVICE_CMYK, LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_INLINE_DEVICE_GRAY, LITERAL_INLINE_DEVICE_RGB, ) +from playa.exceptions import PDFValueError +from playa.jbig2 import JBIG2StreamReader, JBIG2StreamWriter +from playa.layout import LTImage from playa.pdftypes import ( LITERALS_DCT_DECODE, LITERALS_FLATE_DECODE, diff --git a/playa/layout.py b/playa/layout.py index 0085b12f..ee311284 100644 --- a/playa/layout.py +++ b/playa/layout.py @@ -1,22 +1,18 @@ import logging from typing import ( - Generic, Iterable, Iterator, List, Optional, Tuple, - TypeVar, Union, - cast, ) +from playa.color import PDFColorSpace from playa.exceptions import PDFValueError -from playa.pdfcolor import PDFColorSpace -from playa.pdffont import PDFFont -from playa.pdftypes import PDFStream +from playa.font import PDFFont +from playa.pdftypes import ContentStream from playa.utils import ( - INF, Matrix, PathSegment, Point, @@ -85,31 +81,10 @@ def __repr__(self) -> str: ) -class LTItem: - """Interface for things that can be analyzed""" - - # Any item could be in a marked content section - mcid: Optional[int] = None - # Which could have a tag - tag: Optional[str] = None - - -class LTText: - """Interface for things that have text""" - - def __repr__(self) -> str: - return f"<{self.__class__.__name__} {self.get_text()!r}>" - - def get_text(self) -> str: - """Text contained in this object""" - raise NotImplementedError - - -class LTComponent(LTItem): +class LTComponent: """Object with a bounding box""" def __init__(self, bbox: Rect) -> None: - LTItem.__init__(self) self.set_bbox(bbox) def __repr__(self) -> str: @@ -298,7 +273,7 @@ class LTImage(LTComponent): Embedded 
images can be in JPEG, Bitmap or JBIG2. """ - def __init__(self, name: str, stream: PDFStream, bbox: Rect) -> None: + def __init__(self, name: str, stream: ContentStream, bbox: Rect) -> None: LTComponent.__init__(self, bbox) self.name = name self.stream = stream @@ -313,22 +288,7 @@ def __repr__(self) -> str: return f"<{self.__class__.__name__}({self.name}) {bbox2str(self.bbox)} {self.srcsize!r}>" -class LTAnno(LTItem, LTText): - """Actual letter in the text as a Unicode string. - - Note that, while a LTChar object has actual boundaries, LTAnno objects does - not, as these are "virtual" characters, inserted by a layout analyzer - according to the relationship between two characters (e.g. a space). - """ - - def __init__(self, text: str) -> None: - self._text = text - - def get_text(self) -> str: - return self._text - - -class LTChar(LTComponent, LTText): +class LTChar(LTComponent): """Actual letter in the text as a Unicode string.""" def __init__( @@ -348,7 +308,6 @@ def __init__( stroking_color: Optional[Color] = None, non_stroking_color: Optional[Color] = None, ) -> None: - LTText.__init__(self) self._text = text self.matrix = matrix self.fontname = font.fontname @@ -396,54 +355,7 @@ def get_text(self) -> str: return self._text -LTItemT = TypeVar("LTItemT", bound=LTItem) - - -class LTContainer(LTComponent, Generic[LTItemT]): - """Object that can be extended and analyzed""" - - def __init__(self, bbox: Rect) -> None: - LTComponent.__init__(self, bbox) - self._objs: List[LTItemT] = [] - - def __iter__(self) -> Iterator[LTItemT]: - return iter(self._objs) - - def __len__(self) -> int: - return len(self._objs) - - def add(self, obj: LTItemT) -> None: - self._objs.append(obj) - - def extend(self, objs: Iterable[LTItemT]) -> None: - for obj in objs: - self.add(obj) - - -class LTExpandableContainer(LTContainer[LTItemT]): - def __init__(self) -> None: - LTContainer.__init__(self, (+INF, +INF, -INF, -INF)) - - # Incompatible override: we take an LTComponent (with bounding 
box), but - # super() LTContainer only considers LTItem (no bounding box). - def add(self, obj: LTComponent) -> None: # type: ignore[override] - LTContainer.add(self, cast(LTItemT, obj)) - self.set_bbox( - ( - min(self.x0, obj.x0), - min(self.y0, obj.y0), - max(self.x1, obj.x1), - max(self.y1, obj.y1), - ), - ) - - -class LTLayoutContainer(LTContainer[LTComponent]): - def __init__(self, bbox: Rect) -> None: - LTContainer.__init__(self, bbox) - - -class LTFigure(LTLayoutContainer): +class LTFigure(LTComponent): """Represents an area used by PDF Form objects. PDF Forms can be used to present figures or pictures by embedding yet @@ -457,23 +369,21 @@ def __init__(self, name: str, bbox: Rect, matrix: Matrix) -> None: (x, y, w, h) = bbox bounds = ((x, y), (x + w, y), (x, y + h), (x + w, y + h)) bbox = get_bound(apply_matrix_pt(matrix, (p, q)) for (p, q) in bounds) - LTLayoutContainer.__init__(self, bbox) - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}({self.name}) {bbox2str(self.bbox)} matrix={matrix2str(self.matrix)}>" + LTComponent.__init__(self, bbox) + self._objs: List[LTComponent] = [] + def __iter__(self) -> Iterator[LTComponent]: + return iter(self._objs) -class LTPage(LTLayoutContainer): - """Represents an entire page. + def __len__(self) -> int: + return len(self._objs) - Like any other LTLayoutContainer, an LTPage can be iterated to obtain child - objects like LTTextBox, LTFigure, LTImage, LTRect, LTCurve and LTLine. 
- """ + def add(self, obj: LTComponent) -> None: + self._objs.append(obj) - def __init__(self, pageid: int, bbox: Rect, rotate: float = 0) -> None: - LTLayoutContainer.__init__(self, bbox) - self.pageid = pageid - self.rotate = rotate + def extend(self, objs: Iterable[LTComponent]) -> None: + for obj in objs: + self.add(obj) def __repr__(self) -> str: - return f"<{self.__class__.__name__}({self.pageid!r}) {bbox2str(self.bbox)} rotate={self.rotate!r}>" + return f"<{self.__class__.__name__}({self.name}) {bbox2str(self.bbox)} matrix={matrix2str(self.matrix)}>" diff --git a/playa/pdfpage.py b/playa/page.py similarity index 82% rename from playa/pdfpage.py rename to playa/page.py index fff7a579..73cddd0d 100644 --- a/playa/pdfpage.py +++ b/playa/page.py @@ -5,6 +5,7 @@ TYPE_CHECKING, Dict, Iterable, + Iterator, List, Optional, Sequence, @@ -15,14 +16,15 @@ from playa import settings from playa.casting import safe_float +from playa.color import PREDEFINED_COLORSPACE, PDFColorSpace from playa.exceptions import ( - PSEOF, PDFInterpreterError, PDFSyntaxError, PDFUnicodeNotDefined, PDFValueError, PSTypeError, ) +from playa.font import PDFFont from playa.layout import ( Color, LTChar, @@ -30,36 +32,26 @@ LTCurve, LTFigure, LTImage, - LTLayoutContainer, LTLine, - LTPage, LTRect, PDFGraphicState, ) -from playa.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace -from playa.pdffont import ( - PDFFont, -) +from playa.parser import Parser, PDFStackT, PSBaseParserToken from playa.pdftypes import ( - LITERALS_ASCII85_DECODE, - PDFObjRef, - PDFStream, - dict_value, - int_value, - list_value, - resolve1, - stream_value, -) -from playa.psparser import ( KWD, LIT, - PSBaseParserToken, + LITERALS_ASCII85_DECODE, + ContentStream, + ObjRef, PSKeyword, PSLiteral, - PSStackParser, - PSStackType, + dict_value, + int_value, keyword_name, + list_value, literal_name, + resolve1, + stream_value, ) from playa.utils import ( MATRIX_IDENTITY, @@ -77,7 +69,7 @@ ) if TYPE_CHECKING: - from 
playa.pdfdocument import PDFDocument, PDFResourceManager + from playa.document import PDFDocument log = logging.getLogger(__name__) @@ -89,47 +81,48 @@ PDFTextSeq = Iterable[Union[int, float, bytes]] -class PDFPage: +class Page: """An object that holds the information about a page. - A PDFPage object is merely a convenience class that has a set + A Page object is merely a convenience class that has a set of keys and values, which describe the properties of a page and point to its contents. Attributes ---------- - pageid: any Python object that can uniquely identify the page. + pageid: the integer object ID associated with the page in the page tree attrs: a dictionary of page attributes. - contents: a list of PDFStream objects that represents the page content. + contents: a list of ContentStream objects that represents the page content. resources: a dictionary of resources used by the page. mediabox: the physical size of the page. cropbox: the crop rectangle of the page. rotate: the page rotation (in degree). label: the page's label (typically, the logical page number). + page_number: the "physical" page number, indexed from 1. """ def __init__( self, doc: "PDFDocument", - pageid: object, - attrs: object, + pageid: int, + attrs: Dict, label: Optional[str], - page_number: int = 1, + page_idx: int = 0, ) -> None: """Initialize a page object. doc: a PDFDocument object. - pageid: any Python object that can uniquely identify the page. + pageid: the integer PDF object ID associated with the page in the page tree. attrs: a dictionary of page attributes. label: page label string. - page_number: page number (starting from 1) + page_idx: 0-based index of the page in the document. 
""" self.doc = weakref.ref(doc) self.pageid = pageid - self.attrs = dict_value(attrs) + self.attrs = attrs self.label = label - self.page_number = page_number + self.page_idx = page_idx self.lastmod = resolve1(self.attrs.get("LastModified")) self.resources: Dict[object, object] = resolve1( self.attrs.get("Resources", dict()), @@ -162,32 +155,16 @@ def __init__( self.contents = [self.contents] else: self.contents = [] - self._layout: Optional["LTPage"] = None @property - def layout(self) -> "LTPage": - if self._layout is not None: - return self._layout - - doc = self.doc() - if doc is None: - raise RuntimeError("Document no longer exists!") - # Q: How many classes does does it take a Java programmer to - # install a lightbulb? - device = PDFLayoutAnalyzer( - doc.rsrcmgr, - pageno=self.page_number, - ) - interpreter = PDFPageInterpreter(doc.rsrcmgr, device) - interpreter.process_page(self) - assert device.result is not None - self._layout = device.result - return self._layout + def layout(self) -> Iterator[LTComponent]: + return iter(PageInterpreter(self)) def __repr__(self) -> str: - return f"" + return f"" +# FIXME: Make a dataclass or NamedTuple class PDFTextState: matrix: Matrix linematrix: Point @@ -248,7 +225,7 @@ def reset(self) -> None: KEYWORD_EI = KWD(b"EI") -class PDFContentParser(PSStackParser[Union[PSKeyword, PDFStream]]): +class PDFContentParser(Parser[Union[PSKeyword, ContentStream]]): """Parse the concatenation of multiple content streams, as described in the spec (PDF 1.7, p.86): @@ -265,20 +242,21 @@ def __init__(self, streams: Sequence[object]) -> None: self.streamiter = iter(streams) try: stream = stream_value(next(self.streamiter)) + log.debug("PDFContentParser starting stream %r", stream) + super().__init__(stream.get_data()) except StopIteration: - raise PSEOF - log.debug("PDFContentParser starting stream %r", stream) - super().__init__(stream.get_data()) + log.debug("PDFContentParser has no content, returning nothing") + super().__init__(b"") 
- def __next__(self) -> Tuple[int, PSBaseParserToken]: + def nexttoken(self) -> Tuple[int, PSBaseParserToken]: while True: try: - return super().__next__() + return super().nexttoken() except StopIteration: # Will also raise StopIteration if there are no more, # which is exactly what we want stream = stream_value(next(self.streamiter)) - log.debug("PDFContentParser starting stream %r", stream) + log.debug("PDFContentParser starting new stream %r", stream) self.reinit(stream.get_data()) def flush(self) -> None: @@ -323,7 +301,7 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None: (pos, data) = self.get_inline_data(target=eos) if pos == -1: raise PDFSyntaxError("End of inline stream %r not found" % eos) - obj = PDFStream(d, data) + obj = ContentStream(d, data) self.push((pos, obj)) # This was included in the data but we need to "parse" it if eos == b"EI": @@ -335,592 +313,257 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None: self.push((pos, token)) -PDFStackT = PSStackType[PDFStream] -"""Types that may appear on the PDF argument stack.""" +class PageInterpreter: + """Processor for the content of a PDF page + Reference: PDF Reference, Appendix A, Operator Summary + """ -class PDFLayoutAnalyzer: - cur_item: LTLayoutContainer ctm: Matrix cur_mcid: Optional[int] = None cur_tag: Optional[str] = None def __init__( self, - rsrcmgr: "PDFResourceManager", - pageno: int = 1, + page: Page, + resources: Union[Dict, None] = None, + contents: Union[List, None] = None, ) -> None: - self.rsrcmgr = rsrcmgr - self.pageno = pageno - self._stack: List[LTLayoutContainer] = [] - self.result: Optional[LTPage] = None + self.page = page + self.contents = page.contents if contents is None else contents + (x0, y0, x1, y1) = page.mediabox + # FIXME: NO, this is bad, pdfplumber has a bug related to it + # (specifically the translation, the rotation is kind of okay + # it seems) + if page.rotate == 90: + ctm = (0, -1, 1, 0, -y0, x1) + elif page.rotate == 180: + ctm = (-1, 0, 0, 
-1, x1, y1) + elif page.rotate == 270: + ctm = (0, 1, -1, 0, y1, -x0) + else: + ctm = (1, 0, 0, 1, -x0, -y0) + self.init_resources(page, page.resources if resources is None else resources) + self.init_state(ctm) + + def init_resources(self, page: Page, resources: Dict) -> None: + """Prepare the fonts and XObjects listed in the Resource attribute.""" + self.resources = resources + self.fontmap: Dict[object, PDFFont] = {} + self.xobjmap = {} + self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy() + if not self.resources: + return + doc = page.doc() + if doc is None: + raise RuntimeError("Document no longer exists!") + + def get_colorspace(spec: object) -> Optional[PDFColorSpace]: + if isinstance(spec, list): + name = literal_name(spec[0]) + else: + name = literal_name(spec) + if name == "ICCBased" and isinstance(spec, list) and len(spec) >= 2: + return PDFColorSpace(name, stream_value(spec[1])["N"]) + elif name == "DeviceN" and isinstance(spec, list) and len(spec) >= 2: + return PDFColorSpace(name, len(list_value(spec[1]))) + else: + return PREDEFINED_COLORSPACE.get(name) + + for k, v in dict_value(self.resources).items(): + log.debug("Resource: %r: %r", k, v) + if k == "Font": + for fontid, spec in dict_value(v).items(): + objid = None + if isinstance(spec, ObjRef): + objid = spec.objid + spec = dict_value(spec) + self.fontmap[fontid] = doc.get_font(objid, spec) + elif k == "ColorSpace": + for csid, spec in dict_value(v).items(): + colorspace = get_colorspace(resolve1(spec)) + if colorspace is not None: + self.csmap[csid] = colorspace + elif k == "ProcSet": + pass # called get_procset which did exactly + # nothing. perhaps we want to do something? + elif k == "XObject": + for xobjid, xobjstrm in dict_value(v).items(): + self.xobjmap[xobjid] = xobjstrm - def set_ctm(self, ctm: Matrix) -> None: + def init_state(self, ctm: Matrix) -> None: + """Initialize the text and graphic states for rendering a page.""" + # gstack: stack for graphical states. 
+ self.gstack: List[Tuple[Matrix, PDFTextState, PDFGraphicState]] = [] self.ctm = ctm + self.textstate = PDFTextState() + self.graphicstate = PDFGraphicState() + self.curpath: List[PathSegment] = [] + # argstack: stack for command arguments. + self.argstack: List[PDFStackT] = [] + # set some global states. + self.scs: Optional[PDFColorSpace] = None + self.ncs: Optional[PDFColorSpace] = None + if self.csmap: + self.scs = self.ncs = next(iter(self.csmap.values())) - def begin_page(self, page: PDFPage, ctm: Matrix) -> None: - (x0, y0, x1, y1) = page.mediabox - (x0, y0) = apply_matrix_pt(ctm, (x0, y0)) - (x1, y1) = apply_matrix_pt(ctm, (x1, y1)) - mediabox = (0, 0, abs(x0 - x1), abs(y0 - y1)) - self.cur_item = LTPage(self.pageno, mediabox) - - def end_page(self, page: PDFPage) -> None: - assert not self._stack, str(len(self._stack)) - assert isinstance(self.cur_item, LTPage), str(type(self.cur_item)) - self.pageno += 1 - self.receive_layout(self.cur_item) - - def begin_figure(self, name: str, bbox: Rect, matrix: Matrix) -> None: - self._stack.append(self.cur_item) - self.cur_item = LTFigure(name, bbox, mult_matrix(matrix, self.ctm)) - - def end_figure(self, _: str) -> None: - fig = self.cur_item - assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) - self.cur_item = self._stack.pop() - self.cur_item.add(fig) + def __iter__(self) -> Iterator[LTComponent]: + log.debug( + "PageInterpreter: resources=%r, streams=%r, ctm=%r", + self.resources, + self.contents, + self.ctm, + ) + parser = PDFContentParser(self.contents) + for _, obj in parser: + if isinstance(obj, PSKeyword): + name = keyword_name(obj) + method = "do_%s" % name.replace("*", "_a").replace('"', "_w").replace( + "'", + "_q", + ) + if hasattr(self, method): + func = getattr(self, method) + nargs = func.__code__.co_argcount - 1 + if nargs: + args = self.pop(nargs) + log.debug("exec: %s %r", name, args) + if len(args) == nargs: + gen = func(*args) + else: + error_msg = ( + "Insufficient arguments 
(%d) for operator: %r" + % (len(args), name) + ) + raise PDFInterpreterError(error_msg) + else: + log.debug("exec: %s", name) + gen = func() + if gen is not None: + yield from gen + elif settings.STRICT: + error_msg = "Unknown operator: %r" % name + raise PDFInterpreterError(error_msg) + else: + self.push(obj) - def begin_tag(self, tag: PSLiteral, props: Optional[PDFStackT] = None) -> None: - """Handle beginning of tag, setting current MCID if any.""" - self.cur_tag = decode_text(tag.name) - if isinstance(props, dict) and "MCID" in props: - self.cur_mcid = props["MCID"] - else: - self.cur_mcid = None + def push(self, obj: PDFStackT) -> None: + self.argstack.append(obj) - def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None: - pass + def pop(self, n: int) -> List[PDFStackT]: + if n == 0: + return [] + x = self.argstack[-n:] + self.argstack = self.argstack[:-n] + return x - def end_tag(self) -> None: - """Handle beginning of tag, clearing current MCID.""" - self.cur_tag = None - self.cur_mcid = None + def get_current_state(self) -> Tuple[Matrix, PDFTextState, PDFGraphicState]: + return (self.ctm, self.textstate.copy(), self.graphicstate.copy()) - def add_item(self, item: LTComponent) -> None: - item.mcid = self.cur_mcid - item.tag = self.cur_tag - self.cur_item.add(item) + def set_current_state( + self, + state: Tuple[Matrix, PDFTextState, PDFGraphicState], + ) -> None: + (self.ctm, self.textstate, self.graphicstate) = state - def render_image(self, name: str, stream: PDFStream) -> None: - assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) - item = LTImage( - name, - stream, - (self.cur_item.x0, self.cur_item.y0, self.cur_item.x1, self.cur_item.y1), - ) - self.add_item(item) + def do_q(self) -> None: + """Save graphics state""" + self.gstack.append(self.get_current_state()) - def paint_path( + def do_Q(self) -> None: + """Restore graphics state""" + if self.gstack: + self.set_current_state(self.gstack.pop()) + + def do_cm( 
self, - gstate: PDFGraphicState, - stroke: bool, - fill: bool, - evenodd: bool, - path: Sequence[PathSegment], - ncs: Optional[PDFColorSpace] = None, - scs: Optional[PDFColorSpace] = None, + a1: PDFStackT, + b1: PDFStackT, + c1: PDFStackT, + d1: PDFStackT, + e1: PDFStackT, + f1: PDFStackT, ) -> None: - """Paint paths described in section 4.4 of the PDF reference manual""" - shape = "".join(x[0] for x in path) + """Concatenate matrix to current transformation matrix""" + self.ctm = mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm) - if shape[:1] != "m": - # Per PDF Reference Section 4.4.1, "path construction operators may - # be invoked in any sequence, but the first one invoked must be m - # or re to begin a new subpath." Since pdfminer.six already - # converts all `re` (rectangle) operators to their equivelent - # `mlllh` representation, paths ingested by `.paint_path(...)` that - # do not begin with the `m` operator are invalid. - pass + def do_w(self, linewidth: PDFStackT) -> None: + """Set line width""" + self.graphicstate.linewidth = cast(float, linewidth) - elif shape.count("m") > 1: - # recurse if there are multiple m's in this shape - for m in re.finditer(r"m[^m]+", shape): - subpath = path[m.start(0) : m.end(0)] - self.paint_path(gstate, stroke, fill, evenodd, subpath, ncs, scs) + def do_J(self, linecap: PDFStackT) -> None: + """Set line cap style""" + self.graphicstate.linecap = linecap - else: - # Although the 'h' command does not not literally provide a - # point-position, its position is (by definition) equal to the - # subpath's starting point. - # - # And, per Section 4.4's Table 4.9, all other path commands place - # their point-position in their final two arguments. (Any preceding - # arguments represent control points on Bézier curves.) 
- raw_pts = [ - cast(Point, p[-2:] if p[0] != "h" else path[0][-2:]) for p in path - ] - pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts] + def do_j(self, linejoin: PDFStackT) -> None: + """Set line join style""" + self.graphicstate.linejoin = linejoin - operators = [str(operation[0]) for operation in path] - transformed_points = [ - [ - apply_matrix_pt(self.ctm, (float(operand1), float(operand2))) - for operand1, operand2 in zip(operation[1::2], operation[2::2]) - ] - for operation in path - ] - transformed_path = [ - cast(PathSegment, (o, *p)) - for o, p in zip(operators, transformed_points) - ] + def do_M(self, miterlimit: PDFStackT) -> None: + """Set miter limit""" + self.graphicstate.miterlimit = miterlimit - if shape in {"mlh", "ml"}: - # single line segment - # - # Note: 'ml', in conditional above, is a frequent anomaly - # that we want to support. - line = LTLine( - gstate.linewidth, - pts[0], - pts[1], - stroke, - fill, - evenodd, - gstate.scolor, - gstate.ncolor, - original_path=transformed_path, - dashing_style=gstate.dash, - ncs=ncs, - scs=scs, - ) - self.add_item(line) + def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None: + """Set line dash pattern""" + self.graphicstate.dash = (dash, phase) - elif shape in {"mlllh", "mllll"}: - (x0, y0), (x1, y1), (x2, y2), (x3, y3), _ = pts + def do_ri(self, intent: PDFStackT) -> None: + """Set color rendering intent""" + self.graphicstate.intent = intent - is_closed_loop = pts[0] == pts[4] - has_square_coordinates = ( - x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0 - ) or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0) - if is_closed_loop and has_square_coordinates: - rect = LTRect( - gstate.linewidth, - (*pts[0], *pts[2]), - stroke, - fill, - evenodd, - gstate.scolor, - gstate.ncolor, - transformed_path, - gstate.dash, - ncs, - scs, - ) - self.add_item(rect) - else: - curve = LTCurve( - gstate.linewidth, - pts, - stroke, - fill, - evenodd, - gstate.scolor, - gstate.ncolor, - transformed_path, - 
gstate.dash, - ncs, - scs, - ) - self.add_item(curve) - else: - curve = LTCurve( - gstate.linewidth, - pts, - stroke, - fill, - evenodd, - gstate.scolor, - gstate.ncolor, - transformed_path, - gstate.dash, - ncs, - scs, - ) - self.add_item(curve) + def do_i(self, flatness: PDFStackT) -> None: + """Set flatness tolerance""" + self.graphicstate.flatness = flatness - def render_char( - self, - matrix: Matrix, - font: PDFFont, - fontsize: float, - scaling: float, - rise: float, - cid: int, - ncs: PDFColorSpace, - graphicstate: PDFGraphicState, - scs: Optional[PDFColorSpace] = None, - ) -> float: - try: - text = font.to_unichr(cid) - assert isinstance(text, str), str(type(text)) - except PDFUnicodeNotDefined: - text = self.handle_undefined_char(font, cid) - textwidth = font.char_width(cid) - textdisp = font.char_disp(cid) - item = LTChar( - matrix, - font, - fontsize, - scaling, - rise, - text, - textwidth, - textdisp, - ncs, - graphicstate, - scs, - graphicstate.scolor, - graphicstate.ncolor, - ) - self.add_item(item) - return item.adv + def do_gs(self, name: PDFStackT) -> None: + """Set parameters from graphics state parameter dictionary""" + # TODO - def render_string( - self, - textstate: "PDFTextState", - seq: PDFTextSeq, - ncs: PDFColorSpace, - graphicstate: "PDFGraphicState", - scs: Optional[PDFColorSpace] = None, - ) -> None: - assert self.ctm is not None - matrix = mult_matrix(textstate.matrix, self.ctm) - font = textstate.font - fontsize = textstate.fontsize - scaling = textstate.scaling * 0.01 - charspace = textstate.charspace * scaling - wordspace = textstate.wordspace * scaling - rise = textstate.rise - assert font is not None - if font.is_multibyte(): - wordspace = 0 - dxscale = 0.001 * fontsize * scaling - if font.is_vertical(): - textstate.linematrix = self.render_string_vertical( - seq, - matrix, - textstate.linematrix, - font, - fontsize, - scaling, - charspace, - wordspace, - rise, - dxscale, - ncs, - graphicstate, - scs, - ) - else: - 
textstate.linematrix = self.render_string_horizontal( - seq, - matrix, - textstate.linematrix, - font, - fontsize, - scaling, - charspace, - wordspace, - rise, - dxscale, - ncs, - graphicstate, - scs, - ) + def do_m(self, x: PDFStackT, y: PDFStackT) -> None: + """Begin new subpath""" + self.curpath.append(("m", cast(float, x), cast(float, y))) - def render_string_horizontal( - self, - seq: PDFTextSeq, - matrix: Matrix, - pos: Point, - font: PDFFont, - fontsize: float, - scaling: float, - charspace: float, - wordspace: float, - rise: float, - dxscale: float, - ncs: PDFColorSpace, - graphicstate: "PDFGraphicState", - scs: Optional[PDFColorSpace] = None, - ) -> Point: - (x, y) = pos - needcharspace = False - for obj in seq: - if isinstance(obj, (int, float)): - x -= obj * dxscale - needcharspace = True - else: - if isinstance(obj, str): - obj = make_compat_bytes(obj) - if not isinstance(obj, bytes): - continue - for cid in font.decode(obj): - if needcharspace: - x += charspace - x += self.render_char( - translate_matrix(matrix, (x, y)), - font, - fontsize, - scaling, - rise, - cid, - ncs, - graphicstate, - scs, - ) - if cid == 32 and wordspace: - x += wordspace - needcharspace = True - return (x, y) + def do_l(self, x: PDFStackT, y: PDFStackT) -> None: + """Append straight line segment to path""" + self.curpath.append(("l", cast(float, x), cast(float, y))) - def render_string_vertical( + def do_c( self, - seq: PDFTextSeq, - matrix: Matrix, - pos: Point, - font: PDFFont, - fontsize: float, - scaling: float, - charspace: float, - wordspace: float, - rise: float, - dxscale: float, - ncs: PDFColorSpace, - graphicstate: "PDFGraphicState", - scs: Optional[PDFColorSpace] = None, - ) -> Point: - (x, y) = pos - needcharspace = False - for obj in seq: - if isinstance(obj, (int, float)): - y -= obj * dxscale - needcharspace = True - else: - if isinstance(obj, str): - obj = make_compat_bytes(obj) - if not isinstance(obj, bytes): - continue - for cid in font.decode(obj): - if 
needcharspace: - y += charspace - y += self.render_char( - translate_matrix(matrix, (x, y)), - font, - fontsize, - scaling, - rise, - cid, - ncs, - graphicstate, - scs, - ) - if cid == 32 and wordspace: - y += wordspace - needcharspace = True - return (x, y) - - def handle_undefined_char(self, font: PDFFont, cid: int) -> str: - log.debug("undefined: %r, %r", font, cid) - return "(cid:%d)" % cid - - def receive_layout(self, ltpage: LTPage) -> None: - self.result = ltpage - - -class PDFPageInterpreter: - """Processor for the content of a PDF page + x1: PDFStackT, + y1: PDFStackT, + x2: PDFStackT, + y2: PDFStackT, + x3: PDFStackT, + y3: PDFStackT, + ) -> None: + """Append curved segment to path (three control points)""" + self.curpath.append( + ( + "c", + cast(float, x1), + cast(float, y1), + cast(float, x2), + cast(float, y2), + cast(float, x3), + cast(float, y3), + ), + ) - Reference: PDF Reference, Appendix A, Operator Summary - """ + def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None: + """Append curved segment to path (initial point replicated)""" + self.curpath.append( + ("v", cast(float, x2), cast(float, y2), cast(float, x3), cast(float, y3)), + ) - def __init__( - self, rsrcmgr: "PDFResourceManager", device: "PDFLayoutAnalyzer" - ) -> None: - self.rsrcmgr = rsrcmgr - self.device = device + def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None: + """Append curved segment to path (final point replicated)""" + self.curpath.append( + ("y", cast(float, x1), cast(float, y1), cast(float, x3), cast(float, y3)), + ) - def dup(self) -> "PDFPageInterpreter": - return self.__class__(self.rsrcmgr, self.device) - - def init_resources(self, resources: Dict[object, object]) -> None: - """Prepare the fonts and XObjects listed in the Resource attribute.""" - self.resources = resources - self.fontmap: Dict[object, PDFFont] = {} - self.xobjmap = {} - self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy() - 
if not resources: - return - - def get_colorspace(spec: object) -> Optional[PDFColorSpace]: - if isinstance(spec, list): - name = literal_name(spec[0]) - else: - name = literal_name(spec) - if name == "ICCBased" and isinstance(spec, list) and len(spec) >= 2: - return PDFColorSpace(name, stream_value(spec[1])["N"]) - elif name == "DeviceN" and isinstance(spec, list) and len(spec) >= 2: - return PDFColorSpace(name, len(list_value(spec[1]))) - else: - return PREDEFINED_COLORSPACE.get(name) - - for k, v in dict_value(resources).items(): - log.debug("Resource: %r: %r", k, v) - if k == "Font": - for fontid, spec in dict_value(v).items(): - objid = None - if isinstance(spec, PDFObjRef): - objid = spec.objid - spec = dict_value(spec) - self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec) - elif k == "ColorSpace": - for csid, spec in dict_value(v).items(): - colorspace = get_colorspace(resolve1(spec)) - if colorspace is not None: - self.csmap[csid] = colorspace - elif k == "ProcSet": - self.rsrcmgr.get_procset(list_value(v)) - elif k == "XObject": - for xobjid, xobjstrm in dict_value(v).items(): - self.xobjmap[xobjid] = xobjstrm - - def init_state(self, ctm: Matrix) -> None: - """Initialize the text and graphic states for rendering a page.""" - # gstack: stack for graphical states. - self.gstack: List[Tuple[Matrix, PDFTextState, PDFGraphicState]] = [] - self.ctm = ctm - self.device.set_ctm(self.ctm) - self.textstate = PDFTextState() - self.graphicstate = PDFGraphicState() - self.curpath: List[PathSegment] = [] - # argstack: stack for command arguments. - self.argstack: List[PDFStackT] = [] - # set some global states. 
- self.scs: Optional[PDFColorSpace] = None - self.ncs: Optional[PDFColorSpace] = None - if self.csmap: - self.scs = self.ncs = next(iter(self.csmap.values())) - - def push(self, obj: PDFStackT) -> None: - self.argstack.append(obj) - - def pop(self, n: int) -> List[PDFStackT]: - if n == 0: - return [] - x = self.argstack[-n:] - self.argstack = self.argstack[:-n] - return x - - def get_current_state(self) -> Tuple[Matrix, PDFTextState, PDFGraphicState]: - return (self.ctm, self.textstate.copy(), self.graphicstate.copy()) - - def set_current_state( - self, - state: Tuple[Matrix, PDFTextState, PDFGraphicState], - ) -> None: - (self.ctm, self.textstate, self.graphicstate) = state - self.device.set_ctm(self.ctm) - - def do_q(self) -> None: - """Save graphics state""" - self.gstack.append(self.get_current_state()) - - def do_Q(self) -> None: - """Restore graphics state""" - if self.gstack: - self.set_current_state(self.gstack.pop()) - - def do_cm( - self, - a1: PDFStackT, - b1: PDFStackT, - c1: PDFStackT, - d1: PDFStackT, - e1: PDFStackT, - f1: PDFStackT, - ) -> None: - """Concatenate matrix to current transformation matrix""" - self.ctm = mult_matrix(cast(Matrix, (a1, b1, c1, d1, e1, f1)), self.ctm) - self.device.set_ctm(self.ctm) - - def do_w(self, linewidth: PDFStackT) -> None: - """Set line width""" - self.graphicstate.linewidth = cast(float, linewidth) - - def do_J(self, linecap: PDFStackT) -> None: - """Set line cap style""" - self.graphicstate.linecap = linecap - - def do_j(self, linejoin: PDFStackT) -> None: - """Set line join style""" - self.graphicstate.linejoin = linejoin - - def do_M(self, miterlimit: PDFStackT) -> None: - """Set miter limit""" - self.graphicstate.miterlimit = miterlimit - - def do_d(self, dash: PDFStackT, phase: PDFStackT) -> None: - """Set line dash pattern""" - self.graphicstate.dash = (dash, phase) - - def do_ri(self, intent: PDFStackT) -> None: - """Set color rendering intent""" - self.graphicstate.intent = intent - - def do_i(self, 
flatness: PDFStackT) -> None: - """Set flatness tolerance""" - self.graphicstate.flatness = flatness - - def do_gs(self, name: PDFStackT) -> None: - """Set parameters from graphics state parameter dictionary""" - # TODO - - def do_m(self, x: PDFStackT, y: PDFStackT) -> None: - """Begin new subpath""" - self.curpath.append(("m", cast(float, x), cast(float, y))) - - def do_l(self, x: PDFStackT, y: PDFStackT) -> None: - """Append straight line segment to path""" - self.curpath.append(("l", cast(float, x), cast(float, y))) - - def do_c( - self, - x1: PDFStackT, - y1: PDFStackT, - x2: PDFStackT, - y2: PDFStackT, - x3: PDFStackT, - y3: PDFStackT, - ) -> None: - """Append curved segment to path (three control points)""" - self.curpath.append( - ( - "c", - cast(float, x1), - cast(float, y1), - cast(float, x2), - cast(float, y2), - cast(float, x3), - cast(float, y3), - ), - ) - - def do_v(self, x2: PDFStackT, y2: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None: - """Append curved segment to path (initial point replicated)""" - self.curpath.append( - ("v", cast(float, x2), cast(float, y2), cast(float, x3), cast(float, y3)), - ) - - def do_y(self, x1: PDFStackT, y1: PDFStackT, x3: PDFStackT, y3: PDFStackT) -> None: - """Append curved segment to path (final point replicated)""" - self.curpath.append( - ("y", cast(float, x1), cast(float, y1), cast(float, x3), cast(float, y3)), - ) - - def do_h(self) -> None: - """Close subpath""" - self.curpath.append(("h",)) + def do_h(self) -> None: + """Close subpath""" + self.curpath.append(("h",)) def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None: """Append rectangle to path""" @@ -934,21 +577,21 @@ def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None: self.curpath.append(("l", x, y + h)) self.curpath.append(("h",)) - def do_S(self) -> None: + def do_S(self) -> Iterator[LTComponent]: """Stroke path""" - self.device.paint_path( + yield from self.paint_path( self.graphicstate, True, 
False, False, self.curpath, self.ncs, self.scs ) self.curpath = [] - def do_s(self) -> None: + def do_s(self) -> Iterator[LTComponent]: """Close and stroke path""" self.do_h() - self.do_S() + yield from self.do_S() - def do_f(self) -> None: + def do_f(self) -> Iterator[LTComponent]: """Fill path using nonzero winding number rule""" - self.device.paint_path( + yield from self.paint_path( self.graphicstate, False, True, False, self.curpath, self.ncs, self.scs ) self.curpath = [] @@ -956,36 +599,36 @@ def do_f(self) -> None: def do_F(self) -> None: """Fill path using nonzero winding number rule (obsolete)""" - def do_f_a(self) -> None: + def do_f_a(self) -> Iterator[LTComponent]: """Fill path using even-odd rule""" - self.device.paint_path( + yield from self.paint_path( self.graphicstate, False, True, True, self.curpath, self.ncs, self.scs ) self.curpath = [] - def do_B(self) -> None: + def do_B(self) -> Iterator[LTComponent]: """Fill and stroke path using nonzero winding number rule""" - self.device.paint_path( + yield from self.paint_path( self.graphicstate, True, True, False, self.curpath, self.ncs, self.scs ) self.curpath = [] - def do_B_a(self) -> None: + def do_B_a(self) -> Iterator[LTComponent]: """Fill and stroke path using even-odd rule""" - self.device.paint_path( + yield from self.paint_path( self.graphicstate, True, True, True, self.curpath, self.ncs, self.scs ) self.curpath = [] - def do_b(self) -> None: + def do_b(self) -> Iterator[LTComponent]: """Close, fill, and stroke path using nonzero winding number rule""" self.do_h() - self.do_B() + yield from self.do_B() - def do_b_a(self) -> None: + def do_b_a(self) -> Iterator[LTComponent]: """Close, fill, and stroke path using even-odd rule""" self.do_h() - self.do_B_a() + yield from self.do_B_a() def do_n(self) -> None: """End path without filling or stroking""" @@ -1107,23 +750,23 @@ def do_EX(self) -> None: def do_MP(self, tag: PDFStackT) -> None: """Define marked-content point""" - 
self.device.do_tag(cast(PSLiteral, tag)) + self.do_tag(cast(PSLiteral, tag)) def do_DP(self, tag: PDFStackT, props: PDFStackT) -> None: """Define marked-content point with property list""" - self.device.do_tag(cast(PSLiteral, tag), props) + self.do_tag(cast(PSLiteral, tag), props) def do_BMC(self, tag: PDFStackT) -> None: """Begin marked-content sequence""" - self.device.begin_tag(cast(PSLiteral, tag)) + self.begin_tag(cast(PSLiteral, tag)) def do_BDC(self, tag: PDFStackT, props: PDFStackT) -> None: """Begin marked-content sequence with property list""" - self.device.begin_tag(cast(PSLiteral, tag), props) + self.begin_tag(cast(PSLiteral, tag), props) def do_EMC(self) -> None: """End marked-content sequence""" - self.device.end_tag() + self.end_tag() def do_Tc(self, space: PDFStackT) -> None: """Set character spacing. @@ -1171,7 +814,10 @@ def do_Tf(self, fontid: PDFStackT, fontsize: PDFStackT) -> None: except KeyError: if settings.STRICT: raise PDFInterpreterError("Undefined Font id: %r" % fontid) - self.textstate.font = self.rsrcmgr.get_font(None, {}) + doc = self.page.doc() + if doc is None: + raise RuntimeError("Document no longer exists!") + self.textstate.font = doc.get_font(None, {}) self.textstate.fontsize = cast(float, fontsize) def do_Tr(self, render: PDFStackT) -> None: @@ -1252,7 +898,7 @@ def do_T_a(self) -> None: ) self.textstate.linematrix = (0, 0) - def do_TJ(self, seq: PDFStackT) -> None: + def do_TJ(self, seq: PDFStackT) -> Iterator[LTComponent]: """Show text, allowing individual glyph positioning""" if self.textstate.font is None: if settings.STRICT: @@ -1261,7 +907,7 @@ def do_TJ(self, seq: PDFStackT) -> None: # FIXME: Are we sure? 
assert self.ncs is not None assert self.scs is not None - self.device.render_string( + yield from self.render_string( self.textstate, cast(PDFTextSeq, seq), self.ncs, @@ -1269,26 +915,28 @@ def do_TJ(self, seq: PDFStackT) -> None: self.scs, ) - def do_Tj(self, s: PDFStackT) -> None: + def do_Tj(self, s: PDFStackT) -> Iterator[LTComponent]: """Show text""" - self.do_TJ([s]) + yield from self.do_TJ([s]) - def do__q(self, s: PDFStackT) -> None: + def do__q(self, s: PDFStackT) -> Iterator[LTComponent]: """Move to next line and show text The ' (single quote) operator. """ self.do_T_a() - self.do_TJ([s]) + yield from self.do_TJ([s]) - def do__w(self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT) -> None: + def do__w( + self, aw: PDFStackT, ac: PDFStackT, s: PDFStackT + ) -> Iterator[LTComponent]: """Set word and character spacing, move to next line, and show text The " (double quote) operator. """ self.do_Tw(aw) self.do_Tc(ac) - self.do_TJ([s]) + yield from self.do_TJ([s]) def do_BI(self) -> None: """Begin inline image object""" @@ -1296,15 +944,15 @@ def do_BI(self) -> None: def do_ID(self) -> None: """Begin inline image data""" - def do_EI(self, obj: PDFStackT) -> None: + def do_EI(self, obj: PDFStackT) -> Iterator[LTComponent]: """End inline image object""" - if isinstance(obj, PDFStream) and "W" in obj and "H" in obj: + if isinstance(obj, ContentStream) and "W" in obj and "H" in obj: iobjid = str(id(obj)) - self.device.begin_figure(iobjid, (0, 0, 1, 1), MATRIX_IDENTITY) - self.device.render_image(iobjid, obj) - self.device.end_figure(iobjid) + fig = LTFigure(iobjid, (0, 0, 1, 1), self.ctm) + fig.add(self.render_image(iobjid, obj, fig)) + yield fig - def do_Do(self, xobjid_arg: PDFStackT) -> None: + def do_Do(self, xobjid_arg: PDFStackT) -> Iterator[LTComponent]: """Invoke named XObject""" xobjid = literal_name(xobjid_arg) try: @@ -1316,7 +964,6 @@ def do_Do(self, xobjid_arg: PDFStackT) -> None: log.debug("Processing xobj: %r", xobj) subtype = xobj.get("Subtype") if 
subtype is LITERAL_FORM and "BBox" in xobj: - interpreter = self.dup() bbox = cast(Rect, list_value(xobj["BBox"])) matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY))) # According to PDF reference 1.7 section 4.9.1, XObjects in @@ -1324,92 +971,367 @@ def do_Do(self, xobjid_arg: PDFStackT) -> None: # instead of having their own Resources entry. xobjres = xobj.get("Resources") if xobjres: - resources = dict_value(xobjres) + interpreter = PageInterpreter( + self.page, resources=dict_value(xobjres), contents=[xobj] + ) else: - resources = self.resources.copy() - self.device.begin_figure(xobjid, bbox, matrix) - interpreter.render_contents( - resources, - [xobj], - ctm=mult_matrix(matrix, self.ctm), - ) - self.device.end_figure(xobjid) + interpreter = PageInterpreter(self.page, contents=[xobj]) + interpreter.ctm = mult_matrix(matrix, self.ctm) + fig = LTFigure(xobjid, bbox, interpreter.ctm) + for item in interpreter: + fig.add(item) + yield fig elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj: - self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY) - self.device.render_image(xobjid, xobj) - self.device.end_figure(xobjid) + fig = LTFigure(xobjid, (0, 0, 1, 1), self.ctm) + fig.add(self.render_image(xobjid, xobj, fig)) + yield fig else: # unsupported xobject type. 
pass - def process_page(self, page: PDFPage) -> None: - log.debug("Processing page: %r", page) - (x0, y0, x1, y1) = page.mediabox - # FIXME: NO, this is bad, pdfplumber has a bug related to it - # (specifically the translation, the rotation is kind of okay - # it seems) - if page.rotate == 90: - ctm = (0, -1, 1, 0, -y0, x1) - elif page.rotate == 180: - ctm = (-1, 0, 0, -1, x1, y1) - elif page.rotate == 270: - ctm = (0, 1, -1, 0, y1, -x0) + def begin_tag(self, tag: PSLiteral, props: Optional[PDFStackT] = None) -> None: + """Handle beginning of tag, setting current MCID if any.""" + self.cur_tag = decode_text(tag.name) + if isinstance(props, dict) and "MCID" in props: + self.cur_mcid = props["MCID"] else: - ctm = (1, 0, 0, 1, -x0, -y0) - self.device.begin_page(page, ctm) - self.render_contents(page.resources, page.contents, ctm=ctm) - self.device.end_page(page) + self.cur_mcid = None - def render_contents( - self, - resources: Dict[object, object], - streams: Sequence[object], - ctm: Matrix = MATRIX_IDENTITY, - ) -> None: - """Render the content streams. + def do_tag(self, tag: PSLiteral, props: Optional["PDFStackT"] = None) -> None: + pass - This method may be called recursively. 
- """ - log.debug( - "render_contents: resources=%r, streams=%r, ctm=%r", - resources, - streams, - ctm, + def end_tag(self) -> None: + """Handle beginning of tag, clearing current MCID.""" + self.cur_tag = None + self.cur_mcid = None + + def render_image( + self, name: str, stream: ContentStream, figure: LTFigure + ) -> LTImage: + return LTImage( + name, + stream, + (figure.x0, figure.y0, figure.x1, figure.y1), ) - self.init_resources(resources) - self.init_state(ctm) - self.execute(list_value(streams)) - def execute(self, streams: Sequence[object]) -> None: - try: - parser = PDFContentParser(streams) - except PSEOF: - # empty page - return - while True: - try: - (_, obj) = parser.nextobject() - except PSEOF: - break - if isinstance(obj, PSKeyword): - name = keyword_name(obj) - method = "do_%s" % name.replace("*", "_a").replace('"', "_w").replace( - "'", - "_q", + def paint_path( + self, + gstate: PDFGraphicState, + stroke: bool, + fill: bool, + evenodd: bool, + path: Sequence[PathSegment], + ncs: Optional[PDFColorSpace] = None, + scs: Optional[PDFColorSpace] = None, + ) -> Iterator[LTComponent]: + """Paint paths described in section 4.4 of the PDF reference manual""" + shape = "".join(x[0] for x in path) + + if shape[:1] != "m": + # Per PDF Reference Section 4.4.1, "path construction operators may + # be invoked in any sequence, but the first one invoked must be m + # or re to begin a new subpath." Since pdfminer.six already + # converts all `re` (rectangle) operators to their equivelent + # `mlllh` representation, paths ingested by `.paint_path(...)` that + # do not begin with the `m` operator are invalid. 
+ pass + + elif shape.count("m") > 1: + # recurse if there are multiple m's in this shape + for m in re.finditer(r"m[^m]+", shape): + subpath = path[m.start(0) : m.end(0)] + yield from self.paint_path( + gstate, stroke, fill, evenodd, subpath, ncs, scs ) - if hasattr(self, method): - func = getattr(self, method) - nargs = func.__code__.co_argcount - 1 - if nargs: - args = self.pop(nargs) - log.debug("exec: %s %r", name, args) - if len(args) == nargs: - func(*args) - else: - log.debug("exec: %s", name) - func() - elif settings.STRICT: - error_msg = "Unknown operator: %r" % name - raise PDFInterpreterError(error_msg) - else: - self.push(obj) + + else: + # Although the 'h' command does not not literally provide a + # point-position, its position is (by definition) equal to the + # subpath's starting point. + # + # And, per Section 4.4's Table 4.9, all other path commands place + # their point-position in their final two arguments. (Any preceding + # arguments represent control points on Bézier curves.) + raw_pts = [ + cast(Point, p[-2:] if p[0] != "h" else path[0][-2:]) for p in path + ] + pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts] + + operators = [str(operation[0]) for operation in path] + transformed_points = [ + [ + apply_matrix_pt(self.ctm, (float(operand1), float(operand2))) + for operand1, operand2 in zip(operation[1::2], operation[2::2]) + ] + for operation in path + ] + transformed_path = [ + cast(PathSegment, (o, *p)) + for o, p in zip(operators, transformed_points) + ] + + if shape in {"mlh", "ml"}: + # single line segment + # + # Note: 'ml', in conditional above, is a frequent anomaly + # that we want to support. 
+ line = LTLine( + gstate.linewidth, + pts[0], + pts[1], + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + original_path=transformed_path, + dashing_style=gstate.dash, + ncs=ncs, + scs=scs, + ) + yield line + + elif shape in {"mlllh", "mllll"}: + (x0, y0), (x1, y1), (x2, y2), (x3, y3), _ = pts + + is_closed_loop = pts[0] == pts[4] + has_square_coordinates = ( + x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0 + ) or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0) + if is_closed_loop and has_square_coordinates: + rect = LTRect( + gstate.linewidth, + (*pts[0], *pts[2]), + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + transformed_path, + gstate.dash, + ncs, + scs, + ) + yield rect + else: + curve = LTCurve( + gstate.linewidth, + pts, + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + transformed_path, + gstate.dash, + ncs, + scs, + ) + yield curve + else: + curve = LTCurve( + gstate.linewidth, + pts, + stroke, + fill, + evenodd, + gstate.scolor, + gstate.ncolor, + transformed_path, + gstate.dash, + ncs, + scs, + ) + yield curve + + def render_char( + self, + matrix: Matrix, + font: PDFFont, + fontsize: float, + scaling: float, + rise: float, + cid: int, + ncs: PDFColorSpace, + graphicstate: PDFGraphicState, + scs: Optional[PDFColorSpace] = None, + ) -> LTChar: + try: + text = font.to_unichr(cid) + assert isinstance(text, str), str(type(text)) + except PDFUnicodeNotDefined: + text = self.handle_undefined_char(font, cid) + textwidth = font.char_width(cid) + textdisp = font.char_disp(cid) + item = LTChar( + matrix, + font, + fontsize, + scaling, + rise, + text, + textwidth, + textdisp, + ncs, + graphicstate, + scs, + graphicstate.scolor, + graphicstate.ncolor, + ) + return item + + def render_string( + self, + textstate: "PDFTextState", + seq: PDFTextSeq, + ncs: PDFColorSpace, + graphicstate: "PDFGraphicState", + scs: Optional[PDFColorSpace] = None, + ) -> Iterator[LTComponent]: + assert self.ctm is not None + matrix = 
mult_matrix(textstate.matrix, self.ctm) + font = textstate.font + fontsize = textstate.fontsize + scaling = textstate.scaling * 0.01 + charspace = textstate.charspace * scaling + wordspace = textstate.wordspace * scaling + rise = textstate.rise + assert font is not None + if font.is_multibyte(): + wordspace = 0 + dxscale = 0.001 * fontsize * scaling + if font.is_vertical(): + textstate.linematrix, chars = self.render_string_vertical( + seq, + matrix, + textstate.linematrix, + font, + fontsize, + scaling, + charspace, + wordspace, + rise, + dxscale, + ncs, + graphicstate, + scs, + ) + else: + textstate.linematrix, chars = self.render_string_horizontal( + seq, + matrix, + textstate.linematrix, + font, + fontsize, + scaling, + charspace, + wordspace, + rise, + dxscale, + ncs, + graphicstate, + scs, + ) + yield from chars + + def render_string_horizontal( + self, + seq: PDFTextSeq, + matrix: Matrix, + pos: Point, + font: PDFFont, + fontsize: float, + scaling: float, + charspace: float, + wordspace: float, + rise: float, + dxscale: float, + ncs: PDFColorSpace, + graphicstate: "PDFGraphicState", + scs: Optional[PDFColorSpace] = None, + ) -> Tuple[Point, List[LTChar]]: + (x, y) = pos + needcharspace = False + chars = [] + for obj in seq: + if isinstance(obj, (int, float)): + x -= obj * dxscale + needcharspace = True + else: + if isinstance(obj, str): + obj = make_compat_bytes(obj) + if not isinstance(obj, bytes): + continue + for cid in font.decode(obj): + if needcharspace: + x += charspace + item = self.render_char( + translate_matrix(matrix, (x, y)), + font, + fontsize, + scaling, + rise, + cid, + ncs, + graphicstate, + scs, + ) + x += item.adv + chars.append(item) + if cid == 32 and wordspace: + x += wordspace + needcharspace = True + return ((x, y), chars) + + def render_string_vertical( + self, + seq: PDFTextSeq, + matrix: Matrix, + pos: Point, + font: PDFFont, + fontsize: float, + scaling: float, + charspace: float, + wordspace: float, + rise: float, + dxscale: 
float, + ncs: PDFColorSpace, + graphicstate: "PDFGraphicState", + scs: Optional[PDFColorSpace] = None, + ) -> Tuple[Point, List[LTChar]]: + (x, y) = pos + needcharspace = False + chars = [] + for obj in seq: + if isinstance(obj, (int, float)): + y -= obj * dxscale + needcharspace = True + else: + if isinstance(obj, str): + obj = make_compat_bytes(obj) + if not isinstance(obj, bytes): + continue + for cid in font.decode(obj): + if needcharspace: + y += charspace + item = self.render_char( + translate_matrix(matrix, (x, y)), + font, + fontsize, + scaling, + rise, + cid, + ncs, + graphicstate, + scs, + ) + chars.append(item) + y += item.adv + if cid == 32 and wordspace: + y += wordspace + needcharspace = True + return ((x, y), chars) + + def handle_undefined_char(self, font: PDFFont, cid: int) -> str: + log.debug("undefined: %r, %r", font, cid) + return "(cid:%d)" % cid diff --git a/playa/parser.py b/playa/parser.py new file mode 100644 index 00000000..f13e5da4 --- /dev/null +++ b/playa/parser.py @@ -0,0 +1,621 @@ +import logging +import mmap +import re +import weakref +from binascii import unhexlify +from collections import deque +from typing import ( + TYPE_CHECKING, + Deque, + Dict, + Generic, + Iterator, + List, + Optional, + Tuple, + TypeVar, + Union, +) + +from playa import settings +from playa.casting import safe_int +from playa.exceptions import PDFSyntaxError, PSException, PSSyntaxError, PSTypeError +from playa.pdftypes import ( + KWD, + LIT, + ContentStream, + ObjRef, + PSKeyword, + PSLiteral, + dict_value, + int_value, + literal_name, + name_str, +) +from playa.utils import choplist + +log = logging.getLogger(__name__) +if TYPE_CHECKING: + from playa.document import PDFDocument + +# Intern a bunch of important keywords +KEYWORD_PROC_BEGIN = KWD(b"{") +KEYWORD_PROC_END = KWD(b"}") +KEYWORD_ARRAY_BEGIN = KWD(b"[") +KEYWORD_ARRAY_END = KWD(b"]") +KEYWORD_DICT_BEGIN = KWD(b"<<") +KEYWORD_DICT_END = KWD(b">>") +KEYWORD_GT = KWD(b">") +KEYWORD_R = KWD(b"R") 
+KEYWORD_NULL = KWD(b"null")
+KEYWORD_ENDOBJ = KWD(b"endobj")
+KEYWORD_STREAM = KWD(b"stream")
+KEYWORD_XREF = KWD(b"xref")
+KEYWORD_STARTXREF = KWD(b"startxref")
+KEYWORD_OBJ = KWD(b"obj")
+KEYWORD_TRAILER = KWD(b"trailer")
+
+
+EOL = b"\r\n"
+WHITESPACE = b" \t\n\r\f\v"
+NUMBER = b"0123456789"
+HEX = NUMBER + b"abcdef" + b"ABCDEF"
+NOTLITERAL = b"#/%[]()<>{}" + WHITESPACE
+NOTKEYWORD = b"#/%[]()<>{}" + WHITESPACE
+NOTSTRING = b"()\\"
+OCTAL = b"01234567"
+ESC_STRING = {
+    b"b": 8,
+    b"t": 9,
+    b"n": 10,
+    b"f": 12,
+    b"r": 13,
+    b"(": 40,
+    b")": 41,
+    b"\\": 92,
+}
+
+
+PSBaseParserToken = Union[float, bool, PSLiteral, PSKeyword, bytes]
+LEXER = re.compile(
+    rb"""(?:
+      (?P<whitespace> \s+)
+    | (?P<comment> %[^\r\n]*[\r\n])
+    | (?P<name> /(?: \#[A-Fa-f\d][A-Fa-f\d] | [^#/%\[\]()<>{}\s])+ )
+    | (?P<number> [-+]? (?: \d*\.\d+ | \d+ ) )
+    | (?P<keyword> [A-Za-z] [^#/%\[\]()<>{}\s]*)
+    | (?P<startstr> \([^()\\]*)
+    | (?P<hexstr> <[A-Fa-f\d\s]*>)
+    | (?P<startdict> <<)
+    | (?P<enddict> >>)
+    | (?P<other> .)
+)
+""",
+    re.VERBOSE,
+)
+STRLEXER = re.compile(
+    rb"""(?:
+      (?P<octal> \\[0-7]{1,3})
+    | (?P<linebreak> \\(?:\r\n?|\n))
+    | (?P<escape> \\.)
+    | (?P<parenleft> \()
+    | (?P<parenright> \))
+    | (?P<newline> \r\n?|\n)
+    | (?P<other> .)
+)""", + re.VERBOSE, +) +HEXDIGIT = re.compile(rb"#([A-Fa-f\d][A-Fa-f\d])") +EOLR = re.compile(rb"\r\n?|\n") +SPC = re.compile(rb"\s") + + +class Lexer: + """Lexer for PDF data.""" + + def __init__(self, data: Union[bytes, mmap.mmap]) -> None: + self.data = data + self.pos = 0 + self.end = len(data) + self._tokens: Deque[Tuple[int, PSBaseParserToken]] = deque() + + def seek(self, pos: int) -> None: + """Seek to a position and reinitialize parser state.""" + self.pos = pos + self._curtoken = b"" + self._curtokenpos = 0 + self._tokens.clear() + + def tell(self) -> int: + """Get the current position in the buffer.""" + return self.pos + + def read(self, objlen: int) -> bytes: + """Read data from current position, advancing to the end of + this data.""" + pos = self.pos + self.pos = min(pos + objlen, len(self.data)) + return self.data[pos : self.pos] + + def iter_lines(self) -> Iterator[Tuple[int, bytes]]: + r"""Iterate over lines that end either with \r, \n, or \r\n, + starting at the current position.""" + while self.pos < self.end: + linepos = self.pos + m = EOLR.search(self.data, self.pos) + if m is None: + self.pos = self.end + else: + self.pos = m.end() + yield (linepos, self.data[linepos : self.pos]) + + def reverse_iter_lines(self) -> Iterator[bytes]: + """Iterate backwards over lines starting at the current position. + + This is used to locate the trailers at the end of a file. + """ + endline = self.pos + while True: + nidx = self.data.rfind(b"\n", 0, self.pos) + ridx = self.data.rfind(b"\r", 0, self.pos) + best = max(nidx, ridx) + if best == -1: + yield self.data[:endline] + break + yield self.data[best + 1 : endline] + endline = best + 1 + self.pos = best + if self.pos > 0 and self.data[self.pos - 1 : self.pos + 1] == b"\r\n": + self.pos -= 1 + + def get_inline_data( + self, target: bytes = b"EI", blocksize: int = -1 + ) -> Tuple[int, bytes]: + """Get the data for an inline image up to the target + end-of-stream marker. 
+ + Returns a tuple of the position of the target in the data and the + data *including* the end of stream marker. Advances the file + pointer to a position after the end of the stream. + + The caller is responsible for removing the end-of-stream if + necessary (this depends on the filter being used) and parsing + the end-of-stream token (likewise) if necessary. + """ + tpos = self.data.find(target, self.pos) + if tpos != -1: + nextpos = tpos + len(target) + result = (tpos, self.data[self.pos : nextpos]) + self.pos = nextpos + return result + return (-1, b"") + + def __iter__(self) -> Iterator[Tuple[int, PSBaseParserToken]]: + """Iterate over tokens.""" + return self + + def __next__(self) -> Tuple[int, PSBaseParserToken]: + """Get the next token in iteration, raising StopIteration when + done.""" + while True: + m = LEXER.match(self.data, self.pos) + if m is None: # can only happen at EOS + raise StopIteration + self._curtokenpos = m.start() + self.pos = m.end() + if m.lastgroup not in ("whitespace", "comment"): # type: ignore + # Okay, we got a token or something + break + self._curtoken = m[0] + if m.lastgroup == "name": # type: ignore + self._curtoken = m[0][1:] + self._curtoken = HEXDIGIT.sub( + lambda x: bytes((int(x[1], 16),)), self._curtoken + ) + tok = LIT(name_str(self._curtoken)) + return (self._curtokenpos, tok) + if m.lastgroup == "number": # type: ignore + if b"." 
in self._curtoken: + return (self._curtokenpos, float(self._curtoken)) + else: + return (self._curtokenpos, int(self._curtoken)) + if m.lastgroup == "startdict": # type: ignore + return (self._curtokenpos, KEYWORD_DICT_BEGIN) + if m.lastgroup == "enddict": # type: ignore + return (self._curtokenpos, KEYWORD_DICT_END) + if m.lastgroup == "startstr": # type: ignore + return self._parse_endstr(self.data[m.start() + 1 : m.end()], m.end()) + if m.lastgroup == "hexstr": # type: ignore + self._curtoken = SPC.sub(b"", self._curtoken[1:-1]) + if len(self._curtoken) % 2 == 1: + self._curtoken += b"0" + return (self._curtokenpos, unhexlify(self._curtoken)) + # Anything else is treated as a keyword (whether explicitly matched or not) + if self._curtoken == b"true": + return (self._curtokenpos, True) + elif self._curtoken == b"false": + return (self._curtokenpos, False) + else: + return (self._curtokenpos, KWD(self._curtoken)) + + def _parse_endstr(self, start: bytes, pos: int) -> Tuple[int, PSBaseParserToken]: + """Parse the remainder of a string.""" + # Handle nonsense CRLF conversion in strings (PDF 1.7, p.15) + parts = [EOLR.sub(b"\n", start)] + paren = 1 + for m in STRLEXER.finditer(self.data, pos): + self.pos = m.end() + if m.lastgroup == "parenright": # type: ignore + paren -= 1 + if paren == 0: + # By far the most common situation! + break + parts.append(m[0]) + elif m.lastgroup == "parenleft": # type: ignore + parts.append(m[0]) + paren += 1 + elif m.lastgroup == "escape": # type: ignore + chr = m[0][1:2] + if chr not in ESC_STRING: + log.warning("Unrecognized escape %r", m[0]) + parts.append(chr) + else: + parts.append(bytes((ESC_STRING[chr],))) + elif m.lastgroup == "octal": # type: ignore + chrcode = int(m[0][1:], 8) + if chrcode >= 256: + # PDF1.7 p.16: "high-order overflow shall be + # ignored." 
+ log.warning("Invalid octal %r (%d)", m[0][1:], chrcode) + else: + parts.append(bytes((chrcode,))) + elif m.lastgroup == "newline": # type: ignore + # Handle nonsense CRLF conversion in strings (PDF 1.7, p.15) + parts.append(b"\n") + elif m.lastgroup == "linebreak": # type: ignore + pass + else: + parts.append(m[0]) + if paren != 0: + log.warning("Unterminated string at %d", pos) + raise StopIteration + return (self._curtokenpos, b"".join(parts)) + + +# Stack slots may by occupied by any of: +# * the name of a literal +# * the PSBaseParserToken types +# * list (via KEYWORD_ARRAY) +# * dict (via KEYWORD_DICT) +# * subclass-specific extensions (e.g. PDFStream, PDFObjRef) via ExtraT +ExtraT = TypeVar("ExtraT") +PSStackType = Union[str, float, bool, PSLiteral, bytes, List, Dict, ExtraT] +PSStackEntry = Tuple[int, PSStackType[ExtraT]] +PDFStackT = PSStackType[ContentStream] # FIXME: Not entirely correct here + + +class Parser(Generic[ExtraT]): + """Basic parser for PDF objects in a bytes-like object.""" + + def __init__(self, data: Union[bytes, mmap.mmap]) -> None: + self.reinit(data) + + def reinit(self, data: Union[bytes, mmap.mmap]) -> None: + """Reinitialize with new data (FIXME: Should go away, use a + new parser for each stream as it's clearer and safer)""" + self._lexer = Lexer(data) + self.reset() + + def reset(self) -> None: + """Reset parser state.""" + self.context: List[Tuple[int, Optional[str], List[PSStackEntry[ExtraT]]]] = [] + self.curtype: Optional[str] = None + self.curstack: List[PSStackEntry[ExtraT]] = [] + self.results: List[PSStackEntry[ExtraT]] = [] + + def push(self, *objs: PSStackEntry[ExtraT]) -> None: + """Push some objects onto the stack.""" + self.curstack.extend(objs) + + def pop(self, n: int) -> List[PSStackEntry[ExtraT]]: + """Pop some objects off the stack.""" + objs = self.curstack[-n:] + self.curstack[-n:] = [] + return objs + + def popall(self) -> List[PSStackEntry[ExtraT]]: + """Pop all the things off the stack.""" + objs = 
self.curstack + self.curstack = [] + return objs + + def add_results(self, *objs: PSStackEntry[ExtraT]) -> None: + """Move some objects to the output.""" + try: + log.debug("add_results: %r", objs) + except Exception: + log.debug("add_results: (unprintable object)") + self.results.extend(objs) + + def start_type(self, pos: int, type: str) -> None: + """Start a composite object (array, dict, etc).""" + self.context.append((pos, self.curtype, self.curstack)) + (self.curtype, self.curstack) = (type, []) + log.debug("start_type: pos=%r, type=%r", pos, type) + + def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]: + """End a composite object (array, dict, etc).""" + if self.curtype != type: + raise PSTypeError(f"Type mismatch: {self.curtype!r} != {type!r}") + objs = [obj for (_, obj) in self.curstack] + (pos, self.curtype, self.curstack) = self.context.pop() + log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs) + return (pos, objs) + + def do_keyword(self, pos: int, token: PSKeyword) -> None: + """Handle a PDF keyword.""" + pass + + def flush(self) -> None: + """Add objects from stack to output (or, actually, not).""" + return + + def __next__(self) -> PSStackEntry[ExtraT]: + """Return the next object, raising StopIteration at EOF. + + Arrays and dictionaries are represented as Python lists and + dictionaries. 
+ """ + while not self.results: + (pos, token) = self.nexttoken() + if isinstance(token, (int, float, bool, str, bytes, PSLiteral)): + # normal token + self.push((pos, token)) + elif token == KEYWORD_ARRAY_BEGIN: + # begin array + self.start_type(pos, "a") + elif token == KEYWORD_ARRAY_END: + # end array + try: + self.push(self.end_type("a")) + except PSTypeError: + if settings.STRICT: + raise + elif token == KEYWORD_DICT_BEGIN: + # begin dictionary + self.start_type(pos, "d") + elif token == KEYWORD_DICT_END: + # end dictionary + try: + (pos, objs) = self.end_type("d") + if len(objs) % 2 != 0: + error_msg = "Invalid dictionary construct: %r" % objs + raise PSSyntaxError(error_msg) + d = { + literal_name(k): v + for (k, v) in choplist(2, objs) + if v is not None + } + self.push((pos, d)) + except PSTypeError: + if settings.STRICT: + raise + elif token == KEYWORD_PROC_BEGIN: + # begin proc + self.start_type(pos, "p") + elif token == KEYWORD_PROC_END: + # end proc + try: + self.push(self.end_type("p")) + except PSTypeError: + if settings.STRICT: + raise + elif isinstance(token, PSKeyword): + log.debug( + "do_keyword: pos=%r, token=%r, stack=%r", + pos, + token, + self.curstack, + ) + self.do_keyword(pos, token) + else: + log.error( + "unknown token: pos=%r, token=%r, stack=%r", + pos, + token, + self.curstack, + ) + self.do_keyword(pos, token) + raise PSException + if self.context: + continue + else: + self.flush() + pos, obj = self.results.pop(0) + try: + log.debug("__next__: object at %d: %r", pos, obj) + except Exception: + log.debug("__next__: (unprintable object) at %d", pos) + return pos, obj + + def __iter__(self) -> Iterator[PSStackEntry[ExtraT]]: + """Iterate over (position, object) tuples, raising StopIteration at EOF.""" + return self + + @property + def tokens(self) -> Iterator[Tuple[int, PSBaseParserToken]]: + """Iterate over (position, token) tuples, raising StopIteration at EOF.""" + return self._lexer + + # Delegation follows + def seek(self, pos: 
int) -> None: + """Seek to a position and reset parser state.""" + self._lexer.seek(pos) + self.reset() + + def tell(self) -> int: + """Get the current position in the file.""" + return self._lexer.tell() + + @property + def end(self) -> int: + """End (or size) of file, for use with seek().""" + return self._lexer.end + + def iter_lines(self) -> Iterator[Tuple[int, bytes]]: + r"""Iterate over lines that end either with \r, \n, or \r\n.""" + return self._lexer.iter_lines() + + def reverse_iter_lines(self) -> Iterator[bytes]: + """Iterate over lines starting at the end of the file + + This is used to locate the trailers at the end of a file. + """ + return self._lexer.reverse_iter_lines() + + def read(self, objlen: int) -> bytes: + """Read data from a specified position, moving the current + position to the end of this data.""" + return self._lexer.read(objlen) + + def get_inline_data(self, target: bytes = b"EI") -> Tuple[int, bytes]: + """Get the data for an inline image up to the target + end-of-stream marker.""" + return self._lexer.get_inline_data(target) + + def nexttoken(self) -> Tuple[int, PSBaseParserToken]: + """Get the next token in iteration, raising StopIteration when + done.""" + return next(self._lexer) + + +class PDFParser(Parser[Union[PSKeyword, ContentStream, ObjRef, None]]): + """PDFParser fetches PDF objects from a file stream. + It holds a weak reference to the document in order to + resolve indirect references. If the document is deleted + then this will obviously no longer work. + + Typical usage: + parser = PDFParser(fp, doc) + parser.seek(offset) + for object in parser: + ... 
+ + """ + + def __init__(self, data: Union[bytes, mmap.mmap], doc: "PDFDocument") -> None: + super().__init__(data) + self.doc = weakref.ref(doc) + self.fallback = False + + def do_keyword(self, pos: int, token: PSKeyword) -> None: + """Handles PDF-related keywords.""" + if token in (KEYWORD_XREF, KEYWORD_STARTXREF): + self.add_results(*self.pop(1)) + + elif token is KEYWORD_ENDOBJ: + # objid genno "obj" ... and the object itself + self.add_results(*self.pop(4)) + + elif token is KEYWORD_NULL: + # null object + self.push((pos, None)) + + elif token is KEYWORD_R: + # reference to indirect object + if len(self.curstack) >= 2: + (_, _object_id), _ = self.pop(2) + object_id = safe_int(_object_id) + if object_id is not None: + obj = ObjRef(self.doc, object_id) + self.push((pos, obj)) + + elif token is KEYWORD_STREAM: + # stream dictionary, which precedes "stream" + ((_, dic),) = self.pop(1) + dic = dict_value(dic) + objlen = 0 + if not self.fallback: + try: + objlen = int_value(dic["Length"]) + except KeyError: + if settings.STRICT: + raise PDFSyntaxError("/Length is undefined: %r" % dic) + # back up and read the entire line including 'stream' as + # the data starts after the trailing newline + self.seek(pos) + try: + _, line = next(self.iter_lines()) # 'stream\n' + except StopIteration: + if settings.STRICT: + raise PDFSyntaxError("Unexpected EOF") + return + pos = self.tell() + data = self.read(objlen) + # FIXME: This is ... not really the right way to do this. 
+ for linepos, line in self.iter_lines(): + if b"endstream" in line: + i = line.index(b"endstream") + objlen += i + if self.fallback: + data += line[:i] + break + objlen += len(line) + if self.fallback: + data += line + self.seek(pos + objlen) + # XXX limit objlen not to exceed object boundary + log.debug( + "ContentStream: pos=%d, objlen=%d, dic=%r, data=%r...", + pos, + objlen, + dic, + data[:10], + ) + doc = self.doc() + if doc is None: + raise RuntimeError("Document no longer exists!") + stream = ContentStream(dic, bytes(data), doc.decipher) + self.push((pos, stream)) + + else: + # others + self.push((pos, token)) + + +class ContentStreamParser(PDFParser): + """StreamParser is used to parse PDF content streams and object + streams. These have slightly different rules for how objects are + described than the top-level PDF file contents. + """ + + def __init__(self, data: bytes, doc: "PDFDocument") -> None: + super().__init__(data, doc) + + def flush(self) -> None: + self.add_results(*self.popall()) + + def do_keyword(self, pos: int, token: PSKeyword) -> None: + if token is KEYWORD_R: + # reference to indirect object + try: + (_, _object_id), _ = self.pop(2) + except ValueError: + raise PDFSyntaxError( + "Expected generation and object id in indirect object reference" + ) + object_id = safe_int(_object_id) + if object_id is not None: + obj = ObjRef(self.doc, object_id) + self.push((pos, obj)) + return + + elif token in (KEYWORD_OBJ, KEYWORD_ENDOBJ): + if settings.STRICT: + # See PDF Spec 3.4.6: Only the object values are stored in the + # stream; the obj and endobj keywords are not used. 
+ raise PDFSyntaxError("Keyword endobj found in stream") + return + + # others + self.push((pos, token)) diff --git a/playa/pdfparser.py b/playa/pdfparser.py deleted file mode 100644 index e02b486b..00000000 --- a/playa/pdfparser.py +++ /dev/null @@ -1,164 +0,0 @@ -import logging -import weakref -from typing import TYPE_CHECKING, BinaryIO, Union - -from playa import settings -from playa.casting import safe_int -from playa.exceptions import PSEOF, PDFSyntaxError -from playa.pdftypes import PDFObjRef, PDFStream, dict_value, int_value -from playa.psparser import KWD, PSKeyword, PSStackParser - -if TYPE_CHECKING: - from playa.pdfdocument import PDFDocument - -log = logging.getLogger(__name__) - -# Important keywords -KEYWORD_R = KWD(b"R") -KEYWORD_NULL = KWD(b"null") -KEYWORD_ENDOBJ = KWD(b"endobj") -KEYWORD_STREAM = KWD(b"stream") -KEYWORD_XREF = KWD(b"xref") -KEYWORD_STARTXREF = KWD(b"startxref") -KEYWORD_OBJ = KWD(b"obj") - - -# PDFParser stack holds all the base types plus PDFStream, PDFObjRef, and None -class PDFParser(PSStackParser[Union[PSKeyword, PDFStream, PDFObjRef, None]]): - """PDFParser fetch PDF objects from a file stream. - It can handle indirect references by referring to - a PDF document set by set_document method. - It also reads XRefs at the end of every PDF file. 
- - Typical usage: - parser = PDFParser(fp) - parser.read_xref() - parser.read_xref(fallback=True) # optional - parser.set_document(doc) - parser.seek(offset) - parser.nextobject() - - """ - - def __init__(self, data: Union[BinaryIO, bytes], doc: "PDFDocument") -> None: - super().__init__(data) - self.doc = weakref.ref(doc) - self.fallback = False - - def do_keyword(self, pos: int, token: PSKeyword) -> None: - """Handles PDF-related keywords.""" - if token in (KEYWORD_XREF, KEYWORD_STARTXREF): - self.add_results(*self.pop(1)) - - elif token is KEYWORD_ENDOBJ: - self.add_results(*self.pop(4)) - - elif token is KEYWORD_NULL: - # null object - self.push((pos, None)) - - elif token is KEYWORD_R: - # reference to indirect object - if len(self.curstack) >= 2: - (_, _object_id), _ = self.pop(2) - object_id = safe_int(_object_id) - if object_id is not None: - obj = PDFObjRef(self.doc, object_id) - self.push((pos, obj)) - - elif token is KEYWORD_STREAM: - # stream object - ((_, dic),) = self.pop(1) - dic = dict_value(dic) - objlen = 0 - if not self.fallback: - try: - objlen = int_value(dic["Length"]) - except KeyError: - if settings.STRICT: - raise PDFSyntaxError("/Length is undefined: %r" % dic) - # back up and read the entire line including 'stream' as - # the data starts after the trailing newline - self.seek(pos) - try: - (_, line) = self.nextline() # 'stream\n' - except PSEOF: - if settings.STRICT: - raise PDFSyntaxError("Unexpected EOF") - return - pos = self.tell() - data = self.read(objlen) - while True: - try: - (linepos, line) = self.nextline() - except PSEOF: - if settings.STRICT: - raise PDFSyntaxError("Unexpected EOF") - break - if b"endstream" in line: - i = line.index(b"endstream") - objlen += i - if self.fallback: - data += line[:i] - break - objlen += len(line) - if self.fallback: - data += line - self.seek(pos + objlen) - # XXX limit objlen not to exceed object boundary - log.debug( - "Stream: pos=%d, objlen=%d, dic=%r, data=%r...", - pos, - objlen, - dic, 
- data[:10], - ) - doc = self.doc() - if doc is None: - raise RuntimeError("Document no longer exists!") - stream = PDFStream(dic, bytes(data), doc.decipher) - self.push((pos, stream)) - - else: - # others - self.push((pos, token)) - - -class PDFStreamParser(PDFParser): - """PDFStreamParser is used to parse PDF content streams - that is contained in each page and has instructions - for rendering the page. A reference to a PDF document is - needed because a PDF content stream can also have - indirect references to other objects in the same document. - """ - - def __init__(self, data: bytes, doc: "PDFDocument") -> None: - super().__init__(data, doc) - - def flush(self) -> None: - self.add_results(*self.popall()) - - def do_keyword(self, pos: int, token: PSKeyword) -> None: - if token is KEYWORD_R: - # reference to indirect object - try: - (_, _object_id), _ = self.pop(2) - except ValueError: - raise PDFSyntaxError( - "Expected generation and object id in indirect object reference" - ) - object_id = safe_int(_object_id) - if object_id is not None: - obj = PDFObjRef(self.doc, object_id) - self.push((pos, obj)) - return - - elif token in (KEYWORD_OBJ, KEYWORD_ENDOBJ): - if settings.STRICT: - # See PDF Spec 3.4.6: Only the object values are stored in the - # stream; the obj and endobj keywords are not used. 
- raise PDFSyntaxError("Keyword endobj found in stream") - return - - # others - self.push((pos, token)) diff --git a/playa/pdftypes.py b/playa/pdftypes.py index 8c4b71f7..f52d9b42 100644 --- a/playa/pdftypes.py +++ b/playa/pdftypes.py @@ -6,11 +6,14 @@ TYPE_CHECKING, Any, Dict, + Generic, Iterable, List, Optional, Protocol, Tuple, + Type, + TypeVar, Union, cast, ) @@ -23,19 +26,85 @@ PDFNotImplementedError, PDFTypeError, PDFValueError, + PSTypeError, ) from playa.lzw import lzwdecode -from playa.psparser import LIT from playa.runlength import rldecode from playa.utils import apply_png_predictor if TYPE_CHECKING: - from playa.pdfdocument import PDFDocument + from playa.document import PDFDocument logger = logging.getLogger(__name__) -LITERAL_CRYPT = LIT("Crypt") +class PSLiteral: + """A class that represents a PostScript literal. + + Postscript literals are used as identifiers, such as + variable names, property names and dictionary keys. + Literals are case sensitive and denoted by a preceding + slash sign (e.g. "/Name") + + Note: Do not create an instance of PSLiteral directly. + Always use PSLiteralTable.intern(). + """ + + def __init__(self, name: str) -> None: + self.name = name + + def __repr__(self) -> str: + return "/%r" % self.name + + +class PSKeyword: + """A class that represents a PostScript keyword. + + PostScript keywords are a dozen of predefined words. + Commands and directives in PostScript are expressed by keywords. + They are also used to denote the content boundaries. + + Note: Do not create an instance of PSKeyword directly. + Always use PSKeywordTable.intern(). 
+ """ + + def __init__(self, name: bytes) -> None: + self.name = name + + def __repr__(self) -> str: + return "/%r" % self.name + + +_SymbolT = TypeVar("_SymbolT", PSLiteral, PSKeyword) +_NameT = TypeVar("_NameT", str, bytes) + + +class PSSymbolTable(Generic[_SymbolT, _NameT]): + """Store globally unique name objects or language keywords.""" + + def __init__(self, table_type: Type[_SymbolT], name_type: Type[_NameT]) -> None: + self.dict: Dict[_NameT, _SymbolT] = {} + self.table_type: Type[_SymbolT] = table_type + self.name_type: Type[_NameT] = name_type + + def intern(self, name: _NameT) -> _SymbolT: + if not isinstance(name, self.name_type): + raise ValueError(f"{self.table_type} can only store {self.name_type}") + if name in self.dict: + lit = self.dict[name] + else: + lit = self.table_type(name) # type: ignore + self.dict[name] = lit + return lit + + +PSLiteralTable = PSSymbolTable(PSLiteral, str) +PSKeywordTable = PSSymbolTable(PSKeyword, bytes) +LIT = PSLiteralTable.intern +KWD = PSKeywordTable.intern + +# Intern a bunch of important literals +LITERAL_CRYPT = LIT("Crypt") # Abbreviation of Filter names in PDF 4.8.6. "Inline Images" LITERALS_FLATE_DECODE = (LIT("FlateDecode"), LIT("Fl")) LITERALS_LZW_DECODE = (LIT("LZWDecode"), LIT("LZW")) @@ -48,6 +117,51 @@ LITERALS_JPX_DECODE = (LIT("JPXDecode"),) +def name_str(x: bytes) -> str: + """Get the string representation for a name object. + + According to the PDF 1.7 spec (p.18): + + > Ordinarily, the bytes making up the name are never treated as + > text to be presented to a human user or to an application + > external to a conforming reader. However, occasionally the need + > arises to treat a name object as text... In such situations, the + > sequence of bytes (after expansion of NUMBER SIGN sequences, if + > any) should be interpreted according to UTF-8. 
+ + Accordingly, if they *can* be decoded to UTF-8, then they *will* + be, and if not, we will just decode them as ISO-8859-1 since that + gives a unique (if possibly nonsensical) value for an 8-bit string. + """ + try: + return x.decode("utf-8") + except UnicodeDecodeError: + return x.decode("iso-8859-1") + + +def literal_name(x: Any) -> str: + if not isinstance(x, PSLiteral): + if settings.STRICT: + raise PSTypeError(f"Literal required: {x!r}") + return str(x) + else: + return x.name + + +def keyword_name(x: Any) -> str: + if not isinstance(x, PSKeyword): + if settings.STRICT: + raise PSTypeError("Keyword required: %r" % x) + else: + return str(x) + else: + # PDF keywords are *not* UTF-8 (they aren't ISO-8859-1 either, + # but this isn't very important, we just want some + # unique representation of 8-bit characters, as above) + name = x.name.decode("iso-8859-1") + return name + + class DecipherCallable(Protocol): """Fully typed a decipher callback, with optional parameter.""" @@ -64,7 +178,7 @@ def __call__( _DEFAULT = object() -class PDFObjRef: +class ObjRef: def __init__( self, doc: weakref.ReferenceType["PDFDocument"], @@ -83,7 +197,7 @@ def __init__( self.objid = objid def __repr__(self) -> str: - return "" % (self.objid) + return "" % (self.objid) def resolve(self, default: object = None) -> Any: doc = self.doc() @@ -101,7 +215,7 @@ def resolve1(x: object, default: object = None) -> Any: If this is an array or dictionary, it may still contains some indirect objects inside. """ - while isinstance(x, PDFObjRef): + while isinstance(x, ObjRef): x = x.resolve(default=default) return x @@ -112,7 +226,7 @@ def resolve_all(x: object, default: object = None) -> Any: Make sure there is no indirect reference within the nested object. This procedure might be slow. 
""" - while isinstance(x, PDFObjRef): + while isinstance(x, ObjRef): x = x.resolve(default=default) if isinstance(x, list): x = [resolve_all(v, default=default) for v in x] @@ -140,7 +254,7 @@ def int_value(x: object) -> int: x = resolve1(x) if not isinstance(x, int): if settings.STRICT: - raise PDFTypeError("Integer required: %r" % x) + raise PDFTypeError("Integer required: %r" % (x,)) return 0 return x @@ -149,7 +263,7 @@ def float_value(x: object) -> float: x = resolve1(x) if not isinstance(x, float): if settings.STRICT: - raise PDFTypeError("Float required: %r" % x) + raise PDFTypeError("Float required: %r" % (x,)) return 0.0 return x @@ -200,12 +314,12 @@ def dict_value(x: object) -> Dict[Any, Any]: return x -def stream_value(x: object) -> "PDFStream": +def stream_value(x: object) -> "ContentStream": x = resolve1(x) - if not isinstance(x, PDFStream): + if not isinstance(x, ContentStream): if settings.STRICT: - raise PDFTypeError("PDFStream required: %r" % x) - return PDFStream({}, b"") + raise PDFTypeError("ContentStream required: %r" % x) + return ContentStream({}, b"") return x @@ -230,7 +344,7 @@ def decompress_corrupted(data: bytes) -> bytes: return result_str -class PDFStream: +class ContentStream: def __init__( self, attrs: Dict[str, Any], @@ -252,14 +366,14 @@ def set_objid(self, objid: int, genno: int) -> None: def __repr__(self) -> str: if self.data is None: assert self.rawdata is not None - return "" % ( + return "" % ( self.objid, len(self.rawdata), self.attrs, ) else: assert self.data is not None - return "" % ( + return "" % ( self.objid, len(self.data), self.attrs, diff --git a/playa/psparser.py b/playa/psparser.py deleted file mode 100755 index c2700af7..00000000 --- a/playa/psparser.py +++ /dev/null @@ -1,1051 +0,0 @@ -#!/usr/bin/env python3 -import io -import logging -import mmap -import re -from binascii import unhexlify -from collections import deque -from typing import ( - Any, - BinaryIO, - Deque, - Dict, - Generic, - Iterator, - List, - 
Optional, - Tuple, - Type, - TypeVar, - Union, -) - -from playa import settings -from playa.exceptions import ( - PSEOF, - PSException, - PSSyntaxError, - PSTypeError, -) -from playa.utils import choplist - -log = logging.getLogger(__name__) - - -class PSLiteral: - """A class that represents a PostScript literal. - - Postscript literals are used as identifiers, such as - variable names, property names and dictionary keys. - Literals are case sensitive and denoted by a preceding - slash sign (e.g. "/Name") - - Note: Do not create an instance of PSLiteral directly. - Always use PSLiteralTable.intern(). - """ - - def __init__(self, name: str) -> None: - self.name = name - - def __repr__(self) -> str: - return "/%r" % self.name - - -class PSKeyword: - """A class that represents a PostScript keyword. - - PostScript keywords are a dozen of predefined words. - Commands and directives in PostScript are expressed by keywords. - They are also used to denote the content boundaries. - - Note: Do not create an instance of PSKeyword directly. - Always use PSKeywordTable.intern(). 
- """ - - def __init__(self, name: bytes) -> None: - self.name = name - - def __repr__(self) -> str: - return "/%r" % self.name - - -_SymbolT = TypeVar("_SymbolT", PSLiteral, PSKeyword) -_NameT = TypeVar("_NameT", str, bytes) - - -class PSSymbolTable(Generic[_SymbolT, _NameT]): - """Store globally unique name objects or language keywords.""" - - def __init__(self, table_type: Type[_SymbolT], name_type: Type[_NameT]) -> None: - self.dict: Dict[_NameT, _SymbolT] = {} - self.table_type: Type[_SymbolT] = table_type - self.name_type: Type[_NameT] = name_type - - def intern(self, name: _NameT) -> _SymbolT: - if not isinstance(name, self.name_type): - raise ValueError(f"{self.table_type} can only store {self.name_type}") - if name in self.dict: - lit = self.dict[name] - else: - lit = self.table_type(name) # type: ignore - self.dict[name] = lit - return lit - - -PSLiteralTable = PSSymbolTable(PSLiteral, str) -PSKeywordTable = PSSymbolTable(PSKeyword, bytes) -LIT = PSLiteralTable.intern -KWD = PSKeywordTable.intern -KEYWORD_PROC_BEGIN = KWD(b"{") -KEYWORD_PROC_END = KWD(b"}") -KEYWORD_ARRAY_BEGIN = KWD(b"[") -KEYWORD_ARRAY_END = KWD(b"]") -KEYWORD_DICT_BEGIN = KWD(b"<<") -KEYWORD_DICT_END = KWD(b">>") -KEYWORD_GT = KWD(b">") - - -def name_str(x: bytes) -> str: - """Get the string representation for a name object. - - According to the PDF 1.7 spec (p.18): - - > Ordinarily, the bytes making up the name are never treated as - > text to be presented to a human user or to an application - > external to a conforming reader. However, occasionally the need - > arises to treat a name object as text... In such situations, the - > sequence of bytes (after expansion of NUMBER SIGN sequences, if - > any) should be interpreted according to UTF-8. - - Accordingly, if they *can* be decoded to UTF-8, then they *will* - be, and if not, we will just decode them as ISO-8859-1 since that - gives a unique (if possibly nonsensical) value for an 8-bit string. 
- """ - try: - return x.decode("utf-8") - except UnicodeDecodeError: - return x.decode("iso-8859-1") - - -def literal_name(x: Any) -> str: - if not isinstance(x, PSLiteral): - if settings.STRICT: - raise PSTypeError(f"Literal required: {x!r}") - return str(x) - else: - return x.name - - -def keyword_name(x: Any) -> str: - if not isinstance(x, PSKeyword): - if settings.STRICT: - raise PSTypeError("Keyword required: %r" % x) - else: - return str(x) - else: - # PDF keywords are *not* UTF-8 (they aren't ISO-8859-1 either, - # but this isn't very important, we just want some - # unique representation of 8-bit characters, as above) - name = x.name.decode("iso-8859-1") - return name - - -EOL = b"\r\n" -WHITESPACE = b" \t\n\r\f\v" -NUMBER = b"0123456789" -HEX = NUMBER + b"abcdef" + b"ABCDEF" -NOTLITERAL = b"#/%[]()<>{}" + WHITESPACE -NOTKEYWORD = b"#/%[]()<>{}" + WHITESPACE -NOTSTRING = b"()\\" -OCTAL = b"01234567" -ESC_STRING = { - b"b": 8, - b"t": 9, - b"n": 10, - b"f": 12, - b"r": 13, - b"(": 40, - b")": 41, - b"\\": 92, -} - - -PSBaseParserToken = Union[float, bool, PSLiteral, PSKeyword, bytes] - - -class PSFileParser: - """ - Parser (actually a lexer) for PDF data from a buffered file object. 
- """ - - def __init__(self, fp: BinaryIO) -> None: - self.fp = fp - self._tokens: Deque[Tuple[int, PSBaseParserToken]] = deque() - self.seek(0) - - def reinit(self, fp: BinaryIO) -> None: - """Reinitialize parser with a new file.""" - self.fp = fp - self.seek(0) - - def seek(self, pos: int) -> None: - """Seek to a position and reinitialize parser state.""" - self.fp.seek(pos) - self._parse1 = self._parse_main - self._curtoken = b"" - self._curtokenpos = 0 - self._tokens.clear() - - def tell(self) -> int: - """Get the current position in the file.""" - return self.fp.tell() - - def read(self, objlen: int) -> bytes: - """Read data from a specified position, moving the current - position to the end of this data.""" - return self.fp.read(objlen) - - def nextline(self) -> Tuple[int, bytes]: - r"""Fetches a next line that ends either with \r, \n, or - \r\n.""" - linepos = self.fp.tell() - # readline() is implemented on BinarIO so just use that - # (except that it only accepts \n as a separator) - line_or_lines = self.fp.readline() - if line_or_lines == b"": - raise PSEOF - first, sep, rest = line_or_lines.partition(b"\r") - if len(rest) == 0: - return (linepos, line_or_lines) - elif rest != b"\n": - self.fp.seek(linepos + len(first) + 1) - return (linepos, first + sep) - else: - self.fp.seek(linepos + len(first) + 2) - return (linepos, first + b"\r\n") - - def revreadlines(self) -> Iterator[bytes]: - """Fetches a next line backwards. - - This is used to locate the trailers at the end of a file. - """ - self.fp.seek(0, io.SEEK_END) - pos = self.fp.tell() - buf = b"" - while pos > 0: - # NOTE: This can obviously be optimized to use regular - # expressions on the (known to exist) buffer in - # self.fp... 
- pos -= 1 - self.fp.seek(pos) - c = self.fp.read(1) - if c in b"\r\n": - yield buf - buf = c - if c == b"\n" and pos > 0: - self.fp.seek(pos - 1) - cc = self.fp.read(1) - if cc == b"\r": - pos -= 1 - buf = cc + buf - else: - buf = c + buf - yield buf - - def get_inline_data( - self, target: bytes = b"EI", blocksize: int = 4096 - ) -> Tuple[int, bytes]: - """Get the data for an inline image up to the target - end-of-stream marker. - - Returns a tuple of the position of the target in the data and the - data *including* the end of stream marker. Advances the file - pointer to a position after the end of the stream. - - The caller is responsible for removing the end-of-stream if - necessary (this depends on the filter being used) and parsing - the end-of-stream token (likewise) if necessary. - """ - # PDF 1.7, p. 216: The bytes between the ID and EI operators - # shall be treated the same as a stream object’s data (see - # 7.3.8, "Stream Objects"), even though they do not follow the - # standard stream syntax. - data = [] # list of blocks - partial = b"" # partially seen target - pos = 0 - while True: - # Did we see part of the target at the end of the last - # block? Then scan ahead and try to find the rest (we - # assume the stream is buffered) - if partial: - extra_len = len(target) - len(partial) - extra = self.fp.read(extra_len) - if partial + extra == target: - pos -= len(partial) - data.append(extra) - break - # Put it back (assume buffering!) - self.fp.seek(-extra_len, io.SEEK_CUR) - partial = b"" - # Fall through (the target could be at the beginning) - buf = self.fp.read(blocksize) - if not buf: - return (-1, b"") - tpos = buf.find(target) - if tpos != -1: - data.append(buf[: tpos + len(target)]) - # Put the extra back (assume buffering!) 
- self.fp.seek(tpos - len(buf) + len(target), io.SEEK_CUR) - pos += tpos - break - else: - pos += len(buf) - # look for the longest partial match at the end - plen = len(target) - 1 - while plen > 0: - ppos = len(buf) - plen - if buf[ppos:] == target[:plen]: - partial = buf[ppos:] - break - plen -= 1 - data.append(buf) - return (pos, b"".join(data)) - - def __iter__(self) -> Iterator[Tuple[int, PSBaseParserToken]]: - """Iterate over tokens.""" - return self - - def __next__(self) -> Tuple[int, PSBaseParserToken]: - """Get the next token in iteration, raising StopIteration when - done.""" - while True: - c = self._parse1() - # print(c, self._curtoken, self._parse1) - if self._tokens or c == b"": - break - if not self._tokens: - raise StopIteration - return self._tokens.popleft() - - def nexttoken(self) -> Tuple[int, PSBaseParserToken]: - """Get the next token in iteration, raising PSEOF when done.""" - try: - return self.__next__() - except StopIteration: - raise PSEOF - - def _parse_main(self) -> bytes: - """Initial/default state for the lexer.""" - c = self.fp.read(1) - # note that b"" (EOF) is in everything, which is fine - if c in WHITESPACE: - return c - self._curtokenpos = self.fp.tell() - 1 - if c == b"%": - self._curtoken = b"%" - self._parse1 = self._parse_comment - elif c == b"/": - self._curtoken = b"" - self._parse1 = self._parse_literal - elif c in b"-+" or c in NUMBER: - self._curtoken = c - self._parse1 = self._parse_number - elif c == b".": - self._curtoken = c - self._parse1 = self._parse_float - elif c.isalpha(): - self._curtoken = c - self._parse1 = self._parse_keyword - elif c == b"(": - self._curtoken = b"" - self.paren = 1 - self._parse1 = self._parse_string - elif c == b"<": - self._curtoken = b"" - self._parse1 = self._parse_wopen - elif c == b">": - self._curtoken = b"" - self._parse1 = self._parse_wclose - elif c == b"\x00": - pass - else: - self._add_token(KWD(c)) - return c - - def _add_token(self, obj: PSBaseParserToken) -> None: - 
"""Add a succesfully parsed token.""" - self._tokens.append((self._curtokenpos, obj)) - - def _parse_comment(self) -> bytes: - """Comment state for the lexer""" - c = self.fp.read(1) - if c in EOL: # this includes b"", i.e. EOF - self._parse1 = self._parse_main - # We ignore comments. - # self._tokens.append(self._curtoken) - else: - self._curtoken += c - return c - - def _parse_literal(self) -> bytes: - """Literal (keyword) state for the lexer.""" - c = self.fp.read(1) - if c == b"#": - self.hex = b"" - self._parse1 = self._parse_literal_hex - elif c in NOTLITERAL: - if c: - self.fp.seek(-1, io.SEEK_CUR) - self._add_token(LIT(name_str(self._curtoken))) - self._parse1 = self._parse_main - else: - self._curtoken += c - return c - - def _parse_literal_hex(self) -> bytes: - """State for escaped hex characters in literal names""" - # Consume a hex digit only if we can ... consume a hex digit - if len(self.hex) >= 2: # it actually can't exceed 2 - self._curtoken += bytes((int(self.hex, 16),)) - self._parse1 = self._parse_literal - return b"/" - c = self.fp.read(1) - if c and c in HEX: - self.hex += c - else: - if c: # not EOF, but not hex either - log.warning("Invalid hex digit %r in literal", c) - self.fp.seek(-1, io.SEEK_CUR) - # Add the intervening junk, just in case - tok = LIT(name_str(self._curtoken)) - self._add_token(tok) - self._curtokenpos = self.tell() - 1 - len(self.hex) - self._add_token(KWD(b"#" + self.hex)) - self._parse1 = self._parse_main - return c - - def _parse_number(self) -> bytes: - """State for numeric objects.""" - c = self.fp.read(1) - if c and c in NUMBER: - self._curtoken += c - elif c == b".": - self._curtoken += c - self._parse1 = self._parse_float - else: - if c: - self.fp.seek(-1, io.SEEK_CUR) - try: - self._add_token(int(self._curtoken)) - except ValueError: - log.warning("Invalid int literal: %r", self._curtoken) - self._parse1 = self._parse_main - return c - - def _parse_float(self) -> bytes: - """State for fractional part of numeric 
objects.""" - c = self.fp.read(1) - # b"" is in everything so we have to add an extra check - if not c or c not in NUMBER: - if c: - self.fp.seek(-1, io.SEEK_CUR) - try: - self._add_token(float(self._curtoken)) - except ValueError: - log.warning("Invalid float literal: %r", self._curtoken) - self._parse1 = self._parse_main - else: - self._curtoken += c - return c - - def _parse_keyword(self) -> bytes: - """State for keywords.""" - c = self.fp.read(1) - if c in NOTKEYWORD: # includes EOF - if c: - self.fp.seek(-1, io.SEEK_CUR) - if self._curtoken == b"true": - self._add_token(True) - elif self._curtoken == b"false": - self._add_token(False) - else: - self._add_token(KWD(self._curtoken)) - self._parse1 = self._parse_main - else: - self._curtoken += c - return c - - def _parse_string(self) -> bytes: - """State for string objects.""" - c = self.fp.read(1) - if c and c in NOTSTRING: # does not include EOF - if c == b"\\": - self._parse1 = self._parse_string_esc - return c - elif c == b"(": - self.paren += 1 - self._curtoken += c - return c - elif c == b")": - self.paren -= 1 - if self.paren: - self._curtoken += c - return c - # We saw the last parenthesis and fell through (it will be - # consumed, but not added to self._curtoken) - self._add_token(self._curtoken) - self._parse1 = self._parse_main - elif c == b"\r": - # PDF 1.7 page 15: An end-of-line marker appearing within - # a literal string without a preceding REVERSE SOLIDUS - # shall be treated as a byte value of (0Ah), irrespective - # of whether the end-of-line marker was a CARRIAGE RETURN - # (0Dh), a LINE FEED (0Ah), or both. - cc = self.fp.read(1) - # Put it back if it isn't \n - if cc and cc != b"\n": - self.fp.seek(-1, io.SEEK_CUR) - self._curtoken += b"\n" - else: - self._curtoken += c - return c - - def _parse_string_esc(self) -> bytes: - """State for escapes in literal strings. 
We have seen a - backslash and nothing else.""" - c = self.fp.read(1) - if c and c in OCTAL: # exclude EOF - self.oct = c - self._parse1 = self._parse_string_octal - return c - elif c and c in ESC_STRING: - self._curtoken += bytes((ESC_STRING[c],)) - elif c == b"\n": # Skip newline after backslash - pass - elif c == b"\r": # Also skip CRLF after - cc = self.fp.read(1) - # Put it back if it isn't \n - if cc and cc != b"\n": - self.fp.seek(-1, io.SEEK_CUR) - elif c == b"": - log.warning("EOF inside escape %r", self._curtoken) - else: - log.warning("Unrecognized escape %r", c) - self._curtoken += c - self._parse1 = self._parse_string - return c - - def _parse_string_octal(self) -> bytes: - """State for an octal escape.""" - c = self.fp.read(1) - if c and c in OCTAL: # exclude EOF - self.oct += c - done = len(self.oct) >= 3 # it can't be > though - else: - if c: - self.fp.seek(-1, io.SEEK_CUR) - else: - log.warning("EOF in octal escape %r", self._curtoken) - done = True - if done: - chrcode = int(self.oct, 8) - if chrcode >= 256: - # PDF1.7 p.16: "high-order overflow shall be ignored." 
- log.warning("Invalid octal %r (%d)", self.oct, chrcode) - else: - self._curtoken += bytes((chrcode,)) - # Back to normal string parsing - self._parse1 = self._parse_string - return c - - def _parse_wopen(self) -> bytes: - """State for start of dictionary or hex string.""" - c = self.fp.read(1) - if c == b"<": - self._add_token(KEYWORD_DICT_BEGIN) - self._parse1 = self._parse_main - else: - if c: - self.fp.seek(-1, io.SEEK_CUR) - self._parse1 = self._parse_hexstring - return c - - def _parse_wclose(self) -> bytes: - """State for end of dictionary (accessed from initial state only)""" - c = self.fp.read(1) - if c == b">": - self._add_token(KEYWORD_DICT_END) - else: - # Assuming this is a keyword (which means nothing) - self._add_token(KEYWORD_GT) - if c: - self.fp.seek(-1, io.SEEK_CUR) - self._parse1 = self._parse_main - return c - - def _parse_hexstring(self) -> bytes: - """State for parsing hexadecimal literal strings.""" - c = self.fp.read(1) - if not c: - log.warning("EOF in hex string %r", self._curtoken) - elif c in WHITESPACE: - pass - elif c in HEX: - self._curtoken += c - elif c == b">": - if len(self._curtoken) % 2 == 1: - self._curtoken += b"0" - token = unhexlify(self._curtoken) - self._add_token(token) - self._parse1 = self._parse_main - else: - log.warning("unexpected character %r in hex string %r", c, self._curtoken) - return c - - -LEXER = re.compile( - rb"""(?: - (?P \s+) - | (?P %[^\r\n]*[\r\n]) - | (?P /(?: \#[A-Fa-f\d][A-Fa-f\d] | [^#/%\[\]()<>{}\s])+ ) - | (?P [-+]? (?: \d*\.\d+ | \d+ ) ) - | (?P [A-Za-z] [^#/%\[\]()<>{}\s]*) - | (?P \([^()\\]*) - | (?P <[A-Fa-f\d\s]*>) - | (?P <<) - | (?P >>) - | (?P .) -) -""", - re.VERBOSE, -) -STRLEXER = re.compile( - rb"""(?: - (?P \\[0-7]{1,3}) - | (?P \\(?:\r\n?|\n)) - | (?P \\.) - | (?P \() - | (?P \)) - | (?P \r\n?|\n) - | (?P .) 
-)""", - re.VERBOSE, -) -HEXDIGIT = re.compile(rb"#([A-Fa-f\d][A-Fa-f\d])") -EOLR = re.compile(rb"\r\n?|\n") -SPC = re.compile(rb"\s") - - -class PSInMemoryParser: - """ - Parser for in-memory data streams. - """ - - def __init__(self, data: Union[bytes, mmap.mmap]) -> None: - self.data = data - self.pos = 0 - self.end = len(data) - self._tokens: Deque[Tuple[int, PSBaseParserToken]] = deque() - - def reinit(self, data: bytes) -> None: - """Reinitialize parser with a new buffer.""" - self.data = data - self.seek(0) - - def seek(self, pos: int) -> None: - """Seek to a position and reinitialize parser state.""" - self.pos = pos - self._curtoken = b"" - self._curtokenpos = 0 - self._tokens.clear() - - def tell(self) -> int: - """Get the current position in the buffer.""" - return self.pos - - def read(self, objlen: int) -> bytes: - """Read data from current position, advancing to the end of - this data.""" - pos = self.pos - self.pos = min(pos + objlen, len(self.data)) - return self.data[pos : self.pos] - - def nextline(self) -> Tuple[int, bytes]: - r"""Fetches a next line that ends either with \r, \n, or \r\n.""" - if self.pos == self.end: - raise PSEOF - linepos = self.pos - m = EOLR.search(self.data, self.pos) - if m is None: - self.pos = self.end - else: - self.pos = m.end() - return (linepos, self.data[linepos : self.pos]) - - def revreadlines(self) -> Iterator[bytes]: - """Fetches a next line backwards. - - This is used to locate the trailers at the end of a file. So, - it isn't actually used in PSInMemoryParser, but is here for - completeness. 
- """ - endline = pos = self.end - while True: - nidx = self.data.rfind(b"\n", 0, pos) - ridx = self.data.rfind(b"\r", 0, pos) - best = max(nidx, ridx) - if best == -1: - yield self.data[:endline] - break - yield self.data[best + 1 : endline] - endline = best + 1 - pos = best - if pos > 0 and self.data[pos - 1 : pos + 1] == b"\r\n": - pos -= 1 - - def get_inline_data( - self, target: bytes = b"EI", blocksize: int = -1 - ) -> Tuple[int, bytes]: - """Get the data for an inline image up to the target - end-of-stream marker. - - Returns a tuple of the position of the target in the data and the - data *including* the end of stream marker. Advances the file - pointer to a position after the end of the stream. - - The caller is responsible for removing the end-of-stream if - necessary (this depends on the filter being used) and parsing - the end-of-stream token (likewise) if necessary. - """ - tpos = self.data.find(target, self.pos) - if tpos != -1: - nextpos = tpos + len(target) - result = (tpos, self.data[self.pos : nextpos]) - self.pos = nextpos - return result - return (-1, b"") - - def __iter__(self) -> Iterator[Tuple[int, PSBaseParserToken]]: - """Iterate over tokens.""" - return self - - def nexttoken(self) -> Tuple[int, PSBaseParserToken]: - """Get the next token in iteration, raising PSEOF when done.""" - try: - return self.__next__() - except StopIteration: - raise PSEOF - - def __next__(self) -> Tuple[int, PSBaseParserToken]: - """Get the next token in iteration, raising StopIteration when - done.""" - while True: - m = LEXER.match(self.data, self.pos) - if m is None: # can only happen at EOS - raise StopIteration - self._curtokenpos = m.start() - self.pos = m.end() - if m.lastgroup not in ("whitespace", "comment"): # type: ignore - # Okay, we got a token or something - break - self._curtoken = m[0] - if m.lastgroup == "name": # type: ignore - self._curtoken = m[0][1:] - self._curtoken = HEXDIGIT.sub( - lambda x: bytes((int(x[1], 16),)), self._curtoken - ) - 
tok = LIT(name_str(self._curtoken)) - return (self._curtokenpos, tok) - if m.lastgroup == "number": # type: ignore - if b"." in self._curtoken: - return (self._curtokenpos, float(self._curtoken)) - else: - return (self._curtokenpos, int(self._curtoken)) - if m.lastgroup == "startdict": # type: ignore - return (self._curtokenpos, KEYWORD_DICT_BEGIN) - if m.lastgroup == "enddict": # type: ignore - return (self._curtokenpos, KEYWORD_DICT_END) - if m.lastgroup == "startstr": # type: ignore - return self._parse_endstr(self.data[m.start() + 1 : m.end()], m.end()) - if m.lastgroup == "hexstr": # type: ignore - self._curtoken = SPC.sub(b"", self._curtoken[1:-1]) - if len(self._curtoken) % 2 == 1: - self._curtoken += b"0" - return (self._curtokenpos, unhexlify(self._curtoken)) - # Anything else is treated as a keyword (whether explicitly matched or not) - if self._curtoken == b"true": - return (self._curtokenpos, True) - elif self._curtoken == b"false": - return (self._curtokenpos, False) - else: - return (self._curtokenpos, KWD(self._curtoken)) - - def _parse_endstr(self, start: bytes, pos: int) -> Tuple[int, PSBaseParserToken]: - """Parse the remainder of a string.""" - # Handle nonsense CRLF conversion in strings (PDF 1.7, p.15) - parts = [EOLR.sub(b"\n", start)] - paren = 1 - for m in STRLEXER.finditer(self.data, pos): - self.pos = m.end() - if m.lastgroup == "parenright": # type: ignore - paren -= 1 - if paren == 0: - # By far the most common situation! - break - parts.append(m[0]) - elif m.lastgroup == "parenleft": # type: ignore - parts.append(m[0]) - paren += 1 - elif m.lastgroup == "escape": # type: ignore - chr = m[0][1:2] - if chr not in ESC_STRING: - log.warning("Unrecognized escape %r", m[0]) - parts.append(chr) - else: - parts.append(bytes((ESC_STRING[chr],))) - elif m.lastgroup == "octal": # type: ignore - chrcode = int(m[0][1:], 8) - if chrcode >= 256: - # PDF1.7 p.16: "high-order overflow shall be - # ignored." 
- log.warning("Invalid octal %r (%d)", m[0][1:], chrcode) - else: - parts.append(bytes((chrcode,))) - elif m.lastgroup == "newline": # type: ignore - # Handle nonsense CRLF conversion in strings (PDF 1.7, p.15) - parts.append(b"\n") - elif m.lastgroup == "linebreak": # type: ignore - pass - else: - parts.append(m[0]) - if paren != 0: - log.warning("Unterminated string at %d", pos) - raise StopIteration - return (self._curtokenpos, b"".join(parts)) - - -# Stack slots may by occupied by any of: -# * the name of a literal -# * the PSBaseParserToken types -# * list (via KEYWORD_ARRAY) -# * dict (via KEYWORD_DICT) -# * subclass-specific extensions (e.g. PDFStream, PDFObjRef) via ExtraT -ExtraT = TypeVar("ExtraT") -PSStackType = Union[str, float, bool, PSLiteral, bytes, List, Dict, ExtraT] -PSStackEntry = Tuple[int, PSStackType[ExtraT]] - - -class PSStackParser(Generic[ExtraT]): - """Basic parser for PDF objects, can take a file or a `bytes` as - input.""" - - _mmap: Optional[mmap.mmap] = None - - def __init__(self, reader: Union[BinaryIO, bytes]) -> None: - self.reinit(reader) - - def __del__(self): - if self._mmap is not None: - self._mmap.close() - - def reinit(self, reader: Union[BinaryIO, bytes]) -> None: - """Reinitialize parser with a new file or buffer.""" - if isinstance(reader, bytes): - self._parser: Union[PSInMemoryParser, PSFileParser] = PSInMemoryParser( - reader - ) - else: - try: - if self._mmap is not None: - self._mmap.close() - self._mmap = None - self._mmap = mmap.mmap(reader.fileno(), 0, access=mmap.ACCESS_READ) - self._parser = PSInMemoryParser(self._mmap) - except io.UnsupportedOperation: - log.warning( - "mmap not supported on %r, falling back to file parser", reader - ) - self._parser = PSFileParser(reader) - self.reset() - - def reset(self) -> None: - """Reset parser state.""" - self.context: List[Tuple[int, Optional[str], List[PSStackEntry[ExtraT]]]] = [] - self.curtype: Optional[str] = None - self.curstack: List[PSStackEntry[ExtraT]] = [] - 
self.results: List[PSStackEntry[ExtraT]] = [] - - def seek(self, pos: int) -> None: - """Seek to a position and reset parser state.""" - self._parser.seek(pos) - self.reset() - - def tell(self) -> int: - """Get the current position in the file.""" - return self._parser.tell() - - def push(self, *objs: PSStackEntry[ExtraT]) -> None: - """Push some objects onto the stack.""" - self.curstack.extend(objs) - - def pop(self, n: int) -> List[PSStackEntry[ExtraT]]: - """Pop some objects off the stack.""" - objs = self.curstack[-n:] - self.curstack[-n:] = [] - return objs - - def popall(self) -> List[PSStackEntry[ExtraT]]: - """Pop all the things off the stack.""" - objs = self.curstack - self.curstack = [] - return objs - - def add_results(self, *objs: PSStackEntry[ExtraT]) -> None: - """Move some objects to the output.""" - try: - log.debug("add_results: %r", objs) - except Exception: - log.debug("add_results: (unprintable object)") - self.results.extend(objs) - - def start_type(self, pos: int, type: str) -> None: - """Start a composite object (array, dict, etc).""" - self.context.append((pos, self.curtype, self.curstack)) - (self.curtype, self.curstack) = (type, []) - log.debug("start_type: pos=%r, type=%r", pos, type) - - def end_type(self, type: str) -> Tuple[int, List[PSStackType[ExtraT]]]: - """End a composite object (array, dict, etc).""" - if self.curtype != type: - raise PSTypeError(f"Type mismatch: {self.curtype!r} != {type!r}") - objs = [obj for (_, obj) in self.curstack] - (pos, self.curtype, self.curstack) = self.context.pop() - log.debug("end_type: pos=%r, type=%r, objs=%r", pos, type, objs) - return (pos, objs) - - def do_keyword(self, pos: int, token: PSKeyword) -> None: - """Handle a PDF keyword.""" - pass - - def flush(self) -> None: - """Get everything off the stack and into the output?""" - pass - - def nextobject(self) -> PSStackEntry[ExtraT]: - """Yields a list of objects. - - Arrays and dictionaries are represented as Python lists and - dictionaries. 
- - :return: keywords, literals, strings, numbers, arrays and dictionaries. - """ - while not self.results: - (pos, token) = self.nexttoken() - if isinstance(token, (int, float, bool, str, bytes, PSLiteral)): - # normal token - self.push((pos, token)) - elif token == KEYWORD_ARRAY_BEGIN: - # begin array - self.start_type(pos, "a") - elif token == KEYWORD_ARRAY_END: - # end array - try: - self.push(self.end_type("a")) - except PSTypeError: - if settings.STRICT: - raise - elif token == KEYWORD_DICT_BEGIN: - # begin dictionary - self.start_type(pos, "d") - elif token == KEYWORD_DICT_END: - # end dictionary - try: - (pos, objs) = self.end_type("d") - if len(objs) % 2 != 0: - error_msg = "Invalid dictionary construct: %r" % objs - raise PSSyntaxError(error_msg) - d = { - literal_name(k): v - for (k, v) in choplist(2, objs) - if v is not None - } - self.push((pos, d)) - except PSTypeError: - if settings.STRICT: - raise - elif token == KEYWORD_PROC_BEGIN: - # begin proc - self.start_type(pos, "p") - elif token == KEYWORD_PROC_END: - # end proc - try: - self.push(self.end_type("p")) - except PSTypeError: - if settings.STRICT: - raise - elif isinstance(token, PSKeyword): - log.debug( - "do_keyword: pos=%r, token=%r, stack=%r", - pos, - token, - self.curstack, - ) - self.do_keyword(pos, token) - else: - log.error( - "unknown token: pos=%r, token=%r, stack=%r", - pos, - token, - self.curstack, - ) - self.do_keyword(pos, token) - raise PSException - if self.context: - continue - else: - self.flush() # Does nothing here, but in subclasses... (ugh) - obj = self.results.pop(0) - try: - log.debug("nextobject: %r", obj) - except Exception: - log.debug("nextobject: (unprintable object)") - return obj - - # Delegation follows - def nextline(self) -> Tuple[int, bytes]: - r"""Fetches a next line that ends either with \r, \n, or - \r\n.""" - return self._parser.nextline() - - def revreadlines(self) -> Iterator[bytes]: - """Fetches a next line backwards. 
- - This is used to locate the trailers at the end of a file. - """ - return self._parser.revreadlines() - - def read(self, objlen: int) -> bytes: - """Read data from a specified position, moving the current - position to the end of this data.""" - return self._parser.read(objlen) - - def nexttoken(self) -> Tuple[int, PSBaseParserToken]: - """Get the next token in iteration, raising PSEOF when done.""" - try: - return self.__next__() - except StopIteration: - raise PSEOF - - def get_inline_data(self, target: bytes = b"EI") -> Tuple[int, bytes]: - """Get the data for an inline image up to the target - end-of-stream marker.""" - return self._parser.get_inline_data(target) - - def __iter__(self) -> Iterator[Tuple[int, PSBaseParserToken]]: - """Iterate over tokens.""" - return self - - def __next__(self) -> Tuple[int, PSBaseParserToken]: - """Get the next token in iteration, raising StopIteration when - done.""" - return self._parser.__next__() diff --git a/playa/pdfstructtree.py b/playa/structtree.py similarity index 94% rename from playa/pdfstructtree.py rename to playa/structtree.py index 5f1c69e8..39f07501 100644 --- a/playa/pdfstructtree.py +++ b/playa/structtree.py @@ -17,16 +17,15 @@ from playa.data_structures import NumberTree from playa.exceptions import PDFNoStructTree -from playa.pdfpage import PDFPage -from playa.pdfparser import KEYWORD_NULL -from playa.pdftypes import PDFObjRef, resolve1 -from playa.psparser import PSLiteral +from playa.page import Page +from playa.parser import KEYWORD_NULL, PSLiteral +from playa.pdftypes import ObjRef, resolve1 from playa.utils import decode_text logger = logging.getLogger(__name__) if TYPE_CHECKING: - from playa.pdfdocument import PDFDocument + from playa.document import PDFDocument MatchFunc = Callable[["PDFStructElement"], bool] @@ -103,7 +102,7 @@ class PDFStructElement(Findable): alt_text: Union[str, None] actual_text: Union[str, None] title: Union[str, None] - page_number: Union[int, None] + page_idx: Union[int, 
None] attributes: Dict[str, Any] = field(default_factory=dict) mcids: List[int] = field(default_factory=list) children: List["PDFStructElement"] = field(default_factory=list) @@ -117,12 +116,12 @@ def all_mcids(self) -> Iterator[Tuple[Union[int, None], int]]: """ # Collect them depth-first to preserve ordering for mcid in self.mcids: - yield self.page_number, mcid + yield self.page_idx, mcid d = deque(self.children) while d: el = d.popleft() for mcid in el.mcids: - yield el.page_number, mcid + yield el.page_idx, mcid d.extendleft(reversed(el.children)) def to_dict(self) -> Dict[str, Any]: @@ -154,17 +153,17 @@ class PDFStructTree(Findable): Args: doc: Document from which to extract structure tree - pages: List of (number, page) pairs - numbers will be used to - identify pages in the tree through the `page_number` + pages: List of (index, page) pairs - indices will be used to + identify pages in the tree through the `page_idx` attribute of `PDFStructElement`. """ - page: Union[PDFPage, None] + page: Union[Page, None] def __init__( self, doc: "PDFDocument", - pages: Union[Iterable[PDFPage], None] = None, + pages: Union[Iterable[Page], None] = None, ): if "StructTreeRoot" not in doc.catalog: raise PDFNoStructTree("Catalog has no 'StructTreeRoot' entry") @@ -175,11 +174,11 @@ def __init__( self.page_dict: Dict[Any, Union[int, None]] if pages is None: - self.page_dict = {page.pageid: page.page_number for page in doc.pages} + self.page_dict = {page.pageid: page.page_idx for page in doc.pages} self._parse_struct_tree() else: pagelist = list(pages) - self.page_dict = {page.pageid: page.page_number for page in pagelist} + self.page_dict = {page.pageid: page.page_idx for page in pagelist} parent_tree_obj = self.root.get("ParentTree") # If we have a single page then we will work backwards from # its ParentTree - this is because structure elements could @@ -258,12 +257,12 @@ def _make_element( # We hopefully caught these earlier assert "MCID" not in obj, "Uncaught MCR: %s" % 
obj assert "Obj" not in obj, "Uncaught OBJR: %s" % obj - # Get page number if necessary - page_number = None + # Get page index if necessary + page_idx = None if self.page_dict is not None and "Pg" in obj: page_objid = obj["Pg"].objid assert page_objid in self.page_dict, "Object on unparsed page: %s" % obj - page_number = self.page_dict[page_objid] + page_idx = self.page_dict[page_objid] obj_tag = "" if "S" in obj: obj_tag = decode_text(obj["S"].name) @@ -286,7 +285,7 @@ def _make_element( element = PDFStructElement( type=obj_tag, id=element_id, - page_number=page_number, + page_idx=page_idx, revision=revision, lang=lang, title=title, @@ -368,7 +367,7 @@ def _parse_struct_tree(self) -> None: child = obj["Obj"] elif "MCID" in obj: continue - if isinstance(child, PDFObjRef): + if isinstance(child, ObjRef): d.append(child) # Traverse depth-first, removing empty elements (unsure how to @@ -438,7 +437,7 @@ def _resolve_children(self, seen: Dict[str, Any]) -> None: elif "Obj" in obj: child = obj["Obj"] # NOTE: if, not elif, in case of OBJR above - if isinstance(child, PDFObjRef): + if isinstance(child, ObjRef): child_element, _ = seen.get(repr(child), (None, None)) if child_element is not None: element.children.append(child_element) diff --git a/pyproject.toml b/pyproject.toml index 5a14530d..ac290cc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,3 +67,14 @@ ban-relative-imports = "all" [tool.pytest.ini_options] testpaths = [ "tests" ] +[tool.hatch.envs.hatch-test] +extra-dependencies = [ "pdfminer.six" ] + +[tool.hatch.envs.bench] +dependencies = [ "pdfminer.six" ] + +[tool.hatch.envs.bench.scripts] +all = [ + "python tests/benchmark_parser.py", + "python tests/benchmark_convert.py", +] diff --git a/tests/benchmark_convert.py b/tests/benchmark_convert.py index e1ced060..01519e7e 100644 --- a/tests/benchmark_convert.py +++ b/tests/benchmark_convert.py @@ -30,10 +30,10 @@ def benchmark_one_pdf(path: Path): passwords = PASSWORDS.get(path.name, [""]) for password 
in passwords: - LOG.debug("Reading %s", path) + LOG.info("Reading %s", path) with playa.open(path, password=password) as pdf: for page in pdf.pages: - _ = page.layout + _ = list(page.layout) def benchmark_one_pdfminer(path: Path): @@ -57,17 +57,21 @@ def benchmark_one_pdfminer(path: Path): if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) + # Silence warnings about broken PDFs + logging.basicConfig(level=logging.ERROR) niter = 10 - if len(sys.argv) == 1 or "pdfminer" in sys.argv[1:]: - start = time.time() - for _ in range(niter): - for path in ALLPDFS: - benchmark_one_pdfminer(path) - LOG.info("pdfminer.six took %f", time.time() - start) - if len(sys.argv) == 1 or "playa" in sys.argv[1:]: - start = time.time() - for _ in range(niter): - for path in ALLPDFS: + miner_time = beach_time = 0.0 + for iter in range(niter + 1): + for path in ALLPDFS: + if len(sys.argv) == 1 or "playa" in sys.argv[1:]: + start = time.time() benchmark_one_pdf(path) - LOG.info("PLAYA took %f", time.time() - start) + if iter != 0: + beach_time += time.time() - start + if len(sys.argv) == 1 or "pdfminer" in sys.argv[1:]: + start = time.time() + benchmark_one_pdfminer(path) + if iter != 0: + miner_time += time.time() - start + print("pdfminer.six took %.2fs / iter" % (miner_time / niter,)) + print("PLAYA took %.2fs / iter" % (beach_time / niter,)) diff --git a/tests/benchmark_parser.py b/tests/benchmark_parser.py index df999449..f67385d3 100644 --- a/tests/benchmark_parser.py +++ b/tests/benchmark_parser.py @@ -272,21 +272,21 @@ def bench_bytes(): - from playa.psparser import PSInMemoryParser + from playa.parser import Lexer runs = 100 start = time.time() - parser = PSInMemoryParser(DATA * runs) + parser = Lexer(DATA * runs) _ = list(parser) print( - "PLAYA Parser (bytes): %fms / run" % ((time.time() - start) / runs * 1000), + "PLAYA Lexer (bytes): %fms / run" % ((time.time() - start) / runs * 1000), ) def bench_mmap(): import mmap - from playa.psparser import PSInMemoryParser 
+ from playa.parser import Lexer with tempfile.NamedTemporaryFile() as tf: runs = 100 @@ -295,55 +295,17 @@ def bench_mmap(): with open(tf.name, "rb") as infh: start = time.time() mapping = mmap.mmap(infh.fileno(), 0, access=mmap.ACCESS_READ) - parser = PSInMemoryParser(mapping) + parser = Lexer(mapping) _ = list(parser) print( - "PLAYA Parser (mmap): %fms / run" + "PLAYA Lexer (mmap): %fms / run" % ((time.time() - start) / runs * 1000), ) -def bench_bytesio(): - from pdfminer.psparser import PSEOF, PSBaseParser - - runs = 100 - start = time.time() - parser = PSBaseParser(BytesIO(DATA * runs)) - while True: - try: - _ = parser.nexttoken() - except PSEOF: - break - print( - "pdfminer.six Parser (BytesIO): %fms / run" - % ((time.time() - start) / runs * 1000), - ) - - def bench_playa(): - from playa.pdfdocument import PDFDocument - from playa.pdfpage import PDFPage - from playa.psparser import PSFileParser + from playa.document import PDFDocument - runs = 100 - start = time.time() - parser = PSFileParser(BytesIO(DATA * runs)) - _ = list(parser) - print( - "PLAYA Parser (BytesIO): %fms / run" % ((time.time() - start) / runs * 1000), - ) - with tempfile.NamedTemporaryFile() as tf: - runs = 100 - with open(tf.name, "wb") as outfh: - outfh.write(DATA * runs) - with open(tf.name, "rb") as infh: - start = time.time() - parser = PSFileParser(infh) - _ = list(parser) - print( - "PLAYA Parser (BinaryIO): %fms / run" - % ((time.time() - start) / runs * 1000), - ) bench_bytes() bench_mmap() @@ -352,7 +314,7 @@ def bench_playa(): for _ in range(runs): with open(TESTDIR / "contrib" / "pagelabels.pdf", "rb") as infh: doc = PDFDocument(infh) - page = next(PDFPage.create_pages(doc)) + page = doc.pages[0] _ = page.layout print( "PLAYA Interpreter: %dms / run" % ((time.time() - start) / runs * 1000), @@ -376,7 +338,7 @@ def bench_pdfminer(): except PSEOF: break print( - "pdfminer.six Parser (BytesIO): %fms / run" + "pdfminer.six Lexer (BytesIO): %fms / run" % ((time.time() - start) / 
runs * 1000), ) with tempfile.NamedTemporaryFile() as tf: @@ -391,7 +353,7 @@ def bench_pdfminer(): except PSEOF: break print( - "pdfminer.six Parser (BinaryIO): %fms / run" + "pdfminer.six Lexer (BinaryIO): %fms / run" % ((time.time() - start) / runs * 1000), ) runs = 20 @@ -418,7 +380,5 @@ def bench_pdfminer(): bench_playa() if len(sys.argv) > 1 and sys.argv[1] == "bytes": bench_bytes() - if len(sys.argv) > 1 and sys.argv[1] == "bytesio": - bench_bytesio() if len(sys.argv) > 1 and sys.argv[1] == "mmap": bench_mmap() diff --git a/tests/test_open.py b/tests/test_open.py index fe67b616..79699f8a 100644 --- a/tests/test_open.py +++ b/tests/test_open.py @@ -24,36 +24,58 @@ @pytest.mark.parametrize("path", ALLPDFS, ids=str) def test_open(path: Path) -> None: - """Open all the documents""" + """Open all the documents and compare with pdfplumber""" + from pdfminer.converter import PDFPageAggregator + from pdfminer.pdfdocument import PDFDocument + from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager + from pdfminer.pdfpage import PDFPage + from pdfminer.pdfparser import PDFParser + passwords = PASSWORDS.get(path.name, [""]) for password in passwords: - with playa.open(TESTDIR / path, password=password) as _pdf: - pass - - -def test_analyze() -> None: - """Test the layout analyzer (FIXME: PLAYA Ain't a Layout Analyzer)""" - with playa.open( - TESTDIR / "2023-04-06-ODJ et Résolutions-séance xtra 6 avril 2023.pdf" - ) as pdf: - for page in pdf.pages: - page_objs = list(page.layout) - print(len(page_objs)) + miner = [] + with open(path, "rb") as infh: + try: + rsrc = PDFResourceManager() + agg = PDFPageAggregator(rsrc, pageno=1) + interp = PDFPageInterpreter(rsrc, agg) + pdf = PDFDocument(PDFParser(infh), password=password) + for page in PDFPage.create_pages(pdf): + interp.process_page(page) + layout = agg.result + for item in layout: + miner.append((type(item).__name__, item.bbox)) + except Exception: + continue + + itor = iter(miner) + with playa.open(path, 
password=password) as pdf: + for page in pdf.pages: + for item in page.layout: + thingy = (type(item).__name__, item.bbox) + assert thingy == next(itor) def test_inline_data() -> None: - # No, there's no easy way to unit test PDFContentParser directly. - # The necessary mocking would be useless considering that I will - # shortly demolish these redundant and confusing APIs. with playa.open(TESTDIR / "contrib" / "issue-1008-inline-ascii85.pdf") as doc: - _ = doc.pages[0].layout + page = doc.pages[0] + items = list(page.layout) + assert len(items) == 456 def test_multiple_contents() -> None: with playa.open(TESTDIR / "jo.pdf") as doc: page = doc.pages[0] assert len(page.contents) > 1 - _ = page.layout + items = list(page.layout) + assert len(items) == 898 + + +def test_xobjects() -> None: + with playa.open(TESTDIR / "encryption/aes-256.pdf", password="foo") as doc: + for page in doc.pages: + for item in page.layout: + print(item) def test_weakrefs() -> None: @@ -67,4 +89,6 @@ def test_weakrefs() -> None: if __name__ == "__main__": - test_open(TESTDIR / "simple5.pdf") + import logging + logging.basicConfig(level=logging.DEBUG) + test_xobjects() diff --git a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py index 969643dc..af020f94 100644 --- a/tests/test_pdfdocument.py +++ b/tests/test_pdfdocument.py @@ -10,12 +10,10 @@ import playa import playa.settings from playa.data_structures import NameTree +from playa.document import read_header from playa.exceptions import PDFSyntaxError -from playa.pdfdocument import read_header from playa.utils import decode_text -playa.settings.STRICT = True - TESTDIR = Path(__file__).parent.parent / "samples" @@ -34,6 +32,12 @@ def test_read_header(): assert read_header(BytesIO(b"%PDF-1.7")) == "1.7" +def test_objects(): + with playa.open(TESTDIR / "simple1.pdf") as doc: + for obj in doc: + print(obj) + + def test_page_labels(): with playa.open(TESTDIR / "contrib" / "pagelabels.pdf") as doc: labels = [label for _, label in 
zip(range(10), doc.page_labels)] diff --git a/tests/test_pdfparser.py b/tests/test_pdfparser.py new file mode 100644 index 00000000..e603fa05 --- /dev/null +++ b/tests/test_pdfparser.py @@ -0,0 +1,22 @@ +from pathlib import Path + +from playa.parser import PDFParser + +TESTDIR = Path(__file__).parent.parent / "samples" + + +class MockDoc: + def __call__(self): + return self + + decipher = None + + +def test_indirect_objects(): + """Verify that indirect objects are parsed properly.""" + with open(TESTDIR / "simple2.pdf", "rb") as infh: + data = infh.read() + doc = MockDoc() + parser = PDFParser(data, doc) + for obj in parser: + print(obj) diff --git a/tests/test_pdfstructtree.py b/tests/test_pdfstructtree.py index ae5fe15b..90eb37c0 100644 --- a/tests/test_pdfstructtree.py +++ b/tests/test_pdfstructtree.py @@ -3,7 +3,7 @@ from pathlib import Path import playa -from playa.pdfstructtree import PDFStructTree +from playa.structtree import PDFStructTree TESTDIR = Path(__file__).parent.parent / "samples" @@ -64,14 +64,14 @@ def test_all_mcids(self) -> None: stree = PDFStructTree(pdf) sect = next(stree.find_all("Sect")) mcids = list(sect.all_mcids()) - page_numbers = set(page for page, mcid in mcids) - assert 1 in page_numbers - assert 2 in page_numbers + page_indices = set(page for page, mcid in mcids) + assert 0 in page_indices + assert 1 in page_indices stree = PDFStructTree(pdf, [pdf.pages[1]]) sect = next(stree.find_all("Sect")) mcids = list(sect.all_mcids()) - page_numbers = set(page for page, mcid in mcids) - assert page_numbers == {2} + page_indices = set(page for page, mcid in mcids) + assert page_indices == {1} for p in sect.find_all("P"): assert set(mcid for page, mcid in p.all_mcids()) == set(p.mcids) diff --git a/tests/test_psparser.py b/tests/test_psparser.py index 948a042b..ce05d9f1 100644 --- a/tests/test_psparser.py +++ b/tests/test_psparser.py @@ -1,30 +1,24 @@ import logging -import tempfile -from io import BytesIO from typing import Any, List, Tuple 
import pytest -from playa.exceptions import PSEOF -from playa.psparser import ( +from playa.parser import ( KEYWORD_DICT_BEGIN, KEYWORD_DICT_END, + Lexer, + Parser, +) +from playa.pdftypes import ( KWD, LIT, - PSFileParser, - PSInMemoryParser, - PSStackParser, keyword_name, literal_name, ) logger = logging.getLogger(__name__) - -class TestPSFileParser: - """Simplistic Test cases""" - - TESTDATA = rb"""%!PS +TESTDATA1 = rb"""%!PS begin end " @ # /a/BCD /Some_Name /foo#5f#xbaa @@ -43,132 +37,113 @@ class TestPSFileParser: [ 1 (z) ! ] << /foo (bar) >> """ +TOKENS1 = [ + (5, KWD(b"begin")), + (11, KWD(b"end")), + (16, KWD(b'"')), + (19, KWD(b"@")), + (21, KWD(b"#")), + (23, LIT("a")), + (25, LIT("BCD")), + (30, LIT("Some_Name")), + (41, LIT("foo_")), + (48, KWD(b"#")), + (49, KWD(b"xbaa")), + (54, 0), + (56, 1), + (59, -2), + (62, 0.5), + (65, 1.234), + (71, b"abc"), + (77, b""), + (80, b"abc ( def ) ghi"), + (98, b"def \x00 4ghi"), + (118, b"bach\\slask"), + (132, b"foo\nbaa"), + (143, b"this % is not a comment."), + (170, b"foo\nbaa"), + (180, b"foobaa"), + (191, b""), + (194, b" "), + (199, b"@@ "), + (211, b"\xab\xcd\x00\x124\x50"), + (226, KWD(b"func")), + (230, LIT("a")), + (232, LIT("b")), + (234, KWD(b"{")), + (235, b"c"), + (238, KWD(b"do*")), + (241, KWD(b"}")), + (242, KWD(b"def")), + (246, KWD(b"[")), + (248, 1), + (250, b"z"), + (254, KWD(b"!")), + (256, KWD(b"]")), + (258, KWD(b"<<")), + (261, LIT("foo")), + (266, b"bar"), + (272, KWD(b">>")), +] +OBJS1 = [ + (23, LIT("a")), + (25, LIT("BCD")), + (30, LIT("Some_Name")), + (41, LIT("foo_")), + (54, 0), + (56, 1), + (59, -2), + (62, 0.5), + (65, 1.234), + (71, b"abc"), + (77, b""), + (80, b"abc ( def ) ghi"), + (98, b"def \x00 4ghi"), + (118, b"bach\\slask"), + (132, b"foo\nbaa"), + (143, b"this % is not a comment."), + (170, b"foo\nbaa"), + (180, b"foobaa"), + (191, b""), + (194, b" "), + (199, b"@@ "), + (211, b"\xab\xcd\x00\x124\x50"), + (230, LIT("a")), + (232, LIT("b")), + (234, [b"c"]), + (246, [1, 
b"z"]), + (258, {"foo": b"bar"}), +] + + +def test_lexer_miner(): + """Lexer test case from pdfminer""" + tokens = list(Lexer(TESTDATA1)) + logger.info(tokens) + assert tokens == TOKENS1 + + +def test_parser_miner(): + """Parser test case from pdfminer""" - TOKENS = [ - (5, KWD(b"begin")), - (11, KWD(b"end")), - (16, KWD(b'"')), - (19, KWD(b"@")), - (21, KWD(b"#")), - (23, LIT("a")), - (25, LIT("BCD")), - (30, LIT("Some_Name")), - (41, LIT("foo_")), - (48, KWD(b"#")), - (49, KWD(b"xbaa")), - (54, 0), - (56, 1), - (59, -2), - (62, 0.5), - (65, 1.234), - (71, b"abc"), - (77, b""), - (80, b"abc ( def ) ghi"), - (98, b"def \x00 4ghi"), - (118, b"bach\\slask"), - (132, b"foo\nbaa"), - (143, b"this % is not a comment."), - (170, b"foo\nbaa"), - (180, b"foobaa"), - (191, b""), - (194, b" "), - (199, b"@@ "), - (211, b"\xab\xcd\x00\x124\x50"), - (226, KWD(b"func")), - (230, LIT("a")), - (232, LIT("b")), - (234, KWD(b"{")), - (235, b"c"), - (238, KWD(b"do*")), - (241, KWD(b"}")), - (242, KWD(b"def")), - (246, KWD(b"[")), - (248, 1), - (250, b"z"), - (254, KWD(b"!")), - (256, KWD(b"]")), - (258, KWD(b"<<")), - (261, LIT("foo")), - (266, b"bar"), - (272, KWD(b">>")), - ] - - OBJS = [ - (23, LIT("a")), - (25, LIT("BCD")), - (30, LIT("Some_Name")), - (41, LIT("foo_")), - (54, 0), - (56, 1), - (59, -2), - (62, 0.5), - (65, 1.234), - (71, b"abc"), - (77, b""), - (80, b"abc ( def ) ghi"), - (98, b"def \x00 4ghi"), - (118, b"bach\\slask"), - (132, b"foo\nbaa"), - (143, b"this % is not a comment."), - (170, b"foo\nbaa"), - (180, b"foobaa"), - (191, b""), - (194, b" "), - (199, b"@@ "), - (211, b"\xab\xcd\x00\x124\x50"), - (230, LIT("a")), - (232, LIT("b")), - (234, [b"c"]), - (246, [1, b"z"]), - (258, {"foo": b"bar"}), - ] - - def get_tokens(self, s): - class MyParser(PSFileParser): - def flush(self): - self.add_results(*self.popall()) - - parser = MyParser(BytesIO(s)) - r = [] - try: - while True: - r.append(parser.nexttoken()) - except PSEOF: - pass - return r - - def 
get_objects(self, s): - class MyParser(PSStackParser): - def flush(self): - self.add_results(*self.popall()) - - parser = MyParser(s) - r = [] - try: - while True: - r.append(parser.nextobject()) - except PSEOF: - pass - return r - - def test_1(self): - tokens = self.get_tokens(self.TESTDATA) - logger.info(tokens) - assert tokens == self.TOKENS - - def test_2(self): - objs = self.get_objects(self.TESTDATA) - logger.info(objs) - assert objs == self.OBJS - - -TESTDATA = b""" + # FIXME: Still relying on subclassing + class MyParser(Parser): + def flush(self) -> None: + objs = self.popall() + self.add_results(*objs) + + objs = list(MyParser(TESTDATA1)) + logger.info(objs) + assert objs == OBJS1 + + +TESTDATA2 = b""" ugh foo\r bar\rbaz quxx bog""" -EXPECTED = [ +EXPECTED2 = [ (0, b"\n"), (1, b"ugh\n"), (5, b"foo\r\n"), @@ -179,54 +154,20 @@ def test_2(self): ] -def run_parsers(data: bytes, expected: List[Any], makefunc: Any) -> None: - """Test stuff on both BytesIO and BinaryIO.""" - bp = PSInMemoryParser(data) - output = [] - func = makefunc(bp) - while True: - try: - output.append(func()) - except PSEOF: - break - assert output == expected - with tempfile.NamedTemporaryFile() as tf: - with open(tf.name, "wb") as outfh: - outfh.write(data) - with open(tf.name, "rb") as infh: - fp = PSFileParser(infh) - func = makefunc(fp) - output = [] - while True: - try: - output.append(func()) - except PSEOF: - break - assert output == expected - - -def test_nextline() -> None: +def test_lines() -> None: """Verify that we replicate the old nextline method.""" - run_parsers(TESTDATA, EXPECTED, lambda foo: foo.nextline) + parser = Lexer(TESTDATA2) + output = list(parser.iter_lines()) + assert output == EXPECTED2 -def test_revreadlines() -> None: +def test_revlines() -> None: """Verify that we replicate the old revreadlines method.""" - expected = list(reversed([line for pos, line in EXPECTED])) - - def make_next(parser: Any) -> Any: - itor = parser.revreadlines() - - def nextor() -> 
Any: - try: - line = next(itor) - except StopIteration: - raise PSEOF - return line - - return nextor - - run_parsers(TESTDATA, expected, make_next) + expected = list(reversed([line for pos, line in EXPECTED2])) + parser = Lexer(TESTDATA2) + parser.seek(parser.end) + output = list(parser.reverse_iter_lines()) + assert output == expected SIMPLE1 = b"""1 0 obj @@ -258,22 +199,12 @@ def nextor() -> Any: def list_parsers(data: bytes, expected: List[Any], discard_pos: bool = False) -> None: - bp = PSInMemoryParser(data) + bp = Lexer(data) if discard_pos: tokens: List[Any] = [tok for pos, tok in list(bp)] else: tokens = list(bp) assert tokens == expected - with tempfile.NamedTemporaryFile() as tf: - with open(tf.name, "wb") as outfh: - outfh.write(data) - with open(tf.name, "rb") as infh: - fp = PSFileParser(infh) - if discard_pos: - tokens = [tok for pos, tok in list(fp)] - else: - tokens = list(fp) - assert tokens == expected def test_new_parser() -> None: @@ -364,18 +295,10 @@ def inline_parsers( nexttoken: Any = None, blocksize: int = 16, ) -> None: - bp = PSInMemoryParser(data) + bp = Lexer(data) assert bp.get_inline_data(target=target, blocksize=blocksize) == expected if nexttoken is not None: - assert bp.nexttoken() == nexttoken - with tempfile.NamedTemporaryFile() as tf: - with open(tf.name, "wb") as outfh: - outfh.write(data) - with open(tf.name, "rb") as infh: - fp = PSFileParser(infh) - assert fp.get_inline_data(target=target, blocksize=blocksize) == expected - if nexttoken is not None: - assert fp.nexttoken() == nexttoken + assert next(bp) == nexttoken def test_get_inline_data() -> None: