diff --git a/playa/pdfdocument.py b/playa/pdfdocument.py
index 64c9238..e67f335 100644
--- a/playa/pdfdocument.py
+++ b/playa/pdfdocument.py
@@ -42,6 +42,7 @@
 from playa.pdfparser import KEYWORD_XREF, PDFParser, PDFStreamParser
 from playa.pdftypes import (
     DecipherCallable,
+    PDFObjRef,
     PDFStream,
     decipher_all,
     dict_value,
@@ -68,7 +69,10 @@
 LITERAL_OBJSTM = LIT("ObjStm")
 LITERAL_XREF = LIT("XRef")
 LITERAL_CATALOG = LIT("Catalog")
+LITERAL_PAGE = LIT("Page")  # Type value of a single page object
+LITERAL_PAGES = LIT("Pages")  # Type value of a page-tree node that has Kids
 KEYWORD_OBJ = KWD(b"obj")
+INHERITABLE_PAGE_ATTRS = {"Resources", "MediaBox", "CropBox", "Rotate"}  # kids inherit
 
 
 class PDFBaseXRef:
@@ -907,6 +911,72 @@ def get_page_labels(self) -> Iterator[str]:
 
         return page_labels.labels
 
+    PageType = Dict[Any, Any]  # a page dictionary; its values are not all dicts
+
+    def get_pages_from_xrefs(self) -> Iterator[Tuple[int, PageType]]:
+        """Find pages from the cross-reference tables if the page tree
+        is missing (note that this only happens in invalid PDFs, but
+        it happens.)  Object ids that cannot be fetched are skipped.
+
+        Returns an iterator over (objid, dict) pairs.
+        """
+        for xref in self.xrefs:
+            for object_id in xref.get_objids():
+                try:
+                    obj = self.getobj(object_id)
+                except PDFObjectNotFound:
+                    continue  # listed in an xref table but not retrievable
+                if isinstance(obj, dict) and obj.get("Type") is LITERAL_PAGE:
+                    yield object_id, obj
+
+    def walk_page_tree(self) -> Iterator[Tuple[int, PageType]]:
+        """Iterate over the flattened page tree in reading order, propagating
+        inheritable attributes.  Returns an iterator over (objid, dict) pairs.
+
+        Nodes that cannot be resolved or were already visited are logged and
+        skipped.  If the catalog has no "Pages" entry, the lookup fails on the
+        first iteration (a KeyError for a dict catalog, not an IndexError).
+        """
+        stack = [(self.catalog["Pages"], self.catalog)]
+        visited = set()
+        while stack:
+            (obj, parent) = stack.pop()
+            if isinstance(obj, PDFObjRef):
+                # The PDF specification *requires* the catalog's Pages entry
+                # and every entry in Kids to be an indirect reference.
+                object_id = obj.objid
+            elif isinstance(obj, int):
+                # Should not happen in a valid PDF, but probably does?
+                log.warning("Page tree contains bare integer: %r in %r", obj, parent)
+                object_id = obj
+            else:
+                log.warning("Page tree contains unknown object: %r", obj)
+                continue  # nothing to resolve; never reuse a stale object_id
+
+            # Skip nodes already seen, so that a cyclic tree cannot loop forever
+            if object_id in visited:
+                log.warning("Circular reference %r in page tree", obj)
+                continue
+            visited.add(object_id)
+
+            # Propagate inheritable attributes onto a copy of the node
+            object_properties = dict_value(self.getobj(object_id)).copy()
+            for k, v in parent.items():
+                if k in INHERITABLE_PAGE_ATTRS and k not in object_properties:
+                    object_properties[k] = v
+
+            # Descend depth-first; reversed() keeps Kids in order on the LIFO stack
+            object_type = object_properties.get("Type")
+            if object_type is None and not settings.STRICT:  # See #64
+                object_type = object_properties.get("type")
+            if object_type is LITERAL_PAGES and "Kids" in object_properties:
+                log.debug("Pages: Kids=%r", object_properties["Kids"])
+                for child in reversed(list_value(object_properties["Kids"])):
+                    stack.append((child, object_properties))
+            elif object_type is LITERAL_PAGE:
+                log.debug("Page: %r", object_properties)
+                yield object_id, object_properties
+
     def lookup_name(self, cat: str, key: Union[str, bytes]) -> Any:
         try:
             names = dict_value(self.catalog["Names"])
diff --git a/tests/test_pdfdocument.py b/tests/test_pdfdocument.py
index 940847c..7169a8b 100644
--- a/tests/test_pdfdocument.py
+++ b/tests/test_pdfdocument.py
@@ -35,4 +35,14 @@ def test_read_header():
 def test_page_labels():
     with playa.open(TESTDIR / "contrib" / "pagelabels.pdf") as doc:
         labels = [label for _, label in zip(range(10), doc.get_page_labels())]
-        assert labels == ['iii', 'iv', '1', '2', '1', '2', '3', '4', '5', '6']
+        assert labels == ["iii", "iv", "1", "2", "1", "2", "3", "4", "5", "6"]
+
+
+def test_page_tree():
+    """Walking the page tree of PSC_Station.pdf yields 15 page objects."""
+    with playa.open(TESTDIR / "contrib" / "PSC_Station.pdf") as doc:
+        assert len(list(doc.walk_page_tree())) == 15
+
+
+def test_pages():
+    """Placeholder test: it makes no assertions yet."""