diff --git a/.gitignore b/.gitignore
index f136d47..63833d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,8 +13,6 @@ docs/_build
 /build/
 /dist/
 /pdfminer.six.egg-info/
-tests/*.xml
-tests/*.txt
 .idea/
 .tox/
 .nox/
diff --git a/TODO.md b/TODO.md
index 2d6291e..68cf29f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,5 +1,5 @@
 ## PLAYA 0.2.5
-- [ ] implement CMap parsing for CIDs (submit PR to pdfminer)
+- [x] implement CMap parsing for Encoding CMaps
 - [x] add "default" as a synonym of badly-named "user" space
 - [x] update `pdfplumber` branch and run `pdfplumber` tests in CI
   - [x] reimplement on top of ContentObject
diff --git a/playa/cmapdb.py b/playa/cmapdb.py
index 3ce8c09..6764930 100644
--- a/playa/cmapdb.py
+++ b/playa/cmapdb.py
@@ -9,6 +9,7 @@
 
 """
 
+from bisect import bisect_left
 import functools
 import gzip
 import logging
@@ -31,14 +32,12 @@
     cast,
 )
 
-from playa.encodingdb import name2unicode
 from playa.exceptions import PDFSyntaxError
 from playa.parser import (
     KWD,
     ObjectParser,
     PDFObject,
     PSKeyword,
-    PSLiteral,
     literal_name,
 )
 from playa.utils import choplist, nunpack
@@ -208,15 +207,13 @@ def _load_data(cls, name: str) -> Any:
     @classmethod
     def get_cmap(cls, name: str) -> CMapBase:
         if name == "Identity-H":
-            return IdentityCMap(WMode=0)
-        elif name == "Adobe-Identity-UCS":
-            return IdentityCMap(WMode=0)  # FIXME: WMode???
+            return IdentityCMap(CMapName=name, WMode=0)
         elif name == "Identity-V":
-            return IdentityCMap(WMode=1)
+            return IdentityCMap(CMapName=name, WMode=1)
         elif name == "OneByteIdentityH":
-            return IdentityCMapByte(WMode=0)
+            return IdentityCMapByte(CMapName=name, WMode=0)
         elif name == "OneByteIdentityV":
-            return IdentityCMapByte(WMode=1)
+            return IdentityCMapByte(CMapName=name, WMode=1)
         if name in cls._cmap_cache:
             return cls._cmap_cache[name]
         data = cls._load_data(name)
@@ -260,6 +257,7 @@ def decode_utf16_char(utf16: bytes) -> str:
 
 
 class FileUnicodeMap(UnicodeMap):
+    """ToUnicode map loaded from a PDF stream"""
     def add_cid2bytes(self, cid: int, utf16: bytes) -> None:
         self.add_cid2unichr(cid, decode_utf16_char(utf16))
 
@@ -267,12 +265,6 @@ def add_cid2code(self, cid: int, code: int) -> None:
         unichr = chr(code)
         self.add_cid2unichr(cid, unichr)
 
-    def add_cid2lit(self, cid: int, name: PSLiteral) -> None:
-        # Interpret as an Adobe glyph name.
-        assert isinstance(name.name, str)
-        unichr = name2unicode(name.name)
-        self.add_cid2unichr(cid, unichr)
-
     def add_cid2unichr(self, cid: int, unichr: str) -> None:
         # A0 = non-breaking space, some weird fonts can have a collision on a cid here.
         assert isinstance(unichr, str)
@@ -280,51 +272,29 @@ def add_cid2unichr(self, cid: int, unichr: str) -> None:
             return
         self.cid2unichr[cid] = unichr
 
-
-def add_cid_range(
-    cmap: FileUnicodeMap, start_byte: bytes, end_byte: bytes, cid: int
-) -> None:
-    start_prefix = start_byte[:-4]
-    end_prefix = end_byte[:-4]
-    if start_prefix != end_prefix:
-        log.warning(
-            "The prefix of the start and end byte of "
-            "begincidrange are not the same.",
-        )
-        return
-    svar = start_byte[-4:]
-    evar = end_byte[-4:]
-    start = nunpack(svar)
-    end = nunpack(evar)
-    vlen = len(svar)
-    for i in range(end - start + 1):
-        x = start_prefix + struct.pack(">L", start + i)[-vlen:]
-        cmap.add_cid2bytes(cid + i, x)
-
-
-def add_bf_range(
-    cmap: FileUnicodeMap, start_byte: bytes, end_byte: bytes, code: PDFObject
-) -> None:
-    start = nunpack(start_byte)
-    end = nunpack(end_byte)
-    if isinstance(code, list):
-        if len(code) != end - start + 1:
-            log.warning(
-                "The difference between the start and end "
-                "offsets does not match the code length.",
-            )
-        for cid, unicode_value in zip(range(start, end + 1), code):
-            assert isinstance(unicode_value, bytes)
-            cmap.add_cid2bytes(cid, unicode_value)
-    else:
-        assert isinstance(code, bytes)
-        var = code[-4:]
-        base = nunpack(var)
-        prefix = code[:-4]
-        vlen = len(var)
-        for i in range(end - start + 1):
-            x = prefix + struct.pack(">L", base + i)[-vlen:]
-            cmap.add_cid2bytes(start + i, x)
+    def add_bf_range(
+        self, start_byte: bytes, end_byte: bytes, code: PDFObject
+    ) -> None:
+        start = nunpack(start_byte)
+        end = nunpack(end_byte)
+        if isinstance(code, list):
+            if len(code) != end - start + 1:
+                log.warning(
+                    "The difference between the start and end "
+                    "offsets does not match the code length.",
+                )
+            for cid, unicode_value in zip(range(start, end + 1), code):
+                assert isinstance(unicode_value, bytes)
+                self.add_cid2bytes(cid, unicode_value)
+        else:
+            assert isinstance(code, bytes)
+            var = code[-4:]
+            base = nunpack(var)
+            prefix = code[:-4]
+            vlen = len(var)
+            for i in range(end - start + 1):
+                x = prefix + struct.pack(">L", base + i)[-vlen:]
+                self.add_cid2bytes(start + i, x)
 
 
 def parse_tounicode(data: bytes) -> FileUnicodeMap:
@@ -380,6 +350,132 @@ def parse_tounicode(data: bytes) -> FileUnicodeMap:
             del stack[:]
         elif obj is KEYWORD_BEGINCIDRANGE:
             del stack[:]
+        elif obj is KEYWORD_ENDCIDRANGE:
+            del stack[:]
+        elif obj is KEYWORD_BEGINCIDCHAR:
+            del stack[:]
+        elif obj is KEYWORD_ENDCIDCHAR:
+            del stack[:]
+        elif obj is KEYWORD_BEGINBFRANGE:
+            del stack[:]
+        elif obj is KEYWORD_ENDBFRANGE:
+            for start_byte, end_byte, code in choplist(3, stack):
+                if not isinstance(start_byte, bytes):
+                    log.warning("The start object is not a byte.")
+                    continue
+                if not isinstance(end_byte, bytes):
+                    log.warning("The end object is not a byte.")
+                    continue
+                if len(start_byte) != len(end_byte):
+                    log.warning("The start and end byte have different lengths.")
+                    continue
+                cmap.add_bf_range(start_byte, end_byte, code)
+            del stack[:]
+        elif obj is KEYWORD_BEGINBFCHAR:
+            del stack[:]
+        elif obj is KEYWORD_ENDBFCHAR:
+            for cid, code in choplist(2, stack):
+                if isinstance(cid, bytes) and isinstance(code, bytes):
+                    cmap.add_cid2bytes(nunpack(cid), code)
+            del stack[:]
+        elif obj is KEYWORD_BEGINNOTDEFRANGE:
+            del stack[:]
+        elif obj is KEYWORD_ENDNOTDEFRANGE:
+            del stack[:]
+        else:
+            # It's ... something else (probably bogus)
+            stack.append(obj)
+    return cmap
+
+
+class EncodingCMap(CMap):
+    """Encoding map loaded from a PDF stream."""
+    def __init__(self):
+        super().__init__()
+        self.bytes2cid: Dict[bytes, int] = {}
+        self.code_lengths = []
+
+    def decode(self, code: bytes) -> Tuple[int, ...]:
+        idx = 0
+        codes = []
+        # Match longest substring in bytes2cid
+        while idx < len(code):
+            for codelen in self.code_lengths[::-1]:
+                if code[idx: idx + codelen] in self.bytes2cid:
+                    codes.append(self.bytes2cid[code[idx: idx + codelen]])
+                    idx += codelen
+                    break
+            else:
+                log.warning("Unknown byte sequence %r", code[idx:])
+                idx += 1
+        return tuple(codes)
+
+    def add_bytes2cid(self, utf16: bytes, cid: int) -> None:
+        codelen = len(utf16)
+        pos = bisect_left(self.code_lengths, codelen)
+        if pos == len(self.code_lengths) or self.code_lengths[pos] != codelen:
+            self.code_lengths.insert(pos, codelen)
+        self.bytes2cid[utf16] = cid
+
+    def add_cid_range(
+        self, start_byte: bytes, end_byte: bytes, cid: int
+    ) -> None:
+        start_prefix = start_byte[:-4]
+        end_prefix = end_byte[:-4]
+        if start_prefix != end_prefix:
+            log.warning(
+                "The prefix of the start and end byte of "
+                "begincidrange are not the same.",
+            )
+            return
+        svar = start_byte[-4:]
+        evar = end_byte[-4:]
+        start = nunpack(svar)
+        end = nunpack(evar)
+        vlen = len(svar)
+        for i in range(end - start + 1):
+            x = start_prefix + struct.pack(">L", start + i)[-vlen:]
+            self.add_bytes2cid(x, cid + i)
+
+
+def parse_encoding(data: bytes) -> EncodingCMap:
+    """Parse an Encoding CMap."""
+    cmap = EncodingCMap()
+    stack: List[PDFObject] = []
+    parser = ObjectParser(data)
+
+    while True:
+        try:
+            pos, obj = next(parser)
+        except PDFSyntaxError as e:
+            log.debug("Ignoring syntax error: %s", e)
+            parser.reset()
+            continue
+        except StopIteration:
+            break
+
+        if not isinstance(obj, PSKeyword):
+            stack.append(obj)
+            continue
+        log.debug("keyword: %r (%r)", obj, stack)
+
+        if obj is KEYWORD_DEF:
+            try:
+                # Might fail with IndexError if the file is corrupted
+                v = stack.pop()
+                k = stack.pop()
+                cmap.set_attr(literal_name(k), v)
+            except (IndexError, TypeError):
+                pass
+        elif obj is KEYWORD_USECMAP:
+            log.warning("usecmap not supported for EncodingCMap")
+            del stack[:]
+        elif obj is KEYWORD_BEGINCODESPACERANGE:
+            del stack[:]
+        elif obj is KEYWORD_ENDCODESPACERANGE:
+            del stack[:]
+        elif obj is KEYWORD_BEGINCIDRANGE:
+            del stack[:]
         elif obj is KEYWORD_ENDCIDRANGE:
             for start_byte, end_byte, cid in choplist(3, stack):
                 if not isinstance(start_byte, bytes):
@@ -396,36 +492,22 @@ def parse_tounicode(data: bytes) -> FileUnicodeMap:
                         "The start and end byte of begincidrange have different lengths.",
                     )
                     return cmap
-                add_cid_range(cmap, start_byte, end_byte, cid)
+                cmap.add_cid_range(start_byte, end_byte, cid)
             del stack[:]
         elif obj is KEYWORD_BEGINCIDCHAR:
             del stack[:]
         elif obj is KEYWORD_ENDCIDCHAR:
-            for cid, code in choplist(2, stack):
+            for code, cid in choplist(2, stack):
                 if isinstance(code, bytes) and isinstance(cid, int):
-                    cmap.add_cid2bytes(cid, code)
+                    cmap.add_bytes2cid(code, cid)
             del stack[:]
         elif obj is KEYWORD_BEGINBFRANGE:
             del stack[:]
         elif obj is KEYWORD_ENDBFRANGE:
-            for start_byte, end_byte, code in choplist(3, stack):
-                if not isinstance(start_byte, bytes):
-                    log.warning("The start object is not a byte.")
-                    continue
-                if not isinstance(end_byte, bytes):
-                    log.warning("The end object is not a byte.")
-                    continue
-                if len(start_byte) != len(end_byte):
-                    log.warning("The start and end byte have different lengths.")
-                    continue
-                add_bf_range(cmap, start_byte, end_byte, code)
             del stack[:]
         elif obj is KEYWORD_BEGINBFCHAR:
             del stack[:]
         elif obj is KEYWORD_ENDBFCHAR:
-            for cid, code in choplist(2, stack):
-                if isinstance(cid, bytes) and isinstance(code, bytes):
-                    cmap.add_cid2bytes(nunpack(cid), code)
             del stack[:]
         elif obj is KEYWORD_BEGINNOTDEFRANGE:
             del stack[:]
diff --git a/playa/font.py b/playa/font.py
index 018b09a..8ec9cce 100644
--- a/playa/font.py
+++ b/playa/font.py
@@ -23,6 +23,7 @@
     CMapBase,
     CMapDB,
     parse_tounicode,
+    parse_encoding,
     FileUnicodeMap,
     IdentityUnicodeMap,
     UnicodeMap,
@@ -1067,10 +1068,14 @@ def __init__(
             if isinstance(spec["ToUnicode"], ContentStream):
                 strm = stream_value(spec["ToUnicode"])
                 self.unicode_map = parse_tounicode(strm.buffer)
-            if isinstance(spec["Encoding"], ContentStream):
+            # FIXME: For the moment only replace the cmap if we don't
+            # have a predefined one (this may or may not be correct)
+            # FIXME: self.cmap should just be None here, WTF pdfminer.six!
+            if self.cmap.attrs.get("CMapName") is None and isinstance(
+                spec["Encoding"], ContentStream
+            ):
                 strm = stream_value(spec["Encoding"])
-                # FIXME: it's not a tounicode, but it plays one on TV
-                # _ = parse_tounicode(strm.buffer)
+                self.cmap = parse_encoding(strm.buffer)
 
             if self.unicode_map is None:
                 cmap_name = literal_name(spec["ToUnicode"])
diff --git a/playa/page.py b/playa/page.py
index bbc1678..93f07ba 100644
--- a/playa/page.py
+++ b/playa/page.py
@@ -755,6 +755,7 @@ def init_resources(self, page: Page, resources: Dict) -> None:
                         spec = dict_value(spec)
                         self.fontmap[fontid] = doc.get_font(objid, spec)
                     except TypeError:
+                        # FIXME: This is very very wrong! DO NOT WANT!
                         log.warning("Broken/missing font spec for %r", fontid)
                         self.fontmap[fontid] = doc.get_font(objid, {})
             elif k == "ColorSpace":
diff --git a/tests/cmap-encoding.txt b/tests/cmap-encoding.txt
new file mode 100644
index 0000000..c0e5b70
--- /dev/null
+++ b/tests/cmap-encoding.txt
@@ -0,0 +1,261 @@
+%!PS-Adobe-3.0 Resource-CMap
+%%DocumentNeededResources: ProcSet (CIDInit)
+%%IncludeResource: ProcSet (CIDInit)
+%%BeginResource: CMap (Adobe-Identity-UCS)
+%%Title: (Adobe-Identity-UCS Adobe Identity 0)
+%%Version: 1
+%%EndComments
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo
+3 dict dup begin
+/Registry (Adobe) def
+/Ordering (Identity) def
+/Supplement 0 def
+end def
+/CMapName /Adobe-Identity-UCS def
+/CMapVersion 1 def
+/CMapType 0 def
+/WMode 0 def
+2 begincodespacerange
+<20> <20>
+<0000> <19FF>
+endcodespacerange
+229 begincidchar
+<0001> 1
+<0002> 2
+<0003> 3
+<0004> 4
+<0005> 5
+<0006> 6
+<0007> 7
+<0008> 8
+<0009> 9
+<000a> 10
+<000b> 11
+<000c> 12
+<000d> 13
+<000e> 14
+<000f> 15
+<0010> 16
+<0011> 17
+<0012> 18
+<0013> 19
+<0014> 20
+<0015> 21
+<0016> 22
+<0017> 23
+<0018> 24
+<0019> 25
+<001a> 26
+<001b> 27
+<001c> 28
+<001d> 29
+<001e> 30
+<001f> 31
+<0020> 32
+<0021> 33
+<0022> 34
+<0023> 35
+<0024> 36
+<0025> 37
+<0026> 38
+<0027> 39
+<0028> 40
+<0029> 41
+<002a> 42
+<002b> 43
+<002c> 44
+<002d> 45
+<002e> 46
+<002f> 47
+<0030> 48
+<0031> 49
+<0032> 50
+<0033> 51
+<0034> 52
+<0035> 53
+<0036> 54
+<0037> 55
+<0038> 56
+<0039> 57
+<003a> 58
+<003b> 59
+<003c> 60
+<003d> 61
+<003e> 62
+<003f> 63
+<0040> 64
+<0041> 65
+<0042> 66
+<0043> 67
+<0044> 68
+<0045> 69
+<0046> 70
+<0047> 71
+<0048> 72
+<0049> 73
+<004a> 74
+<004b> 75
+<004c> 76
+<004d> 77
+<004e> 78
+<004f> 79
+<0050> 80
+<0051> 81
+<0052> 82
+<0053> 83
+<0054> 84
+<0055> 85
+<0056> 86
+<0057> 87
+<0058> 88
+<0059> 89
+<005a> 90
+<005b> 91
+<005c> 92
+<005d> 93
+<005e> 94
+<005f> 95
+<0060> 96
+<0061> 97
+<0062> 98
+<0063> 99
+<0064> 100
+<0065> 101
+<0066> 102
+<0067> 103
+<0068> 104
+<0069> 105
+<006a> 106
+<006b> 107
+<006c> 108
+<006d> 109
+<006e> 110
+<006f> 111
+<0070> 112
+<0071> 113
+<0072> 114
+<0073> 115
+<0074> 116
+<0075> 117
+<0076> 118
+<0077> 119
+<0078> 120
+<0079> 121
+<007a> 122
+<007b> 123
+<007c> 124
+<007d> 125
+<007e> 126
+<007f> 127
+<0080> 128
+<0081> 129
+<0082> 130
+<0083> 131
+<0084> 132
+<0085> 133
+<0086> 134
+<0087> 135
+<0088> 136
+<0089> 137
+<008a> 138
+<008b> 139
+<008c> 140
+<008d> 141
+<008e> 142
+<008f> 143
+<0090> 144
+<0091> 145
+<0092> 146
+<0093> 147
+<0094> 148
+<0095> 149
+<0096> 150
+<0097> 151
+<0098> 152
+<0099> 153
+<009a> 154
+<009b> 155
+<009c> 156
+<009d> 157
+<009e> 158
+<009f> 159
+<00a0> 160
+<00a1> 161
+<00a2> 162
+<00a3> 163
+<00a4> 164
+<00a5> 165
+<00a6> 166
+<00a7> 167
+<00a8> 168
+<00a9> 169
+<00aa> 170
+<00ab> 171
+<00ac> 172
+<00ad> 173
+<00ae> 174
+<00af> 175
+<00b0> 176
+<00b1> 177
+<00b2> 178
+<00b3> 179
+<00b4> 180
+<00b5> 181
+<00b6> 182
+<00b7> 183
+<00b8> 184
+<00b9> 185
+<00ba> 186
+<00bb> 187
+<00bc> 188
+<00bd> 189
+<00be> 190
+<00bf> 191
+<00c0> 192
+<00c1> 193
+<00c2> 194
+<00c3> 195
+<00c4> 196
+<00c5> 197
+<00c6> 198
+<00c7> 199
+<00c8> 200
+<00c9> 201
+<00ca> 202
+<00cb> 203
+<00cc> 204
+<00cd> 205
+<00ce> 206
+<00cf> 207
+<00d0> 208
+<00d1> 209
+<00d2> 210
+<00d3> 211
+<00d4> 212
+<00d5> 213
+<00d6> 214
+<00d7> 215
+<00d8> 216
+<00d9> 217
+<00da> 218
+<00db> 219
+<00dc> 220
+<00dd> 221
+<00de> 222
+<00df> 223
+<00e0> 224
+<00e1> 225
+<00e2> 226
+<00e3> 227
+<00e4> 228
+<20> 229
+endcidchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
+%%EndResource
+%%EOF
diff --git a/tests/cmap-onebyte-encoding.txt b/tests/cmap-onebyte-encoding.txt
new file mode 100644
index 0000000..4ad1e30
--- /dev/null
+++ b/tests/cmap-onebyte-encoding.txt
@@ -0,0 +1,24 @@
+/CIDInit /ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo 3 dict dup begin
+/Registry (Adobe) def
+/Ordering (Identity) def
+/Supplement 0 def
+end def
+/CMapName /OneByteIdentityH def
+/CMapVersion 1.000 def
+/CMapType 1 def
+/UIDOffset 0 def
+/XUID [1 10 25404 9999] def
+/WMode 0 def
+1 begincodespacerange
+<00> <FF>
+endcodespacerange
+1 begincidrange
+<00> <FF> 0
+endcidrange
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
diff --git a/tests/cmap-tounicode.txt b/tests/cmap-tounicode.txt
new file mode 100644
index 0000000..3004818
--- /dev/null
+++ b/tests/cmap-tounicode.txt
@@ -0,0 +1,25 @@
+/CIDInit/ProcSet findresource begin
+12 dict begin
+begincmap
+/CIDSystemInfo<<
+/Registry (Adobe)
+/Ordering (UCS)
+/Supplement 0
+>> def
+/CMapName/Adobe-Identity-UCS def
+/CMapType 2 def
+1 begincodespacerange
+<00> <FF>
+endcodespacerange
+1 beginbfrange
+<006F> <0072> [<00E7> <00E9> <00E8> <00EA>]
+endbfrange
+3 beginbfchar
+<01> <0078>
+<02> <030C>
+<03> <0075>
+endbfchar
+endcmap
+CMapName currentdict /CMap defineresource pop
+end
+end
diff --git a/tests/data.py b/tests/data.py
index 405a961..04e879e 100644
--- a/tests/data.py
+++ b/tests/data.py
@@ -45,8 +45,10 @@
     # really rather broken.
     "issue9418.pdf",
     "bug1250079.pdf",
-    # FIXME: These can likely be fixed by correctly parsing CMaps
-    # (which should also be submitted as a PR to pdfminer.six)
+    # FIXME: We "accept" these but the Unicode mappings are incorrect.
+    # Need to see what pdf.js does for them - it seems falling back to
+    # the string may work, but it might be ASCII, PDFDocEncoding,
+    # UTF-16BE, or UTF-8 (each of these is different), so...
     "issue9915_reduced.pdf",
     "issue2931.pdf",
     "issue9534_reduced.pdf",
diff --git a/tests/test_cmapdb.py b/tests/test_cmapdb.py
index d5165a8..533762c 100644
--- a/tests/test_cmapdb.py
+++ b/tests/test_cmapdb.py
@@ -2,40 +2,18 @@
 Inadequately test CMap parsing and such.
 """
 
-from playa.cmapdb import parse_tounicode
+from pathlib import Path
+
+from playa.cmapdb import parse_tounicode, parse_encoding
 from playa.font import Type1FontHeaderParser
 
-STREAMDATA = b"""
-/CIDInit/ProcSet findresource begin
-12 dict begin
-begincmap
-/CIDSystemInfo<<
-/Registry (Adobe)
-/Ordering (UCS)
-/Supplement 0
->> def
-/CMapName/Adobe-Identity-UCS def
-/CMapType 2 def
-1 begincodespacerange
-<00> <FF>
-endcodespacerange
-1 beginbfrange
-<006F> <0072> [<00E7> <00E9> <00E8> <00EA>]
-endbfrange
-3 beginbfchar
-<01> <0078>
-<02> <030C>
-<03> <0075>
-endbfchar
-endcmap
-CMapName currentdict /CMap defineresource pop
-end
-end
-"""
+THISDIR = Path(__file__).parent
 
 
-def test_cmap_parser():
-    cmap = parse_tounicode(STREAMDATA)
+def test_parse_tounicode():
+    with open(THISDIR / "cmap-tounicode.txt", "rb") as infh:
+        data = infh.read()
+    cmap = parse_tounicode(data)
     assert cmap.cid2unichr == {
         1: "x",
         2: "̌",
@@ -47,6 +25,17 @@ def test_cmap_parser():
     }
 
 
+def test_parse_encoding():
+    with open(THISDIR / "cmap-encoding.txt", "rb") as infh:
+        data = infh.read()
+    cmap = parse_encoding(data)
+    cids = list(cmap.decode("hello world".encode("UTF-16-BE")))
+    assert cids == [ord(x) for x in "hello world"]
+    cids = list(cmap.decode(b"\x00W \x00T \x00F"))
+    assert cids == [87, 229, 84, 229, 70]
+
+
+
 # Basically the sort of stuff we try to find in a Type 1 font
 TYPE1DATA = b"""
 %!PS-AdobeFont-1.0: MyBogusFont 0.1
diff --git a/tests/test_open.py b/tests/test_open.py
index 77338f0..3631fa6 100644
--- a/tests/test_open.py
+++ b/tests/test_open.py
@@ -27,6 +27,16 @@
     "issue-1114-dedupe-chars.pdf",
     "malformed-from-issue-932.pdf",
     "mcid_example.pdf",
+    # FIXME: This can be fixed by correctly parsing Encoding CMaps,
+    # which should be submitted as a PR to pdfminer.six
+    "issue7901.pdf",
+    # FIXME: These have invalid ToUnicode mappings but can be fixed by
+    # falling back to the input string (as PDFDocEncoding or UTF-16BE)
+    "issue9915_reduced.pdf",
+    "issue2931.pdf",
+    "issue9534_reduced.pdf",
+    # FIXME: And this here one is just UTF-8
+    "issue18117.pdf",
 }