Skip to content

Commit

Permalink
refactor!: remove more layers of class hierarchy
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Oct 25, 2024
1 parent 9b24dd4 commit 5585e9e
Show file tree
Hide file tree
Showing 10 changed files with 1,471 additions and 1,537 deletions.
435 changes: 0 additions & 435 deletions playa/converter.py

This file was deleted.

12 changes: 12 additions & 0 deletions playa/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ class PDFIOError(PDFException, IOError):
pass


class PDFInterpreterError(PDFException):
    """Error category for the interpreter layer.

    NOTE(review): purpose inferred from the class name only; no raise
    site is visible alongside this definition -- confirm against callers.
    """


class PDFNoValidXRef(PDFSyntaxError):
    """Syntax-error subtype concerning cross-reference (xref) data.

    NOTE(review): presumably raised when no usable xref can be found;
    inferred from the name -- confirm against the raise sites.
    """

Expand Down Expand Up @@ -85,3 +89,11 @@ class PDFPasswordIncorrect(PDFEncryptionError):

class PDFTextExtractionNotAllowed(PDFEncryptionError):
    """Encryption-error subtype for disallowed text extraction.

    NOTE(review): presumably raised when the document's permissions
    forbid extracting text; inferred from the name -- confirm.
    """


class PDFFontError(PDFException):
    """Raised for invalid or unsupported font specifications.

    Base class for the more specific font errors.
    """


class PDFUnicodeNotDefined(PDFFontError):
    """Font-error subtype for a missing Unicode mapping.

    NOTE(review): presumably raised when a character code has no
    Unicode equivalent in a font; inferred from the name -- confirm.
    """
56 changes: 55 additions & 1 deletion playa/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from playa.exceptions import PDFValueError
from playa.pdfcolor import PDFColorSpace
from playa.pdffont import PDFFont
from playa.pdfinterp import Color, PDFGraphicState
from playa.pdftypes import PDFStream
from playa.utils import (
INF,
Expand All @@ -31,6 +30,61 @@
logger = logging.getLogger(__name__)


# A color value in one of three shapes, distinguished by how many
# float components it carries (one bare float, a 3-tuple, or a 4-tuple).
Color = Union[
    float,  # Greyscale
    Tuple[float, float, float],  # R, G, B
    Tuple[float, float, float, float],  # C, M, Y, K
]


class PDFGraphicState:
    """Plain mutable record of graphics-state parameters.

    Every attribute is public and meant to be assigned directly.  A new
    instance starts with ``linewidth`` at 0 and everything else unset
    (``None``).
    """

    def __init__(self) -> None:
        self.linewidth: float = 0
        # The following stay None until a caller assigns them.
        self.linecap: Optional[object] = None
        self.linejoin: Optional[object] = None
        self.miterlimit: Optional[object] = None
        self.dash: Optional[Tuple[object, object]] = None
        self.intent: Optional[object] = None
        self.flatness: Optional[object] = None

        # stroking color
        self.scolor: Optional[Color] = None

        # non stroking color
        self.ncolor: Optional[Color] = None

    def copy(self) -> "PDFGraphicState":
        """Return a new state carrying the same attribute values.

        The copy is shallow: attribute values are shared, not duplicated.
        """
        obj = PDFGraphicState()
        obj.linewidth = self.linewidth
        obj.linecap = self.linecap
        obj.linejoin = self.linejoin
        obj.miterlimit = self.miterlimit
        obj.dash = self.dash
        obj.intent = self.intent
        obj.flatness = self.flatness
        obj.scolor = self.scolor
        obj.ncolor = self.ncolor
        return obj

    def __repr__(self) -> str:
        """Return a one-line debugging summary of every attribute."""
        # Each literal ends with ", " so the next one must not start with
        # a space; otherwise the joined string contains double spaces.
        return (
            "<PDFGraphicState: linewidth=%r, linecap=%r, linejoin=%r, "
            "miterlimit=%r, dash=%r, intent=%r, flatness=%r, "
            "stroking color=%r, non stroking color=%r>"
            % (
                self.linewidth,
                self.linecap,
                self.linejoin,
                self.miterlimit,
                self.dash,
                self.intent,
                self.flatness,
                self.scolor,
                self.ncolor,
            )
        )


class LTItem:
"""Interface for things that can be analyzed"""

Expand Down
86 changes: 85 additions & 1 deletion playa/pdfdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
Iterable,
Iterator,
List,
Mapping,
NamedTuple,
Optional,
Protocol,
Expand All @@ -27,11 +28,13 @@

from playa import settings
from playa.arcfour import Arcfour
from playa.cmapdb import CMap, CMapBase, CMapDB
from playa.data_structures import NameTree, NumberTree
from playa.exceptions import (
PSEOF,
PDFEncryptionError,
PDFException,
PDFFontError,
PDFKeyError,
PDFNoOutlines,
PDFNoPageLabels,
Expand All @@ -42,7 +45,13 @@
PDFTypeError,
PSException,
)
from playa.pdfinterp import PDFResourceManager
from playa.pdffont import (
PDFCIDFont,
PDFFont,
PDFTrueTypeFont,
PDFType1Font,
PDFType3Font,
)
from playa.pdfpage import PDFPage
from playa.pdfparser import KEYWORD_XREF, PDFParser, PDFStreamParser
from playa.pdftypes import (
Expand Down Expand Up @@ -72,6 +81,9 @@

# Some predefined literals and keywords (these can be defined wherever
# they are used as they are interned to the same objects)
LITERAL_PDF = LIT("PDF")
LITERAL_TEXT = LIT("Text")
LITERAL_FONT = LIT("Font")
LITERAL_OBJSTM = LIT("ObjStm")
LITERAL_XREF = LIT("XRef")
LITERAL_CATALOG = LIT("Catalog")
Expand Down Expand Up @@ -695,6 +707,78 @@ class OutlineItem(NamedTuple):
se: Union[PDFObjRef, None]


class PDFResourceManager:
    """Repository of shared resources.

    ResourceManager facilitates reuse of shared resources
    such as fonts and images so that large objects are not
    allocated multiple times.
    """

    def __init__(self, caching: bool = True) -> None:
        # When false, fonts built by get_font() are never stored for reuse.
        self.caching = caching
        # Fonts built so far, keyed by the objid handed to get_font().
        self._cached_fonts: Dict[object, PDFFont] = {}

    def get_procset(self, procs: Sequence[object]) -> None:
        """Accept a procedure-set list and ignore it.

        No procedure set has any effect here; the method is an
        intentional no-op kept so existing callers keep working.
        """

    def get_cmap(self, cmapname: str, strict: bool = False) -> CMapBase:
        """Look up a CMap by name.

        An unknown name yields an empty ``CMap`` unless ``strict`` is
        true, in which case ``CMapDB.CMapNotFound`` propagates.
        """
        try:
            return CMapDB.get_cmap(cmapname)
        except CMapDB.CMapNotFound:
            if strict:
                raise
            return CMap()

    def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
        """Return the font described by ``spec``, reusing a cached one if any.

        Args:
            objid: Cache key for the font.  A falsy value (e.g. ``None``)
                skips both the cache lookup and the cache store.
            spec: The font dictionary.

        Raises:
            PDFFontError: In strict mode (``settings.STRICT``) when
                ``Type`` is not ``/Font``, when ``Subtype`` is missing,
                or when ``Subtype`` is not recognized.  In any mode when
                a Type0 font has an empty ``DescendantFonts`` list.
        """
        if objid and objid in self._cached_fonts:
            return self._cached_fonts[objid]

        log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
        # .get() so that a missing /Type is reported as a font error
        # instead of leaking a bare KeyError.
        if settings.STRICT and spec.get("Type") is not LITERAL_FONT:
            raise PDFFontError("Type is not /Font")

        if "Subtype" in spec:
            subtype = literal_name(spec["Subtype"])
        else:
            if settings.STRICT:
                raise PDFFontError("Font Subtype is not specified.")
            # Lenient mode: treat a font without a Subtype as Type1.
            subtype = "Type1"

        font: PDFFont
        if subtype in ("Type1", "MMType1"):
            font = PDFType1Font(spec)
        elif subtype == "TrueType":
            font = PDFTrueTypeFont(spec)
        elif subtype == "Type3":
            font = PDFType3Font(spec)
        elif subtype in ("CIDFontType0", "CIDFontType2"):
            font = PDFCIDFont(spec)
        elif subtype == "Type0":
            dfonts = list_value(spec["DescendantFonts"])
            # Explicit check rather than `assert`, which disappears
            # under `python -O` and would surface as an IndexError below.
            if not dfonts:
                raise PDFFontError(
                    "Type0 font has no DescendantFonts: %r" % (spec,)
                )
            # Build the descendant font, letting the parent's Encoding
            # and ToUnicode entries override the descendant's own.
            subspec = dict_value(dfonts[0]).copy()
            for k in ("Encoding", "ToUnicode"):
                if k in spec:
                    subspec[k] = resolve1(spec[k])
            # objid=None: the descendant is not cached separately.
            font = self.get_font(None, subspec)
        else:
            if settings.STRICT:
                raise PDFFontError("Invalid Font spec: %r" % spec)
            font = PDFType1Font(spec)  # FIXME: this is so wrong!

        if objid and self.caching:
            self._cached_fonts[objid] = font
        return font


class PDFDocument:
"""Representation of a PDF document on disk.
Expand Down
26 changes: 10 additions & 16 deletions playa/pdffont.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import struct
from io import BytesIO
from typing import (
TYPE_CHECKING,
Any,
BinaryIO,
Dict,
Expand All @@ -27,7 +26,14 @@
UnicodeMap,
)
from playa.encodingdb import EncodingDB, name2unicode
from playa.exceptions import PSEOF, PDFException, PDFKeyError, PDFValueError
from playa.exceptions import (
PSEOF,
PDFException,
PDFFontError,
PDFKeyError,
PDFUnicodeNotDefined,
PDFValueError,
)
from playa.fontmetrics import FONT_METRICS
from playa.pdftypes import (
PDFStream,
Expand All @@ -49,9 +55,6 @@
)
from playa.utils import Matrix, Point, Rect, apply_matrix_norm, choplist, nunpack

if TYPE_CHECKING:
from playa.pdfinterp import PDFResourceManager

log = logging.getLogger(__name__)


Expand Down Expand Up @@ -838,14 +841,6 @@ def create_unicode_map(self) -> FileUnicodeMap:
return unicode_map


class PDFFontError(PDFException):
pass


class PDFUnicodeNotDefined(PDFFontError):
pass


LITERAL_STANDARD_ENCODING = LIT("StandardEncoding")
LITERAL_TYPE1C = LIT("Type1C")

Expand Down Expand Up @@ -984,7 +979,7 @@ def to_unichr(self, cid: int) -> str:


class PDFType1Font(PDFSimpleFont):
def __init__(self, rsrcmgr: "PDFResourceManager", spec: Mapping[str, Any]) -> None:
def __init__(self, spec: Mapping[str, Any]) -> None:
try:
self.basefont = literal_name(spec["BaseFont"])
except KeyError:
Expand Down Expand Up @@ -1021,7 +1016,7 @@ def __repr__(self) -> str:


class PDFType3Font(PDFSimpleFont):
def __init__(self, rsrcmgr: "PDFResourceManager", spec: Mapping[str, Any]) -> None:
def __init__(self, spec: Mapping[str, Any]) -> None:
firstchar = int_value(spec.get("FirstChar", 0))
# lastchar = int_value(spec.get('LastChar', 0))
width_list = list_value(spec.get("Widths", [0] * 256))
Expand All @@ -1044,7 +1039,6 @@ class PDFCIDFont(PDFFont):

def __init__(
self,
rsrcmgr: "PDFResourceManager",
spec: Mapping[str, Any],
strict: bool = settings.STRICT,
) -> None:
Expand Down
Loading

0 comments on commit 5585e9e

Please sign in to comment.