Skip to content

Commit

Permalink
fix: Make sure to clear textobj!!!
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Dec 2, 2024
1 parent acbc65f commit 541e7ed
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions playa/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1975,7 +1975,6 @@ class TextObject(ContentObject):

textstate: TextState
items: List[TextItem]
_glyphs: Union[List[GlyphObject], None] = None
_chars: Union[List[str], None] = None

def _render_char(
Expand Down Expand Up @@ -2061,6 +2060,9 @@ def chars(self) -> str:
if self._chars is not None:
return "".join(self._chars)
self._chars = []
# This is not strictly necessary since we don't care about
# positioning, but perhaps we might in the future
self.textstate.reset()
for item in self.items:
# Only TJ and Tf are relevant to Unicode output
if item.operator == "TJ":
Expand All @@ -2082,6 +2084,8 @@ def chars(self) -> str:

def __iter__(self) -> Iterator[GlyphObject]:
"""Generate glyphs for this text object"""
# This corresponds to a BT operator so reset the textstate
self.textstate.reset()
for item in self.items:
if item.operator == "TJ":
for glyph in self._render_string(item):
Expand Down Expand Up @@ -2228,8 +2232,11 @@ def do_b_a(self) -> Iterator[ContentObject]:
def do_BT(self) -> None:
"""Update text state and begin text object.
All operators until ET will be normalized, but executed lazily.
First we handle any operators that were seen before BT, so as
to get the initial textstate. Next, we collect any subsequent
operators until ET, and then execute them lazily.
"""
log.debug("executing ops before BT: %r", self.textobj)
for item in self.textobj:
self.textstate.update(item.operator, *item.args)
self.textobj = []
Expand All @@ -2246,8 +2253,13 @@ def do_ET(self) -> Iterator[ContentObject]:
if has_text:
yield self.create(TextObject, textstate=self.textstate, items=self.textobj)
else:
# We will not create a text object, so make sure to update
# the text/graphics state with anything we saw inside BT/ET
self.textstate.reset()
for item in self.textobj:
self.textstate.update(item.operator, *item.args)
# Make sure to clear textobj!!!
self.textobj = []

def do_Tc(self, space: PDFObject) -> None:
"""Set character spacing.
Expand Down

0 comments on commit 541e7ed

Please sign in to comment.