diff --git a/src/sec_certs/utils/pdf.py b/src/sec_certs/utils/pdf.py index f651057d..d94398d3 100644 --- a/src/sec_certs/utils/pdf.py +++ b/src/sec_certs/utils/pdf.py @@ -175,7 +175,7 @@ def segmented_pdf_to_text(segmented_pdf: list[dict[str, Any]]) -> str: for line in block["lines"]: spans = [] for span in line["spans"]: - spans.append(span.strip()) + spans.append(span["text"].strip()) line = " ".join(spans) if len(line.strip()) > 0: lines.append(line)