From 2dc75d665bb0263dfcea08fb97e911e1f0412015 Mon Sep 17 00:00:00 2001 From: Dominik Macko Date: Wed, 18 Oct 2023 17:14:53 +0200 Subject: [PATCH] Fix: take only span text not whole dictionary --- src/sec_certs/utils/pdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sec_certs/utils/pdf.py b/src/sec_certs/utils/pdf.py index f651057d..d94398d3 100644 --- a/src/sec_certs/utils/pdf.py +++ b/src/sec_certs/utils/pdf.py @@ -175,7 +175,7 @@ def segmented_pdf_to_text(segmented_pdf: list[dict[str, Any]]) -> str: for line in block["lines"]: spans = [] for span in line["spans"]: - spans.append(span.strip()) + spans.append(span["text"].strip()) line = " ".join(spans) if len(line.strip()) > 0: lines.append(line)