Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfixes and pdftext improvements #259

Merged
merged 6 commits
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,15 @@ Setting the `LAYOUT_BATCH_SIZE` env var properly will make a big difference when
from PIL import Image
from surya.detection import batch_text_detection
from surya.layout import batch_layout_detection
from surya.model.layout.model import load_model, load_processor
from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
from surya.model.layout.model import load_model as load_layout_model
from surya.model.layout.processor import load_processor as load_layout_processor

image = Image.open(IMAGE_PATH)
model = load_model()
processor = load_processor()
det_model = load_model()
det_processor = load_processor()
model = load_layout_model()
processor = load_layout_processor()
det_model = load_det_model()
det_processor = load_det_processor()

# layout_predictions is a list of dicts, one per image
line_predictions = batch_text_detection([image], det_model, det_processor)
Expand Down
10 changes: 5 additions & 5 deletions ocr_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,22 +204,22 @@ def page_count(pdf_file):
if text_det:
det_img, pred = text_detection(pil_image)
with col1:
st.image(det_img, caption="Detected Text", use_column_width=True)
st.image(det_img, caption="Detected Text", use_container_width=True)
st.json(pred.model_dump(exclude=["heatmap", "affinity_map"]), expanded=True)


# Run layout
if layout_det:
layout_img, pred = layout_detection(pil_image)
with col1:
st.image(layout_img, caption="Detected Layout", use_column_width=True)
st.image(layout_img, caption="Detected Layout", use_container_width=True)
st.json(pred.model_dump(exclude=["segmentation_map"]), expanded=True)

# Run OCR
if text_rec:
rec_img, pred = ocr(pil_image, pil_image_highres, languages)
with col1:
st.image(rec_img, caption="OCR Result", use_column_width=True)
st.image(rec_img, caption="OCR Result", use_container_width=True)
json_tab, text_tab = st.tabs(["JSON", "Text Lines (for debugging)"])
with json_tab:
st.json(pred.model_dump(), expanded=True)
Expand All @@ -230,8 +230,8 @@ def page_count(pdf_file):
if table_rec:
table_img, pred = table_recognition(pil_image, pil_image_highres, in_file, page_number - 1 if page_number else None, use_pdf_boxes, skip_table_detection)
with col1:
st.image(table_img, caption="Table Recognition", use_column_width=True)
st.image(table_img, caption="Table Recognition", use_container_width=True)
st.json([p.model_dump() for p in pred], expanded=True)

with col2:
st.image(pil_image, caption="Uploaded Image", use_column_width=True)
st.image(pil_image, caption="Uploaded Image", use_container_width=True)
Loading
Loading