Skip to content

Commit

Permalink
Merge pull request #199 from VikParuchuri/dev
Browse files: browse the repository at this point in the history
Fix error with images
  • Loading branch information
VikParuchuri authored Oct 8, 2024
2 parents b76b19e + 7af11c1 commit 986677b
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
10 changes: 8 additions & 2 deletions ocr_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pypdfium2
import streamlit as st
from pypdfium2 import PdfiumError

from surya.detection import batch_text_detection
from surya.input.pdflines import get_page_text_lines, get_table_blocks
from surya.layout import batch_layout_detection
Expand Down Expand Up @@ -93,8 +95,12 @@ def table_recognition(img, highres_img, filepath, page_idx: int, use_pdf_boxes:
)
layout_tables.append(highres_bbox)

page_text = get_page_text_lines(filepath, [page_idx], [highres_img.size])[0]
table_bboxes = get_table_blocks(layout_tables, page_text, highres_img.size)
try:
page_text = get_page_text_lines(filepath, [page_idx], [highres_img.size])[0]
table_bboxes = get_table_blocks(layout_tables, page_text, highres_img.size)
except PdfiumError:
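# PdfiumError is raised when the input is an image rather than a PDF, so there is no
# embedded text to read; use empty boxes so the text-detection fallback below runs.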
table_bboxes = [[] for _ in layout_tables]

if not use_pdf_boxes or any(len(tb) == 0 for tb in table_bboxes):
det_results = batch_text_detection(table_imgs, det_model, det_processor)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "surya-ocr"
version = "0.6.0"
version = "0.6.1"
description = "OCR, layout, reading order, and table recognition in 90+ languages"
authors = ["Vik Paruchuri <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit 986677b

Please sign in to comment.