Skip to content

Commit

Permalink
Merge pull request #115 from VikParuchuri/dev
Browse files Browse the repository at this point in the history
OCR speedup
  • Loading branch information
VikParuchuri authored May 27, 2024
2 parents 80889bd + 1eb828a commit 31e36e7
Show file tree
Hide file tree
Showing 11 changed files with 754 additions and 413 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,16 @@ rec_model, rec_processor = load_model(), load_processor()
predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
```

### Compilation

The OCR model can be compiled for a ~15% speedup in total inference time, although the first run will be slow while the model compiles. First set `RECOGNITION_STATIC_CACHE=true`, then:

```python
import torch

rec_model.decoder.model.decoder = torch.compile(rec_model.decoder.model.decoder)
```

## Text line detection

This command will write out a JSON file with the detected bounding boxes.
Expand Down
11 changes: 11 additions & 0 deletions benchmark/recognition.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
from collections import defaultdict

import torch

from benchmark.scoring import overlap_score
from surya.model.recognition.model import load_model as load_recognition_model
from surya.model.recognition.processor import load_processor as load_recognition_processor
Expand All @@ -26,8 +28,12 @@ def main():
parser.add_argument("--tesseract", action="store_true", help="Run tesseract instead of surya.", default=False)
parser.add_argument("--langs", type=str, help="Specify certain languages to benchmark.", default=None)
parser.add_argument("--tess_cpus", type=int, help="Number of CPUs to use for tesseract.", default=28)
parser.add_argument("--compile", action="store_true", help="Compile the model.", default=False)
args = parser.parse_args()

if args.compile:
assert settings.RECOGNITION_STATIC_CACHE, "You must set RECOGNITION_STATIC_CACHE to compile the model."

rec_model = load_recognition_model()
rec_processor = load_recognition_processor()

Expand Down Expand Up @@ -56,6 +62,11 @@ def main():
else:
lang_list.append(l)

if args.compile:
rec_model.decoder.model.decoder = torch.compile(rec_model.decoder.model.decoder)
# Run through one batch to compile the model
run_recognition(images[:1], lang_list[:1], rec_model, rec_processor, bboxes=bboxes[:1])

start = time.time()
predictions_by_image = run_recognition(images, lang_list, rec_model, rec_processor, bboxes=bboxes)
surya_time = time.time() - start
Expand Down
2 changes: 2 additions & 0 deletions ocr_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import json
from collections import defaultdict

import torch

from surya.input.langs import replace_lang_with_code, get_unique_langs
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.model.detection.segformer import load_model as load_detection_model, load_processor as load_detection_processor
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "surya-ocr"
version = "0.4.8"
version = "0.4.9"
description = "OCR, layout, reading order, and line detection in 90+ languages"
authors = ["Vik Paruchuri <[email protected]>"]
readme = "README.md"
Expand Down
5 changes: 2 additions & 3 deletions surya/input/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,9 @@ def split_image(img, processor):
def prepare_image_detection(img, processor):
new_size = (processor.size["width"], processor.size["height"])

img.thumbnail(new_size, Image.Resampling.LANCZOS) # Shrink largest dimension to fit new size
img = img.resize(new_size, Image.Resampling.LANCZOS) # Stretch smaller dimension to fit new size

img = np.asarray(img, dtype=np.uint8)
img = cv2.resize(img, new_size, interpolation=cv2.INTER_LANCZOS4)

img = processor(img)["pixel_values"][0]
img = torch.from_numpy(img)
return img
Expand Down
448 changes: 121 additions & 327 deletions surya/model/recognition/decoder.py

Large diffs are not rendered by default.

Loading

0 comments on commit 31e36e7

Please sign in to comment.