Skip to content

Commit

Permalink
Merge pull request #270 from VikParuchuri/dev
Browse files (browse the repository at this point in the history)
Update layout model
  • Loading branch information
VikParuchuri authored Dec 30, 2024
2 parents 0774cef + 76754bc commit ac03917
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "surya-ocr"
version = "0.8.1"
version = "0.8.2"
description = "OCR, layout, reading order, and table recognition in 90+ languages"
authors = ["Vik Paruchuri <[email protected]>"]
readme = "README.md"
Expand Down
5 changes: 4 additions & 1 deletion surya/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,10 @@ def batch_layout_detection(images: List, model, processor, batch_size=None, top_
prediction["pause_tokens"] = last_prediction["pause_tokens"]
prediction["token"].fill_(model.decoder.config.pause_token_id)
batch_decoder_input[j, :] = model.decoder.config.pause_token_id
elif intersects_other_boxes(prediction["polygon"], [p["polygon"] for p in batch_predictions[j]], thresh=.4):
elif intersects_other_boxes(
prediction["polygon"],
[p["polygon"] for p in batch_predictions[j]], thresh=.4
) and model.decoder.config.max_pause_tokens > 0:
prediction["paused"] = True
prediction["pause_tokens"] = 1
prediction["token"].fill_(model.decoder.config.pause_token_id)
Expand Down
4 changes: 2 additions & 2 deletions surya/model/layout/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ def __init__(
aux_heads=0, # How many n-token-ahead heads to add
causal=True,
layer_norm_eps=1e-5,
pause_token_count=5,
max_pause_tokens=3,
pause_token_count=0,
max_pause_tokens=0,
**kwargs,
):
self.num_hidden_layers = num_hidden_layers
Expand Down
2 changes: 1 addition & 1 deletion surya/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def TORCH_DEVICE_MODEL(self) -> str:
RECOGNITION_ENCODER_BATCH_DIVISOR: int = 1 # Divisor for batch size in decoder (NOTE(review): the setting name says "encoder" — confirm which batch size this divides)

# Layout
LAYOUT_MODEL_CHECKPOINT: str = "datalab-to/surya_layout0"
LAYOUT_MODEL_CHECKPOINT: str = "datalab-to/surya_layout"
LAYOUT_IMAGE_SIZE: Dict = {"height": 768, "width": 768}
LAYOUT_SLICE_MIN: Dict = {"height": 1500, "width": 1500} # When to start slicing images
LAYOUT_SLICE_SIZE: Dict = {"height": 1200, "width": 1200} # Size of slices
Expand Down

0 comments on commit ac03917

Please sign in to comment.