Skip to content

Commit

Permalink
fix: add get_words_in_area function for OCR
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Oct 31, 2023
1 parent f459633 commit 2ea5e27
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 0 deletions.
93 changes: 93 additions & 0 deletions openfoodfacts/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,20 @@ def get_words_from_indices(
start_idx, end_idx, raises
)

def get_words_in_area(
    self, bounding_box: Tuple[int, int, int, int]
) -> Optional[List["Word"]]:
    """Look up the words located inside `bounding_box`.

    The lookup is delegated to the full text annotation of this result.

    :param bounding_box: a bounding box with absolute coordinates
    :return: the words that are included in `bounding_box`, or None when
        no full text annotation is available
    """
    annotation = self.full_text_annotation
    if not annotation:
        # nothing to search in without a full text annotation
        return None

    return annotation.get_words_in_area(bounding_box)

def pprint(self):
"""Pretty print the full text annotation, if it is not null."""
if self.full_text_annotation:
Expand Down Expand Up @@ -484,6 +498,19 @@ def _generate_pretty_print_string(self) -> str:
strings.append(f" {repr(text)}")
return "\n".join(strings)

def get_words_in_area(
    self, bounding_box: Tuple[int, int, int, int]
) -> List["Word"]:
    """Collect, page after page, the words located inside `bounding_box`.

    :param bounding_box: a bounding box with absolute coordinates
    :return: the words of every page that are included in `bounding_box`,
        in page order
    """
    # flatten the per-page results into a single list
    return [
        word
        for page in self.pages
        for word in page.get_words_in_area(bounding_box)
    ]


class TextAnnotationPage:
"""Detected page from OCR."""
Expand Down Expand Up @@ -554,6 +581,19 @@ def get_words_from_indices(
break
return selected, remaining

def get_words_in_area(
    self, bounding_box: Tuple[int, int, int, int]
) -> List["Word"]:
    """Collect, block after block, the words of the page located inside
    `bounding_box`.

    :param bounding_box: a bounding box with absolute coordinates
    :return: the words of every block that are included in
        `bounding_box`, in block order
    """
    # flatten the per-block results into a single list
    return [
        word
        for block in self.blocks
        for word in block.get_words_in_area(bounding_box)
    ]


class Block:
"""Logical element on the page."""
Expand Down Expand Up @@ -651,6 +691,19 @@ def get_words_from_indices(
break
return selected, remaining

def get_words_in_area(
    self, bounding_box: Tuple[int, int, int, int]
) -> List["Word"]:
    """Collect, paragraph after paragraph, the words of the block located
    inside `bounding_box`.

    :param bounding_box: a bounding box with absolute coordinates
    :return: the words of every paragraph that are included in
        `bounding_box`, in paragraph order
    """
    # flatten the per-paragraph results into a single list
    return [
        word
        for paragraph in self.paragraphs
        for word in paragraph.get_words_in_area(bounding_box)
    ]


class Paragraph:
"""Structural unit of text representing a number of words in certain
Expand Down Expand Up @@ -728,6 +781,17 @@ def get_words_from_indices(

return selected, remaining

def get_words_in_area(
    self, bounding_box: Tuple[int, int, int, int]
) -> List["Word"]:
    """Filter the words of the paragraph, keeping those located inside
    `bounding_box`.

    :param bounding_box: a bounding box with absolute coordinates
    :return: the words of the paragraph that are included in
        `bounding_box`
    """
    # delegates to the module-level function of the same name
    return get_words_in_area(words=self.words, bounding_box=bounding_box)


class Word:
"""A word representation."""
Expand Down Expand Up @@ -1004,6 +1068,35 @@ def compute_words_union_bounding_box(words: List[Word]) -> Tuple[int, int, int,
return (y_min, x_min, y_max, x_max) # type: ignore


def get_words_in_area(
    words: List["Word"], bounding_box: Tuple[int, int, int, int]
) -> List["Word"]:
    """Return the list of words that are in the provided area.

    A word is selected when the axis-aligned envelope of its bounding
    polygon vertices is fully contained in `bounding_box`, borders
    included. A word whose polygon has no vertex has no known position and
    is never selected.

    :param words: a list of words
    :param bounding_box: a bounding box with absolute coordinates, given as
        (y_min, x_min, y_max, x_max)
    :return: the list of words that are included in `bounding_box`, in
        their original order
    """
    bb_y_min, bb_x_min, bb_y_max, bb_x_max = bounding_box
    selected = []
    for word in words:
        vertices = word.bounding_poly.vertices
        if not vertices:
            # min()/max() raise ValueError on an empty sequence: skip the
            # word instead of aborting the whole lookup
            continue

        # each vertex is indexed as (x, y)
        xs = [vertex[0] for vertex in vertices]
        ys = [vertex[1] for vertex in vertices]

        if (
            bb_x_min <= min(xs)
            and max(xs) <= bb_x_max
            and bb_y_min <= min(ys)
            and max(ys) <= bb_y_max
        ):
            selected.append(word)

    return selected


class OCRTextAnnotation:
__slots__ = ("locale", "text", "bounding_poly")

Expand Down
49 changes: 49 additions & 0 deletions tests/test_ocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import Optional

import pytest

from openfoodfacts.ocr import OCRResult


@pytest.mark.parametrize(
    "ocr_url, bounding_box, expected_text",
    [
        (
            # It corresponds to this OCR crop: https://robotoff.openfoodfacts.org/api/v1/images/crop?image_url=https://images.openfoodfacts.org/images/products/089/000/000/1202/1.jpg&y_min=0.08416666666666667&x_min=0.30077691453940064&y_max=0.09583333333333334&x_max=0.37735849056603776
            "https://raw.githubusercontent.com/openfoodfacts/test-data/main/openfoodfacts-python/tests/unit/0890000001202_1.json",
            (101, 271, 115, 340),
            "Materne",
        ),
        (
            # same, but the bounding box is distinct from the logo area
            "https://raw.githubusercontent.com/openfoodfacts/test-data/main/openfoodfacts-python/tests/unit/0890000001202_1.json",
            (120, 271, 134, 340),
            None,
        ),
        (
            # [0.2808293402194977,0.37121888995170593,0.35544055700302124,0.49409016966819763]
            # /540/091/030/1160/1.jpg
            # (presumably the relative bounding box and the image it was
            # computed on -- TODO confirm)
            "https://raw.githubusercontent.com/openfoodfacts/test-data/main/openfoodfacts-python/tests/unit/5400910301160_1.json",
            (337, 327, 427, 436),
            "NUTRIDIA",
        ),
    ],
)
def test_get_words_in_area(
    ocr_url: str,
    bounding_box: tuple[int, int, int, int],
    expected_text: Optional[str],
):
    """Check which words `OCRResult.get_words_in_area` finds in an area.

    :param ocr_url: URL of the OCR JSON used to build the `OCRResult`
    :param bounding_box: the area to search, with absolute coordinates
    :param expected_text: text of the single word expected in the area, or
        None when no word is expected
    """
    ocr_result = OCRResult.from_url(ocr_url)
    # fail with an explicit assertion rather than an AttributeError below
    # if no result object was produced
    assert ocr_result is not None
    words = ocr_result.get_words_in_area(bounding_box)

    if expected_text is None:
        assert words == []
    else:
        assert words is not None
        assert len(words) == 1
        # the word text is compared stripped of surrounding whitespace
        assert words[0].text.strip() == expected_text

0 comments on commit 2ea5e27

Please sign in to comment.