Commit

feat: add average hash support
pradishb committed Oct 20, 2023
1 parent 9a7fc70 commit ce80fab
Showing 3 changed files with 45 additions and 10 deletions.
README.md (1 change: 0 additions & 1 deletion)

@@ -16,7 +16,6 @@ pip install hash-ocr
 import cv2
 
 from hash_ocr import compute_distances
-from hash_ocr import get_characters
 from hash_ocr import get_word
 
 img = cv2.imread("test_data/382.png", cv2.IMREAD_GRAYSCALE)

example.py (16 changes: 16 additions & 0 deletions)

@@ -0,0 +1,16 @@
+import cv2
+
+from hash_ocr import compute_distances
+from hash_ocr import get_word
+
+img = cv2.imread("test_data/382.png", cv2.IMREAD_GRAYSCALE)
+img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)[1]
+
+print(get_word(img))
+# 382
+
+for d in compute_distances(img):
+    print(d)
+# [('3', 24.0), ('8', 66.0), ('2', 74.0), ('7', 77.0), ...]
+# [('8', 24.0), ('6', 60.0), ('0', 62.0), ('3', 68.0), ...]
+# [('2', 20.0), ('3', 70.0), ('1', 76.0), ('7', 85.0), ...]
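
The new method parameter added in this commit lets the same example opt into the faster average hash. A minimal sketch (not part of the commit; it assumes the example image above and the signatures added to hash_ocr/ocr.py below, and the distances it prints will differ from the block-mean values shown):

    import cv2

    from hash_ocr import get_word

    img = cv2.imread("test_data/382.png", cv2.IMREAD_GRAYSCALE)
    img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)[1]

    # "average" selects cv2.img_hash.AverageHash; the default stays "block_mean"
    print(get_word(img, method="average"))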

hash_ocr/ocr.py (38 changes: 29 additions & 9 deletions)

@@ -1,6 +1,7 @@
 import json
 import os
 from functools import lru_cache
+from typing import Literal
 from typing import Optional
 from typing import cast

@@ -11,7 +12,12 @@

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 DEFAULT_MODEL = os.path.join(BASE_DIR, "model", "digits.png")
-hash_obj = cv2.img_hash.BlockMeanHash.create()
+
+# BlockMeanHash is 10x slower than AverageHash
+img_hash = {
+    "average": cv2.img_hash.AverageHash.create(),
+    "block_mean": cv2.img_hash.BlockMeanHash.create(),
+}
 
 Hash = npt.NDArray[np.uint8]
 
@@ -46,14 +52,18 @@ def get_model(file_path: str = DEFAULT_MODEL):
     return output
 
 
-def compute_hash(img: MatLike) -> npt.NDArray[np.uint8]:
-    hsh = hash_obj.compute(img)
+def compute_hash(
+    img: MatLike, method: Literal["average", "block_mean"] = "block_mean"
+) -> npt.NDArray[np.uint8]:
+    hsh = img_hash[method].compute(img)
     hsh = cast(npt.NDArray[np.uint8], hsh)
     return hsh
 
 
 def compute_distances(
-    threshed_img: MatLike, model_path: str = DEFAULT_MODEL
+    threshed_img: MatLike,
+    model_path: str = DEFAULT_MODEL,
+    method: Literal["average", "block_mean"] = "block_mean",
 ) -> list[list[tuple[str, float]]]:
     model = get_model(file_path=model_path)
     cnts, _ = cv2.findContours(
@@ -66,17 +76,24 @@ def compute_distances(
         x, y, w, h = cv2.boundingRect(cnt)
         letter_img: MatLike = threshed_img[y : y + h, x : x + w]
         hsh = compute_hash(letter_img)
-        data = [(char, hash_obj.compare(hsh, hsh1)) for char, hsh1 in model]
+        data = [
+            (char, img_hash[method].compare(hsh, hsh1)) for char, hsh1 in model
+        ]
         data.sort(key=lambda d: d[1])
         output.append(data)
     return output
 
 
 def get_characters(
-    threshed_img: MatLike, model_path: str = DEFAULT_MODEL, max_dist: int = 80
+    threshed_img: MatLike,
+    model_path: str = DEFAULT_MODEL,
+    max_dist: int = 80,
+    method: Literal["average", "block_mean"] = "block_mean",
 ) -> list[tuple[str, float]]:
     output: list[tuple[str, float]] = []
-    distances = compute_distances(threshed_img, model_path=model_path)
+    distances = compute_distances(
+        threshed_img, model_path=model_path, method=method
+    )
     for data in distances:
         data = [d for d in data if d[1] <= max_dist]
         if data == []:
@@ -87,13 +104,16 @@


 def get_word(
-    threshed_img: MatLike, model_path: str = DEFAULT_MODEL, max_dist: int = 80
+    threshed_img: MatLike,
+    model_path: str = DEFAULT_MODEL,
+    max_dist: int = 80,
+    method: Literal["average", "block_mean"] = "block_mean",
 ) -> str:
     """
     This function assumes that the image contains only one word
     """
     chars = get_characters(
-        threshed_img, model_path=model_path, max_dist=max_dist
+        threshed_img, model_path=model_path, max_dist=max_dist, method=method
     )
     text = "".join(char for char, _ in chars)
     return text
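
The new comment in ocr.py claims BlockMeanHash is 10x slower than AverageHash. A minimal timing sketch to check that ratio on your own hardware (not part of the commit; timeit, the iteration count, and the sample image path are assumptions):

    import timeit

    import cv2

    # Any grayscale glyph image works; this path is a placeholder.
    img = cv2.imread("test_data/382.png", cv2.IMREAD_GRAYSCALE)

    average = cv2.img_hash.AverageHash.create()
    block_mean = cv2.img_hash.BlockMeanHash.create()

    # Time 10,000 hash computations with each algorithm.
    t_avg = timeit.timeit(lambda: average.compute(img), number=10_000)
    t_bmh = timeit.timeit(lambda: block_mean.compute(img), number=10_000)
    print(f"average: {t_avg:.3f}s  block_mean: {t_bmh:.3f}s")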
