Skip to content

Commit

Permalink
fix: fix bug in directory selection for proof image
Browse files Browse the repository at this point in the history
Solves #615

When the proof image folder reached the limit of 1000 images, we created
a new image directory and stored the image in it, but named it without zero padding.
That directory was never used again.
That's why we had directories named `1`, `2`, ..., `10` in the image directory.
  • Loading branch information
raphael0202 committed Dec 11, 2024
1 parent 62ecaab commit fce20b5
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 14 deletions.
37 changes: 36 additions & 1 deletion open_prices/proofs/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
run_and_save_proof_prediction,
)
from open_prices.proofs.models import Proof
from open_prices.proofs.utils import fetch_and_save_ocr_data
from open_prices.proofs.utils import fetch_and_save_ocr_data, select_proof_image_dir

LOCATION_OSM_NODE_652825274 = {
"type": location_constants.TYPE_OSM,
Expand Down Expand Up @@ -473,3 +473,38 @@ def test_run_and_save_proof_prediction_proof(self):
proof_type_prediction.delete()
price_tag_prediction.delete()
proof.delete()


class TestSelectProofImageDir(TestCase):
    """Check which numbered sub-directory ``select_proof_image_dir`` returns."""

    def setUp(self):
        super().setUp()
        # Each test works on its own empty "images" directory, which is
        # removed together with the temporary directory after the test
        tmp_root = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_root.cleanup)
        self.base_dir = Path(tmp_root.name) / "images"
        self.base_dir.mkdir()

    def test_select_proof_image_dir_no_dir(self):
        # No sub-directory yet: the first ID is selected
        expected = self.base_dir / "0001"
        self.assertEqual(select_proof_image_dir(self.base_dir), expected)

    def test_select_proof_image_dir_existing_dir(self):
        # A single, empty sub-directory is reused
        expected = self.base_dir / "0001"
        expected.mkdir()
        self.assertEqual(select_proof_image_dir(self.base_dir), expected)

    def test_select_proof_image_dir_existing_dir_second_dir(self):
        # With several sub-directories, the highest ID is selected
        for dir_name in ("0001", "0002"):
            (self.base_dir / dir_name).mkdir()
        self.assertEqual(
            select_proof_image_dir(self.base_dir), self.base_dir / "0002"
        )

    def test_select_proof_image_dir_existing_dir_create_new_dir(self):
        # The only sub-directory has reached the limit: the next ID is
        # selected, with the same zero padding
        full_dir = self.base_dir / "0001"
        full_dir.mkdir()
        (full_dir / "0001.jpg").touch()
        result = select_proof_image_dir(self.base_dir, max_images_per_dir=1)
        self.assertEqual(result, self.base_dir / "0002")
40 changes: 27 additions & 13 deletions open_prices/proofs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,31 +110,45 @@ def store_file(
# We store the images in directories containing up to 1000 images
# Once we reach 1000 images, we create a new directory by increasing the directory ID # noqa
# This is used to prevent the base image directory from containing too many files # noqa
images_dir = settings.IMAGES_DIR
current_dir_id = max(
(int(p.name) for p in images_dir.iterdir() if p.is_dir() and p.name.isdigit()),
default=1,
)
current_dir_id_str = f"{current_dir_id:04d}"
current_dir = images_dir / current_dir_id_str
if current_dir.exists() and len(list(current_dir.iterdir())) >= 1_000:
# if the current directory contains 1000 images, we create a new one
current_dir_id += 1
current_dir = images_dir / str(current_dir_id)
current_dir = select_proof_image_dir(settings.IMAGES_DIR)
current_dir.mkdir(exist_ok=True, parents=True)
file_full_path = generate_full_path(current_dir, file_stem, extension)
# write the content of the file to the new file
with file_full_path.open("wb") as f:
f.write(file.file.read())
# create a thumbnail
image_thumb_path = generate_thumbnail(
current_dir, current_dir_id_str, file_stem, extension, mimetype
current_dir, current_dir.name, file_stem, extension, mimetype
)
# Build file_path
file_path = generate_relative_path(current_dir_id_str, file_stem, extension)
file_path = generate_relative_path(current_dir.name, file_stem, extension)
return (file_path, mimetype, image_thumb_path)


def select_proof_image_dir(images_dir: Path, max_images_per_dir: int = 1_000) -> Path:
    """Select the directory where the next proof image should be stored.

    Sub-directories of ``images_dir`` are named with a zero-padded 4-digit
    number, starting at ``0001``. The highest-numbered existing sub-directory
    is selected, unless it already holds ``max_images_per_dir`` entries or
    more, in which case the next number is selected instead.

    The selected directory is not created by this function and may not exist
    yet.

    :param images_dir: the base directory containing the numbered
        sub-directories; if it does not exist yet, ``0001`` is selected
    :param max_images_per_dir: the number of entries from which a directory
        is considered full
    :return: the selected directory
    """
    try:
        current_dir_id = max(
            (
                int(p.name)
                for p in images_dir.iterdir()
                # isdecimal() rather than isdigit(): the latter also accepts
                # characters such as "²" that int() cannot parse
                if p.is_dir() and p.name.isdecimal()
            ),
            default=1,
        )
    except FileNotFoundError:
        # The base directory has not been created yet: start at the first ID
        current_dir_id = 1

    current_dir = images_dir / f"{current_dir_id:04d}"
    # Every entry of the directory counts towards the limit
    if (
        current_dir.exists()
        and sum(1 for _ in current_dir.iterdir()) >= max_images_per_dir
    ):
        # The current directory is full: move on to the next ID, keeping the
        # same zero padding so that the new directory is found again later
        current_dir = images_dir / f"{current_dir_id + 1:04d}"
    return current_dir


def run_ocr_on_image(image_path: Path | str, api_key: str) -> dict[str, Any] | None:
"""Run Google Cloud Vision OCR on the image stored at the given path.
Expand Down

0 comments on commit fce20b5

Please sign in to comment.