From 72bed4152a0228ed5b7ec56e990c5475937cd6a8 Mon Sep 17 00:00:00 2001 From: Felix Dittrich Date: Mon, 25 Nov 2024 10:58:13 +0100 Subject: [PATCH] [typings] Replace deprecated typing with built-in types (#1794) --- .github/verify_pr_labels.py | 4 +- api/app/routes/detection.py | 5 +- api/app/routes/kie.py | 5 +- api/app/routes/ocr.py | 5 +- api/app/routes/recognition.py | 5 +- api/app/schemas.py | 36 +++--- api/app/utils.py | 8 +- api/app/vision.py | 4 +- doctr/contrib/artefacts.py | 12 +- doctr/contrib/base.py | 14 +-- doctr/datasets/cord.py | 8 +- doctr/datasets/datasets/base.py | 29 ++--- doctr/datasets/datasets/pytorch.py | 6 +- doctr/datasets/datasets/tensorflow.py | 6 +- doctr/datasets/detection.py | 10 +- doctr/datasets/doc_artefacts.py | 4 +- doctr/datasets/funsd.py | 6 +- doctr/datasets/generator/base.py | 29 ++--- doctr/datasets/ic03.py | 4 +- doctr/datasets/ic13.py | 4 +- doctr/datasets/iiit5k.py | 4 +- doctr/datasets/iiithws.py | 4 +- doctr/datasets/imgur5k.py | 4 +- doctr/datasets/loader.py | 6 +- doctr/datasets/mjsynth.py | 4 +- doctr/datasets/ocr.py | 4 +- doctr/datasets/orientation.py | 4 +- doctr/datasets/recognition.py | 4 +- doctr/datasets/sroie.py | 4 +- doctr/datasets/svhn.py | 4 +- doctr/datasets/svt.py | 4 +- doctr/datasets/synthtext.py | 4 +- doctr/datasets/utils.py | 26 ++-- doctr/datasets/vocabs.py | 3 +- doctr/datasets/wildreceipt.py | 6 +- doctr/file_utils.py | 3 +- doctr/io/elements.py | 112 +++++++++--------- doctr/io/image/base.py | 3 +- doctr/io/image/pytorch.py | 3 +- doctr/io/image/tensorflow.py | 3 +- doctr/io/pdf.py | 8 +- doctr/io/reader.py | 8 +- doctr/models/_utils.py | 22 ++-- doctr/models/builder.py | 58 ++++----- .../classification/magc_resnet/pytorch.py | 18 +-- .../classification/magc_resnet/tensorflow.py | 14 +-- .../classification/mobilenet/pytorch.py | 8 +- .../classification/mobilenet/tensorflow.py | 14 +-- .../classification/predictor/pytorch.py | 9 +- .../classification/predictor/tensorflow.py | 11 +- doctr/models/classification/resnet/pytorch.py | 39 +++--- .../classification/resnet/tensorflow.py | 37 +++--- .../models/classification/textnet/pytorch.py | 18 +-- .../classification/textnet/tensorflow.py | 14 +-- doctr/models/classification/vgg/pytorch.py | 8 +- doctr/models/classification/vgg/tensorflow.py | 16 +-- doctr/models/classification/vit/pytorch.py | 14 +-- doctr/models/classification/vit/tensorflow.py | 10 +- doctr/models/classification/zoo.py | 6 +- doctr/models/core.py | 4 +- doctr/models/detection/_utils/base.py | 7 +- doctr/models/detection/core.py | 3 +- .../differentiable_binarization/base.py | 11 +- .../differentiable_binarization/pytorch.py | 31 ++--- .../differentiable_binarization/tensorflow.py | 28 ++--- doctr/models/detection/fast/base.py | 9 +- doctr/models/detection/fast/pytorch.py | 25 ++-- doctr/models/detection/fast/tensorflow.py | 24 ++-- doctr/models/detection/linknet/base.py | 9 +- doctr/models/detection/linknet/pytorch.py | 25 ++-- doctr/models/detection/linknet/tensorflow.py | 26 ++-- doctr/models/detection/predictor/pytorch.py | 6 +- .../models/detection/predictor/tensorflow.py | 8 +- doctr/models/detection/zoo.py | 4 +- doctr/models/kie_predictor/base.py | 6 +- doctr/models/kie_predictor/pytorch.py | 22 ++-- doctr/models/kie_predictor/tensorflow.py | 22 ++-- doctr/models/modules/layers/pytorch.py | 11 +- doctr/models/modules/layers/tensorflow.py | 10 +- doctr/models/modules/transformer/pytorch.py | 13 +- .../models/modules/transformer/tensorflow.py | 13 +- .../modules/vision_transformer/pytorch.py | 3 +- 
.../modules/vision_transformer/tensorflow.py | 4 +- doctr/models/predictor/base.py | 53 +++++---- doctr/models/predictor/pytorch.py | 4 +- doctr/models/predictor/tensorflow.py | 6 +- doctr/models/preprocessor/pytorch.py | 14 +-- doctr/models/preprocessor/tensorflow.py | 16 +-- doctr/models/recognition/core.py | 5 +- doctr/models/recognition/crnn/pytorch.py | 25 ++-- doctr/models/recognition/crnn/tensorflow.py | 20 ++-- doctr/models/recognition/master/base.py | 5 +- doctr/models/recognition/master/pytorch.py | 23 ++-- doctr/models/recognition/master/tensorflow.py | 22 ++-- doctr/models/recognition/parseq/base.py | 5 +- doctr/models/recognition/parseq/pytorch.py | 31 ++--- doctr/models/recognition/parseq/tensorflow.py | 28 ++--- doctr/models/recognition/predictor/_utils.py | 13 +- doctr/models/recognition/predictor/pytorch.py | 7 +- .../recognition/predictor/tensorflow.py | 8 +- doctr/models/recognition/sar/pytorch.py | 23 ++-- doctr/models/recognition/sar/tensorflow.py | 22 ++-- doctr/models/recognition/utils.py | 5 +- doctr/models/recognition/vitstr/base.py | 5 +- doctr/models/recognition/vitstr/pytorch.py | 19 +-- doctr/models/recognition/vitstr/tensorflow.py | 22 ++-- doctr/models/recognition/zoo.py | 4 +- doctr/models/utils/pytorch.py | 16 +-- doctr/models/utils/tensorflow.py | 17 +-- doctr/transforms/functional/base.py | 7 +- doctr/transforms/functional/pytorch.py | 9 +- doctr/transforms/functional/tensorflow.py | 16 +-- doctr/transforms/modules/base.py | 27 +++-- doctr/transforms/modules/pytorch.py | 21 ++-- doctr/transforms/modules/tensorflow.py | 29 ++--- doctr/utils/common_types.py | 15 ++- doctr/utils/data.py | 13 +- doctr/utils/fonts.py | 5 +- doctr/utils/geometry.py | 25 ++-- doctr/utils/metrics.py | 21 ++-- doctr/utils/multithreading.py | 5 +- doctr/utils/reconstitution.py | 16 +-- doctr/utils/repr.py | 3 +- doctr/utils/visualization.py | 30 ++--- references/detection/utils.py | 3 +- tests/tensorflow/test_datasets_loader_tf.py | 4 +- 126 files changed, 856 insertions(+), 880 deletions(-) diff --git a/.github/verify_pr_labels.py b/.github/verify_pr_labels.py index 7167b405dc..470751ed4c 100644 --- a/.github/verify_pr_labels.py +++ b/.github/verify_pr_labels.py @@ -11,7 +11,7 @@ with no labeling responsibility, so we don't want to bother them. """ -from typing import Any, Set, Tuple +from typing import Any import requests @@ -54,7 +54,7 @@ def query_repo(cmd: str, *, accept) -> Any: return response.json() -def get_pr_merger_and_labels(pr_number: int) -> Tuple[str, Set[str]]: +def get_pr_merger_and_labels(pr_number: int) -> tuple[str, set[str]]: # See https://docs.github.com/en/rest/reference/pulls#get-a-pull-request data = query_repo(f"pulls/{pr_number}", accept="application/vnd.github.v3+json") merger = data.get("merged_by", {}).get("login") diff --git a/api/app/routes/detection.py b/api/app/routes/detection.py index b55af39948..1c854e0e29 100644 --- a/api/app/routes/detection.py +++ b/api/app/routes/detection.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import List from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status @@ -15,8 +14,8 @@ router = APIRouter() -@router.post("/", response_model=List[DetectionOut], status_code=status.HTTP_200_OK, summary="Perform text detection") -async def text_detection(request: DetectionIn = Depends(), files: List[UploadFile] = [File(...)]): +@router.post("/", response_model=list[DetectionOut], status_code=status.HTTP_200_OK, summary="Perform text detection") +async def text_detection(request: DetectionIn = Depends(), files: list[UploadFile] = [File(...)]): """Runs docTR text detection model to analyze the input image""" try: predictor = init_predictor(request) diff --git a/api/app/routes/kie.py b/api/app/routes/kie.py index e929a62016..73370e685a 100644 --- a/api/app/routes/kie.py +++ b/api/app/routes/kie.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import List from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status @@ -14,8 +13,8 @@ router = APIRouter() -@router.post("/", response_model=List[KIEOut], status_code=status.HTTP_200_OK, summary="Perform KIE") -async def perform_kie(request: KIEIn = Depends(), files: List[UploadFile] = [File(...)]): +@router.post("/", response_model=list[KIEOut], status_code=status.HTTP_200_OK, summary="Perform KIE") +async def perform_kie(request: KIEIn = Depends(), files: list[UploadFile] = [File(...)]): """Runs docTR KIE model to analyze the input image""" try: predictor = init_predictor(request) diff --git a/api/app/routes/ocr.py b/api/app/routes/ocr.py index c815a39e47..8822e39c69 100644 --- a/api/app/routes/ocr.py +++ b/api/app/routes/ocr.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import List from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status @@ -14,8 +13,8 @@ router = APIRouter() -@router.post("/", response_model=List[OCROut], status_code=status.HTTP_200_OK, summary="Perform OCR") -async def perform_ocr(request: OCRIn = Depends(), files: List[UploadFile] = [File(...)]): +@router.post("/", response_model=list[OCROut], status_code=status.HTTP_200_OK, summary="Perform OCR") +async def perform_ocr(request: OCRIn = Depends(), files: list[UploadFile] = [File(...)]): """Runs docTR OCR model to analyze the input image""" try: # generator object to list diff --git a/api/app/routes/recognition.py b/api/app/routes/recognition.py index 65de3e07ba..f8e0bcafd7 100644 --- a/api/app/routes/recognition.py +++ b/api/app/routes/recognition.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import List from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status @@ -15,9 +14,9 @@ @router.post( - "/", response_model=List[RecognitionOut], status_code=status.HTTP_200_OK, summary="Perform text recognition" + "/", response_model=list[RecognitionOut], status_code=status.HTTP_200_OK, summary="Perform text recognition" ) -async def text_recognition(request: RecognitionIn = Depends(), files: List[UploadFile] = [File(...)]): +async def text_recognition(request: RecognitionIn = Depends(), files: list[UploadFile] = [File(...)]): """Runs docTR text recognition model to analyze the input image""" try: predictor = init_predictor(request) diff --git a/api/app/schemas.py b/api/app/schemas.py index b231a740f9..6084c51603 100644 --- a/api/app/schemas.py +++ b/api/app/schemas.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, Dict, List, Tuple, Union +from typing import Any from pydantic import BaseModel, Field @@ -54,21 +54,21 @@ class RecognitionOut(BaseModel): class DetectionOut(BaseModel): name: str = Field(..., examples=["example.jpg"]) - geometries: List[List[float]] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) + geometries: list[list[float]] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) class OCRWord(BaseModel): value: str = Field(..., examples=["example"]) - geometry: List[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) + geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) objectness_score: float = Field(..., examples=[0.99]) confidence: float = Field(..., examples=[0.99]) - crop_orientation: Dict[str, Any] = Field(..., examples=[{"value": 0, "confidence": None}]) + crop_orientation: dict[str, Any] = Field(..., examples=[{"value": 0, "confidence": None}]) class OCRLine(BaseModel): - geometry: List[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) + geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) objectness_score: float = Field(..., examples=[0.99]) - words: List[OCRWord] = Field( + words: list[OCRWord] = Field( ..., examples=[ { @@ -83,9 +83,9 @@ class OCRLine(BaseModel): class OCRBlock(BaseModel): - geometry: List[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) + geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]]) objectness_score: float = Field(..., examples=[0.99]) - lines: List[OCRLine] = Field( + lines: list[OCRLine] = Field( ..., examples=[ { @@ -105,7 +105,7 @@ class OCRBlock(BaseModel): class OCRPage(BaseModel): - blocks: List[OCRBlock] = Field( + blocks: list[OCRBlock] = Field( ..., examples=[ { @@ -133,10 +133,10 @@ class OCRPage(BaseModel): class OCROut(BaseModel): name: str = Field(..., examples=["example.jpg"]) - orientation: Dict[str, Union[float, None]] = Field(..., examples=[{"value": 0.0, "confidence": 0.99}]) - language: Dict[str, Union[str, float, None]] = Field(..., examples=[{"value": "en", "confidence": 0.99}]) - dimensions: Tuple[int, int] = Field(..., examples=[(100, 100)]) - items: List[OCRPage] = Field( + orientation: dict[str, float | None] = Field(..., examples=[{"value": 0.0, "confidence": 0.99}]) + language: dict[str, str | float | None] = Field(..., examples=[{"value": "en", "confidence": 0.99}]) + dimensions: tuple[int, int] = Field(..., examples=[(100, 100)]) + items: list[OCRPage] = Field( ..., examples=[ { @@ -164,7 +164,7 @@ class OCROut(BaseModel): class KIEElement(BaseModel): class_name: str = Field(..., examples=["example"]) - items: List[Dict[str, 
Union[str, List[float], float, Dict[str, Any]]]] = Field( + items: list[dict[str, str | list[float] | float | dict[str, Any]]] = Field( ..., examples=[ { @@ -180,7 +180,7 @@ class KIEElement(BaseModel): class KIEOut(BaseModel): name: str = Field(..., examples=["example.jpg"]) - orientation: Dict[str, Union[float, None]] = Field(..., examples=[{"value": 0.0, "confidence": 0.99}]) - language: Dict[str, Union[str, float, None]] = Field(..., examples=[{"value": "en", "confidence": 0.99}]) - dimensions: Tuple[int, int] = Field(..., examples=[(100, 100)]) - predictions: List[KIEElement] + orientation: dict[str, float | None] = Field(..., examples=[{"value": 0.0, "confidence": 0.99}]) + language: dict[str, str | float | None] = Field(..., examples=[{"value": "en", "confidence": 0.99}]) + dimensions: tuple[int, int] = Field(..., examples=[(100, 100)]) + predictions: list[KIEElement] diff --git a/api/app/utils.py b/api/app/utils.py index 472bcb2985..7e8c73d232 100644 --- a/api/app/utils.py +++ b/api/app/utils.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. -from typing import Any, List, Tuple, Union +from typing import Any import numpy as np from fastapi import UploadFile @@ -14,20 +14,20 @@ def resolve_geometry( geom: Any, -) -> Union[Tuple[float, float, float, float], Tuple[float, float, float, float, float, float, float, float]]: +) -> tuple[float, float, float, float] | tuple[float, float, float, float, float, float, float, float]: if len(geom) == 4: return (*geom[0], *geom[1], *geom[2], *geom[3]) return (*geom[0], *geom[1]) -async def get_documents(files: List[UploadFile]) -> Tuple[List[np.ndarray], List[str]]: # pragma: no cover +async def get_documents(files: list[UploadFile]) -> tuple[list[np.ndarray], list[str]]: # pragma: no cover """Convert a list of UploadFile objects to lists of numpy arrays and their corresponding filenames Args: files: list of UploadFile objects Returns: - Tuple[List[np.ndarray], List[str]]: list of numpy arrays and their corresponding filenames + tuple[list[np.ndarray], list[str]]: list of numpy arrays and their corresponding filenames """ filenames = [] diff --git a/api/app/vision.py b/api/app/vision.py index 99f9c5e8e2..606394d598 100644 --- a/api/app/vision.py +++ b/api/app/vision.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. -from typing import Callable, Union +from collections.abc import Callable import torch @@ -25,7 +25,7 @@ def _move_to_device(predictor: Callable) -> Callable: return predictor.to(torch.device("cuda" if torch.cuda.is_available() else "cpu")) -def init_predictor(request: Union[KIEIn, OCRIn, RecognitionIn, DetectionIn]) -> Callable: +def init_predictor(request: KIEIn | OCRIn | RecognitionIn | DetectionIn) -> Callable: """Initialize the predictor based on the request Args: diff --git a/doctr/contrib/artefacts.py b/doctr/contrib/artefacts.py index cbc819e568..3aca3007bd 100644 --- a/doctr/contrib/artefacts.py +++ b/doctr/contrib/artefacts.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Dict, List, Optional, Tuple +from typing import Any import cv2 import numpy as np @@ -14,7 +14,7 @@ __all__ = ["ArtefactDetector"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "yolov8_artefact": { "input_shape": (3, 1024, 1024), "labels": ["bar_code", "qr_code", "logo", "photo"], @@ -49,9 +49,9 @@ def __init__( self, arch: str = "yolov8_artefact", batch_size: int = 2, - model_path: Optional[str] = None, - labels: Optional[List[str]] = None, - input_shape: Optional[Tuple[int, int, int]] = None, + model_path: str | None = None, + labels: list[str] | None = None, + input_shape: tuple[int, int, int] | None = None, conf_threshold: float = 0.5, iou_threshold: float = 0.5, **kwargs: Any, @@ -65,7 +65,7 @@ def __init__( def preprocess(self, img: np.ndarray) -> np.ndarray: return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0) - def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]: + def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]: results = [] for batch in zip(output, input_images): diff --git a/doctr/contrib/base.py b/doctr/contrib/base.py index 806b109d43..765b521dbc 100644 --- a/doctr/contrib/base.py +++ b/doctr/contrib/base.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List, Optional +from typing import Any import numpy as np @@ -22,14 +22,14 @@ class _BasePredictor: **kwargs: additional arguments to be passed to `download_from_url` """ - def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None: + def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None: self.batch_size = batch_size self.session = self._init_model(url, model_path, **kwargs) - self._inputs: List[np.ndarray] = [] - self._results: List[Any] = [] + self._inputs: list[np.ndarray] = [] + self._results: list[Any] = [] - def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any: + def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any: """ Download the model from the given url if needed @@ -61,7 +61,7 @@ def preprocess(self, img: np.ndarray) -> np.ndarray: """ raise NotImplementedError - def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any: + def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any: """ Postprocess the model output @@ -74,7 +74,7 @@ def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarr """ raise NotImplementedError - def __call__(self, inputs: List[np.ndarray]) -> Any: + def __call__(self, inputs: list[np.ndarray]) -> Any: """ Call the model on the given inputs diff --git a/doctr/datasets/cord.py b/doctr/datasets/cord.py index 244d16d9a2..65f463da6b 100644 --- a/doctr/datasets/cord.py +++ b/doctr/datasets/cord.py @@ -6,7 +6,7 @@ import json import os from pathlib import Path -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np from tqdm import tqdm @@ -71,9 +71,9 @@ def __init__( + "To get the whole dataset with boxes and labels leave both parameters to False." 
) - # List images + # list images tmp_root = os.path.join(self.root, "image") - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] self.train = train np_dtype = np.float32 for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))): @@ -90,7 +90,7 @@ def __init__( if len(word["text"]) > 0: x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"] y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"] - box: Union[List[float], np.ndarray] + box: list[float] | np.ndarray if use_polygons: # (x, y) coordinates of top left, top right, bottom right, bottom left corners box = np.array( diff --git a/doctr/datasets/datasets/base.py b/doctr/datasets/datasets/base.py index 61ed53eae8..08d5a2eedd 100644 --- a/doctr/datasets/datasets/base.py +++ b/doctr/datasets/datasets/base.py @@ -5,8 +5,9 @@ import os import shutil +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any import numpy as np @@ -19,15 +20,15 @@ class _AbstractDataset: - data: List[Any] = [] - _pre_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None + data: list[Any] = [] + _pre_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None def __init__( self, - root: Union[str, Path], - img_transforms: Optional[Callable[[Any], Any]] = None, - sample_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None, - pre_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None, + root: str | Path, + img_transforms: Callable[[Any], Any] | None = None, + sample_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None, + pre_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None, ) -> None: if not Path(root).is_dir(): raise ValueError(f"expected a path to a reachable folder: {root}") @@ -41,10 +42,10 @@ def __init__( def __len__(self) -> int: return len(self.data) - def _read_sample(self, index: int) -> Tuple[Any, Any]: + def _read_sample(self, index: int) -> tuple[Any, Any]: raise NotImplementedError - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: # Read image img, target = self._read_sample(index) # Pre-transforms (format conversion at run-time etc.) 
@@ -95,13 +96,13 @@ class _VisionDataset(_AbstractDataset): def __init__( self, url: str, - file_name: Optional[str] = None, - file_hash: Optional[str] = None, + file_name: str | None = None, + file_hash: str | None = None, extract_archive: bool = False, download: bool = False, overwrite: bool = False, - cache_dir: Optional[str] = None, - cache_subdir: Optional[str] = None, + cache_dir: str | None = None, + cache_subdir: str | None = None, **kwargs: Any, ) -> None: cache_dir = ( @@ -114,7 +115,7 @@ def __init__( file_name = file_name if isinstance(file_name, str) else os.path.basename(url) # Download the file if not present - archive_path: Union[str, Path] = os.path.join(cache_dir, cache_subdir, file_name) + archive_path: str | Path = os.path.join(cache_dir, cache_subdir, file_name) if not os.path.exists(archive_path) and not download: raise ValueError("the dataset needs to be downloaded first with download=True") diff --git a/doctr/datasets/datasets/pytorch.py b/doctr/datasets/datasets/pytorch.py index a6cf541c07..da4040c3b0 100644 --- a/doctr/datasets/datasets/pytorch.py +++ b/doctr/datasets/datasets/pytorch.py @@ -5,7 +5,7 @@ import os from copy import deepcopy -from typing import Any, List, Tuple +from typing import Any import numpy as np import torch @@ -20,7 +20,7 @@ class AbstractDataset(_AbstractDataset): """Abstract class for all datasets""" - def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]: + def _read_sample(self, index: int) -> tuple[torch.Tensor, Any]: img_name, target = self.data[index] # Check target @@ -48,7 +48,7 @@ def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]: return img, deepcopy(target) @staticmethod - def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]: + def collate_fn(samples: list[tuple[torch.Tensor, Any]]) -> tuple[torch.Tensor, list[Any]]: images, targets = zip(*samples) images = torch.stack(images, dim=0) diff --git a/doctr/datasets/datasets/tensorflow.py b/doctr/datasets/datasets/tensorflow.py index 6e19ca2109..f0206cf035 100644 --- a/doctr/datasets/datasets/tensorflow.py +++ b/doctr/datasets/datasets/tensorflow.py @@ -5,7 +5,7 @@ import os from copy import deepcopy -from typing import Any, List, Tuple +from typing import Any import numpy as np import tensorflow as tf @@ -20,7 +20,7 @@ class AbstractDataset(_AbstractDataset): """Abstract class for all datasets""" - def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]: + def _read_sample(self, index: int) -> tuple[tf.Tensor, Any]: img_name, target = self.data[index] # Check target @@ -48,7 +48,7 @@ def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]: return img, deepcopy(target) @staticmethod - def collate_fn(samples: List[Tuple[tf.Tensor, Any]]) -> Tuple[tf.Tensor, List[Any]]: + def collate_fn(samples: list[tuple[tf.Tensor, Any]]) -> tuple[tf.Tensor, list[Any]]: images, targets = zip(*samples) images = tf.stack(images, axis=0) diff --git a/doctr/datasets/detection.py b/doctr/datasets/detection.py index a023b2f9a0..50fb7f6893 100644 --- a/doctr/datasets/detection.py +++ b/doctr/datasets/detection.py @@ -5,7 +5,7 @@ import json import os -from typing import Any, Dict, List, Tuple, Type, Union +from typing import Any import numpy as np @@ -46,13 +46,13 @@ def __init__( ) # File existence check - self._class_names: List = [] + self._class_names: list = [] if not os.path.exists(label_path): raise FileNotFoundError(f"unable to locate {label_path}") with open(label_path, "rb") as f: labels = json.load(f) - self.data: List[Tuple[str, 
Tuple[np.ndarray, List[str]]]] = [] + self.data: list[tuple[str, tuple[np.ndarray, list[str]]]] = [] np_dtype = np.float32 for img_name, label in labels.items(): # File existence check @@ -64,8 +64,8 @@ def __init__( self.data.append((img_name, (np.asarray(geoms, dtype=np_dtype), polygons_classes))) def format_polygons( - self, polygons: Union[List, Dict], use_polygons: bool, np_dtype: Type - ) -> Tuple[np.ndarray, List[str]]: + self, polygons: list | dict, use_polygons: bool, np_dtype: type + ) -> tuple[np.ndarray, list[str]]: """Format polygons into an array Args: diff --git a/doctr/datasets/doc_artefacts.py b/doctr/datasets/doc_artefacts.py index 5830f89f33..d96e81f6c2 100644 --- a/doctr/datasets/doc_artefacts.py +++ b/doctr/datasets/doc_artefacts.py @@ -5,7 +5,7 @@ import json import os -from typing import Any, Dict, List, Tuple +from typing import Any import numpy as np @@ -50,7 +50,7 @@ def __init__( tmp_root = os.path.join(self.root, "images") with open(os.path.join(self.root, "labels.json"), "rb") as f: labels = json.load(f) - self.data: List[Tuple[str, Dict[str, Any]]] = [] + self.data: list[tuple[str, dict[str, Any]]] = [] img_list = os.listdir(tmp_root) if len(labels) != len(img_list): raise AssertionError("the number of images and labels do not match") diff --git a/doctr/datasets/funsd.py b/doctr/datasets/funsd.py index 4529d0a18f..53e65f8014 100644 --- a/doctr/datasets/funsd.py +++ b/doctr/datasets/funsd.py @@ -6,7 +6,7 @@ import json import os from pathlib import Path -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np from tqdm import tqdm @@ -68,9 +68,9 @@ def __init__( # Use the subset subfolder = os.path.join("dataset", "training_data" if train else "testing_data") - # # List images + # # list images tmp_root = os.path.join(self.root, subfolder, "images") - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking FUNSD", total=len(os.listdir(tmp_root))): # File existence check if not os.path.exists(os.path.join(tmp_root, img_path)): diff --git a/doctr/datasets/generator/base.py b/doctr/datasets/generator/base.py index 2b868b26ee..ec4ebb6855 100644 --- a/doctr/datasets/generator/base.py +++ b/doctr/datasets/generator/base.py @@ -4,7 +4,8 @@ # See LICENSE or go to for full license details. 
import random -from typing import Any, Callable, List, Optional, Tuple, Union +from collections.abc import Callable +from typing import Any from PIL import Image, ImageDraw @@ -17,9 +18,9 @@ def synthesize_text_img( text: str, font_size: int = 32, - font_family: Optional[str] = None, - background_color: Optional[Tuple[int, int, int]] = None, - text_color: Optional[Tuple[int, int, int]] = None, + font_family: str | None = None, + background_color: tuple[int, int, int] | None = None, + text_color: tuple[int, int, int] | None = None, ) -> Image.Image: """Generate a synthetic text image @@ -59,9 +60,9 @@ def __init__( vocab: str, num_samples: int, cache_samples: bool = False, - font_family: Optional[Union[str, List[str]]] = None, - img_transforms: Optional[Callable[[Any], Any]] = None, - sample_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None, + font_family: str | list[str] | None = None, + img_transforms: Callable[[Any], Any] | None = None, + sample_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None, ) -> None: self.vocab = vocab self._num_samples = num_samples @@ -76,7 +77,7 @@ def __init__( self.img_transforms = img_transforms self.sample_transforms = sample_transforms - self._data: List[Image.Image] = [] + self._data: list[Image.Image] = [] if cache_samples: self._data = [ (synthesize_text_img(char, font_family=font), idx) # type: ignore[misc] @@ -87,7 +88,7 @@ def __init__( def __len__(self) -> int: return self._num_samples - def _read_sample(self, index: int) -> Tuple[Any, int]: + def _read_sample(self, index: int) -> tuple[Any, int]: # Samples are already cached if len(self._data) > 0: idx = index % len(self._data) @@ -108,9 +109,9 @@ def __init__( max_chars: int, num_samples: int, cache_samples: bool = False, - font_family: Optional[Union[str, List[str]]] = None, - img_transforms: Optional[Callable[[Any], Any]] = None, - sample_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None, + font_family: str | list[str] | None = None, + img_transforms: Callable[[Any], Any] | None = None, + sample_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None, ) -> None: self.vocab = vocab self.wordlen_range = (min_chars, max_chars) @@ -126,7 +127,7 @@ def __init__( self.img_transforms = img_transforms self.sample_transforms = sample_transforms - self._data: List[Image.Image] = [] + self._data: list[Image.Image] = [] if cache_samples: _words = [self._generate_string(*self.wordlen_range) for _ in range(num_samples)] self._data = [ @@ -141,7 +142,7 @@ def _generate_string(self, min_chars: int, max_chars: int) -> str: def __len__(self) -> int: return self._num_samples - def _read_sample(self, index: int) -> Tuple[Any, str]: + def _read_sample(self, index: int) -> tuple[Any, str]: # Samples are already cached if len(self._data) > 0: pil_img, target = self._data[index] # type: ignore[misc] diff --git a/doctr/datasets/ic03.py b/doctr/datasets/ic03.py index 50920952b5..73a70e241a 100644 --- a/doctr/datasets/ic03.py +++ b/doctr/datasets/ic03.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
import os -from typing import Any, Dict, List, Tuple, Union +from typing import Any import defusedxml.ElementTree as ET import numpy as np @@ -70,7 +70,7 @@ def __init__( ) self.train = train - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] np_dtype = np.float32 # Load xml data diff --git a/doctr/datasets/ic13.py b/doctr/datasets/ic13.py index 725b665758..a4e1c6cc6c 100644 --- a/doctr/datasets/ic13.py +++ b/doctr/datasets/ic13.py @@ -6,7 +6,7 @@ import csv import os from pathlib import Path -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np from tqdm import tqdm @@ -65,7 +65,7 @@ def __init__( f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}" ) - self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] np_dtype = np.float32 img_names = os.listdir(img_folder) diff --git a/doctr/datasets/iiit5k.py b/doctr/datasets/iiit5k.py index a87d454b42..eb7e1d43a8 100644 --- a/doctr/datasets/iiit5k.py +++ b/doctr/datasets/iiit5k.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. import os -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np import scipy.io as sio @@ -69,7 +69,7 @@ def __init__( mat_file = "trainCharBound" if self.train else "testCharBound" mat_data = sio.loadmat(os.path.join(tmp_root, f"{mat_file}.mat"))[mat_file][0] - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] np_dtype = np.float32 for img_path, label, box_targets in tqdm(iterable=mat_data, desc="Unpacking IIIT5K", total=len(mat_data)): diff --git a/doctr/datasets/iiithws.py b/doctr/datasets/iiithws.py index 0066b9a489..11c4e04930 100644 --- a/doctr/datasets/iiithws.py +++ b/doctr/datasets/iiithws.py @@ -5,7 +5,7 @@ import os from random import sample -from typing import Any, List, Tuple +from typing import Any from tqdm import tqdm @@ -51,7 +51,7 @@ def __init__( if not os.path.exists(label_path) or not os.path.exists(img_folder): raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}") - self.data: List[Tuple[str, str]] = [] + self.data: list[tuple[str, str]] = [] self.train = train with open(label_path) as f: diff --git a/doctr/datasets/imgur5k.py b/doctr/datasets/imgur5k.py index b99d8b4152..790fd01ef8 100644 --- a/doctr/datasets/imgur5k.py +++ b/doctr/datasets/imgur5k.py @@ -7,7 +7,7 @@ import json import os from pathlib import Path -from typing import Any, Dict, List, Tuple, Union +from typing import Any import cv2 import numpy as np @@ -72,7 +72,7 @@ def __init__( if not os.path.exists(label_path) or not os.path.exists(img_folder): raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}") - self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] self.train = train np_dtype = np.float32 diff --git a/doctr/datasets/loader.py b/doctr/datasets/loader.py index 583c1ee50d..8c17fa08a9 100644 --- a/doctr/datasets/loader.py +++ b/doctr/datasets/loader.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full 
license details. import math -from typing import Callable, Optional +from collections.abc import Callable import numpy as np import tensorflow as tf @@ -19,7 +19,7 @@ def default_collate(samples): samples: list of N tuples containing M elements Returns: - Tuple of M sequences contianing N elements each + tuple of M sequences containing N elements each batch_data = zip(*samples) @@ -51,7 +51,7 @@ def __init__( shuffle: bool = True, batch_size: int = 1, drop_last: bool = False, - collate_fn: Optional[Callable] = None, + collate_fn: Callable | None = None, ) -> None: self.dataset = dataset self.shuffle = shuffle diff --git a/doctr/datasets/mjsynth.py b/doctr/datasets/mjsynth.py index 650cc01f40..e544bdaf43 100644 --- a/doctr/datasets/mjsynth.py +++ b/doctr/datasets/mjsynth.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. import os -from typing import Any, List, Tuple +from typing import Any from tqdm import tqdm @@ -85,7 +85,7 @@ def __init__( if not os.path.exists(label_path) or not os.path.exists(img_folder): raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}") - self.data: List[Tuple[str, str]] = [] + self.data: list[tuple[str, str]] = [] self.train = train with open(label_path) as f: diff --git a/doctr/datasets/ocr.py b/doctr/datasets/ocr.py index 69a8471eb7..f47181305c 100644 --- a/doctr/datasets/ocr.py +++ b/doctr/datasets/ocr.py @@ -6,7 +6,7 @@ import json import os from pathlib import Path -from typing import Any, Dict, List, Tuple +from typing import Any import numpy as np @@ -40,7 +40,7 @@ def __init__( super().__init__(img_folder, **kwargs) # List images - self.data: List[Tuple[str, Dict[str, Any]]] = [] + self.data: list[tuple[str, dict[str, Any]]] = [] np_dtype = np.float32 with open(label_file, "rb") as f: data = json.load(f) diff --git a/doctr/datasets/orientation.py b/doctr/datasets/orientation.py index 2c008240f6..c10dff6856 100644 --- a/doctr/datasets/orientation.py +++ b/doctr/datasets/orientation.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details.
import os -from typing import Any, List, Tuple +from typing import Any import numpy as np @@ -36,4 +36,4 @@ def __init__( ) # initialize dataset with 0 degree rotation targets - self.data: List[Tuple[str, np.ndarray]] = [(img_name, np.array([0])) for img_name in os.listdir(self.root)] + self.data: list[tuple[str, np.ndarray]] = [(img_name, np.array([0])) for img_name in os.listdir(self.root)] diff --git a/doctr/datasets/recognition.py b/doctr/datasets/recognition.py index b06eb1e264..f5cfddf78a 100644 --- a/doctr/datasets/recognition.py +++ b/doctr/datasets/recognition.py @@ -6,7 +6,7 @@ import json import os from pathlib import Path -from typing import Any, List, Tuple +from typing import Any from .datasets import AbstractDataset @@ -35,7 +35,7 @@ def __init__( ) -> None: super().__init__(img_folder, **kwargs) - self.data: List[Tuple[str, str]] = [] + self.data: list[tuple[str, str]] = [] with open(labels_path, encoding="utf-8") as f: labels = json.load(f) diff --git a/doctr/datasets/sroie.py b/doctr/datasets/sroie.py index 83e9e64442..eb6b3fc1b2 100644 --- a/doctr/datasets/sroie.py +++ b/doctr/datasets/sroie.py @@ -6,7 +6,7 @@ import csv import os from pathlib import Path -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np from tqdm import tqdm @@ -73,7 +73,7 @@ def __init__( self.train = train tmp_root = os.path.join(self.root, "images") - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] np_dtype = np.float32 for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking SROIE", total=len(os.listdir(tmp_root))): diff --git a/doctr/datasets/svhn.py b/doctr/datasets/svhn.py index 872c77d3c3..51e53540c4 100644 --- a/doctr/datasets/svhn.py +++ b/doctr/datasets/svhn.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. import os -from typing import Any, Dict, List, Tuple, Union +from typing import Any import h5py import numpy as np @@ -71,7 +71,7 @@ def __init__( ) self.train = train - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] np_dtype = np.float32 tmp_root = os.path.join(self.root, "train" if train else "test") diff --git a/doctr/datasets/svt.py b/doctr/datasets/svt.py index 89b6e552bb..99d6c6c8c9 100644 --- a/doctr/datasets/svt.py +++ b/doctr/datasets/svt.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
import os -from typing import Any, Dict, List, Tuple, Union +from typing import Any import defusedxml.ElementTree as ET import numpy as np @@ -61,7 +61,7 @@ def __init__( ) self.train = train - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] np_dtype = np.float32 # Load xml data diff --git a/doctr/datasets/synthtext.py b/doctr/datasets/synthtext.py index f8ceaadfdf..2b8dc84321 100644 --- a/doctr/datasets/synthtext.py +++ b/doctr/datasets/synthtext.py @@ -5,7 +5,7 @@ import glob import os -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np from PIL import Image @@ -64,7 +64,7 @@ def __init__( ) self.train = train - self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] np_dtype = np.float32 # Load mat data diff --git a/doctr/datasets/utils.py b/doctr/datasets/utils.py index a897faee88..77593694fd 100644 --- a/doctr/datasets/utils.py +++ b/doctr/datasets/utils.py @@ -6,10 +6,10 @@ import string import unicodedata from collections.abc import Sequence +from collections.abc import Sequence as SequenceType from functools import partial from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union -from typing import Sequence as SequenceType +from typing import Any, TypeVar import numpy as np from PIL import Image @@ -69,7 +69,7 @@ def translate( def encode_string( input_string: str, vocab: str, -) -> List[int]: +) -> list[int]: """Given a predefined mapping, encode the string to a sequence of numbers Args: @@ -89,7 +89,7 @@ def encode_string( def decode_sequence( - input_seq: Union[np.ndarray, SequenceType[int]], + input_seq: np.ndarray | SequenceType[int], mapping: str, ) -> str: """Given a predefined mapping, decode the sequence of numbers to a string @@ -110,12 +110,12 @@ def decode_sequence( def encode_sequences( - sequences: List[str], + sequences: list[str], vocab: str, - target_size: Optional[int] = None, + target_size: int | None = None, eos: int = -1, - sos: Optional[int] = None, - pad: Optional[int] = None, + sos: int | None = None, + pad: int | None = None, dynamic_seq_length: bool = False, ) -> np.ndarray: """Encode character sequences using a given vocab as mapping @@ -170,8 +170,8 @@ def encode_sequences( def convert_target_to_relative( - img: ImageTensor, target: Union[np.ndarray, Dict[str, Any]] -) -> Tuple[ImageTensor, Union[Dict[str, Any], np.ndarray]]: + img: ImageTensor, target: np.ndarray | dict[str, Any] +) -> tuple[ImageTensor, dict[str, Any] | np.ndarray]: """Converts target to relative coordinates Args: @@ -188,7 +188,7 @@ def convert_target_to_relative( return img, target -def crop_bboxes_from_image(img_path: Union[str, Path], geoms: np.ndarray) -> List[np.ndarray]: +def crop_bboxes_from_image(img_path: str | Path, geoms: np.ndarray) -> list[np.ndarray]: """Crop a set of bounding boxes from an image Args: @@ -208,7 +208,7 @@ def crop_bboxes_from_image(img_path: Union[str, Path], geoms: np.ndarray) -> Lis raise ValueError("Invalid geometry format") -def pre_transform_multiclass(img, target: Tuple[np.ndarray, List]) -> Tuple[np.ndarray, Dict[str, List]]: +def pre_transform_multiclass(img, target: tuple[np.ndarray, list]) -> tuple[np.ndarray, dict[str, list]]: """Converts multiclass target to relative coordinates. 
Args: @@ -220,7 +220,7 @@ def pre_transform_multiclass(img, target: Tuple[np.ndarray, List]) -> Tuple[np.n """ boxes = convert_to_relative_coords(target[0], get_img_shape(img)) boxes_classes = target[1] - boxes_dict: Dict = {k: [] for k in sorted(set(boxes_classes))} + boxes_dict: dict = {k: [] for k in sorted(set(boxes_classes))} for k, poly in zip(boxes_classes, boxes): boxes_dict[k].append(poly) boxes_dict = {k: np.stack(v, axis=0) for k, v in boxes_dict.items()} diff --git a/doctr/datasets/vocabs.py b/doctr/datasets/vocabs.py index 94942d58e3..36e5cbb4d4 100644 --- a/doctr/datasets/vocabs.py +++ b/doctr/datasets/vocabs.py @@ -4,12 +4,11 @@ # See LICENSE or go to for full license details. import string -from typing import Dict __all__ = ["VOCABS"] -VOCABS: Dict[str, str] = { +VOCABS: dict[str, str] = { "digits": string.digits, "ascii_letters": string.ascii_letters, "punctuation": string.punctuation, diff --git a/doctr/datasets/wildreceipt.py b/doctr/datasets/wildreceipt.py index f46b5da301..8da37a78ee 100644 --- a/doctr/datasets/wildreceipt.py +++ b/doctr/datasets/wildreceipt.py @@ -6,7 +6,7 @@ import json import os from pathlib import Path -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np @@ -71,13 +71,13 @@ def __init__( tmp_root = img_folder self.train = train np_dtype = np.float32 - self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = [] + self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] with open(label_path, "r") as file: data = file.read() # Split the text file into separate JSON strings json_strings = data.strip().split("\n") - box: Union[List[float], np.ndarray] + box: list[float] | np.ndarray _targets = [] for json_string in json_strings: json_data = json.loads(json_string) diff --git a/doctr/file_utils.py b/doctr/file_utils.py index 79858a3ed9..1ce5596b1e 100644 --- a/doctr/file_utils.py +++ b/doctr/file_utils.py @@ -9,7 +9,6 @@ import importlib.util import logging import os -from typing import Optional CLASS_NAME: str = "words" @@ -93,7 +92,7 @@ def ensure_keras_v2() -> None: # pragma: no cover ) -def requires_package(name: str, extra_message: Optional[str] = None) -> None: # pragma: no cover +def requires_package(name: str, extra_message: str | None = None) -> None: # pragma: no cover """ package requirement helper diff --git a/doctr/io/elements.py b/doctr/io/elements.py index f846b197fb..2f78401f5e 100644 --- a/doctr/io/elements.py +++ b/doctr/io/elements.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any from defusedxml import defuse_stdlib @@ -32,8 +32,8 @@ class Element(NestedObject): """Implements an abstract document element with exporting and text rendering capabilities""" - _children_names: List[str] = [] - _exported_keys: List[str] = [] + _children_names: list[str] = [] + _exported_keys: list[str] = [] def __init__(self, **kwargs: Any) -> None: for k, v in kwargs.items(): @@ -42,7 +42,7 @@ def __init__(self, **kwargs: Any) -> None: else: raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{k}'") - def export(self) -> Dict[str, Any]: + def export(self) -> dict[str, Any]: """Exports the object into a nested dict format""" export_dict = {k: getattr(self, k) for k in self._exported_keys} for children_name in self._children_names: @@ -56,7 +56,7 @@ def export(self) -> Dict[str, Any]: return export_dict @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): raise NotImplementedError def render(self) -> str: @@ -75,16 +75,16 @@ class Word(Element): crop_orientation: the general orientation of the crop in degrees and its confidence """ - _exported_keys: List[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"] - _children_names: List[str] = [] + _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"] + _children_names: list[str] = [] def __init__( self, value: str, confidence: float, - geometry: Union[BoundingBox, np.ndarray], + geometry: BoundingBox | np.ndarray, objectness_score: float, - crop_orientation: Dict[str, Any], + crop_orientation: dict[str, Any], ) -> None: super().__init__() self.value = value @@ -101,7 +101,7 @@ def extra_repr(self) -> str: return f"value='{self.value}', confidence={self.confidence:.2}" @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} return cls(**kwargs) @@ -116,8 +116,8 @@ class Artefact(Element): the page's size. """ - _exported_keys: List[str] = ["geometry", "type", "confidence"] - _children_names: List[str] = [] + _exported_keys: list[str] = ["geometry", "type", "confidence"] + _children_names: list[str] = [] def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None: super().__init__() @@ -133,7 +133,7 @@ def extra_repr(self) -> str: return f"type='{self.type}', confidence={self.confidence:.2}" @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} return cls(**kwargs) @@ -148,15 +148,15 @@ class Line(Element): all words in it. 
""" - _exported_keys: List[str] = ["geometry", "objectness_score"] - _children_names: List[str] = ["words"] - words: List[Word] = [] + _exported_keys: list[str] = ["geometry", "objectness_score"] + _children_names: list[str] = ["words"] + words: list[Word] = [] def __init__( self, - words: List[Word], - geometry: Optional[Union[BoundingBox, np.ndarray]] = None, - objectness_score: Optional[float] = None, + words: list[Word], + geometry: BoundingBox | np.ndarray | None = None, + objectness_score: float | None = None, ) -> None: # Compute the objectness score of the line if objectness_score is None: @@ -176,7 +176,7 @@ def render(self) -> str: return " ".join(w.render() for w in self.words) @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} kwargs.update({ "words": [Word.from_dict(_dict) for _dict in save_dict["words"]], @@ -206,17 +206,17 @@ class Block(Element): all lines and artefacts in it. """ - _exported_keys: List[str] = ["geometry", "objectness_score"] - _children_names: List[str] = ["lines", "artefacts"] - lines: List[Line] = [] - artefacts: List[Artefact] = [] + _exported_keys: list[str] = ["geometry", "objectness_score"] + _children_names: list[str] = ["lines", "artefacts"] + lines: list[Line] = [] + artefacts: list[Artefact] = [] def __init__( self, - lines: List[Line] = [], - artefacts: List[Artefact] = [], - geometry: Optional[Union[BoundingBox, np.ndarray]] = None, - objectness_score: Optional[float] = None, + lines: list[Line] = [], + artefacts: list[Artefact] = [], + geometry: BoundingBox | np.ndarray | None = None, + objectness_score: float | None = None, ) -> None: # Compute the objectness score of the line if objectness_score is None: @@ -239,7 +239,7 @@ def render(self, line_break: str = "\n") -> str: return line_break.join(line.render() for line in self.lines) @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} kwargs.update({ "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]], @@ -260,18 +260,18 @@ class Page(Element): language: a dictionary with the language value and confidence of the prediction """ - _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"] - _children_names: List[str] = ["blocks"] - blocks: List[Block] = [] + _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"] + _children_names: list[str] = ["blocks"] + blocks: list[Block] = [] def __init__( self, page: np.ndarray, - blocks: List[Block], + blocks: list[Block], page_idx: int, - dimensions: Tuple[int, int], - orientation: Optional[Dict[str, Any]] = None, - language: Optional[Dict[str, Any]] = None, + dimensions: tuple[int, int], + orientation: dict[str, Any] | None = None, + language: dict[str, Any] | None = None, ) -> None: super().__init__(blocks=blocks) self.page = page @@ -313,7 +313,7 @@ def synthesize(self, **kwargs) -> np.ndarray: """ return synthesize_page(self.export(), **kwargs) - def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]: + def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]: """Export the page as XML (hOCR-format) convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md @@ -418,7 +418,7 @@ def export_as_xml(self, file_title: str = 
"docTR - XML export (hOCR)") -> Tuple[ return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr)) @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]}) return cls(**kwargs) @@ -436,18 +436,18 @@ class KIEPage(Element): language: a dictionary with the language value and confidence of the prediction """ - _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"] - _children_names: List[str] = ["predictions"] - predictions: Dict[str, List[Prediction]] = {} + _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"] + _children_names: list[str] = ["predictions"] + predictions: dict[str, list[Prediction]] = {} def __init__( self, page: np.ndarray, - predictions: Dict[str, List[Prediction]], + predictions: dict[str, list[Prediction]], page_idx: int, - dimensions: Tuple[int, int], - orientation: Optional[Dict[str, Any]] = None, - language: Optional[Dict[str, Any]] = None, + dimensions: tuple[int, int], + orientation: dict[str, Any] | None = None, + language: dict[str, Any] | None = None, ) -> None: super().__init__(predictions=predictions) self.page = page @@ -493,7 +493,7 @@ def synthesize(self, **kwargs) -> np.ndarray: """ return synthesize_kie_page(self.export(), **kwargs) - def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]: + def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]: """Export the page as XML (hOCR-format) convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md @@ -556,7 +556,7 @@ def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[ return ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr) @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} kwargs.update({ "predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]] @@ -571,12 +571,12 @@ class Document(Element): pages: list of page elements """ - _children_names: List[str] = ["pages"] - pages: List[Page] = [] + _children_names: list[str] = ["pages"] + pages: list[Page] = [] def __init__( self, - pages: List[Page], + pages: list[Page], ) -> None: super().__init__(pages=pages) @@ -589,7 +589,7 @@ def show(self, **kwargs) -> None: for result in self.pages: result.show(**kwargs) - def synthesize(self, **kwargs) -> List[np.ndarray]: + def synthesize(self, **kwargs) -> list[np.ndarray]: """Synthesize all pages from their predictions Args: @@ -600,7 +600,7 @@ def synthesize(self, **kwargs) -> List[np.ndarray]: """ return [page.synthesize(**kwargs) for page in self.pages] - def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]: + def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]: """Export the document as XML (hOCR-format) Args: @@ -612,7 +612,7 @@ def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]: return [page.export_as_xml(**kwargs) for page in self.pages] @classmethod - def from_dict(cls, save_dict: Dict[str, Any], **kwargs): + def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in 
cls._exported_keys} kwargs.update({"pages": [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]}) return cls(**kwargs) @@ -625,11 +625,11 @@ class KIEDocument(Document): pages: list of page elements """ - _children_names: List[str] = ["pages"] - pages: List[KIEPage] = [] # type: ignore[assignment] + _children_names: list[str] = ["pages"] + pages: list[KIEPage] = [] # type: ignore[assignment] def __init__( self, - pages: List[KIEPage], + pages: list[KIEPage], ) -> None: super().__init__(pages=pages) # type: ignore[arg-type] diff --git a/doctr/io/image/base.py b/doctr/io/image/base.py index c11caba034..5e82236a5d 100644 --- a/doctr/io/image/base.py +++ b/doctr/io/image/base.py @@ -4,7 +4,6 @@ # See LICENSE or go to for full license details. from pathlib import Path -from typing import Optional, Tuple import cv2 import numpy as np @@ -16,7 +15,7 @@ def read_img_as_numpy( file: AbstractFile, - output_size: Optional[Tuple[int, int]] = None, + output_size: tuple[int, int] | None = None, rgb_output: bool = True, ) -> np.ndarray: """Read an image file into numpy format diff --git a/doctr/io/image/pytorch.py b/doctr/io/image/pytorch.py index 48c719db51..e2b01affd1 100644 --- a/doctr/io/image/pytorch.py +++ b/doctr/io/image/pytorch.py @@ -4,7 +4,6 @@ # See LICENSE or go to for full license details. from io import BytesIO -from typing import Tuple import numpy as np import torch @@ -94,6 +93,6 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) - return img -def get_img_shape(img: torch.Tensor) -> Tuple[int, int]: +def get_img_shape(img: torch.Tensor) -> tuple[int, int]: """Get the shape of an image""" return img.shape[-2:] diff --git a/doctr/io/image/tensorflow.py b/doctr/io/image/tensorflow.py index 2b4435abc7..b8dc8256c5 100644 --- a/doctr/io/image/tensorflow.py +++ b/doctr/io/image/tensorflow.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Tuple import numpy as np import tensorflow as tf @@ -97,6 +96,6 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32) return img -def get_img_shape(img: tf.Tensor) -> Tuple[int, int]: +def get_img_shape(img: tf.Tensor) -> tuple[int, int]: """Get the shape of an image""" return img.shape[:2] diff --git a/doctr/io/pdf.py b/doctr/io/pdf.py index 51545f07c0..b56c1317d5 100644 --- a/doctr/io/pdf.py +++ b/doctr/io/pdf.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List, Optional +from typing import Any import numpy as np import pypdfium2 as pdfium @@ -15,11 +15,11 @@ def read_pdf( file: AbstractFile, scale: float = 2, rgb_mode: bool = True, - password: Optional[str] = None, + password: str | None = None, **kwargs: Any, -) -> List[np.ndarray]: +) -> list[np.ndarray]: """Read a PDF file and convert it into an image in numpy format >>> from doctr.io import read_pdf diff --git a/doctr/io/reader.py b/doctr/io/reader.py index cc969ff48a..e908d1ba9e 100644 --- a/doctr/io/reader.py +++ b/doctr/io/reader.py @@ -3,8 +3,8 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details.
+from collections.abc import Sequence from pathlib import Path -from typing import List, Sequence, Union import numpy as np @@ -22,7 +22,7 @@ class DocumentFile: """Read a document from multiple extensions""" @classmethod - def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]: + def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]: """Read a PDF file >>> from doctr.io import DocumentFile @@ -38,7 +38,7 @@ def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]: return read_pdf(file, **kwargs) @classmethod - def from_url(cls, url: str, **kwargs) -> List[np.ndarray]: + def from_url(cls, url: str, **kwargs) -> list[np.ndarray]: """Interpret a web page as a PDF document >>> from doctr.io import DocumentFile @@ -60,7 +60,7 @@ def from_url(cls, url: str, **kwargs) -> List[np.ndarray]: return cls.from_pdf(pdf_stream, **kwargs) @classmethod - def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwargs) -> List[np.ndarray]: + def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]: """Read an image file (or a collection of image files) and convert it into an image in numpy format >>> from doctr.io import DocumentFile diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 9fa7638e97..3770075f38 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -5,7 +5,7 @@ from math import floor from statistics import median_low -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any import cv2 import numpy as np @@ -31,7 +31,7 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float: def estimate_orientation( img: np.ndarray, - general_page_orientation: Optional[Tuple[int, float]] = None, + general_page_orientation: tuple[int, float] | None = None, n_ct: int = 70, ratio_threshold_for_lines: float = 3, min_confidence: float = 0.2, @@ -115,9 +115,9 @@ def estimate_orientation( def rectify_crops( - crops: List[np.ndarray], - orientations: List[int], -) -> List[np.ndarray]: + crops: list[np.ndarray], + orientations: list[int], +) -> list[np.ndarray]: """Rotate each crop of the list according to the predicted orientation: 0: already straight, no rotation 1: 90 ccw, rotate 3 times ccw @@ -135,8 +135,8 @@ def rectify_crops( def rectify_loc_preds( page_loc_preds: np.ndarray, - orientations: List[int], -) -> Optional[np.ndarray]: + orientations: list[int], +) -> np.ndarray | None: """Orient the quadrangle (Polygon4P) according to the predicted orientation, so that the points are in this order: top L, top R, bot R, bot L if the crop is readable """ @@ -153,7 +153,7 @@ def rectify_loc_preds( ) -def get_language(text: str) -> Tuple[str, float]: +def get_language(text: str) -> tuple[str, float]: """Get languages of a text using langdetect model. 
Get the language with the highest probability or no language if only a few words or a low probability @@ -173,9 +173,9 @@ def get_language(text: str) -> Tuple[str, float]: def invert_data_structure( - x: Union[List[Dict[str, Any]], Dict[str, List[Any]]], -) -> Union[List[Dict[str, Any]], Dict[str, List[Any]]]: - """Invert a List of Dict of elements to a Dict of list of elements and the other way around + x: list[dict[str, Any]] | dict[str, list[Any]], +) -> list[dict[str, Any]] | dict[str, list[Any]]: + """Invert a list of dict of elements to a dict of list of elements and the other way around Args: x: a list of dictionaries with the same keys or a dictionary of lists of the same length diff --git a/doctr/models/builder.py b/doctr/models/builder.py index ac93d4b2cd..99de966e41 100644 --- a/doctr/models/builder.py +++ b/doctr/models/builder.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import numpy as np from scipy.cluster.hierarchy import fclusterdata @@ -40,7 +40,7 @@ def __init__( self.export_as_straight_boxes = export_as_straight_boxes @staticmethod - def _sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + def _sort_boxes(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """Sort bounding boxes from top to bottom, left to right Args: @@ -62,7 +62,7 @@ def _sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: boxes = np.concatenate((boxes.min(1), boxes.max(1)), -1) return (boxes[:, 0] + 2 * boxes[:, 3] / np.median(boxes[:, 3] - boxes[:, 1])).argsort(), boxes - def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: List[int]) -> List[List[int]]: + def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: list[int]) -> list[list[int]]: """Split a line in sub_lines Args: @@ -100,7 +100,7 @@ def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: List[int]) -> List[Li return lines - def _resolve_lines(self, boxes: np.ndarray) -> List[List[int]]: + def _resolve_lines(self, boxes: np.ndarray) -> list[list[int]]: """Order boxes to group them in lines Args: @@ -146,7 +146,7 @@ def _resolve_lines(self, boxes: np.ndarray) -> List[List[int]]: return lines @staticmethod - def _resolve_blocks(boxes: np.ndarray, lines: List[List[int]]) -> List[List[List[int]]]: + def _resolve_blocks(boxes: np.ndarray, lines: list[list[int]]) -> list[list[list[int]]]: """Order lines to group them in blocks Args: @@ -198,7 +198,7 @@ def _resolve_blocks(boxes: np.ndarray, lines: List[List[int]]) -> List[List[List # Compute clusters clusters = fclusterdata(box_features, t=0.1, depth=4, criterion="distance", metric="euclidean") - _blocks: Dict[int, List[int]] = {} + _blocks: dict[int, list[int]] = {} # Form clusters for line_idx, cluster_idx in enumerate(clusters): if cluster_idx in _blocks.keys(): @@ -215,9 +215,9 @@ def _build_blocks( self, boxes: np.ndarray, objectness_scores: np.ndarray, - word_preds: List[Tuple[str, float]], - crop_orientations: List[Dict[str, Any]], - ) -> List[Block]: + word_preds: list[tuple[str, float]], + crop_orientations: list[dict[str, Any]], + ) -> list[Block]: """Gather independent words in structured blocks Args: @@ -284,14 +284,14 @@ def extra_repr(self) -> str: def __call__( self, - pages: List[np.ndarray], - boxes: List[np.ndarray], - objectness_scores: List[np.ndarray], - text_preds: List[List[Tuple[str, float]]], - page_shapes: List[Tuple[int, int]], - crop_orientations: List[Dict[str, Any]], - orientations: Optional[List[Dict[str, Any]]] = None, - 
languages: Optional[List[Dict[str, Any]]] = None, + pages: list[np.ndarray], + boxes: list[np.ndarray], + objectness_scores: list[np.ndarray], + text_preds: list[list[tuple[str, float]]], + page_shapes: list[tuple[int, int]], + crop_orientations: list[dict[str, Any]], + orientations: list[dict[str, Any]] | None = None, + languages: list[dict[str, Any]] | None = None, ) -> Document: """Re-arrange detected words into structured blocks @@ -370,14 +370,14 @@ class KIEDocumentBuilder(DocumentBuilder): def __call__( # type: ignore[override] self, - pages: List[np.ndarray], - boxes: List[Dict[str, np.ndarray]], - objectness_scores: List[Dict[str, np.ndarray]], - text_preds: List[Dict[str, List[Tuple[str, float]]]], - page_shapes: List[Tuple[int, int]], - crop_orientations: List[Dict[str, List[Dict[str, Any]]]], - orientations: Optional[List[Dict[str, Any]]] = None, - languages: Optional[List[Dict[str, Any]]] = None, + pages: list[np.ndarray], + boxes: list[dict[str, np.ndarray]], + objectness_scores: list[dict[str, np.ndarray]], + text_preds: list[dict[str, list[tuple[str, float]]]], + page_shapes: list[tuple[int, int]], + crop_orientations: list[dict[str, list[dict[str, Any]]]], + orientations: list[dict[str, Any]] | None = None, + languages: list[dict[str, Any]] | None = None, ) -> KIEDocument: """Re-arrange detected words into structured predictions @@ -409,7 +409,7 @@ def __call__( # type: ignore[override] if self.export_as_straight_boxes and len(boxes) > 0: # If boxes are already straight OK, else fit a bounding rect if next(iter(boxes[0].values())).ndim == 3: - straight_boxes: List[Dict[str, np.ndarray]] = [] + straight_boxes: list[dict[str, np.ndarray]] = [] # Iterate over pages for p_boxes in boxes: # Iterate over boxes of the pages @@ -455,9 +455,9 @@ def _build_blocks( # type: ignore[override] self, boxes: np.ndarray, objectness_scores: np.ndarray, - word_preds: List[Tuple[str, float]], - crop_orientations: List[Dict[str, Any]], - ) -> List[Prediction]: + word_preds: list[tuple[str, float]], + crop_orientations: list[dict[str, Any]], + ) -> list[Prediction]: """Gather independent words in structured blocks Args: diff --git a/doctr/models/classification/magc_resnet/pytorch.py b/doctr/models/classification/magc_resnet/pytorch.py index e51c4b0fbf..661697e516 100644 --- a/doctr/models/classification/magc_resnet/pytorch.py +++ b/doctr/models/classification/magc_resnet/pytorch.py @@ -7,7 +7,7 @@ import math from copy import deepcopy from functools import partial -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import torch from torch import nn @@ -20,7 +20,7 @@ __all__ = ["magc_resnet31"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "magc_resnet31": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -49,7 +49,7 @@ def __init__( headers: int = 8, attn_scale: bool = False, ratio: float = 0.0625, # bottleneck ratio of 1/16 as described in paper - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() @@ -104,12 +104,12 @@ def forward(self, inputs: torch.Tensor) -> torch.Tensor: def _magc_resnet( arch: str, pretrained: bool, - num_blocks: List[int], - output_channels: List[int], - stage_stride: List[int], - stage_conv: List[bool], - stage_pooling: List[Optional[Tuple[int, int]]], - ignore_keys: Optional[List[str]] = None, + num_blocks: list[int], + output_channels: list[int], + stage_stride: list[int], + stage_conv: list[bool], + stage_pooling: list[tuple[int, 
int] | None], + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> ResNet: kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) diff --git a/doctr/models/classification/magc_resnet/tensorflow.py b/doctr/models/classification/magc_resnet/tensorflow.py index 442af37474..35d9f59be7 100644 --- a/doctr/models/classification/magc_resnet/tensorflow.py +++ b/doctr/models/classification/magc_resnet/tensorflow.py @@ -6,7 +6,7 @@ import math from copy import deepcopy from functools import partial -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import tensorflow as tf from tensorflow.keras import activations, layers @@ -20,7 +20,7 @@ __all__ = ["magc_resnet31"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "magc_resnet31": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -121,11 +121,11 @@ def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: def _magc_resnet( arch: str, pretrained: bool, - num_blocks: List[int], - output_channels: List[int], - stage_downsample: List[bool], - stage_conv: List[bool], - stage_pooling: List[Optional[Tuple[int, int]]], + num_blocks: list[int], + output_channels: list[int], + stage_downsample: list[bool], + stage_conv: list[bool], + stage_pooling: list[tuple[int, int] | None], origin_stem: bool = True, **kwargs: Any, ) -> ResNet: diff --git a/doctr/models/classification/mobilenet/pytorch.py b/doctr/models/classification/mobilenet/pytorch.py index fb8a1ac20c..306fb3d1c8 100644 --- a/doctr/models/classification/mobilenet/pytorch.py +++ b/doctr/models/classification/mobilenet/pytorch.py @@ -6,7 +6,7 @@ # Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py from copy import deepcopy -from typing import Any, Dict, List, Optional +from typing import Any from torchvision.models import mobilenetv3 from torchvision.models.mobilenetv3 import MobileNetV3 @@ -25,7 +25,7 @@ "mobilenet_v3_small_page_orientation", ] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "mobilenet_v3_large": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -74,8 +74,8 @@ def _mobilenet_v3( arch: str, pretrained: bool, - rect_strides: Optional[List[str]] = None, - ignore_keys: Optional[List[str]] = None, + rect_strides: list[str] | None = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> mobilenetv3.MobileNetV3: kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) diff --git a/doctr/models/classification/mobilenet/tensorflow.py b/doctr/models/classification/mobilenet/tensorflow.py index 6b6532a345..574b6f87d0 100644 --- a/doctr/models/classification/mobilenet/tensorflow.py +++ b/doctr/models/classification/mobilenet/tensorflow.py @@ -6,7 +6,7 @@ # Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any import tensorflow as tf from tensorflow.keras import layers @@ -26,7 +26,7 @@ ] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "mobilenet_v3_large": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -76,7 +76,7 @@ def hard_swish(x: tf.Tensor) -> tf.Tensor: return x * tf.nn.relu6(x + 3.0) / 6.0 -def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: +def _make_divisible(v: float, divisor: int, 
min_value: int | None = None) -> int: if min_value is None: min_value = divisor new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) @@ -112,7 +112,7 @@ def __init__( out_channels: int, use_se: bool, activation: str, - stride: Union[int, Tuple[int, int]], + stride: int | tuple[int, int], width_mult: float = 1, ) -> None: self.input_channels = self.adjust_channels(input_channels, width_mult) @@ -200,12 +200,12 @@ class MobileNetV3(Sequential): def __init__( self, - layout: List[InvertedResidualConfig], + layout: list[InvertedResidualConfig], include_top: bool = True, head_chans: int = 1024, num_classes: int = 1000, - cfg: Optional[Dict[str, Any]] = None, - input_shape: Optional[Tuple[int, int, int]] = None, + cfg: dict[str, Any] | None = None, + input_shape: tuple[int, int, int] | None = None, ) -> None: _layers = [ Sequential( diff --git a/doctr/models/classification/predictor/pytorch.py b/doctr/models/classification/predictor/pytorch.py index e7c44d1df4..9626997581 100644 --- a/doctr/models/classification/predictor/pytorch.py +++ b/doctr/models/classification/predictor/pytorch.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import List, Optional, Union import numpy as np import torch @@ -26,8 +25,8 @@ class OrientationPredictor(nn.Module): def __init__( self, - pre_processor: Optional[PreProcessor], - model: Optional[nn.Module], + pre_processor: PreProcessor | None, + model: nn.Module | None, ) -> None: super().__init__() self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None @@ -36,8 +35,8 @@ def __init__( @torch.inference_mode() def forward( self, - inputs: List[Union[np.ndarray, torch.Tensor]], - ) -> List[Union[List[int], List[float]]]: + inputs: list[np.ndarray | torch.Tensor], + ) -> list[list[int] | list[float]]: # Dimension check if any(input.ndim != 3 for input in inputs): raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.") diff --git a/doctr/models/classification/predictor/tensorflow.py b/doctr/models/classification/predictor/tensorflow.py index ec1337c1ec..904729277f 100644 --- a/doctr/models/classification/predictor/tensorflow.py +++ b/doctr/models/classification/predictor/tensorflow.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import List, Optional, Union import numpy as np import tensorflow as tf @@ -24,20 +23,20 @@ class OrientationPredictor(NestedObject): model: core classification architecture (backbone + classification head) """ - _children_names: List[str] = ["pre_processor", "model"] + _children_names: list[str] = ["pre_processor", "model"] def __init__( self, - pre_processor: Optional[PreProcessor], - model: Optional[Model], + pre_processor: PreProcessor | None, + model: Model | None, ) -> None: self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None self.model = model if isinstance(model, Model) else None def __call__( self, - inputs: List[Union[np.ndarray, tf.Tensor]], - ) -> List[Union[List[int], List[float]]]: + inputs: list[np.ndarray | tf.Tensor], + ) -> list[list[int] | list[float]]: # Dimension check if any(input.ndim != 3 for input in inputs): raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.") diff --git a/doctr/models/classification/resnet/pytorch.py b/doctr/models/classification/resnet/pytorch.py index 10fea5302d..3460be471c 100644 --- a/doctr/models/classification/resnet/pytorch.py +++ b/doctr/models/classification/resnet/pytorch.py @@ -4,8 +4,9 @@ # See LICENSE or go to for full license details. +from collections.abc import Callable from copy import deepcopy -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any from torch import nn from torchvision.models.resnet import BasicBlock @@ -21,7 +22,7 @@ __all__ = ["ResNet", "resnet18", "resnet31", "resnet34", "resnet50", "resnet34_wide", "resnet_stage"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "resnet18": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -60,9 +61,9 @@ } -def resnet_stage(in_channels: int, out_channels: int, num_blocks: int, stride: int) -> List[nn.Module]: +def resnet_stage(in_channels: int, out_channels: int, num_blocks: int, stride: int) -> list[nn.Module]: """Build a ResNet stage""" - _layers: List[nn.Module] = [] + _layers: list[nn.Module] = [] in_chan = in_channels s = stride @@ -97,19 +98,19 @@ class ResNet(nn.Sequential): def __init__( self, - num_blocks: List[int], - output_channels: List[int], - stage_stride: List[int], - stage_conv: List[bool], - stage_pooling: List[Optional[Tuple[int, int]]], + num_blocks: list[int], + output_channels: list[int], + stage_stride: list[int], + stage_conv: list[bool], + stage_pooling: list[tuple[int, int] | None], origin_stem: bool = True, stem_channels: int = 64, - attn_module: Optional[Callable[[int], nn.Module]] = None, + attn_module: Callable[[int], nn.Module] | None = None, include_top: bool = True, num_classes: int = 1000, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: - _layers: List[nn.Module] + _layers: list[nn.Module] if origin_stem: _layers = [ *conv_sequence_pt(3, stem_channels, True, True, kernel_size=7, padding=3, stride=2), @@ -155,12 +156,12 @@ def __init__( def _resnet( arch: str, pretrained: bool, - num_blocks: List[int], - output_channels: List[int], - stage_stride: List[int], - stage_conv: List[bool], - stage_pooling: List[Optional[Tuple[int, int]]], - ignore_keys: Optional[List[str]] = None, + num_blocks: list[int], + output_channels: list[int], + stage_stride: list[int], + stage_conv: list[bool], + stage_pooling: list[tuple[int, int] | None], + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> ResNet: kwargs["num_classes"] = 
kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) @@ -187,7 +188,7 @@ def _tv_resnet( arch: str, pretrained: bool, arch_fn, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> TVResNet: kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) diff --git a/doctr/models/classification/resnet/tensorflow.py b/doctr/models/classification/resnet/tensorflow.py index b800272a7d..1bbc77cbfa 100644 --- a/doctr/models/classification/resnet/tensorflow.py +++ b/doctr/models/classification/resnet/tensorflow.py @@ -3,8 +3,9 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. +from collections.abc import Callable from copy import deepcopy -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any import tensorflow as tf from tensorflow.keras import layers @@ -18,7 +19,7 @@ __all__ = ["ResNet", "resnet18", "resnet31", "resnet34", "resnet50", "resnet34_wide"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "resnet18": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -91,7 +92,7 @@ def conv_resnetblock( output_channels: int, kernel_size: int, strides: int = 1, - ) -> List[layers.Layer]: + ) -> list[layers.Layer]: return [ *conv_sequence(output_channels, "relu", bn=True, strides=strides, kernel_size=kernel_size), *conv_sequence(output_channels, None, bn=True, kernel_size=kernel_size), @@ -107,8 +108,8 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor: def resnet_stage( num_blocks: int, out_channels: int, shortcut: bool = False, downsample: bool = False -) -> List[layers.Layer]: - _layers: List[layers.Layer] = [ResnetBlock(out_channels, conv_shortcut=shortcut, strides=2 if downsample else 1)] +) -> list[layers.Layer]: + _layers: list[layers.Layer] = [ResnetBlock(out_channels, conv_shortcut=shortcut, strides=2 if downsample else 1)] for _ in range(1, num_blocks): _layers.append(ResnetBlock(out_channels, conv_shortcut=False)) @@ -135,18 +136,18 @@ class ResNet(Sequential): def __init__( self, - num_blocks: List[int], - output_channels: List[int], - stage_downsample: List[bool], - stage_conv: List[bool], - stage_pooling: List[Optional[Tuple[int, int]]], + num_blocks: list[int], + output_channels: list[int], + stage_downsample: list[bool], + stage_conv: list[bool], + stage_pooling: list[tuple[int, int] | None], origin_stem: bool = True, stem_channels: int = 64, - attn_module: Optional[Callable[[int], layers.Layer]] = None, + attn_module: Callable[[int], layers.Layer] | None = None, include_top: bool = True, num_classes: int = 1000, - cfg: Optional[Dict[str, Any]] = None, - input_shape: Optional[Tuple[int, int, int]] = None, + cfg: dict[str, Any] | None = None, + input_shape: tuple[int, int, int] | None = None, ) -> None: inplanes = stem_channels if origin_stem: @@ -186,11 +187,11 @@ def __init__( def _resnet( arch: str, pretrained: bool, - num_blocks: List[int], - output_channels: List[int], - stage_downsample: List[bool], - stage_conv: List[bool], - stage_pooling: List[Optional[Tuple[int, int]]], + num_blocks: list[int], + output_channels: list[int], + stage_downsample: list[bool], + stage_conv: list[bool], + stage_pooling: list[tuple[int, int] | None], origin_stem: bool = True, **kwargs: Any, ) -> ResNet: diff --git a/doctr/models/classification/textnet/pytorch.py b/doctr/models/classification/textnet/pytorch.py index 5dabb7586f..3b1a3ff419 100644 --- 
a/doctr/models/classification/textnet/pytorch.py +++ b/doctr/models/classification/textnet/pytorch.py @@ -5,7 +5,7 @@ from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any from torch import nn @@ -16,7 +16,7 @@ __all__ = ["textnet_tiny", "textnet_small", "textnet_base"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "textnet_tiny": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -47,21 +47,21 @@ class TextNet(nn.Sequential): Implementation based on the official Pytorch implementation: `_. Args: - stages (List[Dict[str, List[int]]]): List of dictionaries containing the parameters of each stage. + stages (list[dict[str, list[int]]]): list of dictionaries containing the parameters of each stage. include_top (bool, optional): Whether to include the classifier head. Defaults to True. num_classes (int, optional): Number of output classes. Defaults to 1000. - cfg (Optional[Dict[str, Any]], optional): Additional configuration. Defaults to None. + cfg (dict[str, Any], optional): Additional configuration. Defaults to None. """ def __init__( self, - stages: List[Dict[str, List[int]]], - input_shape: Tuple[int, int, int] = (3, 32, 32), + stages: list[dict[str, list[int]]], + input_shape: tuple[int, int, int] = (3, 32, 32), num_classes: int = 1000, include_top: bool = True, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: - _layers: List[nn.Module] = [ + _layers: list[nn.Module] = [ *conv_sequence_pt( in_channels=3, out_channels=64, relu=True, bn=True, kernel_size=3, stride=2, padding=(1, 1) ), @@ -97,7 +97,7 @@ def __init__( def _textnet( arch: str, pretrained: bool, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> TextNet: kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) diff --git a/doctr/models/classification/textnet/tensorflow.py b/doctr/models/classification/textnet/tensorflow.py index 8e11f66435..0eeb8894fc 100644 --- a/doctr/models/classification/textnet/tensorflow.py +++ b/doctr/models/classification/textnet/tensorflow.py @@ -5,7 +5,7 @@ from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any from tensorflow.keras import Sequential, layers @@ -16,7 +16,7 @@ __all__ = ["textnet_tiny", "textnet_small", "textnet_base"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "textnet_tiny": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -47,19 +47,19 @@ class TextNet(Sequential): Implementation based on the official Pytorch implementation: `_. Args: - stages (List[Dict[str, List[int]]]): List of dictionaries containing the parameters of each stage. + stages (list[dict[str, list[int]]]): list of dictionaries containing the parameters of each stage. include_top (bool, optional): Whether to include the classifier head. Defaults to True. num_classes (int, optional): Number of output classes. Defaults to 1000. - cfg (Optional[Dict[str, Any]], optional): Additional configuration. Defaults to None. + cfg (dict[str, Any], optional): Additional configuration. Defaults to None. 
""" def __init__( self, - stages: List[Dict[str, List[int]]], - input_shape: Tuple[int, int, int] = (32, 32, 3), + stages: list[dict[str, list[int]]], + input_shape: tuple[int, int, int] = (32, 32, 3), num_classes: int = 1000, include_top: bool = True, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: _layers = [ *conv_sequence( diff --git a/doctr/models/classification/vgg/pytorch.py b/doctr/models/classification/vgg/pytorch.py index 2bea77ef14..d2b224d08f 100644 --- a/doctr/models/classification/vgg/pytorch.py +++ b/doctr/models/classification/vgg/pytorch.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. from copy import deepcopy -from typing import Any, Dict, List, Optional +from typing import Any from torch import nn from torchvision.models import vgg as tv_vgg @@ -16,7 +16,7 @@ __all__ = ["vgg16_bn_r"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "vgg16_bn_r": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -32,7 +32,7 @@ def _vgg( pretrained: bool, tv_arch: str, num_rect_pools: int = 3, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> tv_vgg.VGG: kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) @@ -45,7 +45,7 @@ def _vgg( # Build the model model = tv_vgg.__dict__[tv_arch](**kwargs, weights=None) - # List the MaxPool2d + # list the MaxPool2d pool_idcs = [idx for idx, m in enumerate(model.features) if isinstance(m, nn.MaxPool2d)] # Replace their kernel with rectangular ones for idx in pool_idcs[-num_rect_pools:]: diff --git a/doctr/models/classification/vgg/tensorflow.py b/doctr/models/classification/vgg/tensorflow.py index 74c991fa4b..4025257b4d 100644 --- a/doctr/models/classification/vgg/tensorflow.py +++ b/doctr/models/classification/vgg/tensorflow.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any from tensorflow.keras import layers from tensorflow.keras.models import Sequential @@ -16,7 +16,7 @@ __all__ = ["VGG", "vgg16_bn_r"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "vgg16_bn_r": { "mean": (0.5, 0.5, 0.5), "std": (1.0, 1.0, 1.0), @@ -42,13 +42,13 @@ class VGG(Sequential): def __init__( self, - num_blocks: List[int], - planes: List[int], - rect_pools: List[bool], + num_blocks: list[int], + planes: list[int], + rect_pools: list[bool], include_top: bool = False, num_classes: int = 1000, - input_shape: Optional[Tuple[int, int, int]] = None, - cfg: Optional[Dict[str, Any]] = None, + input_shape: tuple[int, int, int] | None = None, + cfg: dict[str, Any] | None = None, ) -> None: _layers = [] # Specify input_shape only for the first layer @@ -66,7 +66,7 @@ def __init__( def _vgg( - arch: str, pretrained: bool, num_blocks: List[int], planes: List[int], rect_pools: List[bool], **kwargs: Any + arch: str, pretrained: bool, num_blocks: list[int], planes: list[int], rect_pools: list[bool], **kwargs: Any ) -> VGG: kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"]) diff --git a/doctr/models/classification/vit/pytorch.py b/doctr/models/classification/vit/pytorch.py index f63eef8d13..55581447ca 100644 --- a/doctr/models/classification/vit/pytorch.py +++ b/doctr/models/classification/vit/pytorch.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import torch from torch import nn @@ -18,7 +18,7 @@ __all__ = ["vit_s", "vit_b"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "vit_s": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -81,14 +81,14 @@ def __init__( num_layers: int, num_heads: int, ffd_ratio: int, - patch_size: Tuple[int, int] = (4, 4), - input_shape: Tuple[int, int, int] = (3, 32, 32), + patch_size: tuple[int, int] = (4, 4), + input_shape: tuple[int, int, int] = (3, 32, 32), dropout: float = 0.0, num_classes: int = 1000, include_top: bool = True, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: - _layers: List[nn.Module] = [ + _layers: list[nn.Module] = [ PatchEmbedding(input_shape, d_model, patch_size), EncoderBlock(num_layers, num_heads, d_model, d_model * ffd_ratio, dropout, nn.GELU()), ] @@ -102,7 +102,7 @@ def __init__( def _vit( arch: str, pretrained: bool, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> VisionTransformer: kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) diff --git a/doctr/models/classification/vit/tensorflow.py b/doctr/models/classification/vit/tensorflow.py index 572877bc7e..51f0a969e3 100644 --- a/doctr/models/classification/vit/tensorflow.py +++ b/doctr/models/classification/vit/tensorflow.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
from copy import deepcopy -from typing import Any, Dict, Optional, Tuple +from typing import Any import tensorflow as tf from tensorflow.keras import Sequential, layers @@ -19,7 +19,7 @@ __all__ = ["vit_s", "vit_b"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "vit_s": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -77,12 +77,12 @@ def __init__( num_layers: int, num_heads: int, ffd_ratio: int, - patch_size: Tuple[int, int] = (4, 4), - input_shape: Tuple[int, int, int] = (32, 32, 3), + patch_size: tuple[int, int] = (4, 4), + input_shape: tuple[int, int, int] = (32, 32, 3), dropout: float = 0.0, num_classes: int = 1000, include_top: bool = True, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: _layers = [ PatchEmbedding(input_shape, d_model, patch_size), diff --git a/doctr/models/classification/zoo.py b/doctr/models/classification/zoo.py index 13050a9d9d..7cce3083ea 100644 --- a/doctr/models/classification/zoo.py +++ b/doctr/models/classification/zoo.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List +from typing import Any from doctr.file_utils import is_tf_available @@ -13,7 +13,7 @@ __all__ = ["crop_orientation_predictor", "page_orientation_predictor"] -ARCHS: List[str] = [ +ARCHS: list[str] = [ "magc_resnet31", "mobilenet_v3_small", "mobilenet_v3_small_r", @@ -31,7 +31,7 @@ "vit_s", "vit_b", ] -ORIENTATION_ARCHS: List[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"] +ORIENTATION_ARCHS: list[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"] def _orientation_predictor( diff --git a/doctr/models/core.py b/doctr/models/core.py index a05aee7aa9..d9f43652f9 100644 --- a/doctr/models/core.py +++ b/doctr/models/core.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. -from typing import Any, Dict, Optional +from typing import Any from doctr.utils.repr import NestedObject @@ -14,6 +14,6 @@ class BaseModel(NestedObject): """Implements abstract DetectionModel class""" - def __init__(self, cfg: Optional[Dict[str, Any]] = None) -> None: + def __init__(self, cfg: dict[str, Any] | None = None) -> None: super().__init__() self.cfg = cfg diff --git a/doctr/models/detection/_utils/base.py b/doctr/models/detection/_utils/base.py index 86f5caebaf..aabfe291f9 100644 --- a/doctr/models/detection/_utils/base.py +++ b/doctr/models/detection/_utils/base.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Dict, List import numpy as np @@ -11,12 +10,12 @@ def _remove_padding( - pages: List[np.ndarray], - loc_preds: List[Dict[str, np.ndarray]], + pages: list[np.ndarray], + loc_preds: list[dict[str, np.ndarray]], preserve_aspect_ratio: bool, symmetric_pad: bool, assume_straight_pages: bool, -) -> List[Dict[str, np.ndarray]]: +) -> list[dict[str, np.ndarray]]: """Remove padding from the localization predictions Args: diff --git a/doctr/models/detection/core.py b/doctr/models/detection/core.py index 2c3189f57f..4d6009c66b 100644 --- a/doctr/models/detection/core.py +++ b/doctr/models/detection/core.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import List import cv2 import numpy as np @@ -68,7 +67,7 @@ def bitmap_to_boxes( def __call__( self, proba_map, - ) -> List[List[np.ndarray]]: + ) -> list[list[np.ndarray]]: """Performs postprocessing for a list of model outputs Args: diff --git a/doctr/models/detection/differentiable_binarization/base.py b/doctr/models/detection/differentiable_binarization/base.py index 08e6339967..fedeb58d3a 100644 --- a/doctr/models/detection/differentiable_binarization/base.py +++ b/doctr/models/detection/differentiable_binarization/base.py @@ -5,7 +5,6 @@ # Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization -from typing import Dict, List, Tuple, Union import cv2 import numpy as np @@ -104,7 +103,7 @@ def bitmap_to_boxes( """ height, width = bitmap.shape[:2] min_size_box = 2 - boxes: List[Union[np.ndarray, List[float]]] = [] + boxes: list[np.ndarray | list[float]] = [] # get contours from connected components on the bitmap contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) for contour in contours: @@ -206,7 +205,7 @@ def draw_thresh_map( polygon: np.ndarray, canvas: np.ndarray, mask: np.ndarray, - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Draw a polygon treshold map on a canvas, as described in the DB paper Args: @@ -269,10 +268,10 @@ def draw_thresh_map( def build_target( self, - target: List[Dict[str, np.ndarray]], - output_shape: Tuple[int, int, int], + target: list[dict[str, np.ndarray]], + output_shape: tuple[int, int, int], channels_last: bool = True, - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: if any(t.dtype != np.float32 for tgt in target for t in tgt.values()): raise AssertionError("the expected dtype of target 'boxes' entry is 'np.float32'.") if any(np.any((t[:, :4] > 1) | (t[:, :4] < 0)) for tgt in target for t in tgt.values()): diff --git a/doctr/models/detection/differentiable_binarization/pytorch.py b/doctr/models/detection/differentiable_binarization/pytorch.py index 5625dbec62..ddd8cfa595 100644 --- a/doctr/models/detection/differentiable_binarization/pytorch.py +++ b/doctr/models/detection/differentiable_binarization/pytorch.py @@ -3,7 +3,8 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Callable, Dict, List, Optional +from collections.abc import Callable +from typing import Any import numpy as np import torch @@ -22,7 +23,7 @@ __all__ = ["DBNet", "db_resnet50", "db_resnet34", "db_mobilenet_v3_large"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "db_resnet50": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), @@ -47,7 +48,7 @@ class FeaturePyramidNetwork(nn.Module): def __init__( self, - in_channels: List[int], + in_channels: list[int], out_channels: int, deform_conv: bool = False, ) -> None: @@ -76,12 +77,12 @@ def __init__( for idx, chans in enumerate(in_channels) ]) - def forward(self, x: List[torch.Tensor]) -> torch.Tensor: + def forward(self, x: list[torch.Tensor]) -> torch.Tensor: if len(x) != len(self.out_branches): raise AssertionError # Conv1x1 to get the same number of channels - _x: List[torch.Tensor] = [branch(t) for branch, t in zip(self.in_branches, x)] - out: List[torch.Tensor] = [_x[-1]] + _x: list[torch.Tensor] = [branch(t) for branch, t in zip(self.in_branches, x)] + out: list[torch.Tensor] = [_x[-1]] for t in _x[:-1][::-1]: out.append(self.upsample(out[-1]) + t) @@ -116,8 +117,8 @@ def __init__( box_thresh: float = 0.1, assume_straight_pages: bool = True, exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, - class_names: List[str] = [CLASS_NAME], + cfg: dict[str, Any] | None = None, + class_names: list[str] = [CLASS_NAME], ) -> None: super().__init__() self.class_names = class_names @@ -181,10 +182,10 @@ def __init__( def forward( self, x: torch.Tensor, - target: Optional[List[np.ndarray]] = None, + target: list[np.ndarray] | None = None, return_model_output: bool = False, return_preds: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: # Extract feature maps at different stages feats = self.feat_extractor(x) feats = [feats[str(idx)] for idx in range(len(feats))] @@ -192,7 +193,7 @@ def forward( feat_concat = self.fpn(feats) logits = self.prob_head(feat_concat) - out: Dict[str, Any] = {} + out: dict[str, Any] = {} if self.exportable: out["logits"] = logits return out @@ -221,7 +222,7 @@ def compute_loss( self, out_map: torch.Tensor, thresh_map: torch.Tensor, - target: List[np.ndarray], + target: list[np.ndarray], gamma: float = 2.0, alpha: float = 0.5, eps: float = 1e-8, @@ -287,10 +288,10 @@ def _dbnet( arch: str, pretrained: bool, backbone_fn: Callable[[bool], nn.Module], - fpn_layers: List[str], - backbone_submodule: Optional[str] = None, + fpn_layers: list[str], + backbone_submodule: str | None = None, pretrained_backbone: bool = True, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> DBNet: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/detection/differentiable_binarization/tensorflow.py b/doctr/models/detection/differentiable_binarization/tensorflow.py index dc94977fb8..9658039437 100644 --- a/doctr/models/detection/differentiable_binarization/tensorflow.py +++ b/doctr/models/detection/differentiable_binarization/tensorflow.py @@ -6,7 +6,7 @@ # Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import numpy as np import tensorflow as tf @@ -29,7 +29,7 @@ __all__ = ["DBNet", "db_resnet50", "db_mobilenet_v3_large"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, 
dict[str, Any]] = { "db_resnet50": { "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), @@ -92,7 +92,7 @@ def extra_repr(self) -> str: def call( self, - x: List[tf.Tensor], + x: list[tf.Tensor], **kwargs: Any, ) -> tf.Tensor: # Channel mapping @@ -121,7 +121,7 @@ class DBNet(_DBNet, Model, NestedObject): class_names: list of class names """ - _children_names: List[str] = ["feat_extractor", "fpn", "probability_head", "threshold_head", "postprocessor"] + _children_names: list[str] = ["feat_extractor", "fpn", "probability_head", "threshold_head", "postprocessor"] def __init__( self, @@ -131,8 +131,8 @@ def __init__( box_thresh: float = 0.1, assume_straight_pages: bool = True, exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, - class_names: List[str] = [CLASS_NAME], + cfg: dict[str, Any] | None = None, + class_names: list[str] = [CLASS_NAME], ) -> None: super().__init__() self.class_names = class_names @@ -171,7 +171,7 @@ def compute_loss( self, out_map: tf.Tensor, thresh_map: tf.Tensor, - target: List[Dict[str, np.ndarray]], + target: list[dict[str, np.ndarray]], gamma: float = 2.0, alpha: float = 0.5, eps: float = 1e-8, @@ -240,16 +240,16 @@ def compute_loss( def call( self, x: tf.Tensor, - target: Optional[List[Dict[str, np.ndarray]]] = None, + target: list[dict[str, np.ndarray]] | None = None, return_model_output: bool = False, return_preds: bool = False, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: feat_maps = self.feat_extractor(x, **kwargs) feat_concat = self.fpn(feat_maps, **kwargs) logits = self.probability_head(feat_concat, **kwargs) - out: Dict[str, tf.Tensor] = {} + out: dict[str, tf.Tensor] = {} if self.exportable: out["logits"] = logits return out @@ -276,9 +276,9 @@ def _db_resnet( arch: str, pretrained: bool, backbone_fn, - fpn_layers: List[str], + fpn_layers: list[str], pretrained_backbone: bool = True, - input_shape: Optional[Tuple[int, int, int]] = None, + input_shape: tuple[int, int, int] | None = None, **kwargs: Any, ) -> DBNet: pretrained_backbone = pretrained_backbone and not pretrained @@ -322,9 +322,9 @@ def _db_mobilenet( arch: str, pretrained: bool, backbone_fn, - fpn_layers: List[str], + fpn_layers: list[str], pretrained_backbone: bool = True, - input_shape: Optional[Tuple[int, int, int]] = None, + input_shape: tuple[int, int, int] | None = None, **kwargs: Any, ) -> DBNet: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/detection/fast/base.py b/doctr/models/detection/fast/base.py index 85970fc5be..6691c948da 100644 --- a/doctr/models/detection/fast/base.py +++ b/doctr/models/detection/fast/base.py @@ -5,7 +5,6 @@ # Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization -from typing import Dict, List, Tuple, Union import cv2 import numpy as np @@ -101,7 +100,7 @@ def bitmap_to_boxes( containing x, y, w, h, alpha, score for the box """ height, width = bitmap.shape[:2] - boxes: List[Union[np.ndarray, List[float]]] = [] + boxes: list[np.ndarray | list[float]] = [] # get contours from connected components on the bitmap contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) for contour in contours: @@ -153,10 +152,10 @@ class _FAST(BaseModel): def build_target( self, - target: List[Dict[str, np.ndarray]], - output_shape: Tuple[int, int, int], + target: list[dict[str, np.ndarray]], + output_shape: tuple[int, int, int], channels_last: bool = True, - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> 
tuple[np.ndarray, np.ndarray, np.ndarray]: """Build the target, and it's mask to be used from loss computation. Args: diff --git a/doctr/models/detection/fast/pytorch.py b/doctr/models/detection/fast/pytorch.py index c7fd98b098..91218dba11 100644 --- a/doctr/models/detection/fast/pytorch.py +++ b/doctr/models/detection/fast/pytorch.py @@ -3,7 +3,8 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, Callable, Dict, List, Optional, Union +from collections.abc import Callable +from typing import Any import numpy as np import torch @@ -21,7 +22,7 @@ __all__ = ["FAST", "fast_tiny", "fast_small", "fast_base", "reparameterize"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "fast_tiny": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), @@ -89,7 +90,7 @@ def __init__( self, out_channels: int = 128, dropout: float = 0.1, ) -> None: - _layers: List[nn.Module] = [ + _layers: list[nn.Module] = [ FASTConvLayer(in_channels, out_channels, kernel_size=3), nn.Dropout(dropout), nn.Conv2d(out_channels, num_classes, kernel_size=1, bias=False), @@ -122,8 +123,8 @@ def __init__( pooling_size: int = 4, # different from paper performs better on close text-rich images assume_straight_pages: bool = True, exportable: bool = False, - cfg: Optional[Dict[str, Any]] = {}, - class_names: List[str] = [CLASS_NAME], + cfg: dict[str, Any] | None = {}, + class_names: list[str] = [CLASS_NAME], ) -> None: super().__init__() self.class_names = class_names @@ -172,10 +173,10 @@ def forward( self, x: torch.Tensor, - target: Optional[List[np.ndarray]] = None, + target: list[np.ndarray] | None = None, return_model_output: bool = False, return_preds: bool = False, - ) -> Dict[str, torch.Tensor]: + ) -> dict[str, torch.Tensor]: # Extract feature maps at different stages feats = self.feat_extractor(x) feats = [feats[str(idx)] for idx in range(len(feats))] @@ -183,7 +184,7 @@ feat_concat = self.neck(feats) logits = F.interpolate(self.prob_head(feat_concat), size=x.shape[-2:], mode="bilinear") - out: Dict[str, Any] = {} + out: dict[str, Any] = {} if self.exportable: out["logits"] = logits return out @@ -210,7 +211,7 @@ def forward( def compute_loss( self, out_map: torch.Tensor, - target: List[np.ndarray], + target: list[np.ndarray], eps: float = 1e-6, ) -> torch.Tensor: """Compute fast loss, 2 x Dice loss where the text kernel loss is scaled by 0.5.
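Note on the `cfg` hunk above and its TensorFlow twin below: the faithful PEP 604 rewrite of `Optional[Dict[str, Any]]` is `dict[str, Any] | None`, since both spell the same union; dropping the `None` arm would narrow the accepted type rather than just modernize its spelling. A minimal stand-alone sketch of the equivalence (plain Python 3.10+, not doctr code; `configure` is a hypothetical helper):

from typing import Any, Optional

# PEP 604: `X | None` builds the same union as `Optional[X]` at runtime,
# so the converted annotation still accepts an explicit None.
assert (dict[str, Any] | None) == Optional[dict[str, Any]]


def configure(cfg: dict[str, Any] | None = None) -> dict[str, Any]:
    # `or {}` keeps the return value total when callers pass None.
    return cfg or {}


assert configure() == {}
assert configure({"bin_thresh": 0.3}) == {"bin_thresh": 0.3}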
@@ -274,7 +275,7 @@ def ohem_sample(score: torch.Tensor, gt: torch.Tensor, mask: torch.Tensor) -> to return text_loss + kernel_loss -def reparameterize(model: Union[FAST, nn.Module]) -> FAST: +def reparameterize(model: FAST | nn.Module) -> FAST: """Fuse batchnorm and conv layers and reparameterize the model Args: @@ -317,9 +318,9 @@ def _fast( arch: str, pretrained: bool, backbone_fn: Callable[[bool], nn.Module], - feat_layers: List[str], + feat_layers: list[str], pretrained_backbone: bool = True, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> FAST: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/detection/fast/tensorflow.py b/doctr/models/detection/fast/tensorflow.py index 231f0d8dd3..0e6f8d9a23 100644 --- a/doctr/models/detection/fast/tensorflow.py +++ b/doctr/models/detection/fast/tensorflow.py @@ -6,7 +6,7 @@ # Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any import numpy as np import tensorflow as tf @@ -23,7 +23,7 @@ __all__ = ["FAST", "fast_tiny", "fast_small", "fast_base", "reparameterize"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "fast_tiny": { "input_shape": (1024, 1024, 3), "mean": (0.798, 0.785, 0.772), @@ -113,7 +113,7 @@ class FAST(_FAST, Model, NestedObject): class_names: list of class names """ - _children_names: List[str] = ["feat_extractor", "neck", "head", "postprocessor"] + _children_names: list[str] = ["feat_extractor", "neck", "head", "postprocessor"] def __init__( self, @@ -124,8 +124,8 @@ def __init__( pooling_size: int = 4, # different from paper performs better on close text-rich images assume_straight_pages: bool = True, exportable: bool = False, - cfg: Optional[Dict[str, Any]] = {}, - class_names: List[str] = [CLASS_NAME], + cfg: dict[str, Any] | None = {}, + class_names: list[str] = [CLASS_NAME], ) -> None: super().__init__() self.class_names = class_names @@ -156,7 +156,7 @@ def compute_loss( self, out_map: tf.Tensor, - target: List[Dict[str, np.ndarray]], + target: list[dict[str, np.ndarray]], eps: float = 1e-6, ) -> tf.Tensor: """Compute fast loss, 2 x Dice loss where the text kernel loss is scaled by 0.5.
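The `compute_loss` docstring shared by both backends ("2 x Dice loss where the text kernel loss is scaled by 0.5") is terse; as a rough stand-alone sketch of a plain Dice loss for intuition only, leaving out the OHEM sampling and shrunken-kernel handling of the actual FAST implementations:

import torch


def dice_loss(pred: torch.Tensor, gt: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # pred: probabilities in [0, 1]; gt: a binary mask of the same shape.
    inter = (pred * gt).sum()
    return 1.0 - 2.0 * inter / (pred.sum() + gt.sum() + eps)


# The combination the docstring describes: one Dice term on the full text
# regions plus a kernel term contributing at half weight, i.e. roughly
# total = dice_loss(prob_text, gt_text) + 0.5 * dice_loss(prob_kernel, gt_kernel)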
@@ -217,18 +217,18 @@ def ohem(score: tf.Tensor, gt: tf.Tensor, mask: tf.Tensor) -> tf.Tensor: def call( self, x: tf.Tensor, - target: Optional[List[Dict[str, np.ndarray]]] = None, + target: list[dict[str, np.ndarray]] | None = None, return_model_output: bool = False, return_preds: bool = False, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: feat_maps = self.feat_extractor(x, **kwargs) # Pass through the Neck & Head & Upsample feat_concat = self.neck(feat_maps, **kwargs) logits: tf.Tensor = self.head(feat_concat, **kwargs) logits = layers.UpSampling2D(size=x.shape[-2] // logits.shape[-2], interpolation="bilinear")(logits, **kwargs) - out: Dict[str, tf.Tensor] = {} + out: dict[str, tf.Tensor] = {} if self.exportable: out["logits"] = logits return out @@ -250,7 +250,7 @@ def call( return out -def reparameterize(model: Union[FAST, layers.Layer]) -> FAST: +def reparameterize(model: FAST | layers.Layer) -> FAST: """Fuse batchnorm and conv layers and reparameterize the model args: @@ -300,9 +300,9 @@ def _fast( arch: str, pretrained: bool, backbone_fn, - feat_layers: List[str], + feat_layers: list[str], pretrained_backbone: bool = True, - input_shape: Optional[Tuple[int, int, int]] = None, + input_shape: tuple[int, int, int] | None = None, **kwargs: Any, ) -> FAST: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/detection/linknet/base.py b/doctr/models/detection/linknet/base.py index 782d688f1b..d86a57fbac 100644 --- a/doctr/models/detection/linknet/base.py +++ b/doctr/models/detection/linknet/base.py @@ -5,7 +5,6 @@ # Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization -from typing import Dict, List, Tuple, Union import cv2 import numpy as np @@ -101,7 +100,7 @@ def bitmap_to_boxes( containing x, y, w, h, alpha, score for the box """ height, width = bitmap.shape[:2] - boxes: List[Union[np.ndarray, List[float]]] = [] + boxes: list[np.ndarray | list[float]] = [] # get contours from connected components on the bitmap contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) for contour in contours: @@ -156,10 +155,10 @@ class _LinkNet(BaseModel): def build_target( self, - target: List[Dict[str, np.ndarray]], - output_shape: Tuple[int, int, int], + target: list[dict[str, np.ndarray]], + output_shape: tuple[int, int, int], channels_last: bool = True, - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray]: """Build the target, and it's mask to be used from loss computation. Args: diff --git a/doctr/models/detection/linknet/pytorch.py b/doctr/models/detection/linknet/pytorch.py index 321c894d54..88cb24204b 100644 --- a/doctr/models/detection/linknet/pytorch.py +++ b/doctr/models/detection/linknet/pytorch.py @@ -3,7 +3,8 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Callable, Dict, List, Optional, Tuple +from collections.abc import Callable +from typing import Any import numpy as np import torch @@ -20,7 +21,7 @@ __all__ = ["LinkNet", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "linknet_resnet18": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), @@ -43,7 +44,7 @@ class LinkNetFPN(nn.Module): - def __init__(self, layer_shapes: List[Tuple[int, int, int]]) -> None: + def __init__(self, layer_shapes: list[tuple[int, int, int]]) -> None: super().__init__() strides = [ 1 if (in_shape[-1] == out_shape[-1]) else 2 @@ -74,7 +75,7 @@ def decoder_block(in_chan: int, out_chan: int, stride: int) -> nn.Sequential: nn.ReLU(inplace=True), ) - def forward(self, feats: List[torch.Tensor]) -> torch.Tensor: + def forward(self, feats: list[torch.Tensor]) -> torch.Tensor: out = feats[-1] for decoder, fmap in zip(self.decoders[::-1], feats[:-1][::-1]): out = decoder(out) + fmap @@ -107,8 +108,8 @@ def __init__( head_chans: int = 32, assume_straight_pages: bool = True, exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, - class_names: List[str] = [CLASS_NAME], + cfg: dict[str, Any] | None = None, + class_names: list[str] = [CLASS_NAME], ) -> None: super().__init__() self.class_names = class_names @@ -162,16 +163,16 @@ def __init__( def forward( self, x: torch.Tensor, - target: Optional[List[np.ndarray]] = None, + target: list[np.ndarray] | None = None, return_model_output: bool = False, return_preds: bool = False, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: feats = self.feat_extractor(x) logits = self.fpn([feats[str(idx)] for idx in range(len(feats))]) logits = self.classifier(logits) - out: Dict[str, Any] = {} + out: dict[str, Any] = {} if self.exportable: out["logits"] = logits return out @@ -197,7 +198,7 @@ def forward( def compute_loss( self, out_map: torch.Tensor, - target: List[np.ndarray], + target: list[np.ndarray], gamma: float = 2.0, alpha: float = 0.5, eps: float = 1e-8, @@ -249,9 +250,9 @@ def _linknet( arch: str, pretrained: bool, backbone_fn: Callable[[bool], nn.Module], - fpn_layers: List[str], + fpn_layers: list[str], pretrained_backbone: bool = True, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> LinkNet: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/detection/linknet/tensorflow.py b/doctr/models/detection/linknet/tensorflow.py index 502531b430..1ef8d1fd17 100644 --- a/doctr/models/detection/linknet/tensorflow.py +++ b/doctr/models/detection/linknet/tensorflow.py @@ -6,7 +6,7 @@ # Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import numpy as np import tensorflow as tf @@ -27,7 +27,7 @@ __all__ = ["LinkNet", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "linknet_resnet18": { "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), @@ -73,7 +73,7 @@ class LinkNetFPN(Model, NestedObject): def __init__( self, out_chans: int, - in_shapes: List[Tuple[int, ...]], + in_shapes: list[tuple[int, ...]], ) -> None: super().__init__() self.out_chans = out_chans @@ -85,7 +85,7 @@ def __init__( for in_chan, out_chan, s, in_shape in zip(i_chans, 
o_chans, strides, in_shapes[::-1]) ] - def call(self, x: List[tf.Tensor], **kwargs: Any) -> tf.Tensor: + def call(self, x: list[tf.Tensor], **kwargs: Any) -> tf.Tensor: out = 0 for decoder, fmap in zip(self.decoders, x[::-1]): out = decoder(out + fmap, **kwargs) @@ -110,7 +110,7 @@ class LinkNet(_LinkNet, Model): class_names: list of class names """ - _children_names: List[str] = ["feat_extractor", "fpn", "classifier", "postprocessor"] + _children_names: list[str] = ["feat_extractor", "fpn", "classifier", "postprocessor"] def __init__( self, @@ -120,8 +120,8 @@ def __init__( box_thresh: float = 0.1, assume_straight_pages: bool = True, exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, - class_names: List[str] = [CLASS_NAME], + cfg: dict[str, Any] | None = None, + class_names: list[str] = [CLASS_NAME], ) -> None: super().__init__(cfg=cfg) @@ -166,7 +166,7 @@ def __init__( def compute_loss( self, out_map: tf.Tensor, - target: List[Dict[str, np.ndarray]], + target: list[dict[str, np.ndarray]], gamma: float = 2.0, alpha: float = 0.5, eps: float = 1e-8, @@ -215,16 +215,16 @@ def compute_loss( def call( self, x: tf.Tensor, - target: Optional[List[Dict[str, np.ndarray]]] = None, + target: list[dict[str, np.ndarray]] | None = None, return_model_output: bool = False, return_preds: bool = False, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: feat_maps = self.feat_extractor(x, **kwargs) logits = self.fpn(feat_maps, **kwargs) logits = self.classifier(logits, **kwargs) - out: Dict[str, tf.Tensor] = {} + out: dict[str, tf.Tensor] = {} if self.exportable: out["logits"] = logits return out @@ -250,9 +250,9 @@ def _linknet( arch: str, pretrained: bool, backbone_fn, - fpn_layers: List[str], + fpn_layers: list[str], pretrained_backbone: bool = True, - input_shape: Optional[Tuple[int, int, int]] = None, + input_shape: tuple[int, int, int] | None = None, **kwargs: Any, ) -> LinkNet: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/detection/predictor/pytorch.py b/doctr/models/detection/predictor/pytorch.py index 80467d3d97..8628e1fdb5 100644 --- a/doctr/models/detection/predictor/pytorch.py +++ b/doctr/models/detection/predictor/pytorch.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np import torch @@ -36,10 +36,10 @@ def __init__( @torch.inference_mode() def forward( self, - pages: List[Union[np.ndarray, torch.Tensor]], + pages: list[np.ndarray | torch.Tensor], return_maps: bool = False, **kwargs: Any, - ) -> Union[List[Dict[str, np.ndarray]], Tuple[List[Dict[str, np.ndarray]], List[np.ndarray]]]: + ) -> list[dict[str, np.ndarray]] | tuple[list[dict[str, np.ndarray]], list[np.ndarray]]: # Extract parameters from the preprocessor preserve_aspect_ratio = self.pre_processor.resize.preserve_aspect_ratio symmetric_pad = self.pre_processor.resize.symmetric_pad diff --git a/doctr/models/detection/predictor/tensorflow.py b/doctr/models/detection/predictor/tensorflow.py index 5263a560d3..137ce6b9c1 100644 --- a/doctr/models/detection/predictor/tensorflow.py +++ b/doctr/models/detection/predictor/tensorflow.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Dict, List, Tuple, Union +from typing import Any import numpy as np import tensorflow as tf @@ -24,7 +24,7 @@ class DetectionPredictor(NestedObject): model: core detection architecture """ - _children_names: List[str] = ["pre_processor", "model"] + _children_names: list[str] = ["pre_processor", "model"] def __init__( self, @@ -36,10 +36,10 @@ def __init__( def __call__( self, - pages: List[Union[np.ndarray, tf.Tensor]], + pages: list[np.ndarray | tf.Tensor], return_maps: bool = False, **kwargs: Any, - ) -> Union[List[Dict[str, np.ndarray]], Tuple[List[Dict[str, np.ndarray]], List[np.ndarray]]]: + ) -> list[dict[str, np.ndarray]] | tuple[list[dict[str, np.ndarray]], list[np.ndarray]]: # Extract parameters from the preprocessor preserve_aspect_ratio = self.pre_processor.resize.preserve_aspect_ratio symmetric_pad = self.pre_processor.resize.symmetric_pad diff --git a/doctr/models/detection/zoo.py b/doctr/models/detection/zoo.py index 8c54503a41..da33b846ff 100644 --- a/doctr/models/detection/zoo.py +++ b/doctr/models/detection/zoo.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List +from typing import Any from doctr.file_utils import is_tf_available, is_torch_available @@ -14,7 +14,7 @@ __all__ = ["detection_predictor"] -ARCHS: List[str] +ARCHS: list[str] if is_tf_available(): diff --git a/doctr/models/kie_predictor/base.py b/doctr/models/kie_predictor/base.py index c8ade54579..e85d2f5edd 100644 --- a/doctr/models/kie_predictor/base.py +++ b/doctr/models/kie_predictor/base.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, Optional +from typing import Any from doctr.models.builder import KIEDocumentBuilder @@ -29,8 +29,8 @@ class _KIEPredictor(_OCRPredictor): kwargs: keyword args of `DocumentBuilder` """ - crop_orientation_predictor: Optional[OrientationPredictor] - page_orientation_predictor: Optional[OrientationPredictor] + crop_orientation_predictor: OrientationPredictor | None + page_orientation_predictor: OrientationPredictor | None def __init__( self, diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py index de459c0ff2..edb2e16695 100644 --- a/doctr/models/kie_predictor/pytorch.py +++ b/doctr/models/kie_predictor/pytorch.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Dict, List, Union +from typing import Any import numpy as np import torch @@ -68,7 +68,7 @@ def __init__( @torch.inference_mode() def forward( self, - pages: List[Union[np.ndarray, torch.Tensor]], + pages: list[np.ndarray | torch.Tensor], **kwargs: Any, ) -> Document: # Dimension check @@ -104,7 +104,7 @@ def forward( # Forward again to get predictions on straight pages loc_preds = self.det_predictor(pages, **kwargs) - dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds) # type: ignore[assignment] + dict_loc_preds: dict[str, list[np.ndarray]] = invert_data_structure(loc_preds) # type: ignore[assignment] # Detach objectness scores from loc_preds objectness_scores = {} @@ -149,18 +149,18 @@ def forward( if not crop_orientations: crop_orientations = {k: [{"value": 0, "confidence": None} for _ in word_preds[k]] for k in word_preds} - boxes: Dict = {} - text_preds: Dict = {} - word_crop_orientations: Dict = {} + boxes: dict = {} + text_preds: dict = {} + word_crop_orientations: dict = {} for class_name in dict_loc_preds.keys(): boxes[class_name], text_preds[class_name], word_crop_orientations[class_name] = self._process_predictions( dict_loc_preds[class_name], word_preds[class_name], crop_orientations[class_name] ) - boxes_per_page: List[Dict] = invert_data_structure(boxes) # type: ignore[assignment] - objectness_scores_per_page: List[Dict] = invert_data_structure(objectness_scores) # type: ignore[assignment] - text_preds_per_page: List[Dict] = invert_data_structure(text_preds) # type: ignore[assignment] - crop_orientations_per_page: List[Dict] = invert_data_structure(word_crop_orientations) # type: ignore[assignment] + boxes_per_page: list[dict] = invert_data_structure(boxes) # type: ignore[assignment] + objectness_scores_per_page: list[dict] = invert_data_structure(objectness_scores) # type: ignore[assignment] + text_preds_per_page: list[dict] = invert_data_structure(text_preds) # type: ignore[assignment] + crop_orientations_per_page: list[dict] = invert_data_structure(word_crop_orientations) # type: ignore[assignment] if self.detect_language: languages = [get_language(self.get_text(text_pred)) for text_pred in text_preds_per_page] @@ -181,7 +181,7 @@ def forward( return out @staticmethod - def get_text(text_pred: Dict) -> str: + def get_text(text_pred: dict) -> str: text = [] for value in text_pred.values(): text += [item[0] for item in value] diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py index 3f0d58bbfc..4fba9923d2 100644 --- a/doctr/models/kie_predictor/tensorflow.py +++ b/doctr/models/kie_predictor/tensorflow.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Dict, List, Union +from typing import Any import numpy as np import tensorflow as tf @@ -68,7 +68,7 @@ def __init__( def __call__( self, - pages: List[Union[np.ndarray, tf.Tensor]], + pages: list[np.ndarray | tf.Tensor], **kwargs: Any, ) -> Document: # Dimension check @@ -104,7 +104,7 @@ def __call__( # Forward again to get predictions on straight pages loc_preds = self.det_predictor(pages, **kwargs) - dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds) # type: ignore + dict_loc_preds: dict[str, list[np.ndarray]] = invert_data_structure(loc_preds) # type: ignore # Detach objectness scores from loc_preds objectness_scores = {} @@ -147,18 +147,18 @@ def __call__( if not crop_orientations: crop_orientations = {k: [{"value": 0, "confidence": None} for _ in word_preds[k]] for k in word_preds} - boxes: Dict = {} - text_preds: Dict = {} - word_crop_orientations: Dict = {} + boxes: dict = {} + text_preds: dict = {} + word_crop_orientations: dict = {} for class_name in dict_loc_preds.keys(): boxes[class_name], text_preds[class_name], word_crop_orientations[class_name] = self._process_predictions( dict_loc_preds[class_name], word_preds[class_name], crop_orientations[class_name] ) - boxes_per_page: List[Dict] = invert_data_structure(boxes) # type: ignore[assignment] - objectness_scores_per_page: List[Dict] = invert_data_structure(objectness_scores) # type: ignore[assignment] - text_preds_per_page: List[Dict] = invert_data_structure(text_preds) # type: ignore[assignment] - crop_orientations_per_page: List[Dict] = invert_data_structure(word_crop_orientations) # type: ignore[assignment] + boxes_per_page: list[dict] = invert_data_structure(boxes) # type: ignore[assignment] + objectness_scores_per_page: list[dict] = invert_data_structure(objectness_scores) # type: ignore[assignment] + text_preds_per_page: list[dict] = invert_data_structure(text_preds) # type: ignore[assignment] + crop_orientations_per_page: list[dict] = invert_data_structure(word_crop_orientations) # type: ignore[assignment] if self.detect_language: languages = [get_language(self.get_text(text_pred)) for text_pred in text_preds_per_page] @@ -179,7 +179,7 @@ def __call__( return out @staticmethod - def get_text(text_pred: Dict) -> str: + def get_text(text_pred: dict) -> str: text = [] for value in text_pred.values(): text += [item[0] for item in value] diff --git a/doctr/models/modules/layers/pytorch.py b/doctr/models/modules/layers/pytorch.py index e20f15f104..da886e95fd 100644 --- a/doctr/models/modules/layers/pytorch.py +++ b/doctr/models/modules/layers/pytorch.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Tuple, Union import numpy as np import torch @@ -19,7 +18,7 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[int, Tuple[int, int]], + kernel_size: int | tuple[int, int], stride: int = 1, dilation: int = 1, groups: int = 1, @@ -93,9 +92,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # The following logic is used to reparametrize the layer # Borrowed from: https://github.com/czczup/FAST/blob/main/models/utils/nas_utils.py - def _identity_to_conv( - self, identity: Union[nn.BatchNorm2d, None] - ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[int, int]]: + def _identity_to_conv(self, identity: nn.BatchNorm2d | None) -> tuple[torch.Tensor, torch.Tensor] | tuple[int, int]: if identity is None or identity.running_var is None: return 0, 0 if not hasattr(self, "id_tensor"): @@ -110,14 +107,14 @@ def _identity_to_conv( t = (identity.weight / std).reshape(-1, 1, 1, 1) return kernel * t, identity.bias - identity.running_mean * identity.weight / std - def _fuse_bn_tensor(self, conv: nn.Conv2d, bn: nn.BatchNorm2d) -> Tuple[torch.Tensor, torch.Tensor]: + def _fuse_bn_tensor(self, conv: nn.Conv2d, bn: nn.BatchNorm2d) -> tuple[torch.Tensor, torch.Tensor]: kernel = conv.weight kernel = self._pad_to_mxn_tensor(kernel) std = (bn.running_var + bn.eps).sqrt() # type: ignore t = (bn.weight / std).reshape(-1, 1, 1, 1) return kernel * t, bn.bias - bn.running_mean * bn.weight / std - def _get_equivalent_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]: + def _get_equivalent_kernel_bias(self) -> tuple[torch.Tensor, torch.Tensor]: kernel_mxn, bias_mxn = self._fuse_bn_tensor(self.conv, self.bn) if self.ver_conv is not None: kernel_mx1, bias_mx1 = self._fuse_bn_tensor(self.ver_conv, self.ver_bn) # type: ignore[arg-type] diff --git a/doctr/models/modules/layers/tensorflow.py b/doctr/models/modules/layers/tensorflow.py index 68849fbf6e..30599a49b4 100644 --- a/doctr/models/modules/layers/tensorflow.py +++ b/doctr/models/modules/layers/tensorflow.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Tuple, Union +from typing import Any import numpy as np import tensorflow as tf @@ -21,7 +21,7 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[int, Tuple[int, int]], + kernel_size: int | tuple[int, int], stride: int = 1, dilation: int = 1, groups: int = 1, @@ -103,9 +103,7 @@ def call(self, x: tf.Tensor, **kwargs: Any) -> tf.Tensor: # The following logic is used to reparametrize the layer # Adapted from: https://github.com/mindee/doctr/blob/main/doctr/models/modules/layers/pytorch.py - def _identity_to_conv( - self, identity: layers.BatchNormalization - ) -> Union[Tuple[tf.Tensor, tf.Tensor], Tuple[int, int]]: + def _identity_to_conv(self, identity: layers.BatchNormalization) -> tuple[tf.Tensor, tf.Tensor] | tuple[int, int]: if identity is None or not hasattr(identity, "moving_mean") or not hasattr(identity, "moving_variance"): return 0, 0 if not hasattr(self, "id_tensor"): @@ -120,7 +118,7 @@ def _identity_to_conv( t = tf.reshape(identity.gamma / std, (1, 1, 1, -1)) return kernel * t, identity.beta - identity.moving_mean * identity.gamma / std - def _fuse_bn_tensor(self, conv: layers.Conv2D, bn: layers.BatchNormalization) -> Tuple[tf.Tensor, tf.Tensor]: + def _fuse_bn_tensor(self, conv: layers.Conv2D, bn: layers.BatchNormalization) -> tuple[tf.Tensor, tf.Tensor]: kernel = conv.kernel kernel = self._pad_to_mxn_tensor(kernel) std = tf.sqrt(bn.moving_variance + bn.epsilon) diff --git a/doctr/models/modules/transformer/pytorch.py b/doctr/models/modules/transformer/pytorch.py index 312eba9a26..0303ac5882 100644 --- a/doctr/models/modules/transformer/pytorch.py +++ b/doctr/models/modules/transformer/pytorch.py @@ -6,7 +6,8 @@ # This module 'transformer.py' is inspired by https://github.com/wenwenyu/MASTER-pytorch and Decoder is borrowed import math -from typing import Any, Callable, Optional, Tuple +from collections.abc import Callable +from typing import Any import torch from torch import nn @@ -43,8 +44,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def scaled_dot_product_attention( - query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, mask: Optional[torch.Tensor] = None -) -> Tuple[torch.Tensor, torch.Tensor]: + query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, mask: torch.Tensor | None = None +) -> tuple[torch.Tensor, torch.Tensor]: """Scaled Dot-Product Attention""" scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1)) if mask is not None: @@ -128,7 +129,7 @@ def __init__( PositionwiseFeedForward(d_model, dff, dropout, activation_fct) for _ in range(self.num_layers) ]) - def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor: + def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor: output = x for i in range(self.num_layers): @@ -181,8 +182,8 @@ def forward( self, tgt: torch.Tensor, memory: torch.Tensor, - source_mask: Optional[torch.Tensor] = None, - target_mask: Optional[torch.Tensor] = None, + source_mask: torch.Tensor | None = None, + target_mask: torch.Tensor | None = None, ) -> torch.Tensor: tgt = self.embed(tgt) * math.sqrt(self.d_model) pos_enc_tgt = self.positional_encoding(tgt) diff --git a/doctr/models/modules/transformer/tensorflow.py b/doctr/models/modules/transformer/tensorflow.py index 79584356c1..83beceaafb 100644 --- a/doctr/models/modules/transformer/tensorflow.py +++ b/doctr/models/modules/transformer/tensorflow.py @@ -4,7 +4,8 @@ # See LICENSE or go to for full license details. 
import math -from typing import Any, Callable, Optional, Tuple +from collections.abc import Callable +from typing import Any import tensorflow as tf from tensorflow.keras import layers @@ -58,8 +59,8 @@ def call( @tf.function def scaled_dot_product_attention( - query: tf.Tensor, key: tf.Tensor, value: tf.Tensor, mask: Optional[tf.Tensor] = None -) -> Tuple[tf.Tensor, tf.Tensor]: + query: tf.Tensor, key: tf.Tensor, value: tf.Tensor, mask: tf.Tensor | None = None +) -> tuple[tf.Tensor, tf.Tensor]: """Scaled Dot-Product Attention""" scores = tf.matmul(query, tf.transpose(key, perm=[0, 1, 3, 2])) / math.sqrt(query.shape[-1]) if mask is not None: @@ -158,7 +159,7 @@ def __init__( PositionwiseFeedForward(d_model, dff, dropout, activation_fct) for _ in range(self.num_layers) ] - def call(self, x: tf.Tensor, mask: Optional[tf.Tensor] = None, **kwargs: Any) -> tf.Tensor: + def call(self, x: tf.Tensor, mask: tf.Tensor | None = None, **kwargs: Any) -> tf.Tensor: output = x for i in range(self.num_layers): @@ -208,8 +209,8 @@ def call( self, tgt: tf.Tensor, memory: tf.Tensor, - source_mask: Optional[tf.Tensor] = None, - target_mask: Optional[tf.Tensor] = None, + source_mask: tf.Tensor | None = None, + target_mask: tf.Tensor | None = None, **kwargs: Any, ) -> tf.Tensor: tgt = self.embed(tgt, **kwargs) * math.sqrt(self.d_model) diff --git a/doctr/models/modules/vision_transformer/pytorch.py b/doctr/models/modules/vision_transformer/pytorch.py index c13edf234b..d7a6fb7808 100644 --- a/doctr/models/modules/vision_transformer/pytorch.py +++ b/doctr/models/modules/vision_transformer/pytorch.py @@ -4,7 +4,6 @@ # See LICENSE or go to for full license details. import math -from typing import Tuple import torch from torch import nn @@ -15,7 +14,7 @@ class PatchEmbedding(nn.Module): """Compute 2D patch embeddings with cls token and positional encoding""" - def __init__(self, input_shape: Tuple[int, int, int], embed_dim: int, patch_size: Tuple[int, int]) -> None: + def __init__(self, input_shape: tuple[int, int, int], embed_dim: int, patch_size: tuple[int, int]) -> None: super().__init__() channels, height, width = input_shape self.patch_size = patch_size diff --git a/doctr/models/modules/vision_transformer/tensorflow.py b/doctr/models/modules/vision_transformer/tensorflow.py index 8386172eb1..9f4e5e2524 100644 --- a/doctr/models/modules/vision_transformer/tensorflow.py +++ b/doctr/models/modules/vision_transformer/tensorflow.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. import math -from typing import Any, Tuple +from typing import Any import tensorflow as tf from tensorflow.keras import layers @@ -17,7 +17,7 @@ class PatchEmbedding(layers.Layer, NestedObject): """Compute 2D patch embeddings with cls token and positional encoding""" - def __init__(self, input_shape: Tuple[int, int, int], embed_dim: int, patch_size: Tuple[int, int]) -> None: + def __init__(self, input_shape: tuple[int, int, int], embed_dim: int, patch_size: tuple[int, int]) -> None: super().__init__() height, width, _ = input_shape self.patch_size = patch_size diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py index 3737ad3be0..c4863525f8 100644 --- a/doctr/models/predictor/base.py +++ b/doctr/models/predictor/base.py @@ -3,7 +3,8 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, Callable, Dict, List, Optional, Tuple +from collections.abc import Callable +from typing import Any import numpy as np @@ -33,8 +34,8 @@ class _OCRPredictor: **kwargs: keyword args of `DocumentBuilder` """ - crop_orientation_predictor: Optional[OrientationPredictor] - page_orientation_predictor: Optional[OrientationPredictor] + crop_orientation_predictor: OrientationPredictor | None + page_orientation_predictor: OrientationPredictor | None def __init__( self, @@ -62,12 +63,12 @@ def __init__( self.doc_builder = DocumentBuilder(**kwargs) self.preserve_aspect_ratio = preserve_aspect_ratio self.symmetric_pad = symmetric_pad - self.hooks: List[Callable] = [] + self.hooks: list[Callable] = [] def _general_page_orientations( self, - pages: List[np.ndarray], - ) -> List[Tuple[int, float]]: + pages: list[np.ndarray], + ) -> list[tuple[int, float]]: _, classes, probs = zip(self.page_orientation_predictor(pages)) # type: ignore[misc] # Flatten to list of tuples with (value, confidence) page_orientations = [ @@ -78,8 +79,8 @@ def _general_page_orientations( return page_orientations def _get_orientations( - self, pages: List[np.ndarray], seg_maps: List[np.ndarray] - ) -> Tuple[List[Tuple[int, float]], List[int]]: + self, pages: list[np.ndarray], seg_maps: list[np.ndarray] + ) -> tuple[list[tuple[int, float]], list[int]]: general_pages_orientations = self._general_page_orientations(pages) origin_page_orientations = [ estimate_orientation(seq_map, general_orientation) @@ -89,11 +90,11 @@ def _get_orientations( def _straighten_pages( self, - pages: List[np.ndarray], - seg_maps: List[np.ndarray], - general_pages_orientations: Optional[List[Tuple[int, float]]] = None, - origin_pages_orientations: Optional[List[int]] = None, - ) -> List[np.ndarray]: + pages: list[np.ndarray], + seg_maps: list[np.ndarray], + general_pages_orientations: list[tuple[int, float]] | None = None, + origin_pages_orientations: list[int] | None = None, + ) -> list[np.ndarray]: general_pages_orientations = ( general_pages_orientations if general_pages_orientations else self._general_page_orientations(pages) ) @@ -113,12 +114,12 @@ def _straighten_pages( @staticmethod def _generate_crops( - pages: List[np.ndarray], - loc_preds: List[np.ndarray], + pages: list[np.ndarray], + loc_preds: list[np.ndarray], channels_last: bool, assume_straight_pages: bool = False, assume_horizontal: bool = False, - ) -> List[List[np.ndarray]]: + ) -> list[list[np.ndarray]]: if assume_straight_pages: crops = [ extract_crops(page, _boxes[:, :4], channels_last=channels_last) @@ -133,12 +134,12 @@ def _generate_crops( @staticmethod def _prepare_crops( - pages: List[np.ndarray], - loc_preds: List[np.ndarray], + pages: list[np.ndarray], + loc_preds: list[np.ndarray], channels_last: bool, assume_straight_pages: bool = False, assume_horizontal: bool = False, - ) -> Tuple[List[List[np.ndarray]], List[np.ndarray]]: + ) -> tuple[list[list[np.ndarray]], list[np.ndarray]]: crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal) # Avoid sending zero-sized crops @@ -153,9 +154,9 @@ def _prepare_crops( def _rectify_crops( self, - crops: List[List[np.ndarray]], - loc_preds: List[np.ndarray], - ) -> Tuple[List[List[np.ndarray]], List[np.ndarray], List[Tuple[int, float]]]: + crops: list[list[np.ndarray]], + loc_preds: list[np.ndarray], + ) -> tuple[list[list[np.ndarray]], list[np.ndarray], list[tuple[int, float]]]: # Work at a page level orientations, classes, probs = 
zip(*[self.crop_orientation_predictor(page_crops) for page_crops in crops]) # type: ignore[misc] rect_crops = [rectify_crops(page_crops, orientation) for page_crops, orientation in zip(crops, orientations)] @@ -173,10 +174,10 @@ def _rectify_crops( @staticmethod def _process_predictions( - loc_preds: List[np.ndarray], - word_preds: List[Tuple[str, float]], - crop_orientations: List[Dict[str, Any]], - ) -> Tuple[List[np.ndarray], List[List[Tuple[str, float]]], List[List[Dict[str, Any]]]]: + loc_preds: list[np.ndarray], + word_preds: list[tuple[str, float]], + crop_orientations: list[dict[str, Any]], + ) -> tuple[list[np.ndarray], list[list[tuple[str, float]]], list[list[dict[str, Any]]]]: text_preds = [] crop_orientation_preds = [] if len(loc_preds) > 0: diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index a0d26957e6..fdf97b6f49 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List, Union +from typing import Any import numpy as np import torch @@ -68,7 +68,7 @@ def __init__( @torch.inference_mode() def forward( self, - pages: List[Union[np.ndarray, torch.Tensor]], + pages: list[np.ndarray | torch.Tensor], **kwargs: Any, ) -> Document: # Dimension check diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 07f12210ce..6afbe9b8df 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List, Union +from typing import Any import numpy as np import tensorflow as tf @@ -68,7 +68,7 @@ def __init__( def __call__( self, - pages: List[Union[np.ndarray, tf.Tensor]], + pages: list[np.ndarray | tf.Tensor], **kwargs: Any, ) -> Document: # Dimension check @@ -105,7 +105,7 @@ def __call__( assert all(len(loc_pred) == 1 for loc_pred in loc_preds_dict), ( "Detection Model in ocr_predictor should output only one class" ) - loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict] + loc_preds: list[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict] # Detach objectness scores from loc_preds loc_preds, objectness_scores = detach_scores(loc_preds) diff --git a/doctr/models/preprocessor/pytorch.py b/doctr/models/preprocessor/pytorch.py index 1015e14bbd..aec6e81f99 100644 --- a/doctr/models/preprocessor/pytorch.py +++ b/doctr/models/preprocessor/pytorch.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
import math -from typing import Any, List, Tuple, Union +from typing import Any import numpy as np import torch @@ -31,10 +31,10 @@ class PreProcessor(nn.Module): def __init__( self, - output_size: Tuple[int, int], + output_size: tuple[int, int], batch_size: int, - mean: Tuple[float, float, float] = (0.5, 0.5, 0.5), - std: Tuple[float, float, float] = (1.0, 1.0, 1.0), + mean: tuple[float, float, float] = (0.5, 0.5, 0.5), + std: tuple[float, float, float] = (1.0, 1.0, 1.0), **kwargs: Any, ) -> None: super().__init__() @@ -43,7 +43,7 @@ def __init__( # Perform the division by 255 at the same time self.normalize = T.Normalize(mean, std) - def batch_inputs(self, samples: List[torch.Tensor]) -> List[torch.Tensor]: + def batch_inputs(self, samples: list[torch.Tensor]) -> list[torch.Tensor]: """Gather samples into batches for inference purposes Args: @@ -60,7 +60,7 @@ def batch_inputs(self, samples: List[torch.Tensor]) -> List[torch.Tensor]: return batches - def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: + def sample_transforms(self, x: np.ndarray | torch.Tensor) -> torch.Tensor: if x.ndim != 3: raise AssertionError("expected list of 3D Tensors") if isinstance(x, np.ndarray): @@ -79,7 +79,7 @@ def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: return x # type: ignore[return-value] - def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, np.ndarray]]]) -> List[torch.Tensor]: + def __call__(self, x: torch.Tensor | np.ndarray | list[torch.Tensor | np.ndarray]) -> list[torch.Tensor]: """Prepare document data for model forwarding Args: diff --git a/doctr/models/preprocessor/tensorflow.py b/doctr/models/preprocessor/tensorflow.py index 31ff667286..370c345eb8 100644 --- a/doctr/models/preprocessor/tensorflow.py +++ b/doctr/models/preprocessor/tensorflow.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
import math -from typing import Any, List, Tuple, Union +from typing import Any import numpy as np import tensorflow as tf @@ -27,14 +27,14 @@ class PreProcessor(NestedObject): **kwargs: additional arguments for the resizing operation """ - _children_names: List[str] = ["resize", "normalize"] + _children_names: list[str] = ["resize", "normalize"] def __init__( self, - output_size: Tuple[int, int], + output_size: tuple[int, int], batch_size: int, - mean: Tuple[float, float, float] = (0.5, 0.5, 0.5), - std: Tuple[float, float, float] = (1.0, 1.0, 1.0), + mean: tuple[float, float, float] = (0.5, 0.5, 0.5), + std: tuple[float, float, float] = (1.0, 1.0, 1.0), **kwargs: Any, ) -> None: self.batch_size = batch_size @@ -43,7 +43,7 @@ def __init__( self.normalize = Normalize(mean, std) self._runs_on_cuda = tf.config.list_physical_devices("GPU") != [] - def batch_inputs(self, samples: List[tf.Tensor]) -> List[tf.Tensor]: + def batch_inputs(self, samples: list[tf.Tensor]) -> list[tf.Tensor]: """Gather samples into batches for inference purposes Args: @@ -60,7 +60,7 @@ def batch_inputs(self, samples: List[tf.Tensor]) -> List[tf.Tensor]: return batches - def sample_transforms(self, x: Union[np.ndarray, tf.Tensor]) -> tf.Tensor: + def sample_transforms(self, x: np.ndarray | tf.Tensor) -> tf.Tensor: if x.ndim != 3: raise AssertionError("expected list of 3D Tensors") if isinstance(x, np.ndarray): @@ -77,7 +77,7 @@ def sample_transforms(self, x: Union[np.ndarray, tf.Tensor]) -> tf.Tensor: return x - def __call__(self, x: Union[tf.Tensor, np.ndarray, List[Union[tf.Tensor, np.ndarray]]]) -> List[tf.Tensor]: + def __call__(self, x: tf.Tensor | np.ndarray | list[tf.Tensor | np.ndarray]) -> list[tf.Tensor]: """Prepare document data for model forwarding Args: diff --git a/doctr/models/recognition/core.py b/doctr/models/recognition/core.py index 9f46bf3f23..26cef1e736 100644 --- a/doctr/models/recognition/core.py +++ b/doctr/models/recognition/core.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import List, Tuple import numpy as np @@ -21,8 +20,8 @@ class RecognitionModel(NestedObject): def build_target( self, - gts: List[str], - ) -> Tuple[np.ndarray, List[int]]: + gts: list[str], + ) -> tuple[np.ndarray, list[int]]: """Encode a list of gts sequences into a np array and gives the corresponding* sequence lengths. diff --git a/doctr/models/recognition/crnn/pytorch.py b/doctr/models/recognition/crnn/pytorch.py index 18617fb36e..1c0641bba2 100644 --- a/doctr/models/recognition/crnn/pytorch.py +++ b/doctr/models/recognition/crnn/pytorch.py @@ -3,9 +3,10 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. +from collections.abc import Callable from copy import deepcopy from itertools import groupby -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any import torch from torch import nn @@ -19,7 +20,7 @@ __all__ = ["CRNN", "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "crnn_vgg16_bn": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -56,7 +57,7 @@ def ctc_best_path( logits: torch.Tensor, vocab: str = VOCABS["french"], blank: int = 0, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: """Implements best path decoding as shown by Graves (Dissertation, p63), highly inspired from `_. 
@@ -79,7 +80,7 @@ def ctc_best_path( return list(zip(words, probs.tolist())) - def __call__(self, logits: torch.Tensor) -> List[Tuple[str, float]]: + def __call__(self, logits: torch.Tensor) -> list[tuple[str, float]]: """Performs decoding of raw output with CTC and decoding of CTC predictions with label_to_idx mapping dictionnary @@ -106,16 +107,16 @@ class CRNN(RecognitionModel, nn.Module): cfg: configuration dictionary """ - _children_names: List[str] = ["feat_extractor", "decoder", "linear", "postprocessor"] + _children_names: list[str] = ["feat_extractor", "decoder", "linear", "postprocessor"] def __init__( self, feature_extractor: nn.Module, vocab: str, rnn_units: int = 128, - input_shape: Tuple[int, int, int] = (3, 32, 128), + input_shape: tuple[int, int, int] = (3, 32, 128), exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() self.vocab = vocab @@ -157,7 +158,7 @@ def __init__( def compute_loss( self, model_output: torch.Tensor, - target: List[str], + target: list[str], ) -> torch.Tensor: """Compute CTC loss for the model. @@ -188,10 +189,10 @@ def compute_loss( def forward( self, x: torch.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: if self.training and target is None: raise ValueError("Need to provide labels during training") @@ -203,7 +204,7 @@ def forward( logits, _ = self.decoder(features_seq) logits = self.linear(logits) - out: Dict[str, Any] = {} + out: dict[str, Any] = {} if self.exportable: out["logits"] = logits return out @@ -226,7 +227,7 @@ def _crnn( pretrained: bool, backbone_fn: Callable[[Any], nn.Module], pretrained_backbone: bool = True, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> CRNN: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/recognition/crnn/tensorflow.py b/doctr/models/recognition/crnn/tensorflow.py index 4bf6915b69..2da25f5d67 100644 --- a/doctr/models/recognition/crnn/tensorflow.py +++ b/doctr/models/recognition/crnn/tensorflow.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any import tensorflow as tf from tensorflow.keras import layers @@ -18,7 +18,7 @@ __all__ = ["CRNN", "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "crnn_vgg16_bn": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -57,7 +57,7 @@ def __call__( logits: tf.Tensor, beam_width: int = 1, top_paths: int = 1, - ) -> Union[List[Tuple[str, float]], List[Tuple[List[str], List[float]]]]: + ) -> list[tuple[str, float]] | list[tuple[list[str], list[float]]]: """Performs decoding of raw output with CTC and decoding of CTC predictions with label_to_idx mapping dictionnary @@ -120,7 +120,7 @@ class CRNN(RecognitionModel, Model): cfg: configuration dictionary """ - _children_names: List[str] = ["feat_extractor", "decoder", "postprocessor"] + _children_names: list[str] = ["feat_extractor", "decoder", "postprocessor"] def __init__( self, @@ -130,7 +130,7 @@ def __init__( exportable: bool = False, beam_width: int = 1, top_paths: int = 1, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: # Initialize kernels h, w, c = feature_extractor.output_shape[1:] @@ -157,7 +157,7 @@ def compute_loss( self, model_output: tf.Tensor, - target: List[str], + target: list[str], ) -> tf.Tensor: """Compute CTC loss for the model. @@ -179,13 +179,13 @@ def call( self, x: tf.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, beam_width: int = 1, top_paths: int = 1, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: if kwargs.get("training", False) and target is None: raise ValueError("Need to provide labels during training") @@ -197,7 +197,7 @@ def call( features_seq = tf.reshape(transposed_feat, shape=(-1, w, h * c)) logits = _bf16_to_float32(self.decoder(features_seq, **kwargs)) - out: Dict[str, tf.Tensor] = {} + out: dict[str, tf.Tensor] = {} if self.exportable: out["logits"] = logits return out @@ -220,7 +220,7 @@ def _crnn( pretrained: bool, backbone_fn, pretrained_backbone: bool = True, - input_shape: Optional[Tuple[int, int, int]] = None, + input_shape: tuple[int, int, int] | None = None, **kwargs: Any, ) -> CRNN: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/recognition/master/base.py b/doctr/models/recognition/master/base.py index 706d91fbfe..79c8bc5d8c 100644 --- a/doctr/models/recognition/master/base.py +++ b/doctr/models/recognition/master/base.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import List, Tuple import numpy as np @@ -17,8 +16,8 @@ class _MASTER: def build_target( self, - gts: List[str], - ) -> Tuple[np.ndarray, List[int]]: + gts: list[str], + ) -> tuple[np.ndarray, list[int]]: """Encode a list of gts sequences into a np array and gives the corresponding* sequence lengths. diff --git a/doctr/models/recognition/master/pytorch.py b/doctr/models/recognition/master/pytorch.py index 7585b1d8a6..3de463d09b 100644 --- a/doctr/models/recognition/master/pytorch.py +++ b/doctr/models/recognition/master/pytorch.py @@ -3,8 +3,9 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details.
+from collections.abc import Callable from copy import deepcopy -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any import torch from torch import nn @@ -21,7 +22,7 @@ __all__ = ["MASTER", "master"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "master": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -60,9 +61,9 @@ def __init__( num_layers: int = 3, max_length: int = 50, dropout: float = 0.2, - input_shape: Tuple[int, int, int] = (3, 32, 128), # different from the paper + input_shape: tuple[int, int, int] = (3, 32, 128), # different from the paper exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() @@ -101,7 +102,7 @@ def __init__( def make_source_and_target_mask( self, source: torch.Tensor, target: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: + ) -> tuple[torch.Tensor, torch.Tensor]: # borrowed and slightly modified from https://github.com/wenwenyu/MASTER-pytorch # NOTE: nn.TransformerDecoder takes the inverse from this implementation # [True, True, True, ..., False, False, False] -> False is masked @@ -153,10 +154,10 @@ def compute_loss( def forward( self, x: torch.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Call function for training Args: @@ -176,7 +177,7 @@ def forward( # add positional encoding to features encoded = self.positional_encoding(features) - out: Dict[str, Any] = {} + out: dict[str, Any] = {} if self.training and target is None: raise ValueError("Need to provide labels during training") @@ -219,7 +220,7 @@ def decode(self, encoded: torch.Tensor) -> torch.Tensor: encoded: input tensor Returns: - A Tuple of torch.Tensor: predictions, logits + A tuple of torch.Tensor: predictions, logits """ b = encoded.size(0) @@ -247,7 +248,7 @@ class MASTERPostProcessor(_MASTERPostProcessor): def __call__( self, logits: torch.Tensor, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: # compute pred with argmax for attention models out_idxs = logits.argmax(-1) # N x L @@ -270,7 +271,7 @@ def _master( backbone_fn: Callable[[bool], nn.Module], layer: str, pretrained_backbone: bool = True, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> MASTER: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/recognition/master/tensorflow.py b/doctr/models/recognition/master/tensorflow.py index 62bc3eea98..3eca8d21d8 100644 --- a/doctr/models/recognition/master/tensorflow.py +++ b/doctr/models/recognition/master/tensorflow.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import tensorflow as tf from tensorflow.keras import Model, layers @@ -19,7 +19,7 @@ __all__ = ["MASTER", "master"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "master": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -58,9 +58,9 @@ def __init__( num_layers: int = 3, max_length: int = 50, dropout: float = 0.2, - input_shape: Tuple[int, int, int] = (32, 128, 3), # different from the paper + input_shape: tuple[int, int, int] = (32, 128, 3), # different from the paper exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() @@ -88,7 +88,7 @@ def __init__( self.postprocessor = MASTERPostProcessor(vocab=self.vocab) @tf.function - def make_source_and_target_mask(self, source: tf.Tensor, target: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: + def make_source_and_target_mask(self, source: tf.Tensor, target: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor]: # [1, 1, 1, ..., 0, 0, 0] -> 0 is masked # (N, 1, 1, max_length) target_pad_mask = tf.cast(tf.math.not_equal(target, self.vocab_size + 2), dtype=tf.uint8) @@ -108,7 +108,7 @@ def make_source_and_target_mask(self, source: tf.Tensor, target: tf.Tensor) -> T def compute_loss( model_output: tf.Tensor, gt: tf.Tensor, - seq_len: List[int], + seq_len: list[int], ) -> tf.Tensor: """Compute categorical cross-entropy loss for the model. Sequences are masked after the EOS character. @@ -141,11 +141,11 @@ def compute_loss( def call( self, x: tf.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Call function for training Args: @@ -166,7 +166,7 @@ def call( # add positional encoding to features encoded = self.positional_encoding(feature, **kwargs) - out: Dict[str, tf.Tensor] = {} + out: dict[str, tf.Tensor] = {} if kwargs.get("training", False) and target is None: raise ValueError("Need to provide labels during training") @@ -208,7 +208,7 @@ def decode(self, encoded: tf.Tensor, **kwargs: Any) -> tf.Tensor: **kwargs: keyword arguments passed to the decoder Returns: - A Tuple of tf.Tensor: predictions, logits + A tuple of tf.Tensor: predictions, logits """ b = encoded.shape[0] @@ -246,7 +246,7 @@ class MASTERPostProcessor(_MASTERPostProcessor): def __call__( self, logits: tf.Tensor, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: # compute pred with argmax for attention models out_idxs = tf.math.argmax(logits, axis=2) # N x L diff --git a/doctr/models/recognition/parseq/base.py b/doctr/models/recognition/parseq/base.py index 4649bbaf9c..fc60cb4303 100644 --- a/doctr/models/recognition/parseq/base.py +++ b/doctr/models/recognition/parseq/base.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import List, Tuple import numpy as np @@ -17,8 +16,8 @@ class _PARSeq: def build_target( self, - gts: List[str], - ) -> Tuple[np.ndarray, List[int]]: + gts: list[str], + ) -> tuple[np.ndarray, list[int]]: """Encode a list of gts sequences into a np array and gives the corresponding* sequence lengths. 
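The hunks above and below all apply the same mechanical rewrite. As a compact, runnable sketch of the idiom (illustrative only, not part of the patch; `build_target_old` and `build_target_new` are hypothetical names echoing the `build_target` signatures in these files):

    import numpy as np

    # Before: typing-module generics, deprecated by PEP 585 since Python 3.9
    from typing import Dict, List, Optional, Tuple

    def build_target_old(gts: List[str], cfg: Optional[Dict[str, int]] = None) -> Tuple[np.ndarray, List[int]]:
        return np.zeros((len(gts), 1)), [len(gt) for gt in gts]

    # After: built-in generics (PEP 585) and X | None unions (PEP 604).
    # Built-in generics evaluate at runtime on Python >= 3.9; the | union
    # syntax needs >= 3.10 (or `from __future__ import annotations` earlier).
    def build_target_new(gts: list[str], cfg: dict[str, int] | None = None) -> tuple[np.ndarray, list[int]]:
        return np.zeros((len(gts), 1)), [len(gt) for gt in gts]

The runtime behavior is identical; only the annotation syntax changes, which is why every hunk in this patch touches imports and signatures but no executable logic.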
diff --git a/doctr/models/recognition/parseq/pytorch.py b/doctr/models/recognition/parseq/pytorch.py index a6ecea88e0..414739436a 100644 --- a/doctr/models/recognition/parseq/pytorch.py +++ b/doctr/models/recognition/parseq/pytorch.py @@ -4,9 +4,10 @@ # See LICENSE or go to for full license details. import math +from collections.abc import Callable from copy import deepcopy from itertools import permutations -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any import numpy as np import torch @@ -23,7 +24,7 @@ __all__ = ["PARSeq", "parseq"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "parseq": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -90,7 +91,7 @@ def forward( target, content, memory, - target_mask: Optional[torch.Tensor] = None, + target_mask: torch.Tensor | None = None, ): query_norm = self.query_norm(target) content_norm = self.content_norm(content) @@ -133,9 +134,9 @@ def __init__( dec_num_heads: int = 12, dec_ff_dim: int = 384, # we use it from the original implementation instead of 2048 dec_ffd_ratio: int = 4, - input_shape: Tuple[int, int, int] = (3, 32, 128), + input_shape: tuple[int, int, int] = (3, 32, 128), exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() self.vocab = vocab @@ -214,7 +215,7 @@ def generate_permutations(self, seqlen: torch.Tensor) -> torch.Tensor: combined[1, 1:] = max_num_chars + 1 - torch.arange(max_num_chars + 1, device=seqlen.device) return combined - def generate_permutations_attention_masks(self, permutation: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def generate_permutations_attention_masks(self, permutation: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: # Generate source and target mask for the decoder attention. 
sz = permutation.shape[0] mask = torch.ones((sz, sz), device=permutation.device) @@ -233,8 +234,8 @@ def decode( self, target: torch.Tensor, memory: torch.Tensor, - target_mask: Optional[torch.Tensor] = None, - target_query: Optional[torch.Tensor] = None, + target_mask: torch.Tensor | None = None, + target_query: torch.Tensor | None = None, ) -> torch.Tensor: """Add positional information to the target sequence and pass it through the decoder.""" batch_size, sequence_length = target.shape @@ -247,7 +248,7 @@ def decode( target_query = self.dropout(target_query) return self.decoder(target_query, content, memory, target_mask) - def decode_autoregressive(self, features: torch.Tensor, max_len: Optional[int] = None) -> torch.Tensor: + def decode_autoregressive(self, features: torch.Tensor, max_len: int | None = None) -> torch.Tensor: """Generate predictions for the given features.""" max_length = max_len if max_len is not None else self.max_length max_length = min(max_length, self.max_length) + 1 @@ -304,10 +305,10 @@ def decode_autoregressive(self, features: torch.Tensor, max_len: Optional[int] = def forward( self, x: torch.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: features = self.feat_extractor(x)["features"] # (batch_size, patches_seqlen, d_model) # remove cls token features = features[:, 1:, :] @@ -334,7 +335,7 @@ def forward( ).unsqueeze(1).unsqueeze(1) # (N, 1, 1, seq_len) loss = torch.tensor(0.0, device=features.device) - loss_numel: Union[int, float] = 0 + loss_numel: int | float = 0 n = (gt_out != self.vocab_size + 2).sum().item() for i, perm in enumerate(tgt_perms): _, target_mask = self.generate_permutations_attention_masks(perm) # (seq_len, seq_len) @@ -362,7 +363,7 @@ def forward( logits = _bf16_to_float32(logits) - out: Dict[str, Any] = {} + out: dict[str, Any] = {} if self.exportable: out["logits"] = logits return out @@ -390,7 +391,7 @@ class PARSeqPostProcessor(_PARSeqPostProcessor): def __call__( self, logits: torch.Tensor, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: # compute pred with argmax for attention models out_idxs = logits.argmax(-1) preds_prob = torch.softmax(logits, -1).max(dim=-1)[0] @@ -413,7 +414,7 @@ def _parseq( pretrained: bool, backbone_fn: Callable[[bool], nn.Module], layer: str, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> PARSeq: # Patch the config diff --git a/doctr/models/recognition/parseq/tensorflow.py b/doctr/models/recognition/parseq/tensorflow.py index 2ed07249c9..ded139e4c8 100644 --- a/doctr/models/recognition/parseq/tensorflow.py +++ b/doctr/models/recognition/parseq/tensorflow.py @@ -6,7 +6,7 @@ import math from copy import deepcopy from itertools import permutations -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import numpy as np import tensorflow as tf @@ -21,7 +21,7 @@ __all__ = ["PARSeq", "parseq"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "parseq": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -127,7 +127,7 @@ class PARSeq(_PARSeq, Model): cfg: dictionary containing information about the model """ - _children_names: List[str] = ["feat_extractor", "postprocessor"] + _children_names: list[str] = ["feat_extractor", "postprocessor"] def __init__( self, @@ -139,9 +139,9 @@ def __init__( dec_num_heads: int = 12, dec_ff_dim: int 
= 384, # we use it from the original implementation instead of 2048 dec_ffd_ratio: int = 4, - input_shape: Tuple[int, int, int] = (32, 128, 3), + input_shape: tuple[int, int, int] = (32, 128, 3), exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() self.vocab = vocab @@ -211,7 +211,7 @@ def generate_permutations(self, seqlen: tf.Tensor) -> tf.Tensor: ) return combined - def generate_permutations_attention_masks(self, permutation: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: + def generate_permutations_attention_masks(self, permutation: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor]: # Generate source and target mask for the decoder attention. sz = permutation.shape[0] mask = tf.ones((sz, sz), dtype=tf.float32) @@ -234,8 +234,8 @@ def decode( self, target: tf.Tensor, memory: tf.Tensor, - target_mask: Optional[tf.Tensor] = None, - target_query: Optional[tf.Tensor] = None, + target_mask: tf.Tensor | None = None, + target_query: tf.Tensor | None = None, **kwargs: Any, ) -> tf.Tensor: batch_size, sequence_length = target.shape @@ -248,7 +248,7 @@ def decode( target_query = self.dropout(target_query, **kwargs) return self.decoder(target_query, content, memory, target_mask, **kwargs) - def decode_autoregressive(self, features: tf.Tensor, max_len: Optional[int] = None, **kwargs) -> tf.Tensor: + def decode_autoregressive(self, features: tf.Tensor, max_len: int | None = None, **kwargs) -> tf.Tensor: """Generate predictions for the given features.""" max_length = max_len if max_len is not None else self.max_length max_length = min(max_length, self.max_length) + 1 @@ -315,11 +315,11 @@ def decode_autoregressive(self, features: tf.Tensor, max_len: Optional[int] = No def call( self, x: tf.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: features = self.feat_extractor(x, **kwargs) # (batch_size, patches_seqlen, d_model) # remove cls token features = features[:, 1:, :] @@ -390,7 +390,7 @@ def call( logits = _bf16_to_float32(logits) - out: Dict[str, tf.Tensor] = {} + out: dict[str, tf.Tensor] = {} if self.exportable: out["logits"] = logits return out @@ -418,7 +418,7 @@ class PARSeqPostProcessor(_PARSeqPostProcessor): def __call__( self, logits: tf.Tensor, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: # compute pred with argmax for attention models out_idxs = tf.math.argmax(logits, axis=2) preds_prob = tf.math.reduce_max(tf.nn.softmax(logits, axis=-1), axis=-1) @@ -444,7 +444,7 @@ def _parseq( arch: str, pretrained: bool, backbone_fn, - input_shape: Optional[Tuple[int, int, int]] = None, + input_shape: tuple[int, int, int] | None = None, **kwargs: Any, ) -> PARSeq: # Patch the config diff --git a/doctr/models/recognition/predictor/_utils.py b/doctr/models/recognition/predictor/_utils.py index 6618cec677..582d426909 100644 --- a/doctr/models/recognition/predictor/_utils.py +++ b/doctr/models/recognition/predictor/_utils.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import List, Tuple, Union import numpy as np @@ -13,12 +12,12 @@ def split_crops( - crops: List[np.ndarray], + crops: list[np.ndarray], max_ratio: float, target_ratio: int, dilation: float, channels_last: bool = True, -) -> Tuple[List[np.ndarray], List[Union[int, Tuple[int, int]]], bool]: +) -> tuple[list[np.ndarray], list[int | tuple[int, int]], bool]: """Chunk crops horizontally to match a given aspect ratio Args: @@ -32,8 +31,8 @@ def split_crops( a tuple with the new crops, their mapping, and a boolean specifying whether any remap is required """ _remap_required = False - crop_map: List[Union[int, Tuple[int, int]]] = [] - new_crops: List[np.ndarray] = [] + crop_map: list[int | tuple[int, int]] = [] + new_crops: list[np.ndarray] = [] for crop in crops: h, w = crop.shape[:2] if channels_last else crop.shape[-2:] aspect_ratio = w / h @@ -69,8 +68,8 @@ def split_crops( def remap_preds( - preds: List[Tuple[str, float]], crop_map: List[Union[int, Tuple[int, int]]], dilation: float -) -> List[Tuple[str, float]]: + preds: list[tuple[str, float]], crop_map: list[int | tuple[int, int]], dilation: float +) -> list[tuple[str, float]]: remapped_out = [] for _idx in crop_map: # Crop hasn't been split diff --git a/doctr/models/recognition/predictor/pytorch.py b/doctr/models/recognition/predictor/pytorch.py index dc1f644750..d3c3edf50c 100644 --- a/doctr/models/recognition/predictor/pytorch.py +++ b/doctr/models/recognition/predictor/pytorch.py @@ -3,7 +3,8 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List, Sequence, Tuple, Union +from collections.abc import Sequence +from typing import Any import numpy as np import torch @@ -43,9 +44,9 @@ def __init__( @torch.inference_mode() def forward( self, - crops: Sequence[Union[np.ndarray, torch.Tensor]], + crops: Sequence[np.ndarray | torch.Tensor], **kwargs: Any, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: if len(crops) == 0: return [] # Dimension check diff --git a/doctr/models/recognition/predictor/tensorflow.py b/doctr/models/recognition/predictor/tensorflow.py index 84772b7a23..45794f632a 100644 --- a/doctr/models/recognition/predictor/tensorflow.py +++ b/doctr/models/recognition/predictor/tensorflow.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import Any, List, Tuple, Union +from typing import Any import numpy as np import tensorflow as tf @@ -26,7 +26,7 @@ class RecognitionPredictor(NestedObject): split_wide_crops: wether to use crop splitting for high aspect ratio crops """ - _children_names: List[str] = ["pre_processor", "model"] + _children_names: list[str] = ["pre_processor", "model"] def __init__( self, @@ -44,9 +44,9 @@ def __init__( def __call__( self, - crops: List[Union[np.ndarray, tf.Tensor]], + crops: list[np.ndarray | tf.Tensor], **kwargs: Any, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: if len(crops) == 0: return [] # Dimension check diff --git a/doctr/models/recognition/sar/pytorch.py b/doctr/models/recognition/sar/pytorch.py index c6a6f31501..caf1900575 100644 --- a/doctr/models/recognition/sar/pytorch.py +++ b/doctr/models/recognition/sar/pytorch.py @@ -3,8 +3,9 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
+from collections.abc import Callable from copy import deepcopy -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any import torch from torch import nn @@ -19,7 +20,7 @@ __all__ = ["SAR", "sar_resnet31"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "sar_resnet31": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -113,12 +114,12 @@ def forward( self, features: torch.Tensor, # (N, C, H, W) holistic: torch.Tensor, # (N, C) - gt: Optional[torch.Tensor] = None, # (N, L) + gt: torch.Tensor | None = None, # (N, L) ) -> torch.Tensor: if gt is not None: gt_embedding = self.embed_tgt(gt) - logits_list: List[torch.Tensor] = [] + logits_list: list[torch.Tensor] = [] for t in range(self.max_length + 1): # 32 if t == 0: @@ -185,9 +186,9 @@ def __init__( attention_units: int = 512, max_length: int = 30, dropout_prob: float = 0.0, - input_shape: Tuple[int, int, int] = (3, 32, 128), + input_shape: tuple[int, int, int] = (3, 32, 128), exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() self.vocab = vocab @@ -230,10 +231,10 @@ def __init__( def forward( self, x: torch.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: features = self.feat_extractor(x)["features"] # NOTE: use max instead of functional max_pool2d which leads to ONNX incompatibility (kernel_size) # Vertical max pooling (N, C, H, W) --> (N, C, W) @@ -252,7 +253,7 @@ def forward( decoded_features = _bf16_to_float32(self.decoder(features, encoded, gt=None if target is None else gt)) - out: Dict[str, Any] = {} + out: dict[str, Any] = {} if self.exportable: out["logits"] = decoded_features return out @@ -310,7 +311,7 @@ class SARPostProcessor(RecognitionPostProcessor): def __call__( self, logits: torch.Tensor, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: # compute pred with argmax for attention models out_idxs = logits.argmax(-1) # N x L @@ -333,7 +334,7 @@ def _sar( backbone_fn: Callable[[bool], nn.Module], layer: str, pretrained_backbone: bool = True, - ignore_keys: Optional[List[str]] = None, + ignore_keys: list[str] | None = None, **kwargs: Any, ) -> SAR: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/recognition/sar/tensorflow.py b/doctr/models/recognition/sar/tensorflow.py index 3ae1b9fadf..ac88362378 100644 --- a/doctr/models/recognition/sar/tensorflow.py +++ b/doctr/models/recognition/sar/tensorflow.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. 
from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import tensorflow as tf from tensorflow.keras import Model, Sequential, layers @@ -18,7 +18,7 @@ __all__ = ["SAR", "sar_resnet31"] -default_cfgs: Dict[str, Dict[str, Any]] = { +default_cfgs: dict[str, dict[str, Any]] = { "sar_resnet31": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), @@ -156,13 +156,13 @@ def call( self, features: tf.Tensor, holistic: tf.Tensor, - gt: Optional[tf.Tensor] = None, + gt: tf.Tensor | None = None, **kwargs: Any, ) -> tf.Tensor: if gt is not None: gt_embedding = self.embed_tgt(gt, **kwargs) - logits_list: List[tf.Tensor] = [] + logits_list: list[tf.Tensor] = [] for t in range(self.max_length + 1): # 32 if t == 0: @@ -219,7 +219,7 @@ class SAR(Model, RecognitionModel): cfg: dictionary containing information about the model """ - _children_names: List[str] = ["feat_extractor", "encoder", "decoder", "postprocessor"] + _children_names: list[str] = ["feat_extractor", "encoder", "decoder", "postprocessor"] def __init__( self, @@ -232,7 +232,7 @@ def __init__( num_decoder_cells: int = 2, dropout_prob: float = 0.0, exportable: bool = False, - cfg: Optional[Dict[str, Any]] = None, + cfg: dict[str, Any] | None = None, ) -> None: super().__init__() self.vocab = vocab @@ -290,11 +290,11 @@ def compute_loss( def call( self, x: tf.Tensor, - target: Optional[List[str]] = None, + target: list[str] | None = None, return_model_output: bool = False, return_preds: bool = False, **kwargs: Any, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: features = self.feat_extractor(x, **kwargs) # vertical max pooling --> (N, C, W) pooled_features = tf.reduce_max(features, axis=1) @@ -312,7 +312,7 @@ def call( self.decoder(features, encoded, gt=None if target is None else gt, **kwargs) ) - out: Dict[str, tf.Tensor] = {} + out: dict[str, tf.Tensor] = {} if self.exportable: out["logits"] = decoded_features return out @@ -340,7 +340,7 @@ class SARPostProcessor(RecognitionPostProcessor): def __call__( self, logits: tf.Tensor, - ) -> List[Tuple[str, float]]: + ) -> list[tuple[str, float]]: # compute pred with argmax for attention models out_idxs = tf.math.argmax(logits, axis=2) # N x L @@ -364,7 +364,7 @@ def _sar( pretrained: bool, backbone_fn, pretrained_backbone: bool = True, - input_shape: Optional[Tuple[int, int, int]] = None, + input_shape: tuple[int, int, int] | None = None, **kwargs: Any, ) -> SAR: pretrained_backbone = pretrained_backbone and not pretrained diff --git a/doctr/models/recognition/utils.py b/doctr/models/recognition/utils.py index b0d22c8dbf..f6b299ade8 100644 --- a/doctr/models/recognition/utils.py +++ b/doctr/models/recognition/utils.py @@ -3,7 +3,6 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. -from typing import List from rapidfuzz.distance import Levenshtein @@ -59,7 +58,7 @@ def merge_strings(a: str, b: str, dil_factor: float) -> str: return a[:-1] + b[index - 1 :] -def merge_multi_strings(seq_list: List[str], dil_factor: float) -> str: +def merge_multi_strings(seq_list: list[str], dil_factor: float) -> str: """Recursively merges consecutive string sequences with overlapping characters. 
diff --git a/doctr/models/recognition/utils.py b/doctr/models/recognition/utils.py
index b0d22c8dbf..f6b299ade8 100644
--- a/doctr/models/recognition/utils.py
+++ b/doctr/models/recognition/utils.py
@@ -3,7 +3,6 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

-from typing import List

 from rapidfuzz.distance import Levenshtein

@@ -59,7 +58,7 @@ def merge_strings(a: str, b: str, dil_factor: float) -> str:
     return a[:-1] + b[index - 1 :]


-def merge_multi_strings(seq_list: List[str], dil_factor: float) -> str:
+def merge_multi_strings(seq_list: list[str], dil_factor: float) -> str:
     """Recursively merges consecutive string sequences with overlapping characters.

     Args:
@@ -76,7 +75,7 @@ def merge_multi_strings(seq_list: list[str], dil_factor: float) -> str:
         'abcdefghijkl'
     """

-    def _recursive_merge(a: str, seq_list: List[str], dil_factor: float) -> str:
+    def _recursive_merge(a: str, seq_list: list[str], dil_factor: float) -> str:
         # Recursive version of compute_overlap
         if len(seq_list) == 1:
             return merge_strings(a, seq_list[0], dil_factor)
diff --git a/doctr/models/recognition/vitstr/base.py b/doctr/models/recognition/vitstr/base.py
index 3fc9a9832e..59f21badb2 100644
--- a/doctr/models/recognition/vitstr/base.py
+++ b/doctr/models/recognition/vitstr/base.py
@@ -3,7 +3,6 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

-from typing import List, Tuple

 import numpy as np

@@ -17,8 +16,8 @@ class _ViTSTR:

     def build_target(
         self,
-        gts: List[str],
-    ) -> Tuple[np.ndarray, List[int]]:
+        gts: list[str],
+    ) -> tuple[np.ndarray, list[int]]:
         """Encode a list of gts sequences into a np array and gives the corresponding*
         sequence lengths.
diff --git a/doctr/models/recognition/vitstr/pytorch.py b/doctr/models/recognition/vitstr/pytorch.py
index fe47461d73..db763d53c7 100644
--- a/doctr/models/recognition/vitstr/pytorch.py
+++ b/doctr/models/recognition/vitstr/pytorch.py
@@ -3,8 +3,9 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

+from collections.abc import Callable
 from copy import deepcopy
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any

 import torch
 from torch import nn
@@ -19,7 +20,7 @@

 __all__ = ["ViTSTR", "vitstr_small", "vitstr_base"]

-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "vitstr_small": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -58,9 +59,9 @@ def __init__(
         vocab: str,
         embedding_units: int,
         max_length: int = 32,  # different from paper
-        input_shape: Tuple[int, int, int] = (3, 32, 128),  # different from paper
+        input_shape: tuple[int, int, int] = (3, 32, 128),  # different from paper
         exportable: bool = False,
-        cfg: Optional[Dict[str, Any]] = None,
+        cfg: dict[str, Any] | None = None,
     ) -> None:
         super().__init__()
         self.vocab = vocab
@@ -76,10 +77,10 @@ def __init__(
     def forward(
         self,
         x: torch.Tensor,
-        target: Optional[List[str]] = None,
+        target: list[str] | None = None,
         return_model_output: bool = False,
         return_preds: bool = False,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         features = self.feat_extractor(x)["features"]  # (batch_size, patches_seqlen, d_model)

         if target is not None:
@@ -97,7 +98,7 @@ def forward(
         logits = self.head(features).view(B, N, len(self.vocab) + 1)  # (batch_size, max_length, vocab + 1)
         decoded_features = _bf16_to_float32(logits[:, 1:])  # remove cls_token

-        out: Dict[str, Any] = {}
+        out: dict[str, Any] = {}
         if self.exportable:
             out["logits"] = decoded_features
             return out
@@ -156,7 +157,7 @@ class ViTSTRPostProcessor(_ViTSTRPostProcessor):
     def __call__(
         self,
         logits: torch.Tensor,
-    ) -> List[Tuple[str, float]]:
+    ) -> list[tuple[str, float]]:
         # compute pred with argmax for attention models
         out_idxs = logits.argmax(-1)
         preds_prob = torch.softmax(logits, -1).max(dim=-1)[0]
@@ -179,7 +180,7 @@ def _vitstr(
     pretrained: bool,
     backbone_fn: Callable[[bool], nn.Module],
     layer: str,
-    ignore_keys: Optional[List[str]] = None,
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> ViTSTR:
     # Patch the config
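typing.Callable is deprecated in favor of collections.abc.Callable (PEP 585), which subscripts the same way, as in the Callable[[bool], nn.Module] annotation above. A tiny runnable sketch with an invented helper, apply_twice:

    >>> from collections.abc import Callable
    >>> def apply_twice(fn: Callable[[int], int], x: int) -> int:
    ...     return fn(fn(x))
    >>> apply_twice(lambda v: v + 1, 3)
    5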
diff --git a/doctr/models/recognition/vitstr/tensorflow.py b/doctr/models/recognition/vitstr/tensorflow.py
index b5e23880dd..3865348b1f 100644
--- a/doctr/models/recognition/vitstr/tensorflow.py
+++ b/doctr/models/recognition/vitstr/tensorflow.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to for full license details.

 from copy import deepcopy
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any

 import tensorflow as tf
 from tensorflow.keras import Model, layers
@@ -17,7 +17,7 @@

 __all__ = ["ViTSTR", "vitstr_small", "vitstr_base"]

-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "vitstr_small": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -50,7 +50,7 @@ class ViTSTR(_ViTSTR, Model):
         cfg: dictionary containing information about the model
     """

-    _children_names: List[str] = ["feat_extractor", "postprocessor"]
+    _children_names: list[str] = ["feat_extractor", "postprocessor"]

     def __init__(
         self,
@@ -59,9 +59,9 @@ def __init__(
         embedding_units: int,
         max_length: int = 32,
         dropout_prob: float = 0.0,
-        input_shape: Tuple[int, int, int] = (32, 128, 3),  # different from paper
+        input_shape: tuple[int, int, int] = (32, 128, 3),  # different from paper
         exportable: bool = False,
-        cfg: Optional[Dict[str, Any]] = None,
+        cfg: dict[str, Any] | None = None,
     ) -> None:
         super().__init__()
         self.vocab = vocab
@@ -78,7 +78,7 @@ def __init__(
     def compute_loss(
         model_output: tf.Tensor,
         gt: tf.Tensor,
-        seq_len: List[int],
+        seq_len: list[int],
     ) -> tf.Tensor:
         """Compute categorical cross-entropy loss for the model.

         Sequences are masked after the EOS character.
@@ -111,11 +111,11 @@ def compute_loss(
     def call(
         self,
         x: tf.Tensor,
-        target: Optional[List[str]] = None,
+        target: list[str] | None = None,
         return_model_output: bool = False,
         return_preds: bool = False,
         **kwargs: Any,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         features = self.feat_extractor(x, **kwargs)  # (batch_size, patches_seqlen, d_model)

         if target is not None:
@@ -133,7 +133,7 @@ def call(
         )  # (batch_size, max_length, vocab + 1)
         decoded_features = _bf16_to_float32(logits[:, 1:])  # remove cls_token

-        out: Dict[str, tf.Tensor] = {}
+        out: dict[str, tf.Tensor] = {}
         if self.exportable:
             out["logits"] = decoded_features
             return out
@@ -161,7 +161,7 @@ class ViTSTRPostProcessor(_ViTSTRPostProcessor):
     def __call__(
         self,
         logits: tf.Tensor,
-    ) -> List[Tuple[str, float]]:
+    ) -> list[tuple[str, float]]:
         # compute pred with argmax for attention models
         out_idxs = tf.math.argmax(logits, axis=2)
         preds_prob = tf.math.reduce_max(tf.nn.softmax(logits, axis=-1), axis=-1)
@@ -187,7 +187,7 @@ def _vitstr(
     arch: str,
     pretrained: bool,
     backbone_fn,
-    input_shape: Optional[Tuple[int, int, int]] = None,
+    input_shape: tuple[int, int, int] | None = None,
     **kwargs: Any,
 ) -> ViTSTR:
     # Patch the config
diff --git a/doctr/models/recognition/zoo.py b/doctr/models/recognition/zoo.py
index 3108e147a5..f60431441c 100644
--- a/doctr/models/recognition/zoo.py
+++ b/doctr/models/recognition/zoo.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

-from typing import Any, List
+from typing import Any

 from doctr.file_utils import is_tf_available
 from doctr.models.preprocessor import PreProcessor
@@ -14,7 +14,7 @@

 __all__ = ["recognition_predictor"]

-ARCHS: List[str] = [
+ARCHS: list[str] = [
     "crnn_vgg16_bn",
     "crnn_mobilenet_v3_small",
     "crnn_mobilenet_v3_large",
diff --git a/doctr/models/utils/pytorch.py b/doctr/models/utils/pytorch.py
index 8c975b94d4..c9ea3a43ca 100644
--- a/doctr/models/utils/pytorch.py
+++ b/doctr/models/utils/pytorch.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to for full license details.

 import logging
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any

 import torch
 from torch import nn
@@ -32,9 +32,9 @@ def _bf16_to_float32(x: torch.Tensor) -> torch.Tensor:

 def load_pretrained_params(
     model: nn.Module,
-    url: Optional[str] = None,
-    hash_prefix: Optional[str] = None,
-    ignore_keys: Optional[List[str]] = None,
+    url: str | None = None,
+    hash_prefix: str | None = None,
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> None:
     """Load a set of parameters onto a model
@@ -75,7 +75,7 @@ def conv_sequence_pt(
     relu: bool = False,
     bn: bool = False,
     **kwargs: Any,
-) -> List[nn.Module]:
+) -> list[nn.Module]:
     """Builds a convolutional-based layer sequence

     >>> from torch.nn import Sequential
@@ -95,7 +95,7 @@ def conv_sequence_pt(
     # No bias before Batch norm
     kwargs["bias"] = kwargs.get("bias", not bn)
     # Add activation directly to the conv if there is no BN
-    conv_seq: List[nn.Module] = [nn.Conv2d(in_channels, out_channels, **kwargs)]
+    conv_seq: list[nn.Module] = [nn.Conv2d(in_channels, out_channels, **kwargs)]

     if bn:
         conv_seq.append(nn.BatchNorm2d(out_channels))
@@ -107,8 +107,8 @@ def conv_sequence_pt(


 def set_device_and_dtype(
-    model: Any, batches: List[torch.Tensor], device: Union[str, torch.device], dtype: torch.dtype
-) -> Tuple[Any, List[torch.Tensor]]:
+    model: Any, batches: list[torch.Tensor], device: str | torch.device, dtype: torch.dtype
+) -> tuple[Any, list[torch.Tensor]]:
     """Set the device and dtype of a model and its batches

     >>> import torch
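A side benefit of PEP 604: union objects built with | (such as str | torch.device above) are valid second arguments to isinstance() on Python 3.10+, something typing.Union never supported. A minimal doctest-style check, using stand-in types rather than torch:

    >>> isinstance("cuda:0", str | bytes)
    True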
diff --git a/doctr/models/utils/tensorflow.py b/doctr/models/utils/tensorflow.py
index 490e9add9f..dcb70bdad7 100644
--- a/doctr/models/utils/tensorflow.py
+++ b/doctr/models/utils/tensorflow.py
@@ -4,7 +4,8 @@
 # See LICENSE or go to for full license details.

 import logging
-from typing import Any, Callable, List, Optional, Tuple, Union
+from collections.abc import Callable
+from typing import Any

 import tensorflow as tf
 import tf2onnx
@@ -46,8 +47,8 @@ def _build_model(model: Model):

 def load_pretrained_params(
     model: Model,
-    url: Optional[str] = None,
-    hash_prefix: Optional[str] = None,
+    url: str | None = None,
+    hash_prefix: str | None = None,
     skip_mismatch: bool = False,
     **kwargs: Any,
 ) -> None:
@@ -73,12 +74,12 @@ def load_pretrained_params(

 def conv_sequence(
     out_channels: int,
-    activation: Optional[Union[str, Callable]] = None,
+    activation: str | Callable | None = None,
     bn: bool = False,
     padding: str = "same",
     kernel_initializer: str = "he_normal",
     **kwargs: Any,
-) -> List[layers.Layer]:
+) -> list[layers.Layer]:
     """Builds a convolutional-based layer sequence

     >>> from tensorflow.keras import Sequential
@@ -125,7 +126,7 @@ class IntermediateLayerGetter(Model):
         layer_names: the list of layers to retrieve the feature map from
     """

-    def __init__(self, model: Model, layer_names: List[str]) -> None:
+    def __init__(self, model: Model, layer_names: list[str]) -> None:
         intermediate_fmaps = [model.get_layer(layer_name).get_output_at(0) for layer_name in layer_names]
         super().__init__(model.input, outputs=intermediate_fmaps)

@@ -134,8 +135,8 @@ def __repr__(self) -> str:


 def export_model_to_onnx(
-    model: Model, model_name: str, dummy_input: List[tf.TensorSpec], **kwargs: Any
-) -> Tuple[str, List[str]]:
+    model: Model, model_name: str, dummy_input: list[tf.TensorSpec], **kwargs: Any
+) -> tuple[str, list[str]]:
     """Export model to ONNX format.

     >>> import tensorflow as tf
diff --git a/doctr/transforms/functional/base.py b/doctr/transforms/functional/base.py
index b769ac9992..70166ae3e0 100644
--- a/doctr/transforms/functional/base.py
+++ b/doctr/transforms/functional/base.py
@@ -3,7 +3,6 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

-from typing import Tuple, Union

 import cv2
 import numpy as np
@@ -15,7 +14,7 @@

 def crop_boxes(
     boxes: np.ndarray,
-    crop_box: Union[Tuple[int, int, int, int], Tuple[float, float, float, float]],
+    crop_box: tuple[int, int, int, int] | tuple[float, float, float, float],
 ) -> np.ndarray:
     """Crop localization boxes

@@ -47,7 +46,7 @@ def crop_boxes(
     return boxes[is_valid]


-def expand_line(line: np.ndarray, target_shape: Tuple[int, int]) -> Tuple[float, float]:
+def expand_line(line: np.ndarray, target_shape: tuple[int, int]) -> tuple[float, float]:
     """Expands a 2-point line, so that the first is on the edge. In other terms, we extend the line in the same
     direction until we meet one of the edges.

@@ -108,7 +107,7 @@ def expand_line(line: np.ndarray, target_shape: tuple[int, int]) -> tuple[float,


 def create_shadow_mask(
-    target_shape: Tuple[int, int],
+    target_shape: tuple[int, int],
     min_base_width=0.3,
     max_tip_width=0.5,
     max_tip_height=0.3,
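Note that crop_box above becomes a union of two homogeneous tuples rather than tuple[int | float, ...], which preserves the original contract: coordinates are either all absolute (int) or all relative (float), never mixed. The new spelling is directly usable in annotated assignments (values below are illustrative):

    crop_box: tuple[int, int, int, int] | tuple[float, float, float, float] = (0.0, 0.0, 0.5, 0.5)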
diff --git a/doctr/transforms/functional/pytorch.py b/doctr/transforms/functional/pytorch.py
index 3c65d76b7d..a91ba7754b 100644
--- a/doctr/transforms/functional/pytorch.py
+++ b/doctr/transforms/functional/pytorch.py
@@ -4,7 +4,6 @@
 # See LICENSE or go to for full license details.

 from copy import deepcopy
-from typing import Tuple

 import numpy as np
 import torch
@@ -46,7 +45,7 @@ def rotate_sample(
     geoms: np.ndarray,
     angle: float,
     expand: bool = False,
-) -> Tuple[torch.Tensor, np.ndarray]:
+) -> tuple[torch.Tensor, np.ndarray]:
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)

     Args:
@@ -89,8 +88,8 @@ def rotate_sample(


 def crop_detection(
-    img: torch.Tensor, boxes: np.ndarray, crop_box: Tuple[float, float, float, float]
-) -> Tuple[torch.Tensor, np.ndarray]:
+    img: torch.Tensor, boxes: np.ndarray, crop_box: tuple[float, float, float, float]
+) -> tuple[torch.Tensor, np.ndarray]:
     """Crop an image and associated bboxes

     Args:
@@ -113,7 +112,7 @@ def crop_detection(
     return cropped_img, boxes


-def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwargs) -> torch.Tensor:
+def random_shadow(img: torch.Tensor, opacity_range: tuple[float, float], **kwargs) -> torch.Tensor:
     """Apply a random shadow to a given image

     Args:
diff --git a/doctr/transforms/functional/tensorflow.py b/doctr/transforms/functional/tensorflow.py
index 4cee02e150..782e0f4fa4 100644
--- a/doctr/transforms/functional/tensorflow.py
+++ b/doctr/transforms/functional/tensorflow.py
@@ -5,8 +5,8 @@

 import math
 import random
+from collections.abc import Iterable
 from copy import deepcopy
-from typing import Iterable, Optional, Tuple, Union

 import numpy as np
 import tensorflow as tf
@@ -99,7 +99,7 @@ def rotate_sample(
     geoms: np.ndarray,
     angle: float,
     expand: bool = False,
-) -> Tuple[tf.Tensor, np.ndarray]:
+) -> tuple[tf.Tensor, np.ndarray]:
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)

     Args:
@@ -138,8 +138,8 @@ def rotate_sample(


 def crop_detection(
-    img: tf.Tensor, boxes: np.ndarray, crop_box: Tuple[float, float, float, float]
-) -> Tuple[tf.Tensor, np.ndarray]:
+    img: tf.Tensor, boxes: np.ndarray, crop_box: tuple[float, float, float, float]
+) -> tuple[tf.Tensor, np.ndarray]:
     """Crop an image and associated bboxes

     Args:
@@ -164,10 +164,10 @@ def crop_detection(

 def _gaussian_filter(
     img: tf.Tensor,
-    kernel_size: Union[int, Iterable[int]],
+    kernel_size: int | Iterable[int],
     sigma: float,
-    mode: Optional[str] = None,
-    pad_value: Optional[int] = 0,
+    mode: str | None = None,
+    pad_value: int = 0,
 ):
     """Apply Gaussian filter to image.

     Adapted from: https://github.com/tensorflow/addons/blob/master/tensorflow_addons/image/filters.py
@@ -225,7 +225,7 @@ def kernel2d(ksize: tf.Tensor, sigma: tf.Tensor, dtype: tf.DType):

     return tf.nn.depthwise_conv2d(img, g, [1, 1, 1, 1], padding="VALID", data_format="NHWC")


-def random_shadow(img: tf.Tensor, opacity_range: Tuple[float, float], **kwargs) -> tf.Tensor:
+def random_shadow(img: tf.Tensor, opacity_range: tuple[float, float], **kwargs) -> tf.Tensor:
     """Apply a random shadow to a given image

     Args:
diff --git a/doctr/transforms/modules/base.py b/doctr/transforms/modules/base.py
index b631b31c24..5fd0bd37a1 100644
--- a/doctr/transforms/modules/base.py
+++ b/doctr/transforms/modules/base.py
@@ -5,7 +5,8 @@

 import math
 import random
-from typing import Any, Callable, List, Optional, Tuple, Union
+from collections.abc import Callable
+from typing import Any

 import numpy as np

@@ -45,12 +46,12 @@ class SampleCompose(NestedObject):
         transforms: list of transformation modules
     """

-    _children_names: List[str] = ["sample_transforms"]
+    _children_names: list[str] = ["sample_transforms"]

-    def __init__(self, transforms: List[Callable[[Any, Any], Tuple[Any, Any]]]) -> None:
+    def __init__(self, transforms: list[Callable[[Any, Any], tuple[Any, Any]]]) -> None:
         self.sample_transforms = transforms

-    def __call__(self, x: Any, target: Any) -> Tuple[Any, Any]:
+    def __call__(self, x: Any, target: Any) -> tuple[Any, Any]:
         for t in self.sample_transforms:
             x, target = t(x, target)

@@ -84,12 +85,12 @@ class ImageTransform(NestedObject):
         transform: the image transformation module to wrap
     """

-    _children_names: List[str] = ["img_transform"]
+    _children_names: list[str] = ["img_transform"]

     def __init__(self, transform: Callable[[Any], Any]) -> None:
         self.img_transform = transform

-    def __call__(self, img: Any, target: Any) -> Tuple[Any, Any]:
+    def __call__(self, img: Any, target: Any) -> tuple[Any, Any]:
         img = self.img_transform(img)
         return img, target

@@ -159,12 +160,12 @@ class OneOf(NestedObject):
         transforms: list of transformations, one only will be picked
     """

-    _children_names: List[str] = ["transforms"]
+    _children_names: list[str] = ["transforms"]

-    def __init__(self, transforms: List[Callable[[Any], Any]]) -> None:
+    def __init__(self, transforms: list[Callable[[Any], Any]]) -> None:
         self.transforms = transforms

-    def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
+    def __call__(self, img: Any, target: np.ndarray | None = None) -> Any | tuple[Any, np.ndarray]:
         # Pick transformation
         transfo = self.transforms[int(random.random() * len(self.transforms))]
         # Apply
@@ -206,7 +207,7 @@ def __init__(self, transform: Callable[[Any], Any], p: float = 0.5) -> None:
     def extra_repr(self) -> str:
         return f"transform={self.transform}, p={self.p}"

-    def __call__(self, img: Any, target: Optional[np.ndarray] = None) -> Union[Any, Tuple[Any, np.ndarray]]:
+    def __call__(self, img: Any, target: np.ndarray | None = None) -> Any | tuple[Any, np.ndarray]:
         if random.random() < self.p:
             return self.transform(img) if target is None else self.transform(img, target)  # type: ignore[call-arg]
         return img if target is None else (img, target)
@@ -230,7 +231,7 @@ def __init__(self, max_angle: float = 5.0, expand: bool = False) -> None:
     def extra_repr(self) -> str:
         return f"max_angle={self.max_angle}, expand={self.expand}"

-    def __call__(self, img: Any, target: np.ndarray) -> Tuple[Any, np.ndarray]:
+    def __call__(self, img: Any, target: np.ndarray) -> tuple[Any, np.ndarray]:
         angle = random.uniform(-self.max_angle, self.max_angle)
         r_img, r_polys = F.rotate_sample(img, target, angle, self.expand)
         # Removes deleted boxes
@@ -246,14 +247,14 @@ class RandomCrop(NestedObject):
         ratio: tuple of float, relative (min_ratio, max_ratio) where ratio = h/w
     """

-    def __init__(self, scale: Tuple[float, float] = (0.08, 1.0), ratio: Tuple[float, float] = (0.75, 1.33)) -> None:
+    def __init__(self, scale: tuple[float, float] = (0.08, 1.0), ratio: tuple[float, float] = (0.75, 1.33)) -> None:
         self.scale = scale
         self.ratio = ratio

     def extra_repr(self) -> str:
         return f"scale={self.scale}, ratio={self.ratio}"

-    def __call__(self, img: Any, target: np.ndarray) -> Tuple[Any, np.ndarray]:
+    def __call__(self, img: Any, target: np.ndarray) -> tuple[Any, np.ndarray]:
         scale = random.uniform(self.scale[0], self.scale[1])
         ratio = random.uniform(self.ratio[0], self.ratio[1])
diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 66d99c8ec5..027998412d 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -4,7 +4,6 @@
 # See LICENSE or go to for full license details.

 import math
-from typing import Optional, Tuple, Union

 import numpy as np
 import torch
@@ -23,7 +22,7 @@ class Resize(T.Resize):

     def __init__(
         self,
-        size: Union[int, Tuple[int, int]],
+        size: int | tuple[int, int],
         interpolation=F.InterpolationMode.BILINEAR,
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
@@ -38,8 +37,8 @@ def __init__(
     def forward(
         self,
         img: torch.Tensor,
-        target: Optional[np.ndarray] = None,
-    ) -> Union[torch.Tensor, Tuple[torch.Tensor, np.ndarray]]:
+        target: np.ndarray | None = None,
+    ) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]:
         if isinstance(self.size, int):
             target_ratio = img.shape[-2] / img.shape[-1]
         else:
@@ -158,9 +157,7 @@ def forward(self, img: torch.Tensor) -> torch.Tensor:
 class RandomHorizontalFlip(T.RandomHorizontalFlip):
     """Randomly flip the input image horizontally"""

-    def forward(
-        self, img: Union[torch.Tensor, Image], target: np.ndarray
-    ) -> Tuple[Union[torch.Tensor, Image], np.ndarray]:
+    def forward(self, img: torch.Tensor | Image, target: np.ndarray) -> tuple[torch.Tensor | Image, np.ndarray]:
         if torch.rand(1) < self.p:
             _img = F.hflip(img)
             _target = target.copy()
@@ -185,7 +182,7 @@ class RandomShadow(torch.nn.Module):
         opacity_range : minimum and maximum opacity of the shade
     """

-    def __init__(self, opacity_range: Optional[Tuple[float, float]] = None) -> None:
+    def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
         super().__init__()
         self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8)

@@ -233,9 +230,9 @@ class RandomResize(torch.nn.Module):

     def __init__(
         self,
-        scale_range: Tuple[float, float] = (0.3, 0.9),
-        preserve_aspect_ratio: Union[bool, float] = False,
-        symmetric_pad: Union[bool, float] = False,
+        scale_range: tuple[float, float] = (0.3, 0.9),
+        preserve_aspect_ratio: bool | float = False,
+        symmetric_pad: bool | float = False,
         p: float = 0.5,
     ) -> None:
         super().__init__()
@@ -245,7 +242,7 @@ def __init__(
         self.p = p
         self._resize = Resize

-    def forward(self, img: torch.Tensor, target: np.ndarray) -> Tuple[torch.Tensor, np.ndarray]:
+    def forward(self, img: torch.Tensor, target: np.ndarray) -> tuple[torch.Tensor, np.ndarray]:
         if torch.rand(1) < self.p:
             scale_h = np.random.uniform(*self.scale_range)
             scale_w = np.random.uniform(*self.scale_range)
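Unions like bool | float (preserve_aspect_ratio and symmetric_pad above) encode a flag-or-ratio parameter in one annotation. A hedged sketch of how such a parameter might be consumed; pad_before is a made-up helper, not doctr's actual padding logic:

    >>> def pad_before(symmetric_pad: bool | float) -> float:
    ...     # True means split the padding evenly; a float is the exact leading share
    ...     return 0.5 if symmetric_pad is True else float(symmetric_pad)
    >>> pad_before(True), pad_before(0.25), pad_before(False)
    (0.5, 0.25, 0.0)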
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index b2c6c532c7..0d1f589173 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -4,7 +4,8 @@
 # See LICENSE or go to for full license details.

 import random
-from typing import Any, Callable, Iterable, List, Optional, Tuple, Union
+from collections.abc import Callable, Iterable
+from typing import Any

 import numpy as np
 import tensorflow as tf
@@ -46,9 +47,9 @@ class Compose(NestedObject):
         transforms: list of transformation modules
     """

-    _children_names: List[str] = ["transforms"]
+    _children_names: list[str] = ["transforms"]

-    def __init__(self, transforms: List[Callable[[Any], Any]]) -> None:
+    def __init__(self, transforms: list[Callable[[Any], Any]]) -> None:
         self.transforms = transforms

     def __call__(self, x: Any) -> Any:
@@ -75,7 +76,7 @@ class Resize(NestedObject):

     def __init__(
         self,
-        output_size: Union[int, Tuple[int, int]],
+        output_size: int | tuple[int, int],
         method: str = "bilinear",
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
@@ -102,8 +103,8 @@ def extra_repr(self) -> str:
     def __call__(
         self,
         img: tf.Tensor,
-        target: Optional[np.ndarray] = None,
-    ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
+        target: np.ndarray | None = None,
+    ) -> tf.Tensor | tuple[tf.Tensor, np.ndarray]:
         input_dtype = img.dtype
         self.output_size = (
             (self.output_size, self.output_size) if isinstance(self.output_size, int) else self.output_size
@@ -166,7 +167,7 @@ class Normalize(NestedObject):
         std: standard deviation per channel
     """

-    def __init__(self, mean: Tuple[float, float, float], std: Tuple[float, float, float]) -> None:
+    def __init__(self, mean: tuple[float, float, float], std: tuple[float, float, float]) -> None:
         self.mean = tf.constant(mean)
         self.std = tf.constant(std)

@@ -381,7 +382,7 @@ class GaussianBlur(NestedObject):
         std: min and max value of the standard deviation
     """

-    def __init__(self, kernel_shape: Union[int, Iterable[int]], std: Tuple[float, float]) -> None:
+    def __init__(self, kernel_shape: int | Iterable[int], std: tuple[float, float]) -> None:
         self.kernel_shape = kernel_shape
         self.std = std

@@ -460,7 +461,7 @@ def __init__(self, p: float) -> None:
         super().__init__()
         self.p = p

-    def __call__(self, img: Union[tf.Tensor, np.ndarray], target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
+    def __call__(self, img: tf.Tensor | np.ndarray, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
         if np.random.rand(1) <= self.p:
             _img = tf.image.flip_left_right(img)
             _target = target.copy()
@@ -485,7 +486,7 @@ class RandomShadow(NestedObject):
         opacity_range : minimum and maximum opacity of the shade
     """

-    def __init__(self, opacity_range: Optional[Tuple[float, float]] = None) -> None:
+    def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
         super().__init__()
         self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8)

@@ -526,9 +527,9 @@ class RandomResize(NestedObject):

     def __init__(
         self,
-        scale_range: Tuple[float, float] = (0.3, 0.9),
-        preserve_aspect_ratio: Union[bool, float] = False,
-        symmetric_pad: Union[bool, float] = False,
+        scale_range: tuple[float, float] = (0.3, 0.9),
+        preserve_aspect_ratio: bool | float = False,
+        symmetric_pad: bool | float = False,
         p: float = 0.5,
     ):
         super().__init__()
@@ -538,7 +539,7 @@ def __init__(
         self.p = p
         self._resize = Resize

-    def __call__(self, img: tf.Tensor, target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
+    def __call__(self, img: tf.Tensor, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
         if np.random.rand(1) <= self.p:
             scale_h = random.uniform(*self.scale_range)
             scale_w = random.uniform(*self.scale_range)
diff --git a/doctr/utils/common_types.py b/doctr/utils/common_types.py
index a82e8db5d3..d2a0ce9b1d 100644
--- a/doctr/utils/common_types.py
+++ b/doctr/utils/common_types.py
@@ -4,15 +4,14 @@
 # See LICENSE or go to for full license details.

 from pathlib import Path
-from typing import List, Tuple, Union

 __all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"]

-Point2D = Tuple[float, float]
-BoundingBox = Tuple[Point2D, Point2D]
-Polygon4P = Tuple[Point2D, Point2D, Point2D, Point2D]
-Polygon = List[Point2D]
-AbstractPath = Union[str, Path]
-AbstractFile = Union[AbstractPath, bytes]
-Bbox = Tuple[float, float, float, float]
+Point2D = tuple[float, float]
+BoundingBox = tuple[Point2D, Point2D]
+Polygon4P = tuple[Point2D, Point2D, Point2D, Point2D]
+Polygon = list[Point2D]
+AbstractPath = str | Path
+AbstractFile = AbstractPath | bytes
+Bbox = tuple[float, float, float, float]
diff --git a/doctr/utils/data.py b/doctr/utils/data.py
index 9702969267..fb613b1d6d 100644
--- a/doctr/utils/data.py
+++ b/doctr/utils/data.py
@@ -13,7 +13,6 @@
 import urllib.error
 import urllib.request
 from pathlib import Path
-from typing import Optional, Union

 from tqdm.auto import tqdm

@@ -25,7 +24,7 @@
 USER_AGENT = "mindee/doctr"


-def _urlretrieve(url: str, filename: Union[Path, str], chunk_size: int = 1024) -> None:
+def _urlretrieve(url: str, filename: Path | str, chunk_size: int = 1024) -> None:
     with open(filename, "wb") as fh:
         with urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": USER_AGENT})) as response:
             with tqdm(total=response.length) as pbar:
@@ -36,7 +35,7 @@ def _urlretrieve(url: str, filename: Union[Path, str], chunk_size: int = 1024) -
                     fh.write(chunk)


-def _check_integrity(file_path: Union[str, Path], hash_prefix: str) -> bool:
+def _check_integrity(file_path: str | Path, hash_prefix: str) -> bool:
     with open(file_path, "rb") as f:
         sha_hash = hashlib.sha256(f.read()).hexdigest()

@@ -45,10 +44,10 @@ def _check_integrity(file_path: str | Path, hash_prefix: str) -> bool:

 def download_from_url(
     url: str,
-    file_name: Optional[str] = None,
-    hash_prefix: Optional[str] = None,
-    cache_dir: Optional[str] = None,
-    cache_subdir: Optional[str] = None,
+    file_name: str | None = None,
+    hash_prefix: str | None = None,
+    cache_dir: str | None = None,
+    cache_subdir: str | None = None,
 ) -> Path:
     """Download a file using its URL

diff --git a/doctr/utils/fonts.py b/doctr/utils/fonts.py
index 618ed0b713..75279445dd 100644
--- a/doctr/utils/fonts.py
+++ b/doctr/utils/fonts.py
@@ -5,16 +5,13 @@

 import logging
 import platform
-from typing import Optional, Union

 from PIL import ImageFont

 __all__ = ["get_font"]


-def get_font(
-    font_family: Optional[str] = None, font_size: int = 13
-) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]:
+def get_font(font_family: str | None = None, font_size: int = 13) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
     """Resolves a compatible ImageFont for the system

     Args:
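Module-level aliases such as AbstractPath = str | Path now evaluate eagerly to types.UnionType instances, so on Python 3.10+ they double as isinstance() targets at runtime. A doctest-style check (the file name is illustrative):

    >>> from pathlib import Path
    >>> AbstractPath = str | Path
    >>> isinstance(Path("sample.pdf"), AbstractPath)
    True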
diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py
index b513a6308e..b4660cd74b 100644
--- a/doctr/utils/geometry.py
+++ b/doctr/utils/geometry.py
@@ -5,7 +5,6 @@

 from copy import deepcopy
 from math import ceil
-from typing import List, Optional, Tuple, Union

 import cv2
 import numpy as np
@@ -55,7 +54,7 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
     return (min(x), min(y)), (max(x), max(y))


-def detach_scores(boxes: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+def detach_scores(boxes: list[np.ndarray]) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """Detach the objectness scores from box predictions

     Args:
@@ -66,7 +65,7 @@ def detach_scores(boxes: list[np.ndarray]) -> tuple[list[np.nd
         the second one contains the objectness scores
     """

-    def _detach(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    def _detach(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         if boxes.ndim == 2:
             return boxes[:, :-1], boxes[:, -1]
         return boxes[:, :-1], boxes[:, -1, -1]
@@ -75,7 +74,7 @@ def _detach(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
     return list(loc_preds), list(obj_scores)


-def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
+def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray:
     """Compute enclosing bbox either from:

     Args:
@@ -97,7 +96,7 @@ def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBo
     return (min(x), min(y)), (max(x), max(y))


-def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
+def resolve_enclosing_rbbox(rbboxes: list[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
     """Compute enclosing rotated bbox either from:

     Args:
@@ -136,7 +135,7 @@ def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
     return np.matmul(points, rotation_mat.T)


-def compute_expanded_shape(img_shape: Tuple[int, int], angle: float) -> Tuple[int, int]:
+def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[int, int]:
     """Compute the shape of an expanded rotated image

     Args:
@@ -160,7 +159,7 @@ def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[in
 def rotate_abs_geoms(
     geoms: np.ndarray,
     angle: float,
-    img_shape: Tuple[int, int],
+    img_shape: tuple[int, int],
     expand: bool = True,
 ) -> np.ndarray:
     """Rotate a batch of bounding boxes or polygons by an angle around the
@@ -198,7 +197,7 @@ def rotate_abs_geoms(
     return rotated_polys


-def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray:
+def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]) -> np.ndarray:
     """Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape.
     This does not impact the absolute shape of the boxes, but allows calculating the new relative RotatedBbox
     coordinates after a resizing of the image.

@@ -227,9 +226,9 @@ def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape:
 def rotate_boxes(
     loc_preds: np.ndarray,
     angle: float,
-    orig_shape: Tuple[int, int],
+    orig_shape: tuple[int, int],
     min_angle: float = 1.0,
-    target_shape: Optional[Tuple[int, int]] = None,
+    target_shape: tuple[int, int] | None = None,
 ) -> np.ndarray:
     """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes
     (4, 2) of an angle, if angle > min_angle, around the center of the page.
@@ -366,7 +365,7 @@ def estimate_page_angle(polys: np.ndarray) -> float:
         return 0.0


-def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray:
+def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray:
     """Convert a geometry to relative coordinates

     Args:
@@ -391,7 +390,7 @@ def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) ->
     raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")


-def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]:
+def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> list[np.ndarray]:
     """Create cropped images from a list of bounding boxes

     Args:
@@ -425,7 +424,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True

 def extract_rcrops(
     img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
-) -> List[np.ndarray]:
+) -> list[np.ndarray]:
     """Create cropped images from a list of rotated bounding boxes

     Args:
diff --git a/doctr/utils/metrics.py b/doctr/utils/metrics.py
index ddd6f9774e..6cff2be8d0 100644
--- a/doctr/utils/metrics.py
+++ b/doctr/utils/metrics.py
@@ -3,7 +3,6 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

-from typing import Dict, List, Optional, Tuple

 import numpy as np
 from anyascii import anyascii
@@ -21,7 +20,7 @@
 ]


-def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
+def string_match(word1: str, word2: str) -> tuple[bool, bool, bool, bool]:
     """Performs string comparison with multiple levels of tolerance

     Args:
@@ -76,8 +75,8 @@ def __init__(self) -> None:

     def update(
         self,
-        gt: List[str],
-        pred: List[str],
+        gt: list[str],
+        pred: list[str],
     ) -> None:
         """Update the state of the metric with new predictions

@@ -97,7 +96,7 @@ def update(

         self.total += len(gt)

-    def summary(self) -> Dict[str, float]:
+    def summary(self) -> dict[str, float]:
         """Computes the aggregated metrics

         Returns:
@@ -179,7 +178,7 @@ def polygon_iou(polys_1: np.ndarray, polys_2: np.ndarray) -> np.ndarray:
     return iou_mat


-def nms(boxes: np.ndarray, thresh: float = 0.5) -> List[int]:
+def nms(boxes: np.ndarray, thresh: float = 0.5) -> list[int]:
     """Perform non-max suppression, borrowed from `_.

     Args:
@@ -286,7 +285,7 @@ def update(self, gts: np.ndarray, preds: np.ndarray) -> None:
         self.num_gts += gts.shape[0]
         self.num_preds += preds.shape[0]

-    def summary(self) -> Tuple[Optional[float], Optional[float], Optional[float]]:
+    def summary(self) -> tuple[float | None, float | None, float | None]:
         """Computes the aggregated metrics

         Returns:
@@ -364,8 +363,8 @@ def update(
         self,
         gt_boxes: np.ndarray,
         pred_boxes: np.ndarray,
-        gt_labels: List[str],
-        pred_labels: List[str],
+        gt_labels: list[str],
+        pred_labels: list[str],
     ) -> None:
         """Updates the metric

@@ -403,7 +402,7 @@ def update(
         self.num_gts += gt_boxes.shape[0]
         self.num_preds += pred_boxes.shape[0]

-    def summary(self) -> Tuple[Dict[str, Optional[float]], Dict[str, Optional[float]], Optional[float]]:
+    def summary(self) -> tuple[dict[str, float | None], dict[str, float | None], float | None]:
         """Computes the aggregated metrics

         Returns:
@@ -528,7 +527,7 @@ def update(
         self.num_gts += gt_boxes.shape[0]
         self.num_preds += pred_boxes.shape[0]

-    def summary(self) -> Tuple[Optional[float], Optional[float], Optional[float]]:
+    def summary(self) -> tuple[float | None, float | None, float | None]:
         """Computes the aggregated metrics

         Returns:
diff --git a/doctr/utils/multithreading.py b/doctr/utils/multithreading.py
index f64e1aacc8..5ec710ad81 100644
--- a/doctr/utils/multithreading.py
+++ b/doctr/utils/multithreading.py
@@ -6,15 +6,16 @@

 import multiprocessing as mp
 import os
+from collections.abc import Callable, Iterable, Iterator
 from multiprocessing.pool import ThreadPool
-from typing import Any, Callable, Iterable, Iterator, Optional
+from typing import Any

 from doctr.file_utils import ENV_VARS_TRUE_VALUES

 __all__ = ["multithread_exec"]


-def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Optional[int] = None) -> Iterator[Any]:
+def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: int | None = None) -> Iterator[Any]:
     """Execute a given function in parallel for each element of a given sequence

     >>> from doctr.utils.multithreading import multithread_exec
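The ABCs used above (Callable, Iterable, Iterator) all come from collections.abc now, and generators satisfy Iterator, so a signature shaped like multithread_exec's can be exercised with a lazy sketch (map_lazy is invented for illustration, not a doctr helper):

    >>> from collections.abc import Callable, Iterable, Iterator
    >>> def map_lazy(fn: Callable[[int], int], seq: Iterable[int]) -> Iterator[int]:
    ...     return (fn(x) for x in seq)
    >>> list(map_lazy(abs, [-1, 2, -3]))
    [1, 2, 3]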
diff --git a/doctr/utils/reconstitution.py b/doctr/utils/reconstitution.py
index 23541f059c..6b12b6cab2 100644
--- a/doctr/utils/reconstitution.py
+++ b/doctr/utils/reconstitution.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

 import logging
-from typing import Any, Dict, Optional
+from typing import Any

 import numpy as np
 from anyascii import anyascii
@@ -18,7 +18,7 @@
 ROTATION_WARNING = False


-def _warn_rotation(entry: Dict[str, Any]) -> None:  # pragma: no cover
+def _warn_rotation(entry: dict[str, Any]) -> None:  # pragma: no cover
     global ROTATION_WARNING
     if not ROTATION_WARNING and len(entry["geometry"]) == 4:
         logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
@@ -27,11 +27,11 @@ def _warn_rotation(entry: Dict[str, Any]) -> None:  # pragma: no cover

 def _synthesize(
     response: Image.Image,
-    entry: Dict[str, Any],
+    entry: dict[str, Any],
     w: int,
     h: int,
     draw_proba: bool = False,
-    font_family: Optional[str] = None,
+    font_family: str | None = None,
     smoothing_factor: float = 0.75,
     min_font_size: int = 6,
     max_font_size: int = 50,
@@ -111,9 +111,9 @@ def _synthesize(


 def synthesize_page(
-    page: Dict[str, Any],
+    page: dict[str, Any],
     draw_proba: bool = False,
-    font_family: Optional[str] = None,
+    font_family: str | None = None,
     smoothing_factor: float = 0.95,
     min_font_size: int = 8,
     max_font_size: int = 50,
@@ -172,9 +172,9 @@ def synthesize_page(


 def synthesize_kie_page(
-    page: Dict[str, Any],
+    page: dict[str, Any],
     draw_proba: bool = False,
-    font_family: Optional[str] = None,
+    font_family: str | None = None,
 ) -> np.ndarray:
     """Draw the content of the element page (OCR response) on a blank page.

diff --git a/doctr/utils/repr.py b/doctr/utils/repr.py
index ccae2d6afc..13fb7c7333 100644
--- a/doctr/utils/repr.py
+++ b/doctr/utils/repr.py
@@ -5,7 +5,6 @@

 # Adapted from https://github.com/pytorch/torch/blob/master/torch/nn/modules/module.py

-from typing import List

 __all__ = ["NestedObject"]

@@ -25,7 +24,7 @@ def _addindent(s_, num_spaces):
 class NestedObject:
     """Base class for all nested objects in doctr"""

-    _children_names: List[str]
+    _children_names: list[str]

     def extra_repr(self) -> str:
         return ""
diff --git a/doctr/utils/visualization.py b/doctr/utils/visualization.py
index c0e7b75d04..bea248ee84 100644
--- a/doctr/utils/visualization.py
+++ b/doctr/utils/visualization.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to for full license details.

 import colorsys
 from copy import deepcopy
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any

 import cv2
 import matplotlib.patches as patches
@@ -19,9 +19,9 @@

 def rect_patch(
     geometry: BoundingBox,
-    page_dimensions: Tuple[int, int],
-    label: Optional[str] = None,
-    color: Tuple[float, float, float] = (0, 0, 0),
+    page_dimensions: tuple[int, int],
+    label: str | None = None,
+    color: tuple[float, float, float] = (0, 0, 0),
     alpha: float = 0.3,
     linewidth: int = 2,
     fill: bool = True,
@@ -68,9 +68,9 @@ def rect_patch(

 def polygon_patch(
     geometry: np.ndarray,
-    page_dimensions: Tuple[int, int],
-    label: Optional[str] = None,
-    color: Tuple[float, float, float] = (0, 0, 0),
+    page_dimensions: tuple[int, int],
+    label: str | None = None,
+    color: tuple[float, float, float] = (0, 0, 0),
     alpha: float = 0.3,
     linewidth: int = 2,
     fill: bool = True,
@@ -110,8 +110,8 @@ def polygon_patch(

 def create_obj_patch(
-    geometry: Union[BoundingBox, Polygon4P, np.ndarray],
-    page_dimensions: Tuple[int, int],
+    geometry: BoundingBox | Polygon4P | np.ndarray,
+    page_dimensions: tuple[int, int],
     **kwargs: Any,
 ) -> patches.Patch:
     """Create a matplotlib patch for the element
@@ -134,7 +134,7 @@ def create_obj_patch(
     raise ValueError("invalid geometry format")


-def get_colors(num_colors: int) -> List[Tuple[float, float, float]]:
+def get_colors(num_colors: int) -> list[tuple[float, float, float]]:
     """Generate num_colors colors for matplotlib

     Args:
@@ -153,7 +153,7 @@ def get_colors(num_colors: int) -> list[tuple[float, float, float]]:


 def visualize_page(
-    page: Dict[str, Any],
+    page: dict[str, Any],
     image: np.ndarray,
     words_only: bool = True,
     display_artefacts: bool = True,
@@ -197,7 +197,7 @@ def visualize_page(
     ax.axis("off")

     if interactive:
-        artists: List[patches.Patch] = []  # instantiate an empty list of patches (to be drawn on the page)
+        artists: list[patches.Patch] = []  # instantiate an empty list of patches (to be drawn on the page)

     for block in page["blocks"]:
         if not words_only:
@@ -277,7 +277,7 @@ def visualize_page(


 def visualize_kie_page(
-    page: Dict[str, Any],
+    page: dict[str, Any],
     image: np.ndarray,
     words_only: bool = False,
     display_artefacts: bool = True,
@@ -321,7 +321,7 @@ def visualize_kie_page(
     ax.axis("off")

     if interactive:
-        artists: List[patches.Patch] = []  # instantiate an empty list of patches (to be drawn on the page)
+        artists: list[patches.Patch] = []  # instantiate an empty list of patches (to be drawn on the page)

     colors = {k: color for color, k in zip(get_colors(len(page["predictions"])), page["predictions"])}
     for key, value in page["predictions"].items():
@@ -351,7 +351,7 @@ def visualize_kie_page(
     return fig


-def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: Optional[Tuple[int, int, int]] = None, **kwargs) -> None:
+def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None:
     """Draw an array of relative straight boxes on an image

     Args:
diff --git a/references/detection/utils.py b/references/detection/utils.py
index fe1b326f74..ea8e2c269d 100644
--- a/references/detection/utils.py
+++ b/references/detection/utils.py
@@ -3,14 +3,13 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to for full license details.

-from typing import Dict, List

 import cv2
 import matplotlib.pyplot as plt
 import numpy as np


-def plot_samples(images, targets: List[Dict[str, np.ndarray]]) -> None:
+def plot_samples(images, targets: list[dict[str, np.ndarray]]) -> None:
     # Unnormalize image
     nb_samples = min(len(images), 4)
     _, axes = plt.subplots(2, nb_samples, figsize=(20, 5))
diff --git a/tests/tensorflow/test_datasets_loader_tf.py b/tests/tensorflow/test_datasets_loader_tf.py
index c360f52684..3abf2bf64b 100644
--- a/tests/tensorflow/test_datasets_loader_tf.py
+++ b/tests/tensorflow/test_datasets_loader_tf.py
@@ -1,5 +1,3 @@
-from typing import List, Tuple
-
 import tensorflow as tf

 from doctr.datasets import DataLoader
@@ -7,7 +5,7 @@

 class MockDataset:
     def __init__(self, input_size):
-        self.data: List[Tuple[float, bool]] = [
+        self.data: list[tuple[float, bool]] = [
             (1, True),
             (0, False),
             (0.5, True),