Skip to content

Commit

Permalink
Linting
Browse files: browse the repository at this point in the history
  • Loading branch information
evekhm committed Jul 18, 2024
1 parent 1634311 commit 053f52f
Showing 1 changed file with 23 additions and 21 deletions.
44 changes: 23 additions & 21 deletions classify-split-extract-workflow/classify-job/config.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,9 +14,11 @@

import json
import os
from typing import Optional, Dict, Any, Union
from google.cloud import run_v2, storage
from typing import Optional, Dict, Any, cast, Tuple

import google.auth
from google.cloud import run_v2, storage

from logging_handler import Logger

logger = Logger.get_logger(__file__)
Expand Down Expand Up @@ -70,9 +72,9 @@
FULL_JOB_NAME = run_v2.ExecutionsClient.job_path(PROJECT_ID, REGION, "classify-job")

# Global variables
BUCKET = None
LAST_MODIFIED_TIME_OF_CONFIG = None
CONFIG_DATA = None
BUCKET: Optional[storage.Bucket] = None
LAST_MODIFIED_TIME_OF_CONFIG: Optional[Any] = None
CONFIG_DATA: Optional[Dict[Any, Any]] = None

logger.info(
f"Settings used: CLASSIFY_INPUT_BUCKET=gs://{CLASSIFY_INPUT_BUCKET}, INPUT_FILE={INPUT_FILE}, "
Expand Down Expand Up @@ -110,7 +112,7 @@ def get_config(config_name: Optional[str] = None,
element_path (Optional[str]): The element path.
Returns:
Dict: The configuration data.
Optional[Dict[Any, Any]]: The configuration data.
"""
global CONFIG_DATA, LAST_MODIFIED_TIME_OF_CONFIG
if not CONFIG_DATA:
Expand Down Expand Up @@ -146,15 +148,15 @@ def get_parser_name_by_doc_type(doc_type: str) -> Optional[str]:
Returns:
Optional[str]: The parser name, or None if not found.
"""
return get_config(CONFIG_JSON_DOCUMENT_TYPES_CONFIG, f"{doc_type}.parser")
return cast(Optional[str], get_config(CONFIG_JSON_DOCUMENT_TYPES_CONFIG, f"{doc_type}.parser"))


def get_document_types_config() -> Dict[Any, Any]:
def get_document_types_config() -> Optional[Dict[Any, Any]]:
"""
Retrieves the document types configuration.
Returns:
Dict: The document types configuration.
Optional[Dict[Any, Any]]: The document types configuration.
"""
return get_config(CONFIG_JSON_DOCUMENT_TYPES_CONFIG)

Expand All @@ -167,7 +169,7 @@ def get_parser_by_doc_type(doc_type: str) -> Optional[Dict[Any, Any]]:
doc_type (str): The document type.
Returns:
Optional[Dict]: The parser configuration.
Optional[Dict[Any, Any]]: The parser configuration.
"""
parser_name = get_parser_name_by_doc_type(doc_type)
if parser_name:
Expand All @@ -184,7 +186,7 @@ def load_config(bucket_name: str, filename: str) -> Optional[Dict[Any, Any]]:
filename (str): The configuration file name.
Returns:
Optional[Dict]: The configuration data.
Optional[Dict[Any, Any]]: The configuration data.
"""
global BUCKET, LAST_MODIFIED_TIME_OF_CONFIG, CONFIG_DATA

Expand Down Expand Up @@ -221,12 +223,12 @@ def load_config(bucket_name: str, filename: str) -> Optional[Dict[Any, Any]]:
return CONFIG_DATA


def get_docai_settings() -> Dict[Any, Any]:
def get_docai_settings() -> Optional[Dict[Any, Any]]:
"""
Retrieves the Document AI settings configuration.
Returns:
Dict: The Document AI settings configuration.
Optional[Dict[Any, Any]]: The Document AI settings configuration.
"""
return get_config("settings_config")

Expand Down Expand Up @@ -276,9 +278,11 @@ def get_document_class_by_classifier_label(label_name: str) -> Optional[str]:
Returns:
Optional[str]: The document class.
"""
for k, v in get_document_types_config().items():
if v.get("classifier_label") == label_name:
return k
doc_types_config = get_document_types_config()
if doc_types_config:
for k, v in doc_types_config.items():
if v.get("classifier_label") == label_name:
return k
logger.error(f"classifier_label={label_name} is not assigned to any document in the config")
return None

Expand All @@ -291,22 +295,20 @@ def get_parser_by_name(parser_name: str) -> Optional[Dict[Any, Any]]:
parser_name (str): The parser name.
Returns:
Optional[Dict]: The parser configuration.
Optional[Dict[Any, Any]]: The parser configuration.
"""
return get_config("parser_config", parser_name)


def get_model_name_table_name(document_type: str) -> \
Union[tuple[Optional[str], Optional[str]], tuple[None, None]]:
def get_model_name_table_name(document_type: str) -> Tuple[Optional[str], Optional[str]]:
"""
Retrieves the output table name and model name by document type.
Args:
document_type (str): The document type.
Returns:
Union[tuple[Optional[str], Optional[str]], tuple[None, None]]: The output table name and
model name.
Tuple[Optional[str], Optional[str]]: The output table name and model name.
"""
parser_name = get_parser_name_by_doc_type(document_type)
if parser_name:
Expand Down

0 comments on commit 053f52f

Please sign in to comment.