From ee68c9a6f7483eabf0a427337a7f9664bd768cfc Mon Sep 17 00:00:00 2001 From: David Papp Date: Tue, 10 Dec 2024 13:25:53 +0100 Subject: [PATCH] Feat: Logging improvements and add healthz endpoint --- services/cache/Dockerfile | 4 +-- services/rule/Dockerfile | 6 +++-- services/rule/src/main.py | 27 +++++-------------- ...zure_ai_content_safety_prompt_injection.py | 4 ++- services/rule/src/plugins/content_safety.py | 3 ++- services/rule/src/plugins/detect_english.py | 4 ++- services/rule/src/plugins/invisible_chars.py | 3 ++- services/rule/src/plugins/llama_guard.py | 10 +++---- .../rule/src/plugins/openai_moderation.py | 2 ++ services/rule/src/plugins/pii.py | 3 ++- services/rule/src/plugins/prompt_guard.py | 9 +++---- .../rule/src/plugins/prompt_injection_llm.py | 6 +++++ services/rule/src/utils/__init__.py | 1 + services/rule/src/utils/logger_config.py | 26 ++++++++++++++++++ 14 files changed, 65 insertions(+), 43 deletions(-) create mode 100644 services/rule/src/utils/__init__.py create mode 100644 services/rule/src/utils/logger_config.py diff --git a/services/cache/Dockerfile b/services/cache/Dockerfile index 2be5f69..1a28647 100644 --- a/services/cache/Dockerfile +++ b/services/cache/Dockerfile @@ -75,8 +75,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ apt-get clean && \ rm -rf /app && \ usermod -d /app -m nobody && \ - mkdir -p /app && \ - chown nobody:nogroup /app + mkdir -p /app/cache && \ + chown -R nobody:nogroup /app WORKDIR /app USER nobody:nogroup diff --git a/services/rule/Dockerfile b/services/rule/Dockerfile index c01eb12..8de0013 100644 --- a/services/rule/Dockerfile +++ b/services/rule/Dockerfile @@ -75,8 +75,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ apt-get clean && \ rm -rf /app && \ usermod -d /app -m nobody && \ - mkdir -p /app && \ - chown nobody:nogroup /app + mkdir -p /app/cache && \ + chown -R nobody:nogroup /app WORKDIR /app USER nobody:nogroup @@ -87,5 +87,7 @@ COPY --from=builder-base --chmod=755 --chown=nobody:nogroup $VIRTUAL_ENV $VIRTUA COPY --chmod=755 --chown=nobody:nogroup poetry.lock pyproject.toml ./ COPY --chmod=755 --chown=nobody:nogroup src/main.py ./src/main.py COPY --chmod=755 --chown=nobody:nogroup src/plugins ./src/plugins +COPY --chmod=755 --chown=nobody:nogroup src/utils ./src/utils + EXPOSE 8080 CMD ["python","src/main.py"] diff --git a/services/rule/src/main.py b/services/rule/src/main.py index 6bffc9e..a6623a3 100644 --- a/services/rule/src/main.py +++ b/services/rule/src/main.py @@ -7,6 +7,9 @@ import logging import os import json +from utils.logger_config import setup_logger + +logger = setup_logger(__name__) class JSONFormatter(logging.Formatter): def format(self, record): @@ -20,25 +23,6 @@ def format(self, record): } return json.dumps(log_record) -def setup_logging(): - # Get the log level from the environment variable, default to 'INFO' - log_level = os.getenv('LOG_LEVEL', 'INFO').upper() - - # Validate and set the log level - numeric_level = getattr(logging, log_level, None) - if not isinstance(numeric_level, int): - raise ValueError(f'Invalid log level: {log_level}') - - # Configure the logging - json_formatter = JSONFormatter() - handler = logging.StreamHandler() - handler.setFormatter(json_formatter) - logger = logging.getLogger(__name__) - logger.setLevel(numeric_level) - logger.addHandler(handler) - -# Configure logging -setup_logging() # Example usage of logging logger = logging.getLogger(__name__) @@ -90,6 +74,7 @@ async def health_check(): @app.post("/rule/execute") async def execute_plugin(rule: Rule): + logger.info(f"Received rule: {rule.model_dump_json()}") global plugin_name try: logger.debug(f"Received rule: {rule}") @@ -161,12 +146,12 @@ async def execute_plugin(rule: Rule): if not relation or not relation.strip(): logger.warning("No relation specified, defaulting to '>'") relation = '>' # Default to greater than if no relation is specified - + # Ensure there's exactly one space between components rule_expression = f"score {relation.strip()} {threshold}".strip() logger.debug(f"Rule expression: {rule_expression}") logger.debug(f"Data for rule engine: {data}") - + try: rule_obj = rule_engine.Rule(rule_expression, context=context) match = rule_obj.matches(data) diff --git a/services/rule/src/plugins/azure_ai_content_safety_prompt_injection.py b/services/rule/src/plugins/azure_ai_content_safety_prompt_injection.py index b34f2a1..cc4cdb9 100644 --- a/services/rule/src/plugins/azure_ai_content_safety_prompt_injection.py +++ b/services/rule/src/plugins/azure_ai_content_safety_prompt_injection.py @@ -19,6 +19,8 @@ import requests from srsly import json_dumps +from utils.logger_config import setup_logger +logger = setup_logger(__name__) key = os.environ["CONTENT_SAFETY_KEY"] if not key: @@ -48,4 +50,4 @@ def handler(text: str, _threshold: float, _config: Dict[str, Any]) -> dict: print("Azure Content Safety result: %s", content_safety_result["userPromptAnalysis"]["attackDetected"]) if content_safety_result["userPromptAnalysis"]["attackDetected"]: return dict(check_result=True, score=1) - return dict(check_result=False, score=0) \ No newline at end of file + return dict(check_result=False, score=0) diff --git a/services/rule/src/plugins/content_safety.py b/services/rule/src/plugins/content_safety.py index 8971334..9d52eaf 100644 --- a/services/rule/src/plugins/content_safety.py +++ b/services/rule/src/plugins/content_safety.py @@ -41,7 +41,8 @@ import re import logging -logging.basicConfig(level=logging.DEBUG) +from utils.logger_config import setup_logger +logger = setup_logger(__name__) class ContentCategories: PROFANITY_VULGAR = { diff --git a/services/rule/src/plugins/detect_english.py b/services/rule/src/plugins/detect_english.py index 3137568..f6fbf12 100644 --- a/services/rule/src/plugins/detect_english.py +++ b/services/rule/src/plugins/detect_english.py @@ -20,6 +20,8 @@ import torch from transformers import AutoTokenizer, AutoModelForSequenceClassification from typing import Dict, Any +from utils.logger_config import setup_logger +logger = setup_logger(__name__) class LanguageDetector: @@ -50,4 +52,4 @@ def handler(text: str, threshold: float, config: Dict[str, Any]) -> Dict[str, An return { "check_result": english_score > threshold, "score": english_score, - } \ No newline at end of file + } diff --git a/services/rule/src/plugins/invisible_chars.py b/services/rule/src/plugins/invisible_chars.py index 68b40df..1d69ba0 100644 --- a/services/rule/src/plugins/invisible_chars.py +++ b/services/rule/src/plugins/invisible_chars.py @@ -20,7 +20,8 @@ import unicodedata import logging -logging.basicConfig(level=logging.DEBUG) +from utils.logger_config import setup_logger +logger = setup_logger(__name__) def contains_unicode(text: str) -> bool: diff --git a/services/rule/src/plugins/llama_guard.py b/services/rule/src/plugins/llama_guard.py index 91c5fc3..7fb379a 100644 --- a/services/rule/src/plugins/llama_guard.py +++ b/services/rule/src/plugins/llama_guard.py @@ -9,12 +9,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer from huggingface_hub import login, HfApi -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s [%(levelname)s] %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' -) -logger = logging.getLogger(__name__) +from utils.logger_config import setup_logger +logger = setup_logger(__name__) DEFAULT_CATEGORIES = ["S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10", "S11", "S12", "S13"] @@ -189,4 +185,4 @@ def handler(text: str, threshold: float, config: Dict[str, Any]) -> Dict[str, An "check_result": False, "score": 0.0, "details": {"error": str(e)} - } \ No newline at end of file + } diff --git a/services/rule/src/plugins/openai_moderation.py b/services/rule/src/plugins/openai_moderation.py index 68b0660..a038005 100644 --- a/services/rule/src/plugins/openai_moderation.py +++ b/services/rule/src/plugins/openai_moderation.py @@ -17,6 +17,8 @@ import os from openai import OpenAI +from utils.logger_config import setup_logger +logger = setup_logger(__name__) client = OpenAI( api_key=os.environ.get("OPENAI_API_KEY"), diff --git a/services/rule/src/plugins/pii.py b/services/rule/src/plugins/pii.py index e31a0bb..18d58be 100644 --- a/services/rule/src/plugins/pii.py +++ b/services/rule/src/plugins/pii.py @@ -26,7 +26,8 @@ from presidio_anonymizer import AnonymizerEngine from presidio_analyzer.nlp_engine import NlpEngineProvider -logging.basicConfig(level=logging.DEBUG) +from utils.logger_config import setup_logger +logger = setup_logger(__name__) def initialize_engines(config): diff --git a/services/rule/src/plugins/prompt_guard.py b/services/rule/src/plugins/prompt_guard.py index c36a70d..359d659 100644 --- a/services/rule/src/plugins/prompt_guard.py +++ b/services/rule/src/plugins/prompt_guard.py @@ -9,11 +9,8 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification from huggingface_hub import login, HfApi -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) +from utils.logger_config import setup_logger +logger = setup_logger(__name__) def get_huggingface_token(): """Get token from environment with proper error handling.""" @@ -154,4 +151,4 @@ def handler(text: str, threshold: float, config: Dict[str, Any]) -> Dict[str, An "check_result": False, "score": 0.0, "details": {"error": str(e)} - } \ No newline at end of file + } diff --git a/services/rule/src/plugins/prompt_injection_llm.py b/services/rule/src/plugins/prompt_injection_llm.py index e0c0200..bcc3c05 100644 --- a/services/rule/src/plugins/prompt_injection_llm.py +++ b/services/rule/src/plugins/prompt_injection_llm.py @@ -18,12 +18,18 @@ import torch from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline +from utils.logger_config import setup_logger +logger = setup_logger(__name__) + # Initialize the tokenizer and model once when the module is imported tokenizer = AutoTokenizer.from_pretrained("protectai/deberta-v3-base-prompt-injection-v2") model = AutoModelForSequenceClassification.from_pretrained("protectai/deberta-v3-base-prompt-injection-v2") def handler(text: str, threshold: float, config: dict) -> dict: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + logger.info(f"Using device: {device}") + classifier = pipeline( "text-classification", model=model, diff --git a/services/rule/src/utils/__init__.py b/services/rule/src/utils/__init__.py new file mode 100644 index 0000000..ff565d1 --- /dev/null +++ b/services/rule/src/utils/__init__.py @@ -0,0 +1 @@ +# Empty file to make utils a Python package diff --git a/services/rule/src/utils/logger_config.py b/services/rule/src/utils/logger_config.py new file mode 100644 index 0000000..3eaa783 --- /dev/null +++ b/services/rule/src/utils/logger_config.py @@ -0,0 +1,26 @@ +import logging +import os +import json + +def setup_logger(name): + logger = logging.getLogger(name) + + # Only configure if handlers haven't been set up + if not logger.handlers: + log_level = os.getenv('LOG_LEVEL', 'INFO').upper() + numeric_level = getattr(logging, log_level, None) + + if not isinstance(numeric_level, int): + raise ValueError(f'Invalid log level: {log_level}') + + json_formatter = logging.Formatter( + '{"timestamp":"%(asctime)s", "level":"%(levelname)s", "message":"%(message)s", ' + '"name":"%(name)s", "filename":"%(filename)s", "lineno":%(lineno)d}' + ) + + handler = logging.StreamHandler() + handler.setFormatter(json_formatter) + logger.setLevel(numeric_level) + logger.addHandler(handler) + + return logger