diff --git a/api/.env.example b/api/.env.example index 071a200e680278..1f25a671841b5f 100644 --- a/api/.env.example +++ b/api/.env.example @@ -427,4 +427,6 @@ CREATE_TIDB_SERVICE_JOB_ENABLED=false # Maximum number of submitted thread count in a ThreadPool for parallel node execution MAX_SUBMIT_COUNT=100 # Lockout duration in seconds -LOGIN_LOCKOUT_DURATION=86400 \ No newline at end of file +LOGIN_LOCKOUT_DURATION=86400 + +PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus_multiproc_dir diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 73f8a95989baaf..9cdd3664bee614 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -23,7 +23,8 @@ class SecurityConfig(BaseSettings): SECRET_KEY: str = Field( description="Secret key for secure session cookie signing." "Make sure you are changing this key for your deployment with a strong key." - "Generate a strong key using `openssl rand -base64 42` or set via the `SECRET_KEY` environment variable.", + "Generate a strong key using `openssl rand -base64 42` " + "or set via the `SECRET_KEY` environment variable.", default="", ) @@ -767,6 +768,18 @@ class LoginConfig(BaseSettings): ) +class PrometheusConfig(BaseSettings): + HISTOGRAM_BUCKETS_1MIN: list[float] = Field( + description="The buckets of the Prometheus histogram for durations under 1 minute", + default=[0.1, 0.2, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 40, 50, 60], + ) + + HISTOGRAM_BUCKETS_5MIN: list[float] = Field( + description="The buckets of the Prometheus histogram for durations under 5 minutes", + default=[0.1, 0.2, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 40, 50, 60, 120, 180, 300], + ) + + class FeatureConfig( # place the configs in alphabet order AppExecutionConfig, @@ -794,6 +807,7 @@ class FeatureConfig( WorkflowNodeExecutionConfig, WorkspaceConfig, LoginConfig, + PrometheusConfig, # hosted services config HostedServiceConfig, CeleryBeatConfig, diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index ce0e95962772ad..62efac54192499 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -398,7 +398,9 @@ def _process_stream_response( conversation_id=self._conversation.id, trace_manager=trace_manager, ) - + self._workflow_time_it( + is_success=True, graph_runtime_state=graph_runtime_state, workflow_run=workflow_run + ) yield self._workflow_finish_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_run=workflow_run ) @@ -421,6 +423,9 @@ def _process_stream_response( conversation_id=None, trace_manager=trace_manager, ) + self._workflow_time_it( + is_success=False, graph_runtime_state=graph_runtime_state, workflow_run=workflow_run + ) yield self._workflow_finish_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_run=workflow_run ) @@ -445,7 +450,9 @@ def _process_stream_response( trace_manager=trace_manager, exceptions_count=event.exceptions_count, ) - + self._workflow_time_it( + is_success=False, graph_runtime_state=graph_runtime_state, workflow_run=workflow_run + ) yield self._workflow_finish_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_run=workflow_run ) diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index 79e5e2bcb96d60..97cdf77c38f189 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++
b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -357,6 +357,9 @@ def _process_stream_response( conversation_id=None, trace_manager=trace_manager, ) + self._workflow_time_it( + is_success=True, graph_runtime_state=graph_runtime_state, workflow_run=workflow_run + ) # save workflow app log self._save_workflow_app_log(workflow_run) @@ -381,6 +384,9 @@ def _process_stream_response( conversation_id=None, trace_manager=trace_manager, ) + self._workflow_time_it( + is_success=False, graph_runtime_state=graph_runtime_state, workflow_run=workflow_run + ) # save workflow app log self._save_workflow_app_log(workflow_run) diff --git a/api/core/app/task_pipeline/__init__.py b/api/core/app/task_pipeline/__init__.py index e69de29bb2d1d6..e4307ec31133d1 100644 --- a/api/core/app/task_pipeline/__init__.py +++ b/api/core/app/task_pipeline/__init__.py @@ -0,0 +1,36 @@ +from prometheus_client import Counter, Histogram + +from configs import dify_config + +app_request = Counter( + name="app_request", + documentation="The total count of APP requests", + labelnames=["app_id", "tenant_id", "username"], +) +app_request_failed = Counter( + name="app_request_failed", + documentation="The failed count of APP requests", + labelnames=["app_id", "tenant_id", "username"], +) +app_request_latency = Histogram( + name="app_request_latency", + documentation="The latency of APP requests", + unit="seconds", + labelnames=["app_id", "tenant_id", "username"], + buckets=dify_config.HISTOGRAM_BUCKETS_5MIN, +) +app_input_tokens = Counter( + name="app_input_tokens", + documentation="The input tokens consumed by APP requests", + labelnames=["app_id", "tenant_id", "username"], +) +app_output_tokens = Counter( + name="app_output_tokens", + documentation="The output tokens consumed by APP requests", + labelnames=["app_id", "tenant_id", "username"], +) +app_total_tokens = Counter( + name="app_total_tokens", + documentation="The total tokens consumed by APP requests", + labelnames=["app_id", "tenant_id", "username"], +) diff --git a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py index 917649f34e769c..6aef5cea482059 100644 --- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py @@ -39,6 +39,14 @@ MessageEndStreamResponse, StreamResponse, ) +from core.app.task_pipeline import ( + app_input_tokens, + app_output_tokens, + app_request, + app_request_failed, + app_request_latency, + app_total_tokens, +) from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline from core.app.task_pipeline.message_cycle_manage import MessageCycleManage from core.model_manager import ModelInstance @@ -251,6 +259,47 @@ def _wrapper_process_stream_response( if publisher: yield MessageAudioEndStreamResponse(audio="", task_id=task_id) + def _chat_time_it(self, is_success: bool) -> None: + """ + Record chat / completion / agent run metrics.
+ """ + app_id = self._app_config.app_id + tenant_id = self._app_config.tenant_id + username = self._conversation.from_account_name + app_request.labels( + app_id=app_id, + tenant_id=tenant_id, + username=username, + ).inc() + + if not is_success: + app_request_failed.labels( + app_id=app_id, + tenant_id=tenant_id, + username=username, + ).inc() + return + app_request_latency.labels( + app_id=app_id, + tenant_id=tenant_id, + username=username, + ).observe(self._message.provider_response_latency) + app_input_tokens.labels( + app_id=app_id, + tenant_id=tenant_id, + username=username, + ).inc(self._message.message_tokens) + app_output_tokens.labels( + app_id=app_id, + tenant_id=tenant_id, + username=username, + ).inc(self._message.answer_tokens) + app_total_tokens.labels( + app_id=app_id, + tenant_id=tenant_id, + username=username, + ).inc(self._message.message_tokens + self._message.answer_tokens) + def _process_stream_response( self, publisher: AppGeneratorTTSPublisher, trace_manager: Optional[TraceQueueManager] = None ) -> Generator[StreamResponse, None, None]: @@ -265,6 +314,7 @@ def _process_stream_response( if isinstance(event, QueueErrorEvent): err = self._handle_error(event, self._message) + self._chat_time_it(is_success=False) yield self._error_to_stream_response(err) break elif isinstance(event, QueueStopEvent | QueueMessageEndEvent): @@ -283,6 +333,7 @@ def _process_stream_response( # Save message self._save_message(trace_manager) + self._chat_time_it(is_success=True) yield self._message_end_to_stream_response() elif isinstance(event, QueueRetrieverResourcesEvent): diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py index e2fa12b1cddd70..666c4c2cc870fc 100644 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ b/api/core/app/task_pipeline/workflow_cycle_manage.py @@ -35,6 +35,14 @@ WorkflowStartStreamResponse, WorkflowTaskState, ) +from core.app.task_pipeline import ( + app_input_tokens, + app_output_tokens, + app_request, + app_request_failed, + app_request_latency, + app_total_tokens, +) from core.file import FILE_MODEL_IDENTITY, File from core.model_runtime.utils.encoders import jsonable_encoder from core.ops.entities.trace_entity import TraceTaskName @@ -42,6 +50,7 @@ from core.tools.tool_manager import ToolManager from core.workflow.entities.node_entities import NodeRunMetadataKey from core.workflow.enums import SystemVariableKey +from core.workflow.graph_engine import GraphRuntimeState from core.workflow.nodes import NodeType from core.workflow.nodes.tool.entities import ToolNodeData from core.workflow.workflow_entry import WorkflowEntry @@ -119,6 +128,33 @@ def _handle_workflow_run_start(self) -> WorkflowRun: return workflow_run + def _workflow_time_it( + self, is_success: bool, graph_runtime_state: GraphRuntimeState, workflow_run: WorkflowRun + ) -> None: + """ + Record advanced-chat / workflow run metrics. 
+ """ + app_id = workflow_run.app_id + tenant_id = workflow_run.tenant_id + username = self._user.name + app_request.labels(app_id=app_id, tenant_id=tenant_id, username=username).inc() + + if not is_success: + app_request_failed.labels(app_id=app_id, tenant_id=tenant_id, username=username).inc() + return + app_request_latency.labels(app_id=app_id, tenant_id=tenant_id, username=username).observe( + workflow_run.elapsed_time + ) + app_input_tokens.labels(app_id=app_id, tenant_id=tenant_id, username=username).inc( + graph_runtime_state.llm_usage.prompt_tokens + ) + app_output_tokens.labels(app_id=app_id, tenant_id=tenant_id, username=username).inc( + graph_runtime_state.llm_usage.completion_tokens + ) + app_total_tokens.labels(app_id=app_id, tenant_id=tenant_id, username=username).inc( + graph_runtime_state.llm_usage.total_tokens + ) + def _handle_workflow_run_success( self, workflow_run: WorkflowRun, diff --git a/api/core/model_manager.py b/api/core/model_manager.py index 1986688551b601..997cedf64f2145 100644 --- a/api/core/model_manager.py +++ b/api/core/model_manager.py @@ -2,6 +2,8 @@ from collections.abc import Callable, Generator, Iterable, Sequence from typing import IO, Any, Optional, Union, cast +from prometheus_client import Counter, Histogram + from configs import dify_config from core.entities.embedding_type import EmbeddingInputType from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle @@ -26,6 +28,25 @@ logger = logging.getLogger(__name__) +model_request_total_counter = Counter( + name="model_request_total_counter", + documentation="The total count of model requests", + labelnames=["model_type", "provider", "model", "method"], +) +model_request_failed_counter = Counter( + name="model_request_failed_counter", + documentation="The failed count of model requests", + labelnames=["model_type", "provider", "model", "method"], +) +model_request_latency = Histogram( + name="model_request_latency", + documentation="The latency of model requests. For the LLM model, it just indicates " + "the TTFT (a.k.a.
Time To First Token).", + unit="seconds", + labelnames=["model_type", "provider", "model", "method"], + buckets=dify_config.HISTOGRAM_BUCKETS_1MIN, +) + class ModelInstance: """ @@ -298,6 +319,30 @@ def invoke_tts(self, content_text: str, tenant_id: str, voice: str, user: Option voice=voice, ) + def _invoke_with_timeit(self, function: Callable[..., Any], *args, **kwargs): + with model_request_latency.labels( + model_type=self.model_type_instance.model_type.value, + provider=self.provider, + model=self.model, + method=function.__name__ if hasattr(function, "__name__") else "unknown", + ).time(): + model_request_total_counter.labels( + model_type=self.model_type_instance.model_type.value, + provider=self.provider, + model=self.model, + method=function.__name__ if hasattr(function, "__name__") else "unknown", + ).inc() + try: + return function(*args, **kwargs) + except Exception as e: + model_request_failed_counter.labels( + model_type=self.model_type_instance.model_type.value, + provider=self.provider, + model=self.model, + method=function.__name__ if hasattr(function, "__name__") else "unknown", + ).inc() + raise e + def _round_robin_invoke(self, function: Callable[..., Any], *args, **kwargs): """ Round-robin invoke @@ -307,7 +352,7 @@ def _round_robin_invoke(self, function: Callable[..., Any], *args, **kwargs): :return: """ if not self.load_balancing_manager: - return function(*args, **kwargs) + return self._invoke_with_timeit(function, *args, **kwargs) last_exception = None while True: @@ -321,7 +366,7 @@ def _round_robin_invoke(self, function: Callable[..., Any], *args, **kwargs): try: if "credentials" in kwargs: del kwargs["credentials"] - return function(*args, **kwargs, credentials=lb_config.credentials) + return self._invoke_with_timeit(function, *args, **kwargs, credentials=lb_config.credentials) except InvokeRateLimitError as e: # expire in 60 seconds self.load_balancing_manager.cooldown(lb_config, expire=60) diff --git a/api/core/model_runtime/callbacks/metrics_callback.py b/api/core/model_runtime/callbacks/metrics_callback.py new file mode 100644 index 00000000000000..4c9a3716bcdecb --- /dev/null +++ b/api/core/model_runtime/callbacks/metrics_callback.py @@ -0,0 +1,118 @@ +from typing import Optional + +from prometheus_client import Counter, Histogram + +from configs import dify_config +from core.model_runtime.callbacks.base_callback import Callback +from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk +from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool +from core.model_runtime.model_providers.__base.ai_model import AIModel + +llm_model_request_total_counter = Counter( + name="llm_model_request_total_counter", + documentation="The total count of LLM model requests", + labelnames=["model_type", "model"], +) +llm_model_request_failed_counter = Counter( + name="llm_model_request_failed_counter", + documentation="The failed count of LLM model requests", + labelnames=["model_type", "model"], +) +llm_model_request_first_chunk_latency = Histogram( + name="llm_model_request_first_chunk_latency", + documentation="The first chunk latency of LLM model requests", + unit="seconds", + labelnames=["model_type", "model"], + buckets=dify_config.HISTOGRAM_BUCKETS_1MIN, +) +llm_model_request_following_chunk_latency = Histogram( + name="llm_model_request_following_chunk_latency", + documentation="The following chunk latency of LLM model requests", + unit="seconds", + labelnames=["model_type", "model"], + 
buckets=Histogram.DEFAULT_BUCKETS, +) +llm_model_request_entire_latency = Histogram( + name="llm_model_request_entire_latency", + documentation="The entire latency of LLM model requests", + unit="seconds", + labelnames=["model_type", "model"], + buckets=dify_config.HISTOGRAM_BUCKETS_5MIN, +) + + +class MetricsCallback(Callback): + first_chunk: bool = True + + def on_before_invoke( + self, + llm_instance: AIModel, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> None: + llm_model_request_total_counter.labels(model_type=llm_instance.model_type.value, model=model).inc() + + def on_new_chunk( + self, + llm_instance: AIModel, + chunk: LLMResultChunk, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ): + # Skip the last chunk; it only carries the usage for the entire request. + if chunk.delta.finish_reason is not None: + return + + if self.first_chunk: + llm_model_request_first_chunk_latency.labels(model_type=llm_instance.model_type.value, model=model).observe( + chunk.delta.usage.latency + ) + self.first_chunk = False + else: + llm_model_request_following_chunk_latency.labels( + model_type=llm_instance.model_type.value, model=model + ).observe(chunk.delta.usage.latency) + + def on_after_invoke( + self, + llm_instance: AIModel, + result: LLMResult, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> None: + llm_model_request_entire_latency.labels(model_type=llm_instance.model_type.value, model=model).observe( + result.usage.latency + ) + + def on_invoke_error( + self, + llm_instance: AIModel, + ex: Exception, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> None: + llm_model_request_failed_counter.labels(model_type=llm_instance.model_type.value, model=model).inc() diff --git a/api/core/model_runtime/model_providers/__base/ai_model.py b/api/core/model_runtime/model_providers/__base/ai_model.py index 79a1d28ebe637e..78f01be668dbc8 100644 --- a/api/core/model_runtime/model_providers/__base/ai_model.py +++ b/api/core/model_runtime/model_providers/__base/ai_model.py @@ -31,6 +31,7 @@ class AIModel(ABC): model_type: ModelType model_schemas: Optional[list[AIModelEntity]] = None started_at: float = 0 + last_chunked_at: float = 0 # pydantic configs model_config = ConfigDict(protected_namespaces=()) diff --git a/api/core/model_runtime/model_providers/__base/large_language_model.py b/api/core/model_runtime/model_providers/__base/large_language_model.py index 8faeffa872b40f..e30ffa1d89344c 100644 --- a/api/core/model_runtime/model_providers/__base/large_language_model.py +++ b/api/core/model_runtime/model_providers/__base/large_language_model.py @@ -10,6 +10,7 @@ from configs import dify_config from core.model_runtime.callbacks.base_callback import Callback from core.model_runtime.callbacks.logging_callback import LoggingCallback +from
core.model_runtime.callbacks.metrics_callback import MetricsCallback from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, @@ -74,8 +75,10 @@ def invoke( model_parameters = self._validate_and_filter_model_parameters(model, model_parameters, credentials) self.started_at = time.perf_counter() + self.last_chunked_at = self.started_at callbacks = callbacks or [] + callbacks.append(MetricsCallback()) if dify_config.DEBUG: callbacks.append(LoggingCallback()) @@ -429,6 +432,14 @@ def _invoke_result_generator( for chunk in result: yield chunk + if chunk.delta.usage: + usage = chunk.delta.usage + else: + chunk.delta.usage = LLMUsage.empty_usage() + now = time.perf_counter() + chunk.delta.usage.latency = now - self.last_chunked_at + self.last_chunked_at = now + self._trigger_new_chunk_callbacks( chunk=chunk, model=model, @@ -444,8 +455,6 @@ def _invoke_result_generator( prompt_message.content += chunk.delta.message.content real_model = chunk.model - if chunk.delta.usage: - usage = chunk.delta.usage if chunk.system_fingerprint: system_fingerprint = chunk.system_fingerprint diff --git a/api/core/tools/tool/tool.py b/api/core/tools/tool/tool.py index 8d4045038171a6..25bbd84f47b8ba 100644 --- a/api/core/tools/tool/tool.py +++ b/api/core/tools/tool/tool.py @@ -4,9 +4,11 @@ from enum import Enum, StrEnum from typing import TYPE_CHECKING, Any, Optional, Union +from prometheus_client import Counter, Histogram from pydantic import BaseModel, ConfigDict, field_validator from pydantic_core.core_schema import ValidationInfo +from configs import dify_config from core.app.entities.app_invoke_entities import InvokeFrom from core.tools.entities.tool_entities import ( ToolDescription, @@ -24,6 +26,22 @@ if TYPE_CHECKING: from core.file.models import File +tool_request_total_counter = Counter( + name="tool_request_total_counter", documentation="The total count of tool requests", labelnames=["provider", "tool"] +) +tool_request_failed_counter = Counter( + name="tool_request_failed_counter", + documentation="The failed count of tool requests", + labelnames=["provider", "tool"], +) +tool_request_latency = Histogram( + name="tool_request_latency", + documentation="The latency of tool requests", + unit="seconds", + labelnames=["provider", "tool"], + buckets=dify_config.HISTOGRAM_BUCKETS_5MIN, +) + class Tool(BaseModel, ABC): identity: Optional[ToolIdentity] = None @@ -206,10 +224,17 @@ def invoke(self, user_id: str, tool_parameters: Mapping[str, Any]) -> list[ToolI # try parse tool parameters into the correct type tool_parameters = self._transform_tool_parameters_type(tool_parameters) - result = self._invoke( - user_id=user_id, - tool_parameters=tool_parameters, - ) + result = [] + with tool_request_latency.labels(provider=self.identity.provider, tool=self.identity.name).time(): + tool_request_total_counter.labels(provider=self.identity.provider, tool=self.identity.name).inc() + try: + result = self._invoke( + user_id=user_id, + tool_parameters=tool_parameters, + ) + except Exception as e: + tool_request_failed_counter.labels(provider=self.identity.provider, tool=self.identity.name).inc() + raise e if not isinstance(result, list): result = [result] diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index 881263171fa145..07f96a952e051e 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -26,6 +26,7 @@ if [[ "${MODE}" == "worker" ]]; then elif 
[[ "${MODE}" == "beat" ]]; then exec celery -A app.celery beat --loglevel ${LOG_LEVEL} else + mkdir -p "${PROMETHEUS_MULTIPROC_DIR}" if [[ "${DEBUG}" == "true" ]]; then exec flask run --host=${DIFY_BIND_ADDRESS:-0.0.0.0} --port=${DIFY_PORT:-5001} --debug else diff --git a/api/extensions/ext_app_metrics.py b/api/extensions/ext_app_metrics.py index de1cdfeb984e86..c865cc4a088f14 100644 --- a/api/extensions/ext_app_metrics.py +++ b/api/extensions/ext_app_metrics.py @@ -3,9 +3,58 @@ import threading from flask import Response +from prometheus_client import ( + CollectorRegistry, + Counter, + Gauge, + make_wsgi_app, + multiprocess, +) +from sqlalchemy import text +from werkzeug.middleware.dispatcher import DispatcherMiddleware from configs import dify_config from dify_app import DifyApp +from extensions.ext_database import db +from extensions.ext_redis import redis_client +from extensions.ext_storage import storage + +health_check_total_counter = Counter(name="health_check_total_counter", documentation="The total count of health checks") +redis_checked_counter = Counter( + name="redis_checked_counter", documentation="The count of successful Redis health checks" +) +db_checked_counter = Counter(name="db_checked_counter", documentation="The count of successful DB health checks") +storage_checked_counter = Counter( + name="storage_checked_counter", documentation="The count of successful storage health checks" +) +redis_used_memory_bytes = Gauge( + name="redis_used_memory_bytes", documentation="The used bytes of memory in Redis", multiprocess_mode="livesum" +) +redis_total_memory_bytes = Gauge( + name="redis_total_memory_bytes", documentation="The total bytes of memory in Redis", multiprocess_mode="livesum" +) + +db_pool_total_size = Gauge( + name="db_pool_total_size", + documentation="The total size of db pool", + multiprocess_mode="livesum", +) +db_pool_checkout_size = Gauge( + name="db_pool_checkout_size", documentation="The checkout size of db pool", multiprocess_mode="livesum" +) +db_pool_overflow_size = Gauge( + name="db_pool_overflow_size", documentation="The overflow size of db pool", multiprocess_mode="livesum" +) + + +# Use a multiprocess collector for the registry when PROMETHEUS_MULTIPROC_DIR is configured +def _make_metrics_app(): + if os.getenv("PROMETHEUS_MULTIPROC_DIR", "") != "": + registry = CollectorRegistry() + multiprocess.MultiProcessCollector(registry) + return make_wsgi_app(registry=registry) + else: + return make_wsgi_app() def init_app(app: DifyApp): @@ -18,11 +67,32 @@ def after_request(response): @app.route("/health") def health(): - return Response( - json.dumps({"pid": os.getpid(), "status": "ok", "version": dify_config.CURRENT_VERSION}), - status=200, - content_type="application/json", - ) + try: + health_check_key = "dify.health_check" + redis_client.set(health_check_key, 1) + redis_client.get(health_check_key) + redis_checked_counter.inc() + + info = redis_client.info() + redis_used_memory_bytes.set(info["used_memory"]) + redis_total_memory_bytes.set(info["maxmemory"] if info["maxmemory"] != 0 else info["total_system_memory"]) + + db.session.execute(text("SELECT 1")) + db_checked_counter.inc() + + storage.save(health_check_key, b"test") + storage.load(health_check_key) + storage_checked_counter.inc() + + return Response( + json.dumps({"pid": os.getpid(), "status": "ok", "version": dify_config.CURRENT_VERSION}), + status=200, + content_type="application/json", + ) + finally: + # Increment the total counter last, so that even on a healthy run health_check_total_counter never exceeds any *_checked_counter, + # keeping max(*_checked_counter / health_check_total_counter, 1) == 1 and making it easy for the monitoring system to detect anomalies. + health_check_total_counter.inc() @app.route("/threads") def threads(): @@ -54,7 +124,7 @@ def pool_stat(): from extensions.ext_database import db engine = db.engine - return { + stat = { "pid": os.getpid(), "pool_size": engine.pool.size(), "checked_in_connections": engine.pool.checkedin(), @@ -63,3 +133,9 @@ def pool_stat(): "connection_timeout": engine.pool.timeout(), "recycle_time": db.engine.pool._recycle, } + db_pool_total_size.set(stat["pool_size"]) + db_pool_checkout_size.set(stat["checked_out_connections"]) + db_pool_overflow_size.set(stat["overflow_connections"]) + return stat + + app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {"/metrics": _make_metrics_app()}) diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 42422263c4dd03..be9f423eec93b8 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -1,8 +1,10 @@ import logging from collections.abc import Callable, Generator +from functools import wraps from typing import Union from flask import Flask +from prometheus_client import Counter, Histogram from configs import dify_config from dify_app import DifyApp @@ -11,6 +13,39 @@ logger = logging.getLogger(__name__) +storage_request_latency = Histogram( + name="storage_request_latency", + documentation="The latency of storage requests", + unit="seconds", + labelnames=["method", "provider"], +) + +storage_request_total_counter = Counter( + name="storage_request_total_counter", + documentation="The total count of storage requests", + labelnames=["method", "provider"], +) + +storage_request_failed_counter = Counter( + name="storage_request_failed_counter", + documentation="The failed count of storage requests", + labelnames=["method", "provider"], +) + + +def timeit(func): + @wraps(func) + def decorator(*args, **kwargs): + with storage_request_latency.labels(method=func.__name__, provider=dify_config.STORAGE_TYPE).time(): + storage_request_total_counter.labels(method=func.__name__, provider=dify_config.STORAGE_TYPE).inc() + try: + return func(*args, **kwargs) + except Exception as e: + storage_request_failed_counter.labels(method=func.__name__, provider=dify_config.STORAGE_TYPE).inc() + raise e + + return decorator + class Storage: def init_app(self, app: Flask): @@ -72,6 +107,7 @@ def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]: case _: raise ValueError(f"unsupported storage type {storage_type}") + @timeit def save(self, filename, data): try: self.storage_runner.save(filename, data) @@ -79,6 +115,7 @@ def save(self, filename, data): logger.exception(f"Failed to save file {filename}") raise e + @timeit def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]: try: if stream: @@ -89,6 +126,7 @@ def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Genera logger.exception(f"Failed to load file {filename}") raise e + @timeit def load_once(self, filename: str) -> bytes: try: return self.storage_runner.load_once(filename) @@ -96,6 +134,7 @@ def load_once(self, filename: str) -> bytes: logger.exception(f"Failed to load_once file {filename}") raise e + @timeit def load_stream(self, filename: str) -> Generator: try: return self.storage_runner.load_stream(filename) @@ -103,6 +142,7 @@ def load_stream(self, filename: str) -> Generator: logger.exception(f"Failed to load_stream file {filename}") raise e + @timeit def download(self, filename, target_filepath): try: self.storage_runner.download(filename, target_filepath) @@ -110,6 +150,7 @@ def
download(self, filename, target_filepath): logger.exception(f"Failed to download file {filename}") raise e + @timeit def exists(self, filename): try: return self.storage_runner.exists(filename) @@ -117,6 +158,7 @@ def exists(self, filename): logger.exception(f"Failed to check file exists {filename}") raise e + @timeit def delete(self, filename): try: return self.storage_runner.delete(filename) diff --git a/api/poetry.lock b/api/poetry.lock index 35fda9b36fa42a..a89b7f8c5da761 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -1993,7 +1993,6 @@ files = [ {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb"}, {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b"}, {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543"}, - {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385"}, {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e"}, {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e"}, {file = "cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053"}, @@ -2004,7 +2003,6 @@ files = [ {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289"}, {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7"}, {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c"}, - {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba"}, {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64"}, {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285"}, {file = "cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417"}, @@ -6852,6 +6850,20 @@ files = [ [package.extras] dev = ["certifi", "pytest (>=8.1.1)"] +[[package]] +name = "prometheus-client" +version = "0.21.1" +description = "Python client for the Prometheus monitoring system." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "prometheus_client-0.21.1-py3-none-any.whl", hash = "sha256:594b45c410d6f4f8888940fe80b5cc2521b305a1fafe1c58609ef715a001f301"}, + {file = "prometheus_client-0.21.1.tar.gz", hash = "sha256:252505a722ac04b0456be05c05f75f45d760c2911ffc45f2a06bcaed9f3ae3fb"}, +] + +[package.extras] +twisted = ["twisted"] + [[package]] name = "prompt-toolkit" version = "3.0.48" @@ -11095,4 +11107,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "14476bf95504a4df4b8d5a5c6608c6aa3dae7499d27d1e41ef39d761cc7c693d" +content-hash = "740e20c643f9bc492cfc5cb8760d4100fdd90af84644027995e7778da416b3bd" diff --git a/api/pyproject.toml b/api/pyproject.toml index da9eabecf55ccf..e6ae9e27203dbb 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -60,6 +60,7 @@ oci = "~2.135.1" openai = "~1.52.0" openpyxl = "~3.1.5" pandas = { version = "~2.2.2", extras = ["performance", "excel"] } +prometheus-client = ">=0.5.0,<1.0.0" psycopg2-binary = "~2.9.6" pycryptodome = "3.19.1" pydantic = "~2.9.2" diff --git a/docker/.env.example b/docker/.env.example index 43e67a8db41254..27210cbc7f6666 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -923,3 +923,4 @@ CREATE_TIDB_SERVICE_JOB_ENABLED=false # Maximum number of submitted thread count in a ThreadPool for parallel node execution MAX_SUBMIT_COUNT=100 +PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus_multiproc_dir diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 99bc14c7171e4b..27f70440128f5b 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -386,6 +386,7 @@ x-shared-env: &shared-api-worker-env CSP_WHITELIST: ${CSP_WHITELIST:-} CREATE_TIDB_SERVICE_JOB_ENABLED: ${CREATE_TIDB_SERVICE_JOB_ENABLED:-false} MAX_SUBMIT_COUNT: ${MAX_SUBMIT_COUNT:-100} + PROMETHEUS_MULTIPROC_DIR: ${PROMETHEUS_MULTIPROC_DIR:-/tmp/prometheus_multiproc_dir} services: # API service
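
Note: to sanity-check the multiprocess metrics wiring outside of Flask, a minimal sketch along these lines can aggregate the per-worker metric files and print the same exposition text that GET /metrics serves. It assumes PROMETHEUS_MULTIPROC_DIR points at the directory the API workers write to; the path below is only the example default from .env.example.

import os

from prometheus_client import CollectorRegistry, generate_latest, multiprocess

# Assumption: the same directory the API workers use; override as needed.
os.environ.setdefault("PROMETHEUS_MULTIPROC_DIR", "/tmp/prometheus_multiproc_dir")

# Merge the per-process metric files into a single registry, mirroring _make_metrics_app().
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)

# Print the Prometheus text exposition format (what GET /metrics would return).
print(generate_latest(registry).decode("utf-8"))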