Skip to content

Commit

Permalink
✨ Add custom metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
pajowu committed Nov 19, 2023
1 parent db9e262 commit 4d28293
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 102 deletions.
19 changes: 18 additions & 1 deletion backend/openapi-schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -626,10 +626,14 @@ components:
- name
title: WorkerWithId
type: object
securitySchemes:
HTTPBasic:
scheme: basic
type: http
info:
title: FastAPI
version: 0.1.0
openapi: 3.0.2
openapi: 3.1.0
paths:
/:
get:
Expand Down Expand Up @@ -1579,3 +1583,16 @@ paths:
$ref: '#/components/schemas/HTTPValidationError'
description: Validation Error
summary: Serve Media
/metrics:
get:
description: Endpoint that serves Prometheus metrics.
operationId: metrics_metrics_get
responses:
'200':
content:
application/json:
schema: {}
description: Successful Response
security:
- HTTPBasic: []
summary: Metrics
109 changes: 15 additions & 94 deletions backend/pdm.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"transcribee-proto @ file:///${PROJECT_ROOT}/../proto",
"python-frontmatter>=1.0.0",
"psycopg2>=2.9.9",
"aioprometheus[starlette] @ git+https://github.com/bugbakery/aioprometheus.git@2eaf503426eb0e71c8d4d51924a45a10f2937bec",
"prometheus-fastapi-instrumentator>=6.1.0",
]
requires-python = ">=3.10"
readme = "./README.md"
Expand Down
5 changes: 5 additions & 0 deletions backend/transcribee_backend/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ def validate_worker_authorization(session: Session, authorization: str) -> Worke
worker = session.exec(statement).one_or_none()
if worker is None:
raise HTTPException(status_code=401)

worker.last_seen = now_tz_aware()
session.add(worker)
session.commit()

return worker


Expand Down
3 changes: 3 additions & 0 deletions backend/transcribee_backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class Settings(BaseSettings):
model_config_path: Path = Path("data/models.json")
pages_dir: Path = Path("data/pages/")

metrics_username = "transcribee"
metrics_password = "transcribee"


class ModelConfig(BaseModel):
id: str
Expand Down
12 changes: 6 additions & 6 deletions backend/transcribee_backend/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import asyncio

from aioprometheus.asgi.middleware import MetricsMiddleware
from aioprometheus.asgi.starlette import metrics
from fastapi import FastAPI
from fastapi import Depends, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from prometheus_fastapi_instrumentator import Instrumentator

from transcribee_backend.config import settings
from transcribee_backend.helpers.periodic_tasks import run_periodic
from transcribee_backend.helpers.tasks import remove_expired_tokens, timeout_attempts
from transcribee_backend.metrics import init_metrics, metrics_auth, refresh_metrics
from transcribee_backend.routers.config import config_router
from transcribee_backend.routers.document import document_router
from transcribee_backend.routers.page import page_router
Expand All @@ -18,9 +18,8 @@
from .media_storage import serve_media

app = FastAPI()
app.add_middleware(MetricsMiddleware)
app.add_route("/metrics", metrics)

Instrumentator().instrument(app).expose(app, dependencies=[Depends(metrics_auth)])
init_metrics()

origins = ["*"]

Expand Down Expand Up @@ -55,3 +54,4 @@ async def setup_periodic_tasks():
run_periodic(timeout_attempts, seconds=min(30, settings.worker_timeout))
)
asyncio.create_task(run_periodic(remove_expired_tokens, seconds=60 * 60)) # 1 hour
asyncio.create_task(run_periodic(refresh_metrics, seconds=1))
128 changes: 128 additions & 0 deletions backend/transcribee_backend/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import datetime
import secrets
from abc import abstractmethod
from typing import List

from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from prometheus_client import Gauge
from sqlmodel import Session, col, func

from transcribee_backend.config import settings
from transcribee_backend.db import SessionContextManager
from transcribee_backend.helpers.time import now_tz_aware
from transcribee_backend.models.document import Document
from transcribee_backend.models.task import Task, TaskAttempt, TaskState
from transcribee_backend.models.user import User
from transcribee_backend.models.worker import Worker


class Metric:
@abstractmethod
def refresh(self, session: Session):
pass


class TasksInState(Metric):
def __init__(self):
self.collector = Gauge("tasks", "Number of tasks", ["state"])

def refresh(self, session: Session):
result = session.query(Task.state, func.count()).group_by(Task.state).all()
counts = {x: 0 for x in TaskState}
for state, count in result:
counts[state] = count
for state, count in counts.items():
self.collector.labels(state=state.value).set(count)


class Workers(Metric):
def __init__(self):
self.collector = Gauge("workers", "Workers", ["group"])

def refresh(self, session: Session):
(result,) = session.query(func.count(Worker.id)).one()
self.collector.labels(group="all").set(result)

now = now_tz_aware()
worker_timeout_ago = now - datetime.timedelta(seconds=settings.worker_timeout)
(result,) = (
session.query(func.count(Worker.id))
.where(
col(Worker.last_seen) >= worker_timeout_ago,
)
.one()
)
self.collector.labels(group="alive").set(result)


class Users(Metric):
def __init__(self):
self.collector = Gauge("users", "Registered users")

def refresh(self, session: Session):
(result,) = session.query(func.count(User.id)).one()
self.collector.set(result)


class Documents(Metric):
def __init__(self):
self.collector = Gauge("documents", "Documents")

def refresh(self, session: Session):
(result,) = session.query(func.count(Document.id)).one()
self.collector.set(result)


class Queue(Metric):
def __init__(self):
self.collector = Gauge("queue", "Queue length in seconds")

def refresh(self, session: Session):
(result,) = (
session.query(
func.coalesce(
func.sum(
Document.duration * (1 - func.coalesce(TaskAttempt.progress, 0))
),
0,
),
)
.join(Task, Task.document_id == Document.id)
.join(TaskAttempt, Task.current_attempt_id == TaskAttempt.id, isouter=True)
.where(col(Task.state).in_(["NEW", "ASSIGNED"]))
).one()
self.collector.set(result)


METRIC_CLASSES: List[type[Metric]] = [TasksInState, Workers, Users, Documents, Queue]
METRICS: List[Metric] = []


def refresh_metrics():
with SessionContextManager() as session:
for metric in METRICS:
metric.refresh(session)


def init_metrics():
for klass in METRIC_CLASSES:
METRICS.append(klass())


security = HTTPBasic()


def metrics_auth(credentials: HTTPBasicCredentials = Depends(security)):
is_correct_username = secrets.compare_digest(
credentials.username.encode("utf8"), settings.metrics_username.encode("utf8")
)
is_correct_password = secrets.compare_digest(
credentials.password.encode("utf8"), settings.metrics_password.encode("utf8")
)
if not (is_correct_username and is_correct_password):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
headers={"WWW-Authenticate": "Basic"},
)
return credentials.username
21 changes: 21 additions & 0 deletions frontend/src/openapi-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ export interface paths {
/** Serve Media */
get: operations["serve_media_media__file__get"];
};
"/metrics": {
/**
* Metrics
* @description Endpoint that serves Prometheus metrics.
*/
get: operations["metrics_metrics_get"];
};
}

export type webhooks = Record<string, never>;
Expand Down Expand Up @@ -1304,4 +1311,18 @@ export interface operations {
};
};
};
/**
* Metrics
* @description Endpoint that serves Prometheus metrics.
*/
metrics_metrics_get: {
responses: {
/** @description Successful Response */
200: {
content: {
"application/json": unknown;
};
};
};
};
}

0 comments on commit 4d28293

Please sign in to comment.