From a554f6e2d7d3eefc5d5c049ecfda3590195d5fb0 Mon Sep 17 00:00:00 2001 From: pajowu Date: Sat, 18 Nov 2023 22:35:08 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Increase=20performance?= =?UTF-8?q?=20of=20list=5Fdocuments=20by=20eager=20loading?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/transcribee_backend/models/task.py | 35 +++++++++++++++---- .../transcribee_backend/routers/document.py | 7 ++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/backend/transcribee_backend/models/task.py b/backend/transcribee_backend/models/task.py index d5ff8972..e3a45eb1 100644 --- a/backend/transcribee_backend/models/task.py +++ b/backend/transcribee_backend/models/task.py @@ -95,6 +95,12 @@ class Task(TaskBase, table=True): "secondaryjoin": "Task.id==TaskDependency.dependant_on_id", }, ) + dependency_links: List[TaskDependency] = Relationship( + sa_relationship_kwargs={ + "primaryjoin": "Task.id==TaskDependency.dependent_task_id", + "viewonly": True, + }, + ) dependants: List["Task"] = Relationship( back_populates="dependencies", link_model=TaskDependency, @@ -155,12 +161,29 @@ class TaskResponse(TaskBase): @classmethod def from_orm(cls, task: Task, update={}) -> Self: - return super().from_orm( - task, - update={ - "dependencies": [x.id for x in task.dependencies], - **update, - }, + # The following code is equivalent to this: + # return super().from_orm( + # task, + # update={ + # "dependencies": [x.dependant_on_id for x in task.dependency_links], + # **update, + # }, + # ) + # But much faster, because from_orm destructures the `obj` to mix it + # with the `update` dict, which causes an access to all attributes, + # including `dependencies`/`dependents` which are then all seperately + # selected from the database, causing many query + # Even with a small number of document this cuts the loading time of + # the `/api/v1/documents/` endpoint roughly in half on my test machine + return cls( + id=task.id, + state=task.state, + dependencies=[x.dependant_on_id for x in task.dependency_links], + current_attempt=None, + document_id=task.document_id, + task_type=task.task_type, + task_parameters=task.task_parameters, + **update, ) diff --git a/backend/transcribee_backend/routers/document.py b/backend/transcribee_backend/routers/document.py index 649fc3af..6211ad55 100644 --- a/backend/transcribee_backend/routers/document.py +++ b/backend/transcribee_backend/routers/document.py @@ -22,6 +22,7 @@ from fastapi.exceptions import RequestValidationError from pydantic import BaseModel from pydantic.error_wrappers import ErrorWrapper +from sqlalchemy.orm import selectinload from sqlalchemy.sql.expression import desc from sqlmodel import Session, col, select from transcribee_proto.api import Document as ApiDocument @@ -409,6 +410,12 @@ def list_documents( select(Document) .where(Document.user == token.user) .order_by(desc(Document.changed_at), Document.id) + .options( + selectinload("tasks"), + selectinload("media_files"), + selectinload("media_files.tags"), + selectinload("tasks.dependency_links"), + ) ) results = session.exec(statement) return [doc.as_api_document() for doc in results] From 93353593bcc41318eece0645fad3d96ea4bc8c05 Mon Sep 17 00:00:00 2001 From: pajowu Date: Sun, 19 Nov 2023 23:58:53 +0100 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Reduce=20number=20of?= =?UTF-8?q?=20sql=20queries=20in=20task=20api?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/openapi-schema.yml | 9 +++------ backend/transcribee_backend/auth.py | 5 ++++- .../transcribee_backend/routers/document.py | 6 +++++- backend/transcribee_backend/routers/task.py | 19 ++++++++++--------- backend/transcribee_backend/routers/user.py | 9 +++------ frontend/src/openapi-schema.ts | 6 +++--- 6 files changed, 28 insertions(+), 26 deletions(-) diff --git a/backend/openapi-schema.yml b/backend/openapi-schema.yml index d3456c8c..ba1d443b 100644 --- a/backend/openapi-schema.yml +++ b/backend/openapi-schema.yml @@ -1251,8 +1251,7 @@ paths: '200': content: application/json: - schema: - $ref: '#/components/schemas/AssignedTaskResponse' + schema: {} description: Successful Response '422': content: @@ -1289,8 +1288,7 @@ paths: '200': content: application/json: - schema: - $ref: '#/components/schemas/AssignedTaskResponse' + schema: {} description: Successful Response '422': content: @@ -1327,8 +1325,7 @@ paths: '200': content: application/json: - schema: - $ref: '#/components/schemas/AssignedTaskResponse' + schema: {} description: Successful Response '422': content: diff --git a/backend/transcribee_backend/auth.py b/backend/transcribee_backend/auth.py index d352acd3..10d86b4f 100644 --- a/backend/transcribee_backend/auth.py +++ b/backend/transcribee_backend/auth.py @@ -8,6 +8,7 @@ from typing import Optional, Tuple from fastapi import Depends, Header, HTTPException +from sqlalchemy.orm import joinedload from sqlmodel import Session, col, or_, select from transcribee_backend.db import get_session @@ -157,7 +158,9 @@ def get_authorized_task( session: Session = Depends(get_session), authorized_worker: Worker = Depends(get_authorized_worker), ): - statement = select(Task).where(Task.id == task_id) + statement = ( + select(Task).where(Task.id == task_id).options(joinedload(Task.current_attempt)) + ) task = session.exec(statement).one_or_none() if task is None: raise HTTPException(status_code=404) diff --git a/backend/transcribee_backend/routers/document.py b/backend/transcribee_backend/routers/document.py index 6211ad55..3cccd898 100644 --- a/backend/transcribee_backend/routers/document.py +++ b/backend/transcribee_backend/routers/document.py @@ -462,7 +462,11 @@ def get_document_tasks( auth: AuthInfo = Depends(get_doc_min_readonly_auth), session: Session = Depends(get_session), ) -> List[TaskResponse]: - statement = select(Task).where(Task.document_id == auth.document.id) + statement = ( + select(Task) + .where(Task.document_id == auth.document.id) + .options(selectinload(Task.dependency_links)) + ) return [TaskResponse.from_orm(x) for x in session.exec(statement)] diff --git a/backend/transcribee_backend/routers/task.py b/backend/transcribee_backend/routers/task.py index f07a62ef..5b7c8619 100644 --- a/backend/transcribee_backend/routers/task.py +++ b/backend/transcribee_backend/routers/task.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Body, Depends, Query from fastapi.exceptions import HTTPException -from sqlalchemy.orm import aliased +from sqlalchemy.orm import aliased, selectinload from sqlalchemy.sql.operators import is_ from sqlmodel import Session, col, select from transcribee_proto.api import KeepaliveBody @@ -104,7 +104,7 @@ def keepalive( keepalive_data: KeepaliveBody = Body(), session: Session = Depends(get_session), task: Task = Depends(get_authorized_task), -) -> Optional[AssignedTaskResponse]: +): # mostly to please the type checker, get_authorized_task already ensures # that the task has a current attempt if task.current_attempt is None: @@ -115,7 +115,6 @@ def keepalive( session.add(task.current_attempt) session.add(task) session.commit() - return AssignedTaskResponse.from_orm(task) @task_router.post("/{task_id}/mark_completed/") @@ -124,11 +123,10 @@ def mark_completed( session: Session = Depends(get_session), task: Task = Depends(get_authorized_task), now: datetime.datetime = Depends(now_tz_aware), -) -> Optional[AssignedTaskResponse]: +): finish_current_attempt( session=session, task=task, now=now, extra_data=extra_data, successful=True ) - return AssignedTaskResponse.from_orm(task) @task_router.post("/{task_id}/mark_failed/") @@ -137,21 +135,24 @@ def mark_failed( session: Session = Depends(get_session), task: Task = Depends(get_authorized_task), now: datetime.datetime = Depends(now_tz_aware), -) -> Optional[AssignedTaskResponse]: +): now = now_tz_aware() finish_current_attempt( session=session, task=task, now=now, extra_data=extra_data, successful=False ) - return AssignedTaskResponse.from_orm(task) - @task_router.get("/") def list_tasks( session: Session = Depends(get_session), token: UserToken = Depends(get_user_token), ) -> List[TaskResponse]: - statement = select(Task).join(Document).where(Document.user == token.user) + statement = ( + select(Task) + .join(Document) + .where(Document.user == token.user) + .options(selectinload(Task.dependency_links)) + ) results = session.exec(statement) return [TaskResponse.from_orm(x) for x in results] diff --git a/backend/transcribee_backend/routers/user.py b/backend/transcribee_backend/routers/user.py index 75fe90df..4d89d140 100644 --- a/backend/transcribee_backend/routers/user.py +++ b/backend/transcribee_backend/routers/user.py @@ -1,5 +1,5 @@ from fastapi import APIRouter, Depends, HTTPException -from sqlmodel import Session, delete, select +from sqlmodel import Session, delete from transcribee_proto.api import LoginResponse from transcribee_backend.auth import ( @@ -12,7 +12,7 @@ ) from transcribee_backend.db import get_session from transcribee_backend.exceptions import UserAlreadyExists -from transcribee_backend.models import CreateUser, User, UserBase, UserToken +from transcribee_backend.models import CreateUser, UserBase, UserToken from transcribee_backend.models.user import ChangePasswordRequest user_router = APIRouter() @@ -57,11 +57,8 @@ def logout( @user_router.get("/me/") def read_user( token: UserToken = Depends(get_user_token), - session: Session = Depends(get_session), ) -> UserBase: - statement = select(User).where(User.id == token.user_id) - user = session.exec(statement).one() - return UserBase(username=user.username) + return UserBase(username=token.user.username) @user_router.post("/change_password/") diff --git a/frontend/src/openapi-schema.ts b/frontend/src/openapi-schema.ts index 71cdac75..c60662f9 100644 --- a/frontend/src/openapi-schema.ts +++ b/frontend/src/openapi-schema.ts @@ -1014,7 +1014,7 @@ export interface operations { /** @description Successful Response */ 200: { content: { - "application/json": components["schemas"]["AssignedTaskResponse"]; + "application/json": unknown; }; }; /** @description Validation Error */ @@ -1044,7 +1044,7 @@ export interface operations { /** @description Successful Response */ 200: { content: { - "application/json": components["schemas"]["AssignedTaskResponse"]; + "application/json": unknown; }; }; /** @description Validation Error */ @@ -1074,7 +1074,7 @@ export interface operations { /** @description Successful Response */ 200: { content: { - "application/json": components["schemas"]["AssignedTaskResponse"]; + "application/json": unknown; }; }; /** @description Validation Error */