From a554f6e2d7d3eefc5d5c049ecfda3590195d5fb0 Mon Sep 17 00:00:00 2001 From: pajowu Date: Sat, 18 Nov 2023 22:35:08 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Increase=20performance=20o?= =?UTF-8?q?f=20list=5Fdocuments=20by=20eager=20loading?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/transcribee_backend/models/task.py | 35 +++++++++++++++---- .../transcribee_backend/routers/document.py | 7 ++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/backend/transcribee_backend/models/task.py b/backend/transcribee_backend/models/task.py index d5ff8972..e3a45eb1 100644 --- a/backend/transcribee_backend/models/task.py +++ b/backend/transcribee_backend/models/task.py @@ -95,6 +95,12 @@ class Task(TaskBase, table=True): "secondaryjoin": "Task.id==TaskDependency.dependant_on_id", }, ) + dependency_links: List[TaskDependency] = Relationship( + sa_relationship_kwargs={ + "primaryjoin": "Task.id==TaskDependency.dependent_task_id", + "viewonly": True, + }, + ) dependants: List["Task"] = Relationship( back_populates="dependencies", link_model=TaskDependency, @@ -155,12 +161,29 @@ class TaskResponse(TaskBase): @classmethod def from_orm(cls, task: Task, update={}) -> Self: - return super().from_orm( - task, - update={ - "dependencies": [x.id for x in task.dependencies], - **update, - }, + # The following code is equivalent to this: + # return super().from_orm( + # task, + # update={ + # "dependencies": [x.dependant_on_id for x in task.dependency_links], + # **update, + # }, + # ) + # But much faster, because from_orm destructures the `obj` to mix it + # with the `update` dict, which causes an access to all attributes, + # including `dependencies`/`dependants` which are then all separately + # selected from the database, causing many queries + # Even with a small number of documents this cuts the loading time of + # the `/api/v1/documents/` endpoint roughly in half on my test machine + return cls( 
id=task.id, + state=task.state, + dependencies=[x.dependant_on_id for x in task.dependency_links], + current_attempt=None, + document_id=task.document_id, + task_type=task.task_type, + task_parameters=task.task_parameters, + **update, ) diff --git a/backend/transcribee_backend/routers/document.py b/backend/transcribee_backend/routers/document.py index 649fc3af..6211ad55 100644 --- a/backend/transcribee_backend/routers/document.py +++ b/backend/transcribee_backend/routers/document.py @@ -22,6 +22,7 @@ from fastapi.exceptions import RequestValidationError from pydantic import BaseModel from pydantic.error_wrappers import ErrorWrapper +from sqlalchemy.orm import selectinload from sqlalchemy.sql.expression import desc from sqlmodel import Session, col, select from transcribee_proto.api import Document as ApiDocument @@ -409,6 +410,12 @@ def list_documents( select(Document) .where(Document.user == token.user) .order_by(desc(Document.changed_at), Document.id) + .options( + selectinload("tasks"), + selectinload("media_files"), + selectinload("media_files.tags"), + selectinload("tasks.dependency_links"), + ) ) results = session.exec(statement) return [doc.as_api_document() for doc in results]