From 6ebf644b7b414facb4eebb5ee75a12f84f659888 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 9 Jul 2024 12:02:30 -0400 Subject: [PATCH 01/15] work on schemas --- pepdbagent/db_utils.py | 59 +++++++++ pepdbagent/modules/schemas.py | 229 ++++++++++++++++++++++++++++++++++ tests/test_schema.py | 11 ++ 3 files changed, 299 insertions(+) create mode 100644 pepdbagent/modules/schemas.py create mode 100644 tests/test_schema.py diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index ea36bd8..cd7e59d 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -296,6 +296,65 @@ class HistorySamples(Base): ) +class Schemas(Base): + + __tablename__ = "schemas" + + id: Mapped[int] = mapped_column(primary_key=True, index=True) + namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE")) + name: Mapped[str] = mapped_column(nullable=False, index=True) + description: Mapped[Optional[str]] = mapped_column(nullable=True, index=True) + schema_json: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue()) + private: Mapped[bool] = mapped_column(default=False) + submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) + last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column( + default=deliver_update_date, onupdate=deliver_update_date + ) + + group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( + "SchemaGroupRelations", back_populates="schema_mapping" + ) + + __table_args__ = (UniqueConstraint("namespace", "name"),) + + +class SchemaGroups(Base): + + __tablename__ = "schema_groups" + + id: Mapped[int] = mapped_column(primary_key=True, index=True) + namespace: Mapped[str] = mapped_column( + ForeignKey("users.namespace", ondelete="CASCADE"), index=True + ) + name: Mapped[str] = mapped_column(nullable=False, index=True) + description: Mapped[Optional[str]] = mapped_column(nullable=True) + + schema_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( + "SchemaGroupRelations", back_populates="group_mapping" + ) + + __table_args__ = (UniqueConstraint("namespace", "name"),) + + +class SchemaGroupRelations(Base): + + __tablename__ = "schema_group_relations" + + schema_id: Mapped[int] = mapped_column( + ForeignKey("schemas.id", ondelete="CASCADE"), index=True, primary_key=True + ) + group_id: Mapped[int] = mapped_column( + ForeignKey("schema_groups.id", ondelete="CASCADE"), index=True, primary_key=True + ) + + schema_mapping: Mapped["Schemas"] = relationship( + "Schemas", back_populates="group_relation_mapping" + ) + group_mapping: Mapped["SchemaGroups"] = relationship( + "SchemaGroups", back_populates="schema_relation_mapping" + ) + + class BaseEngine: """ A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all diff --git a/pepdbagent/modules/schemas.py b/pepdbagent/modules/schemas.py new file mode 100644 index 0000000..41d87f5 --- /dev/null +++ b/pepdbagent/modules/schemas.py @@ -0,0 +1,229 @@ +import datetime +import json +import logging +from typing import Dict, List, NoReturn, Union + +import numpy as np +import peppy +from peppy.const import ( + CONFIG_KEY, + SAMPLE_NAME_ATTR, + SAMPLE_RAW_DICT_KEY, + SAMPLE_TABLE_INDEX_KEY, + SUBSAMPLE_RAW_LIST_KEY, +) +from sqlalchemy import Select, and_, delete, select +from sqlalchemy.exc import IntegrityError, NoResultFound +from sqlalchemy.orm import Session +from sqlalchemy.orm.attributes import flag_modified + +from pepdbagent.const import ( + DEFAULT_TAG, + DESCRIPTION_KEY, + MAX_HISTORY_SAMPLES_NUMBER, + NAME_KEY, + PEPHUB_SAMPLE_ID_KEY, + PKG_NAME, +) +from pepdbagent.db_utils import ( + BaseEngine, + HistoryProjects, + HistorySamples, + Projects, + Samples, + Subsamples, + UpdateTypes, + User, +) +from pepdbagent.exceptions import ( + HistoryNotFoundError, + PEPDatabaseAgentError, + ProjectDuplicatedSampleGUIDsError, + ProjectNotFoundError, + ProjectUniqueNameError, + SampleTableUpdateError, +) +from pepdbagent.models import ( + HistoryAnnotationModel, + HistoryChangeModel, + ProjectDict, + UpdateItems, + UpdateModel, +) +from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter + +_LOGGER = logging.getLogger(PKG_NAME) + + +class PEPDatabaseSchemas: + """ + Class that represents Schemas in Database. + + While using this class, user can create, retrieve, delete, and update schemas from database + """ + + def __init__(self, pep_db_engine: BaseEngine): + """ + :param pep_db_engine: pepdbengine object with sa engine + """ + self._sa_engine = pep_db_engine.engine + self._pep_db_engine = pep_db_engine + + def get(self, namespace: str, name: str) -> Dict: + """ + Get schema from the database. + + :param namespace: user namespace + :param name: schema name + + :return: schema dict + """ + ... + + def search(self, namespace: str = None, query: str = "") -> ...: + """ + Search schemas in the database. + + :param namespace: user namespace [Default: None]. If None, search in all namespaces + :param query: query string. [Default: ""]. If empty, return all schemas + + :return: list of schema dicts + """ + ... + + def create( + self, + namespace: str, + name: str, + schema: dict, + description: str = "", + # private: bool = False, # TODO: for simplicity was not implemented yet + overwrite: bool = False, + update_only: bool = False, + ) -> None: + """ + Create or update schema in the database. + + :param namespace: user namespace + :param name: schema name + :param schema: schema dict + :param description: schema description [Default: ""] + :param overwrite: overwrite schema if exists [Default: False] + :param update_only: update only schema if exists [Default: False] + """ + ... + + def update( + self, + namespace: str, + name: str, + schema: dict, + description: str = "", + # private: bool = False, # TODO: for simplicity was not implemented yet + ) -> None: + """ + Update schema in the database. + + :param namespace: user namespace + :param name: schema name + :param schema: schema dict + :param description: schema description [Default: ""] + + :return: None + """ + ... + + def delete(self, namespace: str, name: str) -> None: + """ + Delete schema from the database. + + :param namespace: user namespace + :param name: schema name + + :return: None + """ + ... + + def exist(self, namespace: str, name: str) -> bool: + """ + Check if schema exists in the database. + + :param namespace: user namespace + :param name: schema name + + :return: True if schema exists, False otherwise + """ + ... + + def group_create(self, namespace: str, name: str, description: str = "") -> None: + """ + Create schema group in the database. + + :param namespace: user namespace + :param name: schema group name + :param description: schema group description [Default: ""] + """ + ... + + def group_get(self, namespace: str, name: str) -> ...: + """ + Get schema group from the database. + + :param namespace: user namespace + :param name: schema group name + + :return: ... + """ + ... + + def group_search(self, namespace: str = None, query: str = "") -> ...: + """ + Search schema groups in the database. + + :param namespace: user namespace [Default: None]. If None, search in all namespaces + :param query: query string. [Default: ""]. If empty, return all schema groups + + :return: list of schema group dicts + """ + ... + + def group_delete(self, namespace: str, name: str) -> None: + """ + Delete schema group from the database. + + :param namespace: user namespace + :param name: schema group name + + :return: None + """ + ... + + def group_add_schema( + self, namespace: str, name: str, schema_namespace: str, schema_name: str + ) -> None: + """ + Add schema to the schema group. + + :param namespace: user namespace + :param name: schema group name + :param schema_namespace: schema namespace + :param schema_name: schema name + + :return: None + """ + ... + + def group_remove_schema( + self, namespace: str, name: str, schema_namespace: str, schema_name: str + ) -> None: + """ + Remove schema from the schema group. + + :param namespace: user namespace + :param name: schema group name + :param schema_namespace: schema namespace + :param schema_name: schema name + + :return: None + """ + ... diff --git a/tests/test_schema.py b/tests/test_schema.py new file mode 100644 index 0000000..fa9efd8 --- /dev/null +++ b/tests/test_schema.py @@ -0,0 +1,11 @@ +import pytest + +from .utils import PEPDBAgentContextManager + + +@pytest.mark.skipif( + not PEPDBAgentContextManager().db_setup(), + reason="DB is not setup", +) +class TestSamples: + ... \ No newline at end of file From afd334df44ad2efbf23d37450f47d42525a495d7 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 9 Jul 2024 14:25:14 -0400 Subject: [PATCH 02/15] work on schemas --- pepdbagent/db_utils.py | 4 ++++ pepdbagent/modules/{schemas.py => schema.py} | 11 +++++++++++ 2 files changed, 15 insertions(+) rename pepdbagent/modules/{schemas.py => schema.py} (95%) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index cd7e59d..df52815 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -87,6 +87,10 @@ class Projects(Base): default=deliver_update_date, # onupdate=deliver_update_date, # This field should not be updated, while we are adding project to favorites ) pep_schema: Mapped[Optional[str]] + + schema_id: Mapped[Optional[int]] = mapped_column(ForeignKey("schemas.id", ondelete="SET NULL"), nullable=True) + schema_mapping: Mapped["Schemas"] = relationship("Schemas", lazy="joined") + pop: Mapped[Optional[bool]] = mapped_column(default=False) samples_mapping: Mapped[List["Samples"]] = relationship( back_populates="project_mapping", cascade="all, delete-orphan" diff --git a/pepdbagent/modules/schemas.py b/pepdbagent/modules/schema.py similarity index 95% rename from pepdbagent/modules/schemas.py rename to pepdbagent/modules/schema.py index 41d87f5..3318355 100644 --- a/pepdbagent/modules/schemas.py +++ b/pepdbagent/modules/schema.py @@ -227,3 +227,14 @@ def group_remove_schema( :return: None """ ... + + def group_exist(self, namespace: str, name: str) -> bool: + """ + Check if schema group exists in the database. + + :param namespace: user namespace + :param name: schema group name + + :return: True if schema group exists, False otherwise + """ + ... \ No newline at end of file From a07f6bb07281e83d6cf2609a16e47714e2cf8270 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 9 Jul 2024 16:46:04 -0400 Subject: [PATCH 03/15] work on schemas 2 --- pepdbagent/db_utils.py | 4 +- pepdbagent/exceptions.py | 10 ++ pepdbagent/models.py | 23 +++++ pepdbagent/modules/schema.py | 179 +++++++++++++++++++++++++++++------ tests/test_schema.py | 3 +- 5 files changed, 188 insertions(+), 31 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index df52815..251a551 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -88,7 +88,9 @@ class Projects(Base): ) pep_schema: Mapped[Optional[str]] - schema_id: Mapped[Optional[int]] = mapped_column(ForeignKey("schemas.id", ondelete="SET NULL"), nullable=True) + schema_id: Mapped[Optional[int]] = mapped_column( + ForeignKey("schemas.id", ondelete="SET NULL"), nullable=True + ) schema_mapping: Mapped["Schemas"] = relationship("Schemas", lazy="joined") pop: Mapped[Optional[bool]] = mapped_column(default=False) diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index 64e5278..5512ed6 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -117,3 +117,13 @@ def __init__(self, msg=""): class UserNotFoundError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""User does not exist. {msg}""") + + +class SchemaDoesNotExistError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Schema does not exist. {msg}""") + + +class SchemaAlreadyExistsError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Schema already exists. {msg}""") diff --git a/pepdbagent/models.py b/pepdbagent/models.py index c138d18..8bbddc0 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -246,3 +246,26 @@ class HistoryAnnotationModel(BaseModel): name: str tag: str = DEFAULT_TAG history: List[HistoryChangeModel] + + +class SchemaAnnotation(BaseModel): + """ + Schema annotation model + """ + + namespace: str + name: str + last_update_date: Optional[str] + submission_date: Optional[str] + description: Optional[str] + + +class SchemaSearchResult(BaseModel): + """ + Schema search result model + """ + + count: int + limit: int + offset: int + results: List[SchemaAnnotation] diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 3318355..cc1c1d1 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -12,7 +12,7 @@ SAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_RAW_LIST_KEY, ) -from sqlalchemy import Select, and_, delete, select +from sqlalchemy import Select, and_, delete, select, or_ from sqlalchemy.exc import IntegrityError, NoResultFound from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified @@ -27,28 +27,17 @@ ) from pepdbagent.db_utils import ( BaseEngine, - HistoryProjects, - HistorySamples, - Projects, - Samples, - Subsamples, - UpdateTypes, - User, + Schemas, + SchemaGroups, + SchemaGroupRelations, ) from pepdbagent.exceptions import ( - HistoryNotFoundError, - PEPDatabaseAgentError, - ProjectDuplicatedSampleGUIDsError, - ProjectNotFoundError, - ProjectUniqueNameError, - SampleTableUpdateError, + SchemaAlreadyExistsError, + SchemaDoesNotExistError, ) from pepdbagent.models import ( - HistoryAnnotationModel, - HistoryChangeModel, - ProjectDict, - UpdateItems, - UpdateModel, + SchemaAnnotation, + SchemaSearchResult, ) from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter @@ -69,7 +58,7 @@ def __init__(self, pep_db_engine: BaseEngine): self._sa_engine = pep_db_engine.engine self._pep_db_engine = pep_db_engine - def get(self, namespace: str, name: str) -> Dict: + def get(self, namespace: str, name: str) -> dict: """ Get schema from the database. @@ -78,18 +67,99 @@ def get(self, namespace: str, name: str) -> Dict: :return: schema dict """ - ... - def search(self, namespace: str = None, query: str = "") -> ...: + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) + ) + + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + + return schema_obj.schema_json + + def info(self, namespace: str, name: str) -> SchemaAnnotation: + """ + Get schema information from the database. + + :param namespace: user namespace + :param name: schema name + + :return: SchemaAnnotation object: + - namespace: schema namespace + - name: schema name + - last_update_date: last update date + - submission_date: submission date + - description: schema description + """ + + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) + ) + + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + + return SchemaAnnotation( + namespace=schema_obj.namespace, + name=schema_obj.name, + last_update_date=schema_obj.last_update_date, + submission_date=schema_obj.submission_date, + description=schema_obj.description, + ) + + def search( + self, namespace: str = None, query: str = "", limit: int = 100, offset: int = 0 + ) -> SchemaSearchResult: """ Search schemas in the database. :param namespace: user namespace [Default: None]. If None, search in all namespaces :param query: query string. [Default: ""]. If empty, return all schemas + :param limit: limit number of schemas [Default: 100] + :param offset: offset number of schemas [Default: 0] :return: list of schema dicts """ - ... + + statement = select(Schemas) + + # TODO: add count to the result + if query: + sql_search_str = f"%{query}%" + search_query = or_( + Schemas.name.ilike(sql_search_str), + Schemas.description.ilike(sql_search_str), + ) + statement = statement.where(search_query) + if namespace: + statement = statement.where(Schemas.namespace == namespace) + + statement = statement.limit(limit).offset(offset) + + return_list = [] + + with Session(self._sa_engine) as session: + results = session.scalars(statement) + + for result in results: + return_list.append( + SchemaAnnotation( + namespace=result.namespace, + name=result.name, + last_update_date=result.last_update_date, + submission_date=result.submission_date, + description=result.description, + ) + ) + + return SchemaSearchResult( + count=0, + limit=limit, + offset=offset, + results=return_list, + ) def create( self, @@ -111,7 +181,32 @@ def create( :param overwrite: overwrite schema if exists [Default: False] :param update_only: update only schema if exists [Default: False] """ - ... + + if self.exist(namespace, name): + if overwrite: + self.update(namespace, name, schema, description) + return None + elif update_only: + self.update(namespace, name, schema, description) + return None + else: + raise SchemaAlreadyExistsError(f"Schema '{name}' already exists in the database") + + if update_only: + raise SchemaDoesNotExistError( + f"Schema '{name}' does not exist in the database" + f"Cannot update schema that does not exist" + ) + + with Session(self._sa_engine) as session: + schema_obj = Schemas( + namespace=namespace, + name=name, + schema_json=schema, + description=description, + ) + session.add(schema_obj) + session.commit() def update( self, @@ -131,7 +226,19 @@ def update( :return: None """ - ... + + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) + ) + + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + + schema_obj.schema_json = schema + schema_obj.description = description + + session.commit() def delete(self, namespace: str, name: str) -> None: """ @@ -142,7 +249,18 @@ def delete(self, namespace: str, name: str) -> None: :return: None """ - ... + + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) + ) + + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + + session.delete(schema_obj) + + session.commit() def exist(self, namespace: str, name: str) -> bool: """ @@ -153,7 +271,12 @@ def exist(self, namespace: str, name: str) -> bool: :return: True if schema exists, False otherwise """ - ... + + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) + ) + return True if schema_obj else False def group_create(self, namespace: str, name: str, description: str = "") -> None: """ @@ -237,4 +360,4 @@ def group_exist(self, namespace: str, name: str) -> bool: :return: True if schema group exists, False otherwise """ - ... \ No newline at end of file + ... diff --git a/tests/test_schema.py b/tests/test_schema.py index fa9efd8..bc86c34 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -7,5 +7,4 @@ not PEPDBAgentContextManager().db_setup(), reason="DB is not setup", ) -class TestSamples: - ... \ No newline at end of file +class TestSamples: ... From 6cbb8c9f270d12c3dc34fee2f34b3e5911c64514 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 10 Jul 2024 13:15:09 -0400 Subject: [PATCH 04/15] work on schemas --- pepdbagent/exceptions.py | 10 ++ pepdbagent/models.py | 22 ++++ pepdbagent/modules/schema.py | 227 ++++++++++++++++++++++++++++++----- 3 files changed, 232 insertions(+), 27 deletions(-) diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index 5512ed6..61d45fd 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -127,3 +127,13 @@ def __init__(self, msg=""): class SchemaAlreadyExistsError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""Schema already exists. {msg}""") + + +class SchemaGroupDoesNotExistError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Schema group does not exist. {msg}""") + + +class SchemaGroupAlreadyExistsError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Schema group already exists. {msg}""") diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 8bbddc0..c9ae67e 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -269,3 +269,25 @@ class SchemaSearchResult(BaseModel): limit: int offset: int results: List[SchemaAnnotation] + + +class SchemaGroupAnnotation(BaseModel): + """ + Schema group annotation model + """ + + namespace: str + name: str + description: Optional[str] + schemas: List[SchemaAnnotation] + + +class SchemaGroupSearchResult(BaseModel): + """ + Schema group search result model + """ + + count: int + limit: int + offset: int + results: List[SchemaGroupAnnotation] diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index cc1c1d1..5d75bfd 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -12,10 +12,10 @@ SAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_RAW_LIST_KEY, ) -from sqlalchemy import Select, and_, delete, select, or_ -from sqlalchemy.exc import IntegrityError, NoResultFound +from sqlalchemy import Select, and_, delete, select, or_, func from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified +from sqlalchemy.exc import IntegrityError, NoResultFound from pepdbagent.const import ( DEFAULT_TAG, @@ -34,10 +34,14 @@ from pepdbagent.exceptions import ( SchemaAlreadyExistsError, SchemaDoesNotExistError, + SchemaGroupAlreadyExistsError, + SchemaGroupDoesNotExistError, ) from pepdbagent.models import ( SchemaAnnotation, SchemaSearchResult, + SchemaGroupAnnotation, + SchemaGroupSearchResult, ) from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter @@ -110,13 +114,13 @@ def info(self, namespace: str, name: str) -> SchemaAnnotation: ) def search( - self, namespace: str = None, query: str = "", limit: int = 100, offset: int = 0 + self, namespace: str = None, search_str: str = "", limit: int = 100, offset: int = 0 ) -> SchemaSearchResult: """ Search schemas in the database. :param namespace: user namespace [Default: None]. If None, search in all namespaces - :param query: query string. [Default: ""]. If empty, return all schemas + :param search_str: query string. [Default: ""]. If empty, return all schemas :param limit: limit number of schemas [Default: 100] :param offset: offset number of schemas [Default: 0] @@ -124,18 +128,7 @@ def search( """ statement = select(Schemas) - - # TODO: add count to the result - if query: - sql_search_str = f"%{query}%" - search_query = or_( - Schemas.name.ilike(sql_search_str), - Schemas.description.ilike(sql_search_str), - ) - statement = statement.where(search_query) - if namespace: - statement = statement.where(Schemas.namespace == namespace) - + statement = self._add_condition(statement, namespace, search_str) statement = statement.limit(limit).offset(offset) return_list = [] @@ -155,12 +148,47 @@ def search( ) return SchemaSearchResult( - count=0, + count=self._count_search(namespace=namespace, search_str=search_str), limit=limit, offset=offset, results=return_list, ) + def _count_search(self, namespace: str = None, search_str: str = "") -> int: + """ + Count number of found schemas + + :param namespace: user namespace [Default: None]. If None, search in all namespaces + :param search_str: query string. [Default: ""]. If empty, return all schemas + + :return: list of schema dicts + """ + statement = select(func.count(Schemas.id)) + + statement = self._add_condition(statement, namespace, search_str) + + with Session(self._sa_engine) as session: + result = session.execute(statement).one() + + return result[0] + + @staticmethod + def _add_condition( + statement: Select, + namespace: str = None, + search_str: str = None, + ) -> Select: + if search_str: + sql_search_str = f"%{search_str}%" + search_query = or_( + Schemas.name.ilike(sql_search_str), + Schemas.description.ilike(sql_search_str), + ) + statement = statement.where(search_query) + if namespace: + statement = statement.where(Schemas.namespace == namespace) + return statement + def create( self, namespace: str, @@ -237,6 +265,7 @@ def update( schema_obj.schema_json = schema schema_obj.description = description + flag_modified(schema_obj, "schema_json") session.commit() @@ -285,30 +314,154 @@ def group_create(self, namespace: str, name: str, description: str = "") -> None :param namespace: user namespace :param name: schema group name :param description: schema group description [Default: ""] + + :return: None """ - ... + try: + with Session(self._sa_engine) as session: + session.add( + SchemaGroups( + namespace=namespace, + name=name, + description=description, + ) + ) + session.commit() - def group_get(self, namespace: str, name: str) -> ...: + except IntegrityError: + raise SchemaGroupAlreadyExistsError + + def group_get(self, namespace: str, name: str) -> SchemaGroupAnnotation: """ Get schema group from the database. :param namespace: user namespace :param name: schema group name - :return: ... + :return: SchemaGroupAnnotation object: + - namespace: schema group namespace + - name: schema group name + - description: schema group description + - schemas: list of SchemaAnnotation objects """ - ... - def group_search(self, namespace: str = None, query: str = "") -> ...: + with Session(self._sa_engine) as session: + schema_group_obj = session.scalar( + select(SchemaGroups).where( + and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + ) + ) + + if not schema_group_obj: + raise SchemaGroupDoesNotExistError( + f"Schema group '{name}' does not exist in the database" + ) + + schemas = [] + for schema_relation in schema_group_obj.schema_relation_mapping: + schema_annotation = schema_relation.schema_mapping + schemas.append( + SchemaAnnotation( + namespace=schema_annotation.namespace, + name=schema_annotation.name, + last_update_date=schema_annotation.last_update_date, + submission_date=schema_annotation.submission_date, + desciription=schema_annotation.description, + ) + ) + + return SchemaGroupAnnotation( + namespace=schema_group_obj.namespace, + name=schema_group_obj.name, + description=schema_group_obj.description, + schemas=schemas, + ) + + def group_search( + self, namespace: str = None, search_str: str = "", limit: int = 100, offset: int = 0 + ) -> SchemaGroupSearchResult: """ Search schema groups in the database. :param namespace: user namespace [Default: None]. If None, search in all namespaces - :param query: query string. [Default: ""]. If empty, return all schema groups + :param search_str: query string. [Default: ""]. If empty, return all schema groups + :param limit: limit of the search + :param offset: offset of the search + + :return: SchemaGroupSearchResult object: + - count: number of found schema groups + - limit: limit number of schema groups + - offset: offset number of schema groups + - results: list of SchemaGroupAnnotation objects + """ + + statement = select(SchemaGroups) + statement = self._add_group_condition( + statement=statement, namespace=namespace, search_str=search_str + ) + + with Session(self._sa_engine) as session: + results = session.scalars(statement) + + return_results = [] + for result in results: + return_results.append( + SchemaGroupAnnotation( + namespace=result.namespace, + name=result.name, + description=result.description, + schemas=[], + ) + ) + + return SchemaGroupSearchResult( + count=self._group_search_count(namespace, search_str), + limit=limit, + offset=offset, + results=return_results, + ) + + @staticmethod + def _add_group_condition( + statement: Select, + namespace: str = None, + search_str: str = "", + ) -> Select: + """ + Add query condition to statement in group search + + :param statement: Select statement + :param namespace: Namespace of schema group [Default: None]. If none set, all search in all namespaces + :param search_str: Search string to look for schemas. Search in name and description of the group + """ + if search_str: + sql_search_str = f"%{search_str}%" + search_query = or_( + SchemaGroups.name.ilike(sql_search_str), + SchemaGroups.description.ilike(sql_search_str), + ) + statement = statement.where(search_query) + if namespace: + statement = statement.where(SchemaGroups.namespace == namespace) + return statement + + def _group_search_count(self, namespace: str = None, search_str: str = ""): + """ + Count number of found group of schemas + + :param namespace: user namespace [Default: None]. If None, search in all namespaces + :param search_str: query string. [Default: ""]. If empty, return all schemas - :return: list of schema group dicts + :return: list of schema dicts """ - ... + statement = select(func.count(SchemaGroups.id)) + + statement = self._add_condition(statement, namespace, search_str) + + with Session(self._sa_engine) as session: + result = session.execute(statement).one() + + return result[0] def group_delete(self, namespace: str, name: str) -> None: """ @@ -319,7 +472,20 @@ def group_delete(self, namespace: str, name: str) -> None: :return: None """ - ... + + if self.group_exist(namespace, name): + raise SchemaGroupDoesNotExistError( + f"Schema group '{name}' does not exist in the database" + ) + + with Session(self._sa_engine) as session: + session.execute( + delete(SchemaGroups).where( + and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + ) + ) + + session.commit() def group_add_schema( self, namespace: str, name: str, schema_namespace: str, schema_name: str @@ -360,4 +526,11 @@ def group_exist(self, namespace: str, name: str) -> bool: :return: True if schema group exists, False otherwise """ - ... + + with Session(self._sa_engine) as session: + schema_group_obj = session.scalar( + select(SchemaGroups).where( + and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + ) + ) + return True if schema_group_obj else False From 1418e1f6fa918ed295f18521533bd4d4944b220b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 10 Jul 2024 14:10:16 -0400 Subject: [PATCH 05/15] more work on schemas --- pepdbagent/exceptions.py | 10 +++++ pepdbagent/modules/schema.py | 73 +++++++++++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index 61d45fd..caf152b 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -137,3 +137,13 @@ def __init__(self, msg=""): class SchemaGroupAlreadyExistsError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""Schema group already exists. {msg}""") + + +class SchemaAlreadyInGroupError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Schema already in the group. {msg}""") + + +class SchemaIsNotInGroupError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Schema not found in group. {msg}""") diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 5d75bfd..05db131 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -36,6 +36,8 @@ SchemaDoesNotExistError, SchemaGroupAlreadyExistsError, SchemaGroupDoesNotExistError, + SchemaAlreadyInGroupError, + SchemaIsNotInGroupError, ) from pepdbagent.models import ( SchemaAnnotation, @@ -500,7 +502,46 @@ def group_add_schema( :return: None """ - ... + + try: + with Session(self._sa_engine) as session: + group_mapping = session.scalar( + select(SchemaGroups).where( + and_( + SchemaGroups.namespace == namespace, + SchemaGroups.name == name, + ) + ) + ) + + if not group_mapping: + raise SchemaGroupDoesNotExistError( + f"Group of Schemas with namespace='{namespace}' and name='{name}' does not exist" + ) + + schema_mapping = session.scalar( + select(Schemas).where( + and_( + Schemas.namespace == schema_namespace, + Schemas.name == schema_name, + ) + ) + ) + + if not schema_mapping: + raise SchemaDoesNotExistError( + f"Schema with namespace='{schema_namespace}' and name='{schema_name}' does not exist" + ) + + session.add( + SchemaGroupRelations( + schema_id=schema_mapping.id, + group_id=group_mapping.id, + ) + ) + session.commit() + except IntegrityError: + raise SchemaAlreadyInGroupError def group_remove_schema( self, namespace: str, name: str, schema_namespace: str, schema_name: str @@ -515,7 +556,35 @@ def group_remove_schema( :return: None """ - ... + + try: + with Session(self._sa_engine) as session: + session.execute( + delete(SchemaGroupRelations).where( + and_( + SchemaGroupRelations.schema_id + == select(Schemas.id) + .where( + and_( + Schemas.namespace == schema_namespace, + Schemas.name == schema_name, + ) + ) + .subquery(), + SchemaGroupRelations.group_id + == select(SchemaGroups.id) + .where( + and_( + SchemaGroups.namespace == namespace, + SchemaGroups.name == name, + ) + ) + .subquery(), + ) + ) + ) + except IntegrityError: + raise SchemaIsNotInGroupError("Schema not found in the group") def group_exist(self, namespace: str, name: str) -> bool: """ From cdc1b6853b5d07cf1fe584a1b4e71460a826f80e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Jul 2024 11:33:48 -0400 Subject: [PATCH 06/15] added schemas to main pepdbagent class --- pepdbagent/modules/schema.py | 2 +- pepdbagent/pepdbagent.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 05db131..09045e8 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -50,7 +50,7 @@ _LOGGER = logging.getLogger(PKG_NAME) -class PEPDatabaseSchemas: +class PEPDatabaseSchema: """ Class that represents Schemas in Database. diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index 1519c33..dc64cb9 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -6,6 +6,7 @@ from pepdbagent.modules.sample import PEPDatabaseSample from pepdbagent.modules.user import PEPDatabaseUser from pepdbagent.modules.view import PEPDatabaseView +from pepdbagent.modules.schema import PEPDatabaseSchema class PEPDatabaseAgent(object): @@ -54,6 +55,7 @@ def __init__( self._sample = PEPDatabaseSample(pep_db_engine) self._user = PEPDatabaseUser(pep_db_engine) self._view = PEPDatabaseView(pep_db_engine) + self._schema = PEPDatabaseSchema(pep_db_engine) self._db_name = database @@ -81,6 +83,10 @@ def sample(self) -> PEPDatabaseSample: def view(self) -> PEPDatabaseView: return self._view + @property + def schema(self) -> PEPDatabaseSchema: + return self._schema + def __str__(self): return f"Connection to the database: '{self.__db_name}' is set!" From 2082ce872da58fdfb117323e90b7477f43895c4e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Jul 2024 13:24:21 -0400 Subject: [PATCH 07/15] fixed schema annotation model --- pepdbagent/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index c9ae67e..46ba418 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -255,9 +255,9 @@ class SchemaAnnotation(BaseModel): namespace: str name: str - last_update_date: Optional[str] - submission_date: Optional[str] - description: Optional[str] + last_update_date: Optional[datetime.datetime] + submission_date: Optional[datetime.datetime] + description: Optional[datetime.datetime] class SchemaSearchResult(BaseModel): From 39070cca2e1a1150abc828f6fec1bf1b018317ac Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Jul 2024 13:53:59 -0400 Subject: [PATCH 08/15] fixed string description --- pepdbagent/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 46ba418..d0281cd 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -257,7 +257,7 @@ class SchemaAnnotation(BaseModel): name: str last_update_date: Optional[datetime.datetime] submission_date: Optional[datetime.datetime] - description: Optional[datetime.datetime] + description: Optional[str] class SchemaSearchResult(BaseModel): From 9550289b6c92b1f500a4489d6f711089bc6dcb61 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 23 Jul 2024 15:28:52 -0400 Subject: [PATCH 09/15] tests + bug fix --- pepdbagent/db_utils.py | 5 +- pepdbagent/models.py | 7 +- pepdbagent/modules/schema.py | 77 ++++++--- tests/schemas/namespace1/2.0.0.yaml | 69 ++++++++ tests/schemas/namespace1/2.1.0.yaml | 77 +++++++++ tests/schemas/namespace2/bedboss.yaml | 47 +++++ tests/schemas/namespace2/bedbuncher.yaml | 25 +++ tests/schemas/namespace2/bedmaker.yaml | 59 +++++++ tests/test_schema.py | 211 ++++++++++++++++++++++- tests/utils.py | 52 +++++- 10 files changed, 594 insertions(+), 35 deletions(-) create mode 100644 tests/schemas/namespace1/2.0.0.yaml create mode 100644 tests/schemas/namespace1/2.1.0.yaml create mode 100644 tests/schemas/namespace2/bedboss.yaml create mode 100644 tests/schemas/namespace2/bedbuncher.yaml create mode 100644 tests/schemas/namespace2/bedmaker.yaml diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 251a551..de1a9f3 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -129,7 +129,7 @@ class Projects(Base): history_mapping: Mapped[List["HistoryProjects"]] = relationship( back_populates="project_mapping", cascade="all, delete-orphan" - ) # TODO: check if cascade is correct + ) __table_args__ = (UniqueConstraint("namespace", "name", "tag"),) @@ -317,6 +317,9 @@ class Schemas(Base): default=deliver_update_date, onupdate=deliver_update_date ) + projects_mappings: Mapped[List["Projects"]] = relationship( + "Projects", back_populates="schema_mapping" + ) group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( "SchemaGroupRelations", back_populates="schema_mapping" ) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index d0281cd..6270f4e 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -255,9 +255,10 @@ class SchemaAnnotation(BaseModel): namespace: str name: str - last_update_date: Optional[datetime.datetime] - submission_date: Optional[datetime.datetime] - description: Optional[str] + last_update_date: str + submission_date: str + description: Optional[str] = "" + popularity_number: Optional[int] = 0 class SchemaSearchResult(BaseModel): diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 09045e8..4091891 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -30,6 +30,7 @@ Schemas, SchemaGroups, SchemaGroupRelations, + User, ) from pepdbagent.exceptions import ( SchemaAlreadyExistsError, @@ -110,9 +111,10 @@ def info(self, namespace: str, name: str) -> SchemaAnnotation: return SchemaAnnotation( namespace=schema_obj.namespace, name=schema_obj.name, - last_update_date=schema_obj.last_update_date, - submission_date=schema_obj.submission_date, + last_update_date=str(schema_obj.last_update_date), + submission_date=str(schema_obj.submission_date), description=schema_obj.description, + popularity_number=len(schema_obj.projects_mappings), ) def search( @@ -143,9 +145,10 @@ def search( SchemaAnnotation( namespace=result.namespace, name=result.name, - last_update_date=result.last_update_date, - submission_date=result.submission_date, + last_update_date=str(result.last_update_date), + submission_date=str(result.submission_date), description=result.description, + # popularity_number=sum(result.projects_mappings), ) ) @@ -229,6 +232,13 @@ def create( ) with Session(self._sa_engine) as session: + user = session.scalar(select(User).where(User.namespace == namespace)) + + if not user: + user = User(namespace=namespace) + session.add(user) + session.commit() + schema_obj = Schemas( namespace=namespace, name=name, @@ -366,8 +376,8 @@ def group_get(self, namespace: str, name: str) -> SchemaGroupAnnotation: SchemaAnnotation( namespace=schema_annotation.namespace, name=schema_annotation.name, - last_update_date=schema_annotation.last_update_date, - submission_date=schema_annotation.submission_date, + last_update_date=str(schema_annotation.last_update_date), + submission_date=str(schema_annotation.submission_date), desciription=schema_annotation.description, ) ) @@ -458,7 +468,7 @@ def _group_search_count(self, namespace: str = None, search_str: str = ""): """ statement = select(func.count(SchemaGroups.id)) - statement = self._add_condition(statement, namespace, search_str) + statement = self._add_group_condition(statement, namespace, search_str) with Session(self._sa_engine) as session: result = session.execute(statement).one() @@ -475,7 +485,7 @@ def group_delete(self, namespace: str, name: str) -> None: :return: None """ - if self.group_exist(namespace, name): + if not self.group_exist(namespace, name): raise SchemaGroupDoesNotExistError( f"Schema group '{name}' does not exist in the database" ) @@ -559,30 +569,43 @@ def group_remove_schema( try: with Session(self._sa_engine) as session: - session.execute( - delete(SchemaGroupRelations).where( - and_( - SchemaGroupRelations.schema_id - == select(Schemas.id) - .where( - and_( - Schemas.namespace == schema_namespace, - Schemas.name == schema_name, - ) + + a = session.scalar( + select(Schemas).where( + and_(Schemas.namespace == schema_namespace, Schemas.name == schema_name) + ) + ) + b = session.scalar( + select(SchemaGroups).where( + and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + ) + ) + + delete_statement = delete(SchemaGroupRelations).where( + and_( + SchemaGroupRelations.schema_id + == select(Schemas.id) + .where( + and_( + Schemas.namespace == schema_namespace, + Schemas.name == schema_name, ) - .subquery(), - SchemaGroupRelations.group_id - == select(SchemaGroups.id) - .where( - and_( - SchemaGroups.namespace == namespace, - SchemaGroups.name == name, - ) + ) + .subquery(), + SchemaGroupRelations.group_id + == select(SchemaGroups.id) + .where( + and_( + SchemaGroups.namespace == namespace, + SchemaGroups.name == name, ) - .subquery(), ) + .subquery(), ) ) + + session.execute(delete_statement) + session.commit() except IntegrityError: raise SchemaIsNotInGroupError("Schema not found in the group") diff --git a/tests/schemas/namespace1/2.0.0.yaml b/tests/schemas/namespace1/2.0.0.yaml new file mode 100644 index 0000000..56f2034 --- /dev/null +++ b/tests/schemas/namespace1/2.0.0.yaml @@ -0,0 +1,69 @@ +description: "Schema for a minimal PEP" +version: "2.0.0" +properties: + config: + properties: + name: + type: string + pattern: "^\\S*$" + description: "Project name with no whitespace" + pep_version: + description: "Version of the PEP Schema this PEP follows" + type: string + sample_table: + type: string + description: "Path to the sample annotation table with one row per sample" + subsample_table: + type: string + description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table" + sample_modifiers: + type: object + properties: + append: + type: object + duplicate: + type: object + imply: + type: array + items: + type: object + properties: + if: + type: object + then: + type: object + derive: + type: object + properties: + attributes: + type: array + items: + type: string + sources: + type: object + project_modifiers: + type: object + properties: + amend: + description: "Object overwriting original project attributes" + type: object + import: + description: "List of external PEP project config files to import" + type: array + items: + type: string + required: + - pep_version + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + pattern: "^\\S*$" + description: "Unique name of the sample with no whitespace" + required: + - sample_name +required: + - samples diff --git a/tests/schemas/namespace1/2.1.0.yaml b/tests/schemas/namespace1/2.1.0.yaml new file mode 100644 index 0000000..e3982aa --- /dev/null +++ b/tests/schemas/namespace1/2.1.0.yaml @@ -0,0 +1,77 @@ +description: "Schema for a minimal PEP" +version: "2.1.0" +properties: + config: + properties: + name: + type: string + pattern: "^\\S*$" + description: "Project name with no whitespace" + pep_version: + description: "Version of the PEP Schema this PEP follows" + type: string + sample_table: + type: string + description: "Path to the sample annotation table" + subsample_table: + type: string + description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table" + sample_table_index: + type: string + pattern: "^\\S*$" + description: "Name of the column in sample table to use as an index. It's 'sample_name' by default" + subsample_table_index: + type: array + items: + type: string + pattern: "^\\S*$" + description: "Names of the columns in subsample table to use as an index. It's ['sample_name', 'subsample_name'] by default" + sample_modifiers: + type: object + properties: + append: + type: object + duplicate: + type: object + imply: + type: array + items: + type: object + properties: + if: + type: object + then: + type: object + derive: + type: object + properties: + attributes: + type: array + items: + type: string + sources: + type: object + project_modifiers: + type: object + properties: + amend: + description: "Object overwriting original project attributes" + type: object + import: + description: "List of external PEP project config files to import" + type: array + items: + type: string + required: + - pep_version + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + pattern: "^\\S*$" + description: "Unique name of the sample with no whitespace" +required: + - samples diff --git a/tests/schemas/namespace2/bedboss.yaml b/tests/schemas/namespace2/bedboss.yaml new file mode 100644 index 0000000..e6ffa0c --- /dev/null +++ b/tests/schemas/namespace2/bedboss.yaml @@ -0,0 +1,47 @@ +description: bedboss run-all pep schema + +properties: + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + description: "Name of the sample" + input_file: + type: string + description: "Absolute path to the input file" + input_type: + type: string + description: "file format" + enum: [ "bigWig", "bigBed", "bed", "wig", "bedGraph" ] + genome: + type: string + description: "organism genome code" + format_type: + type: string + description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)" + enum: [ "narrowPeak", "broadPeak" ] + description: + type: string + description: "freeform description of the sample" + open_signal_matrix: + type: string + description: "A full path to the openSignalMatrix required for the tissue" + chrom_sizes: + type: string + description: "A full path to the chrom.sizes required for the bedtobigbed conversion" + treatment: + type: string + description: "freeform description of the sample treatment" + cell_type: + type: string + description: "cell type code" + required: + - sample_name + - input_file + - input_type + - genome +required: + - samples diff --git a/tests/schemas/namespace2/bedbuncher.yaml b/tests/schemas/namespace2/bedbuncher.yaml new file mode 100644 index 0000000..cd42998 --- /dev/null +++ b/tests/schemas/namespace2/bedbuncher.yaml @@ -0,0 +1,25 @@ +description: bedbuncher PEP schema +imports: + - http://schema.databio.org/pep/2.0.0.yaml + +properties: + samples: + type: array + items: + type: object + properties: + JSONquery_path: + type: string + description: "path to the JSON file with the Elasticsearch query" + bedset_name: + type: string + pattern: "^\\S*$" + description: "name of the bedset that will be created" + bbconfig_path: + type: string + description: "path to bedbase config file" + required: + - JSONquery_path + - bedset_name +required: + - samples \ No newline at end of file diff --git a/tests/schemas/namespace2/bedmaker.yaml b/tests/schemas/namespace2/bedmaker.yaml new file mode 100644 index 0000000..93806d0 --- /dev/null +++ b/tests/schemas/namespace2/bedmaker.yaml @@ -0,0 +1,59 @@ +description: bedmaker PEP schema + +properties: + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + description: "name of the sample, which is the name of the output BED file" + input_file_path: + type: string + description: "absolute path the file to convert" + output_bed_path: + type: string + description: "absolute path the file to the output BED file (derived attribute)" + output_bigbed_path: + type: string + description: "absolute path the file to the output bigBed file (derived attribute)" + genome: + type: string + description: "organism genome code" + narrowpeak: + type: boolean + description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)" + format: + type: string + description: "file format" + enum: ["bigWig", "bigBed", "bed", "wig", "bedGraph"] + cell_type: + type: string + description: "cell type code" + antibody: + type: string + description: "antibody used if ChIP-seq experiment" + description: + type: string + description: "freeform description of the sample" + exp_protocol: + type: string + description: "type of the experiment the file was generated in" + data_source: + type: string + description: "source of the sample, preferably a GSE* code" + treatment: + type: string + description: "freeform description of the sample treatment" + required_files: + - input_file_path + required: + - input_file_path + - output_bed_path + - output_bigbed_path + - genome + - narrowpeak + - sample_name +required: + - samples diff --git a/tests/test_schema.py b/tests/test_schema.py index bc86c34..f09625c 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -7,4 +7,213 @@ not PEPDBAgentContextManager().db_setup(), reason="DB is not setup", ) -class TestSamples: ... +class TestSamples: + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_get(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema = agent.schema.get(namespace=namespace, name=name) + assert agent.schema.exist(namespace=namespace, name=name) + assert schema + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_delete(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + assert agent.schema.exist(namespace=namespace, name=name) + agent.schema.delete(namespace=namespace, name=name) + assert not agent.schema.exist(namespace=namespace, name=name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_update(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema = agent.schema.get(namespace=namespace, name=name) + schema["new"] = "hello" + agent.schema.update(namespace=namespace, name=name, schema=schema) + assert agent.schema.exist(namespace=namespace, name=name) + assert schema == agent.schema.get(namespace=namespace, name=name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_get_annotation(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema_annot = agent.schema.info(namespace=namespace, name=name) + assert schema_annot + assert schema_annot.model_fields_set == { + "namespace", + "name", + "last_update_date", + "submission_date", + "description", + "popularity_number", + } + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_update_annotation(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema_annot = agent.schema.info(namespace=namespace, name=name) + schema = agent.schema.get(namespace=namespace, name=name) + agent.schema.update( + namespace=namespace, name=name, schema=schema, description="new desc" + ) + assert schema_annot != agent.schema.info(namespace=namespace, name=name) + + @pytest.mark.skip("") + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_annotation_popular(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + ... + # TODO: implement this feature + + def test_search(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2") + assert results + assert results.count == 3 + assert len(results.results) == 3 + + def test_search_offset(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", offset=1) + assert results + assert results.count == 3 + assert len(results.results) == 2 + + def test_search_limit(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", limit=1) + assert results + assert results.count == 3 + assert len(results.results) == 1 + + def test_search_limit_offset(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", limit=2, offset=2) + assert results + assert results.count == 3 + assert len(results.results) == 1 + + def test_search_query(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", search_str="bedb") + assert results + assert results.count == 2 + assert len(results.results) == 2 + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_create_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + assert agent.schema.group_exist(namespace=namespace, name=group_name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_delete_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + assert agent.schema.group_exist(namespace=namespace, name=group_name) + agent.schema.group_delete(namespace=namespace, name=group_name) + assert not agent.schema.group_exist(namespace=namespace, name=group_name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_add_to_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + agent.schema.group_add_schema( + namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace + ) + group_annot = agent.schema.group_get(namespace=namespace, name=group_name) + assert group_annot.schemas[0].name == name + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_remove_from_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + agent.schema.group_add_schema( + namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace + ) + group_annot = agent.schema.group_get(namespace=namespace, name=group_name) + assert len(group_annot.schemas) == 1 + + agent.schema.group_remove_schema( + namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace + ) + group_annot = agent.schema.group_get(namespace=namespace, name=group_name) + assert len(group_annot.schemas) == 0 + + def test_search_group(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name1 = "new_group1" + group_name2 = "new2" + group_name3 = "new_group3" + agent.schema.group_create( + namespace="namespace1", name=group_name1, description="new group" + ) + agent.schema.group_create(namespace="namespace1", name=group_name2, description="new") + agent.schema.group_create( + namespace="namespace1", name=group_name3, description="new group" + ) + + results = agent.schema.group_search(search_str="new_group") + + assert results.count == 2 + assert len(results.results) == 2 diff --git a/tests/utils.py b/tests/utils.py index 8ddc820..b96192a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ import os import warnings +import yaml import peppy from sqlalchemy.exc import OperationalError @@ -8,12 +9,21 @@ DSN = "postgresql+psycopg://postgres:pass8743hf9h23f87h437@localhost:5432/pep-db" -DATA_PATH = os.path.join( +TESTS_PATH = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "tests", +) + +DATA_PATH = os.path.join( + TESTS_PATH, "data", ) +SCHEMAS_PATH = os.path.join( + TESTS_PATH, + "schemas", +) + def get_path_to_example_file(namespace: str, project_name: str) -> str: """ @@ -22,6 +32,13 @@ def get_path_to_example_file(namespace: str, project_name: str) -> str: return os.path.join(DATA_PATH, namespace, project_name, "project_config.yaml") +def get_path_to_example_schema(namespace: str, schema_name: str) -> str: + """ + Get path to example schema + """ + return os.path.join(SCHEMAS_PATH, namespace, schema_name) + + def list_of_available_peps() -> dict: pep_namespaces = os.listdir(DATA_PATH) projects = {} @@ -31,12 +48,29 @@ def list_of_available_peps() -> dict: return projects +def list_of_available_schemas() -> dict: + schema_namespaces = os.listdir(SCHEMAS_PATH) + schemas = {} + for np in schema_namespaces: + schema_name = os.listdir(os.path.join(SCHEMAS_PATH, np)) + schemas[np] = {p: get_path_to_example_schema(np, p) for p in schema_name} + return schemas + + +def read_yaml_file(file_path: str) -> dict: + """ + Read yaml file + """ + with open(file_path, "r") as file: + return yaml.safe_load(file) + + class PEPDBAgentContextManager: """ Class with context manager to connect to database. Adds data and drops everything from the database upon exit to ensure. """ - def __init__(self, url: str = DSN, add_data: bool = False): + def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=False, echo=False): """ :param url: database url e.g. "postgresql+psycopg://postgres:docker@localhost:5432/pep-db" :param add_data: add data to the database @@ -44,7 +78,9 @@ def __init__(self, url: str = DSN, add_data: bool = False): self.url = url self._agent = None + self._echo = echo self.add_data = add_data + self.add_schemas = add_schemas def __enter__(self): self._agent = PEPDatabaseAgent(dsn=self.url, echo=False) @@ -52,13 +88,15 @@ def __enter__(self): self.db_engine.create_schema() if self.add_data: self._insert_data() + if self.add_schemas: + self._add_schemas() return self._agent def __exit__(self, exc_type, exc_value, exc_traceback): self.db_engine.delete_schema() def _insert_data(self): - pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=True) + pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=self._echo) for namespace, item in list_of_available_peps().items(): if namespace == "private_test": private = True @@ -76,6 +114,14 @@ def _insert_data(self): pep_schema="random_schema_name", ) + def _add_schemas(self): + pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=self._echo) + for namespace, item in list_of_available_schemas().items(): + for name, path in item.items(): + file_dict = read_yaml_file(path) + + pepdb_con.schema.create(namespace=namespace, name=name[0:-5], schema=file_dict) + @property def agent(self) -> PEPDatabaseAgent: return self._agent From 0e1dc5f11fa29744f7627a9ee73f2e95432c9c5c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 23 Jul 2024 16:47:24 -0400 Subject: [PATCH 10/15] connected projects to schemas --- pepdbagent/models.py | 1 + pepdbagent/modules/annotation.py | 22 ++++++++--- pepdbagent/modules/project.py | 68 +++++++++++++++++++++++++++++--- pepdbagent/utils.py | 13 ++++++ tests/test_updates.py | 25 +++++++++++- tests/utils.py | 8 ++-- 6 files changed, 121 insertions(+), 16 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 6270f4e..78269fb 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -99,6 +99,7 @@ class UpdateItems(BaseModel): samples: Optional[List[dict]] = None subsamples: Optional[List[List[dict]]] = None pop: Optional[bool] = None + schema_id: Optional[int] = None model_config = ConfigDict( arbitrary_types_allowed=True, diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 2b30ccb..c9c7612 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -211,7 +211,11 @@ def _get_single_annotation( submission_date=str(query_result.submission_date), last_update_date=str(query_result.last_update_date), digest=query_result.digest, - pep_schema=query_result.pep_schema, + pep_schema=( + f"{query_result.schema_mapping.namespace}/{query_result.schema_mapping.name}" + if query_result.schema_mapping + else None + ), pop=query_result.pop, stars_number=query_result.number_of_stars, forked_from=( @@ -342,7 +346,11 @@ def _get_projects( submission_date=str(result.submission_date), last_update_date=str(result.last_update_date), digest=result.digest, - pep_schema=result.pep_schema, + pep_schema=( + f"{result.schema_mapping.namespace}/{result.schema_mapping.name}" + if result.schema_mapping + else None + ), pop=result.pop, stars_number=result.number_of_stars, forked_from=( @@ -538,9 +546,9 @@ def get_by_rp_list( statement = select(Projects).where(or_(*or_statement_list)) anno_results = [] with Session(self._sa_engine) as session: - query_result = session.execute(statement).all() + query_result = session.scalars(statement) for result in query_result: - project_obj = result[0] + project_obj = result annot = AnnotationModel( namespace=project_obj.namespace, name=project_obj.name, @@ -551,7 +559,11 @@ def get_by_rp_list( submission_date=str(project_obj.submission_date), last_update_date=str(project_obj.last_update_date), digest=project_obj.digest, - pep_schema=project_obj.pep_schema, + pep_schema=( + f"{project_obj.schema_mapping.namespace}/{project_obj.schema_mapping.name}" + if project_obj.schema_mapping + else None + ), pop=project_obj.pop, stars_number=project_obj.number_of_stars, forked_from=( diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index c4348f1..2ac4303 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -34,6 +34,7 @@ Subsamples, UpdateTypes, User, + Schemas, ) from pepdbagent.exceptions import ( HistoryNotFoundError, @@ -42,6 +43,7 @@ ProjectNotFoundError, ProjectUniqueNameError, SampleTableUpdateError, + SchemaDoesNotExistError, ) from pepdbagent.models import ( HistoryAnnotationModel, @@ -50,7 +52,13 @@ UpdateItems, UpdateModel, ) -from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter +from pepdbagent.utils import ( + create_digest, + generate_guid, + order_samples, + registry_path_converter, + schema_path_converter, +) _LOGGER = logging.getLogger(PKG_NAME) @@ -314,7 +322,7 @@ def create( :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project. :param is_private: boolean value if the project should be visible just for user that creates it. - :param pep_schema: assign PEP to a specific schema. [Default: None] + :param pep_schema: assign PEP to a specific schema. Example: 'namespace/name' [Default: None] :param pop: if project is a pep of peps (POP) [Default: False] :param overwrite: if project exists overwrite the project, otherwise upload it. [Default: False - project won't be overwritten if it exists in db] @@ -356,6 +364,24 @@ def create( except AttributeError: number_of_samples = len(proj_dict[SAMPLE_RAW_DICT_KEY]) + if pep_schema: + schema_namespace, schema_name = schema_path_converter(pep_schema) + with Session(self._sa_engine) as session: + schema_mapping = session.scalar( + select(Schemas).where( + and_( + Schemas.namespace == schema_namespace, + Schemas.name == schema_name, + ) + ) + ) + if not schema_mapping: + raise SchemaDoesNotExistError( + f"Schema {schema_namespace}/{schema_name} does not exist. " + f"Project won't be uploaded." + ) + pep_schema = schema_mapping.id + if update_only: _LOGGER.info(f"Update_only argument is set True. Updating project {proj_name} ...") self._overwrite( @@ -384,7 +410,8 @@ def create( private=is_private, submission_date=datetime.datetime.now(datetime.timezone.utc), last_update_date=datetime.datetime.now(datetime.timezone.utc), - pep_schema=pep_schema, + # pep_schema=pep_schema, + schema_id=pep_schema, description=description, pop=pop, ) @@ -447,7 +474,7 @@ def _overwrite( project_digest: str, number_of_samples: int, private: bool = False, - pep_schema: str = None, + pep_schema: int = None, description: str = "", pop: bool = False, ) -> None: @@ -483,7 +510,8 @@ def _overwrite( found_prj.digest = project_digest found_prj.number_of_samples = number_of_samples found_prj.private = private - found_prj.pep_schema = pep_schema + # found_prj.pep_schema = pep_schema + found_prj.schema_id = pep_schema found_prj.config = project_dict[CONFIG_KEY] found_prj.description = description found_prj.last_update_date = datetime.datetime.now(datetime.timezone.utc) @@ -577,6 +605,8 @@ def update( f"Pep {namespace}/{name}:{tag} was not found. No items will be updated!" ) + self._convert_update_schema_id(session, update_values) + for k, v in update_values.items(): if getattr(found_prj, k) != v: setattr(found_prj, k, v) @@ -647,6 +677,34 @@ def update( else: raise ProjectNotFoundError("No items will be updated!") + @staticmethod + def _convert_update_schema_id(session: Session, update_values: dict): + """ + Convert schema path to schema_id in update_values and update it in update dict + + + :param session: open session object + :param update_values: dict with update key->values + + return None + """ + if "pep_schema" in update_values: + schema_namespace, schema_name = schema_path_converter(update_values["pep_schema"]) + schema_mapping = session.scalar( + select(Schemas).where( + and_( + Schemas.namespace == schema_namespace, + Schemas.name == schema_name, + ) + ) + ) + if not schema_mapping: + raise SchemaDoesNotExistError( + f"Schema {schema_namespace}/{schema_name} does not exist. " + f"Project won't be updated." + ) + update_values["schema_id"] = schema_mapping.id + def _update_samples( self, project_id: int, diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index cbc596b..2c4f990 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -80,6 +80,19 @@ def registry_path_converter(registry_path: str) -> Tuple[str, str, str]: raise RegistryPathError(f"Error in: '{registry_path}'") +def schema_path_converter(schema_path: str) -> Tuple[str, str]: + """ + Convert schema path to namespace, name + + :param schema_path: schema path that has structure: "namespace/name.yaml" + :return: tuple(namespace, name) + """ + if "/" in schema_path: + namespace, name = schema_path.split("/") + return namespace, name + raise RegistryPathError(f"Error in: '{schema_path}'") + + def tuple_converter(value: Union[tuple, list, str, None]) -> tuple: """ Convert string list or tuple to tuple. diff --git a/tests/test_updates.py b/tests/test_updates.py index 790b313..e057024 100644 --- a/tests/test_updates.py +++ b/tests/test_updates.py @@ -89,6 +89,27 @@ def test_update_project_description(self, namespace, name, new_description): == new_description ) + @pytest.mark.parametrize( + "namespace, name, new_schema", + [ + ["namespace1", "amendments1", "bedboss"], + ["namespace2", "derive", "bedboss"], + ], + ) + def test_update_project_schema(self, namespace, name, new_schema): + with PEPDBAgentContextManager(add_data=True) as agent: + prj_annot = agent.annotation.get(namespace=namespace, name=name) + assert prj_annot.results[0].pep_schema == "namespace1/2.0.0" + + agent.project.update( + namespace=namespace, + name=name, + tag="default", + update_dict={"pep_schema": "namespace2/bedboss"}, + ) + prj_annot = agent.annotation.get(namespace=namespace, name=name) + assert prj_annot.results[0].pep_schema == "namespace2/bedboss" + @pytest.mark.parametrize( "namespace, name, new_description", [ @@ -134,8 +155,8 @@ def test_update_whole_project(self, namespace, name): @pytest.mark.parametrize( "namespace, name, pep_schema", [ - ["namespace1", "amendments1", "schema1"], - ["namespace2", "derive", "schema3"], + ["namespace1", "amendments1", "namespace2/bedmaker"], + ["namespace2", "derive", "namespace2/bedbuncher"], ], ) def test_update_pep_schema(self, namespace, name, pep_schema): diff --git a/tests/utils.py b/tests/utils.py index b96192a..9a325d4 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -70,7 +70,7 @@ class PEPDBAgentContextManager: Class with context manager to connect to database. Adds data and drops everything from the database upon exit to ensure. """ - def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=False, echo=False): + def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=True, echo=False): """ :param url: database url e.g. "postgresql+psycopg://postgres:docker@localhost:5432/pep-db" :param add_data: add data to the database @@ -86,10 +86,10 @@ def __enter__(self): self._agent = PEPDatabaseAgent(dsn=self.url, echo=False) self.db_engine = self._agent.pep_db_engine self.db_engine.create_schema() - if self.add_data: - self._insert_data() if self.add_schemas: self._add_schemas() + if self.add_data: + self._insert_data() return self._agent def __exit__(self, exc_type, exc_value, exc_traceback): @@ -111,7 +111,7 @@ def _insert_data(self): is_private=private, project=prj, overwrite=True, - pep_schema="random_schema_name", + pep_schema="namespace1/2.0.0", ) def _add_schemas(self): From 6681748e74e92fbdb40213a0a1d9ae9d0a3e722a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 23 Jul 2024 16:51:40 -0400 Subject: [PATCH 11/15] cleaning, restructuring --- pepdbagent/modules/project.py | 2 +- pepdbagent/modules/schema.py | 54 ++++++----------------------------- pepdbagent/pepdbagent.py | 2 +- scripts/update_db.py | 27 ++++++++++++++++++ tests/test_namespace.py | 2 +- tests/test_schema.py | 14 +++++---- tests/utils.py | 2 +- 7 files changed, 48 insertions(+), 55 deletions(-) create mode 100644 scripts/update_db.py diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 2ac4303..d7cb181 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -31,10 +31,10 @@ HistorySamples, Projects, Samples, + Schemas, Subsamples, UpdateTypes, User, - Schemas, ) from pepdbagent.exceptions import ( HistoryNotFoundError, diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 4091891..71ccd0f 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -1,52 +1,26 @@ -import datetime -import json import logging -from typing import Dict, List, NoReturn, Union - -import numpy as np -import peppy -from peppy.const import ( - CONFIG_KEY, - SAMPLE_NAME_ATTR, - SAMPLE_RAW_DICT_KEY, - SAMPLE_TABLE_INDEX_KEY, - SUBSAMPLE_RAW_LIST_KEY, -) -from sqlalchemy import Select, and_, delete, select, or_, func + +from sqlalchemy import Select, and_, delete, func, or_, select +from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified -from sqlalchemy.exc import IntegrityError, NoResultFound - -from pepdbagent.const import ( - DEFAULT_TAG, - DESCRIPTION_KEY, - MAX_HISTORY_SAMPLES_NUMBER, - NAME_KEY, - PEPHUB_SAMPLE_ID_KEY, - PKG_NAME, -) -from pepdbagent.db_utils import ( - BaseEngine, - Schemas, - SchemaGroups, - SchemaGroupRelations, - User, -) + +from pepdbagent.const import PKG_NAME +from pepdbagent.db_utils import BaseEngine, SchemaGroupRelations, SchemaGroups, Schemas, User from pepdbagent.exceptions import ( SchemaAlreadyExistsError, + SchemaAlreadyInGroupError, SchemaDoesNotExistError, SchemaGroupAlreadyExistsError, SchemaGroupDoesNotExistError, - SchemaAlreadyInGroupError, SchemaIsNotInGroupError, ) from pepdbagent.models import ( SchemaAnnotation, - SchemaSearchResult, SchemaGroupAnnotation, SchemaGroupSearchResult, + SchemaSearchResult, ) -from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter _LOGGER = logging.getLogger(PKG_NAME) @@ -569,18 +543,6 @@ def group_remove_schema( try: with Session(self._sa_engine) as session: - - a = session.scalar( - select(Schemas).where( - and_(Schemas.namespace == schema_namespace, Schemas.name == schema_name) - ) - ) - b = session.scalar( - select(SchemaGroups).where( - and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) - ) - ) - delete_statement = delete(SchemaGroupRelations).where( and_( SchemaGroupRelations.schema_id diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index dc64cb9..6365af1 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -4,9 +4,9 @@ from pepdbagent.modules.namespace import PEPDatabaseNamespace from pepdbagent.modules.project import PEPDatabaseProject from pepdbagent.modules.sample import PEPDatabaseSample +from pepdbagent.modules.schema import PEPDatabaseSchema from pepdbagent.modules.user import PEPDatabaseUser from pepdbagent.modules.view import PEPDatabaseView -from pepdbagent.modules.schema import PEPDatabaseSchema class PEPDatabaseAgent(object): diff --git a/scripts/update_db.py b/scripts/update_db.py new file mode 100644 index 0000000..1cc4fe4 --- /dev/null +++ b/scripts/update_db.py @@ -0,0 +1,27 @@ +import os + +from dotenv import load_dotenv +from tqdm import tqdm + +from pepdbagent import PEPDatabaseAgent + +load_dotenv() + + +def update(): + agent = PEPDatabaseAgent( + user=os.environ.get("POSTGRES_USER"), + password=os.environ.get("POSTGRES_PASSWORD"), + host=os.environ.get("POSTGRES_HOST"), + database=os.environ.get("POSTGRES_DB"), + # port=os.environ.get("POSTGRES_PORT"), + ) + + if_list = agent.update.get_namespace_projects("geo") + + for i in tqdm(if_list, desc="Updating projects"): + agent.update.update_parent_project(i) + + +if __name__ == "__main__": + update() diff --git a/tests/test_namespace.py b/tests/test_namespace.py index 196f6e8..d4fd309 100644 --- a/tests/test_namespace.py +++ b/tests/test_namespace.py @@ -170,7 +170,7 @@ class TestUser: def test_create_user(self): with PEPDBAgentContextManager(add_data=True) as agent: - user = agent.user.create_user("test_user") + agent.user.create_user("test_user") assert agent.user.exists("test_user") diff --git a/tests/test_schema.py b/tests/test_schema.py index f09625c..52e329c 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -81,17 +81,21 @@ def test_update_annotation(self, namespace, name): ) assert schema_annot != agent.schema.info(namespace=namespace, name=name) - @pytest.mark.skip("") @pytest.mark.parametrize( "namespace, name", [ - ["namespace1", "2.0.0"], + ["namespace2", "bedboss"], ], ) def test_annotation_popular(self, namespace, name): - with PEPDBAgentContextManager(add_schemas=True) as agent: - ... - # TODO: implement this feature + with PEPDBAgentContextManager(add_data=True, add_schemas=True) as agent: + agent.project.update( + namespace="namespace1", + name="amendments1", + update_dict={"pep_schema": "namespace2/bedboss"}, + ) + schema_annot = agent.schema.info(namespace=namespace, name=name) + assert schema_annot.popularity_number == 1 def test_search(self): with PEPDBAgentContextManager(add_schemas=True) as agent: diff --git a/tests/utils.py b/tests/utils.py index 9a325d4..2afc623 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,8 +1,8 @@ import os import warnings -import yaml import peppy +import yaml from sqlalchemy.exc import OperationalError from pepdbagent import PEPDatabaseAgent From 4cab3d7f582543ffaf5f342edd388778bce929b1 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 23 Jul 2024 16:53:11 -0400 Subject: [PATCH 12/15] deleted unused file --- scripts/update_db.py | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 scripts/update_db.py diff --git a/scripts/update_db.py b/scripts/update_db.py deleted file mode 100644 index 1cc4fe4..0000000 --- a/scripts/update_db.py +++ /dev/null @@ -1,27 +0,0 @@ -import os - -from dotenv import load_dotenv -from tqdm import tqdm - -from pepdbagent import PEPDatabaseAgent - -load_dotenv() - - -def update(): - agent = PEPDatabaseAgent( - user=os.environ.get("POSTGRES_USER"), - password=os.environ.get("POSTGRES_PASSWORD"), - host=os.environ.get("POSTGRES_HOST"), - database=os.environ.get("POSTGRES_DB"), - # port=os.environ.get("POSTGRES_PORT"), - ) - - if_list = agent.update.get_namespace_projects("geo") - - for i in tqdm(if_list, desc="Updating projects"): - agent.update.update_parent_project(i) - - -if __name__ == "__main__": - update() From be1d67b7e1e29455b0a2dad9744f1d87e19a4ac2 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 24 Jul 2024 11:42:39 -0400 Subject: [PATCH 13/15] warning fixws --- pepdbagent/modules/schema.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 71ccd0f..bfcad3c 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -189,6 +189,11 @@ def create( :param update_only: update only schema if exists [Default: False] """ + if description: + schema["description"] = description + else: + description = schema.get("description", "") + if self.exist(namespace, name): if overwrite: self.update(namespace, name, schema, description) @@ -553,7 +558,7 @@ def group_remove_schema( Schemas.name == schema_name, ) ) - .subquery(), + .scalar_subquery(), SchemaGroupRelations.group_id == select(SchemaGroups.id) .where( @@ -562,7 +567,7 @@ def group_remove_schema( SchemaGroups.name == name, ) ) - .subquery(), + .scalar_subquery(), ) ) From 25aa92a583378cca6a79965ef4cc9f8a2c7a0c92 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 24 Jul 2024 12:47:26 -0400 Subject: [PATCH 14/15] changelog --- docs/changelog.md | 4 ++++ pepdbagent/_version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 6586c4b..204c54d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.11.0] -- 2024-07-24 +- Added validation schemas + + ## [0.10.0] -- 2024-07-18 - Added user delete method - Added project history and restoring projects diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 61fb31c..ae6db5f 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.10.0" +__version__ = "0.11.0" From f3265be3999632fa3d95d68b16864abaa98a0d3c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 24 Jul 2024 13:25:28 -0400 Subject: [PATCH 15/15] added order by to schemas --- pepdbagent/modules/schema.py | 47 +++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index bfcad3c..4294476 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -92,7 +92,13 @@ def info(self, namespace: str, name: str) -> SchemaAnnotation: ) def search( - self, namespace: str = None, search_str: str = "", limit: int = 100, offset: int = 0 + self, + namespace: str = None, + search_str: str = "", + limit: int = 100, + offset: int = 0, + order_by: str = "update_date", + order_desc: bool = False, ) -> SchemaSearchResult: """ Search schemas in the database. @@ -101,6 +107,10 @@ def search( :param search_str: query string. [Default: ""]. If empty, return all schemas :param limit: limit number of schemas [Default: 100] :param offset: offset number of schemas [Default: 0] + :param order_by: sort the result-set by the information + Options: ["name", "update_date", "submission_date"] + [Default: update_date] + :param order_desc: Sort the records in descending order. [Default: False] :return: list of schema dicts """ @@ -108,6 +118,7 @@ def search( statement = select(Schemas) statement = self._add_condition(statement, namespace, search_str) statement = statement.limit(limit).offset(offset) + statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) return_list = [] @@ -151,6 +162,40 @@ def _count_search(self, namespace: str = None, search_str: str = "") -> int: return result[0] + @staticmethod + def _add_order_by_keyword( + statement: Select, by: str = "update_date", desc: bool = False + ) -> Select: + """ + Add order by clause to sqlalchemy statement + + :param statement: sqlalchemy representation of a SELECT statement. + :param by: sort the result-set by the information + Options: ["name", "update_date", "submission_date"] + [Default: "update_date"] + :param desc: Sort the records in descending order. [Default: False] + :return: sqlalchemy representation of a SELECT statement with order by keyword + """ + if by == "update_date": + order_by_obj = Schemas.last_update_date + elif by == "name": + order_by_obj = Schemas.name + elif by == "submission_date": + order_by_obj = Schemas.submission_date + else: + _LOGGER.warning( + f"order by: '{by}' statement is unavailable. Projects are sorted by 'update_date'" + ) + order_by_obj = Schemas.last_update_date + + if desc and by == "name": + order_by_obj = order_by_obj.desc() + + elif by != "name" and not desc: + order_by_obj = order_by_obj.desc() + + return statement.order_by(order_by_obj) + @staticmethod def _add_condition( statement: Select,