From 6ebf644b7b414facb4eebb5ee75a12f84f659888 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 9 Jul 2024 12:02:30 -0400
Subject: [PATCH 01/15] work on schemas

---
 pepdbagent/db_utils.py        |  59 +++++++++
 pepdbagent/modules/schemas.py | 229 ++++++++++++++++++++++++++++++++++
 tests/test_schema.py          |  11 ++
 3 files changed, 299 insertions(+)
 create mode 100644 pepdbagent/modules/schemas.py
 create mode 100644 tests/test_schema.py

diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py
index ea36bd8..cd7e59d 100644
--- a/pepdbagent/db_utils.py
+++ b/pepdbagent/db_utils.py
@@ -296,6 +296,65 @@ class HistorySamples(Base):
     )
 
 
+class Schemas(Base):
+
+    __tablename__ = "schemas"
+
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)
+    namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE"))
+    name: Mapped[str] = mapped_column(nullable=False, index=True)
+    description: Mapped[Optional[str]] = mapped_column(nullable=True, index=True)
+    schema_json: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue())
+    private: Mapped[bool] = mapped_column(default=False)
+    submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date)
+    last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column(
+        default=deliver_update_date, onupdate=deliver_update_date
+    )
+
+    group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship(
+        "SchemaGroupRelations", back_populates="schema_mapping"
+    )
+
+    __table_args__ = (UniqueConstraint("namespace", "name"),)
+
+
+class SchemaGroups(Base):
+
+    __tablename__ = "schema_groups"
+
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)
+    namespace: Mapped[str] = mapped_column(
+        ForeignKey("users.namespace", ondelete="CASCADE"), index=True
+    )
+    name: Mapped[str] = mapped_column(nullable=False, index=True)
+    description: Mapped[Optional[str]] = mapped_column(nullable=True)
+
+    schema_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship(
+        "SchemaGroupRelations", back_populates="group_mapping"
+    )
+
+    __table_args__ = (UniqueConstraint("namespace", "name"),)
+
+
+class SchemaGroupRelations(Base):
+
+    __tablename__ = "schema_group_relations"
+
+    schema_id: Mapped[int] = mapped_column(
+        ForeignKey("schemas.id", ondelete="CASCADE"), index=True, primary_key=True
+    )
+    group_id: Mapped[int] = mapped_column(
+        ForeignKey("schema_groups.id", ondelete="CASCADE"), index=True, primary_key=True
+    )
+
+    schema_mapping: Mapped["Schemas"] = relationship(
+        "Schemas", back_populates="group_relation_mapping"
+    )
+    group_mapping: Mapped["SchemaGroups"] = relationship(
+        "SchemaGroups", back_populates="schema_relation_mapping"
+    )
+
+
 class BaseEngine:
     """
     A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all
diff --git a/pepdbagent/modules/schemas.py b/pepdbagent/modules/schemas.py
new file mode 100644
index 0000000..41d87f5
--- /dev/null
+++ b/pepdbagent/modules/schemas.py
@@ -0,0 +1,229 @@
+import datetime
+import json
+import logging
+from typing import Dict, List, NoReturn, Union
+
+import numpy as np
+import peppy
+from peppy.const import (
+    CONFIG_KEY,
+    SAMPLE_NAME_ATTR,
+    SAMPLE_RAW_DICT_KEY,
+    SAMPLE_TABLE_INDEX_KEY,
+    SUBSAMPLE_RAW_LIST_KEY,
+)
+from sqlalchemy import Select, and_, delete, select
+from sqlalchemy.exc import IntegrityError, NoResultFound
+from sqlalchemy.orm import Session
+from sqlalchemy.orm.attributes import flag_modified
+
+from pepdbagent.const import (
+    DEFAULT_TAG,
+    DESCRIPTION_KEY,
+    MAX_HISTORY_SAMPLES_NUMBER,
+    NAME_KEY,
+    PEPHUB_SAMPLE_ID_KEY,
+    PKG_NAME,
+)
+from pepdbagent.db_utils import (
+    BaseEngine,
+    HistoryProjects,
+    HistorySamples,
+    Projects,
+    Samples,
+    Subsamples,
+    UpdateTypes,
+    User,
+)
+from pepdbagent.exceptions import (
+    HistoryNotFoundError,
+    PEPDatabaseAgentError,
+    ProjectDuplicatedSampleGUIDsError,
+    ProjectNotFoundError,
+    ProjectUniqueNameError,
+    SampleTableUpdateError,
+)
+from pepdbagent.models import (
+    HistoryAnnotationModel,
+    HistoryChangeModel,
+    ProjectDict,
+    UpdateItems,
+    UpdateModel,
+)
+from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter
+
+_LOGGER = logging.getLogger(PKG_NAME)
+
+
+class PEPDatabaseSchemas:
+    """
+    Class that represents Schemas in Database.
+
+    While using this class, user can create, retrieve, delete, and update schemas from database
+    """
+
+    def __init__(self, pep_db_engine: BaseEngine):
+        """
+        :param pep_db_engine: pepdbengine object with sa engine
+        """
+        self._sa_engine = pep_db_engine.engine
+        self._pep_db_engine = pep_db_engine
+
+    def get(self, namespace: str, name: str) -> Dict:
+        """
+        Get schema from the database.
+
+        :param namespace: user namespace
+        :param name: schema name
+
+        :return: schema dict
+        """
+        ...
+
+    def search(self, namespace: str = None, query: str = "") -> ...:
+        """
+        Search schemas in the database.
+
+        :param namespace: user namespace [Default: None]. If None, search in all namespaces
+        :param query: query string. [Default: ""]. If empty, return all schemas
+
+        :return: list of schema dicts
+        """
+        ...
+
+    def create(
+        self,
+        namespace: str,
+        name: str,
+        schema: dict,
+        description: str = "",
+        # private: bool = False, # TODO: for simplicity was not implemented yet
+        overwrite: bool = False,
+        update_only: bool = False,
+    ) -> None:
+        """
+        Create or update schema in the database.
+
+        :param namespace: user namespace
+        :param name: schema name
+        :param schema: schema dict
+        :param description: schema description [Default: ""]
+        :param overwrite: overwrite schema if exists [Default: False]
+        :param update_only: update only schema if exists [Default: False]
+        """
+        ...
+
+    def update(
+        self,
+        namespace: str,
+        name: str,
+        schema: dict,
+        description: str = "",
+        # private: bool = False, # TODO: for simplicity was not implemented yet
+    ) -> None:
+        """
+        Update schema in the database.
+
+        :param namespace: user namespace
+        :param name: schema name
+        :param schema: schema dict
+        :param description: schema description [Default: ""]
+
+        :return: None
+        """
+        ...
+
+    def delete(self, namespace: str, name: str) -> None:
+        """
+        Delete schema from the database.
+
+        :param namespace: user namespace
+        :param name: schema name
+
+        :return: None
+        """
+        ...
+
+    def exist(self, namespace: str, name: str) -> bool:
+        """
+        Check if schema exists in the database.
+
+        :param namespace: user namespace
+        :param name: schema name
+
+        :return: True if schema exists, False otherwise
+        """
+        ...
+
+    def group_create(self, namespace: str, name: str, description: str = "") -> None:
+        """
+        Create schema group in the database.
+
+        :param namespace: user namespace
+        :param name: schema group name
+        :param description: schema group description [Default: ""]
+        """
+        ...
+
+    def group_get(self, namespace: str, name: str) -> ...:
+        """
+        Get schema group from the database.
+
+        :param namespace: user namespace
+        :param name: schema group name
+
+        :return: ...
+        """
+        ...
+
+    def group_search(self, namespace: str = None, query: str = "") -> ...:
+        """
+        Search schema groups in the database.
+
+        :param namespace: user namespace [Default: None]. If None, search in all namespaces
+        :param query: query string. [Default: ""]. If empty, return all schema groups
+
+        :return: list of schema group dicts
+        """
+        ...
+
+    def group_delete(self, namespace: str, name: str) -> None:
+        """
+        Delete schema group from the database.
+
+        :param namespace: user namespace
+        :param name: schema group name
+
+        :return: None
+        """
+        ...
+
+    def group_add_schema(
+        self, namespace: str, name: str, schema_namespace: str, schema_name: str
+    ) -> None:
+        """
+        Add schema to the schema group.
+
+        :param namespace: user namespace
+        :param name: schema group name
+        :param schema_namespace: schema namespace
+        :param schema_name: schema name
+
+        :return: None
+        """
+        ...
+
+    def group_remove_schema(
+        self, namespace: str, name: str, schema_namespace: str, schema_name: str
+    ) -> None:
+        """
+        Remove schema from the schema group.
+
+        :param namespace: user namespace
+        :param name: schema group name
+        :param schema_namespace: schema namespace
+        :param schema_name: schema name
+
+        :return: None
+        """
+        ...
diff --git a/tests/test_schema.py b/tests/test_schema.py
new file mode 100644
index 0000000..fa9efd8
--- /dev/null
+++ b/tests/test_schema.py
@@ -0,0 +1,11 @@
+import pytest
+
+from .utils import PEPDBAgentContextManager
+
+
+@pytest.mark.skipif(
+    not PEPDBAgentContextManager().db_setup(),
+    reason="DB is not setup",
+)
+class TestSamples:
+    ...
\ No newline at end of file

From afd334df44ad2efbf23d37450f47d42525a495d7 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 9 Jul 2024 14:25:14 -0400
Subject: [PATCH 02/15] work on schemas

---
 pepdbagent/db_utils.py                       |  4 ++++
 pepdbagent/modules/{schemas.py => schema.py} | 11 +++++++++++
 2 files changed, 15 insertions(+)
 rename pepdbagent/modules/{schemas.py => schema.py} (95%)

diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py
index cd7e59d..df52815 100644
--- a/pepdbagent/db_utils.py
+++ b/pepdbagent/db_utils.py
@@ -87,6 +87,10 @@ class Projects(Base):
         default=deliver_update_date,  # onupdate=deliver_update_date, # This field should not be updated, while we are adding project to favorites
     )
     pep_schema: Mapped[Optional[str]]
+
+    schema_id: Mapped[Optional[int]] = mapped_column(ForeignKey("schemas.id", ondelete="SET NULL"), nullable=True)
+    schema_mapping: Mapped["Schemas"] = relationship("Schemas", lazy="joined")
+
     pop: Mapped[Optional[bool]] = mapped_column(default=False)
     samples_mapping: Mapped[List["Samples"]] = relationship(
         back_populates="project_mapping", cascade="all, delete-orphan"
diff --git a/pepdbagent/modules/schemas.py b/pepdbagent/modules/schema.py
similarity index 95%
rename from pepdbagent/modules/schemas.py
rename to pepdbagent/modules/schema.py
index 41d87f5..3318355 100644
--- a/pepdbagent/modules/schemas.py
+++ b/pepdbagent/modules/schema.py
@@ -227,3 +227,14 @@ def group_remove_schema(
         :return: None
         """
         ...
+
+    def group_exist(self, namespace: str, name: str) -> bool:
+        """
+        Check if schema group exists in the database.
+
+        :param namespace: user namespace
+        :param name: schema group name
+
+        :return: True if schema group exists, False otherwise
+        """
+        ...
\ No newline at end of file

From a07f6bb07281e83d6cf2609a16e47714e2cf8270 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 9 Jul 2024 16:46:04 -0400
Subject: [PATCH 03/15] work on schemas 2

---
 pepdbagent/db_utils.py       |   4 +-
 pepdbagent/exceptions.py     |  10 ++
 pepdbagent/models.py         |  23 +++++
 pepdbagent/modules/schema.py | 179 +++++++++++++++++++++++++++++------
 tests/test_schema.py         |   3 +-
 5 files changed, 188 insertions(+), 31 deletions(-)

diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py
index df52815..251a551 100644
--- a/pepdbagent/db_utils.py
+++ b/pepdbagent/db_utils.py
@@ -88,7 +88,9 @@ class Projects(Base):
     )
     pep_schema: Mapped[Optional[str]]
 
-    schema_id: Mapped[Optional[int]] = mapped_column(ForeignKey("schemas.id", ondelete="SET NULL"), nullable=True)
+    schema_id: Mapped[Optional[int]] = mapped_column(
+        ForeignKey("schemas.id", ondelete="SET NULL"), nullable=True
+    )
     schema_mapping: Mapped["Schemas"] = relationship("Schemas", lazy="joined")
 
     pop: Mapped[Optional[bool]] = mapped_column(default=False)
diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py
index 64e5278..5512ed6 100644
--- a/pepdbagent/exceptions.py
+++ b/pepdbagent/exceptions.py
@@ -117,3 +117,13 @@ def __init__(self, msg=""):
 class UserNotFoundError(PEPDatabaseAgentError):
     def __init__(self, msg=""):
         super().__init__(f"""User does not exist. {msg}""")
+
+
+class SchemaDoesNotExistError(PEPDatabaseAgentError):
+    def __init__(self, msg=""):
+        super().__init__(f"""Schema does not exist. {msg}""")
+
+
+class SchemaAlreadyExistsError(PEPDatabaseAgentError):
+    def __init__(self, msg=""):
+        super().__init__(f"""Schema already exists. {msg}""")
diff --git a/pepdbagent/models.py b/pepdbagent/models.py
index c138d18..8bbddc0 100644
--- a/pepdbagent/models.py
+++ b/pepdbagent/models.py
@@ -246,3 +246,26 @@ class HistoryAnnotationModel(BaseModel):
     name: str
     tag: str = DEFAULT_TAG
     history: List[HistoryChangeModel]
+
+
+class SchemaAnnotation(BaseModel):
+    """
+    Schema annotation model
+    """
+
+    namespace: str
+    name: str
+    last_update_date: Optional[str]
+    submission_date: Optional[str]
+    description: Optional[str]
+
+
+class SchemaSearchResult(BaseModel):
+    """
+    Schema search result model
+    """
+
+    count: int
+    limit: int
+    offset: int
+    results: List[SchemaAnnotation]
diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index 3318355..cc1c1d1 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -12,7 +12,7 @@
     SAMPLE_TABLE_INDEX_KEY,
     SUBSAMPLE_RAW_LIST_KEY,
 )
-from sqlalchemy import Select, and_, delete, select
+from sqlalchemy import Select, and_, delete, select, or_
 from sqlalchemy.exc import IntegrityError, NoResultFound
 from sqlalchemy.orm import Session
 from sqlalchemy.orm.attributes import flag_modified
@@ -27,28 +27,17 @@
 )
 from pepdbagent.db_utils import (
     BaseEngine,
-    HistoryProjects,
-    HistorySamples,
-    Projects,
-    Samples,
-    Subsamples,
-    UpdateTypes,
-    User,
+    Schemas,
+    SchemaGroups,
+    SchemaGroupRelations,
 )
 from pepdbagent.exceptions import (
-    HistoryNotFoundError,
-    PEPDatabaseAgentError,
-    ProjectDuplicatedSampleGUIDsError,
-    ProjectNotFoundError,
-    ProjectUniqueNameError,
-    SampleTableUpdateError,
+    SchemaAlreadyExistsError,
+    SchemaDoesNotExistError,
 )
 from pepdbagent.models import (
-    HistoryAnnotationModel,
-    HistoryChangeModel,
-    ProjectDict,
-    UpdateItems,
-    UpdateModel,
+    SchemaAnnotation,
+    SchemaSearchResult,
 )
 from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter
 
@@ -69,7 +58,7 @@ def __init__(self, pep_db_engine: BaseEngine):
         self._sa_engine = pep_db_engine.engine
         self._pep_db_engine = pep_db_engine
 
-    def get(self, namespace: str, name: str) -> Dict:
+    def get(self, namespace: str, name: str) -> dict:
         """
         Get schema from the database.
 
@@ -78,18 +67,99 @@ def get(self, namespace: str, name: str) -> Dict:
 
         :return: schema dict
         """
-        ...
 
-    def search(self, namespace: str = None, query: str = "") -> ...:
+        with Session(self._sa_engine) as session:
+            schema_obj = session.scalar(
+                select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name))
+            )
+
+            if not schema_obj:
+                raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database")
+
+            return schema_obj.schema_json
+
+    def info(self, namespace: str, name: str) -> SchemaAnnotation:
+        """
+        Get schema information from the database.
+
+        :param namespace: user namespace
+        :param name: schema name
+
+        :return: SchemaAnnotation object:
+                    - namespace: schema namespace
+                    - name: schema name
+                    - last_update_date: last update date
+                    - submission_date: submission date
+                    - description: schema description
+        """
+
+        with Session(self._sa_engine) as session:
+            schema_obj = session.scalar(
+                select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name))
+            )
+
+            if not schema_obj:
+                raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database")
+
+            return SchemaAnnotation(
+                namespace=schema_obj.namespace,
+                name=schema_obj.name,
+                last_update_date=schema_obj.last_update_date,
+                submission_date=schema_obj.submission_date,
+                description=schema_obj.description,
+            )
+
+    def search(
+        self, namespace: str = None, query: str = "", limit: int = 100, offset: int = 0
+    ) -> SchemaSearchResult:
         """
         Search schemas in the database.
 
         :param namespace: user namespace [Default: None]. If None, search in all namespaces
         :param query: query string. [Default: ""]. If empty, return all schemas
+        :param limit: limit number of schemas [Default: 100]
+        :param offset: offset number of schemas [Default: 0]
 
         :return: list of schema dicts
         """
-        ...
+
+        statement = select(Schemas)
+
+        # TODO: add count to the result
+        if query:
+            sql_search_str = f"%{query}%"
+            search_query = or_(
+                Schemas.name.ilike(sql_search_str),
+                Schemas.description.ilike(sql_search_str),
+            )
+            statement = statement.where(search_query)
+        if namespace:
+            statement = statement.where(Schemas.namespace == namespace)
+
+        statement = statement.limit(limit).offset(offset)
+
+        return_list = []
+
+        with Session(self._sa_engine) as session:
+            results = session.scalars(statement)
+
+            for result in results:
+                return_list.append(
+                    SchemaAnnotation(
+                        namespace=result.namespace,
+                        name=result.name,
+                        last_update_date=result.last_update_date,
+                        submission_date=result.submission_date,
+                        description=result.description,
+                    )
+                )
+
+        return SchemaSearchResult(
+            count=0,
+            limit=limit,
+            offset=offset,
+            results=return_list,
+        )
 
     def create(
         self,
@@ -111,7 +181,32 @@ def create(
         :param overwrite: overwrite schema if exists [Default: False]
         :param update_only: update only schema if exists [Default: False]
         """
-        ...
+
+        if self.exist(namespace, name):
+            if overwrite:
+                self.update(namespace, name, schema, description)
+                return None
+            elif update_only:
+                self.update(namespace, name, schema, description)
+                return None
+            else:
+                raise SchemaAlreadyExistsError(f"Schema '{name}' already exists in the database")
+
+        if update_only:
+            raise SchemaDoesNotExistError(
+                f"Schema '{name}' does not exist in the database"
+                f"Cannot update schema that does not exist"
+            )
+
+        with Session(self._sa_engine) as session:
+            schema_obj = Schemas(
+                namespace=namespace,
+                name=name,
+                schema_json=schema,
+                description=description,
+            )
+            session.add(schema_obj)
+            session.commit()
 
     def update(
         self,
@@ -131,7 +226,19 @@ def update(
 
         :return: None
         """
-        ...
+
+        with Session(self._sa_engine) as session:
+            schema_obj = session.scalar(
+                select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name))
+            )
+
+            if not schema_obj:
+                raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database")
+
+            schema_obj.schema_json = schema
+            schema_obj.description = description
+
+            session.commit()
 
     def delete(self, namespace: str, name: str) -> None:
         """
@@ -142,7 +249,18 @@ def delete(self, namespace: str, name: str) -> None:
 
         :return: None
         """
-        ...
+
+        with Session(self._sa_engine) as session:
+            schema_obj = session.scalar(
+                select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name))
+            )
+
+            if not schema_obj:
+                raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database")
+
+            session.delete(schema_obj)
+
+            session.commit()
 
     def exist(self, namespace: str, name: str) -> bool:
         """
@@ -153,7 +271,12 @@ def exist(self, namespace: str, name: str) -> bool:
 
         :return: True if schema exists, False otherwise
         """
-        ...
+
+        with Session(self._sa_engine) as session:
+            schema_obj = session.scalar(
+                select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name))
+            )
+            return True if schema_obj else False
 
     def group_create(self, namespace: str, name: str, description: str = "") -> None:
         """
@@ -237,4 +360,4 @@ def group_exist(self, namespace: str, name: str) -> bool:
 
         :return: True if schema group exists, False otherwise
         """
-        ...
\ No newline at end of file
+        ...
diff --git a/tests/test_schema.py b/tests/test_schema.py
index fa9efd8..bc86c34 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -7,5 +7,4 @@
     not PEPDBAgentContextManager().db_setup(),
     reason="DB is not setup",
 )
-class TestSamples:
-    ...
\ No newline at end of file
+class TestSamples: ...

From 6cbb8c9f270d12c3dc34fee2f34b3e5911c64514 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Wed, 10 Jul 2024 13:15:09 -0400
Subject: [PATCH 04/15] work on schemas

---
 pepdbagent/exceptions.py     |  10 ++
 pepdbagent/models.py         |  22 ++++
 pepdbagent/modules/schema.py | 227 ++++++++++++++++++++++++++++++-----
 3 files changed, 232 insertions(+), 27 deletions(-)

diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py
index 5512ed6..61d45fd 100644
--- a/pepdbagent/exceptions.py
+++ b/pepdbagent/exceptions.py
@@ -127,3 +127,13 @@ def __init__(self, msg=""):
 class SchemaAlreadyExistsError(PEPDatabaseAgentError):
     def __init__(self, msg=""):
         super().__init__(f"""Schema already exists. {msg}""")
+
+
+class SchemaGroupDoesNotExistError(PEPDatabaseAgentError):
+    def __init__(self, msg=""):
+        super().__init__(f"""Schema group does not exist. {msg}""")
+
+
+class SchemaGroupAlreadyExistsError(PEPDatabaseAgentError):
+    def __init__(self, msg=""):
+        super().__init__(f"""Schema group already exists. {msg}""")
diff --git a/pepdbagent/models.py b/pepdbagent/models.py
index 8bbddc0..c9ae67e 100644
--- a/pepdbagent/models.py
+++ b/pepdbagent/models.py
@@ -269,3 +269,25 @@ class SchemaSearchResult(BaseModel):
     limit: int
     offset: int
     results: List[SchemaAnnotation]
+
+
+class SchemaGroupAnnotation(BaseModel):
+    """
+    Schema group annotation model
+    """
+
+    namespace: str
+    name: str
+    description: Optional[str]
+    schemas: List[SchemaAnnotation]
+
+
+class SchemaGroupSearchResult(BaseModel):
+    """
+    Schema group search result model
+    """
+
+    count: int
+    limit: int
+    offset: int
+    results: List[SchemaGroupAnnotation]
diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index cc1c1d1..5d75bfd 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -12,10 +12,10 @@
     SAMPLE_TABLE_INDEX_KEY,
     SUBSAMPLE_RAW_LIST_KEY,
 )
-from sqlalchemy import Select, and_, delete, select, or_
-from sqlalchemy.exc import IntegrityError, NoResultFound
+from sqlalchemy import Select, and_, delete, select, or_, func
 from sqlalchemy.orm import Session
 from sqlalchemy.orm.attributes import flag_modified
+from sqlalchemy.exc import IntegrityError, NoResultFound
 
 from pepdbagent.const import (
     DEFAULT_TAG,
@@ -34,10 +34,14 @@
 from pepdbagent.exceptions import (
     SchemaAlreadyExistsError,
     SchemaDoesNotExistError,
+    SchemaGroupAlreadyExistsError,
+    SchemaGroupDoesNotExistError,
 )
 from pepdbagent.models import (
     SchemaAnnotation,
     SchemaSearchResult,
+    SchemaGroupAnnotation,
+    SchemaGroupSearchResult,
 )
 from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter
 
@@ -110,13 +114,13 @@ def info(self, namespace: str, name: str) -> SchemaAnnotation:
             )
 
     def search(
-        self, namespace: str = None, query: str = "", limit: int = 100, offset: int = 0
+        self, namespace: str = None, search_str: str = "", limit: int = 100, offset: int = 0
     ) -> SchemaSearchResult:
         """
         Search schemas in the database.
 
         :param namespace: user namespace [Default: None]. If None, search in all namespaces
-        :param query: query string. [Default: ""]. If empty, return all schemas
+        :param search_str: query string. [Default: ""]. If empty, return all schemas
         :param limit: limit number of schemas [Default: 100]
         :param offset: offset number of schemas [Default: 0]
 
@@ -124,18 +128,7 @@ def search(
         """
 
         statement = select(Schemas)
-
-        # TODO: add count to the result
-        if query:
-            sql_search_str = f"%{query}%"
-            search_query = or_(
-                Schemas.name.ilike(sql_search_str),
-                Schemas.description.ilike(sql_search_str),
-            )
-            statement = statement.where(search_query)
-        if namespace:
-            statement = statement.where(Schemas.namespace == namespace)
-
+        statement = self._add_condition(statement, namespace, search_str)
         statement = statement.limit(limit).offset(offset)
 
         return_list = []
@@ -155,12 +148,47 @@ def search(
                 )
 
         return SchemaSearchResult(
-            count=0,
+            count=self._count_search(namespace=namespace, search_str=search_str),
             limit=limit,
             offset=offset,
             results=return_list,
         )
 
+    def _count_search(self, namespace: str = None, search_str: str = "") -> int:
+        """
+        Count number of found schemas
+
+        :param namespace: user namespace [Default: None]. If None, search in all namespaces
+        :param search_str: query string. [Default: ""]. If empty, return all schemas
+
+        :return: list of schema dicts
+        """
+        statement = select(func.count(Schemas.id))
+
+        statement = self._add_condition(statement, namespace, search_str)
+
+        with Session(self._sa_engine) as session:
+            result = session.execute(statement).one()
+
+            return result[0]
+
+    @staticmethod
+    def _add_condition(
+        statement: Select,
+        namespace: str = None,
+        search_str: str = None,
+    ) -> Select:
+        if search_str:
+            sql_search_str = f"%{search_str}%"
+            search_query = or_(
+                Schemas.name.ilike(sql_search_str),
+                Schemas.description.ilike(sql_search_str),
+            )
+            statement = statement.where(search_query)
+        if namespace:
+            statement = statement.where(Schemas.namespace == namespace)
+        return statement
+
     def create(
         self,
         namespace: str,
@@ -237,6 +265,7 @@ def update(
 
             schema_obj.schema_json = schema
             schema_obj.description = description
+            flag_modified(schema_obj, "schema_json")
 
             session.commit()
 
@@ -285,30 +314,154 @@ def group_create(self, namespace: str, name: str, description: str = "") -> None
         :param namespace: user namespace
         :param name: schema group name
         :param description: schema group description [Default: ""]
+
+        :return: None
         """
-        ...
+        try:
+            with Session(self._sa_engine) as session:
+                session.add(
+                    SchemaGroups(
+                        namespace=namespace,
+                        name=name,
+                        description=description,
+                    )
+                )
+                session.commit()
 
-    def group_get(self, namespace: str, name: str) -> ...:
+        except IntegrityError:
+            raise SchemaGroupAlreadyExistsError
+
+    def group_get(self, namespace: str, name: str) -> SchemaGroupAnnotation:
         """
         Get schema group from the database.
 
         :param namespace: user namespace
         :param name: schema group name
 
-        :return: ...
+        :return: SchemaGroupAnnotation object:
+            - namespace: schema group namespace
+            - name: schema group name
+            - description: schema group description
+            - schemas: list of SchemaAnnotation objects
         """
-        ...
 
-    def group_search(self, namespace: str = None, query: str = "") -> ...:
+        with Session(self._sa_engine) as session:
+            schema_group_obj = session.scalar(
+                select(SchemaGroups).where(
+                    and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name)
+                )
+            )
+
+            if not schema_group_obj:
+                raise SchemaGroupDoesNotExistError(
+                    f"Schema group '{name}' does not exist in the database"
+                )
+
+            schemas = []
+            for schema_relation in schema_group_obj.schema_relation_mapping:
+                schema_annotation = schema_relation.schema_mapping
+                schemas.append(
+                    SchemaAnnotation(
+                        namespace=schema_annotation.namespace,
+                        name=schema_annotation.name,
+                        last_update_date=schema_annotation.last_update_date,
+                        submission_date=schema_annotation.submission_date,
+                        desciription=schema_annotation.description,
+                    )
+                )
+
+            return SchemaGroupAnnotation(
+                namespace=schema_group_obj.namespace,
+                name=schema_group_obj.name,
+                description=schema_group_obj.description,
+                schemas=schemas,
+            )
+
+    def group_search(
+        self, namespace: str = None, search_str: str = "", limit: int = 100, offset: int = 0
+    ) -> SchemaGroupSearchResult:
         """
         Search schema groups in the database.
 
         :param namespace: user namespace [Default: None]. If None, search in all namespaces
-        :param query: query string. [Default: ""]. If empty, return all schema groups
+        :param search_str: query string. [Default: ""]. If empty, return all schema groups
+        :param limit: limit of the search
+        :param offset: offset of the search
+
+        :return: SchemaGroupSearchResult object:
+            - count: number of found schema groups
+            - limit: limit number of schema groups
+            - offset: offset number of schema groups
+            - results: list of SchemaGroupAnnotation objects
+        """
+
+        statement = select(SchemaGroups)
+        statement = self._add_group_condition(
+            statement=statement, namespace=namespace, search_str=search_str
+        )
+
+        with Session(self._sa_engine) as session:
+            results = session.scalars(statement)
+
+            return_results = []
+            for result in results:
+                return_results.append(
+                    SchemaGroupAnnotation(
+                        namespace=result.namespace,
+                        name=result.name,
+                        description=result.description,
+                        schemas=[],
+                    )
+                )
+
+        return SchemaGroupSearchResult(
+            count=self._group_search_count(namespace, search_str),
+            limit=limit,
+            offset=offset,
+            results=return_results,
+        )
+
+    @staticmethod
+    def _add_group_condition(
+        statement: Select,
+        namespace: str = None,
+        search_str: str = "",
+    ) -> Select:
+        """
+        Add query condition to statement in group search
+
+        :param statement: Select statement
+        :param namespace: Namespace of schema group [Default: None]. If none set, all search in all namespaces
+        :param search_str: Search string to look for schemas. Search in name and description of the group
+        """
+        if search_str:
+            sql_search_str = f"%{search_str}%"
+            search_query = or_(
+                SchemaGroups.name.ilike(sql_search_str),
+                SchemaGroups.description.ilike(sql_search_str),
+            )
+            statement = statement.where(search_query)
+        if namespace:
+            statement = statement.where(SchemaGroups.namespace == namespace)
+        return statement
+
+    def _group_search_count(self, namespace: str = None, search_str: str = ""):
+        """
+        Count number of found group of schemas
+
+        :param namespace: user namespace [Default: None]. If None, search in all namespaces
+        :param search_str: query string. [Default: ""]. If empty, return all schemas
 
-        :return: list of schema group dicts
+        :return: list of schema dicts
         """
-        ...
+        statement = select(func.count(SchemaGroups.id))
+
+        statement = self._add_condition(statement, namespace, search_str)
+
+        with Session(self._sa_engine) as session:
+            result = session.execute(statement).one()
+
+            return result[0]
 
     def group_delete(self, namespace: str, name: str) -> None:
         """
@@ -319,7 +472,20 @@ def group_delete(self, namespace: str, name: str) -> None:
 
         :return: None
         """
-        ...
+
+        if self.group_exist(namespace, name):
+            raise SchemaGroupDoesNotExistError(
+                f"Schema group '{name}' does not exist in the database"
+            )
+
+        with Session(self._sa_engine) as session:
+            session.execute(
+                delete(SchemaGroups).where(
+                    and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name)
+                )
+            )
+
+            session.commit()
 
     def group_add_schema(
         self, namespace: str, name: str, schema_namespace: str, schema_name: str
@@ -360,4 +526,11 @@ def group_exist(self, namespace: str, name: str) -> bool:
 
         :return: True if schema group exists, False otherwise
         """
-        ...
+
+        with Session(self._sa_engine) as session:
+            schema_group_obj = session.scalar(
+                select(SchemaGroups).where(
+                    and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name)
+                )
+            )
+            return True if schema_group_obj else False

From 1418e1f6fa918ed295f18521533bd4d4944b220b Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Wed, 10 Jul 2024 14:10:16 -0400
Subject: [PATCH 05/15] more work on schemas

---
 pepdbagent/exceptions.py     | 10 +++++
 pepdbagent/modules/schema.py | 73 +++++++++++++++++++++++++++++++++++-
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py
index 61d45fd..caf152b 100644
--- a/pepdbagent/exceptions.py
+++ b/pepdbagent/exceptions.py
@@ -137,3 +137,13 @@ def __init__(self, msg=""):
 class SchemaGroupAlreadyExistsError(PEPDatabaseAgentError):
     def __init__(self, msg=""):
         super().__init__(f"""Schema group already exists. {msg}""")
+
+
+class SchemaAlreadyInGroupError(PEPDatabaseAgentError):
+    def __init__(self, msg=""):
+        super().__init__(f"""Schema already in the group. {msg}""")
+
+
+class SchemaIsNotInGroupError(PEPDatabaseAgentError):
+    def __init__(self, msg=""):
+        super().__init__(f"""Schema not found in group. {msg}""")
diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index 5d75bfd..05db131 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -36,6 +36,8 @@
     SchemaDoesNotExistError,
     SchemaGroupAlreadyExistsError,
     SchemaGroupDoesNotExistError,
+    SchemaAlreadyInGroupError,
+    SchemaIsNotInGroupError,
 )
 from pepdbagent.models import (
     SchemaAnnotation,
@@ -500,7 +502,46 @@ def group_add_schema(
 
         :return: None
         """
-        ...
+
+        try:
+            with Session(self._sa_engine) as session:
+                group_mapping = session.scalar(
+                    select(SchemaGroups).where(
+                        and_(
+                            SchemaGroups.namespace == namespace,
+                            SchemaGroups.name == name,
+                        )
+                    )
+                )
+
+                if not group_mapping:
+                    raise SchemaGroupDoesNotExistError(
+                        f"Group of Schemas with namespace='{namespace}' and name='{name}' does not exist"
+                    )
+
+                schema_mapping = session.scalar(
+                    select(Schemas).where(
+                        and_(
+                            Schemas.namespace == schema_namespace,
+                            Schemas.name == schema_name,
+                        )
+                    )
+                )
+
+                if not schema_mapping:
+                    raise SchemaDoesNotExistError(
+                        f"Schema with namespace='{schema_namespace}' and name='{schema_name}' does not exist"
+                    )
+
+                session.add(
+                    SchemaGroupRelations(
+                        schema_id=schema_mapping.id,
+                        group_id=group_mapping.id,
+                    )
+                )
+                session.commit()
+        except IntegrityError:
+            raise SchemaAlreadyInGroupError
 
     def group_remove_schema(
         self, namespace: str, name: str, schema_namespace: str, schema_name: str
@@ -515,7 +556,35 @@ def group_remove_schema(
 
         :return: None
         """
-        ...
+
+        try:
+            with Session(self._sa_engine) as session:
+                session.execute(
+                    delete(SchemaGroupRelations).where(
+                        and_(
+                            SchemaGroupRelations.schema_id
+                            == select(Schemas.id)
+                            .where(
+                                and_(
+                                    Schemas.namespace == schema_namespace,
+                                    Schemas.name == schema_name,
+                                )
+                            )
+                            .subquery(),
+                            SchemaGroupRelations.group_id
+                            == select(SchemaGroups.id)
+                            .where(
+                                and_(
+                                    SchemaGroups.namespace == namespace,
+                                    SchemaGroups.name == name,
+                                )
+                            )
+                            .subquery(),
+                        )
+                    )
+                )
+        except IntegrityError:
+            raise SchemaIsNotInGroupError("Schema not found in the group")
 
     def group_exist(self, namespace: str, name: str) -> bool:
         """

From cdc1b6853b5d07cf1fe584a1b4e71460a826f80e Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Mon, 22 Jul 2024 11:33:48 -0400
Subject: [PATCH 06/15] added schemas to main pepdbagent class

---
 pepdbagent/modules/schema.py | 2 +-
 pepdbagent/pepdbagent.py     | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index 05db131..09045e8 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -50,7 +50,7 @@
 _LOGGER = logging.getLogger(PKG_NAME)
 
 
-class PEPDatabaseSchemas:
+class PEPDatabaseSchema:
     """
     Class that represents Schemas in Database.
 
diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py
index 1519c33..dc64cb9 100644
--- a/pepdbagent/pepdbagent.py
+++ b/pepdbagent/pepdbagent.py
@@ -6,6 +6,7 @@
 from pepdbagent.modules.sample import PEPDatabaseSample
 from pepdbagent.modules.user import PEPDatabaseUser
 from pepdbagent.modules.view import PEPDatabaseView
+from pepdbagent.modules.schema import PEPDatabaseSchema
 
 
 class PEPDatabaseAgent(object):
@@ -54,6 +55,7 @@ def __init__(
         self._sample = PEPDatabaseSample(pep_db_engine)
         self._user = PEPDatabaseUser(pep_db_engine)
         self._view = PEPDatabaseView(pep_db_engine)
+        self._schema = PEPDatabaseSchema(pep_db_engine)
 
         self._db_name = database
 
@@ -81,6 +83,10 @@ def sample(self) -> PEPDatabaseSample:
     def view(self) -> PEPDatabaseView:
         return self._view
 
+    @property
+    def schema(self) -> PEPDatabaseSchema:
+        return self._schema
+
     def __str__(self):
         return f"Connection to the database: '{self.__db_name}' is set!"
 

From 2082ce872da58fdfb117323e90b7477f43895c4e Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Mon, 22 Jul 2024 13:24:21 -0400
Subject: [PATCH 07/15] fixed schema annotation model

---
 pepdbagent/models.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pepdbagent/models.py b/pepdbagent/models.py
index c9ae67e..46ba418 100644
--- a/pepdbagent/models.py
+++ b/pepdbagent/models.py
@@ -255,9 +255,9 @@ class SchemaAnnotation(BaseModel):
 
     namespace: str
     name: str
-    last_update_date: Optional[str]
-    submission_date: Optional[str]
-    description: Optional[str]
+    last_update_date: Optional[datetime.datetime]
+    submission_date: Optional[datetime.datetime]
+    description: Optional[datetime.datetime]
 
 
 class SchemaSearchResult(BaseModel):

From 39070cca2e1a1150abc828f6fec1bf1b018317ac Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Mon, 22 Jul 2024 13:53:59 -0400
Subject: [PATCH 08/15] fixed string description

---
 pepdbagent/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pepdbagent/models.py b/pepdbagent/models.py
index 46ba418..d0281cd 100644
--- a/pepdbagent/models.py
+++ b/pepdbagent/models.py
@@ -257,7 +257,7 @@ class SchemaAnnotation(BaseModel):
     name: str
     last_update_date: Optional[datetime.datetime]
     submission_date: Optional[datetime.datetime]
-    description: Optional[datetime.datetime]
+    description: Optional[str]
 
 
 class SchemaSearchResult(BaseModel):

From 9550289b6c92b1f500a4489d6f711089bc6dcb61 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 23 Jul 2024 15:28:52 -0400
Subject: [PATCH 09/15] tests + bug fix

---
 pepdbagent/db_utils.py                   |   5 +-
 pepdbagent/models.py                     |   7 +-
 pepdbagent/modules/schema.py             |  77 ++++++---
 tests/schemas/namespace1/2.0.0.yaml      |  69 ++++++++
 tests/schemas/namespace1/2.1.0.yaml      |  77 +++++++++
 tests/schemas/namespace2/bedboss.yaml    |  47 +++++
 tests/schemas/namespace2/bedbuncher.yaml |  25 +++
 tests/schemas/namespace2/bedmaker.yaml   |  59 +++++++
 tests/test_schema.py                     | 211 ++++++++++++++++++++++-
 tests/utils.py                           |  52 +++++-
 10 files changed, 594 insertions(+), 35 deletions(-)
 create mode 100644 tests/schemas/namespace1/2.0.0.yaml
 create mode 100644 tests/schemas/namespace1/2.1.0.yaml
 create mode 100644 tests/schemas/namespace2/bedboss.yaml
 create mode 100644 tests/schemas/namespace2/bedbuncher.yaml
 create mode 100644 tests/schemas/namespace2/bedmaker.yaml

diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py
index 251a551..de1a9f3 100644
--- a/pepdbagent/db_utils.py
+++ b/pepdbagent/db_utils.py
@@ -129,7 +129,7 @@ class Projects(Base):
 
     history_mapping: Mapped[List["HistoryProjects"]] = relationship(
         back_populates="project_mapping", cascade="all, delete-orphan"
-    )  # TODO: check if cascade is correct
+    )
 
     __table_args__ = (UniqueConstraint("namespace", "name", "tag"),)
 
@@ -317,6 +317,9 @@ class Schemas(Base):
         default=deliver_update_date, onupdate=deliver_update_date
     )
 
+    projects_mappings: Mapped[List["Projects"]] = relationship(
+        "Projects", back_populates="schema_mapping"
+    )
     group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship(
         "SchemaGroupRelations", back_populates="schema_mapping"
     )
diff --git a/pepdbagent/models.py b/pepdbagent/models.py
index d0281cd..6270f4e 100644
--- a/pepdbagent/models.py
+++ b/pepdbagent/models.py
@@ -255,9 +255,10 @@ class SchemaAnnotation(BaseModel):
 
     namespace: str
     name: str
-    last_update_date: Optional[datetime.datetime]
-    submission_date: Optional[datetime.datetime]
-    description: Optional[str]
+    last_update_date: str
+    submission_date: str
+    description: Optional[str] = ""
+    popularity_number: Optional[int] = 0
 
 
 class SchemaSearchResult(BaseModel):
diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index 09045e8..4091891 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -30,6 +30,7 @@
     Schemas,
     SchemaGroups,
     SchemaGroupRelations,
+    User,
 )
 from pepdbagent.exceptions import (
     SchemaAlreadyExistsError,
@@ -110,9 +111,10 @@ def info(self, namespace: str, name: str) -> SchemaAnnotation:
             return SchemaAnnotation(
                 namespace=schema_obj.namespace,
                 name=schema_obj.name,
-                last_update_date=schema_obj.last_update_date,
-                submission_date=schema_obj.submission_date,
+                last_update_date=str(schema_obj.last_update_date),
+                submission_date=str(schema_obj.submission_date),
                 description=schema_obj.description,
+                popularity_number=len(schema_obj.projects_mappings),
             )
 
     def search(
@@ -143,9 +145,10 @@ def search(
                     SchemaAnnotation(
                         namespace=result.namespace,
                         name=result.name,
-                        last_update_date=result.last_update_date,
-                        submission_date=result.submission_date,
+                        last_update_date=str(result.last_update_date),
+                        submission_date=str(result.submission_date),
                         description=result.description,
+                        # popularity_number=sum(result.projects_mappings),
                     )
                 )
 
@@ -229,6 +232,13 @@ def create(
             )
 
         with Session(self._sa_engine) as session:
+            user = session.scalar(select(User).where(User.namespace == namespace))
+
+            if not user:
+                user = User(namespace=namespace)
+                session.add(user)
+                session.commit()
+
             schema_obj = Schemas(
                 namespace=namespace,
                 name=name,
@@ -366,8 +376,8 @@ def group_get(self, namespace: str, name: str) -> SchemaGroupAnnotation:
                     SchemaAnnotation(
                         namespace=schema_annotation.namespace,
                         name=schema_annotation.name,
-                        last_update_date=schema_annotation.last_update_date,
-                        submission_date=schema_annotation.submission_date,
+                        last_update_date=str(schema_annotation.last_update_date),
+                        submission_date=str(schema_annotation.submission_date),
                         desciription=schema_annotation.description,
                     )
                 )
@@ -458,7 +468,7 @@ def _group_search_count(self, namespace: str = None, search_str: str = ""):
         """
         statement = select(func.count(SchemaGroups.id))
 
-        statement = self._add_condition(statement, namespace, search_str)
+        statement = self._add_group_condition(statement, namespace, search_str)
 
         with Session(self._sa_engine) as session:
             result = session.execute(statement).one()
@@ -475,7 +485,7 @@ def group_delete(self, namespace: str, name: str) -> None:
         :return: None
         """
 
-        if self.group_exist(namespace, name):
+        if not self.group_exist(namespace, name):
             raise SchemaGroupDoesNotExistError(
                 f"Schema group '{name}' does not exist in the database"
             )
@@ -559,30 +569,43 @@ def group_remove_schema(
 
         try:
             with Session(self._sa_engine) as session:
-                session.execute(
-                    delete(SchemaGroupRelations).where(
-                        and_(
-                            SchemaGroupRelations.schema_id
-                            == select(Schemas.id)
-                            .where(
-                                and_(
-                                    Schemas.namespace == schema_namespace,
-                                    Schemas.name == schema_name,
-                                )
+
+                a = session.scalar(
+                    select(Schemas).where(
+                        and_(Schemas.namespace == schema_namespace, Schemas.name == schema_name)
+                    )
+                )
+                b = session.scalar(
+                    select(SchemaGroups).where(
+                        and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name)
+                    )
+                )
+
+                delete_statement = delete(SchemaGroupRelations).where(
+                    and_(
+                        SchemaGroupRelations.schema_id
+                        == select(Schemas.id)
+                        .where(
+                            and_(
+                                Schemas.namespace == schema_namespace,
+                                Schemas.name == schema_name,
                             )
-                            .subquery(),
-                            SchemaGroupRelations.group_id
-                            == select(SchemaGroups.id)
-                            .where(
-                                and_(
-                                    SchemaGroups.namespace == namespace,
-                                    SchemaGroups.name == name,
-                                )
+                        )
+                        .subquery(),
+                        SchemaGroupRelations.group_id
+                        == select(SchemaGroups.id)
+                        .where(
+                            and_(
+                                SchemaGroups.namespace == namespace,
+                                SchemaGroups.name == name,
                             )
-                            .subquery(),
                         )
+                        .subquery(),
                     )
                 )
+
+                session.execute(delete_statement)
+                session.commit()
         except IntegrityError:
             raise SchemaIsNotInGroupError("Schema not found in the group")
 
diff --git a/tests/schemas/namespace1/2.0.0.yaml b/tests/schemas/namespace1/2.0.0.yaml
new file mode 100644
index 0000000..56f2034
--- /dev/null
+++ b/tests/schemas/namespace1/2.0.0.yaml
@@ -0,0 +1,69 @@
+description: "Schema for a minimal PEP"
+version: "2.0.0"
+properties:
+  config:
+    properties:
+      name: 
+        type: string
+        pattern: "^\\S*$"
+        description: "Project name with no whitespace"
+      pep_version:
+        description: "Version of the PEP Schema this PEP follows"
+        type: string
+      sample_table:
+        type: string
+        description: "Path to the sample annotation table with one row per sample"
+      subsample_table:
+        type: string
+        description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table"
+      sample_modifiers:
+        type: object
+        properties:
+          append:
+            type: object
+          duplicate:
+            type: object
+          imply:
+            type: array
+            items:
+              type: object
+              properties:
+                if:
+                  type: object
+                then:
+                  type: object
+          derive:
+            type: object
+            properties:
+              attributes:
+                type: array
+                items:
+                  type: string
+              sources:
+                type: object
+        project_modifiers:
+          type: object
+          properties:
+            amend:
+              description: "Object overwriting original project attributes"
+              type: object
+            import:
+              description: "List of external PEP project config files to import"
+              type: array
+              items:
+                type: string
+    required:
+      - pep_version
+  samples:
+    type: array
+    items:
+      type: object
+      properties:
+        sample_name: 
+          type: string
+          pattern: "^\\S*$"
+          description: "Unique name of the sample with no whitespace"
+      required:
+        - sample_name
+required:
+  - samples
diff --git a/tests/schemas/namespace1/2.1.0.yaml b/tests/schemas/namespace1/2.1.0.yaml
new file mode 100644
index 0000000..e3982aa
--- /dev/null
+++ b/tests/schemas/namespace1/2.1.0.yaml
@@ -0,0 +1,77 @@
+description: "Schema for a minimal PEP"
+version: "2.1.0"
+properties:
+  config:
+    properties:
+      name: 
+        type: string
+        pattern: "^\\S*$"
+        description: "Project name with no whitespace"
+      pep_version:
+        description: "Version of the PEP Schema this PEP follows"
+        type: string
+      sample_table:
+        type: string
+        description: "Path to the sample annotation table"
+      subsample_table:
+        type: string
+        description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table"
+      sample_table_index: 
+        type: string
+        pattern: "^\\S*$"
+        description: "Name of the column in sample table to use as an index. It's 'sample_name' by default"
+      subsample_table_index: 
+        type: array
+        items:
+          type: string
+          pattern: "^\\S*$"
+        description: "Names of the columns in subsample table to use as an index. It's ['sample_name', 'subsample_name'] by default"
+      sample_modifiers:
+        type: object
+        properties:
+          append:
+            type: object
+          duplicate:
+            type: object
+          imply:
+            type: array
+            items:
+              type: object
+              properties:
+                if:
+                  type: object
+                then:
+                  type: object
+          derive:
+            type: object
+            properties:
+              attributes:
+                type: array
+                items:
+                  type: string
+              sources:
+                type: object
+        project_modifiers:
+          type: object
+          properties:
+            amend:
+              description: "Object overwriting original project attributes"
+              type: object
+            import:
+              description: "List of external PEP project config files to import"
+              type: array
+              items:
+                type: string
+    required:
+      - pep_version
+  samples:
+    type: array
+    items:
+      type: object
+      properties:
+        sample_name: 
+          type: string
+          pattern: "^\\S*$"
+          description: "Unique name of the sample with no whitespace"
+required:
+  - samples
diff --git a/tests/schemas/namespace2/bedboss.yaml b/tests/schemas/namespace2/bedboss.yaml
new file mode 100644
index 0000000..e6ffa0c
--- /dev/null
+++ b/tests/schemas/namespace2/bedboss.yaml
@@ -0,0 +1,47 @@
+description: bedboss run-all pep schema
+
+properties:
+  samples:
+    type: array
+    items:
+      type: object
+      properties:
+        sample_name: 
+          type: string
+          description: "Name of the sample"
+        input_file:
+          type: string
+          description: "Absolute path to the input file"
+        input_type:
+          type: string
+          description: "file format"
+          enum: [ "bigWig", "bigBed", "bed", "wig", "bedGraph" ]
+        genome:
+          type: string
+          description: "organism genome code"
+        format_type:
+          type: string
+          description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)"
+          enum: [ "narrowPeak", "broadPeak" ]
+        description:
+          type: string
+          description: "freeform description of the sample"
+        open_signal_matrix:
+          type: string
+          description: "A full path to the openSignalMatrix required for the tissue"
+        chrom_sizes:
+          type: string
+          description: "A full path to the chrom.sizes required for the bedtobigbed conversion"
+        treatment:
+          type: string
+          description: "freeform description of the sample treatment"
+        cell_type:
+          type: string
+          description: "cell type code"
+      required:
+        - sample_name
+        - input_file
+        - input_type
+        - genome
+required:
+  - samples
diff --git a/tests/schemas/namespace2/bedbuncher.yaml b/tests/schemas/namespace2/bedbuncher.yaml
new file mode 100644
index 0000000..cd42998
--- /dev/null
+++ b/tests/schemas/namespace2/bedbuncher.yaml
@@ -0,0 +1,25 @@
+description: bedbuncher PEP schema
+imports: 
+  - http://schema.databio.org/pep/2.0.0.yaml
+
+properties:
+  samples:
+    type: array
+    items:
+      type: object
+      properties:
+        JSONquery_path: 
+          type: string
+          description: "path to the JSON file with the Elasticsearch query"
+        bedset_name:
+          type: string
+          pattern: "^\\S*$"
+          description: "name of the bedset that will be created"
+        bbconfig_path:
+          type: string
+          description: "path to bedbase config file"
+      required:
+        - JSONquery_path
+        - bedset_name
+required:
+  - samples
\ No newline at end of file
diff --git a/tests/schemas/namespace2/bedmaker.yaml b/tests/schemas/namespace2/bedmaker.yaml
new file mode 100644
index 0000000..93806d0
--- /dev/null
+++ b/tests/schemas/namespace2/bedmaker.yaml
@@ -0,0 +1,59 @@
+description: bedmaker PEP schema
+
+properties:
+  samples:
+    type: array
+    items:
+      type: object
+      properties:
+        sample_name: 
+          type: string
+          description: "name of the sample, which is the name of the output BED file"
+        input_file_path: 
+          type: string
+          description: "absolute path the file to convert"
+        output_bed_path: 
+          type: string
+          description: "absolute path the file to the output BED file (derived attribute)"
+        output_bigbed_path: 
+          type: string
+          description: "absolute path the file to the output bigBed file (derived attribute)"
+        genome:
+          type: string
+          description: "organism genome code"
+        narrowpeak:
+          type: boolean
+          description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)"
+        format:
+          type: string
+          description: "file format"
+          enum: ["bigWig", "bigBed", "bed", "wig", "bedGraph"]
+        cell_type:
+          type: string
+          description: "cell type code"
+        antibody:
+          type: string
+          description: "antibody used if ChIP-seq experiment"
+        description:
+          type: string
+          description: "freeform description of the sample"
+        exp_protocol:
+          type: string
+          description: "type of the experiment the file was generated in"
+        data_source:
+          type: string
+          description: "source of the sample, preferably a GSE* code"
+        treatment:
+          type: string
+          description: "freeform description of the sample treatment"
+      required_files:
+        - input_file_path
+      required:
+        - input_file_path
+        - output_bed_path
+        - output_bigbed_path
+        - genome
+        - narrowpeak
+        - sample_name
+required:
+  - samples
diff --git a/tests/test_schema.py b/tests/test_schema.py
index bc86c34..f09625c 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -7,4 +7,213 @@
     not PEPDBAgentContextManager().db_setup(),
     reason="DB is not setup",
 )
-class TestSamples: ...
+class TestSamples:
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_get(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            schema = agent.schema.get(namespace=namespace, name=name)
+            assert agent.schema.exist(namespace=namespace, name=name)
+            assert schema
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_delete(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            assert agent.schema.exist(namespace=namespace, name=name)
+            agent.schema.delete(namespace=namespace, name=name)
+            assert not agent.schema.exist(namespace=namespace, name=name)
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_update(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            schema = agent.schema.get(namespace=namespace, name=name)
+            schema["new"] = "hello"
+            agent.schema.update(namespace=namespace, name=name, schema=schema)
+            assert agent.schema.exist(namespace=namespace, name=name)
+            assert schema == agent.schema.get(namespace=namespace, name=name)
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_get_annotation(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            schema_annot = agent.schema.info(namespace=namespace, name=name)
+            assert schema_annot
+            assert schema_annot.model_fields_set == {
+                "namespace",
+                "name",
+                "last_update_date",
+                "submission_date",
+                "description",
+                "popularity_number",
+            }
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_update_annotation(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            schema_annot = agent.schema.info(namespace=namespace, name=name)
+            schema = agent.schema.get(namespace=namespace, name=name)
+            agent.schema.update(
+                namespace=namespace, name=name, schema=schema, description="new desc"
+            )
+            assert schema_annot != agent.schema.info(namespace=namespace, name=name)
+
+    @pytest.mark.skip("")
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_annotation_popular(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            ...
+            # TODO: implement this feature
+
+    def test_search(self):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            results = agent.schema.search(namespace="namespace2")
+            assert results
+            assert results.count == 3
+            assert len(results.results) == 3
+
+    def test_search_offset(self):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            results = agent.schema.search(namespace="namespace2", offset=1)
+            assert results
+            assert results.count == 3
+            assert len(results.results) == 2
+
+    def test_search_limit(self):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            results = agent.schema.search(namespace="namespace2", limit=1)
+            assert results
+            assert results.count == 3
+            assert len(results.results) == 1
+
+    def test_search_limit_offset(self):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            results = agent.schema.search(namespace="namespace2", limit=2, offset=2)
+            assert results
+            assert results.count == 3
+            assert len(results.results) == 1
+
+    def test_search_query(self):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            results = agent.schema.search(namespace="namespace2", search_str="bedb")
+            assert results
+            assert results.count == 2
+            assert len(results.results) == 2
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_create_group(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            group_name = "new_group"
+            agent.schema.group_create(
+                namespace=namespace, name=group_name, description="new group"
+            )
+            assert agent.schema.group_exist(namespace=namespace, name=group_name)
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_delete_group(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            group_name = "new_group"
+            agent.schema.group_create(
+                namespace=namespace, name=group_name, description="new group"
+            )
+            assert agent.schema.group_exist(namespace=namespace, name=group_name)
+            agent.schema.group_delete(namespace=namespace, name=group_name)
+            assert not agent.schema.group_exist(namespace=namespace, name=group_name)
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_add_to_group(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            group_name = "new_group"
+            agent.schema.group_create(
+                namespace=namespace, name=group_name, description="new group"
+            )
+            agent.schema.group_add_schema(
+                namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace
+            )
+            group_annot = agent.schema.group_get(namespace=namespace, name=group_name)
+            assert group_annot.schemas[0].name == name
+
+    @pytest.mark.parametrize(
+        "namespace, name",
+        [
+            ["namespace1", "2.0.0"],
+        ],
+    )
+    def test_remove_from_group(self, namespace, name):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            group_name = "new_group"
+            agent.schema.group_create(
+                namespace=namespace, name=group_name, description="new group"
+            )
+            agent.schema.group_add_schema(
+                namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace
+            )
+            group_annot = agent.schema.group_get(namespace=namespace, name=group_name)
+            assert len(group_annot.schemas) == 1
+
+            agent.schema.group_remove_schema(
+                namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace
+            )
+            group_annot = agent.schema.group_get(namespace=namespace, name=group_name)
+            assert len(group_annot.schemas) == 0
+
+    def test_search_group(self):
+        with PEPDBAgentContextManager(add_schemas=True) as agent:
+            group_name1 = "new_group1"
+            group_name2 = "new2"
+            group_name3 = "new_group3"
+            agent.schema.group_create(
+                namespace="namespace1", name=group_name1, description="new group"
+            )
+            agent.schema.group_create(namespace="namespace1", name=group_name2, description="new")
+            agent.schema.group_create(
+                namespace="namespace1", name=group_name3, description="new group"
+            )
+
+            results = agent.schema.group_search(search_str="new_group")
+
+            assert results.count == 2
+            assert len(results.results) == 2
diff --git a/tests/utils.py b/tests/utils.py
index 8ddc820..b96192a 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,5 +1,6 @@
 import os
 import warnings
+import yaml
 
 import peppy
 from sqlalchemy.exc import OperationalError
@@ -8,12 +9,21 @@
 
 DSN = "postgresql+psycopg://postgres:pass8743hf9h23f87h437@localhost:5432/pep-db"
 
-DATA_PATH = os.path.join(
+TESTS_PATH = os.path.join(
     os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
     "tests",
+)
+
+DATA_PATH = os.path.join(
+    TESTS_PATH,
     "data",
 )
 
+SCHEMAS_PATH = os.path.join(
+    TESTS_PATH,
+    "schemas",
+)
+
 
 def get_path_to_example_file(namespace: str, project_name: str) -> str:
     """
@@ -22,6 +32,13 @@ def get_path_to_example_file(namespace: str, project_name: str) -> str:
     return os.path.join(DATA_PATH, namespace, project_name, "project_config.yaml")
 
 
+def get_path_to_example_schema(namespace: str, schema_name: str) -> str:
+    """
+    Get path to example schema
+    """
+    return os.path.join(SCHEMAS_PATH, namespace, schema_name)
+
+
 def list_of_available_peps() -> dict:
     pep_namespaces = os.listdir(DATA_PATH)
     projects = {}
@@ -31,12 +48,29 @@ def list_of_available_peps() -> dict:
     return projects
 
 
+def list_of_available_schemas() -> dict:
+    schema_namespaces = os.listdir(SCHEMAS_PATH)
+    schemas = {}
+    for np in schema_namespaces:
+        schema_name = os.listdir(os.path.join(SCHEMAS_PATH, np))
+        schemas[np] = {p: get_path_to_example_schema(np, p) for p in schema_name}
+    return schemas
+
+
+def read_yaml_file(file_path: str) -> dict:
+    """
+    Read yaml file
+    """
+    with open(file_path, "r") as file:
+        return yaml.safe_load(file)
+
+
 class PEPDBAgentContextManager:
     """
     Class with context manager to connect to database. Adds data and drops everything from the database upon exit to ensure.
     """
 
-    def __init__(self, url: str = DSN, add_data: bool = False):
+    def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=False, echo=False):
         """
         :param url: database url e.g. "postgresql+psycopg://postgres:docker@localhost:5432/pep-db"
         :param add_data: add data to the database
@@ -44,7 +78,9 @@ def __init__(self, url: str = DSN, add_data: bool = False):
 
         self.url = url
         self._agent = None
+        self._echo = echo
         self.add_data = add_data
+        self.add_schemas = add_schemas
 
     def __enter__(self):
         self._agent = PEPDatabaseAgent(dsn=self.url, echo=False)
@@ -52,13 +88,15 @@ def __enter__(self):
         self.db_engine.create_schema()
         if self.add_data:
             self._insert_data()
+        if self.add_schemas:
+            self._add_schemas()
         return self._agent
 
     def __exit__(self, exc_type, exc_value, exc_traceback):
         self.db_engine.delete_schema()
 
     def _insert_data(self):
-        pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=True)
+        pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=self._echo)
         for namespace, item in list_of_available_peps().items():
             if namespace == "private_test":
                 private = True
@@ -76,6 +114,14 @@ def _insert_data(self):
                     pep_schema="random_schema_name",
                 )
 
+    def _add_schemas(self):
+        pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=self._echo)
+        for namespace, item in list_of_available_schemas().items():
+            for name, path in item.items():
+                file_dict = read_yaml_file(path)
+
+                pepdb_con.schema.create(namespace=namespace, name=name[0:-5], schema=file_dict)
+
     @property
     def agent(self) -> PEPDatabaseAgent:
         return self._agent

From 0e1dc5f11fa29744f7627a9ee73f2e95432c9c5c Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 23 Jul 2024 16:47:24 -0400
Subject: [PATCH 10/15] connected projects to schemas

---
 pepdbagent/models.py             |  1 +
 pepdbagent/modules/annotation.py | 22 ++++++++---
 pepdbagent/modules/project.py    | 68 +++++++++++++++++++++++++++++---
 pepdbagent/utils.py              | 13 ++++++
 tests/test_updates.py            | 25 +++++++++++-
 tests/utils.py                   |  8 ++--
 6 files changed, 121 insertions(+), 16 deletions(-)

diff --git a/pepdbagent/models.py b/pepdbagent/models.py
index 6270f4e..78269fb 100644
--- a/pepdbagent/models.py
+++ b/pepdbagent/models.py
@@ -99,6 +99,7 @@ class UpdateItems(BaseModel):
     samples: Optional[List[dict]] = None
     subsamples: Optional[List[List[dict]]] = None
     pop: Optional[bool] = None
+    schema_id: Optional[int] = None
 
     model_config = ConfigDict(
         arbitrary_types_allowed=True,
diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py
index 2b30ccb..c9c7612 100644
--- a/pepdbagent/modules/annotation.py
+++ b/pepdbagent/modules/annotation.py
@@ -211,7 +211,11 @@ def _get_single_annotation(
                     submission_date=str(query_result.submission_date),
                     last_update_date=str(query_result.last_update_date),
                     digest=query_result.digest,
-                    pep_schema=query_result.pep_schema,
+                    pep_schema=(
+                        f"{query_result.schema_mapping.namespace}/{query_result.schema_mapping.name}"
+                        if query_result.schema_mapping
+                        else None
+                    ),
                     pop=query_result.pop,
                     stars_number=query_result.number_of_stars,
                     forked_from=(
@@ -342,7 +346,11 @@ def _get_projects(
                         submission_date=str(result.submission_date),
                         last_update_date=str(result.last_update_date),
                         digest=result.digest,
-                        pep_schema=result.pep_schema,
+                        pep_schema=(
+                            f"{result.schema_mapping.namespace}/{result.schema_mapping.name}"
+                            if result.schema_mapping
+                            else None
+                        ),
                         pop=result.pop,
                         stars_number=result.number_of_stars,
                         forked_from=(
@@ -538,9 +546,9 @@ def get_by_rp_list(
             statement = select(Projects).where(or_(*or_statement_list))
             anno_results = []
             with Session(self._sa_engine) as session:
-                query_result = session.execute(statement).all()
+                query_result = session.scalars(statement)
                 for result in query_result:
-                    project_obj = result[0]
+                    project_obj = result
                     annot = AnnotationModel(
                         namespace=project_obj.namespace,
                         name=project_obj.name,
@@ -551,7 +559,11 @@ def get_by_rp_list(
                         submission_date=str(project_obj.submission_date),
                         last_update_date=str(project_obj.last_update_date),
                         digest=project_obj.digest,
-                        pep_schema=project_obj.pep_schema,
+                        pep_schema=(
+                            f"{project_obj.schema_mapping.namespace}/{project_obj.schema_mapping.name}"
+                            if project_obj.schema_mapping
+                            else None
+                        ),
                         pop=project_obj.pop,
                         stars_number=project_obj.number_of_stars,
                         forked_from=(
diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py
index c4348f1..2ac4303 100644
--- a/pepdbagent/modules/project.py
+++ b/pepdbagent/modules/project.py
@@ -34,6 +34,7 @@
     Subsamples,
     UpdateTypes,
     User,
+    Schemas,
 )
 from pepdbagent.exceptions import (
     HistoryNotFoundError,
@@ -42,6 +43,7 @@
     ProjectNotFoundError,
     ProjectUniqueNameError,
     SampleTableUpdateError,
+    SchemaDoesNotExistError,
 )
 from pepdbagent.models import (
     HistoryAnnotationModel,
@@ -50,7 +52,13 @@
     UpdateItems,
     UpdateModel,
 )
-from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter
+from pepdbagent.utils import (
+    create_digest,
+    generate_guid,
+    order_samples,
+    registry_path_converter,
+    schema_path_converter,
+)
 
 _LOGGER = logging.getLogger(PKG_NAME)
 
@@ -314,7 +322,7 @@ def create(
         :param name: name of the project (Default: name is taken from the project object)
         :param tag: tag (or version) of the project.
         :param is_private: boolean value if the project should be visible just for user that creates it.
-        :param pep_schema: assign PEP to a specific schema. [Default: None]
+        :param pep_schema: assign PEP to a specific schema. Example: 'namespace/name' [Default: None]
         :param pop: if project is a pep of peps (POP) [Default: False]
         :param overwrite: if project exists overwrite the project, otherwise upload it.
             [Default: False - project won't be overwritten if it exists in db]
@@ -356,6 +364,24 @@ def create(
         except AttributeError:
             number_of_samples = len(proj_dict[SAMPLE_RAW_DICT_KEY])
 
+        if pep_schema:
+            schema_namespace, schema_name = schema_path_converter(pep_schema)
+            with Session(self._sa_engine) as session:
+                schema_mapping = session.scalar(
+                    select(Schemas).where(
+                        and_(
+                            Schemas.namespace == schema_namespace,
+                            Schemas.name == schema_name,
+                        )
+                    )
+                )
+                if not schema_mapping:
+                    raise SchemaDoesNotExistError(
+                        f"Schema {schema_namespace}/{schema_name} does not exist. "
+                        f"Project won't be uploaded."
+                    )
+                pep_schema = schema_mapping.id
+
         if update_only:
             _LOGGER.info(f"Update_only argument is set True. Updating project {proj_name} ...")
             self._overwrite(
@@ -384,7 +410,8 @@ def create(
                     private=is_private,
                     submission_date=datetime.datetime.now(datetime.timezone.utc),
                     last_update_date=datetime.datetime.now(datetime.timezone.utc),
-                    pep_schema=pep_schema,
+                    # pep_schema=pep_schema,
+                    schema_id=pep_schema,
                     description=description,
                     pop=pop,
                 )
@@ -447,7 +474,7 @@ def _overwrite(
         project_digest: str,
         number_of_samples: int,
         private: bool = False,
-        pep_schema: str = None,
+        pep_schema: int = None,
         description: str = "",
         pop: bool = False,
     ) -> None:
@@ -483,7 +510,8 @@ def _overwrite(
                     found_prj.digest = project_digest
                     found_prj.number_of_samples = number_of_samples
                     found_prj.private = private
-                    found_prj.pep_schema = pep_schema
+                    # found_prj.pep_schema = pep_schema
+                    found_prj.schema_id = pep_schema
                     found_prj.config = project_dict[CONFIG_KEY]
                     found_prj.description = description
                     found_prj.last_update_date = datetime.datetime.now(datetime.timezone.utc)
@@ -577,6 +605,8 @@ def update(
                         f"Pep {namespace}/{name}:{tag} was not found. No items will be updated!"
                     )
 
+                self._convert_update_schema_id(session, update_values)
+
                 for k, v in update_values.items():
                     if getattr(found_prj, k) != v:
                         setattr(found_prj, k, v)
@@ -647,6 +677,34 @@ def update(
         else:
             raise ProjectNotFoundError("No items will be updated!")
 
+    @staticmethod
+    def _convert_update_schema_id(session: Session, update_values: dict):
+        """
+        Convert schema path to schema_id in update_values and update it in update dict
+
+
+        :param session: open session object
+        :param update_values: dict with update key->values
+
+        return None
+        """
+        if "pep_schema" in update_values:
+            schema_namespace, schema_name = schema_path_converter(update_values["pep_schema"])
+            schema_mapping = session.scalar(
+                select(Schemas).where(
+                    and_(
+                        Schemas.namespace == schema_namespace,
+                        Schemas.name == schema_name,
+                    )
+                )
+            )
+            if not schema_mapping:
+                raise SchemaDoesNotExistError(
+                    f"Schema {schema_namespace}/{schema_name} does not exist. "
+                    f"Project won't be updated."
+                )
+            update_values["schema_id"] = schema_mapping.id
+
     def _update_samples(
         self,
         project_id: int,
diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py
index cbc596b..2c4f990 100644
--- a/pepdbagent/utils.py
+++ b/pepdbagent/utils.py
@@ -80,6 +80,19 @@ def registry_path_converter(registry_path: str) -> Tuple[str, str, str]:
     raise RegistryPathError(f"Error in: '{registry_path}'")
 
 
+def schema_path_converter(schema_path: str) -> Tuple[str, str]:
+    """
+    Convert schema path to namespace, name
+
+    :param schema_path: schema path that has structure: "namespace/name.yaml"
+    :return: tuple(namespace, name)
+    """
+    if "/" in schema_path:
+        namespace, name = schema_path.split("/")
+        return namespace, name
+    raise RegistryPathError(f"Error in: '{schema_path}'")
+
+
 def tuple_converter(value: Union[tuple, list, str, None]) -> tuple:
     """
     Convert string list or tuple to tuple.
diff --git a/tests/test_updates.py b/tests/test_updates.py
index 790b313..e057024 100644
--- a/tests/test_updates.py
+++ b/tests/test_updates.py
@@ -89,6 +89,27 @@ def test_update_project_description(self, namespace, name, new_description):
                 == new_description
             )
 
+    @pytest.mark.parametrize(
+        "namespace, name, new_schema",
+        [
+            ["namespace1", "amendments1", "bedboss"],
+            ["namespace2", "derive", "bedboss"],
+        ],
+    )
+    def test_update_project_schema(self, namespace, name, new_schema):
+        with PEPDBAgentContextManager(add_data=True) as agent:
+            prj_annot = agent.annotation.get(namespace=namespace, name=name)
+            assert prj_annot.results[0].pep_schema == "namespace1/2.0.0"
+
+            agent.project.update(
+                namespace=namespace,
+                name=name,
+                tag="default",
+                update_dict={"pep_schema": "namespace2/bedboss"},
+            )
+            prj_annot = agent.annotation.get(namespace=namespace, name=name)
+            assert prj_annot.results[0].pep_schema == "namespace2/bedboss"
+
     @pytest.mark.parametrize(
         "namespace, name, new_description",
         [
@@ -134,8 +155,8 @@ def test_update_whole_project(self, namespace, name):
     @pytest.mark.parametrize(
         "namespace, name, pep_schema",
         [
-            ["namespace1", "amendments1", "schema1"],
-            ["namespace2", "derive", "schema3"],
+            ["namespace1", "amendments1", "namespace2/bedmaker"],
+            ["namespace2", "derive", "namespace2/bedbuncher"],
         ],
     )
     def test_update_pep_schema(self, namespace, name, pep_schema):
diff --git a/tests/utils.py b/tests/utils.py
index b96192a..9a325d4 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -70,7 +70,7 @@ class PEPDBAgentContextManager:
     Class with context manager to connect to database. Adds data and drops everything from the database upon exit to ensure.
     """
 
-    def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=False, echo=False):
+    def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=True, echo=False):
         """
         :param url: database url e.g. "postgresql+psycopg://postgres:docker@localhost:5432/pep-db"
         :param add_data: add data to the database
@@ -86,10 +86,10 @@ def __enter__(self):
         self._agent = PEPDatabaseAgent(dsn=self.url, echo=False)
         self.db_engine = self._agent.pep_db_engine
         self.db_engine.create_schema()
-        if self.add_data:
-            self._insert_data()
         if self.add_schemas:
             self._add_schemas()
+        if self.add_data:
+            self._insert_data()
         return self._agent
 
     def __exit__(self, exc_type, exc_value, exc_traceback):
@@ -111,7 +111,7 @@ def _insert_data(self):
                     is_private=private,
                     project=prj,
                     overwrite=True,
-                    pep_schema="random_schema_name",
+                    pep_schema="namespace1/2.0.0",
                 )
 
     def _add_schemas(self):

From 6681748e74e92fbdb40213a0a1d9ae9d0a3e722a Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 23 Jul 2024 16:51:40 -0400
Subject: [PATCH 11/15] cleaning, restructuring

---
 pepdbagent/modules/project.py |  2 +-
 pepdbagent/modules/schema.py  | 54 ++++++-----------------------------
 pepdbagent/pepdbagent.py      |  2 +-
 scripts/update_db.py          | 27 ++++++++++++++++++
 tests/test_namespace.py       |  2 +-
 tests/test_schema.py          | 14 +++++----
 tests/utils.py                |  2 +-
 7 files changed, 48 insertions(+), 55 deletions(-)
 create mode 100644 scripts/update_db.py

diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py
index 2ac4303..d7cb181 100644
--- a/pepdbagent/modules/project.py
+++ b/pepdbagent/modules/project.py
@@ -31,10 +31,10 @@
     HistorySamples,
     Projects,
     Samples,
+    Schemas,
     Subsamples,
     UpdateTypes,
     User,
-    Schemas,
 )
 from pepdbagent.exceptions import (
     HistoryNotFoundError,
diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index 4091891..71ccd0f 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -1,52 +1,26 @@
-import datetime
-import json
 import logging
-from typing import Dict, List, NoReturn, Union
-
-import numpy as np
-import peppy
-from peppy.const import (
-    CONFIG_KEY,
-    SAMPLE_NAME_ATTR,
-    SAMPLE_RAW_DICT_KEY,
-    SAMPLE_TABLE_INDEX_KEY,
-    SUBSAMPLE_RAW_LIST_KEY,
-)
-from sqlalchemy import Select, and_, delete, select, or_, func
+
+from sqlalchemy import Select, and_, delete, func, or_, select
+from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session
 from sqlalchemy.orm.attributes import flag_modified
-from sqlalchemy.exc import IntegrityError, NoResultFound
-
-from pepdbagent.const import (
-    DEFAULT_TAG,
-    DESCRIPTION_KEY,
-    MAX_HISTORY_SAMPLES_NUMBER,
-    NAME_KEY,
-    PEPHUB_SAMPLE_ID_KEY,
-    PKG_NAME,
-)
-from pepdbagent.db_utils import (
-    BaseEngine,
-    Schemas,
-    SchemaGroups,
-    SchemaGroupRelations,
-    User,
-)
+
+from pepdbagent.const import PKG_NAME
+from pepdbagent.db_utils import BaseEngine, SchemaGroupRelations, SchemaGroups, Schemas, User
 from pepdbagent.exceptions import (
     SchemaAlreadyExistsError,
+    SchemaAlreadyInGroupError,
     SchemaDoesNotExistError,
     SchemaGroupAlreadyExistsError,
     SchemaGroupDoesNotExistError,
-    SchemaAlreadyInGroupError,
     SchemaIsNotInGroupError,
 )
 from pepdbagent.models import (
     SchemaAnnotation,
-    SchemaSearchResult,
     SchemaGroupAnnotation,
     SchemaGroupSearchResult,
+    SchemaSearchResult,
 )
-from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter
 
 _LOGGER = logging.getLogger(PKG_NAME)
 
@@ -569,18 +543,6 @@ def group_remove_schema(
 
         try:
             with Session(self._sa_engine) as session:
-
-                a = session.scalar(
-                    select(Schemas).where(
-                        and_(Schemas.namespace == schema_namespace, Schemas.name == schema_name)
-                    )
-                )
-                b = session.scalar(
-                    select(SchemaGroups).where(
-                        and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name)
-                    )
-                )
-
                 delete_statement = delete(SchemaGroupRelations).where(
                     and_(
                         SchemaGroupRelations.schema_id
diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py
index dc64cb9..6365af1 100644
--- a/pepdbagent/pepdbagent.py
+++ b/pepdbagent/pepdbagent.py
@@ -4,9 +4,9 @@
 from pepdbagent.modules.namespace import PEPDatabaseNamespace
 from pepdbagent.modules.project import PEPDatabaseProject
 from pepdbagent.modules.sample import PEPDatabaseSample
+from pepdbagent.modules.schema import PEPDatabaseSchema
 from pepdbagent.modules.user import PEPDatabaseUser
 from pepdbagent.modules.view import PEPDatabaseView
-from pepdbagent.modules.schema import PEPDatabaseSchema
 
 
 class PEPDatabaseAgent(object):
diff --git a/scripts/update_db.py b/scripts/update_db.py
new file mode 100644
index 0000000..1cc4fe4
--- /dev/null
+++ b/scripts/update_db.py
@@ -0,0 +1,27 @@
+import os
+
+from dotenv import load_dotenv
+from tqdm import tqdm
+
+from pepdbagent import PEPDatabaseAgent
+
+load_dotenv()
+
+
+def update():
+    agent = PEPDatabaseAgent(
+        user=os.environ.get("POSTGRES_USER"),
+        password=os.environ.get("POSTGRES_PASSWORD"),
+        host=os.environ.get("POSTGRES_HOST"),
+        database=os.environ.get("POSTGRES_DB"),
+        # port=os.environ.get("POSTGRES_PORT"),
+    )
+
+    if_list = agent.update.get_namespace_projects("geo")
+
+    for i in tqdm(if_list, desc="Updating projects"):
+        agent.update.update_parent_project(i)
+
+
+if __name__ == "__main__":
+    update()
diff --git a/tests/test_namespace.py b/tests/test_namespace.py
index 196f6e8..d4fd309 100644
--- a/tests/test_namespace.py
+++ b/tests/test_namespace.py
@@ -170,7 +170,7 @@ class TestUser:
     def test_create_user(self):
         with PEPDBAgentContextManager(add_data=True) as agent:
 
-            user = agent.user.create_user("test_user")
+            agent.user.create_user("test_user")
 
             assert agent.user.exists("test_user")
 
diff --git a/tests/test_schema.py b/tests/test_schema.py
index f09625c..52e329c 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -81,17 +81,21 @@ def test_update_annotation(self, namespace, name):
             )
             assert schema_annot != agent.schema.info(namespace=namespace, name=name)
 
-    @pytest.mark.skip("")
     @pytest.mark.parametrize(
         "namespace, name",
         [
-            ["namespace1", "2.0.0"],
+            ["namespace2", "bedboss"],
         ],
     )
     def test_annotation_popular(self, namespace, name):
-        with PEPDBAgentContextManager(add_schemas=True) as agent:
-            ...
-            # TODO: implement this feature
+        with PEPDBAgentContextManager(add_data=True, add_schemas=True) as agent:
+            agent.project.update(
+                namespace="namespace1",
+                name="amendments1",
+                update_dict={"pep_schema": "namespace2/bedboss"},
+            )
+            schema_annot = agent.schema.info(namespace=namespace, name=name)
+            assert schema_annot.popularity_number == 1
 
     def test_search(self):
         with PEPDBAgentContextManager(add_schemas=True) as agent:
diff --git a/tests/utils.py b/tests/utils.py
index 9a325d4..2afc623 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,8 +1,8 @@
 import os
 import warnings
-import yaml
 
 import peppy
+import yaml
 from sqlalchemy.exc import OperationalError
 
 from pepdbagent import PEPDatabaseAgent

From 4cab3d7f582543ffaf5f342edd388778bce929b1 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 23 Jul 2024 16:53:11 -0400
Subject: [PATCH 12/15] deleted unused file

---
 scripts/update_db.py | 27 ---------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 scripts/update_db.py

diff --git a/scripts/update_db.py b/scripts/update_db.py
deleted file mode 100644
index 1cc4fe4..0000000
--- a/scripts/update_db.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-from tqdm import tqdm
-
-from pepdbagent import PEPDatabaseAgent
-
-load_dotenv()
-
-
-def update():
-    agent = PEPDatabaseAgent(
-        user=os.environ.get("POSTGRES_USER"),
-        password=os.environ.get("POSTGRES_PASSWORD"),
-        host=os.environ.get("POSTGRES_HOST"),
-        database=os.environ.get("POSTGRES_DB"),
-        # port=os.environ.get("POSTGRES_PORT"),
-    )
-
-    if_list = agent.update.get_namespace_projects("geo")
-
-    for i in tqdm(if_list, desc="Updating projects"):
-        agent.update.update_parent_project(i)
-
-
-if __name__ == "__main__":
-    update()

From be1d67b7e1e29455b0a2dad9744f1d87e19a4ac2 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Wed, 24 Jul 2024 11:42:39 -0400
Subject: [PATCH 13/15] warning fixws

---
 pepdbagent/modules/schema.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index 71ccd0f..bfcad3c 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -189,6 +189,11 @@ def create(
         :param update_only: update only schema if exists [Default: False]
         """
 
+        if description:
+            schema["description"] = description
+        else:
+            description = schema.get("description", "")
+
         if self.exist(namespace, name):
             if overwrite:
                 self.update(namespace, name, schema, description)
@@ -553,7 +558,7 @@ def group_remove_schema(
                                 Schemas.name == schema_name,
                             )
                         )
-                        .subquery(),
+                        .scalar_subquery(),
                         SchemaGroupRelations.group_id
                         == select(SchemaGroups.id)
                         .where(
@@ -562,7 +567,7 @@ def group_remove_schema(
                                 SchemaGroups.name == name,
                             )
                         )
-                        .subquery(),
+                        .scalar_subquery(),
                     )
                 )
 

From 25aa92a583378cca6a79965ef4cc9f8a2c7a0c92 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Wed, 24 Jul 2024 12:47:26 -0400
Subject: [PATCH 14/15] changelog

---
 docs/changelog.md      | 4 ++++
 pepdbagent/_version.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 6586c4b..204c54d 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -2,6 +2,10 @@
 
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
 
+## [0.11.0] -- 2024-07-24
+- Added validation schemas
+
+
 ## [0.10.0] -- 2024-07-18
 - Added user delete method
 - Added project history and restoring projects
diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py
index 61fb31c..ae6db5f 100644
--- a/pepdbagent/_version.py
+++ b/pepdbagent/_version.py
@@ -1 +1 @@
-__version__ = "0.10.0"
+__version__ = "0.11.0"

From f3265be3999632fa3d95d68b16864abaa98a0d3c Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Wed, 24 Jul 2024 13:25:28 -0400
Subject: [PATCH 15/15] added order by to schemas

---
 pepdbagent/modules/schema.py | 47 +++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py
index bfcad3c..4294476 100644
--- a/pepdbagent/modules/schema.py
+++ b/pepdbagent/modules/schema.py
@@ -92,7 +92,13 @@ def info(self, namespace: str, name: str) -> SchemaAnnotation:
             )
 
     def search(
-        self, namespace: str = None, search_str: str = "", limit: int = 100, offset: int = 0
+        self,
+        namespace: str = None,
+        search_str: str = "",
+        limit: int = 100,
+        offset: int = 0,
+        order_by: str = "update_date",
+        order_desc: bool = False,
     ) -> SchemaSearchResult:
         """
         Search schemas in the database.
@@ -101,6 +107,10 @@ def search(
         :param search_str: query string. [Default: ""]. If empty, return all schemas
         :param limit: limit number of schemas [Default: 100]
         :param offset: offset number of schemas [Default: 0]
+        :param order_by: sort the result-set by the information
+            Options: ["name", "update_date", "submission_date"]
+            [Default: update_date]
+        :param order_desc: Sort the records in descending order. [Default: False]
 
         :return: list of schema dicts
         """
@@ -108,6 +118,7 @@ def search(
         statement = select(Schemas)
         statement = self._add_condition(statement, namespace, search_str)
         statement = statement.limit(limit).offset(offset)
+        statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc)
 
         return_list = []
 
@@ -151,6 +162,40 @@ def _count_search(self, namespace: str = None, search_str: str = "") -> int:
 
             return result[0]
 
+    @staticmethod
+    def _add_order_by_keyword(
+        statement: Select, by: str = "update_date", desc: bool = False
+    ) -> Select:
+        """
+        Add order by clause to sqlalchemy statement
+
+        :param statement: sqlalchemy representation of a SELECT statement.
+        :param by: sort the result-set by the information
+            Options: ["name", "update_date", "submission_date"]
+            [Default: "update_date"]
+        :param desc: Sort the records in descending order. [Default: False]
+        :return: sqlalchemy representation of a SELECT statement with order by keyword
+        """
+        if by == "update_date":
+            order_by_obj = Schemas.last_update_date
+        elif by == "name":
+            order_by_obj = Schemas.name
+        elif by == "submission_date":
+            order_by_obj = Schemas.submission_date
+        else:
+            _LOGGER.warning(
+                f"order by: '{by}' statement is unavailable. Projects are sorted by 'update_date'"
+            )
+            order_by_obj = Schemas.last_update_date
+
+        if desc and by == "name":
+            order_by_obj = order_by_obj.desc()
+
+        elif by != "name" and not desc:
+            order_by_obj = order_by_obj.desc()
+
+        return statement.order_by(order_by_obj)
+
     @staticmethod
     def _add_condition(
         statement: Select,