diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4e1ef42..b120129 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,6 +1,3 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - name: Upload Python Package on: @@ -9,9 +6,10 @@ on: jobs: deploy: - + name: upload release to PyPI runs-on: ubuntu-latest - + permissions: + id-token: write steps: - uses: actions/checkout@v2 - name: Set up Python @@ -23,9 +21,7 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel twine - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist bdist_wheel - twine upload dist/* + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 847aad8..2271b3a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -45,12 +45,12 @@ agent.project.create(prj_obj, namespace, name, tag) # examples of update_dict: update_dict1 = {"is_private" = True} update_dict2 = {"is_private" = True, name = "new_name"} -update_dict3 = {"project" = prj_obj, "is_private"=True} +update_dict3 = {"project" = prj_obj, "is_private" = True} update_dict4 = {"project" = prj_obj} update_dict4 = {"tag" = "new_tag"} # after creation of the dict, update record by providing update_dict and namespace, name and tag: -agent.project.update(update_dict, namespace, name, tag) +agent.project.update(update_dict, namespace, name, tag, ) # retrieve a project agent.project.get(namespace, name, tag) diff --git a/docs/changelog.md b/docs/changelog.md index 2daa71f..b7d5430 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 
+2,15 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.7.0] -- 2024-01-17 +- Added `pop` to project table and annotation model [#107](https://github.com/pepkit/pepdbagent/issues/107) +- Added `forked_from` feature [#73](https://github.com/pepkit/pepdbagent/issues/73) +- Switched to pydantic2 [#105](https://github.com/pepkit/pepdbagent/issues/105) +- Updated requirements (psycopg2 -> psycopg3) [#102](https://github.com/pepkit/pepdbagent/issues/102) +- Added sample module that contains functionality for updating, adding, and deleting samples in the project separately [#111](https://github.com/pepkit/pepdbagent/issues/111) +- Added user and favorite tables with functionality [#104](https://github.com/pepkit/pepdbagent/issues/104) +- Updated the sample update method when updating the whole project. Following this change, samples are updated without changing the ID in the database + ## [0.6.0] -- 2023-08-24 - Added date filter to project annotation diff --git a/pepdbagent/__init__.py b/pepdbagent/__init__.py index 4098c3b..11e033b 100644 --- a/pepdbagent/__init__.py +++ b/pepdbagent/__init__.py @@ -2,8 +2,11 @@ import coloredlogs import logmuse -from ._version import __version__ -from .pepdbagent import * +from pepdbagent._version import __version__ +from pepdbagent.pepdbagent import PEPDatabaseAgent + +__all__ = ["__version__", "PEPDatabaseAgent"] + _LOGGER = logmuse.init_logger("pepdbagent") coloredlogs.install( diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 906d362..49e0fc1 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.6.0" +__version__ = "0.7.0" diff --git a/pepdbagent/const.py b/pepdbagent/const.py index 6fff364..31819c8 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -12,7 +12,7 @@ DEFAULT_LIMIT = 100 # db_dialects -POSTGRES_DIALECT = "postgresql" +POSTGRES_DIALECT =
"postgresql+psycopg" DEFAULT_LIMIT_INFO = 5 diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index e758ea1..b48fb3f 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -1,11 +1,10 @@ import datetime import logging -from typing import Any, Optional, List +from typing import Optional, List from sqlalchemy import ( BigInteger, FetchedValue, - PrimaryKeyConstraint, Result, Select, String, @@ -13,7 +12,6 @@ select, TIMESTAMP, ForeignKey, - ForeignKeyConstraint, UniqueConstraint, ) from sqlalchemy.dialects.postgresql import JSON @@ -87,17 +85,41 @@ class Projects(Base): config: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue()) private: Mapped[bool] number_of_samples: Mapped[int] + number_of_stars: Mapped[int] = mapped_column(default=0) submission_date: Mapped[datetime.datetime] last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column( onupdate=deliver_update_date, default=deliver_update_date ) pep_schema: Mapped[Optional[str]] + pop: Mapped[Optional[bool]] = mapped_column(default=False) samples_mapping: Mapped[List["Samples"]] = relationship( back_populates="sample_mapping", cascade="all, delete-orphan" ) subsamples_mapping: Mapped[List["Subsamples"]] = relationship( back_populates="subsample_mapping", cascade="all, delete-orphan" ) + stars_mapping: Mapped[List["Stars"]] = relationship( + back_populates="project_mapping", cascade="all, delete-orphan" + ) + views_mapping: Mapped[List["Views"]] = relationship( + back_populates="project_mapping", cascade="all, delete-orphan" + ) + + # Self-referential relationship. The parent project is the one that was forked to create this one. 
+ forked_from_id: Mapped[Optional[int]] = mapped_column( + ForeignKey("projects.id", ondelete="SET NULL"), nullable=True + ) + forked_from_mapping = relationship( + "Projects", + back_populates="forked_to_mapping", + remote_side=[id], + single_parent=True, + cascade="all", + ) + + forked_to_mapping = relationship( + "Projects", back_populates="forked_from_mapping", cascade="all" + ) __table_args__ = (UniqueConstraint("namespace", "name", "tag"),) @@ -113,8 +135,13 @@ class Samples(Base): sample: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue()) row_number: Mapped[int] project_id = mapped_column(ForeignKey("projects.id", ondelete="CASCADE")) + sample_name: Mapped[Optional[str]] = mapped_column() sample_mapping: Mapped["Projects"] = relationship(back_populates="samples_mapping") + views: Mapped[Optional[List["ViewSampleAssociation"]]] = relationship( + back_populates="sample", cascade="all, delete-orphan" + ) + class Subsamples(Base): """ @@ -131,6 +158,67 @@ class Subsamples(Base): subsample_mapping: Mapped["Projects"] = relationship(back_populates="subsamples_mapping") +class User(Base): + """ + User table representation in the database + """ + + __tablename__ = "users" + + id: Mapped[int] = mapped_column(primary_key=True) + namespace: Mapped[str] + stars_mapping: Mapped[List["Stars"]] = relationship( + back_populates="user_mapping", cascade="all, delete-orphan" + ) + + +class Stars(Base): + """ + FavoriteProjects table representation in the database + """ + + __tablename__ = "stars" + + user_id = mapped_column(ForeignKey("users.id", ondelete="CASCADE"), primary_key=True) + project_id = mapped_column(ForeignKey("projects.id", ondelete="CASCADE"), primary_key=True) + user_mapping: Mapped[List["User"]] = relationship(back_populates="stars_mapping") + project_mapping: Mapped["Projects"] = relationship(back_populates="stars_mapping") + + +class Views(Base): + """ + Views table representation in the database + """ + + __tablename__ = "views" + + id: 
Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column() + description: Mapped[Optional[str]] + + project_id = mapped_column(ForeignKey("projects.id", ondelete="CASCADE")) + project_mapping = relationship("Projects", back_populates="views_mapping") + + samples: Mapped[List["ViewSampleAssociation"]] = relationship( + back_populates="view", cascade="all, delete-orphan" + ) + + _table_args__ = (UniqueConstraint("namespace", "project_id"),) + + +class ViewSampleAssociation(Base): + """ + Association table between views and samples + """ + + __tablename__ = "views_samples" + + sample_id = mapped_column(ForeignKey("samples.id", ondelete="CASCADE"), primary_key=True) + view_id = mapped_column(ForeignKey("views.id", ondelete="CASCADE"), primary_key=True) + sample: Mapped["Samples"] = relationship(back_populates="views") + view: Mapped["Views"] = relationship(back_populates="samples") + + class BaseEngine: """ A class with base methods, that are used in several classes. e.g. fetch_one or fetch_all diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index cd094f6..462301e 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -36,3 +36,40 @@ def __init__(self, msg=""): class FilterError(PEPDatabaseAgentError): def __init__(self, msg=""): super().__init__(f"""pepdbagent filter error. {msg}""") + + +class ProjectNotInFavorites(PEPDatabaseAgentError): + """ + Project doesn't exist in favorites + """ + + def __init__(self, msg=""): + super().__init__(f"""Project is not in favorites list. {msg}""") + + +class ProjectAlreadyInFavorites(PEPDatabaseAgentError): + """ + Project already exists in favorites + """ + + def __init__(self, msg=""): + super().__init__(f"""Project is already in favorites list. {msg}""") + + +class SampleNotFoundError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""Sample does not exist.
{msg}""") + + +class ViewNotFoundError(PEPDatabaseAgentError): + def __init__(self, msg=""): + super().__init__(f"""View does not exist. {msg}""") + + +class SampleAlreadyInView(PEPDatabaseAgentError): + """ + Sample is already in the view exception + """ + + def __init__(self, msg=""): + super().__init__(f"""Sample is already in the view. {msg}""") diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 0703940..a7f08b9 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -1,9 +1,8 @@ # file with pydantic models -import datetime from typing import List, Optional, Union +from pydantic import BaseModel, Field, ConfigDict, field_validator -import peppy -from pydantic import BaseModel, Extra, Field, validator +from pepdbagent.const import DEFAULT_TAG class AnnotationModel(BaseModel): @@ -21,12 +20,16 @@ class AnnotationModel(BaseModel): submission_date: Optional[str] digest: Optional[str] pep_schema: Optional[str] + pop: Optional[bool] = False + stars_number: Optional[int] = 0 + forked_from: Optional[Union[str, None]] = None - class Config: - allow_population_by_field_name = True - validate_assignment = True + model_config = ConfigDict( + validate_assignment=True, + populate_by_name=True, + ) - @validator("is_private") + @field_validator("is_private") def is_private_should_be_bool(cls, v): if not isinstance(v, bool): return False @@ -42,7 +45,7 @@ class AnnotationList(BaseModel): count: int limit: int offset: int - results: List[AnnotationModel] + results: List[Union[AnnotationModel, None]] class Namespace(BaseModel): @@ -71,20 +74,21 @@ class UpdateItems(BaseModel): Model used for updating individual items in db """ - name: Optional[str] - description: Optional[str] - tag: Optional[str] - is_private: Optional[bool] - pep_schema: Optional[str] - digest: Optional[str] - config: Optional[dict] - samples: Optional[List[dict]] - subsamples: Optional[List[List[dict]]] - description: Optional[str] - - class Config: - arbitrary_types_allowed = True - extra 
= Extra.forbid + name: Optional[str] = None + description: Optional[str] = None + tag: Optional[str] = None + is_private: Optional[bool] = None + pep_schema: Optional[str] = None + digest: Optional[str] = None + config: Optional[dict] = None + samples: Optional[List[dict]] = None + subsamples: Optional[List[List[dict]]] = None + pop: Optional[bool] = False + + model_config = ConfigDict( + arbitrary_types_allowed=True, + extra="forbid", + ) @property def number_of_samples(self) -> Union[int, None]: @@ -99,37 +103,36 @@ class UpdateModel(BaseModel): Model used for updating individual items and creating sql string in the code """ - config: Optional[dict] + config: Optional[dict] = None name: Optional[str] = None tag: Optional[str] = None - private: Optional[bool] = Field(alias="is_private") - digest: Optional[str] - number_of_samples: Optional[int] - pep_schema: Optional[str] + private: Optional[bool] = Field(alias="is_private", default=None) + digest: Optional[str] = None + number_of_samples: Optional[int] = None + pep_schema: Optional[str] = None description: Optional[str] = "" # last_update_date: Optional[datetime.datetime] = datetime.datetime.now(datetime.timezone.utc) + pop: Optional[bool] = False - @validator("tag", "name") + @field_validator("tag", "name") def value_must_not_be_empty(cls, v): if "" == v: return None return v - @validator("tag", "name") + @field_validator("tag", "name") def value_must_be_lowercase(cls, v): if v: return v.lower() return v - @validator("tag", "name") + @field_validator("tag", "name") def value_should_not_contain_question(cls, v): if "?" in v: return ValueError("Question mark (?) 
is prohibited in name and tag.") return v - class Config: - extra = Extra.forbid - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True, extra="forbid") class NamespaceInfo(BaseModel): @@ -149,3 +152,45 @@ class ListOfNamespaceInfo(BaseModel): number_of_namespaces: int limit: int results: List[NamespaceInfo] + + +class ProjectRegistryPath(BaseModel): + """ + Project Namespace + """ + + namespace: str + name: str + tag: str = DEFAULT_TAG + + +class ViewAnnotation(BaseModel): + """ + Project views model + """ + + name: str + description: Optional[str] = None + number_of_samples: int = 0 + + +class ProjectViews(BaseModel): + """ + View annotation model + """ + + namespace: str + name: str + tag: str = DEFAULT_TAG + views: List[ViewAnnotation] = [] + + +class CreateViewDictModel(BaseModel): + """ + View creation dict model + """ + + project_namespace: str + project_name: str + project_tag: str + sample_list: List[str] diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 3b7873c..0876840 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -2,9 +2,9 @@ from datetime import datetime from typing import List, Literal, Optional, Union -from sqlalchemy import Engine, and_, func, or_, select -from sqlalchemy.exc import IntegrityError +from sqlalchemy import and_, func, or_, select from sqlalchemy.sql.selectable import Select +from sqlalchemy.orm import Session from pepdbagent.const import ( DEFAULT_LIMIT, @@ -50,6 +50,7 @@ def get( filter_by: Optional[Literal["submission_date", "last_update_date"]] = None, filter_start_date: Optional[str] = None, filter_end_date: Optional[str] = None, + pep_type: Optional[Literal["pep", "pop"]] = None, ) -> AnnotationList: """ Get project annotations. @@ -77,6 +78,7 @@ def get( [Default: filter won't be used] :param filter_start_date: Filter start date. Format: "YYYY/MM/DD" :param filter_end_date: Filter end date. Format: "YYYY/MM/DD". 
if None: present date will be used + :param pep_type: Get pep with specified type. Options: ["pep", "pop"]. Default: None, get all peps :return: pydantic model: AnnotationList """ if all([namespace, name, tag]): @@ -94,6 +96,10 @@ def get( offset=0, results=found_annotation, ) + + if pep_type not in [None, "pep", "pop"]: + raise ValueError(f"pep_type should be one of ['pep', 'pop'], got {pep_type}") + return AnnotationList( limit=limit, offset=offset, @@ -104,6 +110,7 @@ def get( filter_by=filter_by, filter_end_date=filter_end_date, filter_start_date=filter_start_date, + pep_type=pep_type, ), results=self._get_projects( namespace=namespace, @@ -116,6 +123,7 @@ def get( filter_by=filter_by, filter_end_date=filter_end_date, filter_start_date=filter_start_date, + pep_type=pep_type, ), ) @@ -178,18 +186,7 @@ def _get_single_annotation( _LOGGER.info(f"Getting annotation of the project: '{namespace}/{name}:{tag}'") admin_tuple = tuple_converter(admin) - statement = select( - Projects.namespace, - Projects.name, - Projects.tag, - Projects.private, - Projects.description, - Projects.number_of_samples, - Projects.submission_date, - Projects.last_update_date, - Projects.digest, - Projects.pep_schema, - ).where( + statement = select(Projects).where( and_( Projects.name == name, Projects.namespace == namespace, @@ -200,25 +197,33 @@ def _get_single_annotation( ), ) ) - query_result = self._pep_db_engine.session_execute(statement).first() - - if query_result: - annot = AnnotationModel( - namespace=query_result.namespace, - name=query_result.name, - tag=query_result.tag, - is_private=query_result.private, - description=query_result.description, - number_of_samples=query_result.number_of_samples, - submission_date=str(query_result.submission_date), - last_update_date=str(query_result.last_update_date), - digest=query_result.digest, - pep_schema=query_result.pep_schema, - ) - _LOGGER.info(f"Annotation of the project '{namespace}/{name}:{tag}' has been found!") - return annot - 
else: - raise ProjectNotFoundError(f"Project '{namespace}/{name}:{tag}' was not found.") + with Session(self._sa_engine) as session: + query_result = session.scalar(statement) + + if query_result: + annot = AnnotationModel( + namespace=query_result.namespace, + name=query_result.name, + tag=query_result.tag, + is_private=query_result.private, + description=query_result.description, + number_of_samples=query_result.number_of_samples, + submission_date=str(query_result.submission_date), + last_update_date=str(query_result.last_update_date), + digest=query_result.digest, + pep_schema=query_result.pep_schema, + pop=query_result.pop, + stars_number=query_result.number_of_stars, + forked_from=f"{query_result.forked_from_mapping.namespace}/{query_result.forked_from_mapping.name}:{query_result.forked_from_mapping.tag}" + if query_result.forked_from_id + else None, + ) + _LOGGER.info( + f"Annotation of the project '{namespace}/{name}:{tag}' has been found!" + ) + return annot + else: + raise ProjectNotFoundError(f"Project '{namespace}/{name}:{tag}' was not found.") def _count_projects( self, @@ -228,9 +233,11 @@ def _count_projects( filter_by: Optional[Literal["submission_date", "last_update_date"]] = None, filter_start_date: Optional[str] = None, filter_end_date: Optional[str] = None, + pep_type: Optional[Literal["pep", "pop"]] = None, ) -> int: """ Count projects. [This function is related to _find_projects] + :param namespace: namespace where to search for a project :param search_str: search string. will be searched in name, tag and description information :param admin: string or list of admins [e.g. "Khoroshevskyi", or ["doc_adin","Khoroshevskyi"]] @@ -239,6 +246,8 @@ def _count_projects( [Default: filter won't be used] :param filter_start_date: Filter start date. Format: "YYYY:MM:DD" :param filter_end_date: Filter end date. Format: "YYYY:MM:DD". if None: present date will be used + :param pep_type: Get pep with specified type. Options: ["pep", "pop"]. 
Default: None, get all peps + :return: number of found project in specified namespace """ if admin is None: @@ -253,6 +262,8 @@ def _count_projects( statement = self._add_date_filter_if_provided( statement, filter_by, filter_start_date, filter_end_date ) + if pep_type: + statement = statement.where(Projects.pop.is_(pep_type == "pop")) result = self._pep_db_engine.session_execute(statement).first() try: @@ -272,6 +283,7 @@ def _get_projects( filter_by: Optional[Literal["submission_date", "last_update_date"]] = None, filter_start_date: Optional[str] = None, filter_end_date: Optional[str] = None, + pep_type: Optional[Literal["pep", "pop"]] = None, ) -> List[AnnotationModel]: """ Get projects by providing search string. @@ -290,24 +302,14 @@ def _get_projects( [Default: filter won't be used] :param filter_start_date: Filter start date. Format: "YYYY:MM:DD" :param filter_end_date: Filter end date. Format: "YYYY:MM:DD". if None: present date will be used + :param pep_type: Get pep with specified type. Options: ["pep", "pop"]. Default: None, get all peps :return: list of found projects with their annotations. 
""" _LOGGER.info(f"Running annotation search: (namespace: {namespace}, query: {search_str}.") if admin is None: admin = [] - statement = select( - Projects.namespace, - Projects.name, - Projects.tag, - Projects.private, - Projects.description, - Projects.number_of_samples, - Projects.submission_date, - Projects.last_update_date, - Projects.digest, - Projects.pep_schema, - ).select_from(Projects) + statement = select(Projects.id) statement = self._add_condition( statement, @@ -320,25 +322,35 @@ def _get_projects( ) statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) statement = statement.limit(limit).offset(offset) + if pep_type: + statement = statement.where(Projects.pop.is_(pep_type == "pop")) - query_results = self._pep_db_engine.session_execute(statement).all() + id_results = self._pep_db_engine.session_execute(statement).all() results_list = [] - for result in query_results: - results_list.append( - AnnotationModel( - namespace=result.namespace, - name=result.name, - tag=result.tag, - is_private=result.private, - description=result.description, - number_of_samples=result.number_of_samples, - submission_date=str(result.submission_date), - last_update_date=str(result.last_update_date), - digest=result.digest, - pep_schema=result.pep_schema, + with Session(self._sa_engine) as session: + for prj_ids in id_results: + result = session.scalar(select(Projects).where(Projects.id == prj_ids[0])) + + results_list.append( + AnnotationModel( + namespace=result.namespace, + name=result.name, + tag=result.tag, + is_private=result.private, + description=result.description, + number_of_samples=result.number_of_samples, + submission_date=str(result.submission_date), + last_update_date=str(result.last_update_date), + digest=result.digest, + pep_schema=result.pep_schema, + pop=result.pop, + stars_number=result.number_of_stars, + forked_from=f"{result.forked_from_mapping.namespace}/{result.forked_from_mapping.name}:{result.forked_from_mapping.tag}" + if 
result.forked_from_id + else None, + ) ) - ) return results_list @staticmethod @@ -445,7 +457,7 @@ def _add_date_filter_if_provided( return statement else: if filter_by: - _LOGGER.warning(f"filter_start_date was not provided, skipping filter...") + _LOGGER.warning("filter_start_date was not provided, skipping filter...") return statement def get_project_number_in_namespace( @@ -475,3 +487,88 @@ def get_project_number_in_namespace( return result[0] except IndexError: return 0 + + def get_by_rp_list( + self, + registry_paths: List[str], + admin: Union[str, List[str]] = None, + ) -> AnnotationList: + """ + Get project annotations by providing list of registry paths. + + :param registry_paths: registry path string or list of registry paths + :param admin: list of namespaces where user is admin + :return: pydantic model: AnnotationList( + limit: + offset: + count: + results: List [AnnotationModel]) + """ + admin_tuple = tuple_converter(admin) + + if isinstance(registry_paths, list): + or_statement_list = [] + for path in registry_paths: + try: + namespace, name, tag = registry_path_converter(path) + or_statement_list.append( + and_( + Projects.name == name, + Projects.namespace == namespace, + Projects.tag == tag, + or_( + Projects.namespace.in_(admin_tuple), + Projects.private.is_(False), + ), + ) + ) + except RegistryPathError as err: + _LOGGER.error(str(err), registry_paths) + continue + if not or_statement_list: + _LOGGER.error("No valid registry paths were provided!") + return AnnotationList( + count=0, + limit=len(registry_paths), + offset=0, + results=[], + ) + + statement = select(Projects).where(or_(*or_statement_list)) + anno_results = [] + with Session(self._sa_engine) as session: + query_result = session.execute(statement).all() + for result in query_result: + project_obj = result[0] + annot = AnnotationModel( + namespace=project_obj.namespace, + name=project_obj.name, + tag=project_obj.tag, + is_private=project_obj.private, +
description=project_obj.description, + number_of_samples=project_obj.number_of_samples, + submission_date=str(project_obj.submission_date), + last_update_date=str(project_obj.last_update_date), + digest=project_obj.digest, + pep_schema=project_obj.pep_schema, + pop=project_obj.pop, + stars_number=project_obj.number_of_stars, + forked_from=f"{project_obj.forked_from_mapping.namespace}/{project_obj.forked_from_mapping.name}:{project_obj.forked_from_mapping.tag}" + if project_obj.forked_from_mapping + else None, + ) + anno_results.append(annot) + + found_dict = {f"{r.namespace}/{r.name}:{r.tag}": r for r in anno_results} + end_results = [found_dict.get(project) for project in registry_paths] + + return_len = len(anno_results) + return AnnotationList( + count=return_len, + limit=len(registry_paths), + offset=0, + results=end_results, + ) + + else: + return self.get_by_rp(registry_paths, admin) diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 7066650..4cab83f 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -1,5 +1,5 @@ import logging -from typing import List, Union +from typing import List, Union, Tuple from sqlalchemy import distinct, func, or_, select, text from sqlalchemy.sql.selectable import Select @@ -141,7 +141,7 @@ def _count_namespace(self, search_str: str = None, admin_nsp: tuple = tuple()) - def _add_condition( statement: Select, search_str: str = None, - admin_list: Union[str, List[str]] = None, + admin_list: Union[Tuple[str], List[str], str] = None, ) -> Select: """ Add where clause to sqlalchemy statement (in namespace search) diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 6bb8b05..8c89b54 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -1,17 +1,28 @@ import datetime import json import logging -from typing import Union, List, NoReturn +from typing import Union, List, NoReturn, Mapping import peppy -from sqlalchemy 
import Engine, and_, delete, insert, or_, select, update +from sqlalchemy import and_, delete, select from sqlalchemy.exc import IntegrityError, NoResultFound from sqlalchemy.orm import Session from sqlalchemy import Select -from peppy.const import SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_LIST_KEY, CONFIG_KEY +from peppy.const import ( + SAMPLE_RAW_DICT_KEY, + SUBSAMPLE_RAW_LIST_KEY, + CONFIG_KEY, + SAMPLE_TABLE_INDEX_KEY, +) + +from pepdbagent.const import ( + DEFAULT_TAG, + DESCRIPTION_KEY, + NAME_KEY, + PKG_NAME, +) -from pepdbagent.const import * from pepdbagent.db_utils import Projects, Samples, Subsamples, BaseEngine from pepdbagent.exceptions import ProjectNotFoundError, ProjectUniqueNameError from pepdbagent.models import UpdateItems, UpdateModel @@ -79,11 +90,16 @@ def get( subsample_list = list(subsample_dict.values()) else: subsample_list = [] + + # samples + samples_dict = { + sample_sa.row_number: sample_sa.sample + for sample_sa in found_prj.samples_mapping + } + project_value = { CONFIG_KEY: found_prj.config, - SAMPLE_RAW_DICT_KEY: [ - sample_sa.sample for sample_sa in found_prj.samples_mapping - ], + SAMPLE_RAW_DICT_KEY: [samples_dict[key] for key in sorted(samples_dict)], SUBSAMPLE_RAW_LIST_KEY: subsample_list, } # project_value = found_prj.project_value @@ -200,6 +216,7 @@ def create( tag: str = DEFAULT_TAG, description: str = None, is_private: bool = False, + pop: bool = False, pep_schema: str = None, overwrite: bool = False, update_only: bool = False, @@ -214,7 +231,8 @@ def create( :param name: name of the project (Default: name is taken from the project object) :param tag: tag (or version) of the project. :param is_private: boolean value if the project should be visible just for user that creates it. - :param pep_schema: assign PEP to a specific schema. [DefaultL: None] + :param pep_schema: assign PEP to a specific schema. 
[Default: None] + :param pop: if project is a pep of peps (POP) [Default: False] :param overwrite: if project exists overwrite the project, otherwise upload it. [Default: False - project won't be overwritten if it exists in db] :param update_only: if project exists overwrite it, otherwise do nothing. [Default: False] @@ -232,7 +250,7 @@ def create( elif proj_dict[CONFIG_KEY][NAME_KEY]: proj_name = proj_dict[CONFIG_KEY][NAME_KEY].lower() else: - raise ValueError(f"Name of the project wasn't provided. Project will not be uploaded.") + raise ValueError("Name of the project wasn't provided. Project will not be uploaded.") proj_dict[CONFIG_KEY][NAME_KEY] = proj_name @@ -251,6 +269,7 @@ def create( private=is_private, pep_schema=pep_schema, description=description, + pop=pop, ) return None else: @@ -268,9 +287,14 @@ def create( last_update_date=datetime.datetime.now(datetime.timezone.utc), pep_schema=pep_schema, description=description, + pop=pop, ) - self._add_samples_to_project(new_prj, proj_dict[SAMPLE_RAW_DICT_KEY]) + self._add_samples_to_project( + new_prj, + proj_dict[SAMPLE_RAW_DICT_KEY], + sample_table_index=project.sample_table_index, + ) if proj_dict[SUBSAMPLE_RAW_LIST_KEY]: subsamples = proj_dict[SUBSAMPLE_RAW_LIST_KEY] @@ -299,9 +323,9 @@ def create( else: raise ProjectUniqueNameError( - f"Namespace, name and tag already exists. Project won't be " - f"uploaded. Solution: Set overwrite value as True" - f" (project will be overwritten), or change tag!" + "Namespace, name and tag already exists. Project won't be " + "uploaded. Solution: Set overwrite value as True" + " (project will be overwritten), or change tag!" ) def _overwrite( @@ -315,6 +339,7 @@ def _overwrite( private: bool = False, pep_schema: str = None, description: str = "", + pop: bool = False, ) -> None: """ Update existing project by providing all necessary information. @@ -328,6 +353,7 @@ def _overwrite( :param private: boolean value if the project should be visible just for user that creates it. 
:param pep_schema: assign PEP to a specific schema. [DefaultL: None] :param description: project description + :param pop: if project is a pep of peps, simply POP [Default: False] :return: None """ proj_name = proj_name.lower() @@ -351,6 +377,7 @@ def _overwrite( found_prj.config = project_dict[CONFIG_KEY] found_prj.description = description found_prj.last_update_date = datetime.datetime.now(datetime.timezone.utc) + found_prj.pop = pop # Deleting old samples and subsamples if found_prj.samples_mapping: @@ -364,7 +391,11 @@ def _overwrite( session.delete(subsample) # Adding new samples and subsamples - self._add_samples_to_project(found_prj, project_dict[SAMPLE_RAW_DICT_KEY]) + self._add_samples_to_project( + found_prj, + project_dict[SAMPLE_RAW_DICT_KEY], + sample_table_index=project_dict[CONFIG_KEY].get(SAMPLE_TABLE_INDEX_KEY), + ) if project_dict[SUBSAMPLE_RAW_LIST_KEY]: self._add_subsamples_to_project( @@ -420,7 +451,7 @@ def update( statement = self._create_select_statement(name, namespace, tag) with Session(self._sa_engine) as session: - found_prj = session.scalars(statement).one() + found_prj = session.scalar(statement) if found_prj: _LOGGER.debug( @@ -440,12 +471,25 @@ def update( found_prj.name = found_prj.config[NAME_KEY] if "samples" in update_dict: - if found_prj.samples_mapping: - for sample in found_prj.samples_mapping: - _LOGGER.debug(f"deleting samples: {str(sample)}") - session.delete(sample) - - self._add_samples_to_project(found_prj, update_dict["samples"]) + self._update_samples( + namespace=namespace, + name=name, + tag=tag, + samples_list=update_dict["samples"], + sample_name_key=update_dict["config"].get( + SAMPLE_TABLE_INDEX_KEY, "sample_name" + ), + ) + # if found_prj.samples_mapping: + # for sample in found_prj.samples_mapping: + # _LOGGER.debug(f"deleting samples: {str(sample)}") + # session.delete(sample) + # + # self._add_samples_to_project( + # found_prj, + # update_dict["samples"], + # 
sample_table_index=update_dict["config"].get(SAMPLE_TABLE_INDEX_KEY), + # ) if "subsamples" in update_dict: if found_prj.subsamples_mapping: @@ -466,6 +510,67 @@ def update( else: raise ProjectNotFoundError("No items will be updated!") + def _update_samples( + self, + namespace: str, + name: str, + tag: str, + samples_list: List[Mapping], + sample_name_key: str = "sample_name", + ) -> None: + """ + Update samples in the project + This is a new method that instead of deleting all samples and adding new ones, + updates samples and adds new ones if they don't exist + + :param samples_list: list of samples to be updated + :param sample_name_key: key of the sample name + :return: None + """ + new_sample_names = [sample[sample_name_key] for sample in samples_list] + with Session(self._sa_engine) as session: + project = session.scalar( + select(Projects).where( + and_( + Projects.namespace == namespace, Projects.name == name, Projects.tag == tag + ) + ) + ) + old_sample_names = [sample.sample_name for sample in project.samples_mapping] + for old_sample in old_sample_names: + if old_sample not in new_sample_names: + session.execute( + delete(Samples).where( + and_( + Samples.sample_name == old_sample, Samples.project_id == project.id + ) + ) + ) + + order_number = 0 + for new_sample in samples_list: + order_number += 1 + if new_sample[sample_name_key] not in old_sample_names: + project.samples_mapping.append( + Samples( + sample=new_sample, + sample_name=new_sample[sample_name_key], + row_number=order_number, + ) + ) + else: + sample_mapping = session.scalar( + select(Samples).where( + and_( + Samples.sample_name == new_sample[sample_name_key], + Samples.project_id == project.id, + ) + ) + ) + sample_mapping.sample = new_sample + sample_mapping.row_number = order_number + session.commit() + @staticmethod def __create_update_dict(update_values: UpdateItems) -> dict: """ @@ -476,14 +581,14 @@ def __create_update_dict(update_values: UpdateItems) -> dict: updating values 
:return: unified update dict """ - update_final = UpdateModel() + update_final = UpdateModel.model_construct() if update_values.name is not None: if update_values.config is not None: update_values.config[NAME_KEY] = update_values.name update_final = UpdateModel( name=update_values.name, - **update_final.dict(exclude_unset=True), + **update_final.model_dump(exclude_unset=True), ) if update_values.description is not None: @@ -491,49 +596,49 @@ def __create_update_dict(update_values: UpdateItems) -> dict: update_values.config[DESCRIPTION_KEY] = update_values.description update_final = UpdateModel( description=update_values.description, - **update_final.dict(exclude_unset=True), + **update_final.model_dump(exclude_unset=True), ) if update_values.config is not None: update_final = UpdateModel( - config=update_values.config, **update_final.dict(exclude_unset=True) + config=update_values.config, **update_final.model_dump(exclude_unset=True) ) name = update_values.config.get(NAME_KEY) description = update_values.config.get(DESCRIPTION_KEY) if name: update_final = UpdateModel( name=name, - **update_final.dict(exclude_unset=True, exclude={NAME_KEY}), + **update_final.model_dump(exclude_unset=True, exclude={NAME_KEY}), ) if description: update_final = UpdateModel( description=description, - **update_final.dict(exclude_unset=True, exclude={DESCRIPTION_KEY}), + **update_final.model_dump(exclude_unset=True, exclude={DESCRIPTION_KEY}), ) if update_values.tag is not None: update_final = UpdateModel( - tag=update_values.tag, **update_final.dict(exclude_unset=True) + tag=update_values.tag, **update_final.model_dump(exclude_unset=True) ) if update_values.is_private is not None: update_final = UpdateModel( is_private=update_values.is_private, - **update_final.dict(exclude_unset=True), + **update_final.model_dump(exclude_unset=True), ) if update_values.pep_schema is not None: update_final = UpdateModel( pep_schema=update_values.pep_schema, - **update_final.dict(exclude_unset=True), + 
**update_final.model_dump(exclude_unset=True), ) if update_values.number_of_samples is not None: update_final = UpdateModel( number_of_samples=update_values.number_of_samples, - **update_final.dict(exclude_unset=True), + **update_final.model_dump(exclude_unset=True), ) - return update_final.dict(exclude_unset=True, exclude_none=True) + return update_final.model_dump(exclude_unset=True, exclude_none=True) def exists( self, @@ -565,15 +670,26 @@ def exists( return False @staticmethod - def _add_samples_to_project(projects_sa: Projects, samples: List[dict]) -> NoReturn: + def _add_samples_to_project( + projects_sa: Projects, samples: List[dict], sample_table_index: str = "sample_name" + ) -> None: """ Add samples to the project sa object. (With commit this samples will be added to the 'samples table') :param projects_sa: Projects sa object, in open session :param samples: list of samles to be added to the database + :param sample_table_index: index of the sample table :return: NoReturn """ for row_number, sample in enumerate(samples): - projects_sa.samples_mapping.append(Samples(sample=sample, row_number=row_number)) + projects_sa.samples_mapping.append( + Samples( + sample=sample, + row_number=row_number, + sample_name=sample.get(sample_table_index), + ) + ) + + return None @staticmethod def _add_subsamples_to_project( @@ -590,3 +706,79 @@ def _add_subsamples_to_project( projects_sa.subsamples_mapping.append( Subsamples(subsample=sub_item, subsample_number=i, row_number=row_number) ) + + def get_project_id(self, namespace: str, name: str, tag: str) -> Union[int, None]: + """ + Get Project id by providing namespace, name, and tag + + :param namespace: project namespace + :param name: project name + :param tag: project tag + :return: projects id + """ + statement = select(Projects.id).where( + and_(Projects.namespace == namespace, Projects.name == name, Projects.tag == tag) + ) + with Session(self._sa_engine) as session: + result = 
session.execute(statement).one_or_none() + + if result: + return result[0] + return None + + def fork( + self, + original_namespace: str, + original_name: str, + original_tag: str, + fork_namespace: str, + fork_name: str = None, + fork_tag: str = None, + description: str = None, + private: bool = False, + ): + """ + Fork project from one namespace to another + + :param original_namespace: namespace of the project to be forked + :param original_name: name of the project to be forked + :param original_tag: tag of the project to be forked + :param fork_namespace: namespace of the forked project + :param fork_name: name of the forked project + :param fork_tag: tag of the forked project + :param description: description of the forked project + :param private: boolean value if the project should be visible just for user that creates it. + :return: None + """ + self.create( + project=self.get( + namespace=original_namespace, + name=original_name, + tag=original_tag, + ), + namespace=fork_namespace, + name=fork_name, + tag=fork_tag, + description=description or None, + is_private=private, + ) + original_statement = select(Projects).where( + Projects.namespace == original_namespace, + Projects.name == original_name, + Projects.tag == original_tag, + ) + fork_statement = select(Projects).where( + Projects.namespace == fork_namespace, + Projects.name == fork_name, + Projects.tag == fork_tag, + ) + + with Session(self._sa_engine) as session: + original_prj = session.scalar(original_statement) + fork_prj = session.scalar(fork_statement) + fork_prj.forked_from_id = original_prj.id + fork_prj.pop = original_prj.pop + fork_prj.submission_date = original_prj.submission_date + + session.commit() + return None diff --git a/pepdbagent/modules/sample.py b/pepdbagent/modules/sample.py new file mode 100644 index 0000000..cd54c5c --- /dev/null +++ b/pepdbagent/modules/sample.py @@ -0,0 +1,320 @@ +import logging +from typing import Union +import datetime + +import peppy +from peppy.const 
import SAMPLE_TABLE_INDEX_KEY +from sqlalchemy import select, and_, func +from sqlalchemy.orm import Session +from sqlalchemy.orm.attributes import flag_modified + + +from pepdbagent.const import ( + DEFAULT_TAG, + PKG_NAME, +) +from pepdbagent.exceptions import SampleNotFoundError + +from pepdbagent.db_utils import BaseEngine, Samples, Projects + +_LOGGER = logging.getLogger(PKG_NAME) + + +class PEPDatabaseSample: + """ + Class that represents Project in Database. + + While using this class, user can create, retrieve, delete, and update projects from database + """ + + def __init__(self, pep_db_engine: BaseEngine): + """ + :param pep_db_engine: pepdbengine object with sa engine + """ + self._sa_engine = pep_db_engine.engine + self._pep_db_engine = pep_db_engine + + def get( + self, + namespace: str, + name: str, + sample_name: str, + tag: str = DEFAULT_TAG, + raw: bool = False, + ) -> Union[peppy.Sample, dict, None]: + """ + Retrieve sample from the database using namespace, name, tag, and sample_name + + :param namespace: namespace of the project + :param name: name of the project (Default: name is taken from the project object) + :param tag: tag (or version) of the project. 
+ :param sample_name: sample_name of the sample + :param raw: return raw dict or peppy.Sample object + :return: peppy.Project object with found project or dict with unprocessed + PEP elements: { + name: str + description: str + _config: dict + _sample_dict: dict + _subsample_dict: dict + } + """ + statement_sample = select(Samples).where( + and_( + Samples.project_id + == select(Projects.id) + .where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ), + ) + .scalar_subquery(), + Samples.sample_name == sample_name, + ) + ) + project_config_statement = select(Projects.config).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + + with Session(self._sa_engine) as session: + result = session.scalar(statement_sample) + if result: + if not raw: + config = session.execute(project_config_statement).one_or_none()[0] + project = peppy.Project().from_dict( + pep_dictionary={ + "name": name, + "description": config.get("description"), + "_config": config, + "_sample_dict": [result.sample], + "_subsample_dict": None, + } + ) + return project.samples[0] + else: + return result.sample + else: + raise SampleNotFoundError( + f"Sample {namespace}/{name}:{tag}?{sample_name} not found in the database" + ) + + def update( + self, + namespace: str, + name: str, + tag: str, + sample_name: str, + update_dict: dict, + full_update: bool = False, + ) -> None: + """ + Update one sample in the database + + :param namespace: namespace of the project + :param name: name of the project (Default: name is taken from the project object) + :param tag: tag (or version) of the project. + :param sample_name: sample_name of the sample + :param update_dict: dictionary with sample data (key: value pairs). e.g. 
+ {"sample_name": "sample1", + "sample_protocol": "sample1 protocol"} + :param full_update: if True, update all sample fields, if False, update only fields from update_dict + :return: None + """ + statement = select(Samples).where( + and_( + Samples.project_id + == select(Projects.id) + .where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ), + ) + .scalar_subquery(), + Samples.sample_name == sample_name, + ) + ) + project_statement = select(Projects).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + with Session(self._sa_engine) as session: + sample_mapping = session.scalar(statement) + project_mapping = session.scalar(project_statement) + + if sample_mapping: + if full_update: + sample_mapping.sample = update_dict + else: + sample_mapping.sample.update(update_dict) + try: + sample_mapping.sample_name = sample_mapping.sample[ + project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, "sample_name") + ] + except KeyError: + raise KeyError( + f"Sample index key {project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, 'sample_name')} not found in sample dict" + ) + + # This line needed due to: https://github.com/sqlalchemy/sqlalchemy/issues/5218 + flag_modified(sample_mapping, "sample") + + project_mapping.last_update_date = datetime.datetime.now(datetime.timezone.utc) + + session.commit() + else: + raise SampleNotFoundError( + f"Sample {namespace}/{name}:{tag}?{sample_name} not found in the database" + ) + + def add( + self, + namespace: str, + name: str, + tag: str, + sample_dict: dict, + overwrite: bool = False, + ) -> None: + """ + Add one sample to the project in the database + + :param namespace: namespace of the project + :param name: name of the project + :param tag: tag (or version) of the project. + :param overwrite: overwrite sample if it already exists + :param sample_dict: dictionary with sample data (key: value pairs). e.g. 
+ {"sample_name": "sample1", + "sample_protocol": "sample1 protocol"} + :return: None + """ + + with Session(self._sa_engine) as session: + project_statement = select(Projects).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + # project mapping is needed to update number of samples, last_update_date and get sample_index_key + project_mapping = session.scalar(project_statement) + try: + sample_name = sample_dict[ + project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, "sample_name") + ] + except KeyError: + raise KeyError( + f"Sample index key {project_mapping.config.get(SAMPLE_TABLE_INDEX_KEY, 'sample_name')} not found in sample dict" + ) + project_where_statement = ( + Samples.project_id + == select(Projects.id) + .where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ), + ) + .scalar_subquery() + ) + statement = select(Samples).where( + and_(project_where_statement, Samples.sample_name == sample_name) + ) + + sample_mapping = session.scalar(statement) + row_number = ( + session.execute( + select(func.max(Samples.row_number)).where(project_where_statement) + ).one()[0] + or 0 + ) + + if sample_mapping and not overwrite: + raise ValueError( + f"Sample {namespace}/{name}:{tag}?{sample_name} already exists in the database" + ) + elif sample_mapping and overwrite: + self.update( + namespace=namespace, + name=name, + tag=tag, + sample_name=sample_name, + update_dict=sample_dict, + full_update=True, + ) + return None + else: + sample_mapping = Samples( + sample=sample_dict, + row_number=row_number + 1, + project_id=project_mapping.id, + sample_name=sample_name, + ) + project_mapping.number_of_samples += 1 + project_mapping.last_update_date = datetime.datetime.now(datetime.timezone.utc) + + session.add(sample_mapping) + session.commit() + + def delete( + self, + namespace: str, + name: str, + tag: str, + sample_name: str, + ) -> None: + """ + Delete one sample from the 
database + + :param namespace: namespace of the project + :param name: name of the project + :param tag: tag (or version) of the project. + :param sample_name: sample_name of the sample + :return: None + """ + statement = select(Samples).where( + and_( + Samples.project_id + == select(Projects.id) + .where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ), + ) + .scalar_subquery(), + Samples.sample_name == sample_name, + ) + ) + project_statement = select(Projects).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + with Session(self._sa_engine) as session: + sample_mapping = session.scalar(statement) + project_mapping = session.scalar(project_statement) + + if sample_mapping: + session.delete(sample_mapping) + project_mapping.number_of_samples -= 1 + project_mapping.last_update_date = datetime.datetime.now(datetime.timezone.utc) + session.commit() + else: + raise SampleNotFoundError( + f"Sample {namespace}/{name}:{tag}?{sample_name} not found in the database" + ) diff --git a/pepdbagent/modules/user.py b/pepdbagent/modules/user.py new file mode 100644 index 0000000..5670195 --- /dev/null +++ b/pepdbagent/modules/user.py @@ -0,0 +1,218 @@ +import logging +from typing import Union + +from sqlalchemy import and_, delete, select +from sqlalchemy.orm import Session +from sqlalchemy.exc import IntegrityError + +from pepdbagent.const import ( + PKG_NAME, +) + +from pepdbagent.db_utils import BaseEngine, User, Stars, Projects +from pepdbagent.modules.project import PEPDatabaseProject +from pepdbagent.models import AnnotationList, AnnotationModel +from pepdbagent.exceptions import ProjectNotInFavorites, ProjectAlreadyInFavorites + + +_LOGGER = logging.getLogger(PKG_NAME) + + +class PEPDatabaseUser: + """ + Class that represents Project in Database. 
+ + While using this class, user can create, retrieve, delete, and update projects from database + """ + + def __init__(self, pep_db_engine: BaseEngine): + """ + :param pep_db_engine: pepdbengine object with sa engine + """ + self._sa_engine = pep_db_engine.engine + self._pep_db_engine = pep_db_engine + + def create_user(self, namespace: str) -> int: + """ + Create new user + + :param namespace: user namespace + :return: user id + """ + new_user_raw = User(namespace=namespace) + + with Session(self._sa_engine) as session: + session.add(new_user_raw) + session.commit() + user_id = new_user_raw.id + return user_id + + def get_user_id(self, namespace: str) -> Union[int, None]: + """ + Get user id using username + + :param namespace: user namespace + :return: user id + """ + statement = select(User.id).where(User.namespace == namespace) + with Session(self._sa_engine) as session: + result = session.execute(statement).one_or_none() + + if result: + return result[0] + return None + + def add_project_to_favorites( + self, namespace: str, project_namespace: str, project_name: str, project_tag: str + ) -> None: + """ + Add project to favorites + + :param namespace: namespace of the user + :param project_namespace: namespace of the project + :param project_name: name of the project + :param project_tag: tag of the project + :return: None + """ + + user_id = self.get_user_id(namespace) + + if not user_id: + user_id = self.create_user(namespace) + + try: + with Session(self._sa_engine) as session: + project_mapping = session.scalar( + select(Projects).where( + and_( + Projects.namespace == project_namespace, + Projects.name == project_name, + Projects.tag == project_tag, + ) + ) + ) + + new_favorites_raw = Stars(user_id=user_id, project_id=project_mapping.id) + + session.add(new_favorites_raw) + project_mapping.number_of_stars += 1 + session.commit() + except IntegrityError: + raise ProjectAlreadyInFavorites() + return None + + def remove_project_from_favorites( + self, 
namespace: str, project_namespace: str, project_name: str, project_tag: str + ) -> None: + """ + Remove project from favorites + + :param namespace: namespace of the user + :param project_namespace: namespace of the project + :param project_name: name of the project + :param project_tag: tag of the project + :return: None + """ + _LOGGER.debug( + f"Removing project {project_namespace}/{project_name}:{project_tag} from fProjectNotInFavorites for user {namespace}" + ) + + user_id = self.get_user_id(namespace) + + with Session(self._sa_engine) as session: + project_mapping = session.scalar( + select(Projects).where( + and_( + Projects.namespace == project_namespace, + Projects.name == project_name, + Projects.tag == project_tag, + ) + ) + ) + delete_statement = delete(Stars).where( + and_( + Stars.user_id == user_id, + Stars.project_id == project_mapping.id, + ) + ) + project_mapping.number_of_stars -= 1 + result = session.execute(delete_statement) + session.commit() + row_count = result.rowcount + if row_count == 0: + raise ProjectNotInFavorites( + f"Project {project_namespace}/{project_name}:{project_tag} is not in favorites for user {namespace}" + ) + return None + + def get_favorites(self, namespace: str) -> AnnotationList: + """ + Get list of favorites for user + + :param namespace: namespace of the user + :return: list of favorite projects with annotations + """ + _LOGGER.debug(f"Getting favorites for user {namespace}") + if not self.exists(namespace): + return AnnotationList( + count=0, + limit=0, + offset=0, + results=[], + ) + statement = select(User).where(User.namespace == namespace) + with Session(self._sa_engine) as session: + query_result = session.scalar(statement) + number_of_projects = len([kk.project_mapping for kk in query_result.stars_mapping]) + project_list = [] + for prj_list in query_result.stars_mapping: + project_list.append( + AnnotationModel( + namespace=prj_list.project_mapping.namespace, + name=prj_list.project_mapping.name, + 
tag=prj_list.project_mapping.tag, + is_private=prj_list.project_mapping.private, + number_of_samples=prj_list.project_mapping.number_of_samples, + description=prj_list.project_mapping.description, + last_update_date=str(prj_list.project_mapping.last_update_date), + submission_date=str(prj_list.project_mapping.submission_date), + digest=prj_list.project_mapping.digest, + pep_schema=prj_list.project_mapping.pep_schema, + pop=prj_list.project_mapping.pop, + stars_number=prj_list.project_mapping.number_of_stars, + forked_from=f"{prj_list.project_mapping.namespace}/{prj_list.project_mapping.name}:{prj_list.project_mapping.tag}" + if prj_list.project_mapping + else None, + ) + ) + favorite_prj = AnnotationList( + count=number_of_projects, + limit=number_of_projects, + offset=0, + results=project_list, + ) + return favorite_prj + + def exists( + self, + namespace: str, + ) -> bool: + """ + Check if user exists in the database. + + :param namespace: project namespace + :return: Returning True if project exist + """ + + statement = select(User.id) + statement = statement.where( + and_( + User.namespace == namespace, + ) + ) + found_prj = self._pep_db_engine.session_execute(statement).all() + + if len(found_prj) > 0: + return True + else: + return False diff --git a/pepdbagent/modules/view.py b/pepdbagent/modules/view.py new file mode 100644 index 0000000..fe41be6 --- /dev/null +++ b/pepdbagent/modules/view.py @@ -0,0 +1,405 @@ +# View of the PEP. In other words, it is a part of the PEP, or subset of the samples in the PEP. 
+ +import logging +from typing import Union, List + +import peppy +from sqlalchemy import select, and_, delete +from sqlalchemy.orm import Session +from sqlalchemy.exc import IntegrityError + + +from pepdbagent.const import ( + DEFAULT_TAG, + PKG_NAME, +) +from pepdbagent.exceptions import ( + ViewNotFoundError, + SampleAlreadyInView, + ProjectNotFoundError, + SampleNotFoundError, +) + +from pepdbagent.db_utils import BaseEngine, Samples, Projects, Views, ViewSampleAssociation +from pepdbagent.models import ViewAnnotation, CreateViewDictModel, ProjectViews + +_LOGGER = logging.getLogger(PKG_NAME) + + +class PEPDatabaseView: + """ + Class that represents Project in Database. + + While using this class, user can create, retrieve, delete, and update projects from database + """ + + def __init__(self, pep_db_engine: BaseEngine): + """ + :param pep_db_engine: pepdbengine object with sa engine + """ + self._sa_engine = pep_db_engine.engine + self._pep_db_engine = pep_db_engine + + def get( + self, + namespace: str, + name: str, + tag: str = DEFAULT_TAG, + view_name: str = None, + raw: bool = False, + ) -> Union[peppy.Project, dict, None]: + """ + Retrieve view of the project from the database. + View is a subset of the samples in the project. e.g. bed-db project has all the samples in bedbase, + bedset is a view of the bedbase project with only the samples in the bedset. + + :param namespace: namespace of the project + :param name: name of the project (Default: name is taken from the project object) + :param tag: tag of the project (Default: tag is taken from the project object) + :param view_name: name of the view + :param raw: retrieve unprocessed (raw) PEP dict. 
+ :return: peppy.Project object with found project or dict with unprocessed + PEP elements: { + name: str + description: str + _config: dict + _sample_dict: dict + _subsample_dict: dict + } + """ + view_statement = select(Views).where( + and_( + Views.project_mapping.has(namespace=namespace, name=name, tag=tag), + Views.name == view_name, + ) + ) + + with Session(self._sa_engine) as sa_session: + view = sa_session.scalar(view_statement) + if not view: + raise ViewNotFoundError( + f"View {view_name} of the project {namespace}/{name}:{tag} does not exist" + ) + samples = [sample.sample.sample for sample in view.samples] + config = view.project_mapping.config + sub_project_dict = {"_config": config, "_sample_dict": samples, "_subsample_dict": None} + if raw: + return sub_project_dict + else: + return peppy.Project.from_dict(sub_project_dict) + + def get_annotation( + self, namespace: str, name: str, tag: str = DEFAULT_TAG, view_name: str = None + ) -> ViewAnnotation: + """ + Get annotation of the view. 
+ + :param namespace: namespace of the project + :param name: name of the project + :param tag: tag of the project + :param view_name: name of the sample + :return: ViewAnnotation object: + {project_namespace: str, + project_name: str, + project_tag: str, + name: str, + description: str, + number_of_samples: int} + """ + view_statement = select(Views).where( + and_( + Views.project_mapping.has(namespace=namespace, name=name, tag=tag), + Views.name == view_name, + ) + ) + + with Session(self._sa_engine) as sa_session: + view = sa_session.scalar(view_statement) + if not view: + raise ViewNotFoundError( + f"View {name} of the project {namespace}/{name}:{tag} does not exist" + ) + return ViewAnnotation( + project_namespace=namespace, + project_name=name, + project_tag=tag, + name=view.name, + description=view.description, + number_of_samples=len(view.samples), + ) + + def create( + self, + view_name: str, + view_dict: Union[dict, CreateViewDictModel], + description: str = None, + ) -> None: + """ + Create a view of the project in the database. + + :param view_name: namespace of the project + :param view_dict: dict or CreateViewDictModel object with view samples. 
+ Dict should have the following structure: + { + project_namespace: str + project_name: str + project_tag: str + sample_list: List[str] # list of sample names + } + :param description: description of the view + retrun: None + """ + if isinstance(view_dict, dict): + view_dict = CreateViewDictModel(**view_dict) + + project_statement = select(Projects).where( + and_( + Projects.namespace == view_dict.project_namespace, + Projects.name == view_dict.project_name, + Projects.tag == view_dict.project_tag, + ) + ) + + with Session(self._sa_engine) as sa_session: + project = sa_session.scalar(project_statement) + if not project: + raise ProjectNotFoundError( + f"Project {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag} does not exist" + ) + view = Views( + name=view_name, + description=description, + project_mapping=project, + ) + sa_session.add(view) + + for sample_name in view_dict.sample_list: + sample_statement = select(Samples.id).where( + and_( + Samples.project_id == project.id, + Samples.sample_name == sample_name, + ) + ) + sample_id = sa_session.execute(sample_statement).one()[0] + if not sample_id: + raise SampleNotFoundError( + f"Sample {view_dict.project_namespace}/{view_dict.project_name}:{view_dict.project_tag}:{sample_name} does not exist" + ) + sa_session.add(ViewSampleAssociation(sample_id=sample_id, view=view)) + + sa_session.commit() + + def delete( + self, + project_namespace: str, + project_name: str, + project_tag: str = DEFAULT_TAG, + view_name: str = None, + ) -> None: + """ + Delete a view of the project in the database. 
+ + :param project_namespace: namespace of the project + :param project_name: name of the project + :param project_tag: tag of the project + :param view_name: name of the view + :return: None + """ + view_statement = select(Views).where( + and_( + Views.project_mapping.has( + namespace=project_namespace, name=project_name, tag=project_tag + ), + Views.name == view_name, + ) + ) + + with Session(self._sa_engine) as sa_session: + view = sa_session.scalar(view_statement) + if not view: + raise ViewNotFoundError( + f"View {view_name} of the project {project_namespace}/{project_name}:{project_tag} does not exist" + ) + sa_session.delete(view) + sa_session.commit() + + def add_sample( + self, + namespace: str, + name: str, + tag: str, + view_name: str, + sample_name: Union[str, List[str]], + ): + """ + Add sample to the view. + + :param namespace: namespace of the project + :param name: name of the project + :param tag: tag of the project + :param view_name: name of the view + :param sample_name: sample name + :return: None + """ + if isinstance(sample_name, str): + sample_name = [sample_name] + view_statement = select(Views).where( + and_( + Views.project_mapping.has(namespace=namespace, name=name, tag=tag), + Views.name == view_name, + ) + ) + + with Session(self._sa_engine) as sa_session: + view = sa_session.scalar(view_statement) + if not view: + raise ViewNotFoundError( + f"View {view_name} of the project {namespace}/{name}:{tag} does not exist" + ) + for sample_name_one in sample_name: + sample_statement = select(Samples).where( + and_( + Samples.project_id == view.project_mapping.id, + Samples.sample_name == sample_name_one, + ) + ) + sample = sa_session.scalar(sample_statement) + if not sample: + raise SampleNotFoundError( + f"Sample {namespace}/{name}:{tag}:{sample_name} does not exist" + ) + try: + sa_session.add(ViewSampleAssociation(sample=sample, view=view)) + sa_session.commit() + except IntegrityError: + raise SampleAlreadyInView( + f"Sample 
{namespace}/{name}:{tag}:{sample_name} already in view {view_name}" + ) + + return None + + def remove_sample( + self, + namespace: str, + name: str, + tag: str, + view_name: str, + sample_name: str, + ) -> None: + """ + Remove sample from the view. + + :param namespace: namespace of the project + :param name: name of the project + :param tag: tag of the project + :param view_name: name of the view + :param sample_name: sample name + :return: None + """ + view_statement = select(Views).where( + and_( + Views.project_mapping.has(namespace=namespace, name=name, tag=tag), + Views.name == view_name, + ) + ) + + with Session(self._sa_engine) as sa_session: + view = sa_session.scalar(view_statement) + if not view: + raise ViewNotFoundError( + f"View {view_name} of the project {namespace}/{name}:{tag} does not exist" + ) + sample_statement = select(Samples).where( + and_( + Samples.project_id == view.project_mapping.id, + Samples.sample_name == sample_name, + ) + ) + sample = sa_session.scalar(sample_statement) + delete_statement = delete(ViewSampleAssociation).where( + and_( + ViewSampleAssociation.sample_id == sample.id, + ViewSampleAssociation.view_id == view.id, + ) + ) + sa_session.execute(delete_statement) + sa_session.commit() + + return None + + def get_snap_view( + self, namespace: str, name: str, tag: str, sample_name_list: List[str], raw: bool = False + ) -> Union[peppy.Project, dict]: + """ + Get a snap view of the project. Snap view is a view of the project + with only the samples in the list. This view won't be saved in the database. + + :param namespace: project namespace + :param name: name of the project + :param tag: tag of the project + :param sample_name_list: list of sample names e.g. ["sample1", "sample2"] + :param raw: retrieve unprocessed (raw) PEP dict. 
+ :return: peppy.Project object + """ + project_statement = select(Projects).where( + and_( + Projects.namespace == namespace, + Projects.name == name, + Projects.tag == tag, + ) + ) + with Session(self._sa_engine) as sa_session: + project = sa_session.scalar(project_statement) + if not project: + raise ProjectNotFoundError(f"Project {namespace}/{name}:{tag} does not exist") + samples = [] + for sample_name in sample_name_list: + sample_statement = select(Samples).where( + and_( + Samples.project_id == project.id, + Samples.sample_name == sample_name, + ) + ) + sample = sa_session.scalar(sample_statement) + if not sample: + raise SampleNotFoundError( + f"Sample {namespace}/{name}:{tag}:{sample_name} does not exist" + ) + samples.append(sample.sample) + config = project.config + + if raw: + return {"_config": config, "_sample_dict": samples, "_subsample_dict": None} + else: + return peppy.Project.from_dict( + {"_config": config, "_sample_dict": samples, "_subsample_dict": None} + ) + + def get_views_annotation( + self, namespace: str, name: str, tag: str = DEFAULT_TAG + ) -> Union[ProjectViews, None]: + """ + Get list of views of the project + + :param namespace: namespace of the project + :param name: name of the project + :param tag: tag of the project + :return: list of views of the project + """ + statement = select(Views).where( + Views.project_mapping.has(namespace=namespace, name=name, tag=tag), + ) + views_list = [] + + with Session(self._sa_engine) as session: + views = session.scalars(statement) + for view in views: + views_list.append( + ViewAnnotation( + name=view.name, + description=view.description, + number_of_samples=len(view.samples), + ) + ) + + return ProjectViews(namespace=namespace, name=name, tag=tag, views=views_list) diff --git a/pepdbagent/pepdbagent.py b/pepdbagent/pepdbagent.py index e73500a..19995e5 100644 --- a/pepdbagent/pepdbagent.py +++ b/pepdbagent/pepdbagent.py @@ -3,6 +3,9 @@ from pepdbagent.modules.annotation import 
PEPDatabaseAnnotation from pepdbagent.modules.namespace import PEPDatabaseNamespace from pepdbagent.modules.project import PEPDatabaseProject +from pepdbagent.modules.user import PEPDatabaseUser +from pepdbagent.modules.sample import PEPDatabaseSample +from pepdbagent.modules.view import PEPDatabaseView class PEPDatabaseAgent(object): @@ -47,21 +50,36 @@ def __init__( self.__project = PEPDatabaseProject(pep_db_engine) self.__annotation = PEPDatabaseAnnotation(pep_db_engine) self.__namespace = PEPDatabaseNamespace(pep_db_engine) + self.__sample = PEPDatabaseSample(pep_db_engine) + self.__user = PEPDatabaseUser(pep_db_engine) + self.__view = PEPDatabaseView(pep_db_engine) self.__db_name = database @property - def project(self): + def project(self) -> PEPDatabaseProject: return self.__project @property - def annotation(self): + def annotation(self) -> PEPDatabaseAnnotation: return self.__annotation @property - def namespace(self): + def namespace(self) -> PEPDatabaseNamespace: return self.__namespace + @property + def user(self) -> PEPDatabaseUser: + return self.__user + + @property + def sample(self) -> PEPDatabaseSample: + return self.__sample + + @property + def view(self) -> PEPDatabaseView: + return self.__view + def __str__(self): return f"Connection to the database: '{self.__db_name}' is set!" 
diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index dc11bcc..c58cff4 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -7,7 +7,7 @@ import ubiquerg from peppy.const import SAMPLE_RAW_DICT_KEY -from .exceptions import IncorrectDateFormat, RegistryPathError +from .exceptions import RegistryPathError def is_valid_registry_path(rpath: str) -> bool: diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 0b8fbeb..e4b7e0e 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,8 +1,8 @@ sqlalchemy>=2.0.0 -logmuse -peppy>=0.40.0a4 +logmuse>=0.2.7 +peppy>=0.40.0 ubiquerg>=0.6.2 coloredlogs>=15.0.1 pytest-mock -pydantic<2.0 -psycopg2-binary +pydantic>=2.0 +psycopg>=3.1.15 diff --git a/tests/README.md b/tests/README.md index b0dd607..b057606 100644 --- a/tests/README.md +++ b/tests/README.md @@ -3,13 +3,12 @@ ### How to run tests localy: 1. Use or create empty database with next credentials: ```txt -POSTGRES_USER=postgres -POSTGRES_PASSWORD=docker -POSTGRES_DB=pep-db -POSTGRES_PORT=5432 +docker run --rm -it --name bedbase \ + -e POSTGRES_USER=postgres \ + -e POSTGRES_PASSWORD=docker \ + -e POSTGRES_DB=pep-db \ + -p 5432:5432 postgres ``` -Database can be created using docker file: [../pep_db/Dockerfile](../pep_db/Dockerfile) -To run docker use this tutorial [../docs/db_tutorial.md](../docs/db_tutorial.md) 2. 
Run pytest using this command: `pytest` diff --git a/tests/conftest.py b/tests/conftest.py index f210dbf..32d3dbe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,9 +5,10 @@ from sqlalchemy import create_engine from sqlalchemy import text -DNS = f"postgresql://postgres:docker@localhost:5432/pep-db" from pepdbagent import PEPDatabaseAgent +DNS = "postgresql+psycopg://postgres:docker@localhost:5432/pep-db" + DATA_PATH = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), @@ -39,6 +40,11 @@ def initiate_pepdb_con( conn.execute(text("DROP table IF EXISTS projects CASCADE")) conn.execute(text("DROP table IF EXISTS samples CASCADE")) conn.execute(text("DROP table IF EXISTS subsamples CASCADE")) + conn.execute(text("DROP table IF EXISTS stars CASCADE")) + conn.execute(text("DROP table IF EXISTS users CASCADE")) + conn.execute(text("DROP table IF EXISTS views CASCADE")) + conn.execute(text("DROP table IF EXISTS views_samples CASCADE")) + pepdb_con = PEPDatabaseAgent(dsn=DNS, echo=True) for namespace, item in list_of_available_peps.items(): if namespace == "private_test": diff --git a/tests/test_pepagent.py b/tests/test_pepagent.py index 7a5e84a..91b53de 100644 --- a/tests/test_pepagent.py +++ b/tests/test_pepagent.py @@ -1,13 +1,22 @@ import datetime import os +import warnings import peppy import pytest +from sqlalchemy.exc import OperationalError -from pepdbagent.exceptions import FilterError, ProjectNotFoundError - -DNS = f"postgresql://postgres:docker@localhost:5432/pep-db" - +import pepdbagent +from pepdbagent.exceptions import ( + FilterError, + ProjectNotFoundError, + ProjectNotInFavorites, + ProjectAlreadyInFavorites, + SampleNotFoundError, + ViewNotFoundError, + SampleAlreadyInView, +) +from .conftest import DNS DATA_PATH = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), @@ -20,6 +29,24 @@ def get_path_to_example_file(namespace, project_name): return os.path.join(DATA_PATH, namespace, project_name, 
"project_config.yaml") +def db_setup(): + # Check if the database is setup + try: + pepdbagent.PEPDatabaseAgent(dsn=DNS) + except OperationalError: + warnings.warn( + UserWarning( + f"Skipping tests, because DB is not setup. {DNS}. To setup DB go to README.md" + ) + ) + return False + return True + + +@pytest.mark.skipif( + not db_setup(), + reason="DB is not setup", +) class TestProject: """ Test project methods @@ -61,7 +88,7 @@ def test_get_project(self, initiate_pepdb_con, namespace, name): ) def test_get_project_error(self, initiate_pepdb_con, namespace, name, tag): with pytest.raises(ProjectNotFoundError, match="Project does not exist."): - kk = initiate_pepdb_con.project.get(namespace=namespace, name=name, tag=tag) + initiate_pepdb_con.project.get(namespace=namespace, name=name, tag=tag) @pytest.mark.parametrize( "namespace, name", @@ -98,9 +125,135 @@ def test_delete_project(self, initiate_pepdb_con, namespace, name): initiate_pepdb_con.project.delete(namespace=namespace, name=name, tag="default") with pytest.raises(ProjectNotFoundError, match="Project does not exist."): - kk = initiate_pepdb_con.project.get(namespace=namespace, name=name, tag="default") + initiate_pepdb_con.project.get(namespace=namespace, name=name, tag="default") + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ["namespace2", "derive"], + ["namespace2", "imply"], + ], + ) + def test_fork_projects(self, initiate_pepdb_con, namespace, name): + initiate_pepdb_con.project.fork( + original_namespace=namespace, + original_name=name, + original_tag="default", + fork_namespace="new_namespace", + fork_name="new_name", + fork_tag="new_tag", + ) + assert initiate_pepdb_con.project.exists( + namespace="new_namespace", name="new_name", tag="new_tag" + ) + result = initiate_pepdb_con.annotation.get( + namespace="new_namespace", name="new_name", tag="new_tag" + ) + assert result.results[0].forked_from == 
f"{namespace}/{name}:default" + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ], + ) + def test_parent_project_delete(self, initiate_pepdb_con, namespace, name): + """ + Test if parent project is deleted, forked project is not deleted + """ + initiate_pepdb_con.project.fork( + original_namespace=namespace, + original_name=name, + original_tag="default", + fork_namespace="new_namespace", + fork_name="new_name", + fork_tag="new_tag", + ) + + assert initiate_pepdb_con.project.exists( + namespace="new_namespace", name="new_name", tag="new_tag" + ) + initiate_pepdb_con.project.delete(namespace=namespace, name=name, tag="default") + assert initiate_pepdb_con.project.exists( + namespace="new_namespace", name="new_name", tag="new_tag" + ) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ], + ) + def test_child_project_delete(self, initiate_pepdb_con, namespace, name): + """ + Test if child project is deleted, parent project is not deleted + """ + initiate_pepdb_con.project.fork( + original_namespace=namespace, + original_name=name, + original_tag="default", + fork_namespace="new_namespace", + fork_name="new_name", + fork_tag="new_tag", + ) + + assert initiate_pepdb_con.project.exists( + namespace="new_namespace", name="new_name", tag="new_tag" + ) + assert initiate_pepdb_con.project.exists(namespace=namespace, name=name, tag="default") + initiate_pepdb_con.project.delete( + namespace="new_namespace", name="new_name", tag="new_tag" + ) + assert initiate_pepdb_con.project.exists(namespace=namespace, name=name, tag="default") + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ["namespace1", "amendments2"], + ], + ) + def test_project_can_be_forked_twice(self, initiate_pepdb_con, namespace, name): + """ + Test if project can be forked twice + """ + initiate_pepdb_con.project.fork( + 
original_namespace=namespace, + original_name=name, + original_tag="default", + fork_namespace="new_namespace", + fork_name="new_name", + fork_tag="new_tag", + ) + initiate_pepdb_con.project.fork( + original_namespace=namespace, + original_name=name, + original_tag="default", + fork_namespace="new_namespace2", + fork_name="new_name2", + fork_tag="new_tag2", + ) + + result = initiate_pepdb_con.annotation.get( + namespace="new_namespace", name="new_name", tag="new_tag" + ) + assert result.results[0].forked_from == f"{namespace}/{name}:default" + + result = initiate_pepdb_con.annotation.get( + namespace="new_namespace2", name="new_name2", tag="new_tag2" + ) + assert result.results[0].forked_from == f"{namespace}/{name}:default" + + +@pytest.mark.skipif( + not db_setup(), + reason="DB is not setup", +) class TestProjectUpdate: @pytest.mark.parametrize( "namespace, name,new_name", @@ -171,9 +324,9 @@ def test_update_project_description( "namespace, name", [ ["namespace1", "amendments1"], - ["namespace1", "amendments2"], - ["namespace2", "derive"], - ["namespace2", "imply"], + # ["namespace1", "amendments2"], + # ["namespace2", "derive"], + # ["namespace2", "imply"], ], ) def test_update_whole_project(self, initiate_pepdb_con, namespace, name): @@ -233,6 +386,10 @@ def test_update_project_private(self, initiate_pepdb_con, namespace, name): assert is_private is True +@pytest.mark.skipif( + not db_setup(), + reason="DB is not setup", +) class TestAnnotation: """ Test function within annotation class @@ -256,6 +413,20 @@ def test_annotation_of_one_project(self, initiate_pepdb_con, namespace, name): ) assert result.results[0].namespace == namespace + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace6", "amendments1"], + ], + ) + def test_annotation_of_one_non_existing_project(self, initiate_pepdb_con, namespace, name): + with pytest.raises(ProjectNotFoundError): + initiate_pepdb_con.annotation.get( + namespace=namespace, + name=name, + tag="default", + ) + 
@pytest.mark.parametrize( "namespace, n_projects", [ @@ -386,7 +557,7 @@ def test_all_annotations_are_returned(self, initiate_pepdb_con, namespace, name) name=name, tag="default", ) - assert result.results[0].__fields_set__ == { + assert result.results[0].model_fields_set == { "is_private", "tag", "namespace", @@ -397,6 +568,9 @@ def test_all_annotations_are_returned(self, initiate_pepdb_con, namespace, name) "last_update_date", "submission_date", "pep_schema", + "pop", + "stars_number", + "forked_from", } @pytest.mark.parametrize( @@ -407,7 +581,7 @@ def test_all_annotations_are_returned(self, initiate_pepdb_con, namespace, name) ], ) def test_search_filter_success(self, initiate_pepdb_con, namespace, query, found_number): - date_now = datetime.datetime.now() + date_now = datetime.datetime.now() + datetime.timedelta(days=1) date_old = datetime.datetime.now() - datetime.timedelta(days=5) result = initiate_pepdb_con.annotation.get( namespace=namespace, @@ -451,7 +625,7 @@ def test_search_incorrect_filter_by_string( date_now = datetime.datetime.now() - datetime.timedelta(days=2) date_old = date_now - datetime.timedelta(days=2) with pytest.raises(FilterError): - result = initiate_pepdb_con.annotation.get( + initiate_pepdb_con.annotation.get( namespace=namespace, query=query, admin="private_test", @@ -460,7 +634,56 @@ def test_search_incorrect_filter_by_string( filter_end_date=date_now.strftime("%Y/%m/%d"), ) + @pytest.mark.parametrize( + "rp_list, admin, found_number", + [ + [ + [ + "namespace1/amendments1:default", + "namespace1/amendments2:default", + "namespace2/derive:default", + "private_test/amendments1:default", + ], + "namespace1", + 4, + ], + [ + [ + "namespace1/amendments1:default", + "namespace1/amendments2:default", + "namespace2/derive:default", + "private_test/amendments1:default", + ], + "private_test", + 4, + ], + ], + ) + def test_get_annotation_by_rp_list(self, initiate_pepdb_con, rp_list, admin, found_number): + result = 
initiate_pepdb_con.annotation.get_by_rp_list(rp_list) + assert len(result.results) == found_number + + def test_get_annotation_by_rp_enpty_list(self, initiate_pepdb_con): + result = initiate_pepdb_con.annotation.get_by_rp_list([]) + assert len(result.results) == 0 + @pytest.mark.parametrize( + "namespace, query, found_number", + [ + ["namespace1", "ame", 2], + ], + ) + def test_search_incorrect_incorrect_pep_type( + self, initiate_pepdb_con, namespace, query, found_number + ): + with pytest.raises(ValueError): + initiate_pepdb_con.annotation.get(namespace=namespace, pep_type="incorrect") + + +@pytest.mark.skipif( + not db_setup(), + reason="DB is not setup", +) class TestNamespace: """ Test function within namespace class @@ -484,3 +707,495 @@ def test_namespace_info(self, initiate_pepdb_con): result = initiate_pepdb_con.namespace.info() assert len(result.results) == 4 assert result.results[3].number_of_projects == 1 + + +@pytest.mark.skipif( + not db_setup(), + reason="DB is not setup", +) +class TestFavorites: + """ + Test function within user class + """ + + def test_add_projects_to_favorites(self, initiate_pepdb_con): + result = initiate_pepdb_con.annotation.get( + namespace="namespace1", + ) + for project in result.results: + initiate_pepdb_con.user.add_project_to_favorites( + "random_namespace", project.namespace, project.name, "default" + ) + fav_results = initiate_pepdb_con.user.get_favorites("random_namespace") + + assert fav_results.count == len(result.results) + + # This can fail if the order of the results is different + assert fav_results.results[0].namespace == result.results[0].namespace + + def test_count_project_none(self, initiate_pepdb_con): + result = initiate_pepdb_con.user.get_favorites("private_test") + assert result.count == 0 + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ], + ) + def test_count_project_one(self, initiate_pepdb_con, namespace, name): + 
initiate_pepdb_con.user.add_project_to_favorites(namespace, namespace, name, "default") + result = initiate_pepdb_con.user.get_favorites("namespace1") + assert result.count == 1 + result1 = initiate_pepdb_con.user.get_favorites("private_test") + assert result1.count == 0 + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ], + ) + def test_remove_from_favorite(self, initiate_pepdb_con, namespace, name): + initiate_pepdb_con.user.add_project_to_favorites("namespace1", namespace, name, "default") + initiate_pepdb_con.user.add_project_to_favorites( + "namespace1", namespace, "amendments2", "default" + ) + result = initiate_pepdb_con.user.get_favorites("namespace1") + assert result.count == len(result.results) == 2 + initiate_pepdb_con.user.remove_project_from_favorites( + "namespace1", namespace, name, "default" + ) + result = initiate_pepdb_con.user.get_favorites("namespace1") + assert result.count == len(result.results) == 1 + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ], + ) + def test_remove_from_favorite_error(self, initiate_pepdb_con, namespace, name): + with pytest.raises(ProjectNotInFavorites): + initiate_pepdb_con.user.remove_project_from_favorites( + "namespace1", namespace, name, "default" + ) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ], + ) + def test_favorites_duplication_error(self, initiate_pepdb_con, namespace, name): + initiate_pepdb_con.user.add_project_to_favorites("namespace1", namespace, name, "default") + with pytest.raises(ProjectAlreadyInFavorites): + initiate_pepdb_con.user.add_project_to_favorites( + "namespace1", namespace, name, "default" + ) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "amendments1"], + ], + ) + def test_annotation_favorite_number(self, initiate_pepdb_con, namespace, name): + initiate_pepdb_con.user.add_project_to_favorites("namespace1", namespace, name, "default") + 
annotations_in_namespace = initiate_pepdb_con.annotation.get("namespace1") + + for prj_annot in annotations_in_namespace.results: + if prj_annot.name == name: + assert prj_annot.stars_number == 1 + else: + assert prj_annot.stars_number == 0 + + +@pytest.mark.skipif( + not db_setup(), + reason="DB is not setup", +) +class TestSamples: + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_retrieve_one_sample(self, initiate_pepdb_con, namespace, name, sample_name): + one_sample = initiate_pepdb_con.sample.get(namespace, name, sample_name) + assert isinstance(one_sample, peppy.Sample) + assert one_sample.sample_name == sample_name + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_retrieve_raw_sample(self, initiate_pepdb_con, namespace, name, sample_name): + one_sample = initiate_pepdb_con.sample.get(namespace, name, sample_name, raw=True) + assert isinstance(one_sample, dict) + assert one_sample["sample_name"] == sample_name + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace2", "custom_index", "frog_1"], + ], + ) + def test_retrieve_sample_with_modified_sample_id( + self, initiate_pepdb_con, namespace, name, sample_name + ): + one_sample = initiate_pepdb_con.sample.get(namespace, name, sample_name) + assert isinstance(one_sample, peppy.Sample) + assert one_sample.sample_id == "frog_1" + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_update(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.sample.update( + namespace=namespace, + name=name, + tag="default", + sample_name=sample_name, + update_dict={"organism": "butterfly"}, + ) + one_sample = initiate_pepdb_con.sample.get(namespace, name, sample_name) + assert one_sample.organism == "butterfly" + + @pytest.mark.parametrize( + 
"namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_update_sample_name(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.sample.update( + namespace=namespace, + name=name, + tag="default", + sample_name=sample_name, + update_dict={"sample_name": "butterfly"}, + ) + one_sample = initiate_pepdb_con.sample.get(namespace, name, "butterfly") + assert one_sample.sample_name == "butterfly" + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace2", "custom_index", "frog_1"], + ], + ) + def test_update_custom_sample_id(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.sample.update( + namespace=namespace, + name=name, + tag="default", + sample_name=sample_name, + update_dict={"sample_id": "butterfly"}, + ) + one_sample = initiate_pepdb_con.sample.get(namespace, name, "butterfly") + assert one_sample.sample_id == "butterfly" + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_add_new_attributes(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.sample.update( + namespace=namespace, + name=name, + tag="default", + sample_name=sample_name, + update_dict={"new_attr": "butterfly"}, + ) + prj = initiate_pepdb_con.project.get(namespace, name) + + assert prj.get_sample(sample_name).new_attr == "butterfly" + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_project_timestamp_was_changed(self, initiate_pepdb_con, namespace, name, sample_name): + annotation1 = initiate_pepdb_con.annotation.get(namespace, name, "default") + import time + + time.sleep(0.2) + initiate_pepdb_con.sample.update( + namespace=namespace, + name=name, + tag="default", + sample_name=sample_name, + update_dict={"new_attr": "butterfly"}, + ) + annotation2 = initiate_pepdb_con.annotation.get(namespace, 
name, "default") + + assert annotation1.results[0].last_update_date != annotation2.results[0].last_update_date + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_delete_sample(self, initiate_pepdb_con, namespace, name, sample_name): + one_sample = initiate_pepdb_con.sample.get(namespace, name, sample_name) + assert isinstance(one_sample, peppy.Sample) + + initiate_pepdb_con.sample.delete(namespace, name, tag="default", sample_name=sample_name) + + with pytest.raises(SampleNotFoundError): + initiate_pepdb_con.sample.get(namespace, name, tag="default", sample_name=sample_name) + + @pytest.mark.parametrize( + "namespace, name, tag, sample_dict", + [ + [ + "namespace1", + "amendments1", + "default", + { + "sample_name": "new_sample", + "time": "new_time", + }, + ], + ], + ) + def test_add_sample(self, initiate_pepdb_con, namespace, name, tag, sample_dict): + prj = initiate_pepdb_con.project.get(namespace, name) + initiate_pepdb_con.sample.add(namespace, name, tag, sample_dict) + + prj2 = initiate_pepdb_con.project.get(namespace, name) + + assert len(prj.samples) + 1 == len(prj2.samples) + assert prj2.samples[-1].sample_name == sample_dict["sample_name"] + + @pytest.mark.parametrize( + "namespace, name, tag, sample_dict", + [ + [ + "namespace1", + "amendments1", + "default", + { + "sample_name": "pig_0h", + "time": "new_time", + }, + ], + ], + ) + def test_overwrite_sample(self, initiate_pepdb_con, namespace, name, tag, sample_dict): + assert initiate_pepdb_con.project.get(namespace, name).get_sample("pig_0h").time == "0" + initiate_pepdb_con.sample.add(namespace, name, tag, sample_dict, overwrite=True) + + assert ( + initiate_pepdb_con.project.get(namespace, name).get_sample("pig_0h").time == "new_time" + ) + + @pytest.mark.parametrize( + "namespace, name, tag, sample_dict", + [ + [ + "namespace1", + "amendments1", + "default", + { + "sample_name": "new_sample", + "time": "new_time", + }, + 
], + ], + ) + def test_delete_and_add(self, initiate_pepdb_con, namespace, name, tag, sample_dict): + prj = initiate_pepdb_con.project.get(namespace, name) + sample_dict = initiate_pepdb_con.sample.get(namespace, name, "pig_0h", raw=True) + initiate_pepdb_con.sample.delete(namespace, name, tag, "pig_0h") + initiate_pepdb_con.sample.add(namespace, name, tag, sample_dict) + prj2 = initiate_pepdb_con.project.get(namespace, name) + assert prj.get_sample("pig_0h").to_dict() == prj2.get_sample("pig_0h").to_dict() + + +@pytest.mark.skipif( + not db_setup(), + reason="DB is not setup", +) +class TestViews: + """ + Test function within view class + """ + + @pytest.mark.parametrize( + "namespace, name, sample_name, view_name", + [ + ["namespace1", "amendments1", "pig_0h", "view1"], + ], + ) + def test_create_view(self, initiate_pepdb_con, namespace, name, sample_name, view_name): + initiate_pepdb_con.view.create( + view_name, + { + "project_namespace": namespace, + "project_name": name, + "project_tag": "default", + "sample_list": [sample_name, "pig_1h"], + }, + ) + + project = initiate_pepdb_con.project.get(namespace, name) + view_project = initiate_pepdb_con.view.get(namespace, name, "default", view_name) + assert len(view_project.samples) == 2 + assert view_project != project + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_delete_view(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": namespace, + "project_name": name, + "project_tag": "default", + "sample_list": [sample_name, "pig_1h"], + }, + ) + assert len(initiate_pepdb_con.view.get(namespace, name, "default", "view1").samples) == 2 + initiate_pepdb_con.view.delete(namespace, name, "default", "view1") + with pytest.raises(ViewNotFoundError): + initiate_pepdb_con.view.get(namespace, name, "default", "view1") + assert 
len(initiate_pepdb_con.project.get(namespace, name).samples) == 4 + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_add_sample_to_view(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": namespace, + "project_name": name, + "project_tag": "default", + "sample_list": [sample_name], + }, + ) + initiate_pepdb_con.view.add_sample(namespace, name, "default", "view1", "pig_1h") + assert len(initiate_pepdb_con.view.get(namespace, name, "default", "view1").samples) == 2 + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_add_multiple_samples_to_view(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": namespace, + "project_name": name, + "project_tag": "default", + "sample_list": [sample_name], + }, + ) + initiate_pepdb_con.view.add_sample( + namespace, name, "default", "view1", ["pig_1h", "frog_0h"] + ) + assert len(initiate_pepdb_con.view.get(namespace, name, "default", "view1").samples) == 3 + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def test_remove_sample_from_view(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": namespace, + "project_name": name, + "project_tag": "default", + "sample_list": [sample_name, "pig_1h"], + }, + ) + initiate_pepdb_con.view.remove_sample(namespace, name, "default", "view1", sample_name) + assert len(initiate_pepdb_con.view.get(namespace, name, "default", "view1").samples) == 1 + assert len(initiate_pepdb_con.project.get(namespace, name).samples) == 4 + + @pytest.mark.parametrize( + "namespace, name, sample_name", + [ + ["namespace1", "amendments1", "pig_0h"], + ], + ) + def 
test_add_existing_sample_in_view(self, initiate_pepdb_con, namespace, name, sample_name): + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": namespace, + "project_name": name, + "project_tag": "default", + "sample_list": [sample_name, "pig_1h"], + }, + ) + with pytest.raises(SampleAlreadyInView): + initiate_pepdb_con.view.add_sample(namespace, name, "default", "view1", sample_name) + + @pytest.mark.parametrize( + "namespace, name, sample_name, view_name", + [ + ["namespace1", "amendments1", "pig_0h", "view1"], + ], + ) + def test_get_snap_view(self, initiate_pepdb_con, namespace, name, sample_name, view_name): + snap_project = initiate_pepdb_con.view.get_snap_view( + namespace=namespace, + name=name, + tag="default", + sample_name_list=[sample_name, "pig_1h"], + ) + + assert len(snap_project.samples) == 2 + + @pytest.mark.parametrize( + "namespace, name, sample_name, view_name", + [ + ["namespace1", "amendments1", "pig_0h", "view1"], + ], + ) + def test_get_view_list_from_project( + self, initiate_pepdb_con, namespace, name, sample_name, view_name + ): + assert ( + len(initiate_pepdb_con.view.get_views_annotation(namespace, name, "default").views) + == 0 + ) + initiate_pepdb_con.view.create( + "view1", + { + "project_namespace": namespace, + "project_name": name, + "project_tag": "default", + "sample_list": [sample_name, "pig_1h"], + }, + ) + assert ( + len(initiate_pepdb_con.view.get_views_annotation(namespace, name, "default").views) + == 1 + )