Skip to content

Commit

Permalink
Merge pull request #137 from pepkit/dev
Browse files Browse the repository at this point in the history
Release 0.9.0
  • Loading branch information
nleroy917 authored Jul 2, 2024
2 parents d4431da + 5a37bf8 commit f28d401
Show file tree
Hide file tree
Showing 26 changed files with 2,526 additions and 1,869 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cli-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
image: postgres
env:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: docker
POSTGRES_PASSWORD: pass8743hf9h23f87h437
POSTGRES_DB: pep-db
POSTGRES_HOST: localhost
ports:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
image: postgres
env:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: docker
POSTGRES_PASSWORD: pass8743hf9h23f87h437
POSTGRES_DB: pep-db
POSTGRES_HOST: localhost
ports:
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.9.0] -- 2024-06-25
- Introduced new sample ordering with linked list [#133](https://github.com/pepkit/pepdbagent/issues/133)
- Improved efficiency of the project update function
- Test restructuring


## [0.8.0] -- 2024-02-26
- Fixed forking schema
- Improved forking efficiency [#129](https://github.com/pepkit/pepdbagent/issues/129)
Expand Down
2 changes: 1 addition & 1 deletion pepdbagent/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.0"
__version__ = "0.9.0"
2 changes: 2 additions & 0 deletions pepdbagent/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@

SUBMISSION_DATE_KEY = "submission_date"
LAST_UPDATE_DATE_KEY = "last_update_date"

PEPHUB_SAMPLE_ID_KEY = "ph_id"
71 changes: 52 additions & 19 deletions pepdbagent/db_utils.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,26 @@
import datetime
import logging
from typing import Optional, List
from typing import List, Optional

from sqlalchemy import (
TIMESTAMP,
BigInteger,
FetchedValue,
ForeignKey,
Result,
Select,
String,
UniqueConstraint,
event,
select,
TIMESTAMP,
ForeignKey,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSON
from sqlalchemy.engine import URL, create_engine
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.orm import (
DeclarativeBase,
Mapped,
Session,
mapped_column,
relationship,
)
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship

from pepdbagent.const import POSTGRES_DIALECT, PKG_NAME
from pepdbagent.const import PKG_NAME, POSTGRES_DIALECT
from pepdbagent.exceptions import SchemaError

_LOGGER = logging.getLogger(PKG_NAME)
Expand Down Expand Up @@ -77,7 +71,7 @@ class Projects(Base):
__tablename__ = "projects"

id: Mapped[int] = mapped_column(primary_key=True)
namespace: Mapped[str] = mapped_column()
namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE"))
name: Mapped[str] = mapped_column()
tag: Mapped[str] = mapped_column()
digest: Mapped[str] = mapped_column(String(32))
Expand All @@ -93,7 +87,7 @@ class Projects(Base):
pep_schema: Mapped[Optional[str]]
pop: Mapped[Optional[bool]] = mapped_column(default=False)
samples_mapping: Mapped[List["Samples"]] = relationship(
back_populates="sample_mapping", cascade="all, delete-orphan"
back_populates="project_mapping", cascade="all, delete-orphan"
)
subsamples_mapping: Mapped[List["Subsamples"]] = relationship(
back_populates="subsample_mapping", cascade="all, delete-orphan"
Expand All @@ -114,13 +108,17 @@ class Projects(Base):
back_populates="forked_to_mapping",
remote_side=[id],
single_parent=True,
cascade="all",
cascade="save-update, merge, refresh-expire",
)

forked_to_mapping = relationship(
"Projects", back_populates="forked_from_mapping", cascade="all"
"Projects",
back_populates="forked_from_mapping",
cascade="save-update, merge, refresh-expire",
)

namespace_mapping: Mapped["User"] = relationship("User", back_populates="projects_mapping")

__table_args__ = (UniqueConstraint("namespace", "name", "tag"),)


Expand All @@ -133,10 +131,28 @@ class Samples(Base):

id: Mapped[int] = mapped_column(primary_key=True)
sample: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue())
row_number: Mapped[int]
row_number: Mapped[int] # TODO: should be removed
project_id = mapped_column(ForeignKey("projects.id", ondelete="CASCADE"))
project_mapping: Mapped["Projects"] = relationship(back_populates="samples_mapping")
sample_name: Mapped[Optional[str]] = mapped_column()
sample_mapping: Mapped["Projects"] = relationship(back_populates="samples_mapping")
guid: Mapped[Optional[str]] = mapped_column(nullable=False, unique=True)

submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date)
last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column(
default=deliver_update_date,
onupdate=deliver_update_date,
)

parent_guid: Mapped[Optional[str]] = mapped_column(
ForeignKey("samples.guid", ondelete="CASCADE"),
nullable=True,
doc="Parent sample id. Used to create a hierarchy of samples.",
)

parent_mapping: Mapped["Samples"] = relationship(
"Samples", remote_side=guid, back_populates="child_mapping"
)
child_mapping: Mapped["Samples"] = relationship("Samples", back_populates="parent_mapping")

views: Mapped[Optional[List["ViewSampleAssociation"]]] = relationship(
back_populates="sample", cascade="all, delete-orphan"
Expand Down Expand Up @@ -166,12 +182,17 @@ class User(Base):
__tablename__ = "users"

id: Mapped[int] = mapped_column(primary_key=True)
namespace: Mapped[str]
namespace: Mapped[str] = mapped_column(nullable=False, unique=True)
stars_mapping: Mapped[List["Stars"]] = relationship(
back_populates="user_mapping",
cascade="all, delete-orphan",
order_by="Stars.star_date.desc()",
)
number_of_projects: Mapped[int] = mapped_column(default=0)

projects_mapping: Mapped[List["Projects"]] = relationship(
"Projects", back_populates="namespace_mapping"
)


class Stars(Base):
Expand Down Expand Up @@ -318,3 +339,15 @@ def check_db_connection(self):
self.session_execute(select(Projects).limit(1))
except ProgrammingError:
raise SchemaError()

def delete_schema(self, engine=None) -> None:
    """
    Drop all tables registered on the declarative ``Base`` metadata.

    :param engine: sqlalchemy engine to run the drop against; when a falsy
        value is passed (default: None) the engine stored on this object
        (``self._engine``) is used instead
    :return: None
    """
    # Fall back to the object's own engine only when none was supplied.
    target_engine = engine or self._engine
    Base.metadata.drop_all(target_engine)
10 changes: 10 additions & 0 deletions pepdbagent/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ def __init__(self, msg=""):
super().__init__(f"""Sample does not exist. {msg}""")


class SampleTableUpdateError(PEPDatabaseAgentError):
    """Error whose message is prefixed with 'Sample table update error.'."""

    def __init__(self, msg=""):
        # Build the final message first, then hand it to the base exception.
        message = f"Sample table update error. {msg}"
        super().__init__(message)


class ProjectDuplicatedSampleGUIDsError(SampleTableUpdateError):
    """Sample table update error reporting duplicated sample GUIDs in a project."""

    def __init__(self, msg=""):
        # The parent class prepends its own prefix to this message.
        message = f"Project has duplicated sample GUIDs. {msg}"
        super().__init__(message)


class SampleAlreadyExistsError(PEPDatabaseAgentError):
    """Error whose message is prefixed with 'Sample already exists.'."""

    def __init__(self, msg=""):
        # Build the final message first, then hand it to the base exception.
        message = f"Sample already exists. {msg}"
        super().__init__(message)
Expand Down
7 changes: 4 additions & 3 deletions pepdbagent/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# file with pydantic models
from typing import List, Optional, Union, Dict
from pydantic import BaseModel, Field, ConfigDict, field_validator
from peppy.const import CONFIG_KEY, SUBSAMPLE_RAW_LIST_KEY, SAMPLE_RAW_DICT_KEY
from typing import Dict, List, Optional, Union

from peppy.const import CONFIG_KEY, SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_LIST_KEY
from pydantic import BaseModel, ConfigDict, Field, field_validator

from pepdbagent.const import DEFAULT_TAG

Expand Down
4 changes: 2 additions & 2 deletions pepdbagent/modules/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
from typing import List, Literal, Optional, Union

from sqlalchemy import and_, func, or_, select
from sqlalchemy.sql.selectable import Select
from sqlalchemy.orm import Session
from sqlalchemy.sql.selectable import Select

from pepdbagent.const import (
DEFAULT_LIMIT,
DEFAULT_OFFSET,
DEFAULT_TAG,
LAST_UPDATE_DATE_KEY,
PKG_NAME,
SUBMISSION_DATE_KEY,
LAST_UPDATE_DATE_KEY,
)
from pepdbagent.db_utils import BaseEngine, Projects
from pepdbagent.exceptions import FilterError, ProjectNotFoundError, RegistryPathError
Expand Down
99 changes: 66 additions & 33 deletions pepdbagent/modules/namespace.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import logging
from typing import List, Union, Tuple
from collections import Counter
from datetime import datetime, timedelta
from typing import List, Tuple, Union

from sqlalchemy import distinct, func, or_, select, text
from sqlalchemy.sql.selectable import Select
from sqlalchemy import distinct, func, or_, select
from sqlalchemy.orm import Session
from sqlalchemy.sql.selectable import Select

from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_OFFSET, PKG_NAME, DEFAULT_LIMIT_INFO
from pepdbagent.const import DEFAULT_LIMIT, DEFAULT_LIMIT_INFO, DEFAULT_OFFSET, PKG_NAME
from pepdbagent.db_utils import BaseEngine, Projects, User
from pepdbagent.exceptions import NamespaceNotFoundError
from pepdbagent.db_utils import Projects, BaseEngine
from pepdbagent.models import (
ListOfNamespaceInfo,
Namespace,
NamespaceList,
NamespaceInfo,
ListOfNamespaceInfo,
NamespaceList,
NamespaceStats,
)
from pepdbagent.utils import tuple_converter
Expand Down Expand Up @@ -172,9 +172,54 @@ def _add_condition(
)
return statement

# Old implementation of info(), which computed namespace info from the Projects table
# def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
# """
# Get list of top n namespaces in the database
#
# :param limit: limit of results (top namespace )
# :return: number_of_namespaces: int
# limit: int
# results: { namespace: str
# number_of_projects: int
# }
# """
# total_number_of_namespaces = self._count_namespace()
#
# statement = (
# select(
# func.count(Projects.namespace).label("number_of_projects"),
# Projects.namespace,
# )
# .select_from(Projects)
# .where(Projects.private.is_(False))
# .limit(limit)
# .order_by(text("number_of_projects desc"))
# .group_by(Projects.namespace)
# )
#
# with Session(self._sa_engine) as session:
# query_results = session.execute(statement).all()
#
# list_of_results = []
# for result in query_results:
# list_of_results.append(
# NamespaceInfo(
# namespace=result.namespace,
# number_of_projects=result.number_of_projects,
# )
# )
# return ListOfNamespaceInfo(
# number_of_namespaces=total_number_of_namespaces,
# limit=limit,
# results=list_of_results,
# )

def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
"""
Get list of top n namespaces in the database
! Warning: this function counts number of all projects in namespaces.
! it does not filter private projects (It was done for efficiency reasons)
:param limit: limit of results (top namespace )
:return: number_of_namespaces: int
Expand All @@ -183,36 +228,24 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo:
number_of_projects: int
}
"""
total_number_of_namespaces = self._count_namespace()

statement = (
select(
func.count(Projects.namespace).label("number_of_projects"),
Projects.namespace,
)
.select_from(Projects)
.where(Projects.private.is_(False))
.limit(limit)
.order_by(text("number_of_projects desc"))
.group_by(Projects.namespace)
)

with Session(self._sa_engine) as session:
query_results = session.execute(statement).all()
results = session.scalars(
select(User).limit(limit).order_by(User.number_of_projects.desc())
)

list_of_results = []
for result in query_results:
list_of_results.append(
NamespaceInfo(
namespace=result.namespace,
number_of_projects=result.number_of_projects,
list_of_results = []
for result in results:
list_of_results.append(
NamespaceInfo(
namespace=result.namespace,
number_of_projects=result.number_of_projects,
)
)
return ListOfNamespaceInfo(
number_of_namespaces=len(list_of_results),
limit=limit,
results=list_of_results,
)
return ListOfNamespaceInfo(
number_of_namespaces=total_number_of_namespaces,
limit=limit,
results=list_of_results,
)

def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats:
"""
Expand Down
Loading

0 comments on commit f28d401

Please sign in to comment.