Skip to content

Commit

Permalink
connected projects to schemas
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Jul 23, 2024
1 parent 9550289 commit 0e1dc5f
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 16 deletions.
1 change: 1 addition & 0 deletions pepdbagent/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class UpdateItems(BaseModel):
samples: Optional[List[dict]] = None
subsamples: Optional[List[List[dict]]] = None
pop: Optional[bool] = None
schema_id: Optional[int] = None

model_config = ConfigDict(
arbitrary_types_allowed=True,
Expand Down
22 changes: 17 additions & 5 deletions pepdbagent/modules/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,11 @@ def _get_single_annotation(
submission_date=str(query_result.submission_date),
last_update_date=str(query_result.last_update_date),
digest=query_result.digest,
pep_schema=query_result.pep_schema,
pep_schema=(
f"{query_result.schema_mapping.namespace}/{query_result.schema_mapping.name}"
if query_result.schema_mapping
else None
),
pop=query_result.pop,
stars_number=query_result.number_of_stars,
forked_from=(
Expand Down Expand Up @@ -342,7 +346,11 @@ def _get_projects(
submission_date=str(result.submission_date),
last_update_date=str(result.last_update_date),
digest=result.digest,
pep_schema=result.pep_schema,
pep_schema=(
f"{result.schema_mapping.namespace}/{result.schema_mapping.name}"
if result.schema_mapping
else None
),
pop=result.pop,
stars_number=result.number_of_stars,
forked_from=(
Expand Down Expand Up @@ -538,9 +546,9 @@ def get_by_rp_list(
statement = select(Projects).where(or_(*or_statement_list))
anno_results = []
with Session(self._sa_engine) as session:
query_result = session.execute(statement).all()
query_result = session.scalars(statement)
for result in query_result:
project_obj = result[0]
project_obj = result
annot = AnnotationModel(
namespace=project_obj.namespace,
name=project_obj.name,
Expand All @@ -551,7 +559,11 @@ def get_by_rp_list(
submission_date=str(project_obj.submission_date),
last_update_date=str(project_obj.last_update_date),
digest=project_obj.digest,
pep_schema=project_obj.pep_schema,
pep_schema=(
f"{project_obj.schema_mapping.namespace}/{project_obj.schema_mapping.name}"
if project_obj.schema_mapping
else None
),
pop=project_obj.pop,
stars_number=project_obj.number_of_stars,
forked_from=(
Expand Down
68 changes: 63 additions & 5 deletions pepdbagent/modules/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
Subsamples,
UpdateTypes,
User,
Schemas,
)
from pepdbagent.exceptions import (
HistoryNotFoundError,
Expand All @@ -42,6 +43,7 @@
ProjectNotFoundError,
ProjectUniqueNameError,
SampleTableUpdateError,
SchemaDoesNotExistError,
)
from pepdbagent.models import (
HistoryAnnotationModel,
Expand All @@ -50,7 +52,13 @@
UpdateItems,
UpdateModel,
)
from pepdbagent.utils import create_digest, generate_guid, order_samples, registry_path_converter
from pepdbagent.utils import (
create_digest,
generate_guid,
order_samples,
registry_path_converter,
schema_path_converter,
)

_LOGGER = logging.getLogger(PKG_NAME)

Expand Down Expand Up @@ -314,7 +322,7 @@ def create(
:param name: name of the project (Default: name is taken from the project object)
:param tag: tag (or version) of the project.
:param is_private: boolean value if the project should be visible just for user that creates it.
:param pep_schema: assign PEP to a specific schema. [Default: None]
:param pep_schema: assign PEP to a specific schema. Example: 'namespace/name' [Default: None]
:param pop: if project is a pep of peps (POP) [Default: False]
:param overwrite: if project exists overwrite the project, otherwise upload it.
[Default: False - project won't be overwritten if it exists in db]
Expand Down Expand Up @@ -356,6 +364,24 @@ def create(
except AttributeError:
number_of_samples = len(proj_dict[SAMPLE_RAW_DICT_KEY])

if pep_schema:
schema_namespace, schema_name = schema_path_converter(pep_schema)
with Session(self._sa_engine) as session:
schema_mapping = session.scalar(
select(Schemas).where(
and_(
Schemas.namespace == schema_namespace,
Schemas.name == schema_name,
)
)
)
if not schema_mapping:
raise SchemaDoesNotExistError(
f"Schema {schema_namespace}/{schema_name} does not exist. "
f"Project won't be uploaded."
)
pep_schema = schema_mapping.id

if update_only:
_LOGGER.info(f"Update_only argument is set True. Updating project {proj_name} ...")
self._overwrite(
Expand Down Expand Up @@ -384,7 +410,8 @@ def create(
private=is_private,
submission_date=datetime.datetime.now(datetime.timezone.utc),
last_update_date=datetime.datetime.now(datetime.timezone.utc),
pep_schema=pep_schema,
# pep_schema=pep_schema,
schema_id=pep_schema,
description=description,
pop=pop,
)
Expand Down Expand Up @@ -447,7 +474,7 @@ def _overwrite(
project_digest: str,
number_of_samples: int,
private: bool = False,
pep_schema: str = None,
pep_schema: int = None,
description: str = "",
pop: bool = False,
) -> None:
Expand Down Expand Up @@ -483,7 +510,8 @@ def _overwrite(
found_prj.digest = project_digest
found_prj.number_of_samples = number_of_samples
found_prj.private = private
found_prj.pep_schema = pep_schema
# found_prj.pep_schema = pep_schema
found_prj.schema_id = pep_schema
found_prj.config = project_dict[CONFIG_KEY]
found_prj.description = description
found_prj.last_update_date = datetime.datetime.now(datetime.timezone.utc)
Expand Down Expand Up @@ -577,6 +605,8 @@ def update(
f"Pep {namespace}/{name}:{tag} was not found. No items will be updated!"
)

self._convert_update_schema_id(session, update_values)

for k, v in update_values.items():
if getattr(found_prj, k) != v:
setattr(found_prj, k, v)
Expand Down Expand Up @@ -647,6 +677,34 @@ def update(
else:
raise ProjectNotFoundError("No items will be updated!")

@staticmethod
def _convert_update_schema_id(session: Session, update_values: dict):
    """
    Resolve the "pep_schema" path in update_values to the id of a stored schema.

    When update_values has a "pep_schema" key, its value is split into
    namespace and name, the matching Schemas row is looked up, and the row id
    is written to update_values["schema_id"]. The dict is modified in place and
    the "pep_schema" entry itself is left untouched. Without that key the
    method does nothing.

    :param session: open session object used to query the Schemas table
    :param update_values: dict with update key->values
    :raises SchemaDoesNotExistError: if no schema matches the namespace and name
    :return: None
    """
    # Nothing to translate when the caller is not changing the schema.
    if "pep_schema" not in update_values:
        return

    namespace, name = schema_path_converter(update_values["pep_schema"])
    schema_query = select(Schemas).where(
        and_(
            Schemas.namespace == namespace,
            Schemas.name == name,
        )
    )
    found_schema = session.scalar(schema_query)

    if not found_schema:
        raise SchemaDoesNotExistError(
            f"Schema {namespace}/{name} does not exist. "
            f"Project won't be updated."
        )

    update_values["schema_id"] = found_schema.id

def _update_samples(
self,
project_id: int,
Expand Down
13 changes: 13 additions & 0 deletions pepdbagent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ def registry_path_converter(registry_path: str) -> Tuple[str, str, str]:
raise RegistryPathError(f"Error in: '{registry_path}'")


def schema_path_converter(schema_path: str) -> Tuple[str, str]:
    """
    Convert schema path to namespace, name

    The path must contain exactly one "/" separating the two parts, e.g.
    "namespace/name". A path with no separator or with more than one separator
    is rejected with RegistryPathError instead of surfacing an unpacking error.

    :param schema_path: schema path that has structure: "namespace/name"
    :raises RegistryPathError: if the path does not contain exactly one "/"
    :return: tuple(namespace, name)
    """
    # partition never raises; an empty separator means there was no "/" at all.
    namespace, separator, name = schema_path.partition("/")
    # A second "/" left in the remainder means the path has too many parts.
    if not separator or "/" in name:
        raise RegistryPathError(f"Error in: '{schema_path}'")
    return namespace, name


def tuple_converter(value: Union[tuple, list, str, None]) -> tuple:
"""
Convert string list or tuple to tuple.
Expand Down
25 changes: 23 additions & 2 deletions tests/test_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,27 @@ def test_update_project_description(self, namespace, name, new_description):
== new_description
)

@pytest.mark.parametrize(
    "namespace, name, new_schema",
    [
        ["namespace1", "amendments1", "namespace2/bedboss"],
        ["namespace2", "derive", "namespace2/bedboss"],
    ],
)
def test_update_project_schema(self, namespace, name, new_schema):
    """
    Check that updating "pep_schema" re-links the project to another schema.

    The fixture data is inserted with schema "namespace1/2.0.0"; after the
    update the annotation must report the schema path given by new_schema.
    """
    with PEPDBAgentContextManager(add_data=True) as agent:
        prj_annot = agent.annotation.get(namespace=namespace, name=name)
        assert prj_annot.results[0].pep_schema == "namespace1/2.0.0"

        # Use the parametrized path so each case actually drives the update.
        agent.project.update(
            namespace=namespace,
            name=name,
            tag="default",
            update_dict={"pep_schema": new_schema},
        )
        prj_annot = agent.annotation.get(namespace=namespace, name=name)
        assert prj_annot.results[0].pep_schema == new_schema

@pytest.mark.parametrize(
"namespace, name, new_description",
[
Expand Down Expand Up @@ -134,8 +155,8 @@ def test_update_whole_project(self, namespace, name):
@pytest.mark.parametrize(
"namespace, name, pep_schema",
[
["namespace1", "amendments1", "schema1"],
["namespace2", "derive", "schema3"],
["namespace1", "amendments1", "namespace2/bedmaker"],
["namespace2", "derive", "namespace2/bedbuncher"],
],
)
def test_update_pep_schema(self, namespace, name, pep_schema):
Expand Down
8 changes: 4 additions & 4 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class PEPDBAgentContextManager:
Class with context manager to connect to the database. Adds data on entry and drops everything from the database upon exit to ensure a clean state.
"""

def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=False, echo=False):
def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=True, echo=False):
"""
:param url: database url e.g. "postgresql+psycopg://postgres:docker@localhost:5432/pep-db"
:param add_data: add data to the database
Expand All @@ -86,10 +86,10 @@ def __enter__(self):
self._agent = PEPDatabaseAgent(dsn=self.url, echo=False)
self.db_engine = self._agent.pep_db_engine
self.db_engine.create_schema()
if self.add_data:
self._insert_data()
if self.add_schemas:
self._add_schemas()
if self.add_data:
self._insert_data()
return self._agent

def __exit__(self, exc_type, exc_value, exc_traceback):
Expand All @@ -111,7 +111,7 @@ def _insert_data(self):
is_private=private,
project=prj,
overwrite=True,
pep_schema="random_schema_name",
pep_schema="namespace1/2.0.0",
)

def _add_schemas(self):
Expand Down

0 comments on commit 0e1dc5f

Please sign in to comment.