From 9550289b6c92b1f500a4489d6f711089bc6dcb61 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 23 Jul 2024 15:28:52 -0400 Subject: [PATCH] tests + bug fix --- pepdbagent/db_utils.py | 5 +- pepdbagent/models.py | 7 +- pepdbagent/modules/schema.py | 77 ++++++--- tests/schemas/namespace1/2.0.0.yaml | 69 ++++++++ tests/schemas/namespace1/2.1.0.yaml | 77 +++++++++ tests/schemas/namespace2/bedboss.yaml | 47 +++++ tests/schemas/namespace2/bedbuncher.yaml | 25 +++ tests/schemas/namespace2/bedmaker.yaml | 59 +++++++ tests/test_schema.py | 211 ++++++++++++++++++++++- tests/utils.py | 52 +++++- 10 files changed, 594 insertions(+), 35 deletions(-) create mode 100644 tests/schemas/namespace1/2.0.0.yaml create mode 100644 tests/schemas/namespace1/2.1.0.yaml create mode 100644 tests/schemas/namespace2/bedboss.yaml create mode 100644 tests/schemas/namespace2/bedbuncher.yaml create mode 100644 tests/schemas/namespace2/bedmaker.yaml diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 251a551..de1a9f3 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -129,7 +129,7 @@ class Projects(Base): history_mapping: Mapped[List["HistoryProjects"]] = relationship( back_populates="project_mapping", cascade="all, delete-orphan" - ) # TODO: check if cascade is correct + ) __table_args__ = (UniqueConstraint("namespace", "name", "tag"),) @@ -317,6 +317,9 @@ class Schemas(Base): default=deliver_update_date, onupdate=deliver_update_date ) + projects_mappings: Mapped[List["Projects"]] = relationship( + "Projects", back_populates="schema_mapping" + ) group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( "SchemaGroupRelations", back_populates="schema_mapping" ) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index d0281cd..6270f4e 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -255,9 +255,10 @@ class SchemaAnnotation(BaseModel): namespace: str name: str - last_update_date: Optional[datetime.datetime] - submission_date: Optional[datetime.datetime] - description: Optional[str] + last_update_date: str + submission_date: str + description: Optional[str] = "" + popularity_number: Optional[int] = 0 class SchemaSearchResult(BaseModel): diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 09045e8..4091891 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -30,6 +30,7 @@ Schemas, SchemaGroups, SchemaGroupRelations, + User, ) from pepdbagent.exceptions import ( SchemaAlreadyExistsError, @@ -110,9 +111,10 @@ def info(self, namespace: str, name: str) -> SchemaAnnotation: return SchemaAnnotation( namespace=schema_obj.namespace, name=schema_obj.name, - last_update_date=schema_obj.last_update_date, - submission_date=schema_obj.submission_date, + last_update_date=str(schema_obj.last_update_date), + submission_date=str(schema_obj.submission_date), description=schema_obj.description, + popularity_number=len(schema_obj.projects_mappings), ) def search( @@ -143,9 +145,10 @@ def search( SchemaAnnotation( namespace=result.namespace, name=result.name, - last_update_date=result.last_update_date, - submission_date=result.submission_date, + last_update_date=str(result.last_update_date), + submission_date=str(result.submission_date), description=result.description, + # popularity_number=sum(result.projects_mappings), ) ) @@ -229,6 +232,13 @@ def create( ) with Session(self._sa_engine) as session: + user = session.scalar(select(User).where(User.namespace == namespace)) + + if not user: + user = User(namespace=namespace) + session.add(user) + session.commit() + schema_obj = Schemas( namespace=namespace, name=name, @@ -366,8 +376,8 @@ def group_get(self, namespace: str, name: str) -> SchemaGroupAnnotation: SchemaAnnotation( namespace=schema_annotation.namespace, name=schema_annotation.name, - last_update_date=schema_annotation.last_update_date, - submission_date=schema_annotation.submission_date, + last_update_date=str(schema_annotation.last_update_date), + submission_date=str(schema_annotation.submission_date), desciription=schema_annotation.description, ) ) @@ -458,7 +468,7 @@ def _group_search_count(self, namespace: str = None, search_str: str = ""): """ statement = select(func.count(SchemaGroups.id)) - statement = self._add_condition(statement, namespace, search_str) + statement = self._add_group_condition(statement, namespace, search_str) with Session(self._sa_engine) as session: result = session.execute(statement).one() @@ -475,7 +485,7 @@ def group_delete(self, namespace: str, name: str) -> None: :return: None """ - if self.group_exist(namespace, name): + if not self.group_exist(namespace, name): raise SchemaGroupDoesNotExistError( f"Schema group '{name}' does not exist in the database" ) @@ -559,30 +569,43 @@ def group_remove_schema( try: with Session(self._sa_engine) as session: - session.execute( - delete(SchemaGroupRelations).where( - and_( - SchemaGroupRelations.schema_id - == select(Schemas.id) - .where( - and_( - Schemas.namespace == schema_namespace, - Schemas.name == schema_name, - ) + + a = session.scalar( + select(Schemas).where( + and_(Schemas.namespace == schema_namespace, Schemas.name == schema_name) + ) + ) + b = session.scalar( + select(SchemaGroups).where( + and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + ) + ) + + delete_statement = delete(SchemaGroupRelations).where( + and_( + SchemaGroupRelations.schema_id + == select(Schemas.id) + .where( + and_( + Schemas.namespace == schema_namespace, + Schemas.name == schema_name, ) - .subquery(), - SchemaGroupRelations.group_id - == select(SchemaGroups.id) - .where( - and_( - SchemaGroups.namespace == namespace, - SchemaGroups.name == name, - ) + ) + .subquery(), + SchemaGroupRelations.group_id + == select(SchemaGroups.id) + .where( + and_( + SchemaGroups.namespace == namespace, + SchemaGroups.name == name, ) - .subquery(), ) + .subquery(), ) ) + + session.execute(delete_statement) + session.commit() except IntegrityError: raise SchemaIsNotInGroupError("Schema not found in the group") diff --git a/tests/schemas/namespace1/2.0.0.yaml b/tests/schemas/namespace1/2.0.0.yaml new file mode 100644 index 0000000..56f2034 --- /dev/null +++ b/tests/schemas/namespace1/2.0.0.yaml @@ -0,0 +1,69 @@ +description: "Schema for a minimal PEP" +version: "2.0.0" +properties: + config: + properties: + name: + type: string + pattern: "^\\S*$" + description: "Project name with no whitespace" + pep_version: + description: "Version of the PEP Schema this PEP follows" + type: string + sample_table: + type: string + description: "Path to the sample annotation table with one row per sample" + subsample_table: + type: string + description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table" + sample_modifiers: + type: object + properties: + append: + type: object + duplicate: + type: object + imply: + type: array + items: + type: object + properties: + if: + type: object + then: + type: object + derive: + type: object + properties: + attributes: + type: array + items: + type: string + sources: + type: object + project_modifiers: + type: object + properties: + amend: + description: "Object overwriting original project attributes" + type: object + import: + description: "List of external PEP project config files to import" + type: array + items: + type: string + required: + - pep_version + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + pattern: "^\\S*$" + description: "Unique name of the sample with no whitespace" + required: + - sample_name +required: + - samples diff --git a/tests/schemas/namespace1/2.1.0.yaml b/tests/schemas/namespace1/2.1.0.yaml new file mode 100644 index 0000000..e3982aa --- /dev/null +++ b/tests/schemas/namespace1/2.1.0.yaml @@ -0,0 +1,77 @@ +description: "Schema for a minimal PEP" +version: "2.1.0" +properties: + config: + properties: + name: + type: string + pattern: "^\\S*$" + description: "Project name with no whitespace" + pep_version: + description: "Version of the PEP Schema this PEP follows" + type: string + sample_table: + type: string + description: "Path to the sample annotation table" + subsample_table: + type: string + description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table" + sample_table_index: + type: string + pattern: "^\\S*$" + description: "Name of the column in sample table to use as an index. It's 'sample_name' by default" + subsample_table_index: + type: array + items: + type: string + pattern: "^\\S*$" + description: "Names of the columns in subsample table to use as an index. It's ['sample_name', 'subsample_name'] by default" + sample_modifiers: + type: object + properties: + append: + type: object + duplicate: + type: object + imply: + type: array + items: + type: object + properties: + if: + type: object + then: + type: object + derive: + type: object + properties: + attributes: + type: array + items: + type: string + sources: + type: object + project_modifiers: + type: object + properties: + amend: + description: "Object overwriting original project attributes" + type: object + import: + description: "List of external PEP project config files to import" + type: array + items: + type: string + required: + - pep_version + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + pattern: "^\\S*$" + description: "Unique name of the sample with no whitespace" +required: + - samples diff --git a/tests/schemas/namespace2/bedboss.yaml b/tests/schemas/namespace2/bedboss.yaml new file mode 100644 index 0000000..e6ffa0c --- /dev/null +++ b/tests/schemas/namespace2/bedboss.yaml @@ -0,0 +1,47 @@ +description: bedboss run-all pep schema + +properties: + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + description: "Name of the sample" + input_file: + type: string + description: "Absolute path to the input file" + input_type: + type: string + description: "file format" + enum: [ "bigWig", "bigBed", "bed", "wig", "bedGraph" ] + genome: + type: string + description: "organism genome code" + format_type: + type: string + description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)" + enum: [ "narrowPeak", "broadPeak" ] + description: + type: string + description: "freeform description of the sample" + open_signal_matrix: + type: string + description: "A full path to the openSignalMatrix required for the tissue" + chrom_sizes: + type: string + description: "A full path to the chrom.sizes required for the bedtobigbed conversion" + treatment: + type: string + description: "freeform description of the sample treatment" + cell_type: + type: string + description: "cell type code" + required: + - sample_name + - input_file + - input_type + - genome +required: + - samples diff --git a/tests/schemas/namespace2/bedbuncher.yaml b/tests/schemas/namespace2/bedbuncher.yaml new file mode 100644 index 0000000..cd42998 --- /dev/null +++ b/tests/schemas/namespace2/bedbuncher.yaml @@ -0,0 +1,25 @@ +description: bedbuncher PEP schema +imports: + - http://schema.databio.org/pep/2.0.0.yaml + +properties: + samples: + type: array + items: + type: object + properties: + JSONquery_path: + type: string + description: "path to the JSON file with the Elasticsearch query" + bedset_name: + type: string + pattern: "^\\S*$" + description: "name of the bedset that will be created" + bbconfig_path: + type: string + description: "path to bedbase config file" + required: + - JSONquery_path + - bedset_name +required: + - samples \ No newline at end of file diff --git a/tests/schemas/namespace2/bedmaker.yaml b/tests/schemas/namespace2/bedmaker.yaml new file mode 100644 index 0000000..93806d0 --- /dev/null +++ b/tests/schemas/namespace2/bedmaker.yaml @@ -0,0 +1,59 @@ +description: bedmaker PEP schema + +properties: + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + description: "name of the sample, which is the name of the output BED file" + input_file_path: + type: string + description: "absolute path the file to convert" + output_bed_path: + type: string + description: "absolute path the file to the output BED file (derived attribute)" + output_bigbed_path: + type: string + description: "absolute path the file to the output bigBed file (derived attribute)" + genome: + type: string + description: "organism genome code" + narrowpeak: + type: boolean + description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)" + format: + type: string + description: "file format" + enum: ["bigWig", "bigBed", "bed", "wig", "bedGraph"] + cell_type: + type: string + description: "cell type code" + antibody: + type: string + description: "antibody used if ChIP-seq experiment" + description: + type: string + description: "freeform description of the sample" + exp_protocol: + type: string + description: "type of the experiment the file was generated in" + data_source: + type: string + description: "source of the sample, preferably a GSE* code" + treatment: + type: string + description: "freeform description of the sample treatment" + required_files: + - input_file_path + required: + - input_file_path + - output_bed_path + - output_bigbed_path + - genome + - narrowpeak + - sample_name +required: + - samples diff --git a/tests/test_schema.py b/tests/test_schema.py index bc86c34..f09625c 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -7,4 +7,213 @@ not PEPDBAgentContextManager().db_setup(), reason="DB is not setup", ) -class TestSamples: ... +class TestSamples: + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_get(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema = agent.schema.get(namespace=namespace, name=name) + assert agent.schema.exist(namespace=namespace, name=name) + assert schema + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_delete(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + assert agent.schema.exist(namespace=namespace, name=name) + agent.schema.delete(namespace=namespace, name=name) + assert not agent.schema.exist(namespace=namespace, name=name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_update(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema = agent.schema.get(namespace=namespace, name=name) + schema["new"] = "hello" + agent.schema.update(namespace=namespace, name=name, schema=schema) + assert agent.schema.exist(namespace=namespace, name=name) + assert schema == agent.schema.get(namespace=namespace, name=name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_get_annotation(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema_annot = agent.schema.info(namespace=namespace, name=name) + assert schema_annot + assert schema_annot.model_fields_set == { + "namespace", + "name", + "last_update_date", + "submission_date", + "description", + "popularity_number", + } + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_update_annotation(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema_annot = agent.schema.info(namespace=namespace, name=name) + schema = agent.schema.get(namespace=namespace, name=name) + agent.schema.update( + namespace=namespace, name=name, schema=schema, description="new desc" + ) + assert schema_annot != agent.schema.info(namespace=namespace, name=name) + + @pytest.mark.skip("") + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_annotation_popular(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + ... + # TODO: implement this feature + + def test_search(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2") + assert results + assert results.count == 3 + assert len(results.results) == 3 + + def test_search_offset(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", offset=1) + assert results + assert results.count == 3 + assert len(results.results) == 2 + + def test_search_limit(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", limit=1) + assert results + assert results.count == 3 + assert len(results.results) == 1 + + def test_search_limit_offset(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", limit=2, offset=2) + assert results + assert results.count == 3 + assert len(results.results) == 1 + + def test_search_query(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + results = agent.schema.search(namespace="namespace2", search_str="bedb") + assert results + assert results.count == 2 + assert len(results.results) == 2 + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_create_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + assert agent.schema.group_exist(namespace=namespace, name=group_name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_delete_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + assert agent.schema.group_exist(namespace=namespace, name=group_name) + agent.schema.group_delete(namespace=namespace, name=group_name) + assert not agent.schema.group_exist(namespace=namespace, name=group_name) + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_add_to_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + agent.schema.group_add_schema( + namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace + ) + group_annot = agent.schema.group_get(namespace=namespace, name=group_name) + assert group_annot.schemas[0].name == name + + @pytest.mark.parametrize( + "namespace, name", + [ + ["namespace1", "2.0.0"], + ], + ) + def test_remove_from_group(self, namespace, name): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name = "new_group" + agent.schema.group_create( + namespace=namespace, name=group_name, description="new group" + ) + agent.schema.group_add_schema( + namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace + ) + group_annot = agent.schema.group_get(namespace=namespace, name=group_name) + assert len(group_annot.schemas) == 1 + + agent.schema.group_remove_schema( + namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace + ) + group_annot = agent.schema.group_get(namespace=namespace, name=group_name) + assert len(group_annot.schemas) == 0 + + def test_search_group(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + group_name1 = "new_group1" + group_name2 = "new2" + group_name3 = "new_group3" + agent.schema.group_create( + namespace="namespace1", name=group_name1, description="new group" + ) + agent.schema.group_create(namespace="namespace1", name=group_name2, description="new") + agent.schema.group_create( + namespace="namespace1", name=group_name3, description="new group" + ) + + results = agent.schema.group_search(search_str="new_group") + + assert results.count == 2 + assert len(results.results) == 2 diff --git a/tests/utils.py b/tests/utils.py index 8ddc820..b96192a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ import os import warnings +import yaml import peppy from sqlalchemy.exc import OperationalError @@ -8,12 +9,21 @@ DSN = "postgresql+psycopg://postgres:pass8743hf9h23f87h437@localhost:5432/pep-db" -DATA_PATH = os.path.join( +TESTS_PATH = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "tests", +) + +DATA_PATH = os.path.join( + TESTS_PATH, "data", ) +SCHEMAS_PATH = os.path.join( + TESTS_PATH, + "schemas", +) + def get_path_to_example_file(namespace: str, project_name: str) -> str: """ @@ -22,6 +32,13 @@ def get_path_to_example_file(namespace: str, project_name: str) -> str: return os.path.join(DATA_PATH, namespace, project_name, "project_config.yaml") +def get_path_to_example_schema(namespace: str, schema_name: str) -> str: + """ + Get path to example schema + """ + return os.path.join(SCHEMAS_PATH, namespace, schema_name) + + def list_of_available_peps() -> dict: pep_namespaces = os.listdir(DATA_PATH) projects = {} @@ -31,12 +48,29 @@ def list_of_available_peps() -> dict: return projects +def list_of_available_schemas() -> dict: + schema_namespaces = os.listdir(SCHEMAS_PATH) + schemas = {} + for np in schema_namespaces: + schema_name = os.listdir(os.path.join(SCHEMAS_PATH, np)) + schemas[np] = {p: get_path_to_example_schema(np, p) for p in schema_name} + return schemas + + +def read_yaml_file(file_path: str) -> dict: + """ + Read yaml file + """ + with open(file_path, "r") as file: + return yaml.safe_load(file) + + class PEPDBAgentContextManager: """ Class with context manager to connect to database. Adds data and drops everything from the database upon exit to ensure. """ - def __init__(self, url: str = DSN, add_data: bool = False): + def __init__(self, url: str = DSN, add_data: bool = False, add_schemas=False, echo=False): """ :param url: database url e.g. "postgresql+psycopg://postgres:docker@localhost:5432/pep-db" :param add_data: add data to the database @@ -44,7 +78,9 @@ def __init__(self, url: str = DSN, add_data: bool = False): self.url = url self._agent = None + self._echo = echo self.add_data = add_data + self.add_schemas = add_schemas def __enter__(self): self._agent = PEPDatabaseAgent(dsn=self.url, echo=False) @@ -52,13 +88,15 @@ def __enter__(self): self.db_engine.create_schema() if self.add_data: self._insert_data() + if self.add_schemas: + self._add_schemas() return self._agent def __exit__(self, exc_type, exc_value, exc_traceback): self.db_engine.delete_schema() def _insert_data(self): - pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=True) + pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=self._echo) for namespace, item in list_of_available_peps().items(): if namespace == "private_test": private = True @@ -76,6 +114,14 @@ def _insert_data(self): pep_schema="random_schema_name", ) + def _add_schemas(self): + pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=self._echo) + for namespace, item in list_of_available_schemas().items(): + for name, path in item.items(): + file_dict = read_yaml_file(path) + + pepdb_con.schema.create(namespace=namespace, name=name[0:-5], schema=file_dict) + @property def agent(self) -> PEPDatabaseAgent: return self._agent