Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] feat(ontologies): add ontology terms/resources models/types/instances #236

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ All Bento channels are prefixed with `bento.`.
`logging` contains helper functions for standardized Bento logging configuration
and formatting.

### `ontologies`

`ontologies` contains models, types, and helpers for working with ontology terms,
especially terms which must eventually be ingested into
[Katsu](https://github.com/bento-platform/katsu).

### `responses`

`responses` contains standardized error message-generating functions
Expand Down
13 changes: 12 additions & 1 deletion bento_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from . import auth
from . import drs
from . import events
from . import ontologies
from . import schemas
from . import search
from . import service_info
Expand All @@ -12,5 +13,15 @@

__version__ = metadata.version(__name__)
# Names exported by `from bento_lib import *`: the version string plus the package's submodules.
# One entry per line (with a trailing comma) so that adding a submodule is a one-line diff and two adjacent
# string literals can never be silently concatenated.
__all__ = [
    "__version__",
    "apps",
    "auth",
    "drs",
    "events",
    "ontologies",
    "schemas",
    "search",
    "service_info",
    "streaming",
    "workflows",
]
Empty file.
120 changes: 120 additions & 0 deletions bento_lib/ontologies/common_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from .models import OntologyResource, VersionedOntologyResource

Check warning on line 1 in bento_lib/ontologies/common_resources.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_resources.py#L1

Added line #L1 was not covered by tests

# Public names of this module, grouped by ontology. Every resource constant defined below must be listed here,
# including the version-pinned variants.
__all__ = [
    # EFO
    "EFO",
    "EFO_3_69_0",
    # MONDO
    "MONDO",
    "MONDO_2024_09_03",
    # NCBITaxon
    "NCBI_TAXON",
    "NCBI_TAXON_2024_07_03",
    # NCIT
    "NCIT",
    "NCIT_2024_05_07",
    # OBI
    "OBI",
    "OBI_2024_06_10",
    # SO
    "SO",
    "SO_2024_06_05",
    # UBERON
    "UBERON",
]


def _versioned(ont: OntologyResource, url: str, version: str) -> VersionedOntologyResource:
    """
    Build a version-pinned resource from an existing ontology resource.

    The id, name, namespace prefix and IRI prefix are copied from ``ont``; the URL and version come from the
    ``url`` and ``version`` arguments instead.
    """
    shared_fields = ont.model_dump(include={"id", "name", "namespace_prefix", "iri_prefix"})
    return VersionedOntologyResource(**shared_fields, url=url, version=version)


# === EFO ==============================================================================================================

# Unversioned description of the Experimental Factor Ontology.
EFO = OntologyResource(
    id="efo",
    name="Experimental Factor Ontology",
    namespace_prefix="EFO",
    iri_prefix="http://www.ebi.ac.uk/efo/EFO_",
    url="http://www.ebi.ac.uk/efo/efo.owl",
)
# The same ontology pinned to the 3.69.0 release URL. `url` is passed by keyword, like every other `_versioned`
# call in this module.
EFO_3_69_0 = _versioned(EFO, url="http://www.ebi.ac.uk/efo/releases/v3.69.0/efo.owl", version="3.69.0")

Check warning on line 43 in bento_lib/ontologies/common_resources.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_resources.py#L43

Added line #L43 was not covered by tests

# === MONDO ============================================================================================================

# Unversioned description of the Mondo Disease Ontology (keyword order follows the model's field order).
MONDO = OntologyResource(
    id="mondo",
    name="Mondo Disease Ontology",
    url="http://purl.obolibrary.org/obo/mondo.owl",
    namespace_prefix="MONDO",
    iri_prefix="http://purl.obolibrary.org/obo/MONDO_",
)
# The same ontology pinned to the 2024-09-03 release URL.
MONDO_2024_09_03 = _versioned(
    MONDO,
    version="2024-09-03",
    url="http://purl.obolibrary.org/obo/mondo/releases/2024-09-03/mondo.owl",
)

# === NCBITaxon ========================================================================================================

# Unversioned description of the NCBI organismal classification (keyword order follows the model's field order).
NCBI_TAXON = OntologyResource(
    id="ncbitaxon",
    name="NCBI organismal classification",
    url="http://purl.obolibrary.org/obo/ncbitaxon.owl",
    namespace_prefix="NCBITaxon",
    iri_prefix="http://purl.obolibrary.org/obo/NCBITaxon_",
)
# The same ontology pinned to the 2024-07-03 release URL.
NCBI_TAXON_2024_07_03 = _versioned(
    NCBI_TAXON,
    version="2024-07-03",
    url="http://purl.obolibrary.org/obo/ncbitaxon/2024-07-03/ncbitaxon.owl",
)

# === NCIT =============================================================================================================

# Unversioned description of the NCI Thesaurus OBO Edition (keyword order follows the model's field order).
NCIT = OntologyResource(
    id="ncit",
    name="NCI Thesaurus OBO Edition",
    url="http://purl.obolibrary.org/obo/ncit.owl",
    namespace_prefix="NCIT",
    iri_prefix="http://purl.obolibrary.org/obo/NCIT_",
)
# The same ontology pinned to the 2024-05-07 release URL.
NCIT_2024_05_07 = _versioned(
    NCIT,
    version="2024-05-07",
    url="http://purl.obolibrary.org/obo/ncit/releases/2024-05-07/ncit.owl",
)

# === OBI ==============================================================================================================

# Unversioned description of the Ontology for Biomedical Investigations (keyword order follows the model's field
# order).
OBI = OntologyResource(
    id="obi",
    name="Ontology for Biomedical Investigations",
    url="http://purl.obolibrary.org/obo/obi.owl",
    namespace_prefix="OBI",
    iri_prefix="http://purl.obolibrary.org/obo/OBI_",
)
# The same ontology pinned to the 2024-06-10 release URL.
OBI_2024_06_10 = _versioned(
    OBI,
    version="2024-06-10",
    url="http://purl.obolibrary.org/obo/obi/2024-06-10/obi.owl",
)

Check warning on line 99 in bento_lib/ontologies/common_resources.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_resources.py#L99

Added line #L99 was not covered by tests

# === SO ===============================================================================================================

# Unversioned description of the Sequence types and features ontology (keyword order follows the model's field
# order).
SO = OntologyResource(
    id="so",
    name="Sequence types and features ontology",
    url="http://purl.obolibrary.org/obo/so.owl",
    namespace_prefix="SO",
    iri_prefix="http://purl.obolibrary.org/obo/SO_",
)
# The same ontology pinned to the 2024-06-05 release URL.
SO_2024_06_05 = _versioned(
    SO,
    version="2024-06-05",
    url="http://purl.obolibrary.org/obo/so/2024-06-05/so.owl",
)

Check warning on line 110 in bento_lib/ontologies/common_resources.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_resources.py#L110

Added line #L110 was not covered by tests

# === UBERON ===========================================================================================================

# Unversioned description of the Uberon multi-species anatomy ontology (keyword order follows the model's field
# order). No version-pinned variant is defined for this ontology here.
UBERON = OntologyResource(
    id="uberon",
    name="Uberon multi-species anatomy ontology",
    url="http://purl.obolibrary.org/obo/uberon.owl",
    namespace_prefix="UBERON",
    iri_prefix="http://purl.obolibrary.org/obo/UBERON_",
)
31 changes: 31 additions & 0 deletions bento_lib/ontologies/common_terms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from .common_resources import NCBI_TAXON, OBI, SO

Check warning on line 1 in bento_lib/ontologies/common_terms.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_terms.py#L1

Added line #L1 was not covered by tests

# Public names of this module, grouped by the ontology each term is built from.
__all__ = [
    # Terms from NCBITaxon
    "NCBI_TAXON_HOMO_SAPIENS",
    "NCBI_TAXON_MUS_MUSCULUS",
    # Terms from OBI
    "OBI_16S_RRNA_ASSAY",
    "OBI_RNA_SEQ_ASSAY",
    "OBI_PROTEOMIC_PROFILING_BY_ARRAY_ASSAY",
    "OBI_WHOLE_GENOME_SEQUENCING_ASSAY",
    # Terms from SO
    "SO_GENOMIC_DNA",
]


# === NCBITaxon ========================================================================================================

# Species terms, each built from the NCBI_TAXON resource with its CURIE and label.
NCBI_TAXON_HOMO_SAPIENS = NCBI_TAXON.make_term(id_="NCBITaxon:9606", label="Homo sapiens")
NCBI_TAXON_MUS_MUSCULUS = NCBI_TAXON.make_term(id_="NCBITaxon:10090", label="Mus musculus")

Check warning on line 20 in bento_lib/ontologies/common_terms.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_terms.py#L19-L20

Added lines #L19 - L20 were not covered by tests

# === OBI ==============================================================================================================

# Assay terms, each built from the OBI resource with its CURIE and label.
OBI_16S_RRNA_ASSAY = OBI.make_term(id_="OBI:0002763", label="16s ribosomal gene sequencing assay")
OBI_RNA_SEQ_ASSAY = OBI.make_term(id_="OBI:0001271", label="RNA-seq assay")
OBI_PROTEOMIC_PROFILING_BY_ARRAY_ASSAY = OBI.make_term(id_="OBI:0001318", label="proteomic profiling by array assay")
OBI_WHOLE_GENOME_SEQUENCING_ASSAY = OBI.make_term(id_="OBI:0002117", label="whole genome sequencing assay")

Check warning on line 27 in bento_lib/ontologies/common_terms.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_terms.py#L24-L27

Added lines #L24 - L27 were not covered by tests

# === SO ===============================================================================================================

# Sequence-type term built from the SO resource with its CURIE and label.
SO_GENOMIC_DNA = SO.make_term(id_="SO:0000991", label="genomic DNA")

Check warning on line 31 in bento_lib/ontologies/common_terms.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/common_terms.py#L31

Added line #L31 was not covered by tests
58 changes: 58 additions & 0 deletions bento_lib/ontologies/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from pydantic import BaseModel, Field, HttpUrl
from typing import Annotated

Check warning on line 2 in bento_lib/ontologies/models.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/models.py#L1-L2

Added lines #L1 - L2 were not covered by tests

from .types import PhenoV2Resource, PhenoV2OntologyClassDict

Check warning on line 4 in bento_lib/ontologies/models.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/models.py#L4

Added line #L4 was not covered by tests

# Pattern applied to OntologyResource.namespace_prefix: a leading ASCII letter or underscore, followed by any number
# of ASCII letters, digits, ".", "-" or "_".
NC_NAME_PATTERN = r"^[a-zA-Z_][a-zA-Z0-9.\-_]*$"
# Pattern applied to OntologyTerm.id (a CURIE): a prefix of the same shape as NC_NAME_PATTERN, a ":" separator, then
# one or more ASCII letters, digits, ".", "-" or "_" as the local part.
CURIE_PATTERN = r"^[a-zA-Z_][a-zA-Z0-9.\-_]*:[a-zA-Z0-9.\-_]+$"

Check warning on line 7 in bento_lib/ontologies/models.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/models.py#L6-L7

Added lines #L6 - L7 were not covered by tests


class OntologyResource(BaseModel):
    """
    Inspired by the Phenopackets v2 Resource model:
    https://phenopacket-schema.readthedocs.io/en/latest/resource.html
    """

    # Resource identifier. Per Phenopackets v2: for OBO ontologies this MUST be the official OBO ID, which is always
    # the ID prefix in lower case (e.g. hp, go, mp, mondo; see http://obofoundry.org for the complete list). Resources
    # which do not use native CURIE identifiers (e.g. SNOMED, UniProt, ClinVar) use their identifiers.org prefix.
    id: str

    # Human-readable name of the ontology referred to by `id` (e.g. "The Human Phenotype Ontology"). Per Phenopackets
    # v2: for OBO ontologies this SHOULD equal the title field on http://obofoundry.org, and other resources should
    # use their official title. The field is informational only; software should not encode assumptions about it.
    name: str
    url: HttpUrl

    # Prefix used in the CURIE of a term from this resource (e.g. HP, MP, ECO; an HPO term has a CURIE such as
    # HP:0012828). Per Phenopackets v2, it is combined with `iri_prefix` to form a fully-resolvable IRI.
    # Because it is used as a CURIE prefix, it must be a valid NCName:
    # https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName
    namespace_prefix: Annotated[str, Field(pattern=NC_NAME_PATTERN)]
    iri_prefix: HttpUrl

    def make_term(self, id_: str, label: str) -> "OntologyTerm":
        """Create an OntologyTerm with the given CURIE and label whose `ontology` is this resource."""
        term = OntologyTerm(id=id_, label=label, ontology=self)
        return term

Check warning on line 37 in bento_lib/ontologies/models.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/models.py#L36-L37

Added lines #L36 - L37 were not covered by tests


class VersionedOntologyResource(OntologyResource):
    # Version of the ontology release this resource describes, stored as a plain string.
    version: str

    def to_phenopackets_repr(self) -> PhenoV2Resource:
        """Dump this resource in JSON mode, restricted to the keys declared by PhenoV2Resource."""
        phenopackets_fields = {"id", "name", "url", "version", "namespace_prefix", "iri_prefix"}
        return self.model_dump(mode="json", include=phenopackets_fields)

Check warning on line 44 in bento_lib/ontologies/models.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/models.py#L43-L44

Added lines #L43 - L44 were not covered by tests


class OntologyTerm(BaseModel):
    """
    Inspired by the Phenopackets v2 OntologyClass model:
    https://phenopacket-schema.readthedocs.io/en/latest/ontologyclass.html
    """

    # Resource this term belongs to. Annotated with the base OntologyResource type because
    # OntologyResource.make_term() passes `self`, which may be an unversioned resource; pydantic rejects a
    # parent-class instance for a field annotated with the subclass. VersionedOntologyResource instances are still
    # accepted, since they are OntologyResource instances.
    # NOTE(review): a full model_dump() of a term may serialize this field using the base-class fields only -
    # confirm whether any caller relies on `version` appearing in such a dump.
    ontology: OntologyResource
    # CURIE of the term, e.g. "HP:0012828"; must match CURIE_PATTERN.
    id: Annotated[str, Field(pattern=CURIE_PATTERN)]
    # Human-readable label of the term.
    label: str

    def to_phenopackets_repr(self) -> PhenoV2OntologyClassDict:
        """Dump this term in JSON mode, keeping only the `id` and `label` keys."""
        return self.model_dump(mode="json", include={"id", "label"})

Check warning on line 58 in bento_lib/ontologies/models.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/models.py#L57-L58

Added lines #L57 - L58 were not covered by tests
20 changes: 20 additions & 0 deletions bento_lib/ontologies/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from typing import TypedDict

Check warning on line 1 in bento_lib/ontologies/types.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/types.py#L1

Added line #L1 was not covered by tests

# Public names of this module: the TypedDict shapes for Phenopackets v2-style dictionaries.
__all__ = [
    "PhenoV2Resource",
    "PhenoV2OntologyClassDict",
]


# Dictionary shape for a resource entry; every key is required and holds a string.
class PhenoV2Resource(TypedDict):
    id: str  # resource identifier
    name: str  # human-readable resource name
    url: str  # resource URL, as a string
    version: str  # resource version
    namespace_prefix: str  # CURIE prefix used by terms of the resource
    iri_prefix: str  # IRI prefix, as a string

Check warning on line 15 in bento_lib/ontologies/types.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/types.py#L9-L15

Added lines #L9 - L15 were not covered by tests


# Dictionary shape for an ontology term; both keys are required and hold strings.
class PhenoV2OntologyClassDict(TypedDict):
    id: str  # term identifier
    label: str  # human-readable term label

Check warning on line 20 in bento_lib/ontologies/types.py

View check run for this annotation

Codecov / codecov/patch

bento_lib/ontologies/types.py#L18-L20

Added lines #L18 - L20 were not covered by tests