Skip to content

Commit

Permalink
Add basic search implementation for OLS (#163)
Browse files Browse the repository at this point in the history
  • Loading branch information
pkalita-lbl authored Jul 6, 2022
1 parent 81834ac commit 2fc6c04
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 5 deletions.
54 changes: 49 additions & 5 deletions src/oaklib/implementations/ols/ols_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from oaklib.datamodels import oxo
from oaklib.datamodels.oxo import ScopeEnum
from oaklib.datamodels.search import SearchConfiguration
from oaklib.datamodels.search import SearchConfiguration, SearchProperty
from oaklib.datamodels.text_annotator import TextAnnotation
from oaklib.datamodels.vocabulary import IS_A
from oaklib.implementations.ols import SEARCH_CONFIG
Expand All @@ -21,6 +21,7 @@
from oaklib.types import CURIE, PRED_CURIE

# Type alias for a dictionary with string keys and arbitrary values.
ANNOTATION = Dict[str, Any]
# Default value of the "rows" (page size) parameter sent to the OLS search
# endpoint when the search configuration does not set a limit.
SEARCH_ROWS = 50

oxo_pred_mappings = {
ScopeEnum.EXACT.text: "skos:exactMatch",
Expand All @@ -43,7 +44,7 @@ class OlsImplementation(TextAnnotatorInterface, SearchInterface, MappingProvider
ols_api_key: str = None
label_cache: Dict[CURIE, str] = field(default_factory=lambda: {})
base_url = "https://www.ebi.ac.uk/spot/oxo/api/mappings"
ols_base_url = "https://www.ebi.ac.uk/ols/api/ontologies/"
ols_base_url = "https://www.ebi.ac.uk/ols/api"
prefix_map: Dict[str, str] = field(default_factory=lambda: {})
focus_ontology: str = None

Expand All @@ -61,7 +62,8 @@ def get_prefix_map(self) -> PREFIX_MAP:
return self.prefix_map

def get_labels_for_curies(self, curies: Iterable[CURIE]) -> Iterable[Tuple[CURIE, str]]:
    """
    Yield ``(curie, label)`` pairs using labels held in ``self.label_cache``.

    No request is made here; only labels that were cached earlier (for
    example while iterating over ``basic_search`` results) can be resolved.

    :param curies: CURIEs to look up, processed lazily in the given order
    :return: iterator of ``(curie, label)`` tuples
    :raises KeyError: when a CURIE has no entry in ``self.label_cache``
    """
    for curie in curies:
        yield curie, self.label_cache[curie]

def annotate_text(self, text: str) -> Iterator[TextAnnotation]:
raise NotImplementedError
Expand All @@ -88,7 +90,7 @@ def ancestors(
# must be double encoded https://www.ebi.ac.uk/ols/docs/api
term_id_quoted = urllib.parse.quote(term_id, safe="")
term_id_quoted = urllib.parse.quote(term_id_quoted, safe="")
url = f"{self.ols_base_url}{ontology}/terms/{term_id_quoted}/{query}"
url = f"{self.ols_base_url}/ontologies/{ontology}/terms/{term_id_quoted}/{query}"
logging.debug(f"URL={url}")
result = requests.get(url)
obj = result.json()
Expand All @@ -106,7 +108,49 @@ def ancestors(
def basic_search(
    self, search_term: str, config: SearchConfiguration = SEARCH_CONFIG
) -> Iterable[CURIE]:
    """
    Search OLS for classes matching a term, yielding their CURIEs lazily.

    Results are fetched page by page from the ``/search`` endpoint under
    ``self.ols_base_url``. The label of every hit is stored in
    ``self.label_cache`` before its CURIE is yielded. When
    ``self.focus_ontology`` is set, the search is restricted to that ontology.

    :param search_term: text sent as the ``q`` query parameter
    :param config: selects which fields are searched (``properties``), the
        page size (``limit``, falling back to SEARCH_ROWS) and whether
        matching is exact (``is_complete`` / ``is_partial``)
    :return: iterator over the CURIEs of the matching classes
    :raises requests.HTTPError: if OLS answers with an error status
    """
    # OLS query fields for each search property. Properties not listed here
    # contribute nothing; if no field ends up selected, the queryFields
    # param is left off and all fields are queried.
    fields_by_property = (
        (SearchProperty.IDENTIFIER, ("iri", "obo_id")),
        (SearchProperty.LABEL, ("label",)),
        (SearchProperty.ALIAS, ("synonym",)),
        (SearchProperty.DEFINITION, ("description",)),
        (SearchProperty.INFORMATIVE_TEXT, ("description",)),
    )
    query_fields = set()
    for prop, fields in fields_by_property:
        if SearchProperty(prop) in config.properties:
            query_fields.update(fields)

    page_size = config.limit if config.limit is not None else SEARCH_ROWS
    exact = config.is_complete is True or config.is_partial is False
    params = {
        "q": search_term,
        "type": "class",
        "local": "true",
        "fieldList": "iri,label",
        "rows": page_size,
        "start": 0,
        "exact": "true" if exact else "false",
    }
    if query_fields:
        # Sorted so that the generated URL is reproducible between runs
        params["queryFields"] = ",".join(sorted(query_fields))
    if self.focus_ontology:
        params["ontology"] = self.focus_ontology.lower()

    while True:
        response = requests.get(f"{self.ols_base_url}/search", params=params)
        logging.debug(f"URL={response.url}")
        # Fail with a clear HTTP error instead of a JSON/KeyError further down
        response.raise_for_status()
        result = response.json()["response"]
        docs = result["docs"]
        for doc in docs:
            curie = self.uri_to_curie(doc["iri"])
            self.label_cache[curie] = doc["label"]
            yield curie
        params["start"] += page_size
        # Stop once the offset reaches the total (>= avoids one extra, empty
        # request) or when a page comes back empty (e.g. a page size of 0,
        # which would otherwise never advance the offset).
        if not docs or params["start"] >= result["numFound"]:
            break

# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Implements: MappingsInterface
Expand Down
26 changes: 26 additions & 0 deletions tests/test_implementations/test_ols.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import itertools
import unittest

from linkml_runtime.dumpers import yaml_dumper

from oaklib.datamodels.search import SearchConfiguration, SearchProperty
from oaklib.datamodels.vocabulary import IS_A
from oaklib.implementations.ols.ols_implementation import OlsImplementation
from oaklib.resource import OntologyResource
Expand Down Expand Up @@ -32,3 +34,27 @@ def test_ancestors(self):
# print(a)
assert CYTOPLASM not in ancs
assert CELLULAR_COMPONENT in ancs

def test_basic_search(self):
    """An unrestricted search for "epilepsy" finds the MONDO epilepsy term."""
    self.oi.focus_ontology = None
    hits = self.oi.basic_search("epilepsy")
    # Only the first 20 hits are consumed from the lazy result stream
    first_hits = list(itertools.islice(hits, 20))
    self.assertIn("MONDO:0005027", first_hits)

def test_focus_ontology_search(self):
    """Restricting the search to MONDO must return only MONDO terms."""
    self.oi.focus_ontology = "MONDO"
    results = list(itertools.islice(self.oi.basic_search("epilepsy"), 20))
    # Guard against a vacuous pass: with no hits the loop below checks nothing
    self.assertTrue(results)
    for result in results:
        self.assertRegex(result, "^MONDO:")

def test_search_configuration(self):
    """Search configurations change which terms a "swimming" search returns."""
    self.oi.focus_ontology = None

    def first_hits(cfg):
        # Consume at most the first 20 results of a "swimming" search
        return list(itertools.islice(self.oi.basic_search("swimming", cfg), 20))

    # Label-only search
    label_hits = first_hits(SearchConfiguration(properties=[SearchProperty.LABEL]))
    self.assertIn("GO:0036268", label_hits)  # GO:0036268 == swimming
    self.assertNotIn("NBO:0000371", label_hits)  # NBO:0000371 == aquatic locomotion

    # Complete (exact) matching
    exact_hits = first_hits(SearchConfiguration(is_complete=True))
    self.assertIn("OMIT:0014415", exact_hits)  # OMIT:0014415 == Swimming
    self.assertNotIn("OMIT:0014416", exact_hits)  # OMIT:0014416 == Swimming Pools

0 comments on commit 2fc6c04

Please sign in to comment.