Skip to content

Commit be8c773

Browse files
authored
Externalize OLS implementation (#270)
* Externalize OLS implementation. This PR replaces the hand-written interaction with the EBI's Ontology Lookup Service (OLS) with the `ols-client` package, which also enables interaction with alternative OLS instances such as the one hosted by TIB Hannover. Probably of interest to @StroemPhi
* Add intermediate class skips
* Update test_bioportal.py
* Update basic_ontology_interface.py
* Update poetry.lock
* Update main.yaml
* Update main.yaml
1 parent 9b49621 commit be8c773

8 files changed

Lines changed: 1225 additions & 255 deletions

File tree

poetry.lock

Lines changed: 1152 additions & 213 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ ontoportal-client = "0.0.3"
3030
curies = "^0.1.5"
3131
bioregistry = "^0.5.64"
3232
prefixmaps = "^0.1.2"
33+
ols-client = "^0.1.1"
3334

3435
[tool.poetry.dev-dependencies]
3536
pytest = "^5.2"

src/oaklib/implementations/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66

77
from oaklib.implementations.funowl.funowl_implementation import FunOwlImplementation
88
from oaklib.implementations.gilda import GildaImplementation
9-
from oaklib.implementations.ols.ols_implementation import OlsImplementation
9+
from oaklib.implementations.ols import (
10+
BaseOlsImplementation,
11+
OlsImplementation,
12+
TIBOlsImplementation,
13+
)
1014
from oaklib.implementations.ontobee.ontobee_implementation import OntobeeImplementation
1115
from oaklib.implementations.ontoportal.agroportal_implementation import (
1216
AgroPortalImplementation,
@@ -20,6 +24,9 @@
2024
from oaklib.implementations.ontoportal.matportal_implementation import (
2125
MatPortalImplementation,
2226
)
27+
from oaklib.implementations.ontoportal.ontoportal_implementation_base import (
28+
OntoPortalImplementationBase,
29+
)
2330
from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
2431
from oaklib.implementations.simpleobo.simple_obo_implementation import (
2532
SimpleOboImplementation,
@@ -43,6 +50,7 @@
4350
"EcoPortalImplementation",
4451
"MatPortalImplementation",
4552
"OlsImplementation",
53+
"TIBOlsImplementation",
4654
"OntobeeImplementation",
4755
"ProntoImplementation",
4856
"SimpleOboImplementation",
@@ -64,6 +72,10 @@
6472
implementation_resolver: ClassResolver[OntologyInterface] = ClassResolver.from_subclasses(
6573
OntologyInterface,
6674
suffix="Implementation",
75+
skip={
76+
OntoPortalImplementationBase,
77+
BaseOlsImplementation,
78+
},
6779
)
6880
implementation_resolver.synonyms.update(
6981
{
Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1-
from oaklib.datamodels.search import SearchConfiguration
1+
from .constants import SEARCH_CONFIG
2+
from .ols_implementation import (
3+
BaseOlsImplementation,
4+
OlsImplementation,
5+
TIBOlsImplementation,
6+
)
27

3-
SEARCH_CONFIG = SearchConfiguration()
8+
__all__ = [
9+
"SEARCH_CONFIG",
10+
"BaseOlsImplementation",
11+
"OlsImplementation",
12+
"TIBOlsImplementation",
13+
]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from oaklib.datamodels.search import SearchConfiguration
2+
3+
__all__ = [
4+
"SEARCH_CONFIG",
5+
]
6+
7+
SEARCH_CONFIG = SearchConfiguration()

src/oaklib/implementations/ols/ols_implementation.py

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,33 @@
1-
import logging
2-
import urllib
31
from collections import ChainMap
42
from dataclasses import dataclass, field
5-
from typing import Any, Dict, Iterable, Iterator, List, Tuple, Union
3+
from typing import Any, ClassVar, Dict, Iterable, Iterator, List, Tuple, Union
64

75
import requests
6+
from ols_client import Client, EBIClient, TIBClient
87
from sssom_schema import Mapping
98

109
from oaklib.datamodels import oxo
1110
from oaklib.datamodels.oxo import ScopeEnum
1211
from oaklib.datamodels.search import SearchConfiguration, SearchProperty
1312
from oaklib.datamodels.text_annotator import TextAnnotation
1413
from oaklib.datamodels.vocabulary import IS_A, SEMAPV
15-
from oaklib.implementations.ols import SEARCH_CONFIG
16-
from oaklib.implementations.ols.oxo_utils import load_oxo_payload
1714
from oaklib.interfaces.basic_ontology_interface import PREFIX_MAP
1815
from oaklib.interfaces.mapping_provider_interface import MappingProviderInterface
1916
from oaklib.interfaces.search_interface import SearchInterface
2017
from oaklib.interfaces.text_annotator_interface import TextAnnotatorInterface
2118
from oaklib.types import CURIE, PRED_CURIE
2219

20+
from .constants import SEARCH_CONFIG
21+
from .oxo_utils import load_oxo_payload
22+
23+
__all__ = [
24+
# Abstract classes
25+
"BaseOlsImplementation",
26+
# Concrete classes
27+
"OlsImplementation",
28+
"TIBOlsImplementation",
29+
]
30+
2331
ANNOTATION = Dict[str, Any]
2432
SEARCH_ROWS = 50
2533

@@ -32,19 +40,20 @@
3240

3341

3442
@dataclass
35-
class OlsImplementation(TextAnnotatorInterface, SearchInterface, MappingProviderInterface):
43+
class BaseOlsImplementation(TextAnnotatorInterface, SearchInterface, MappingProviderInterface):
3644
"""
3745
Implementation over OLS and OxO APIs
3846
"""
3947

40-
ols_api_key: str = None
48+
ols_client_class: ClassVar[type[Client]]
4149
label_cache: Dict[CURIE, str] = field(default_factory=lambda: {})
4250
base_url = "https://www.ebi.ac.uk/spot/oxo/api/mappings"
43-
ols_base_url = "https://www.ebi.ac.uk/ols/api"
4451
_prefix_map: Dict[str, str] = field(default_factory=lambda: {})
4552
focus_ontology: str = None
53+
client: Client = field(init=False)
4654

4755
def __post_init__(self):
56+
self.client = self.ols_client_class()
4857
if self.focus_ontology is None:
4958
if self.resource:
5059
self.focus_ontology = self.resource.slug
@@ -71,30 +80,20 @@ def annotate_text(self, text: str) -> Iterator[TextAnnotation]:
7180
def ancestors(
7281
self, start_curies: Union[CURIE, List[CURIE]], predicates: List[PRED_CURIE] = None
7382
) -> Iterable[CURIE]:
74-
query = "hierarchicalAncestors"
83+
func = self.client.iter_hierarchical_ancestors
7584
if predicates:
7685
if predicates == [IS_A]:
77-
query = "ancestors"
86+
func = self.client.iter_ancestors
7887
elif IS_A not in predicates:
7988
raise NotImplementedError(f"OLS always include {IS_A}, you selected: {predicates}")
8089
if not isinstance(start_curies, list):
8190
start_curies = [start_curies]
8291
ancs = set()
8392
ontology = self.focus_ontology
8493
for curie in start_curies:
85-
term_id = self.curie_to_uri(curie)
86-
# must be double encoded https://www.ebi.ac.uk/ols/docs/api
87-
term_id_quoted = urllib.parse.quote(term_id, safe="")
88-
term_id_quoted = urllib.parse.quote(term_id_quoted, safe="")
89-
url = f"{self.ols_base_url}/ontologies/{ontology}/terms/{term_id_quoted}/{query}"
90-
logging.debug(f"URL={url}")
91-
result = requests.get(url)
92-
obj = result.json()
93-
if result.status_code == 200 and "_embedded" in obj:
94-
ancs.update([x["obo_id"] for x in obj["_embedded"]["terms"]])
95-
else:
96-
logging.debug(f"No ancestors for {url} (maybe ontology not indexed in OLS?)")
97-
ancs = []
94+
iri = self.curie_to_uri(curie)
95+
records = func(ontology=ontology, iri=iri)
96+
ancs.update(record["obo_id"] for record in records)
9897
return list(ancs)
9998

10099
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -119,7 +118,6 @@ def basic_search(
119118
query_fields.update(["description"])
120119

121120
params = {
122-
"q": search_term,
123121
"type": "class",
124122
"local": "true",
125123
"fieldList": "iri,label",
@@ -134,19 +132,10 @@ def basic_search(
134132
if self.focus_ontology:
135133
params["ontology"] = self.focus_ontology.lower()
136134

137-
finished = False
138-
while not finished:
139-
response = requests.get(f"{self.ols_base_url}/search", params=params)
140-
logging.debug(f"URL={response.url}")
141-
body = response.json()
142-
params["start"] += params["rows"]
143-
if params["start"] > body["response"]["numFound"]:
144-
finished = True
145-
for doc in body["response"]["docs"]:
146-
curie = self.uri_to_curie(doc["iri"])
147-
label = doc["label"]
148-
self.label_cache[curie] = label
149-
yield curie
135+
for record in self.client.search(search_term, params=params):
136+
curie = self.uri_to_curie(record["iri"])
137+
self.label_cache[curie] = record["label"]
138+
yield curie
150139

151140
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
152141
# Implements: MappingsInterface
@@ -199,3 +188,15 @@ def convert_payload(self, container: oxo.Container) -> Iterator[Mapping]:
199188
# msdoc.mapping_set.mappings.append(m)
200189
# n += 1
201190
# return n
191+
192+
193+
class OlsImplementation(BaseOlsImplementation):
194+
"""Implementation for the EBI OLS instance."""
195+
196+
ols_client_class = EBIClient
197+
198+
199+
class TIBOlsImplementation(BaseOlsImplementation):
200+
"""Implementation for the TIB Hannover OLS instance."""
201+
202+
ols_client_class = TIBClient

src/oaklib/interfaces/basic_ontology_interface.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
MISSING_PREFIX_MAP = dict(
3232
EFO="http://www.ebi.ac.uk/efo/EFO_",
3333
SCTID="http://snomed.info/id/",
34+
ORPHANET="http://www.orpha.net/ORDO/Orphanet_",
3435
)
3536

3637

tests/test_implementations/test_ols.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from tests import CELLULAR_COMPONENT, CYTOPLASM, DIGIT, VACUOLE
1212

1313

14-
@unittest.skip("TODO: use mock tests, this fails whenever OLS is down")
1514
class TestOlsImplementation(unittest.TestCase):
1615
def setUp(self) -> None:
1716
oi = OlsImplementation(OntologyResource("go"))
@@ -39,7 +38,7 @@ def test_ancestors(self):
3938

4039
def test_basic_search(self):
4140
self.oi.focus_ontology = None
42-
results = list(itertools.islice(self.oi.basic_search("epilepsy"), 20))
41+
results = list(self.oi.basic_search("epilepsy"))
4342
self.assertIn("MONDO:0005027", results)
4443

4544
def test_focus_ontology_search(self):

0 commit comments

Comments
 (0)