Skip to content

Commit 7325346

Browse files
authored
[codex] fix: support info and tree for functional owl inputs (#876)
* fix: support info and tree for functional owl inputs
* fix: support mappings for functional owl inputs
1 parent 4d0a114 commit 7325346

2 files changed

Lines changed: 202 additions & 3 deletions

File tree

src/oaklib/implementations/funowl/funowl_implementation.py

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import pyhornedowl
99
import rdflib
10+
import sssom_schema as sssom
1011
from kgcl_schema.datamodel import kgcl
1112
from pyhornedowl.model import (
1213
IRI,
@@ -38,6 +39,8 @@
3839
)
3940

4041
from oaklib.datamodels import obograph
42+
from oaklib.datamodels.search import SearchConfiguration
43+
from oaklib.datamodels.search_datamodel import SearchTermSyntax
4144
from oaklib.datamodels.vocabulary import (
4245
DEPRECATED_PREDICATE,
4346
EQUIVALENT_CLASS,
@@ -59,15 +62,19 @@
5962
RDF_TYPE,
6063
RDFS_DOMAIN,
6164
RDFS_RANGE,
65+
SEMAPV,
66+
SKOS_MATCH_PREDICATES,
6267
SUBPROPERTY_OF,
6368
)
6469
from oaklib.interfaces import SearchInterface
6570
from oaklib.interfaces.basic_ontology_interface import LANGUAGE_TAG, RELATIONSHIP
71+
from oaklib.interfaces.mapping_provider_interface import MappingProviderInterface
6672
from oaklib.interfaces.obograph_interface import OboGraphInterface
6773
from oaklib.interfaces.owl_interface import OwlInterface, ReasonerConfiguration
6874
from oaklib.interfaces.patcher_interface import PatcherInterface
6975
from oaklib.types import CURIE, PRED_CURIE
7076
from oaklib.utilities.axioms.logical_definition_utilities import logical_definition_matches
77+
from oaklib.utilities.mapping.sssom_utils import inject_mapping_sources
7178

7279
logger = logging.getLogger(__name__)
7380
SERIALIZATION_ALIASES = {
@@ -139,6 +146,7 @@ class FunOwlImplementation(
139146
OboGraphInterface,
140147
PatcherInterface,
141148
SearchInterface,
149+
MappingProviderInterface,
142150
):
143151
"""
144152
An experimental partial implementation of :ref:`OwlInterface`
@@ -329,7 +337,14 @@ def _coerce_annotation_value(self, value: Any):
329337
return SimpleLiteral(str(value))
330338

331339
def _single_valued_assignment(self, curie: CURIE, property: CURIE) -> Optional[str]:
332-
values = self._ontology.get_annotations(self.curie_to_uri(curie), self.curie_to_uri(property))
340+
subject_iri = self.curie_to_uri(curie)
341+
property_iri = self.curie_to_uri(property)
342+
if subject_iri is None or property_iri is None:
343+
return None
344+
try:
345+
values = self._ontology.get_annotations(subject_iri, property_iri)
346+
except TypeError:
347+
return None
333348
if values:
334349
if len(values) > 1:
335350
logger.warning("Multiple values for %s %s = %s", curie, property, values)
@@ -429,15 +444,113 @@ def synonym_property_values(
429444
for value in alias_map.get(predicate, []):
430445
yield curie, obograph.SynonymPropertyValue(pred=pred_text, val=value)
431446

447+
def simple_mappings_by_curie(self, curie: CURIE) -> Iterable[tuple[PRED_CURIE, CURIE]]:
    """
    Yield the mappings recorded directly on an entity as (predicate, value) pairs.

    Values are read from the entity's metadata map. Entries under ``HAS_DBXREF``
    are yielded first, followed by the entries for each predicate in
    ``SKOS_MATCH_PREDICATES``, in that order.

    :param curie: identifier of the entity whose metadata is inspected
    :return: iterator over (predicate, mapped value) tuples
    """
    entity_metadata = self.entity_metadata_map(curie)
    # Database cross-references are emitted ahead of the SKOS match predicates.
    for mapping_predicate in (HAS_DBXREF, *SKOS_MATCH_PREDICATES):
        for mapped_value in entity_metadata.get(mapping_predicate, []):
            yield mapping_predicate, cast(CURIE, mapped_value)
454+
455+
def get_sssom_mappings_by_curie(self, curie: CURIE) -> Iterable[sssom.Mapping]:
    """
    Yield SSSOM mappings that involve the given identifier.

    Mappings recorded directly on ``curie`` are yielded first, with ``curie`` as
    the subject. Only when there are none, and ``curie`` has neither a label nor
    an OWL type here, is every entity (obsoletes included) scanned for mappings
    whose object is ``curie``; those are yielded with the scanned entity as subject.

    :param curie: identifier to look up, as a mapping subject or as a mapping object
    :return: iterator over mappings, each (subject, predicate, object) at most once
    """
    emitted = set()

    def _emit_once(subject_id: CURIE, predicate_id: PRED_CURIE, object_id: CURIE):
        # Generator yielding zero or one mapping, so callers can `yield from` it.
        triple = (subject_id, predicate_id, object_id)
        if triple in emitted:
            return
        emitted.add(triple)
        mapping = sssom.Mapping(
            subject_id=subject_id,
            predicate_id=predicate_id,
            object_id=object_id,
            mapping_justification=sssom.EntityReference(SEMAPV.UnspecifiedMatching.value),
        )
        inject_mapping_sources(mapping)
        yield mapping

    own_mappings = list(self.simple_mappings_by_curie(curie))
    for predicate_id, object_id in own_mappings:
        yield from _emit_once(curie, predicate_id, object_id)

    # The reverse scan is reserved for identifiers with no mappings, label or type.
    if own_mappings:
        return
    if self.label(curie) is not None:
        return
    if set(self.owl_type(curie)):
        return

    for entity in self.entities(filter_obsoletes=False):
        for predicate_id, object_id in self.simple_mappings_by_curie(entity):
            if object_id == curie:
                yield from _emit_once(entity, predicate_id, object_id)
488+
489+
def basic_search(
    self, search_term: str, config: Optional[SearchConfiguration] = None
) -> Iterable[CURIE]:
    """
    Search entities by label, identifier, alias, or database cross-reference.

    The properties searched come from ``config.properties`` (compared by their
    string form): ``LABEL``, ``IDENTIFIER``, ``ALIAS``, ``MAPPED_IDENTIFIER``, or
    ``ANYTHING`` for all of them. When none are given, labels and aliases are
    searched.

    How a value is compared with ``search_term`` depends on the configuration:

    - ``SearchTermSyntax.STARTS_WITH``: the value must start with the term
    - ``SearchTermSyntax.REGULAR_EXPRESSION``: the term is compiled as a regular
      expression and searched for anywhere in the value
    - otherwise, ``config.is_partial`` selects substring matching, and exact
      equality is used when it is not set

    ``config.force_case_insensitive`` makes every mode case-insensitive.
    Obsolete entities are skipped unless ``config.include_obsoletes_in_results``
    is set.

    :param search_term: text, prefix, or regular expression to look for
    :param config: search options; a default configuration is used when omitted
    :return: iterator over matching entity identifiers, each yielded once
    """
    if config is None:
        config = SearchConfiguration()

    # `or []` tolerates an unset properties slot as well as an empty one.
    property_names = {str(p) for p in (config.properties or [])} or {"LABEL", "ALIAS"}
    search_all = "ANYTHING" in property_names

    def _searches(property_name: str) -> bool:
        return search_all or property_name in property_names

    ignore_case = bool(config.force_case_insensitive)

    def _normalize(value: str) -> str:
        return value.lower() if ignore_case else value

    needle = _normalize(search_term)

    if config.syntax == SearchTermSyntax.STARTS_WITH:

        def matches(value: str) -> bool:
            return _normalize(value).startswith(needle)

    elif config.syntax == SearchTermSyntax.REGULAR_EXPRESSION:
        # Case folding is delegated to the regex engine, so the raw term is compiled.
        pattern = re.compile(search_term, flags=re.IGNORECASE if ignore_case else 0)

        def matches(value: str) -> bool:
            return pattern.search(value) is not None

    elif config.is_partial:

        def matches(value: str) -> bool:
            return needle in _normalize(value)

    else:

        def matches(value: str) -> bool:
            return _normalize(value) == needle

    def _entity_matches(curie: CURIE) -> bool:
        # Checks run from cheapest to most expensive and stop at the first hit.
        if _searches("LABEL"):
            label = self.label(curie)
            if label and matches(label):
                return True
        if _searches("IDENTIFIER") and matches(curie):
            return True
        if _searches("ALIAS") and any(matches(alias) for alias in self.entity_aliases(curie)):
            return True
        if _searches("MAPPED_IDENTIFIER"):
            xrefs = self.entity_metadata_map(curie).get(HAS_DBXREF, [])
            return any(matches(xref) for xref in xrefs)
        return False

    seen = set()
    for curie in self.entities(filter_obsoletes=not config.include_obsoletes_in_results):
        # Skip identifiers already yielded before doing any matching work.
        if curie in seen or not _entity_matches(curie):
            continue
        seen.add(curie)
        yield curie
541+
432542
def node(
433543
self, curie: CURIE, strict=False, include_metadata=False, expand_curies=False
434544
) -> Optional[obograph.Node]:
435545
entity_types = set(self.owl_type(curie))
436546
label = self.label(curie)
547+
node_id = cast(CURIE, self.curie_to_uri(curie)) if expand_curies else curie
548+
if node_id is None:
549+
node_id = curie
437550
if not entity_types and label is None:
438551
if strict:
439552
raise ValueError(f"Unknown entity: {curie}")
440-
return None
553+
return obograph.Node(id=node_id)
441554
if any(
442555
owl_type in entity_types
443556
for owl_type in [OWL_OBJECT_PROPERTY, OWL_ANNOTATION_PROPERTY, OWL_DATATYPE_PROPERTY]
@@ -447,7 +560,6 @@ def node(
447560
node_type = "INDIVIDUAL"
448561
else:
449562
node_type = "CLASS"
450-
node_id = cast(CURIE, self.curie_to_uri(curie)) if expand_curies else curie
451563
meta = None
452564
if include_metadata:
453565
meta = obograph.Meta()

tests/test_cli.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,21 @@
6666
TEST_SYNONYMIZER_OBO = "simpleobo:" + str(INPUT_DIR / "synonym-test.obo")
6767
RULES_FILE = INPUT_DIR / "matcher_rules.yaml"
6868
SYNONYMIZER_RULES_FILE = INPUT_DIR / "cli-synonymizer-rules.yaml"
69+
MINIMAL_CL_FUNOWL = """\
70+
Prefix(obo:=<http://purl.obolibrary.org/obo/>)
71+
Prefix(oboInOwl:=<http://www.geneontology.org/formats/oboInOwl#>)
72+
Prefix(owl:=<http://www.w3.org/2002/07/owl#>)
73+
Prefix(rdfs:=<http://www.w3.org/2000/01/rdf-schema#>)
74+
Prefix(xsd:=<http://www.w3.org/2001/XMLSchema#>)
75+
76+
Ontology(<http://example.org/cl-edit.owl>
77+
Declaration(Class(obo:CL_0000540))
78+
AnnotationAssertion(oboInOwl:hasDbXref obo:CL_0000540 "BTO:0000938")
79+
AnnotationAssertion(oboInOwl:hasDbXref obo:CL_0000540 "CALOHA:TS-0683")
80+
AnnotationAssertion(rdfs:label obo:CL_0000540 "neuron"^^xsd:string)
81+
SubClassOf(obo:CL_0000540 obo:CL_0000000)
82+
)
83+
"""
6984

7085

7186
def _outpath(test: str, fmt: str = "tmp") -> str:
@@ -94,6 +109,10 @@ def _out(self, path: Optional[str] = TEST_OUT) -> str:
94109
with open(path) as f:
95110
return "".join(f.readlines())
96111

112+
def _write_minimal_cl_funowl(self, path: Path) -> Path:
    """
    Write the ``MINIMAL_CL_FUNOWL`` fixture text to ``path`` as UTF-8.

    :param path: destination file
    :return: the same ``path``, so the call can be used inline
    """
    with path.open("w", encoding="utf-8") as handle:
        handle.write(MINIMAL_CL_FUNOWL)
    return path
115+
97116
def test_main_help(self):
98117
result = self.runner.invoke(main, ["--help"])
99118
out = result.stdout
@@ -115,6 +134,74 @@ def test_input_type_and_sniff_for_functional_owl_suffix(self):
115134
self.assertEqual(0, result.exit_code, result.output)
116135
self.assertIn("nucleus", result.stdout)
117136

137+
def test_functional_owl_info_and_tree_with_search_and_undeclared_ancestors(self):
    """
    Run ``info`` and ``tree`` on the minimal fixture saved as ``cl-edit.owl``.

    Both commands are invoked with ``-I ofn``. ``info`` is given the search
    term "neuron" and its output file must mention CL:0000540 and "neuron".
    ``tree`` is given CL:0000540 and its stdout must also mention CL:0000000.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        disguised_path = self._write_minimal_cl_funowl(workdir / "cl-edit.owl")
        info_path = workdir / "info.tsv"
        common_args = ["-I", "ofn", "-i", str(disguised_path)]

        info_result = self.runner.invoke(
            main, [*common_args, "info", "neuron", "-o", str(info_path)]
        )
        self.assertEqual(0, info_result.exit_code, info_result.output)
        info_text = info_path.read_text(encoding="utf-8")
        for expected in ("CL:0000540", "neuron"):
            self.assertIn(expected, info_text)

        tree_result = self.runner.invoke(
            main, [*common_args, "tree", "-p", "i", "CL:0000540"]
        )
        self.assertEqual(0, tree_result.exit_code, tree_result.output)
        for expected in ("CL:0000540", "CL:0000000"):
            self.assertIn(expected, tree_result.stdout)
155+
156+
def test_functional_owl_mappings(self):
157+
with tempfile.TemporaryDirectory() as tmpdir:
158+
disguised_path = self._write_minimal_cl_funowl(Path(tmpdir) / "cl-edit.owl")
159+
outpath = Path(tmpdir) / "mappings.csv"
160+
reverse_outpath = Path(tmpdir) / "reverse-mappings.csv"
161+
162+
result = self.runner.invoke(
163+
main,
164+
[
165+
"-I",
166+
"ofn",
167+
"-i",
168+
str(disguised_path),
169+
"mappings",
170+
"CL:0000540",
171+
"-O",
172+
"csv",
173+
"-o",
174+
str(outpath),
175+
],
176+
)
177+
178+
self.assertEqual(0, result.exit_code, result.output)
179+
output = outpath.read_text(encoding="utf-8")
180+
self.assertIn("CL:0000540", output)
181+
self.assertIn("BTO:0000938", output)
182+
self.assertIn("CALOHA:TS-0683", output)
183+
184+
reverse_result = self.runner.invoke(
185+
main,
186+
[
187+
"-I",
188+
"ofn",
189+
"-i",
190+
str(disguised_path),
191+
"mappings",
192+
"BTO:0000938",
193+
"-O",
194+
"csv",
195+
"-o",
196+
str(reverse_outpath),
197+
],
198+
)
199+
200+
self.assertEqual(0, reverse_result.exit_code, reverse_result.output)
201+
reverse_output = reverse_outpath.read_text(encoding="utf-8")
202+
self.assertIn("CL:0000540", reverse_output)
203+
self.assertIn("BTO:0000938", reverse_output)
204+
118205
def test_multilingual(self):
119206
for input_arg in [INPUT_DIR / "hp-international-test.db"]:
120207
results = self.runner.invoke(main, ["-i", str(input_arg), "languages"])

0 commit comments

Comments
 (0)