Skip to content

Commit 6a6acdf

Browse files
committed
feat(ogit_bridge): multi-parent OntologySchema for OWL multi-inheritance
OWL permits a class to declare multiple `rdfs:subClassOf` triples (multi-inheritance); biomedical ontologies (FMA's ~75k anatomical classes, ChEBI, GO) use it extensively. Production `OntologySchema` previously stored a single `parent: Option<Box<str>>` and the `from_triples` loop silently overwrote on each new `subClassOf` triple — the second declared parent won, the first was discarded without warning.

Identified during the FMA hydrator spec review (lance-graph/.claude/specs/pr-d-1-fma-owl-hydrator.md): Pattern D hydrates OWL ontologies through this code path, and silently losing half a class's inheritance edges would corrupt any downstream closure-based reasoning (is_ancestor, type-gated propagation).

Changes:

1. `EntityClass` gains `extra_parents: Vec<Box<str>>` — additional parents beyond the first-observed. `parent: Option<Box<str>>` keeps the first observed parent (back-compat for single-parent consumers that read `.parent` directly).
2. `EntityClass::parents()` accessor — iterator over every parent in source order. Use this in preference to reading `.parent` when the caller's logic must cover multi-inheritance.
3. `from_triples` no longer silently overwrites: the first subClassOf populates `parent`; later ones append to `extra_parents` (dedup against the existing primary + extras).
4. `is_ancestor` walks the multi-parent DAG via BFS instead of the linear parent chain — this is the case the previous implementation silently missed. MAX_VISITS=4096 caps total work (defensive guard against cycles); single-parent chains still terminate at parent==None in the original O(depth) shape.

3 new regression tests:

- `is_ancestor_multi_parent_direct` — two `subClassOf` triples on one class; both parents reachable.
- `is_ancestor_multi_parent_transitive_through_second_parent` — the bug case: ancestor only reachable through the second parent chain. Would fail on the previous implementation.
- `entity_class_parents_iterator_yields_all` — parents() surfaces every declared parent.

All 18 ogit_bridge::schema tests pass; lib fmt + clippy clean.
1 parent 428f496 commit 6a6acdf

1 file changed

Lines changed: 149 additions & 17 deletions

File tree

src/hpc/ogit_bridge/schema.rs

Lines changed: 149 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,19 @@ pub struct EntityClass {
8484
pub iri: Box<str>,
8585
/// Human-readable label (`rdfs:label`); empty string when absent.
8686
pub label: Box<str>,
87-
/// Parent class IRI (`rdfs:subClassOf`); `None` for root classes.
87+
/// First-observed parent class IRI (`rdfs:subClassOf`); `None` for
88+
/// root classes. OWL allows a class to declare multiple
89+
/// `rdfs:subClassOf` targets (multi-inheritance); the second and
90+
/// later parents land in [`Self::extra_parents`]. Consumers wanting
91+
/// the full parent set should iterate via [`Self::parents`].
8892
pub parent: Option<Box<str>>,
93+
/// Additional parent IRIs beyond the first. Empty for single-parent
94+
/// classes (the common case in RDFS-style ontologies); non-empty when
95+
/// the source declares multi-inheritance (common in OWL biomedical
96+
/// ontologies — FMA, ChEBI, etc.). Order is source order of the
97+
/// surplus `rdfs:subClassOf` triples; the first parent stays in
98+
/// [`Self::parent`].
99+
pub extra_parents: Vec<Box<str>>,
89100
/// Properties declared with `ogit:mandatory`.
90101
pub mandatory: Vec<Property>,
91102
/// Properties declared with `ogit:optional`.
@@ -99,11 +110,27 @@ pub struct EntityClass {
99110
}
100111

101112
impl EntityClass {
113+
/// Iterator over every parent class IRI declared on this entity —
114+
/// the first-observed [`Self::parent`] (if present) followed by
115+
/// every IRI in [`Self::extra_parents`]. Empty when the class is
116+
/// a root.
117+
///
118+
/// Use this in preference to reading `.parent` directly when the
119+
/// caller's logic should cover multi-inheritance — e.g. transitive
120+
/// closure walks like [`OntologySchema::is_ancestor`].
121+
pub fn parents(&self) -> impl Iterator<Item = &str> {
122+
self.parent
123+
.as_deref()
124+
.into_iter()
125+
.chain(self.extra_parents.iter().map(|s| s.as_ref()))
126+
}
127+
102128
fn new(iri: Box<str>) -> Self {
103129
EntityClass {
104130
iri,
105131
label: "".into(),
106132
parent: None,
133+
extra_parents: Vec::new(),
107134
mandatory: Vec::new(),
108135
optional: Vec::new(),
109136
indexed: Vec::new(),
@@ -365,7 +392,19 @@ impl OntologySchema {
365392
RDFS_SUB_CLASS_OF => {
366393
if let Some(parent_iri) = node_iri(&triple.object) {
367394
if let Some(cls) = entities.get_mut(subject_iri) {
368-
cls.parent = Some(parent_iri.into());
395+
// First parent → `parent`; subsequent
396+
// parents → `extra_parents` (multi-inheritance
397+
// as permitted by OWL; common in biomedical
398+
// ontologies like FMA / ChEBI). The previous
399+
// behaviour silently overwrote — the second
400+
// declared parent won, the first was discarded.
401+
if cls.parent.is_none() {
402+
cls.parent = Some(parent_iri.into());
403+
} else if cls.parent.as_deref() != Some(parent_iri)
404+
&& !cls.extra_parents.iter().any(|p| p.as_ref() == parent_iri)
405+
{
406+
cls.extra_parents.push(parent_iri.into());
407+
}
369408
}
370409
}
371410
}
@@ -641,26 +680,39 @@ impl OntologySchema {
641680
return false;
642681
}
643682

644-
// Walk the parent chain from descendant upward, looking for ancestor.
645-
// Defensive depth cap — see method docstring.
646-
const MAX_DEPTH: usize = 64;
647-
let mut current: &str = descendant;
648-
for _ in 0..MAX_DEPTH {
683+
// Depth-first worklist search (the frontier `Vec` is popped LIFO)
// over the multi-parent DAG. The previous version walked a
684+
// linear chain via `EntityClass.parent` alone — correct for
685+
// single-inheritance schemas but missed ancestors reachable
686+
// only through `EntityClass.extra_parents` (OWL multi-inheritance,
687+
// common in FMA / ChEBI). MAX_VISITS bounds total work for any
688+
// cycle that slipped past upstream antisymmetry checks.
689+
const MAX_VISITS: usize = 4096;
690+
let mut frontier: Vec<&str> = vec![descendant];
691+
let mut visited: std::collections::HashSet<&str> = std::collections::HashSet::new();
692+
visited.insert(descendant);
693+
let mut visits = 0usize;
694+
while let Some(current) = frontier.pop() {
695+
visits += 1;
696+
if visits > MAX_VISITS {
697+
return false;
698+
}
649699
let entity = match self.entities.get(current) {
650700
Some(e) => e,
651-
None => return false, // descendant unknown — no chain to walk
652-
};
653-
let parent = match entity.parent.as_deref() {
654-
Some(p) => p,
655-
None => return false, // reached root without finding ancestor
701+
// Walk hit an unknown IRI mid-chain — that subtree of the
702+
// closure terminates here. Continue exploring siblings
703+
// rather than aborting, since other parents may yet reach
704+
// `ancestor`.
705+
None => continue,
656706
};
657-
if parent == ancestor {
658-
return true;
707+
for parent in entity.parents() {
708+
if parent == ancestor {
709+
return true;
710+
}
711+
if visited.insert(parent) {
712+
frontier.push(parent);
713+
}
659714
}
660-
current = parent;
661715
}
662-
// Exceeded depth cap — treat as not-an-ancestor (defensive; this
663-
// path should be unreachable on a well-formed schema).
664716
false
665717
}
666718
}
@@ -960,4 +1012,84 @@ mod tests {
9601012
assert!(!schema.is_ancestor("ogit:Heel", "ogit:OtherHip"));
9611013
assert!(!schema.is_ancestor("ogit:OtherHeel", "ogit:Hip"));
9621014
}
1015+
1016+
// -----------------------------------------------------------------------
1017+
// Multi-inheritance — OWL biomedical-ontology shape (FMA, ChEBI, etc.)
1018+
// -----------------------------------------------------------------------
1019+
1020+
/// A class declaring two `rdfs:subClassOf` triples must reach both
1021+
/// ancestors through `is_ancestor`. The previous single-parent
1022+
/// implementation silently picked one and discarded the other.
1023+
#[test]
1024+
fn is_ancestor_multi_parent_direct() {
1025+
// Hand-written Turtle mimicking an OWL fragment: ogit:Hybrid is both a kind of
1026+
// ogit:Animal AND a kind of ogit:Mineral.
1027+
let src = "\
1028+
@prefix ogit: <http://www.purl.org/ogit/> .\n\
1029+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\
1030+
ogit:Animal a rdfs:Class .\n\
1031+
ogit:Mineral a rdfs:Class .\n\
1032+
ogit:Hybrid a rdfs:Class ; rdfs:subClassOf ogit:Animal ; rdfs:subClassOf ogit:Mineral .";
1033+
let triples = TurtleParser::parse(src).unwrap();
1034+
let schema = OntologySchema::from_triples(&triples).unwrap();
1035+
// Both parents must be reachable from the hybrid.
1036+
assert!(schema.is_ancestor("ogit:Animal", "ogit:Hybrid"));
1037+
assert!(schema.is_ancestor("ogit:Mineral", "ogit:Hybrid"));
1038+
// Reverse direction still false (antisymmetry).
1039+
assert!(!schema.is_ancestor("ogit:Hybrid", "ogit:Animal"));
1040+
assert!(!schema.is_ancestor("ogit:Hybrid", "ogit:Mineral"));
1041+
}
1042+
1043+
/// Multi-parent transitivity: an ancestor reachable only through
1044+
/// the SECOND parent of a multi-inheritance class must still be
1045+
/// found. This is the case the previous linear-walk implementation
1046+
/// silently missed.
1047+
#[test]
1048+
fn is_ancestor_multi_parent_transitive_through_second_parent() {
1049+
// Two disjoint chains converge at ogit:Hybrid:
1050+
// ogit:Root1 ← ogit:Mid1 ← ogit:Hybrid (via "first" parent)
1051+
// ogit:Root2 ← ogit:Mid2 ← ogit:Hybrid (via "second" parent)
1052+
let src = "\
1053+
@prefix ogit: <http://www.purl.org/ogit/> .\n\
1054+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\
1055+
ogit:Root1 a rdfs:Class .\n\
1056+
ogit:Mid1 a rdfs:Class ; rdfs:subClassOf ogit:Root1 .\n\
1057+
ogit:Root2 a rdfs:Class .\n\
1058+
ogit:Mid2 a rdfs:Class ; rdfs:subClassOf ogit:Root2 .\n\
1059+
ogit:Hybrid a rdfs:Class ; rdfs:subClassOf ogit:Mid1 ; rdfs:subClassOf ogit:Mid2 .";
1060+
let triples = TurtleParser::parse(src).unwrap();
1061+
let schema = OntologySchema::from_triples(&triples).unwrap();
1062+
// Reachable through first parent chain.
1063+
assert!(schema.is_ancestor("ogit:Root1", "ogit:Hybrid"));
1064+
assert!(schema.is_ancestor("ogit:Mid1", "ogit:Hybrid"));
1065+
// Reachable through second parent chain — the case the
1066+
// previous implementation missed.
1067+
assert!(schema.is_ancestor("ogit:Root2", "ogit:Hybrid"));
1068+
assert!(schema.is_ancestor("ogit:Mid2", "ogit:Hybrid"));
1069+
}
1070+
1071+
/// The `parents()` iterator must surface both `parent` and every
1072+
/// `extra_parents` IRI in source order.
1073+
#[test]
1074+
fn entity_class_parents_iterator_yields_all() {
1075+
let src = "\
1076+
@prefix ogit: <http://www.purl.org/ogit/> .\n\
1077+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\
1078+
ogit:A a rdfs:Class .\n\
1079+
ogit:B a rdfs:Class .\n\
1080+
ogit:C a rdfs:Class .\n\
1081+
ogit:X a rdfs:Class ; rdfs:subClassOf ogit:A ; rdfs:subClassOf ogit:B ; rdfs:subClassOf ogit:C .";
1082+
let triples = TurtleParser::parse(src).unwrap();
1083+
let schema = OntologySchema::from_triples(&triples).unwrap();
1084+
let x = schema.entities.get("ogit:X").expect("ogit:X declared");
1085+
let parents: Vec<&str> = x.parents().collect();
1086+
assert_eq!(parents.len(), 3, "expected 3 parents, got {parents:?}");
1087+
// First parent populates `parent`; the rest go to extra_parents.
1088+
// Source-order is preserved within extra_parents but the "first"
1089+
// parent depends on triple processing order, so just check set.
1090+
let parent_set: std::collections::HashSet<&str> = parents.iter().copied().collect();
1091+
assert!(parent_set.contains("ogit:A"));
1092+
assert!(parent_set.contains("ogit:B"));
1093+
assert!(parent_set.contains("ogit:C"));
1094+
}
9631095
}

0 commit comments

Comments
 (0)