Skip to content

Commit 1e83f07

Browse files
authored
Merge pull request #50 from AdaWorldAPI/claude/fma-ttl-hydrate
osint/fma: hydrate the bake from a TTL source, not hand-typed nodes
2 parents 358cd57 + aa66ab2 commit 1e83f07

5 files changed

Lines changed: 303 additions & 80 deletions

File tree

cockpit/public/fma.soa

-4.14 KB
Binary file not shown.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# fma-heart.fixture.ttl — FIXTURE, NOT the real FMA.
2+
#
3+
# A faithful but hand-authored heart subtree in the canonical FMA predicate
4+
# set. The real Foundational Model of Anatomy (266 MB fma.owl, ~1.5M triples,
5+
# OGIT contextId 13, dcterms:source AdaWorldAPI/MedCare-rs bioportal-ontologies)
6+
# hydrates through lance-graph-rdf / lance_graph_ontology::hydrate_fma at the
7+
# spine; this light q2 bake mirrors that shape on a subtree so /fma renders
8+
# without the lance/datafusion closure that this bake deliberately excludes.
9+
#
10+
# Line-oriented Turtle subset: one `subject predicate object .` per line.
11+
# Predicates mirror the canonical hydrator set (pr-d-1-fma-owl-hydrator):
12+
# bfo:part_of → partonomy (drives the HHTL cascade tiers)
13+
# rdfs:subClassOf → cross-cutting tissue type (the is-a ceiling)
14+
15+
@prefix fma: <http://purl.org/sig/ont/fma/> .
16+
@prefix bfo: <http://purl.obolibrary.org/obo/> .
17+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
18+
19+
# ── chambers regional-part-of the heart ──
20+
fma:Left_atrium bfo:part_of fma:Heart .
21+
fma:Right_atrium bfo:part_of fma:Heart .
22+
fma:Left_ventricle bfo:part_of fma:Heart .
23+
fma:Right_ventricle bfo:part_of fma:Heart .
24+
25+
# ── each chamber's wall layers, each a subClassOf its tissue type ──
26+
fma:Myocardium_of_left_atrium bfo:part_of fma:Left_atrium .
27+
fma:Myocardium_of_left_atrium rdfs:subClassOf fma:Cardiac_muscle_tissue .
28+
fma:Endocardium_of_left_atrium bfo:part_of fma:Left_atrium .
29+
fma:Endocardium_of_left_atrium rdfs:subClassOf fma:Endothelium .
30+
fma:Epicardium_of_left_atrium bfo:part_of fma:Left_atrium .
31+
fma:Epicardium_of_left_atrium rdfs:subClassOf fma:Mesothelium .
32+
33+
fma:Myocardium_of_right_atrium bfo:part_of fma:Right_atrium .
34+
fma:Myocardium_of_right_atrium rdfs:subClassOf fma:Cardiac_muscle_tissue .
35+
fma:Endocardium_of_right_atrium bfo:part_of fma:Right_atrium .
36+
fma:Endocardium_of_right_atrium rdfs:subClassOf fma:Endothelium .
37+
fma:Epicardium_of_right_atrium bfo:part_of fma:Right_atrium .
38+
fma:Epicardium_of_right_atrium rdfs:subClassOf fma:Mesothelium .
39+
40+
fma:Myocardium_of_left_ventricle bfo:part_of fma:Left_ventricle .
41+
fma:Myocardium_of_left_ventricle rdfs:subClassOf fma:Cardiac_muscle_tissue .
42+
fma:Endocardium_of_left_ventricle bfo:part_of fma:Left_ventricle .
43+
fma:Endocardium_of_left_ventricle rdfs:subClassOf fma:Endothelium .
44+
fma:Epicardium_of_left_ventricle bfo:part_of fma:Left_ventricle .
45+
fma:Epicardium_of_left_ventricle rdfs:subClassOf fma:Mesothelium .
46+
47+
fma:Myocardium_of_right_ventricle bfo:part_of fma:Right_ventricle .
48+
fma:Myocardium_of_right_ventricle rdfs:subClassOf fma:Cardiac_muscle_tissue .
49+
fma:Endocardium_of_right_ventricle bfo:part_of fma:Right_ventricle .
50+
fma:Endocardium_of_right_ventricle rdfs:subClassOf fma:Endothelium .
51+
fma:Epicardium_of_right_ventricle bfo:part_of fma:Right_ventricle .
52+
fma:Epicardium_of_right_ventricle rdfs:subClassOf fma:Mesothelium .
53+

crates/osint-bake/src/bin/fma.rs

Lines changed: 156 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
//! FMA anatomy slice — the "real test" of the dual-membership lattice.
22
//!
3-
//! Stands up a small Foundational-Model-of-Anatomy-shaped slice of the **heart**
4-
//! (~120 nodes: organ → chambers → walls → tissues → cells) and proves that one
5-
//! node resolves to BOTH addresses at once:
3+
//! **Hydrated from an FMA `.ttl` fixture** (`data/fma-heart.fixture.ttl`) via
4+
//! [`hydrate_fma`] — no longer hand-built. Stands up a Foundational-Model-of-
5+
//! Anatomy-shaped slice of the **heart** (organ → chambers → wall layers) and
6+
//! proves that one node resolves to BOTH addresses at once:
67
//!
7-
//! * **part-of position** (basin-local): HEEL=organ, HIP=chamber, TWIG=wall,
8-
//! LEAF=structure, family=chamber-basin — where the node *is* in the body.
8+
//! * **part-of position** (basin-local): HEEL=[Organ:Heart], HIP=[Chamber:id],
9+
//! TWIG=[Wall:id] — where the node *is* in the body, read straight off the key
10+
//! (the partonomy walk fills the cascade; deeper tiers stay 0 until the real
11+
//! 75K FMA hydrates tissues/cells through the same walk).
912
//! * **leaf-limited global type** (the CEILING pole, HEEL=HIP=TWIG=0xFFFF,
1013
//! LEAF=type): "cardiac muscle tissue", "endothelium" — cross-cutting types
1114
//! that appear in *every* chamber. The deepest sentinel run (through TWIG)
@@ -41,6 +44,7 @@
4144
//! Run from the workspace root: `cargo run -p osint-bake --bin fma`
4245
4346
use lance_graph_contract::canonical_node::NodeGuid;
47+
use osint_bake::fma_ttl;
4448
use std::path::{Path, PathBuf};
4549

4650
/// The CEILING global-category pole (HEEL=HIP=0xFFFF; sentinel through TWIG = leaf-grain).
@@ -95,7 +99,10 @@ struct Builder {
9599

96100
impl Builder {
97101
fn new() -> Self {
98-
Self { nodes: Vec::new(), edges: Vec::new() }
102+
Self {
103+
nodes: Vec::new(),
104+
edges: Vec::new(),
105+
}
99106
}
100107

101108
/// A part-of node addressed by its `[kind-mixin : instance]` HHTL cascade.
@@ -116,14 +123,26 @@ impl Builder {
116123
let i = self.nodes.len();
117124
let key = NodeGuid::new_v2(
118125
CLASSID_FMA,
119-
tier(MX_ORGAN, ID_HEART), // HEEL [Organ:Heart]
120-
if chamber > 0 { tier(MX_CHAMBER, chamber) } else { 0 }, // HIP [Chamber:id]
121-
if wall > 0 { tier(MX_WALL, wall) } else { 0 }, // TWIG [Wall:id]
122-
if tissue > 0 { tier(MX_TISSUE, tissue) } else { 0 }, // LEAF [Tissue:id]
123-
if cell > 0 { tier(MX_CELL, cell) } else { 0 }, // family[Cell:id]
124-
i as u16, // identity — stable node id
126+
tier(MX_ORGAN, ID_HEART), // HEEL [Organ:Heart]
127+
if chamber > 0 {
128+
tier(MX_CHAMBER, chamber)
129+
} else {
130+
0
131+
}, // HIP [Chamber:id]
132+
if wall > 0 { tier(MX_WALL, wall) } else { 0 }, // TWIG [Wall:id]
133+
if tissue > 0 {
134+
tier(MX_TISSUE, tissue)
135+
} else {
136+
0
137+
}, // LEAF [Tissue:id]
138+
if cell > 0 { tier(MX_CELL, cell) } else { 0 }, // family[Cell:id]
139+
i as u16, // identity — stable node id
125140
);
126-
self.nodes.push(Node { label: label.to_string(), class, key });
141+
self.nodes.push(Node {
142+
label: label.to_string(),
143+
class,
144+
key,
145+
});
127146
i
128147
}
129148

@@ -132,14 +151,18 @@ impl Builder {
132151
let i = self.nodes.len();
133152
let key = NodeGuid::new_v2(
134153
CLASSID_FMA,
135-
CEILING, // HEEL sentinel
136-
CEILING, // HIP sentinel
137-
CEILING, // TWIG sentinel → leaf-grain ("limited to the leaf")
154+
CEILING, // HEEL sentinel
155+
CEILING, // HIP sentinel
156+
CEILING, // TWIG sentinel → leaf-grain ("limited to the leaf")
138157
type_idx, // LEAF — the sole discriminator
139158
0, // family — global, no basin
140159
i as u16,
141160
);
142-
self.nodes.push(Node { label: label.to_string(), class: C_TYPE, key });
161+
self.nodes.push(Node {
162+
label: label.to_string(),
163+
class: C_TYPE,
164+
key,
165+
});
143166
i
144167
}
145168

@@ -148,69 +171,120 @@ impl Builder {
148171
}
149172
}
150173

151-
fn build_heart() -> Builder {
152-
let mut b = Builder::new();
174+
/// Embedded FMA heart fixture — real class names + the canonical FMA predicate
175+
/// set. The production path hydrates the 266 MB `fma.owl` through
176+
/// `lance-graph-rdf` at the spine; this light bake hydrates the fixture so
177+
/// `/fma` renders without the lance/datafusion closure. See
178+
/// `data/fma-heart.fixture.ttl`.
179+
const FMA_TTL: &str = include_str!("../../data/fma-heart.fixture.ttl");
180+
181+
/// Hydrate an FMA `.ttl` fragment into the bake's [`Builder`] — the light-bake
182+
/// twin of `lance_graph_ontology::hydrate_fma`. Walk the `bfo:part_of` partonomy
183+
/// into the canonical HHTL cascade (each node's sibling-rank at each depth → the
184+
/// 8:8 `[mixin:instance]` tier), and project each `rdfs:subClassOf` onto the
185+
/// cross-cutting global-type ceiling. Depth (organ→chamber→wall→…) is the
186+
/// distance from the partonomy root; the walk itself names no heart IRI, so the
187+
/// real 75K FMA can reuse it — but `part_of_node` still pins HEEL to [Organ:Heart].
188+
fn hydrate_fma(ttl: &str) -> Builder {
189+
use std::collections::{BTreeMap, BTreeSet, VecDeque};
190+
let frag = fma_ttl::parse(ttl);
153191

154-
// ── cross-cutting global TYPE categories (leaf-limited, ceiling pole) ──
155-
// Each is the is-a target for the matching tissue in EVERY chamber.
156-
let types = [
157-
"Cardiac muscle tissue",
158-
"Fibrous tissue",
159-
"Endothelium",
160-
"Elastic tissue",
161-
"Mesothelium",
162-
"Adipose tissue",
163-
];
164-
let type_idx: Vec<usize> = types
192+
// child → parent (part_of); parent → IRI-sorted children (stable sibling
193+
// ranks ⇒ a reproducible, byte-deterministic asset).
194+
let parent_of: BTreeMap<&str, &str> = frag
195+
.part_of
165196
.iter()
166-
.enumerate()
167-
.map(|(k, t)| b.type_node(t, k as u16))
197+
.map(|(c, p)| (c.as_str(), p.as_str()))
168198
.collect();
199+
let mut children: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
200+
let mut in_tree: BTreeSet<&str> = BTreeSet::new();
201+
for (c, p) in &frag.part_of {
202+
children.entry(p.as_str()).or_default().push(c.as_str());
203+
in_tree.insert(c.as_str());
204+
in_tree.insert(p.as_str());
205+
}
206+
for v in children.values_mut() {
207+
v.sort_unstable();
208+
}
209+
// 1-based sibling rank under the parent — the tier identity byte (0 = root).
210+
let rank_of = |node: &str| -> u8 {
211+
parent_of
212+
.get(node)
213+
.and_then(|p| children[p].iter().position(|&c| c == node))
214+
.map_or(0, |k| (k as u8) + 1)
215+
};
216+
// (depth, [chamber, wall, tissue, cell]) — sibling ranks along the ancestor
217+
// chain, root-first; depth 0 = the partonomy root (the organ).
218+
let path_of = |node: &str| -> (u8, [u8; 4]) {
219+
let mut chain: Vec<&str> = Vec::new();
220+
let mut cur = node;
221+
while let Some(&p) = parent_of.get(cur) {
222+
chain.push(cur);
223+
cur = p;
224+
}
225+
chain.reverse();
226+
let mut ids = [0u8; 4];
227+
for (k, &n) in chain.iter().enumerate().take(4) {
228+
ids[k] = rank_of(n);
229+
}
230+
(chain.len() as u8, ids)
231+
};
232+
let class_for = |depth: u8| match depth {
233+
0 => C_ORGAN,
234+
1 => C_CHAMBER,
235+
2 => C_WALL,
236+
3 => C_TISSUE,
237+
_ => C_CELL,
238+
};
169239

170-
// each wall carries two tissues, each is-a one of the global types above.
171-
// (wall label, [(tissue label, type index)])
172-
let walls: [(&str, [(&str, usize); 2]); 3] = [
173-
("myocardium", [("muscle layer", 0), ("fibrous skeleton", 1)]),
174-
("endocardium", [("endothelial lining", 2), ("elastic layer", 3)]),
175-
("epicardium", [("mesothelial layer", 4), ("subepicardial fat", 5)]),
176-
];
177-
// a couple of cell types per tissue (depth + scale; part-of only).
178-
let cells: [&str; 2] = ["cell A", "cell B"];
179-
180-
// ── the heart organ — HEEL=[Organ:Heart], deeper tiers zero ──
181-
let heart = b.part_of_node("Heart", C_ORGAN, 0, 0, 0, 0);
182-
183-
let chambers = ["left atrium", "right atrium", "left ventricle", "right ventricle"];
184-
for (ci, chamber) in chambers.iter().enumerate() {
185-
let cid = (ci as u8) + 1; // chamber instance 1..4 (HIP identity)
186-
let ch = b.part_of_node(chamber, C_CHAMBER, cid, 0, 0, 0);
187-
b.edge(ch, heart, REL_PART_OF);
240+
let mut b = Builder::new();
241+
let mut idx: BTreeMap<&str, usize> = BTreeMap::new();
188242

189-
for (wi, (wall, tissues)) in walls.iter().enumerate() {
190-
let wid = (wi as u8) + 1; // wall instance 1..3 (TWIG identity)
191-
let w = b.part_of_node(&format!("{chamber} {wall}"), C_WALL, cid, wid, 0, 0);
192-
b.edge(w, ch, REL_PART_OF);
243+
// BFS from the root(s) so every parent is built before its children (the
244+
// edge list references node indices).
245+
let mut queue: VecDeque<&str> = in_tree
246+
.iter()
247+
.copied()
248+
.filter(|n| !parent_of.contains_key(n))
249+
.collect();
250+
while let Some(n) = queue.pop_front() {
251+
if idx.contains_key(n) {
252+
continue;
253+
}
254+
let (depth, ids) = path_of(n);
255+
let node = b.part_of_node(
256+
&fma_ttl::label_of(n),
257+
class_for(depth),
258+
ids[0],
259+
ids[1],
260+
ids[2],
261+
ids[3],
262+
);
263+
idx.insert(n, node);
264+
if let Some(cs) = children.get(n) {
265+
queue.extend(cs.iter().copied());
266+
}
267+
}
193268

194-
for (ti, (tissue, gtype)) in tissues.iter().enumerate() {
195-
let tid = (ti as u8) + 1; // tissue instance 1..2 (LEAF identity)
196-
let t = b.part_of_node(&format!("{chamber} {tissue}"), C_TISSUE, cid, wid, tid, 0);
197-
b.edge(t, w, REL_PART_OF);
198-
// THE dual membership: this tissue is-a the cross-cutting global type.
199-
b.edge(t, type_idx[*gtype], REL_IS_A);
269+
// cross-cutting tissue-type ceiling nodes (subClassOf targets not in the tree).
270+
let mut type_idx: BTreeMap<&str, usize> = BTreeMap::new();
271+
for (_c, ty) in &frag.is_a {
272+
if idx.contains_key(ty.as_str()) || type_idx.contains_key(ty.as_str()) {
273+
continue;
274+
}
275+
let t = b.type_node(&fma_ttl::label_of(ty), type_idx.len() as u16);
276+
type_idx.insert(ty.as_str(), t);
277+
}
200278

201-
for (cell_i, cell) in cells.iter().enumerate() {
202-
let ceid = (cell_i as u8) + 1; // cell instance 1..2 (family identity)
203-
let c = b.part_of_node(
204-
&format!("{chamber} {tissue} {cell}"),
205-
C_CELL,
206-
cid,
207-
wid,
208-
tid,
209-
ceid,
210-
);
211-
b.edge(c, t, REL_PART_OF);
212-
}
213-
}
279+
// part_of edges (containment) + is-a edges (the dual membership).
280+
for (c, p) in &frag.part_of {
281+
if let (Some(&ci), Some(&pi)) = (idx.get(c.as_str()), idx.get(p.as_str())) {
282+
b.edge(ci, pi, REL_PART_OF);
283+
}
284+
}
285+
for (c, ty) in &frag.is_a {
286+
if let (Some(&ci), Some(&ti)) = (idx.get(c.as_str()), type_idx.get(ty.as_str())) {
287+
b.edge(ci, ti, REL_IS_A);
214288
}
215289
}
216290
b
@@ -244,15 +318,15 @@ fn emit_oso1(b: &Builder) -> Vec<u8> {
244318
}
245319

246320
fn main() {
247-
let b = build_heart();
321+
let b = hydrate_fma(FMA_TTL);
248322
let bytes = emit_oso1(&b);
249323

250-
// dual-membership proof: find a basin-local tissue and show BOTH addresses.
324+
// dual-membership proof: a hydrated wall layer carries BOTH addresses.
251325
let tissue = b
252326
.nodes
253327
.iter()
254-
.position(|x| x.label == "left ventricle muscle layer")
255-
.expect("LV muscle layer present");
328+
.position(|x| x.label == "Myocardium of left ventricle")
329+
.expect("LV myocardium hydrated from the fixture");
256330
let key = &b.nodes[tissue].key;
257331
println!("── FMA dual-membership proof ──");
258332
println!("node: {}", b.nodes[tissue].label);
@@ -283,7 +357,11 @@ fn main() {
283357
gk.leaf()
284358
);
285359
// cross-cutting: how many chambers' tissues share this one global type?
286-
let members = b.edges.iter().filter(|&&(_, t, rel)| t == gtype && rel == REL_IS_A).count();
360+
let members = b
361+
.edges
362+
.iter()
363+
.filter(|&&(_, t, rel)| t == gtype && rel == REL_IS_A)
364+
.count();
287365
println!(
288366
" '{}' is the is-a target of {members} tissues across the chambers (cross-cutting)",
289367
b.nodes[gtype].label

0 commit comments

Comments
 (0)