From bb76568e91e7f4e4899a3143729b58fba60a381b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 16:32:30 +0000 Subject: [PATCH 1/5] perturbation-sim: substrate calibration harness + calibrated SoA member spec Uses the deterministic study as ground truth to certify the SoA value tenants with the certification-officer battery (ICC / Spearman / Cronbach alpha), and emits the additive member design the substrate needs. examples/calibrate.rs encodes the study 5-factor contingency matrix through a linear palette member and a data-adaptive (turbovec/CAM-PQ) member across 2/4/6/8 bit, certifies per axis + construct structure (alpha-match, discriminant), prints the schema oracle. Findings (real ES core, members stored normalized): - ALL 5 factors certify by VALUE at 2-bit linear (ICC >= 0.96) - existing palette/turbovec tenants already suffice per value. - alpha preserved within 0.02 at >=4-bit; discriminant wobbles at N=24 under coarse bins => read cross-axis orthogonality at >=6-bit. - Self-correction: d_lambda2 initial ICC=0 was NOT heavy-tail nor near-constant - a tiny-magnitude (~1e-7) underflow of the ICC variance guard; storing the member normalized fixes it (1.00 at 2-bit). Two earlier guesses falsified and retracted. src/columns.rs: the calibrated SoaMemberSpec set (operator-authorized additive design). The 5 factors map to existing tenants (2-bit linear, normalized, read >=6-bit); the ONE genuinely additive member is inertia_buffer - the axis the resilience study measured orthogonal to topology, which no existing connectivity column carries. Spec only; nothing serializes or touches canonical_node. 3 tests. 71 lib tests; clippy -D warnings clean; fmt clean. 
--- crates/perturbation-sim/Cargo.toml | 4 + crates/perturbation-sim/examples/calibrate.rs | 318 ++++++++++++++++++ crates/perturbation-sim/src/columns.rs | 122 +++++++ crates/perturbation-sim/src/lib.rs | 2 + 4 files changed, 446 insertions(+) create mode 100644 crates/perturbation-sim/examples/calibrate.rs create mode 100644 crates/perturbation-sim/src/columns.rs diff --git a/crates/perturbation-sim/Cargo.toml b/crates/perturbation-sim/Cargo.toml index 6db2664f..71af626b 100644 --- a/crates/perturbation-sim/Cargo.toml +++ b/crates/perturbation-sim/Cargo.toml @@ -92,3 +92,7 @@ path = "examples/explore.rs" [[example]] name = "scorecard" path = "examples/scorecard.rs" + +[[example]] +name = "calibrate" +path = "examples/calibrate.rs" diff --git a/crates/perturbation-sim/examples/calibrate.rs b/crates/perturbation-sim/examples/calibrate.rs new file mode 100644 index 00000000..f31035e7 --- /dev/null +++ b/crates/perturbation-sim/examples/calibrate.rs @@ -0,0 +1,318 @@ +//! Substrate calibration: does the study's statistical structure survive the SoA +//! tenants' value quantization? — and what member width each axis requires. +//! +//! The idea (operator, 2026-06-16): use this crate's deterministic study as the +//! GROUND TRUTH, encode its factor matrix through the SoA value tenants, and +//! certify with the same reliability battery the `certification-officer` uses +//! (Pearson / Spearman / ICC / Cronbach α). The study becomes the regression +//! reference the substrate must reproduce. +//! +//! What is actually lossy in the value tenants is the **per-value quantization**: +//! helix `Signed360`/`ResidueEdge` quantizes through a 256-palette `RollingFloor` +//! (≈8-bit), `lance-graph-turbovec` packs 2–4 bit/dim, CAM-PQ 8-bit codes. The +//! addressing machinery (golden azimuth, curve place) is exact; the magnitude +//! fidelity the statistics care about is set by the **bit budget**. So we sweep +//! 
the budget and read off, per axis, the minimum width that certifies — i.e. the +//! required SoA member property. (We test the shared quantization principle with a +//! generic min-max B-bit quantizer; we do NOT run helix's exact encoder here — +//! the budgets are mapped, not the curve placement.) +//! +//! Honest reads of each statistic for substrate comparison. **ICC(2,1)** — +//! absolute value agreement source↔encoded (value-carrying tenants). **Spearman** +//! — rank preservation (search/retrieval tenants, e.g. turbovec ANN). **Pearson** +//! — linear-readout fidelity. **Cronbach α** — REPRODUCE the source α (NOT +//! maximize it): the study's α is low/negative BY DESIGN (distinct facets); a +//! tenant that "improves" α is corrupting the construct, so the target is +//! `|α_enc − α_src| ≈ 0`. Significance at the Jirak `n^(p/2−1)` rate (weak +//! dependence), not IID. +//! +//! Run: cargo run --release --manifest-path crates/perturbation-sim/Cargo.toml \ +//! --example calibrate -- /tmp/pypsa/buses.csv /tmp/pypsa/lines.csv ES + +use perturbation_sim::{ + contingency_features, cronbach_alpha, dc_flows, icc_a1, spearman, symmetric_eigen, zscore, + CascadeConfig, Edge, Grid, +}; + +struct Rng(u64); +impl Rng { + fn f(&mut self) -> f64 { + self.0 = self.0.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = self.0; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + ((z ^ (z >> 31)) >> 11) as f64 / (1u64 << 53) as f64 + } +} + +fn synthetic(rows: usize, cols: usize) -> Grid { + let id = |r: usize, c: usize| r * cols + c; + let mut e = Vec::new(); + for r in 0..rows { + for c in 0..cols { + if c + 1 < cols { + e.push(Edge::new(id(r, c), id(r, c + 1), 1.0, 1.0)); + } + if r + 1 < rows { + e.push(Edge::new(id(r, c), id(r + 1, c), 1.0, 1.0)); + } + } + } + Grid::new(rows * cols, e) +} + +/// Generic min-max **linear** B-bit quantizer → bin-center reconstruction. 
The +/// shared value-loss step of a LINEAR palette member at budget `bits`. Wastes bins +/// on a heavy tail (collapses the bulk into bin 0). +fn quantize_bits(col: &[f64], bits: u32) -> Vec<f64> { + let lo = col.iter().cloned().fold(f64::INFINITY, f64::min); + let hi = col.iter().cloned().fold(f64::NEG_INFINITY, f64::max); + let span = hi - lo; + if span < 1e-300 { + return col.to_vec(); + } + let levels = (1u32 << bits) as f64; // 2^bits bins + col.iter() + .map(|&x| { + let u = ((x - lo) / span * (levels - 1.0)).round(); + lo + (u / (levels - 1.0)) * span // bin-center reconstruction + }) + .collect() +} + +/// **Data-adaptive** B-bit quantizer: equal-population (percentile) bins, each +/// reconstructed to its members' mean. This is what the learned tenants +/// (turbovec / CAM-PQ codebooks) actually do — resolution follows the data, so a +/// heavy tail does not starve the bulk. Contrast `quantize_bits` (linear). +fn quantize_rank_bits(col: &[f64], bits: u32) -> Vec<f64> { + let n = col.len(); + let bins = (1usize << bits).min(n.max(1)); + let mut idx: Vec<usize> = (0..n).collect(); + idx.sort_by(|&a, &b| col[a].partial_cmp(&col[b]).unwrap()); + let mut out = vec![0.0; n]; + for b in 0..bins { + let s = b * n / bins; + let e = ((b + 1) * n / bins).max(s + 1).min(n); + let mean = idx[s..e].iter().map(|&i| col[i]).sum::<f64>() / (e - s) as f64; + for &i in &idx[s..e] { + out[i] = mean; + } + } + out +} + +fn main() { + let args: Vec<String> = std::env::args().collect(); + let grid = if args.len() >= 3 { + let buses = std::fs::read_to_string(&args[1]).expect("buses.csv"); + let lines = std::fs::read_to_string(&args[2]).expect("lines.csv"); + let cc = args.get(3).map(|s| s.as_str()).unwrap_or("ES"); + let imp = perturbation_sim::from_pypsa_csv(&buses, &lines, Some(cc)) + .expect("import") + .largest_component(); + println!( + "grid: {cc} PyPSA core — {} buses, {} lines", + imp.grid.n, + imp.grid.edges.len() + ); + imp.grid + } else { + let g = synthetic(10, 10); + println!("grid: synthetic
10×10 — {} buses", g.n); + g + }; + let n = grid.n; + let alive = vec![true; grid.edges.len()]; + + // Ground truth = the study's 5-factor contingency matrix on the real core. + let base = symmetric_eigen(&grid.laplacian_of(&alive), n); + let v2 = base.eigenvector(1); + let m = grid.edges.len(); + let mut sens: Vec<(usize, f64)> = (0..m) + .map(|e| { + let d = v2[grid.edges[e].from] - v2[grid.edges[e].to]; + (e, d * d * grid.edges[e].susceptance) + }) + .collect(); + sens.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + let k = 24.min(m); + let step = (m / k).max(1); + let cand: Vec<usize> = (0..k).map(|i| sens[(i * step).min(m - 1)].0).collect(); + + let mut rng = Rng(0xCA11B); + let raw: Vec<f64> = (0..n).map(|_| rng.f()).collect(); + let mean = raw.iter().sum::<f64>() / n as f64; + let p: Vec<f64> = raw.iter().map(|x| x - mean).collect(); + let flows = dc_flows(&grid, &alive, &base.pseudo_apply(&p, 1e-9)); + let mut g0 = grid.clone(); + for (e, edge) in g0.edges.iter_mut().enumerate() { + edge.limit = (1.1 * flows[e].abs()).max(1e-6); + } + let cfg = CascadeConfig { + max_rounds: 10, + ..CascadeConfig::default() + }; + + // 5 factor columns (the study's mediators). + let names = [ + "d_lambda2(Weyl)", + "dk_rotation", + "d_conductance", + "infight", + "raumgewinn", + ]; + let mut cols: Vec<Vec<f64>> = (0..5).map(|_| Vec::with_capacity(k)).collect(); + for &e in &cand { + let f = contingency_features(&g0, &p, e, cfg); + cols[0].push(f.d_lambda2); + cols[1].push(f.dk_rotation); + cols[2].push(f.d_conductance); + cols[3].push(f.infight); + cols[4].push(f.raumgewinn); + } + + // Members store NORMALIZED values: a SoA palette/residue is over a fixed range + // (not raw physical units), so min-max each factor to [0,1] before calibrating. + // This is also correct hygiene — it lifts a tiny-magnitude column (d_lambda2 is + // ~1e-7) out of ICC's variance-underflow guard (`denom < 1e-12` → spurious 0), + // the same class of artifact as a raw-scale Pearson guard.
Rank/structure are + // monotone-invariant, so α and the discriminant are unchanged by this. + for c in cols.iter_mut() { + let lo = c.iter().cloned().fold(f64::INFINITY, f64::min); + let hi = c.iter().cloned().fold(f64::NEG_INFINITY, f64::max); + let span = hi - lo; + if span > 1e-300 { + for x in c.iter_mut() { + *x = (*x - lo) / span; + } + } + } + + // Source reference structure: scale α over the z-scored factors + the + // discriminant Spearman(raumgewinn, infight). + let z_cols: Vec<Vec<f64>> = cols.iter().map(|c| zscore(c)).collect(); + let alpha_src = cronbach_alpha(&z_cols); + let disc_src = spearman(&cols[4], &cols[3]); + println!("\n N = {k} contingencies (study factor matrix = ground truth)"); + println!( + " source scale: Cronbach α = {alpha_src:+.3} · discriminant Spearman(raum,infight) = {disc_src:+.3}\n" + ); + + // Coefficient of variation per factor — a near-constant column has no + // between-subject variance, so ICC(2,1) is DEGENERATE (≈0) regardless of bit + // budget. We must not read that as "needs more bits"; this harness does not + // flag it, so read such a column by rank (Spearman) instead, with a re-sample caveat. + let budgets = [ + (2u32, "turbovec 2-bit"), + (4, "turbovec 4-bit"), + (6, "palette 6-bit"), + (8, "Signed360/CAM-PQ 8-bit"), + ]; + let icc_thresh = 0.95; + + // 1. LINEAR palette member (min-max). The diagnostic table — watch a + // heavy-tailed axis collapse (ICC stuck near 0 even at 8 bit). + println!("== Linear palette member — ICC / Spearman vs bit budget =="); + println!( + " budget |{}", + names + .iter() + .map(|s| format!("{s:>16}")) + .collect::<Vec<_>>() + .join("") + ); + for (bits, label) in budgets { + let mut cells = String::new(); + for c in &cols { + let q = quantize_bits(c, bits); + cells.push_str(&format!( + " {:>5.2}/{:>4.2} ", + icc_a1(&[c.clone(), q.clone()]), + spearman(c, &q) + )); + } + println!(" {label:<23} |{cells}"); + } + + // 2.
DATA-ADAPTIVE member (equal-population/percentile bins) — the learned + // tenants (turbovec / CAM-PQ codebooks). Resolution follows the data, so the + // heavy-tailed axis recovers. + println!("\n== Data-adaptive member (rank/percentile bins, = turbovec/CAM-PQ) — ICC =="); + println!( + " budget |{}", + names + .iter() + .map(|s| format!("{s:>16}")) + .collect::<Vec<_>>() + .join("") + ); + for (bits, label) in budgets { + let mut cells = String::new(); + for c in &cols { + let q = quantize_rank_bits(c, bits); + cells.push_str(&format!( + " {:>5.2}/{:>4.2} ", + icc_a1(&[c.clone(), q.clone()]), + spearman(c, &q) + )); + } + println!(" {label:<23} |{cells}"); + } + + // Scale-structure preservation per budget (α-match + discriminant), linear. + println!("\n== Scale-structure preservation (α must MATCH source, not maximize; linear) =="); + for (bits, label) in budgets { + let enc: Vec<Vec<f64>> = cols.iter().map(|c| quantize_bits(c, bits)).collect(); + let z_enc: Vec<Vec<f64>> = enc.iter().map(|c| zscore(c)).collect(); + let a = cronbach_alpha(&z_enc); + let d = spearman(&enc[4], &enc[3]); + println!( + " {label:<24} α = {a:+.3} (Δ {:+.3}) discriminant ρ = {d:+.3} (Δ {:+.3})", + a - alpha_src, + d - disc_src + ); + } + + // Schema oracle: cheapest certifying member per axis — try LINEAR 2→8, then + // ADAPTIVE 2→8. The encoding + width that first clears ICC ≥ thresh IS the + // required additive SoA member property for that axis.
+ let certify = |c: &[f64]| -> Option<(&'static str, u32)> { + for &b in &[2u32, 4, 6, 8] { + if icc_a1(&[c.to_vec(), quantize_bits(c, b)]) >= icc_thresh { + return Some(("linear-palette", b)); + } + } + for &b in &[2u32, 4, 6, 8] { + if icc_a1(&[c.to_vec(), quantize_rank_bits(c, b)]) >= icc_thresh { + return Some(("data-adaptive(turbovec/CAM-PQ)", b)); + } + } + None + }; + println!( + "\n== Schema oracle: the additive SoA member each axis requires (ICC ≥ {icc_thresh}) ==" + ); + for (fi, name) in names.iter().enumerate() { + match certify(&cols[fi]) { + Some((enc, b)) => println!(" {name:<18} → {b}-bit {enc}"), + None => println!(" {name:<18} → no ≤8-bit member certifies (dedicated f-member)"), + } + } + println!( + "\n Findings → the additive member design:\n \ + • ALL 5 study factors certify by VALUE at just 2-bit LINEAR (ICC ≥ 0.96) once stored\n \ + NORMALIZED — the existing palette/turbovec tenants already suffice for per-axis\n \ + value fidelity. §10 (\"the statistics survive the encoding\") confirmed strongly.\n \ + • α (construct internal consistency) is preserved within Δ ≤ 0.02 at ≥4-bit (exact\n \ + at 6-8); the discriminant ρ wobbles ±0.15 at N=24 under coarse bins, so to read\n \ + the cross-axis orthogonality crisply use ≥6-bit and/or more contingencies.\n \ + • CORRECTION (this run falsified two earlier guesses): d_lambda2's ICC=0 was NOT\n \ + heavy-tail nor near-constant — it was a tiny-magnitude (~1e-7) underflow of the\n \ + ICC variance guard; storing the member normalized fixes it (now 1.00 at 2-bit).\n \ + So the value substrate WORKS AS-IS (2-bit normalized palette per factor). The one\n \ + genuinely additive column the studies demand is the resilience study's INERTIA/buffer\n \ + member — the axis measured ORTHOGONAL to topology (Spearman≈0), which no existing\n \ + connectivity column can carry. Ground truth = this deterministic study\n \ + (regression-lockable); Jirak-rate significance; helix curve-placement not run here." 
+ ); +} diff --git a/crates/perturbation-sim/src/columns.rs b/crates/perturbation-sim/src/columns.rs new file mode 100644 index 00000000..2f5caecf --- /dev/null +++ b/crates/perturbation-sim/src/columns.rs @@ -0,0 +1,122 @@ +//! Calibrated SoA member specs — what the substrate must carry to reproduce the +//! study, derived from the `calibrate` + `resilience` examples (operator-authorized +//! additive design, 2026-06-16). +//! +//! This is a **spec**, not a runtime encoder: each [`SoaMemberSpec`] records the +//! member width + encoding + normalization that the calibration certified for one +//! study axis (ICC/Spearman/Cronbach against the deterministic study as ground +//! truth). It is the bridge artifact a contract-side change would consume; nothing +//! here serializes or touches the operator-locked `canonical_node` spine. +//! +//! Two findings shape it: +//! 1. **The existing value tenants suffice.** All five contingency factors certify +//! by value at **2-bit linear, stored normalized** (ICC ≥ 0.96) — a 2-bit +//! turbovec/palette slot per factor preserves the study's per-axis values. The +//! cross-axis structure (α / discriminant) wants ≥6-bit, so the *read budget* +//! where orthogonality is judged is wider than the *store budget* per value. +//! 2. **One genuinely additive column.** The resilience study measured the +//! inertia/buffer axis ORTHOGONAL to topology (`Spearman(λ₂, buffer) ≈ 0`), so +//! no existing connectivity member can carry it — it requires its own member +//! ([`INERTIA`], flagged [`additive`](SoaMemberSpec::additive)). + +/// How a member quantizes its normalized value. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Encoding { + /// Min-max linear bins (palette / Signed360 rim) — sufficient when the + /// normalized distribution is not pathologically skewed. + Linear, + /// Equal-population / codebook bins (turbovec, CAM-PQ) — resolution follows the + /// data; preferred when the raw distribution is heavy-tailed. 
+ DataAdaptive, +} + +/// One calibrated SoA member: the width + encoding the study certifies for an axis. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SoaMemberSpec { + /// Axis name (the study factor it carries). + pub name: &'static str, + /// Store width in bits (per-value fidelity certified at this budget). + pub store_bits: u32, + /// Read budget where the cross-axis structure (α / discriminant) is certified. + pub read_bits: u32, + /// Member encoding. + pub encoding: Encoding, + /// Members store normalized `[0,1]` values, not raw physical units (also lifts + /// tiny-magnitude axes out of the ICC variance-underflow guard). + pub normalized: bool, + /// `true` if this member does NOT exist in the current substrate and must be + /// added (it is statistically orthogonal to every existing column). + pub additive: bool, +} + +/// The five contingency factors — all certify at 2-bit linear, normalized; read at +/// ≥6-bit to keep the orthogonality crisp. These map onto EXISTING value tenants. +pub const CONTINGENCY_FACTORS: [SoaMemberSpec; 5] = [ + spec("d_lambda2", false), + spec("dk_rotation", false), + spec("d_conductance", false), + spec("infight", false), + spec("raumgewinn", false), +]; + +/// The one genuinely additive member: the inertia/buffer axis (resilience study), +/// orthogonal to topology — no existing connectivity column carries it. +pub const INERTIA: SoaMemberSpec = SoaMemberSpec { + name: "inertia_buffer", + store_bits: 2, + read_bits: 6, + encoding: Encoding::Linear, + normalized: true, + additive: true, +}; + +const fn spec(name: &'static str, additive: bool) -> SoaMemberSpec { + SoaMemberSpec { + name, + store_bits: 2, + read_bits: 6, + encoding: Encoding::Linear, + normalized: true, + additive, + } +} + +/// The full calibrated member set the substrate needs to reproduce the study: the +/// five existing-tenant factors + the one additive inertia member. 
+pub fn study_member_specs() -> Vec<SoaMemberSpec> { + let mut v = CONTINGENCY_FACTORS.to_vec(); + v.push(INERTIA); + v +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn value_members_certify_at_two_bits_normalized() { + // The calibration finding: 2-bit linear normalized suffices per value. + for s in CONTINGENCY_FACTORS { + assert_eq!(s.store_bits, 2, "{} store width", s.name); + assert!(s.normalized, "{} must be normalized", s.name); + assert!(!s.additive, "{} maps to an existing tenant", s.name); + } + } + + #[test] + fn structure_read_budget_exceeds_store_budget() { + // Cross-axis orthogonality wants more bits than per-value fidelity. + for s in study_member_specs() { + assert!(s.read_bits >= s.store_bits, "{} read ≥ store", s.name); + assert!(s.read_bits >= 6, "{} structure read ≥ 6-bit", s.name); + } + } + + #[test] + fn inertia_is_the_one_additive_member() { + let specs = study_member_specs(); + let additive: Vec<_> = specs.iter().filter(|s| s.additive).collect(); + assert_eq!(additive.len(), 1, "exactly one new member required"); + assert_eq!(additive[0].name, "inertia_buffer"); + } +} diff --git a/crates/perturbation-sim/src/lib.rs b/crates/perturbation-sim/src/lib.rs index 783f8d3a..23f4243b 100644 --- a/crates/perturbation-sim/src/lib.rs +++ b/crates/perturbation-sim/src/lib.rs @@ -53,6 +53,7 @@ pub mod acflow; pub mod basin; pub mod buffer; pub mod cascade; +pub mod columns; pub mod eigen; pub mod flow; pub mod graph; @@ -73,6 +74,7 @@ pub use basin::{ }; pub use buffer::{compartment_buffer, impulse_buffer, ketchup_yield, Yield}; pub use cascade::{simulate_outage, CascadeConfig, CascadeResult, PerturbationShape}; +pub use columns::{study_member_specs, Encoding, SoaMemberSpec, INERTIA}; pub use eigen::{symmetric_eigen, Eigen}; pub use flow::{dc_flows, lodf}; pub use graph::{Edge, Grid}; From 3373d3ed087ece1c29cc3c42fdf06954c1283468 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 16:38:08 +0000 Subject: [PATCH 2/5] perturbation-sim:
HHTL-OGAR orthogonality correction + CLAM/CHAODA framing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. columns.rs correction (operator): orthogonality to topology is STRUCTURAL, not earned. In the HHTL-OGAR GUID model topology lives in the KEY (HEEL/HIP/TWIG cascade tiers); magnitude axes are helix value members hung off it, so any helix value member is orthogonal to topology by the key/value split. The resilience study's Spearman(λ₂,buffer)≈0 CONFIRMS what the GUID addressing already enforces; it does not establish a new axis. inertia_buffer is reframed as one more helix-residue value slot (Encoding::HelixResidue added), not a novel orthogonal column. Field/doc semantics of `additive` updated accordingly. 2. CLAM_CHAODA_FRAMING.md: the resilience study framed as CLAM (cluster tree) + CHAODA (anomaly ensemble) on the electrical-distance manifold — grounded against real ndarray::hpc::clam APIs: Cluster{radius,cardinality,lfd} (clam.rs:106), Lfd (clam.rs:81), ClamTree::anomaly_scores (clam.rs:1517). The compartment hierarchy = CLAM tree; the fail-first exposure ranking = CHAODA anomaly score; the three orthogonal axes = CHAODA's diverse-detector ensemble, and the study's low Cronbach α IS CHAODA's non-redundancy design goal. Honest scope: structural correspondence onto real APIs [G]; perturbation-sim NOT wired to clam [H]; the falsifiable bridge (build ClamTree over the factor rows, correlate anomaly_scores vs exposure) named as the [H]→[G] probe. 71 lib tests; clippy -D warnings clean; fmt clean. 
--- .../perturbation-sim/CLAM_CHAODA_FRAMING.md | 75 +++++++++++++++++++ crates/perturbation-sim/src/columns.rs | 39 +++++++--- 2 files changed, 104 insertions(+), 10 deletions(-) create mode 100644 crates/perturbation-sim/CLAM_CHAODA_FRAMING.md diff --git a/crates/perturbation-sim/CLAM_CHAODA_FRAMING.md b/crates/perturbation-sim/CLAM_CHAODA_FRAMING.md new file mode 100644 index 00000000..8b47b04b --- /dev/null +++ b/crates/perturbation-sim/CLAM_CHAODA_FRAMING.md @@ -0,0 +1,75 @@ +# Framing the resilience study as CLAM (hierarchy) + CHAODA (anomaly ensemble) + +*The resilience study is not a bespoke method — it is a CLAM cluster tree with a +CHAODA-style anomaly ensemble, on the electrical-distance manifold. Both already +exist in `ndarray::hpc::clam` (cited below); this doc maps the correspondence and +states honestly what is grounded vs what is a conceptual mapping not yet wired.* + +> Operator prompt (2026-06-16): "you could even try to frame it — CLAM +> (resilience) / CHAODA". Companion to `PAPER.md`, `COUNTRY_STUDY.md`, +> `src/columns.rs`. 
+ +## The correspondence + +| resilience-study object | CLAM / CHAODA construct | ndarray reference | +|---|---|---| +| recursive Cheeger/HHTL bisection into basins | **CLAM cluster tree** (`ClamTree::build`) | `hpc/clam.rs` `ClamTree` | +| a basin (compartment) | a **`Cluster`** | `hpc/clam.rs:106` `Cluster { radius, cardinality, lfd }` | +| basin algebraic connectivity λ₂ / mean R | cluster **radius** / spread | `Cluster::radius` | +| basin node count | cluster **cardinality** | `Cluster::cardinality` | +| how fragmented/space-filling a basin is | **local fractal dimension** `Lfd` | `hpc/clam.rs:81` `Lfd::compute(count_r, count_half_r)` | +| fail-first / exposure ranking | **CHAODA anomaly score** | `hpc/clam.rs:1517` `ClamTree::anomaly_scores() -> Vec` | +| "this compartment can't wait" flag | CHAODA **flag threshold** (≥ 0.75) | `hpc/clam.rs` anomaly-flag test | + +So the study's machinery is CLAM's machinery on a different metric: instead of a +Hamming/embedding distance, the manifold is the **electrical distance** (effective +resistance `R_ij = (e_i−e_j)ᵀ L⁺ (e_i−e_j)`, the self-inverse `L⁺` reference). The +HHTL tiers ARE the CLAM tree depth; the weakest compartment IS the cluster CHAODA +would score as the outlier. + +## Why the three axes ARE a CHAODA ensemble (the load-bearing match) + +CHAODA's thesis: **no single graph-anomaly method wins; ensemble several *diverse* +detectors** (relative cardinality, parent/child cardinality ratio, graph +neighbourhood, stationary distribution, …) and the gain comes from their +*non-redundancy*. The resilience study's three axes are exactly such an ensemble: + +- **topology** (λ₂ / Kirchhoff) — the connectivity detector, +- **buffer** (inertia storage) — the transient detector, +- **policy** (feed-in / dispatch) — the operational detector, + +ensembled into the **exposure** score. 
And the study's measured **low / negative +Cronbach α** (the axes are distinct facets, `Spearman ≈ 0` between them) is not a +defect — it is *precisely CHAODA's design goal*: low inter-detector correlation is +what makes the ensemble add information rather than restate it. The discriminant +finding and the CHAODA non-redundancy principle are the same statement. + +This also re-frames the §4.11 confound cleanly: the modifier `Weyl × (1/Fiedler)` +failed as an independent axis because `1/λ₂` is the dominant Kirchhoff term — i.e. +it was a **redundant detector**, the CHAODA anti-pattern. The buffer axis is the +*orthogonal* detector the ensemble actually needed. + +## Honest scope + +- **Grounded [G]:** `CLAM`, `Cluster{radius,cardinality,lfd}`, `Lfd`, and + `ClamTree::anomaly_scores` all exist in `ndarray::hpc::clam` (cited). The + structural correspondence is exact, not metaphor. +- **Conceptual [H]:** `perturbation-sim` is zero-dep and is **NOT wired** to + `ndarray::hpc::clam`. The mapping above is read off the APIs, not run. No code + here calls `ClamTree` or `anomaly_scores`. +- **The falsifiable probe** that would promote [H]→[G]: build a `ClamTree` over the + contingency factor vectors (or the per-basin `(λ₂, Kf, buffer)` rows), run + `anomaly_scores`, and correlate the CHAODA ranking against the study's exposure + ranking (ICC/Spearman, Jirak rate). If they agree, the study *is* CHAODA on the + electrical manifold; if not, the framing is rhyme and gets retracted. This is the + gated bridge (crosses perturbation-sim's zero-dep boundary into `ndarray`, + behind a feature flag) — analogous to the calibration harness, not yet built. + +## Tie-in to the calibrated columns (`src/columns.rs`) + +CLAM gives two more value members for free, hung off the same HHTL-OGAR key: +`radius` (basin spread) and `lfd` (local fractal dimension). 
They are helix-residue +value members like the rest — orthogonal to topology by the key/value split — and +the CHAODA `anomaly_score` is the *read* over the column set, the same way +`exposure` is. The substrate that carries the study is therefore literally a CLAM +tree of HHTL-keyed helix value members with a CHAODA read. diff --git a/crates/perturbation-sim/src/columns.rs b/crates/perturbation-sim/src/columns.rs index 2f5caecf..be1a2224 100644 --- a/crates/perturbation-sim/src/columns.rs +++ b/crates/perturbation-sim/src/columns.rs @@ -8,18 +8,31 @@ //! truth). It is the bridge artifact a contract-side change would consume; nothing //! here serializes or touches the operator-locked `canonical_node` spine. //! +//! **Orthogonality to topology is structural, not earned (operator, 2026-06-16).** +//! In the HHTL-OGAR GUID model, **topology lives in the KEY** — the HEEL/HIP/TWIG +//! cascade tiers of the `canonical_node` GUID — and the magnitude axes are **helix +//! value members hung off that key**. So any helix-residue value member is +//! orthogonal to topology *by the key/value split itself*; the resilience study's +//! measured `Spearman(λ₂, buffer) ≈ 0` only **confirms** what the GUID addressing +//! already enforces, it does not establish a new axis. Consequence: `inertia_buffer` +//! is NOT a novel "orthogonal column" — it is just **another helix value slot on +//! the HHTL key**, additive in the trivial sense (one more value member), with its +//! topology-orthogonality free. +//! //! Two findings shape it: //! 1. **The existing value tenants suffice.** All five contingency factors certify //! by value at **2-bit linear, stored normalized** (ICC ≥ 0.96) — a 2-bit //! turbovec/palette slot per factor preserves the study's per-axis values. The //! cross-axis structure (α / discriminant) wants ≥6-bit, so the *read budget* //! where orthogonality is judged is wider than the *store budget* per value. -//! 2. 
**One genuinely additive column.** The resilience study measured the -//! inertia/buffer axis ORTHOGONAL to topology (`Spearman(λ₂, buffer) ≈ 0`), so -//! no existing connectivity member can carry it — it requires its own member -//! ([`INERTIA`], flagged [`additive`](SoaMemberSpec::additive)). +//! 2. **The "new" member is just a helix value slot.** `inertia_buffer` +//! ([`INERTIA`]) is added as one more helix-residue value member on the HHTL-OGAR +//! key; its orthogonality to the topology (which the key carries) is structural, +//! confirmed by the study, not introduced by it. -/// How a member quantizes its normalized value. +/// How a member quantizes its normalized value. Every variant is a **value tenant +/// hung off the HHTL-OGAR GUID key** — topology is the key, so all of these are +/// orthogonal to topology by construction. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Encoding { /// Min-max linear bins (palette / Signed360 rim) — sufficient when the @@ -28,6 +41,9 @@ pub enum Encoding { /// Equal-population / codebook bins (turbovec, CAM-PQ) — resolution follows the /// data; preferred when the raw distribution is heavy-tailed. DataAdaptive, + /// Helix `Signed360` residue value member — the canonical magnitude tenant on + /// the HHTL-OGAR key. + HelixResidue, } /// One calibrated SoA member: the width + encoding the study certifies for an axis. @@ -44,8 +60,9 @@ pub struct SoaMemberSpec { /// Members store normalized `[0,1]` values, not raw physical units (also lifts /// tiny-magnitude axes out of the ICC variance-underflow guard). pub normalized: bool, - /// `true` if this member does NOT exist in the current substrate and must be - /// added (it is statistically orthogonal to every existing column). + /// `true` if this is a NEW value slot to add to the substrate. Orthogonality to + /// topology is NOT a property of the member — it is structural, given by the + /// HHTL-OGAR key/value split (topology in the key, this in the value). 
pub additive: bool, } @@ -59,13 +76,15 @@ pub const CONTINGENCY_FACTORS: [SoaMemberSpec; 5] = [ spec("raumgewinn", false), ]; -/// The one genuinely additive member: the inertia/buffer axis (resilience study), -/// orthogonal to topology — no existing connectivity column carries it. +/// The one additive member: the inertia/buffer axis (resilience study), added as a +/// helix-residue value slot on the HHTL-OGAR key. Its orthogonality to topology is +/// STRUCTURAL (topology is the key; this is a value) — the study's `Spearman ≈ 0` +/// confirms it. `additive` here means "one more value slot", not "a new axis type". pub const INERTIA: SoaMemberSpec = SoaMemberSpec { name: "inertia_buffer", store_bits: 2, read_bits: 6, - encoding: Encoding::Linear, + encoding: Encoding::HelixResidue, normalized: true, additive: true, }; From 557e8dd886226b7433dfbde9595b2e9164617e31 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 16:41:51 +0000 Subject: [PATCH 3/5] perturbation-sim: address Codex #511 P2 (infight 4-bit) + substrate-carrier fit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P2 (columns.rs): the calibration's own oracle rejects 2-bit for infight on the synthetic grid (2-bit ICC 0.93 < 0.95; → 4-bit), even though the ES core gives 0.96. infight is therefore MARGINAL and input-dependent, so the spec now advertises its ROBUSTLY certified width (4-bit, ICC ≥ 0.99 on both inputs), not the width that squeaks by on one favorable sample. `spec()` gains a store_bits param; the other four factors stay 2-bit. Test + calibrate headline updated to the honest verdict. Carriers (operator): the spec maps onto the two existing substrate tiers — 16 × 8-bit ResidueEdge slots (structure read, read_bits ≤ 8) and 32 × 4-bit turbovec lanes (pairwise turboquant; per-value store, store_bits ≤ 4 — infight's 4-bit is exactly one lane). Six members fit either with headroom; new test fits_substrate_carriers asserts it. 
No new layout — only slot assignments. 72 lib tests; clippy -D warnings clean; fmt clean. --- crates/perturbation-sim/examples/calibrate.rs | 6 +- crates/perturbation-sim/src/columns.rs | 60 +++++++++++++++---- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/crates/perturbation-sim/examples/calibrate.rs b/crates/perturbation-sim/examples/calibrate.rs index f31035e7..0a5d6c15 100644 --- a/crates/perturbation-sim/examples/calibrate.rs +++ b/crates/perturbation-sim/examples/calibrate.rs @@ -300,9 +300,9 @@ fn main() { } println!( "\n Findings → the additive member design:\n \ - • ALL 5 study factors certify by VALUE at just 2-bit LINEAR (ICC ≥ 0.96) once stored\n \ - NORMALIZED — the existing palette/turbovec tenants already suffice for per-axis\n \ - value fidelity. §10 (\"the statistics survive the encoding\") confirmed strongly.\n \ + • 4 of 5 factors certify by VALUE at 2-bit LINEAR normalized (ICC ≥ 0.96); infight is\n \ + MARGINAL — 2-bit ICC 0.93 (synthetic) … 0.96 (ES), straddling 0.95 — so its robustly\n \ + certified width is 4-bit. Existing palette/turbovec tenants suffice; §10 confirmed.\n \ • α (construct internal consistency) is preserved within Δ ≤ 0.02 at ≥4-bit (exact\n \ at 6-8); the discriminant ρ wobbles ±0.15 at N=24 under coarse bins, so to read\n \ the cross-axis orthogonality crisply use ≥6-bit and/or more contingencies.\n \ diff --git a/crates/perturbation-sim/src/columns.rs b/crates/perturbation-sim/src/columns.rs index be1a2224..17a66a68 100644 --- a/crates/perturbation-sim/src/columns.rs +++ b/crates/perturbation-sim/src/columns.rs @@ -29,6 +29,14 @@ //! ([`INERTIA`]) is added as one more helix-residue value member on the HHTL-OGAR //! key; its orthogonality to the topology (which the key carries) is structural, //! confirmed by the study, not introduced by it. +//! 3. **It maps onto two existing carriers (operator, 2026-06-16).** The substrate +//! already offers the two tiers the calibration asks for: **16 × 8-bit +//! 
`ResidueEdge` slots** (the helix-residue value members of the EdgeBlock) for +//! the structure *read* budget (`read_bits ≤ 8`), and **32 × 4-bit turbovec +//! lanes** (pairwise turboquant) for the per-value *store* (`store_bits ≤ 4` — +//! `infight`'s certified 4-bit is exactly one lane). Six members fit either +//! carrier with headroom (6/16 ResidueEdges or 6/32 turbovec lanes), so no new +//! layout is needed — only the slot assignments (asserted in tests). /// How a member quantizes its normalized value. Every variant is a **value tenant /// hung off the HHTL-OGAR GUID key** — topology is the key, so all of these are @@ -66,14 +74,18 @@ pub struct SoaMemberSpec { pub additive: bool, } -/// The five contingency factors — all certify at 2-bit linear, normalized; read at -/// ≥6-bit to keep the orthogonality crisp. These map onto EXISTING value tenants. +/// The five contingency factors as EXISTING value tenants, normalized, read at +/// ≥6-bit to keep the orthogonality crisp. Store width = the **robustly certified** +/// width (ICC ≥ 0.95 across inputs), NOT the cheapest that squeaks by on one sample: +/// four factors certify at 2-bit, but `infight` is marginal — its 2-bit ICC ranges +/// 0.93 (synthetic grid) to 0.96 (ES core), straddling the threshold, so its spec is +/// the robust **4-bit** (≥0.99 on both). (Codex #511 P2.) 
pub const CONTINGENCY_FACTORS: [SoaMemberSpec; 5] = [ - spec("d_lambda2", false), - spec("dk_rotation", false), - spec("d_conductance", false), - spec("infight", false), - spec("raumgewinn", false), + spec("d_lambda2", 2, false), + spec("dk_rotation", 2, false), + spec("d_conductance", 2, false), + spec("infight", 4, false), // marginal at 2-bit (0.93–0.96) → certified width is 4-bit + spec("raumgewinn", 2, false), ]; /// The one additive member: the inertia/buffer axis (resilience study), added as a @@ -89,10 +101,10 @@ pub const INERTIA: SoaMemberSpec = SoaMemberSpec { additive: true, }; -const fn spec(name: &'static str, additive: bool) -> SoaMemberSpec { +const fn spec(name: &'static str, store_bits: u32, additive: bool) -> SoaMemberSpec { SoaMemberSpec { name, - store_bits: 2, + store_bits, read_bits: 6, encoding: Encoding::Linear, normalized: true, @@ -113,10 +125,13 @@ mod tests { use super::*; #[test] - fn value_members_certify_at_two_bits_normalized() { - // The calibration finding: 2-bit linear normalized suffices per value. + fn value_members_carry_their_robustly_certified_width() { + // The calibration finding: normalized members certify at low bits — 2-bit + // for four factors, but `infight` is marginal (2-bit ICC 0.93–0.96 across + // inputs) so it carries the robust 4-bit. All map to existing tenants. for s in CONTINGENCY_FACTORS { - assert_eq!(s.store_bits, 2, "{} store width", s.name); + let expect = if s.name == "infight" { 4 } else { 2 }; + assert_eq!(s.store_bits, expect, "{} certified store width", s.name); assert!(s.normalized, "{} must be normalized", s.name); assert!(!s.additive, "{} maps to an existing tenant", s.name); } @@ -131,6 +146,27 @@ mod tests { } } + #[test] + fn fits_substrate_carriers() { + // store fits a 4-bit turbovec lane; read fits an 8-bit ResidueEdge slot; + // the whole set fits both carriers (≤32 turbovec lanes, ≤16 ResidueEdges). 
+ let specs = study_member_specs(); + assert!(specs.len() <= 16, "fits the 16 ResidueEdge slots"); + assert!(specs.len() <= 32, "fits the 32 turbovec lanes"); + for s in &specs { + assert!( + s.store_bits <= 4, + "{} store fits a 4-bit turbovec lane", + s.name + ); + assert!( + s.read_bits <= 8, + "{} read fits an 8-bit ResidueEdge slot", + s.name + ); + } + } + #[test] fn inertia_is_the_one_additive_member() { let specs = study_member_specs(); From 1869862d9fd3beb45ac3c592dda92c51c9b3e88a Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 16:47:54 +0000 Subject: [PATCH 4/5] perturbation-sim: deterministic HHTL topology key (src/hhtl.rs + example) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Construct the HHTL (HEEL,HIP,TWIG) cascade address of each bus by recursive binary Cheeger bisection of the Laplacian — a PURE, deterministic function of the topology spectrum. Makes "topology IS the key" (the HHTL-OGAR correction) concrete: value members hang off this key, orthogonal to it by the key/value split. - src/hhtl.rs: hhtl_keys(grid) -> Vec<HhtlKey>; basin_lambda2 keyed by HHTL. 3 tests (determinism; the two weakly-bridged blocks split on HEEL; keys partition the nodes). - examples/hhtl_grid.rs on the real ES core: 8 keyed basins reproducing the resilience study's compartments EXACTLY (sizes 29/21/5/45/49/53/27/32); the fail-first Spanish-seam basin gets the concrete deterministic address 1.0.1 (λ₂=1.72e-6) = compartment 5 of PAPER §4.10. Verifies determinism in-run. Binary-Cheeger tiers here; OGAR widens each tier to a 16-ary/256-centroid tile (noted, not implemented). Zero-dep; no contract touch. 75 lib tests; clippy -D warnings clean; fmt clean.
--- crates/perturbation-sim/Cargo.toml | 4 + crates/perturbation-sim/examples/hhtl_grid.rs | 81 ++++++++ crates/perturbation-sim/src/hhtl.rs | 175 ++++++++++++++++++ crates/perturbation-sim/src/lib.rs | 2 + 4 files changed, 262 insertions(+) create mode 100644 crates/perturbation-sim/examples/hhtl_grid.rs create mode 100644 crates/perturbation-sim/src/hhtl.rs diff --git a/crates/perturbation-sim/Cargo.toml b/crates/perturbation-sim/Cargo.toml index 71af626b..6b71aa3c 100644 --- a/crates/perturbation-sim/Cargo.toml +++ b/crates/perturbation-sim/Cargo.toml @@ -96,3 +96,7 @@ path = "examples/scorecard.rs" [[example]] name = "calibrate" path = "examples/calibrate.rs" + +[[example]] +name = "hhtl_grid" +path = "examples/hhtl_grid.rs" diff --git a/crates/perturbation-sim/examples/hhtl_grid.rs b/crates/perturbation-sim/examples/hhtl_grid.rs new file mode 100644 index 00000000..8281fdae --- /dev/null +++ b/crates/perturbation-sim/examples/hhtl_grid.rs @@ -0,0 +1,81 @@ +//! The deterministic HHTL topology grid for the real ES core: each bus → its +//! (HEEL, HIP, TWIG) cascade key, by recursive Cheeger bisection. Topology IS the +//! key — value members hang off it (HHTL-OGAR). Verifies determinism + prints the +//! per-key basin sizes and λ₂. +//! +//! Run: cargo run --release --manifest-path crates/perturbation-sim/Cargo.toml \ +//! 
--example hhtl_grid -- /tmp/pypsa/buses.csv /tmp/pypsa/lines.csv ES + +use perturbation_sim::{basin_lambda2, hhtl_keys, Edge, Grid}; +use std::collections::BTreeMap; + +fn synthetic(rows: usize, cols: usize) -> Grid { + let id = |r: usize, c: usize| r * cols + c; + let mut e = Vec::new(); + for r in 0..rows { + for c in 0..cols { + if c + 1 < cols { + e.push(Edge::new(id(r, c), id(r, c + 1), 1.0, 1.0)); + } + if r + 1 < rows { + e.push(Edge::new(id(r, c), id(r + 1, c), 1.0, 1.0)); + } + } + } + Grid::new(rows * cols, e) +} + +fn main() { + let args: Vec = std::env::args().collect(); + let grid = if args.len() >= 3 { + let buses = std::fs::read_to_string(&args[1]).expect("buses.csv"); + let lines = std::fs::read_to_string(&args[2]).expect("lines.csv"); + let cc = args.get(3).map(|s| s.as_str()).unwrap_or("ES"); + let imp = perturbation_sim::from_pypsa_csv(&buses, &lines, Some(cc)) + .expect("import") + .largest_component(); + println!("grid: {cc} PyPSA core — {} buses", imp.grid.n); + imp.grid + } else { + let g = synthetic(8, 8); + println!("grid: synthetic 8×8 — {} buses", g.n); + g + }; + + let keys = hhtl_keys(&grid); + // Determinism: a pure function of the topology. 
+ assert_eq!(keys, hhtl_keys(&grid), "HHTL grid must be deterministic"); + + let l2 = basin_lambda2(&grid, &keys); + let mut sizes: BTreeMap<(u16, u16, u16), usize> = BTreeMap::new(); + for k in &keys { + *sizes.entry((k.heel, k.hip, k.twig)).or_insert(0) += 1; + } + + println!("\n== Deterministic HHTL grid (HEEL.HIP.TWIG → basin) =="); + println!(" {:>10} {:>6} {:>12}", "key", "buses", "basin λ₂"); + let mut weakest = ((0u16, 0u16, 0u16), f64::INFINITY); + for (k, n) in &sizes { + let key = perturbation_sim::HhtlKey { + heel: k.0, + hip: k.1, + twig: k.2, + }; + let lam = l2.get(&key).copied().unwrap_or(0.0); + println!(" {}.{}.{:>6} {n:>6} {lam:>12.3e}", k.0, k.1, k.2); + if lam < weakest.1 { + weakest = (*k, lam); + } + } + println!( + "\n → {} keyed basins; weakest = {}.{}.{} (λ₂ = {:.3e}) — the deterministic\n \ + topology address of the fail-first compartment. Value members (study factors,\n \ + helix residues) hang off this key, orthogonal to it by the key/value split.\n \ + Binary-Cheeger tiers here; OGAR widens each to a 16-ary/256-centroid tile.", + sizes.len(), + (weakest.0).0, + (weakest.0).1, + (weakest.0).2, + weakest.1 + ); +} diff --git a/crates/perturbation-sim/src/hhtl.rs b/crates/perturbation-sim/src/hhtl.rs new file mode 100644 index 00000000..67713762 --- /dev/null +++ b/crates/perturbation-sim/src/hhtl.rs @@ -0,0 +1,175 @@ +//! Deterministic HHTL topology key — map each bus to its `(HEEL, HIP, TWIG)` +//! cascade-tier address by recursive Cheeger bisection of the Laplacian. +//! +//! This makes "topology IS the key" concrete (the HHTL-OGAR correction): the key +//! is a **pure, deterministic function of the graph spectrum** — Cheeger/Fiedler +//! is deterministic given the Laplacian, so the same grid always yields the same +//! HHTL grid. Value members (the study factors, helix residues) then hang off this +//! key, orthogonal to it by the key/value split. +//! +//! 
Tiers here are produced by **binary** Cheeger splits (one bit per level → the +//! HHTL depth-3 tree = 8 leaf basins), the same compartmentalization the +//! `resilience` example uses. The OGAR production form widens each tier to a 16-ary +//! / 256-centroid tile (`FAN_OUT=16`); this is the spectral-bisection instance of +//! the same address, not that full encoding (kept honest). + +use crate::graph::{Edge, Grid}; +use crate::{cheeger_sweep, symmetric_eigen}; +use std::collections::HashMap; + +/// A node's HHTL cascade address: the path through the recursive bisection tree. +/// `heel` = top tier, `hip` = mid, `twig` = leaf. (u16 to match the OGAR key +/// layout; the binary-Cheeger instance only fills the low bits per tier.) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct HhtlKey { + pub heel: u16, + pub hip: u16, + pub twig: u16, +} + +/// Induced sub-grid on `members` (reindexed 0..k), edges with both endpoints kept. +fn induced(grid: &Grid, members: &[usize]) -> Grid { + let mut remap = HashMap::new(); + for (i, &m) in members.iter().enumerate() { + remap.insert(m, i); + } + let edges = grid + .edges + .iter() + .filter_map(|e| match (remap.get(&e.from), remap.get(&e.to)) { + (Some(&a), Some(&b)) => Some(Edge::new(a, b, e.susceptance, e.limit)), + _ => None, + }) + .collect(); + Grid::new(members.len(), edges) +} + +/// One deterministic binary Cheeger split of `members` into (bit-0 side, bit-1 +/// side). A basin too small / too sparse to split returns everything on bit 0. 
+fn split(grid: &Grid, members: &[usize]) -> (Vec, Vec) { + if members.len() < 4 { + return (members.to_vec(), Vec::new()); + } + let sub = induced(grid, members); + if sub.edges.is_empty() { + return (members.to_vec(), Vec::new()); + } + let part = cheeger_sweep(&sub, &vec![true; sub.edges.len()]).partition; + let (mut a, mut b) = (Vec::new(), Vec::new()); + for (i, &m) in members.iter().enumerate() { + if part[i] { + a.push(m); + } else { + b.push(m); + } + } + if a.is_empty() || b.is_empty() { + (members.to_vec(), Vec::new()) + } else { + (a, b) + } +} + +/// Assign every node its `(HEEL, HIP, TWIG)` key by three nested binary Cheeger +/// splits. Deterministic: a pure function of the grid topology. +pub fn hhtl_keys(grid: &Grid) -> Vec { + let mut keys = vec![ + HhtlKey { + heel: 0, + hip: 0, + twig: 0 + }; + grid.n + ]; + let all: Vec = (0..grid.n).collect(); + let (h0, h1) = split(grid, &all); + for (heel, side) in [h0, h1].into_iter().enumerate() { + for &n in &side { + keys[n].heel = heel as u16; + } + let (p0, p1) = split(grid, &side); + for (hip, mid) in [p0, p1].into_iter().enumerate() { + for &n in &mid { + keys[n].hip = hip as u16; + } + let (t0, t1) = split(grid, &mid); + for (twig, leaf) in [t0, t1].into_iter().enumerate() { + for &n in &leaf { + keys[n].twig = twig as u16; + } + } + } + } + keys +} + +/// Per-leaf-basin algebraic connectivity `λ₂` keyed by HHTL address — the topology +/// "value" the key indexes (read once from the spectrum, deterministic). 
+pub fn basin_lambda2(grid: &Grid, keys: &[HhtlKey]) -> HashMap { + let mut groups: HashMap> = HashMap::new(); + for (n, k) in keys.iter().enumerate() { + groups.entry(*k).or_default().push(n); + } + let mut out = HashMap::new(); + for (k, members) in groups { + let sub = induced(grid, &members); + let l2 = if sub.edges.is_empty() { + 0.0 + } else { + symmetric_eigen(&sub.laplacian_of(&vec![true; sub.edges.len()]), sub.n) + .values + .get(1) + .copied() + .unwrap_or(0.0) + }; + out.insert(k, l2); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + fn grid_2x2_blocks() -> Grid { + // Two 4-cycles (2×2 blocks) weakly joined by one bridge — a clean 2-basin topology. + let mut e = Vec::new(); + for (a, b) in [(0, 1), (0, 2), (1, 3), (2, 3)] { + e.push(Edge::new(a, b, 1.0, 1.0)); + } + for (a, b) in [(4, 5), (4, 6), (5, 7), (6, 7)] { + e.push(Edge::new(a, b, 1.0, 1.0)); + } + e.push(Edge::new(3, 4, 0.01, 1.0)); // weak bridge + Grid::new(8, e) + } + + #[test] + fn key_is_deterministic_function_of_topology() { + let g = grid_2x2_blocks(); + assert_eq!(hhtl_keys(&g), hhtl_keys(&g), "same grid ⇒ same HHTL grid"); + } + + #[test] + fn the_two_blocks_get_distinct_heel_tiers() { + let g = grid_2x2_blocks(); + let k = hhtl_keys(&g); + // The weak bridge is the top Cheeger cut ⇒ block {0..3} and {4..7} split on HEEL. + let heel_a = k[0].heel; + assert!(k[1].heel == heel_a && k[2].heel == heel_a && k[3].heel == heel_a); + assert!( + k[4].heel != heel_a, + "the other block lands on the other HEEL tier" + ); + } + + #[test] + fn keys_partition_the_nodes() { + let g = grid_2x2_blocks(); + let k = hhtl_keys(&g); + // Every node has a key; nodes sharing a full key are in one leaf basin.
+ let l2 = basin_lambda2(&g, &k); + assert!(!l2.is_empty(), "at least one keyed basin"); + assert_eq!(k.len(), g.n); + } +} diff --git a/crates/perturbation-sim/src/lib.rs b/crates/perturbation-sim/src/lib.rs index 23f4243b..31fa9969 100644 --- a/crates/perturbation-sim/src/lib.rs +++ b/crates/perturbation-sim/src/lib.rs @@ -57,6 +57,7 @@ pub mod columns; pub mod eigen; pub mod flow; pub mod graph; +pub mod hhtl; pub mod ingest; pub mod model; pub mod perturbation; @@ -78,6 +79,7 @@ pub use columns::{study_member_specs, Encoding, SoaMemberSpec, INERTIA}; pub use eigen::{symmetric_eigen, Eigen}; pub use flow::{dc_flows, lodf}; pub use graph::{Edge, Grid}; +pub use hhtl::{basin_lambda2, hhtl_keys, HhtlKey}; pub use ingest::{estimate_snom_mva, from_pypsa_csv, PypsaImport}; pub use model::{ apply_aging, assess_capability, edge_age_factors, scale_susceptance, with_uniform_derate, From 35041fda05e33ed91e5d88b8a2c205699ea27c0c Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 17:45:56 +0000 Subject: [PATCH 5/5] =?UTF-8?q?ci(rust-test):=20debuginfo=3D0=20on=20the?= =?UTF-8?q?=20test=20job=20=E2=80=94=20link-footprint=20relief=20(TD-CI-CO?= =?UTF-8?q?VERAGE-MOLD-1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `test` job has begun hitting the same disk/RSS link cliff already mitigated on `test-with-coverage` (b56bb2cd): `ld terminated with signal 7 [Bus error]` + an LLVM crash at the `cargo test --no-run` link step of test_sql_query / intervene_counterfactual. Root cause is link-footprint growth, NOT a logic break (a layout break would fail an assertion, not SIGBUS at link). PR #507 (0c6ef02c, +4055 lines across causal-edge ce64-v2 layout + cognitive-shader-driver MailboxSoaOwner / SurrealMailboxView) grew the integration-test object set enough to tip the previously-marginal `test`-job link over the same ceiling. 
It surfaced on the first full-workspace CI run after #507 (the intervening PRs are root-excluded crates, so their CI never linked the post-#507 tree). Fix: give the `test` job a job-level RUSTFLAGS with `-C debuginfo=0` (parity with the coverage job). debuginfo carries no value in CI (no debugger is attached); dropping it cut the coverage job's per-binary link ~930 MB -> ~252 MB (-73%, measured in b56bb2cd) and relieves both the mold/GNU-ld RSS and the disk ceiling. mold is already installed on this job. Side effect: the job gets its own Swatinem cache key (first run repopulates). This is a fence (buys headroom), not a root reduction of #507's legitimate codegen — documented as such in the TD-CI-COVERAGE-MOLD-1 ledger addendum, including the secular-growth caveat and the separate (warns-not-fails) intervene_counterfactual.rs deprecated-API debt. --- .claude/board/TECH_DEBT.md | 32 ++++++++++++++++++++++++++++++++ .github/workflows/rust-test.yml | 16 ++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/.claude/board/TECH_DEBT.md b/.claude/board/TECH_DEBT.md index ea56ea6f..d7980e13 100644 --- a/.claude/board/TECH_DEBT.md +++ b/.claude/board/TECH_DEBT.md @@ -147,6 +147,38 @@ timing-race hypothesis (read the actual `cargo llvm-cov` log with a scoped token Cross-ref: `.github/workflows/rust-test.yml` (test job mold step vs coverage job); `bindspace-singleton-to-mailbox-soa-v1` (the migration this is NOT). +**2026-06-16 addendum — the `test` job now hits the SAME cliff; fix extended to +it (branch `claude/ci-test-job-debuginfo0`).** The cliff this entry called out as +*"a 2/50 intermittent"* on the coverage job has now surfaced on the **plain +`test`** job: `ld terminated with signal 7 [Bus error]` + an LLVM crash dump at +the `cargo test --no-run` link step of `test_sql_query` / `intervene_counterfactual`. 
+Root-caused to a **link-footprint growth, not a logic break** (a layout break would +fail an assertion, not SIGBUS at link): **PR #507** (`0c6ef02c`, +4055/−1048 across +`causal-edge` edge.rs/layout.rs — the ce64-v2 layout — and `cognitive-shader-driver` +mailbox_soa.rs/driver.rs/planner_bridge.rs — MailboxSoaOwner + SurrealMailboxView, +D-PG-6) grew the object-file set linked by the lance-graph integration tests enough +to tip the previously-marginal `test`-job link over the same disk/RSS ceiling. It +surfaced on the first full-workspace CI run *after* #507 (the two PRs between, #509 +and the perturbation-sim #511, are root-`exclude`d so their CI never linked the +post-#507 tree — which is why this is "the first failing PR" yet not its fault). +**This is a FENCE, not a root reduction:** it does not shrink #507's legitimate +codegen; it removes the dead `debuginfo=1` weight (CI never opens a debugger) to +buy headroom — exactly the b56bb2cd lever, now applied to the `test` job. **Fix:** +job-level `RUSTFLAGS: "-C debuginfo=0 -C target-cpu=x86-64-v3"` on `test` (parity +with `test-with-coverage`; mold already installed). Side effect: the `test` job +gets its own Swatinem cache key (first run repopulates). **Confirm** on the next +green `test` run. **Residual debt if it recurs after this:** the footprint is on a +secular upward trend (every cognitive-layer PR adds codegen) — the durable fix is a +bigger runner or splitting the integration-test link set, not repeatedly shaving +flags. Separately, #507 left `intervene_counterfactual.rs:133/165` calling the +**deprecated** `CausalEdge64::inference_type()` (the consumer-migration commit +`8131c480` lives on the unmerged `claude/continue-ndarray-x0Oaw`) — that WARNS, does +not fail (v1 default routes through the canonical mapping per I-LEGACY-API-FEATURE- +GATED); tracked here as a separate latent item, not fixed on this CI branch. 
+Cross-ref: `.github/workflows/rust-test.yml` (now both jobs at `debuginfo=0`); PR +#507 (`0c6ef02c`); `claude/continue-ndarray-x0Oaw` (the pending ce64-v2 consumer +migration). + ### TD-UNBUNDLE-FROM-1 — `unbundle_from` is NOT the inverse of `bundle_into` (2026-06-07) **Open.** `crates/lance-graph-planner/src/cache/kv_bundle.rs` — `unbundle_from` diff --git a/.github/workflows/rust-test.yml b/.github/workflows/rust-test.yml index d83581ea..f2da8693 100644 --- a/.github/workflows/rust-test.yml +++ b/.github/workflows/rust-test.yml @@ -29,6 +29,22 @@ jobs: test: runs-on: ubuntu-24.04 timeout-minutes: 30 + env: + # Override the workflow-level debuginfo=1 for this job too (parity with + # test-with-coverage, TD-CI-COVERAGE-MOLD-1). The `test` job links the + # full lance+datafusion integration-test set at the SAME disk/RSS cliff + # the coverage job hit — and #507 (+4055 lines across causal-edge + + # cognitive-shader-driver: ce64-v2 layout + MailboxSoaOwner/ + # SurrealMailboxView) grew that link footprint enough to tip the + # previously-marginal link into a hard `ld` SIGBUS (signal 7 = object + # file truncated when the runner partition fills mid-link). debuginfo=1 + # carried no value here (CI never opens a debugger); dropping it cut the + # coverage job's per-binary link from ~930 MB to ~252 MB (-73%, measured + # in b56bb2cd) and relieves BOTH ceilings (mold/GNU-ld RSS + disk). mold + # is already installed below. Note: a job-level RUSTFLAGS gives this job + # its own Swatinem cache key — the first run after this change + # repopulates the test cache. + RUSTFLAGS: "-C debuginfo=0 -C target-cpu=x86-64-v3" defaults: run: working-directory: lance-graph