Skip to content

Commit 8d87ef0

Browse files
committed
feat(perturbation-sim): probe 1 — CAKES + CHAODA over HHTL basins
The CLAM-family similarity (attraction) / anomaly (repulsion) pair, applied to grid resilience exactly as the CAKES/CHAODA picture frames it (papillary muscles, terrain tiles, invoices — domain-agnostic; only the feature adapter is grid-specific): - HHTL = the family basin ("where in the tree", the GUID-key cascade). - CAKES (cakes_neighbors): attraction — a basin's k nearest relatives by resilience-feature distance ("who looks similar / who are my relatives"). - CHAODA (chaoda_scores / anomaly_ranking): repulsion — per-basin kNN-distance anomaly score, normalized [0,1]; the top-anomaly basin is the fail-first compartment ("why am I different"). CHAODA_FLAG=0.75 mirrors ndarray CLAM's flag. - resilience_basin_features: per-basin [λ₂, size, inertia] rows (topology / scale / buffer — the three orthogonal axes), min-max normalized per axis. Self-contained CHAODA-lite (one kNN scorer), zero-dep; ndarray::clam's full ClamTree ensemble is the gated production path (no local ndarray sibling). Example `chaoda` lights up the planted brittle block (synthetic 8×8 → basin 1.1.0 flagged score 1.000) and prints its CAKES relatives; runs on real PyPSA core via args. Tests: +4 (outlier-flagged, CAKES-pulls-similar, degenerate-safe, adapter normalized+deterministic). fmt + clippy --all-targets -D warnings clean; 82 tests.
1 parent 33d66ca commit 8d87ef0

4 files changed

Lines changed: 394 additions & 0 deletions

File tree

crates/perturbation-sim/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,7 @@ path = "examples/calibrate.rs"
100100
[[example]]
101101
name = "hhtl_grid"
102102
path = "examples/hhtl_grid.rs"
103+
104+
[[example]]
105+
name = "chaoda"
106+
path = "examples/chaoda.rs"
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
//! CAKES + CHAODA over the HHTL basins of a real grid core — the similarity
2+
//! (attraction) / anomaly (repulsion) pair from the CLAM family, scoring grid
3+
//! compartments the way CHAODA scores papillary muscles or terrain tiles.
4+
//!
5+
//! HHTL is the family basin ("where"); CAKES finds each basin's relatives ("who
6+
//! looks similar"); CHAODA scores how far a basin sits from its family ("why am I
7+
//! different") — the top-anomaly basin is the fail-first compartment.
8+
//!
9+
//! Run: cargo run --release --manifest-path crates/perturbation-sim/Cargo.toml \
10+
//! --example chaoda -- /tmp/pypsa/buses.csv /tmp/pypsa/lines.csv ES
11+
//! (no args → a synthetic 8×8 grid with one deliberately weakened block).
12+
13+
use perturbation_sim::{
14+
anomaly_ranking, cakes_neighbors, chaoda_scores, resilience_basin_features, Edge, Grid,
15+
HhtlKey, CHAODA_FLAG,
16+
};
17+
18+
/// Synthetic 8×8 lattice with one corner block weakened (low-susceptance internal
19+
/// edges) — a planted fail-first compartment for the no-data demo.
20+
fn synthetic() -> Grid {
21+
let (rows, cols) = (8usize, 8usize);
22+
let id = |r: usize, c: usize| r * cols + c;
23+
let mut e = Vec::new();
24+
for r in 0..rows {
25+
for c in 0..cols {
26+
// The 3×3 top-left block is brittle (weak internal coupling).
27+
let brittle = r < 3 && c < 3;
28+
let b = if brittle { 0.05 } else { 1.0 };
29+
if c + 1 < cols {
30+
e.push(Edge::new(id(r, c), id(r, c + 1), b, 1.0));
31+
}
32+
if r + 1 < rows {
33+
e.push(Edge::new(id(r, c), id(r + 1, c), b, 1.0));
34+
}
35+
}
36+
}
37+
Grid::new(rows * cols, e)
38+
}
39+
40+
/// Deterministic per-bus inertia proxy (real `H` is not in PyPSA topology): a small
41+
/// fixed cycle, decoupled from wiring, so the buffer axis is an independent input —
42+
/// the structure (basins as families, the outlier as fail-first) holds regardless.
43+
fn proxy_inertia(n: usize) -> Vec<f64> {
44+
(0..n).map(|i| 2.0 + (i % 5) as f64).collect()
45+
}
46+
47+
fn fmt_key(k: &HhtlKey) -> String {
48+
format!("{}.{}.{}", k.heel, k.hip, k.twig)
49+
}
50+
51+
fn main() {
52+
let args: Vec<String> = std::env::args().collect();
53+
let grid = if args.len() >= 3 {
54+
let buses = std::fs::read_to_string(&args[1]).expect("buses.csv");
55+
let lines = std::fs::read_to_string(&args[2]).expect("lines.csv");
56+
let cc = args.get(3).map(|s| s.as_str()).unwrap_or("ES");
57+
let imp = perturbation_sim::from_pypsa_csv(&buses, &lines, Some(cc))
58+
.expect("import")
59+
.largest_component();
60+
println!("grid: {cc} PyPSA core — {} buses", imp.grid.n);
61+
imp.grid
62+
} else {
63+
let g = synthetic();
64+
println!(
65+
"grid: synthetic 8×8 with a planted brittle 3×3 block — {} buses",
66+
g.n
67+
);
68+
g
69+
};
70+
71+
let h = proxy_inertia(grid.n);
72+
let (basins, rows) = resilience_basin_features(&grid, &h, 0.2);
73+
let scores = chaoda_scores(&rows, 2);
74+
75+
println!("\n== HHTL family basins — CAKES (similar) + CHAODA (anomalous) ==");
76+
println!(
77+
" {:>10} {:>6} {:>8} {:>8} {:>8} flag",
78+
"basin", "size", "λ₂n", "inertia", "CHAODA"
79+
);
80+
for (i, k) in basins.iter().enumerate() {
81+
let flag = if scores[i] >= CHAODA_FLAG {
82+
"◀ ANOMALY"
83+
} else {
84+
""
85+
};
86+
println!(
87+
" {:>10} {:>6.2} {:>8.3} {:>8.3} {:>8.3} {}",
88+
fmt_key(k),
89+
rows[i][1],
90+
rows[i][0],
91+
rows[i][2],
92+
scores[i],
93+
flag
94+
);
95+
}
96+
97+
// CHAODA: the fail-first compartment is the top anomaly.
98+
let rank = anomaly_ranking(&rows, 2);
99+
if let Some(&(top, score)) = rank.first() {
100+
println!(
101+
"\n CHAODA → fail-first compartment: basin {} (score {:.3}{})\n \
102+
\"why am I different\" — the basin whose resilience profile deviates most\n \
103+
from its family. {} basins; flag threshold {CHAODA_FLAG}.",
104+
fmt_key(&basins[top]),
105+
score,
106+
if score >= CHAODA_FLAG {
107+
", FLAGGED"
108+
} else {
109+
""
110+
},
111+
basins.len()
112+
);
113+
114+
// CAKES: the top-anomaly basin's nearest relatives ("who looks similar").
115+
let nbrs = cakes_neighbors(&rows, top, 3);
116+
let rel: Vec<String> = nbrs
117+
.iter()
118+
.map(|(i, d)| format!("{} (d={:.3})", fmt_key(&basins[*i]), d))
119+
.collect();
120+
println!(
121+
" CAKES → {}'s nearest relatives: [{}]\n \
122+
attraction vs repulsion: the family it resembles, and how far it still sits from them.",
123+
fmt_key(&basins[top]),
124+
rel.join(", ")
125+
);
126+
}
127+
128+
println!(
129+
"\n CAKES pulls in the similar; CHAODA pushes out the unusual.\n \
130+
Family basin (HHTL) + deviation-from-family (CHAODA) = the fail-first locator.\n \
131+
(CHAODA-lite kNN scorer; ndarray::clam's ClamTree ensemble is the gated production path.)"
132+
);
133+
}
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
//! **CAKES + CHAODA over HHTL-keyed basins** — the similarity (attraction) /
2+
//! anomaly (repulsion) pair from the CLAM family, applied to grid resilience.
3+
//!
4+
//! The picture (cancer-detection / foveation framing): three layers cooperate —
5+
//!
6+
//! - **HHTL** = the semantic *family basin* (the GUID-key cascade, [`crate::hhtl`])
7+
//! — "**where** in the tree".
8+
//! - **CAKES** ([`cakes_neighbors`]) = *attraction*: the `k` nearest basins by
9+
//! resilience-feature distance — "**who are my relatives / who looks similar**".
10+
//! - **CHAODA** ([`chaoda_scores`]) = *repulsion*: a per-basin anomaly score = how
11+
//! far a basin sits from its family — "**who looks wrong / why am I different**".
12+
//! The high-anomaly basin is the **fail-first compartment**.
13+
//!
14+
//! `CAKES pulls in the similar + CHAODA pushes out the unusual = meaningful
15+
//! intelligence`. The pair is **domain-agnostic** — the same two functions score
16+
//! papillary muscles, terrain tiles, invoices, or grid basins; only the feature
17+
//! adapter ([`resilience_basin_features`]) is grid-specific.
18+
//!
19+
//! Zero-dep, deterministic. This is a CHAODA-**lite** (a single kNN-distance
20+
//! scorer), **not** ndarray's full `ClamTree::anomaly_scores` graph ensemble; the
21+
//! `ndarray-clam` bridge (gated behind the `ndarray-simd` feature, since there is
22+
//! no local ndarray sibling here) is the production path. The [`CHAODA_FLAG`]
23+
//! threshold (0.75) mirrors ndarray's CLAM anomaly flag so the lite and full
24+
//! scorers agree on the binary decision.
25+
26+
use crate::buffer::inertia_buffer_column;
27+
use crate::graph::Grid;
28+
use crate::hhtl::{basin_lambda2, hhtl_keys, HhtlKey};
29+
use std::collections::BTreeMap;
30+
31+
/// The CHAODA anomaly flag threshold: a normalized score `≥ 0.75` is "anomalous"
32+
/// (matches ndarray CLAM's `ClamTree::anomaly_scores` flag, so the lite scorer here
33+
/// and the full ensemble agree on the binary call).
34+
pub const CHAODA_FLAG: f64 = 0.75;
35+
36+
/// Euclidean distance between two feature rows (the metric both CAKES and CHAODA
37+
/// read; rows must be equal length).
38+
fn dist(a: &[f64], b: &[f64]) -> f64 {
39+
a.iter()
40+
.zip(b)
41+
.map(|(x, y)| (x - y) * (x - y))
42+
.sum::<f64>()
43+
.sqrt()
44+
}
45+
46+
/// **CAKES (attraction).** The `k` nearest rows to `query` by feature distance —
47+
/// "who are my relatives". Returns `(index, distance)` ascending, excluding `query`
48+
/// itself; `k` is clamped to the available population.
49+
pub fn cakes_neighbors(rows: &[Vec<f64>], query: usize, k: usize) -> Vec<(usize, f64)> {
50+
if query >= rows.len() {
51+
return Vec::new();
52+
}
53+
let mut d: Vec<(usize, f64)> = rows
54+
.iter()
55+
.enumerate()
56+
.filter(|(i, _)| *i != query)
57+
.map(|(i, r)| (i, dist(&rows[query], r)))
58+
.collect();
59+
d.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
60+
d.truncate(k.min(d.len()));
61+
d
62+
}
63+
64+
/// **CHAODA (repulsion).** Per-row anomaly score = the mean distance to its `k`
65+
/// nearest neighbors, min-max **normalized to `[0,1]`** across all rows. High = far
66+
/// from its family = "why am I different" = the fail-first compartment. A row that
67+
/// is feature-isolated scores near `1.0`; tight family members score near `0.0`.
68+
///
69+
/// This is the kNN-distance CHAODA-lite (one scorer), not the full ClamTree
70+
/// ensemble. Degenerate (`< 2` rows, or all rows identical) yields all-zeros, never
71+
/// `NaN`.
72+
pub fn chaoda_scores(rows: &[Vec<f64>], k: usize) -> Vec<f64> {
73+
let n = rows.len();
74+
if n < 2 {
75+
return vec![0.0; n];
76+
}
77+
let raw: Vec<f64> = (0..n)
78+
.map(|i| {
79+
let nbrs = cakes_neighbors(rows, i, k);
80+
if nbrs.is_empty() {
81+
0.0
82+
} else {
83+
nbrs.iter().map(|(_, d)| *d).sum::<f64>() / nbrs.len() as f64
84+
}
85+
})
86+
.collect();
87+
let lo = raw.iter().copied().fold(f64::INFINITY, f64::min);
88+
let hi = raw.iter().copied().fold(f64::NEG_INFINITY, f64::max);
89+
let span = hi - lo;
90+
raw.iter()
91+
.map(|&r| if span > 0.0 { (r - lo) / span } else { 0.0 })
92+
.collect()
93+
}
94+
95+
/// CHAODA anomaly ranking: `(row index, score)` sorted by score descending — the
96+
/// fail-first compartment is the head of the list.
97+
pub fn anomaly_ranking(rows: &[Vec<f64>], k: usize) -> Vec<(usize, f64)> {
98+
let scores = chaoda_scores(rows, k);
99+
let mut r: Vec<(usize, f64)> = scores.into_iter().enumerate().collect();
100+
r.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
101+
r
102+
}
103+
104+
/// Build per-basin resilience feature rows from a grid + per-bus inertia. Each row
105+
/// is `[λ₂_norm, size_norm, inertia_norm]` — the **topology / scale / buffer** axes
106+
/// the CAKES/CHAODA pair reasons over (the three orthogonal resilience axes). Rows
107+
/// are min-max normalized per axis across basins so the Euclidean metric weights
108+
/// them comparably. Returns the basin keys (ordered) aligned with the rows.
109+
///
110+
/// `inertia_h` is per-bus inertia (length `grid.n`); when real `H` is unavailable a
111+
/// deterministic proxy is fine — the *structure* (basins as families, the outlier as
112+
/// fail-first) holds regardless (buffer ⊥ topology by the key/value split).
113+
pub fn resilience_basin_features(
114+
grid: &Grid,
115+
inertia_h: &[f64],
116+
df_band: f64,
117+
) -> (Vec<HhtlKey>, Vec<Vec<f64>>) {
118+
let keys = hhtl_keys(grid);
119+
let l2 = basin_lambda2(grid, &keys);
120+
let buf = inertia_buffer_column(inertia_h, df_band);
121+
122+
// Aggregate per basin: (node count, summed normalized buffer).
123+
let mut agg: BTreeMap<(u16, u16, u16), (usize, f64)> = BTreeMap::new();
124+
for (n, key) in keys.iter().enumerate() {
125+
let e = agg.entry((key.heel, key.hip, key.twig)).or_insert((0, 0.0));
126+
e.0 += 1;
127+
e.1 += *buf.get(n).unwrap_or(&0.0) as f64;
128+
}
129+
130+
// Per-basin raw triples in deterministic key order.
131+
let mut basin_keys = Vec::with_capacity(agg.len());
132+
let mut raw: Vec<[f64; 3]> = Vec::with_capacity(agg.len());
133+
for (&(heel, hip, twig), &(count, buf_sum)) in &agg {
134+
let key = HhtlKey { heel, hip, twig };
135+
let lam = l2.get(&key).copied().unwrap_or(0.0);
136+
let mean_buf = if count > 0 {
137+
buf_sum / count as f64
138+
} else {
139+
0.0
140+
};
141+
basin_keys.push(key);
142+
raw.push([lam, count as f64, mean_buf]);
143+
}
144+
145+
// Min-max normalize each of the 3 axes across basins.
146+
let mut rows: Vec<Vec<f64>> = raw.iter().map(|r| r.to_vec()).collect();
147+
for axis in 0..3 {
148+
let lo = raw.iter().map(|r| r[axis]).fold(f64::INFINITY, f64::min);
149+
let hi = raw
150+
.iter()
151+
.map(|r| r[axis])
152+
.fold(f64::NEG_INFINITY, f64::max);
153+
let span = hi - lo;
154+
if span > 0.0 {
155+
for (row, r) in rows.iter_mut().zip(&raw) {
156+
row[axis] = (r[axis] - lo) / span;
157+
}
158+
} else {
159+
for row in rows.iter_mut() {
160+
row[axis] = 0.0;
161+
}
162+
}
163+
}
164+
(basin_keys, rows)
165+
}
166+
167+
#[cfg(test)]
168+
mod tests {
169+
use super::*;
170+
171+
/// Four tight family members + one far outlier. CHAODA must flag the outlier
172+
/// (score 1.0, ≥ flag) and keep the family low (the repulsion picture).
173+
#[test]
174+
fn chaoda_flags_the_isolated_outlier() {
175+
let rows = vec![
176+
vec![0.10, 0.10, 0.10],
177+
vec![0.12, 0.09, 0.11],
178+
vec![0.09, 0.11, 0.10],
179+
vec![0.11, 0.10, 0.09],
180+
vec![0.95, 0.95, 0.95], // the anomaly — far from the family
181+
];
182+
let scores = chaoda_scores(&rows, 2);
183+
let rank = anomaly_ranking(&rows, 2);
184+
assert_eq!(rank[0].0, 4, "the outlier is the top anomaly");
185+
assert!((scores[4] - 1.0).abs() < 1e-9, "outlier normalizes to 1.0");
186+
assert!(scores[4] >= CHAODA_FLAG, "outlier crosses the CHAODA flag");
187+
for &i in &[0usize, 1, 2, 3] {
188+
assert!(scores[i] < CHAODA_FLAG, "family member {i} stays unflagged");
189+
}
190+
}
191+
192+
/// CAKES (attraction): a query inside the family pulls in its family members,
193+
/// not the outlier — "who are my relatives".
194+
#[test]
195+
fn cakes_pulls_in_the_similar_not_the_outlier() {
196+
let rows = vec![
197+
vec![0.10, 0.10, 0.10],
198+
vec![0.12, 0.09, 0.11],
199+
vec![0.09, 0.11, 0.10],
200+
vec![0.95, 0.95, 0.95], // outlier
201+
];
202+
let nbrs = cakes_neighbors(&rows, 0, 2);
203+
let idx: Vec<usize> = nbrs.iter().map(|(i, _)| *i).collect();
204+
assert!(
205+
idx.contains(&1) && idx.contains(&2),
206+
"relatives are the family"
207+
);
208+
assert!(!idx.contains(&3), "the outlier is NOT a relative");
209+
}
210+
211+
/// Degenerate inputs never NaN / never panic.
212+
#[test]
213+
fn chaoda_degenerate_is_safe() {
214+
assert!(chaoda_scores(&[], 3).is_empty());
215+
assert_eq!(chaoda_scores(&[vec![1.0, 2.0]], 3), vec![0.0]);
216+
// All-identical rows → no span → all zeros.
217+
let same = vec![vec![0.5, 0.5], vec![0.5, 0.5], vec![0.5, 0.5]];
218+
assert!(chaoda_scores(&same, 2).iter().all(|&s| s == 0.0));
219+
}
220+
221+
/// The resilience adapter: one row per distinct basin, every feature in [0,1],
222+
/// deterministic, and CHAODA runs over it. Two clean blocks joined by a weak
223+
/// bridge → 2 basins; a uniform-inertia proxy keeps the buffer axis flat so the
224+
/// split is carried by topology, as expected.
225+
#[test]
226+
fn resilience_adapter_builds_normalized_basin_rows() {
227+
// Two 4-cliques + a weak bridge (the hhtl two-block topology).
228+
let mut e = Vec::new();
229+
for (a, b) in [(0, 1), (0, 2), (1, 3), (2, 3)] {
230+
e.push(crate::graph::Edge::new(a, b, 1.0, 1.0));
231+
}
232+
for (a, b) in [(4, 5), (4, 6), (5, 7), (6, 7)] {
233+
e.push(crate::graph::Edge::new(a, b, 1.0, 1.0));
234+
}
235+
e.push(crate::graph::Edge::new(3, 4, 0.01, 1.0));
236+
let grid = Grid::new(8, e);
237+
let h = vec![4.0; grid.n]; // uniform proxy inertia
238+
let (keys, rows) = resilience_basin_features(&grid, &h, 0.2);
239+
assert_eq!(keys.len(), rows.len(), "rows align with basins");
240+
assert!(keys.len() >= 2, "the weak bridge gives at least two basins");
241+
for r in &rows {
242+
assert_eq!(r.len(), 3, "[λ₂, size, inertia]");
243+
assert!(r.iter().all(|&x| (0.0..=1.0).contains(&x)), "normalized");
244+
}
245+
// Deterministic: same grid ⇒ same rows.
246+
let (_, rows2) = resilience_basin_features(&grid, &h, 0.2);
247+
assert_eq!(rows, rows2);
248+
// CHAODA runs and returns one score per basin in range.
249+
let scores = chaoda_scores(&rows, 1);
250+
assert_eq!(scores.len(), keys.len());
251+
assert!(scores.iter().all(|&s| (0.0..=1.0).contains(&s)));
252+
}
253+
}

0 commit comments

Comments
 (0)