|
| 1 | +//! **CAKES + CHAODA over HHTL-keyed basins** — the similarity (attraction) / |
| 2 | +//! anomaly (repulsion) pair from the CLAM family, applied to grid resilience. |
| 3 | +//! |
| 4 | +//! The picture (cancer-detection / foveation framing): three layers cooperate — |
| 5 | +//! |
| 6 | +//! - **HHTL** = the semantic *family basin* (the GUID-key cascade, [`crate::hhtl`]) |
| 7 | +//! — "**where** in the tree". |
| 8 | +//! - **CAKES** ([`cakes_neighbors`]) = *attraction*: the `k` nearest basins by |
| 9 | +//! resilience-feature distance — "**who are my relatives / who looks similar**". |
| 10 | +//! - **CHAODA** ([`chaoda_scores`]) = *repulsion*: a per-basin anomaly score = how |
| 11 | +//! far a basin sits from its family — "**who looks wrong / why am I different**". |
| 12 | +//! The high-anomaly basin is the **fail-first compartment**. |
| 13 | +//! |
| 14 | +//! `CAKES pulls in the similar + CHAODA pushes out the unusual = meaningful |
| 15 | +//! intelligence`. The pair is **domain-agnostic** — the same two functions score |
| 16 | +//! papillary muscles, terrain tiles, invoices, or grid basins; only the feature |
| 17 | +//! adapter ([`resilience_basin_features`]) is grid-specific. |
| 18 | +//! |
| 19 | +//! Zero-dep, deterministic. This is a CHAODA-**lite** (a single kNN-distance |
| 20 | +//! scorer), **not** ndarray's full `ClamTree::anomaly_scores` graph ensemble; the |
| 21 | +//! `ndarray-clam` bridge (gated behind the `ndarray-simd` feature, since there is |
| 22 | +//! no local ndarray sibling here) is the production path. The [`CHAODA_FLAG`] |
| 23 | +//! threshold (0.75) mirrors ndarray's CLAM anomaly flag so the lite and full |
| 24 | +//! scorers agree on the binary decision. |
| 25 | +
|
| 26 | +use crate::buffer::inertia_buffer_column; |
| 27 | +use crate::graph::Grid; |
| 28 | +use crate::hhtl::{basin_lambda2, hhtl_keys, HhtlKey}; |
| 29 | +use std::collections::BTreeMap; |
| 30 | + |
| 31 | +/// The CHAODA anomaly flag threshold: a normalized score `≥ 0.75` is "anomalous" |
| 32 | +/// (matches ndarray CLAM's `ClamTree::anomaly_scores` flag, so the lite scorer here |
| 33 | +/// and the full ensemble agree on the binary call). |
| 34 | +pub const CHAODA_FLAG: f64 = 0.75; |
| 35 | + |
| 36 | +/// Euclidean distance between two feature rows (the metric both CAKES and CHAODA |
| 37 | +/// read; rows must be equal length). |
| 38 | +fn dist(a: &[f64], b: &[f64]) -> f64 { |
| 39 | + a.iter() |
| 40 | + .zip(b) |
| 41 | + .map(|(x, y)| (x - y) * (x - y)) |
| 42 | + .sum::<f64>() |
| 43 | + .sqrt() |
| 44 | +} |
| 45 | + |
| 46 | +/// **CAKES (attraction).** The `k` nearest rows to `query` by feature distance — |
| 47 | +/// "who are my relatives". Returns `(index, distance)` ascending, excluding `query` |
| 48 | +/// itself; `k` is clamped to the available population. |
| 49 | +pub fn cakes_neighbors(rows: &[Vec<f64>], query: usize, k: usize) -> Vec<(usize, f64)> { |
| 50 | + if query >= rows.len() { |
| 51 | + return Vec::new(); |
| 52 | + } |
| 53 | + let mut d: Vec<(usize, f64)> = rows |
| 54 | + .iter() |
| 55 | + .enumerate() |
| 56 | + .filter(|(i, _)| *i != query) |
| 57 | + .map(|(i, r)| (i, dist(&rows[query], r))) |
| 58 | + .collect(); |
| 59 | + d.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)); |
| 60 | + d.truncate(k.min(d.len())); |
| 61 | + d |
| 62 | +} |
| 63 | + |
| 64 | +/// **CHAODA (repulsion).** Per-row anomaly score = the mean distance to its `k` |
| 65 | +/// nearest neighbors, min-max **normalized to `[0,1]`** across all rows. High = far |
| 66 | +/// from its family = "why am I different" = the fail-first compartment. A row that |
| 67 | +/// is feature-isolated scores near `1.0`; tight family members score near `0.0`. |
| 68 | +/// |
| 69 | +/// This is the kNN-distance CHAODA-lite (one scorer), not the full ClamTree |
| 70 | +/// ensemble. Degenerate (`< 2` rows, or all rows identical) yields all-zeros, never |
| 71 | +/// `NaN`. |
| 72 | +pub fn chaoda_scores(rows: &[Vec<f64>], k: usize) -> Vec<f64> { |
| 73 | + let n = rows.len(); |
| 74 | + if n < 2 { |
| 75 | + return vec![0.0; n]; |
| 76 | + } |
| 77 | + let raw: Vec<f64> = (0..n) |
| 78 | + .map(|i| { |
| 79 | + let nbrs = cakes_neighbors(rows, i, k); |
| 80 | + if nbrs.is_empty() { |
| 81 | + 0.0 |
| 82 | + } else { |
| 83 | + nbrs.iter().map(|(_, d)| *d).sum::<f64>() / nbrs.len() as f64 |
| 84 | + } |
| 85 | + }) |
| 86 | + .collect(); |
| 87 | + let lo = raw.iter().copied().fold(f64::INFINITY, f64::min); |
| 88 | + let hi = raw.iter().copied().fold(f64::NEG_INFINITY, f64::max); |
| 89 | + let span = hi - lo; |
| 90 | + raw.iter() |
| 91 | + .map(|&r| if span > 0.0 { (r - lo) / span } else { 0.0 }) |
| 92 | + .collect() |
| 93 | +} |
| 94 | + |
| 95 | +/// CHAODA anomaly ranking: `(row index, score)` sorted by score descending — the |
| 96 | +/// fail-first compartment is the head of the list. |
| 97 | +pub fn anomaly_ranking(rows: &[Vec<f64>], k: usize) -> Vec<(usize, f64)> { |
| 98 | + let scores = chaoda_scores(rows, k); |
| 99 | + let mut r: Vec<(usize, f64)> = scores.into_iter().enumerate().collect(); |
| 100 | + r.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); |
| 101 | + r |
| 102 | +} |
| 103 | + |
| 104 | +/// Build per-basin resilience feature rows from a grid + per-bus inertia. Each row |
| 105 | +/// is `[λ₂_norm, size_norm, inertia_norm]` — the **topology / scale / buffer** axes |
| 106 | +/// the CAKES/CHAODA pair reasons over (the three orthogonal resilience axes). Rows |
| 107 | +/// are min-max normalized per axis across basins so the Euclidean metric weights |
| 108 | +/// them comparably. Returns the basin keys (ordered) aligned with the rows. |
| 109 | +/// |
| 110 | +/// `inertia_h` is per-bus inertia (length `grid.n`); when real `H` is unavailable a |
| 111 | +/// deterministic proxy is fine — the *structure* (basins as families, the outlier as |
| 112 | +/// fail-first) holds regardless (buffer ⊥ topology by the key/value split). |
| 113 | +pub fn resilience_basin_features( |
| 114 | + grid: &Grid, |
| 115 | + inertia_h: &[f64], |
| 116 | + df_band: f64, |
| 117 | +) -> (Vec<HhtlKey>, Vec<Vec<f64>>) { |
| 118 | + let keys = hhtl_keys(grid); |
| 119 | + let l2 = basin_lambda2(grid, &keys); |
| 120 | + let buf = inertia_buffer_column(inertia_h, df_band); |
| 121 | + |
| 122 | + // Aggregate per basin: (node count, summed normalized buffer). |
| 123 | + let mut agg: BTreeMap<(u16, u16, u16), (usize, f64)> = BTreeMap::new(); |
| 124 | + for (n, key) in keys.iter().enumerate() { |
| 125 | + let e = agg.entry((key.heel, key.hip, key.twig)).or_insert((0, 0.0)); |
| 126 | + e.0 += 1; |
| 127 | + e.1 += *buf.get(n).unwrap_or(&0.0) as f64; |
| 128 | + } |
| 129 | + |
| 130 | + // Per-basin raw triples in deterministic key order. |
| 131 | + let mut basin_keys = Vec::with_capacity(agg.len()); |
| 132 | + let mut raw: Vec<[f64; 3]> = Vec::with_capacity(agg.len()); |
| 133 | + for (&(heel, hip, twig), &(count, buf_sum)) in &agg { |
| 134 | + let key = HhtlKey { heel, hip, twig }; |
| 135 | + let lam = l2.get(&key).copied().unwrap_or(0.0); |
| 136 | + let mean_buf = if count > 0 { |
| 137 | + buf_sum / count as f64 |
| 138 | + } else { |
| 139 | + 0.0 |
| 140 | + }; |
| 141 | + basin_keys.push(key); |
| 142 | + raw.push([lam, count as f64, mean_buf]); |
| 143 | + } |
| 144 | + |
| 145 | + // Min-max normalize each of the 3 axes across basins. |
| 146 | + let mut rows: Vec<Vec<f64>> = raw.iter().map(|r| r.to_vec()).collect(); |
| 147 | + for axis in 0..3 { |
| 148 | + let lo = raw.iter().map(|r| r[axis]).fold(f64::INFINITY, f64::min); |
| 149 | + let hi = raw |
| 150 | + .iter() |
| 151 | + .map(|r| r[axis]) |
| 152 | + .fold(f64::NEG_INFINITY, f64::max); |
| 153 | + let span = hi - lo; |
| 154 | + if span > 0.0 { |
| 155 | + for (row, r) in rows.iter_mut().zip(&raw) { |
| 156 | + row[axis] = (r[axis] - lo) / span; |
| 157 | + } |
| 158 | + } else { |
| 159 | + for row in rows.iter_mut() { |
| 160 | + row[axis] = 0.0; |
| 161 | + } |
| 162 | + } |
| 163 | + } |
| 164 | + (basin_keys, rows) |
| 165 | +} |
| 166 | + |
| 167 | +#[cfg(test)] |
| 168 | +mod tests { |
| 169 | + use super::*; |
| 170 | + |
| 171 | + /// Four tight family members + one far outlier. CHAODA must flag the outlier |
| 172 | + /// (score 1.0, ≥ flag) and keep the family low (the repulsion picture). |
| 173 | + #[test] |
| 174 | + fn chaoda_flags_the_isolated_outlier() { |
| 175 | + let rows = vec![ |
| 176 | + vec![0.10, 0.10, 0.10], |
| 177 | + vec![0.12, 0.09, 0.11], |
| 178 | + vec![0.09, 0.11, 0.10], |
| 179 | + vec![0.11, 0.10, 0.09], |
| 180 | + vec![0.95, 0.95, 0.95], // the anomaly — far from the family |
| 181 | + ]; |
| 182 | + let scores = chaoda_scores(&rows, 2); |
| 183 | + let rank = anomaly_ranking(&rows, 2); |
| 184 | + assert_eq!(rank[0].0, 4, "the outlier is the top anomaly"); |
| 185 | + assert!((scores[4] - 1.0).abs() < 1e-9, "outlier normalizes to 1.0"); |
| 186 | + assert!(scores[4] >= CHAODA_FLAG, "outlier crosses the CHAODA flag"); |
| 187 | + for &i in &[0usize, 1, 2, 3] { |
| 188 | + assert!(scores[i] < CHAODA_FLAG, "family member {i} stays unflagged"); |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | + /// CAKES (attraction): a query inside the family pulls in its family members, |
| 193 | + /// not the outlier — "who are my relatives". |
| 194 | + #[test] |
| 195 | + fn cakes_pulls_in_the_similar_not_the_outlier() { |
| 196 | + let rows = vec![ |
| 197 | + vec![0.10, 0.10, 0.10], |
| 198 | + vec![0.12, 0.09, 0.11], |
| 199 | + vec![0.09, 0.11, 0.10], |
| 200 | + vec![0.95, 0.95, 0.95], // outlier |
| 201 | + ]; |
| 202 | + let nbrs = cakes_neighbors(&rows, 0, 2); |
| 203 | + let idx: Vec<usize> = nbrs.iter().map(|(i, _)| *i).collect(); |
| 204 | + assert!( |
| 205 | + idx.contains(&1) && idx.contains(&2), |
| 206 | + "relatives are the family" |
| 207 | + ); |
| 208 | + assert!(!idx.contains(&3), "the outlier is NOT a relative"); |
| 209 | + } |
| 210 | + |
| 211 | + /// Degenerate inputs never NaN / never panic. |
| 212 | + #[test] |
| 213 | + fn chaoda_degenerate_is_safe() { |
| 214 | + assert!(chaoda_scores(&[], 3).is_empty()); |
| 215 | + assert_eq!(chaoda_scores(&[vec![1.0, 2.0]], 3), vec![0.0]); |
| 216 | + // All-identical rows → no span → all zeros. |
| 217 | + let same = vec![vec![0.5, 0.5], vec![0.5, 0.5], vec![0.5, 0.5]]; |
| 218 | + assert!(chaoda_scores(&same, 2).iter().all(|&s| s == 0.0)); |
| 219 | + } |
| 220 | + |
| 221 | + /// The resilience adapter: one row per distinct basin, every feature in [0,1], |
| 222 | + /// deterministic, and CHAODA runs over it. Two clean blocks joined by a weak |
| 223 | + /// bridge → 2 basins; a uniform-inertia proxy keeps the buffer axis flat so the |
| 224 | + /// split is carried by topology, as expected. |
| 225 | + #[test] |
| 226 | + fn resilience_adapter_builds_normalized_basin_rows() { |
| 227 | + // Two 4-cliques + a weak bridge (the hhtl two-block topology). |
| 228 | + let mut e = Vec::new(); |
| 229 | + for (a, b) in [(0, 1), (0, 2), (1, 3), (2, 3)] { |
| 230 | + e.push(crate::graph::Edge::new(a, b, 1.0, 1.0)); |
| 231 | + } |
| 232 | + for (a, b) in [(4, 5), (4, 6), (5, 7), (6, 7)] { |
| 233 | + e.push(crate::graph::Edge::new(a, b, 1.0, 1.0)); |
| 234 | + } |
| 235 | + e.push(crate::graph::Edge::new(3, 4, 0.01, 1.0)); |
| 236 | + let grid = Grid::new(8, e); |
| 237 | + let h = vec![4.0; grid.n]; // uniform proxy inertia |
| 238 | + let (keys, rows) = resilience_basin_features(&grid, &h, 0.2); |
| 239 | + assert_eq!(keys.len(), rows.len(), "rows align with basins"); |
| 240 | + assert!(keys.len() >= 2, "the weak bridge gives at least two basins"); |
| 241 | + for r in &rows { |
| 242 | + assert_eq!(r.len(), 3, "[λ₂, size, inertia]"); |
| 243 | + assert!(r.iter().all(|&x| (0.0..=1.0).contains(&x)), "normalized"); |
| 244 | + } |
| 245 | + // Deterministic: same grid ⇒ same rows. |
| 246 | + let (_, rows2) = resilience_basin_features(&grid, &h, 0.2); |
| 247 | + assert_eq!(rows, rows2); |
| 248 | + // CHAODA runs and returns one score per basin in range. |
| 249 | + let scores = chaoda_scores(&rows, 1); |
| 250 | + assert_eq!(scores.len(), keys.len()); |
| 251 | + assert!(scores.iter().all(|&s| (0.0..=1.0).contains(&s))); |
| 252 | + } |
| 253 | +} |
0 commit comments