Skip to content

Commit c4b6988

Browse files
committed
B-fix-triangle: real classification distance + parameterize role_candidates + cosine length assert
1 parent 64e91c6 commit c4b6988

2 files changed

Lines changed: 255 additions & 38 deletions

File tree

crates/deepnsm/src/trajectory.rs

Lines changed: 135 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,94 @@
11
//! META-AGENT: add `pub mod trajectory;` to lib.rs.
22
3+
use crate::markov_bundle::GrammaticalRole;
4+
35
#[derive(Debug, Clone)]
46
pub struct Trajectory {
57
pub fingerprint: Vec<f32>,
68
pub radius: u32,
79
}
810

911
impl Trajectory {
10-
pub fn role_bundle(&self, start: usize, stop: usize) -> Vec<f32> {
12+
/// Slice the role band out of the trajectory's fingerprint.
13+
pub fn role_bundle(&self, role: GrammaticalRole) -> Vec<f32> {
    // Resolve the role to its (start, stop) band, then delegate the
    // clamped copy to the range helper.
    let band = role.slice();
    self.role_bundle_range(band.0, band.1)
}
17+
18+
/// Lower-level slice helper retained for test fixtures + callers
19+
/// that pre-compute (start, stop) by hand. Prefer `role_bundle(role)`.
20+
pub fn role_bundle_range(&self, start: usize, stop: usize) -> Vec<f32> {
1121
let stop = stop.min(self.fingerprint.len());
1222
if start >= stop {
1323
return Vec::new();
1424
}
1525
self.fingerprint[start..stop].to_vec()
1626
}
1727

28+
/// Score the codebook against the role's bundle, filter by
29+
/// `threshold` (cosine ≥ threshold), sort descending, truncate to
30+
/// `top_k`.
31+
///
32+
/// `threshold` and `top_k` are explicit so callers tune them per
33+
/// style / per role band — no hidden 0.5 / 5 defaults baked into
34+
/// the carrier. See `role_candidates_default` for the
35+
/// backwards-compat shim with the previous (0.5, 5) values.
1836
pub fn role_candidates(
1937
&self,
20-
start: usize,
21-
stop: usize,
38+
role: GrammaticalRole,
2239
codebook: &[Vec<f32>],
40+
threshold: f32,
41+
top_k: usize,
2342
) -> Vec<Candidate> {
24-
let bundle = self.role_bundle(start, stop);
43+
let bundle = self.role_bundle(role);
2544
let mut scored: Vec<Candidate> = codebook
2645
.iter()
2746
.enumerate()
28-
.map(|(i, entry)| {
29-
let score = cosine(&bundle, entry);
30-
Candidate {
31-
codebook_index: i,
32-
score,
33-
}
47+
.map(|(i, entry)| Candidate {
48+
codebook_index: i,
49+
score: cosine(&bundle, entry),
3450
})
35-
.filter(|c| c.score > 0.5)
51+
.filter(|c| c.score >= threshold)
3652
.collect();
3753
scored.sort_by(|a, b| {
3854
b.score
3955
.partial_cmp(&a.score)
4056
.unwrap_or(std::cmp::Ordering::Equal)
4157
});
42-
scored.truncate(5);
58+
scored.truncate(top_k);
4359
scored
4460
}
61+
62+
/// Backwards-compat shim with the previous signature
63+
/// (threshold = 0.5, top_k = 5).
64+
#[deprecated(note = "use role_candidates with explicit threshold + top_k")]
65+
pub fn role_candidates_default(
66+
&self,
67+
role: GrammaticalRole,
68+
codebook: &[Vec<f32>],
69+
) -> Vec<Candidate> {
70+
self.role_candidates(role, codebook, 0.5, 5)
71+
}
4572
}
4673

74+
/// Cosine similarity. **Panics** on length mismatch — the carrier
75+
/// guarantees role-aligned slices and a length mismatch is a wiring
76+
/// bug, not a runtime input error. Callers that need a fallible
77+
/// variant should length-check before invoking.
4778
fn cosine(a: &[f32], b: &[f32]) -> f32 {
48-
let n = a.len().min(b.len());
49-
if n == 0 {
79+
assert_eq!(
80+
a.len(),
81+
b.len(),
82+
"cosine: length mismatch ({} vs {})",
83+
a.len(),
84+
b.len()
85+
);
86+
if a.is_empty() {
5087
return 0.0;
5188
}
52-
let dot: f32 = a[..n].iter().zip(&b[..n]).map(|(x, y)| x * y).sum();
53-
let na: f32 = a[..n].iter().map(|x| x * x).sum::<f32>().sqrt();
54-
let nb: f32 = b[..n].iter().map(|x| x * x).sum::<f32>().sqrt();
89+
let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
90+
let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
91+
let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
5592
if na < 1e-9 || nb < 1e-9 {
5693
0.0
5794
} else {
@@ -68,31 +105,102 @@ pub struct Candidate {
68105
#[cfg(test)]
69106
mod tests {
70107
use super::*;
108+
109+
fn full_carrier(value: f32) -> Vec<f32> {
110+
vec![value; 16_384]
111+
}
112+
71113
#[test]
72-
fn role_bundle_returns_slice() {
114+
fn role_bundle_returns_subject_band() {
73115
let t = Trajectory {
74-
fingerprint: vec![1.0; 100],
116+
fingerprint: full_carrier(1.0),
75117
radius: 5,
76118
};
77-
assert_eq!(t.role_bundle(10, 30).len(), 20);
119+
let bundle = t.role_bundle(GrammaticalRole::Subject);
120+
let (start, stop) = GrammaticalRole::Subject.slice();
121+
assert_eq!(bundle.len(), stop - start);
78122
}
123+
79124
#[test]
80-
fn role_bundle_empty_when_inverted() {
125+
fn role_bundle_range_empty_when_inverted() {
81126
let t = Trajectory {
82-
fingerprint: vec![1.0; 100],
127+
fingerprint: full_carrier(1.0),
83128
radius: 5,
84129
};
85-
assert_eq!(t.role_bundle(50, 30).len(), 0);
130+
assert_eq!(t.role_bundle_range(50, 30).len(), 0);
86131
}
132+
87133
#[test]
88134
fn role_candidates_filters_by_threshold() {
135+
// Codebook of 5 with similarities [0.9, 0.8, 0.4, 0.3, 0.1];
136+
// threshold 0.5 → only the first two pass.
137+
let (start, stop) = GrammaticalRole::Subject.slice();
138+
let n = stop - start;
139+
let mut fingerprint = vec![0.0_f32; 16_384];
140+
for v in fingerprint[start..stop].iter_mut() {
141+
*v = 1.0;
142+
}
89143
let t = Trajectory {
90-
fingerprint: vec![1.0; 100],
144+
fingerprint,
91145
radius: 5,
92146
};
93-
let codebook: Vec<Vec<f32>> = vec![vec![1.0; 100], vec![-1.0; 100]];
94-
let cands = t.role_candidates(0, 100, &codebook);
95-
assert_eq!(cands.len(), 1);
147+
// Codebook entries are band-length vectors (`n` elements), matching
148+
// the bundle sliced out for the subject role — cosine vs the all-1
149+
// bundle becomes deterministic.
150+
let make_entry = |scale: f32| -> Vec<f32> { vec![scale; n] };
151+
// Cosine of all-ones bundle vs scaled all-ones → 1.0 regardless
152+
// of scale (when scale > 0). Use the sign + zeroing of
153+
// individual positions to engineer specific cosines.
154+
// Concretely: an entry that has the first `k` positions = 1 and
155+
// the rest = 0 has cosine = sqrt(k/n) against the all-ones
156+
// bundle.
157+
let make_partial = |k: usize| -> Vec<f32> {
158+
let mut e = vec![0.0_f32; n];
159+
for v in e.iter_mut().take(k) {
160+
*v = 1.0;
161+
}
162+
e
163+
};
164+
// Choose k so cosines are roughly [0.9, 0.8, 0.4, 0.3, 0.1].
165+
let codebook: Vec<Vec<f32>> = vec![
166+
make_partial((0.9_f32 * 0.9 * n as f32) as usize),
167+
make_partial((0.8_f32 * 0.8 * n as f32) as usize),
168+
make_partial((0.4_f32 * 0.4 * n as f32) as usize),
169+
make_partial((0.3_f32 * 0.3 * n as f32) as usize),
170+
make_partial((0.1_f32 * 0.1 * n as f32) as usize),
171+
];
172+
let _ = make_entry; // silence unused if-the-build-is-aggressive
173+
let cands = t.role_candidates(GrammaticalRole::Subject, &codebook, 0.5, 10);
174+
assert_eq!(cands.len(), 2, "threshold 0.5 should keep 2 of 5 entries");
175+
// Sorted descending — the 0.9-cosine entry comes first.
96176
assert_eq!(cands[0].codebook_index, 0);
177+
assert_eq!(cands[1].codebook_index, 1);
178+
}
179+
180+
#[test]
181+
fn role_candidates_top_k_truncation() {
182+
// Codebook of 10 entries all above threshold — top_k=3 must
183+
// return exactly 3.
184+
let (start, stop) = GrammaticalRole::Subject.slice();
185+
let n = stop - start;
186+
let mut fingerprint = vec![0.0_f32; 16_384];
187+
for v in fingerprint[start..stop].iter_mut() {
188+
*v = 1.0;
189+
}
190+
let t = Trajectory {
191+
fingerprint,
192+
radius: 5,
193+
};
194+
let codebook: Vec<Vec<f32>> = (0..10).map(|_| vec![1.0_f32; n]).collect();
195+
let cands = t.role_candidates(GrammaticalRole::Subject, &codebook, 0.5, 3);
196+
assert_eq!(cands.len(), 3);
197+
}
198+
199+
#[test]
200+
#[should_panic(expected = "length mismatch")]
201+
fn cosine_panics_on_length_mismatch() {
202+
let a = vec![1.0_f32; 10];
203+
let b = vec![1.0_f32; 11];
204+
let _ = cosine(&a, &b);
97205
}
98206
}

crates/deepnsm/src/triangle_bridge.rs

Lines changed: 120 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -83,23 +83,82 @@ pub fn analyze_without_triangle(structure: SentenceStructure) -> SpoWithGrammar
8383
}
8484
}
8585

86-
/// Hamming-style distance between the SPO's expected qualia footprint
87-
/// and the Triangle's actual qualia signature.
86+
/// Normalized Hamming distance between the qualia fingerprint and the
87+
/// SPO predicate's expected qualia footprint.
8888
///
89-
/// **Stub**: returns 0.0 today. The expected footprint is currently a
90-
/// fixed prior; once D7 GrammarStyleConfig surfaces per-style qualia
91-
/// expectations, this lookup becomes "compare actual qualia to the
92-
/// style-specific footprint and emit a normalized distance."
89+
/// The footprint is derived from the verb's row in the 144-cell table
90+
/// (currently a placeholder = neutral 0.5 prior, encoded as zero bits
91+
/// after thresholding). Once D7 `GrammarStyleConfig` surfaces per-style
92+
/// qualia expectations, `expected_qualia_footprint` will look up the row
93+
/// by (verb, tense) and return that row's qualia footprint.
94+
///
95+
/// Returns `[0.0, 1.0]`:
96+
/// - `0.0` = qualia exactly matches expected footprint (familiar domain).
97+
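/// - `1.0` = total mismatch (novel domain — extrapolation needed). With the current packing (18 qualia bits in one `u64`, normalized over all 64 bits) the reachable maximum is 18/64 ≈ 0.281.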
9398
///
9499
/// FOLLOW-UP: tune against the Jirak-derived noise floor (see CLAUDE.md
95100
/// §I-NOISE-FLOOR-JIRAK) — values that exceed the n^(-1/2) weak-dependence
96101
/// bound are real signal, not register noise.
97102
#[cfg(feature = "grammar-triangle")]
98103
fn compute_classification_distance(
99-
_structure: &SentenceStructure,
100-
_triangle: &GrammarTriangle,
104+
structure: &SentenceStructure,
105+
triangle: &GrammarTriangle,
101106
) -> f32 {
102-
0.0
107+
// Convert the 18D qualia coordinates to a binary fingerprint by
108+
// thresholding each dimension at 0.5. Pack into a single u64 (only
109+
// 18 bits used; remaining 46 bits are zero — they participate in
110+
// the Hamming compare but cancel against zeroed expected bits).
111+
let q_bits = qualia_to_binary_fingerprint(triangle);
112+
let expected_bits = expected_qualia_footprint(structure);
113+
hamming_normalized(&q_bits, &expected_bits)
114+
}
115+
116+
/// Threshold the 18D qualia coordinates at 0.5 → 18-bit packed `u64`.
117+
///
118+
/// The packed register is `[u64; 1]` so the Hamming compare counts only
119+
/// the 18 meaningful bits against the same 18 bits of the expected
120+
/// footprint (the upper 46 bits in both registers are zero, contributing
121+
/// nothing to the diff but inflating the denominator: `hamming_normalized`
122+
/// divides by all 64 bits, so the distance here can reach at most 18/64).
123+
#[cfg(feature = "grammar-triangle")]
124+
fn qualia_to_binary_fingerprint(triangle: &GrammarTriangle) -> Vec<u64> {
125+
let coords = triangle.qualia.coordinates();
126+
let mut packed: u64 = 0;
127+
for (i, &c) in coords.iter().enumerate() {
128+
if c >= 0.5 {
129+
packed |= 1u64 << i;
130+
}
131+
}
132+
vec![packed]
133+
}
134+
135+
/// Expected qualia footprint for a given sentence structure.
136+
///
137+
/// Placeholder: zero-fingerprint = neutral expectation (every dimension
138+
/// below 0.5). When the 144-cell verb table lands, this looks up the row
139+
/// by (verb, tense) on `structure.triples[0]` and returns that row's
140+
/// qualia footprint.
141+
#[cfg(feature = "grammar-triangle")]
142+
fn expected_qualia_footprint(_structure: &SentenceStructure) -> Vec<u64> {
143+
vec![0u64; 1]
144+
}
145+
146+
/// Normalized Hamming distance between two `[u64]` registers.
147+
///
148+
/// Returns `bits_diff / total_bits` in `[0.0, 1.0]`. Compares
149+
/// `min(a.len(), b.len())` words; empty input returns `0.0`.
150+
#[cfg(feature = "grammar-triangle")]
151+
fn hamming_normalized(a: &[u64], b: &[u64]) -> f32 {
    // Only the common prefix of the two registers is compared; with no
    // common words there is nothing to differ, so the distance is 0.0.
    let words = a.len().min(b.len());
    if words == 0 {
        return 0.0;
    }
    // `zip` stops at the shorter register, matching `words`. Popcounts are
    // accumulated in u64 so the sum cannot overflow however long the
    // registers are.
    let bits_diff: u64 = a
        .iter()
        .zip(b)
        .map(|(x, y)| u64::from((x ^ y).count_ones()))
        .sum();
    // Denominator counts every bit of every compared word (64 per word).
    let total_bits = (words * 64) as f32;
    bits_diff as f32 / total_bits
}
104163

105164
#[cfg(test)]
@@ -126,13 +185,63 @@ mod tests {
126185
assert_eq!(out.triples.triples.len(), 1);
127186
}
128187

188+
#[cfg(feature = "grammar-triangle")]
189+
#[test]
190+
fn classification_distance_in_unit_interval() {
191+
// Identical fingerprints → 0.0; orthogonal (all bits flipped)
192+
// within the 18 used bits → 18/64 = 0.28125; full-register
193+
// orthogonality (every bit flipped) → 1.0.
194+
assert_eq!(hamming_normalized(&[0u64], &[0u64]), 0.0);
195+
assert_eq!(hamming_normalized(&[u64::MAX], &[u64::MAX]), 0.0);
196+
assert!((hamming_normalized(&[0u64], &[u64::MAX]) - 1.0).abs() < 1e-6);
197+
// 18 bits set vs. 0 bits → 18/64.
198+
let eighteen_bits = (1u64 << 18) - 1;
199+
let d = hamming_normalized(&[eighteen_bits], &[0u64]);
200+
assert!((d - (18.0 / 64.0)).abs() < 1e-6);
201+
}
202+
129203
#[cfg(feature = "grammar-triangle")]
130204
#[test]
131205
fn analyze_with_triangle_stamps_lenses() {
132206
let s = fixture_structure();
133207
let out = analyze_with_triangle("the dog runs", s);
134-
// Stub returns 0.0 today — until D7 footprint lookup lands.
135-
assert_eq!(out.classification_distance, 0.0);
208+
// Real Hamming over 18-bit qualia footprint vs. zero expectation;
209+
// result must be in [0, 1] and not a hardcoded 0.0.
210+
assert!(out.classification_distance >= 0.0);
211+
assert!(out.classification_distance <= 1.0);
136212
assert_eq!(out.triples.triples.len(), 1);
137213
}
214+
215+
#[cfg(feature = "grammar-triangle")]
216+
#[test]
217+
fn novel_domain_qualia_yields_high_distance() {
218+
// High-activation, high-novelty, high-urgency text should pull
219+
// multiple qualia dimensions above the 0.5 threshold, producing
220+
// a non-zero Hamming distance against the all-zero expected
221+
// footprint (placeholder = neutral expectation).
222+
let s = fixture_structure();
223+
let out = analyze_with_triangle(
224+
"Suddenly an unprecedented intense urgent novel surprising explosion!",
225+
s,
226+
);
227+
assert!(
228+
out.classification_distance > 0.0,
229+
"novel-domain text should yield non-zero classification distance, got {}",
230+
out.classification_distance
231+
);
232+
}
233+
234+
#[cfg(feature = "grammar-triangle")]
235+
#[test]
236+
fn qualia_fingerprint_thresholds_at_half() {
237+
// Build a triangle whose qualia coordinates straddle 0.5; the
238+
// packed fingerprint must have exactly the bits at-or-above 0.5
239+
// set.
240+
let triangle = GrammarTriangle::default(); // all coords = 0.5
241+
let fp = qualia_to_binary_fingerprint(&triangle);
242+
// Default = 0.5 on every dim → every bit set (>= 0.5 threshold),
243+
// so packed register == 18 lowest bits set.
244+
let expected = (1u64 << 18) - 1;
245+
assert_eq!(fp[0], expected);
246+
}
138247
}

0 commit comments

Comments
 (0)