Skip to content

Commit c3681c6

Browse files
committed
B4: D5 Markov bundler + Trajectory (concise)
1 parent 9993189 commit c3681c6

2 files changed

Lines changed: 263 additions & 0 deletions

File tree

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
//! META-AGENT: add `pub mod markov_bundle;` to lib.rs.
2+
3+
use crate::trajectory::Trajectory;
4+
5+
/// Weighting profile applied to each sentence of the window according to its
/// distance from the focal (centre) sentence.
///
/// The formulas below use `d = |delta| / max(radius, 1)`, the distance scaled
/// by the window radius (see [`Kernel::weight`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Kernel {
    /// Constant weight `1.0` regardless of distance.
    Uniform,
    /// `(1 - d²) · exp(-d² / 2)`; this is the default kernel.
    #[default]
    MexicanHat,
    /// `exp(-d² / 2)`.
    Gaussian,
}
12+
13+
impl Kernel {
14+
pub fn weight(&self, delta: i32, radius: u32) -> f32 {
15+
let d = delta.abs() as f32 / radius.max(1) as f32;
16+
match self {
17+
Self::Uniform => 1.0,
18+
Self::MexicanHat => (1.0 - d * d) * (-(d * d) / 2.0).exp(),
19+
Self::Gaussian => (-(d * d) / 2.0).exp(),
20+
}
21+
}
22+
}
23+
24+
/// Grammatical role attached to a token.
///
/// Each role owns a band of the carrier vector, see
/// [`GrammaticalRole::slice`]. The last five variants are the TEKAMOLO
/// sub-bands that live inside the `Context` band.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GrammaticalRole {
    /// First primary band.
    Subject,
    /// Second primary band.
    Predicate,
    /// Third primary band.
    Object,
    /// Fourth primary band.
    Modifier,
    /// Fifth primary band; also the parent of the TEKAMOLO sub-bands.
    Context,
    /// TEKAMOLO sub-band of `Context`.
    Temporal,
    /// TEKAMOLO sub-band of `Context` (presumably "causal" — German spelling kept).
    Kausal,
    /// TEKAMOLO sub-band of `Context`.
    Modal,
    /// TEKAMOLO sub-band of `Context` (presumably "locative" — German spelling kept).
    Lokal,
    /// Last sub-band of `Context`.
    Instrument,
}
37+
38+
impl GrammaticalRole {
39+
/// Slice of the 16384-dim VSA carrier that owns this role.
40+
pub fn slice(&self) -> (usize, usize) {
41+
match self {
42+
Self::Subject => (0, 3277),
43+
Self::Predicate => (3277, 6554),
44+
Self::Object => (6554, 9830),
45+
Self::Modifier => (9830, 13107),
46+
Self::Context => (13107, 16384),
47+
// TEKAMOLO sub-slices inside Context band.
48+
Self::Temporal => (13107, 13762),
49+
Self::Kausal => (13762, 14418),
50+
Self::Modal => (14418, 15074),
51+
Self::Lokal => (15074, 15729),
52+
Self::Instrument => (15729, 16384),
53+
}
54+
}
55+
}
56+
57+
#[derive(Debug, Clone)]
58+
pub struct TokenWithRole {
59+
pub content_fp: Vec<f32>,
60+
pub role: GrammaticalRole,
61+
}
62+
63+
#[derive(Debug, Clone)]
64+
pub struct WindowedSentence {
65+
pub tokens: Vec<TokenWithRole>,
66+
}
67+
68+
pub struct MarkovBundler {
69+
pub radius: u32,
70+
pub kernel: Kernel,
71+
pub dims: usize,
72+
buffer: std::collections::VecDeque<WindowedSentence>,
73+
}
74+
75+
impl MarkovBundler {
76+
pub fn new(radius: u32, kernel: Kernel) -> Self {
77+
Self {
78+
radius,
79+
kernel,
80+
dims: 16_384,
81+
buffer: std::collections::VecDeque::with_capacity((2 * radius + 1) as usize),
82+
}
83+
}
84+
85+
pub fn push(&mut self, sentence: WindowedSentence) -> Option<Trajectory> {
86+
let cap = (2 * self.radius + 1) as usize;
87+
if self.buffer.len() == cap {
88+
self.buffer.pop_front();
89+
}
90+
self.buffer.push_back(sentence);
91+
if self.buffer.len() < cap {
92+
return None;
93+
}
94+
Some(self.bundle_current())
95+
}
96+
97+
fn bundle_current(&self) -> Trajectory {
98+
let mut acc = vec![0.0f32; self.dims];
99+
let focal = self.radius as i32;
100+
for (i, sent) in self.buffer.iter().enumerate() {
101+
let delta = (i as i32) - focal;
102+
let weight = self.kernel.weight(delta, self.radius);
103+
for tok in &sent.tokens {
104+
let (start, stop) = tok.role.slice();
105+
let len = (stop - start).min(tok.content_fp.len());
106+
for k in 0..len {
107+
acc[start + k] += weight * tok.content_fp[k];
108+
}
109+
}
110+
}
111+
// permute by position offset (rotate_right)
112+
if !acc.is_empty() {
113+
let k = (self.radius as usize) % acc.len();
114+
acc.rotate_right(k);
115+
}
116+
Trajectory {
117+
fingerprint: acc,
118+
radius: self.radius,
119+
}
120+
}
121+
}
122+
123+
#[cfg(test)]
mod tests {
    use super::*;

    /// Token of the given role whose fingerprint is `len` ones.
    fn tok(role: GrammaticalRole, len: usize) -> TokenWithRole {
        TokenWithRole {
            content_fp: vec![1.0; len],
            role,
        }
    }

    /// Sentence made of a single four-element `Subject` token.
    fn subject_sentence() -> WindowedSentence {
        WindowedSentence {
            tokens: vec![tok(GrammaticalRole::Subject, 4)],
        }
    }

    #[test]
    fn first_pushes_return_none_until_window_full() {
        let mut b = MarkovBundler::new(5, Kernel::MexicanHat);
        for _ in 0..10 {
            assert!(b.push(subject_sentence()).is_none());
        }
        assert!(b.push(subject_sentence()).is_some());
    }

    #[test]
    fn full_window_keeps_emitting() {
        let mut b = MarkovBundler::new(1, Kernel::Uniform);
        assert!(b.push(subject_sentence()).is_none());
        assert!(b.push(subject_sentence()).is_none());
        for _ in 0..4 {
            assert!(b.push(subject_sentence()).is_some());
        }
    }

    #[test]
    fn trajectory_has_carrier_width_and_radius() {
        let mut b = MarkovBundler::new(0, Kernel::Uniform);
        let t = b
            .push(subject_sentence())
            .expect("a radius-0 window is full after one push");
        assert_eq!(t.fingerprint.len(), b.dims);
        assert_eq!(t.radius, 0);
        // Weight 1.0 times a fingerprint of ones, rotated by zero positions.
        assert!(t.fingerprint[..4].iter().all(|&x| x == 1.0));
        assert!(t.fingerprint[4..].iter().all(|&x| x == 0.0));
    }

    #[test]
    fn kernel_uniform_constant() {
        assert_eq!(Kernel::Uniform.weight(0, 5), 1.0);
        assert_eq!(Kernel::Uniform.weight(3, 5), 1.0);
    }

    #[test]
    fn kernel_mexican_symmetric() {
        assert!(
            (Kernel::MexicanHat.weight(-2, 5) - Kernel::MexicanHat.weight(2, 5)).abs() < 1e-6
        );
    }

    #[test]
    fn role_slices_disjoint() {
        // The primary bands must tile the whole carrier back to back.
        let primary = [
            GrammaticalRole::Subject,
            GrammaticalRole::Predicate,
            GrammaticalRole::Object,
            GrammaticalRole::Modifier,
            GrammaticalRole::Context,
        ];
        let mut cursor = 0;
        for role in primary.iter() {
            let (start, stop) = role.slice();
            assert_eq!(start, cursor, "{:?} does not start where the previous band ends", role);
            assert!(stop > start, "{:?} is empty", role);
            cursor = stop;
        }
        assert_eq!(cursor, 16_384);

        // The TEKAMOLO bands must tile the Context band the same way.
        let tekamolo = [
            GrammaticalRole::Temporal,
            GrammaticalRole::Kausal,
            GrammaticalRole::Modal,
            GrammaticalRole::Lokal,
            GrammaticalRole::Instrument,
        ];
        let (ctx_start, ctx_stop) = GrammaticalRole::Context.slice();
        let mut cursor = ctx_start;
        for role in tekamolo.iter() {
            let (start, stop) = role.slice();
            assert_eq!(start, cursor, "{:?} does not start where the previous band ends", role);
            assert!(stop > start, "{:?} is empty", role);
            cursor = stop;
        }
        assert_eq!(cursor, ctx_stop);
    }
}

crates/deepnsm/src/trajectory.rs

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//! META-AGENT: add `pub mod trajectory;` to lib.rs.
2+
3+
/// Bundled fingerprint of one sentence window.
#[derive(Debug, Clone)]
pub struct Trajectory {
    /// The bundled vector; role bands are read from it by index range.
    pub fingerprint: Vec<f32>,
    /// Radius of the window the fingerprint was bundled from.
    pub radius: u32,
}
8+
9+
impl Trajectory {
10+
pub fn role_bundle(&self, start: usize, stop: usize) -> Vec<f32> {
11+
let stop = stop.min(self.fingerprint.len());
12+
if start >= stop {
13+
return Vec::new();
14+
}
15+
self.fingerprint[start..stop].to_vec()
16+
}
17+
18+
pub fn role_candidates(
19+
&self,
20+
start: usize,
21+
stop: usize,
22+
codebook: &[Vec<f32>],
23+
) -> Vec<Candidate> {
24+
let bundle = self.role_bundle(start, stop);
25+
let mut scored: Vec<Candidate> = codebook
26+
.iter()
27+
.enumerate()
28+
.map(|(i, entry)| {
29+
let score = cosine(&bundle, entry);
30+
Candidate {
31+
codebook_index: i,
32+
score,
33+
}
34+
})
35+
.filter(|c| c.score > 0.5)
36+
.collect();
37+
scored.sort_by(|a, b| {
38+
b.score
39+
.partial_cmp(&a.score)
40+
.unwrap_or(std::cmp::Ordering::Equal)
41+
});
42+
scored.truncate(5);
43+
scored
44+
}
45+
}
46+
47+
/// Cosine similarity of `a` and `b` over their common prefix.
///
/// When the slices differ in length, only the first `min(a.len(), b.len())`
/// elements of each take part — in the dot product and in both norms.
/// Returns `0.0` when that prefix is empty or when either norm is below
/// `1e-9`.
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let common = a.len().min(b.len());
    if common == 0 {
        return 0.0;
    }
    let (lhs, rhs) = (&a[..common], &b[..common]);

    let norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_lhs, norm_rhs) = (norm(lhs), norm(rhs));
    // Guard against dividing by a (near-)zero magnitude.
    if norm_lhs < 1e-9 || norm_rhs < 1e-9 {
        return 0.0;
    }

    let dot: f32 = lhs.iter().zip(rhs).map(|(x, y)| x * y).sum();
    dot / (norm_lhs * norm_rhs)
}
61+
62+
/// A codebook entry that matched a role band, with its similarity score.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Candidate {
    /// Position of the matching entry in the codebook slice that was searched.
    pub codebook_index: usize,
    /// Cosine similarity between the role band and the entry.
    pub score: f32,
}
67+
68+
#[cfg(test)]
mod tests {
    use super::*;

    /// Trajectory whose fingerprint is `len` ones.
    fn ones(len: usize) -> Trajectory {
        Trajectory {
            fingerprint: vec![1.0; len],
            radius: 5,
        }
    }

    #[test]
    fn role_bundle_returns_slice() {
        assert_eq!(ones(100).role_bundle(10, 30).len(), 20);
    }

    #[test]
    fn role_bundle_empty_when_inverted() {
        assert_eq!(ones(100).role_bundle(50, 30).len(), 0);
    }

    #[test]
    fn role_bundle_clamps_stop_to_fingerprint() {
        let t = ones(100);
        assert_eq!(t.role_bundle(90, 200).len(), 10);
        assert_eq!(t.role_bundle(150, 200).len(), 0);
    }

    #[test]
    fn role_candidates_filters_by_threshold() {
        let codebook: Vec<Vec<f32>> = vec![vec![1.0; 100], vec![-1.0; 100]];
        let cands = ones(100).role_candidates(0, 100, &codebook);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].codebook_index, 0);
    }

    #[test]
    fn role_candidates_sorted_by_descending_score() {
        let t = Trajectory {
            fingerprint: vec![1.0, 0.0],
            radius: 0,
        };
        // Scores: ~0.707, 1.0, ~0.894, 0.0 (the last one is filtered out).
        let codebook = vec![
            vec![1.0, 1.0],
            vec![1.0, 0.0],
            vec![1.0, 0.5],
            vec![0.0, 1.0],
        ];
        let order: Vec<usize> = t
            .role_candidates(0, 2, &codebook)
            .iter()
            .map(|c| c.codebook_index)
            .collect();
        assert_eq!(order, vec![1, 2, 0]);
    }

    #[test]
    fn role_candidates_keeps_at_most_five() {
        let codebook = vec![vec![1.0; 10]; 7];
        let order: Vec<usize> = ones(10)
            .role_candidates(0, 10, &codebook)
            .iter()
            .map(|c| c.codebook_index)
            .collect();
        // Equal scores keep codebook order, so the first five survive.
        assert_eq!(order, vec![0, 1, 2, 3, 4]);
    }
}

0 commit comments

Comments
 (0)