Commit ae4a23b

feat(information,save): KL/JS divergence + save overhaul
- Fix digamma Stirling expansion; KLDivergence with JS, TV, cross-entropy + tests
- Discrete + continuous estimator traits; KnnEntropy/MI (default k=3)
- PersistentId(u64), seeded counter, atomic write fix, VecDeque events
- Chained version migration; remove dead helpers; basic_save.rs updated
1 parent 6d7eb61 commit ae4a23b

9 files changed

Lines changed: 508 additions & 166 deletions
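
Two items in the message benefit from a sketch before the diffs. First, the digamma fix: the Kraskov k-NN estimators added in estimators.rs below are built on ψ(·), so a wrong coefficient in the Stirling tail would bias every continuous entropy/MI estimate. The standard evaluation uses the recurrence ψ(x) = ψ(x + 1) - 1/x to shift the argument up, then the asymptotic expansion ψ(x) ≈ ln x - 1/(2x) - 1/(12x²) + 1/(120x⁴) - 1/(252x⁶). A minimal sketch of that textbook form (not necessarily the commit's exact code, which is not shown on this page):

/// Digamma ψ(x) for x > 0. Sketch of the textbook approach: shift x
/// upward until the asymptotic series is accurate, then apply the
/// Stirling-type expansion. Sanity check: digamma(1.0) ≈ -0.5772 (-γ).
fn digamma(mut x: f64) -> f64 {
    let mut result = 0.0;
    // Recurrence ψ(x) = ψ(x + 1) - 1/x until x is large enough.
    while x < 6.0 {
        result -= 1.0 / x;
        x += 1.0;
    }
    // ψ(x) ≈ ln x - 1/(2x) - 1/(12x²) + 1/(120x⁴) - 1/(252x⁶)
    let inv2 = 1.0 / (x * x);
    result + x.ln() - 0.5 / x
        - inv2 * (1.0 / 12.0 - inv2 * (1.0 / 120.0 - inv2 / 252.0))
}

Second, the atomic write fix: the save-system files are not in the diffs captured below, but "atomic write" conventionally means write-to-temp-then-rename, so a crash mid-save leaves the previous file intact. A sketch under that assumption; `atomic_write` and the temp-file naming are illustrative, not the commit's actual helper:

use std::fs::{self, File};
use std::io::{self, Write};
use std::path::Path;

/// Write `bytes` to `path` so readers see either the old contents or the
/// complete new contents, never a torn write.
fn atomic_write(path: &Path, bytes: &[u8]) -> io::Result<()> {
    // Temp file in the same directory: rename(2) is only atomic within
    // one filesystem, so the temp path must live beside the target.
    let tmp = path.with_extension("tmp");
    let mut file = File::create(&tmp)?;
    file.write_all(bytes)?;
    file.sync_all()?; // flush to disk before the rename commits it
    fs::rename(&tmp, path)
}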


Lines changed: 139 additions & 27 deletions
@@ -1,73 +1,185 @@
-/// Kullback-Leibler divergence and related measures
-/// Core information-theoretic distance metrics
+/// Kullback-Leibler divergence and related measures.
+/// Core information-theoretic distance metrics for comparing probability distributions.
+///
+/// LP usage: measure evolutionary distance between species, compare creature behaviour
+/// distributions, quantify trait drift between populations. In multiplayer, these
+/// metrics let the simulation decide when two players' lineages count as separate species.
 pub struct KLDivergence;
 
 impl KLDivergence {
-    /// Calculate KL divergence D(P||Q) = Σ P(i) log₂(P(i)/Q(i))
-    /// Measures how distribution P differs from reference Q
-    /// Returns divergence in bits - NOT symmetric
+    /// D(P||Q) = Σ P(i) log₂(P(i)/Q(i))
+    ///
+    /// How many extra bits are needed to encode P-distributed events using a code
+    /// optimised for Q. NOT symmetric. Returns +∞ when P has mass where Q has none.
     pub fn divergence(p_probs: &[f64], q_probs: &[f64]) -> f64 {
         assert_eq!(
             p_probs.len(),
             q_probs.len(),
             "P and Q must have same length"
         );
 
-        let mut kl_div = 0.0;
+        let mut kl = 0.0;
         for (&p, &q) in p_probs.iter().zip(q_probs) {
             if p > 0.0 && q > 0.0 {
-                kl_div += p * (p / q).log2();
-            } else if p > 0.0 && q == 0.0 {
-                // P has probability where Q doesn't - infinite divergence
+                kl += p * (p / q).log2();
+            } else if p > 0.0 {
+                // P has support where Q does not — infinite divergence by definition
                 return f64::INFINITY;
             }
-            // p == 0.0 contributes nothing to KL divergence
+            // p == 0 contributes 0 regardless of q (0 · log(0/q) := 0)
         }
-
-        kl_div
+        kl
     }
 
-    /// Calculate Jensen-Shannon divergence - symmetric version of KL
-    /// JS(P,Q) = 0.5 * [D(P||M) + D(Q||M)] where M = 0.5*(P+Q)
-    /// Always finite and bounded [0, 1] bits
+    /// JS(P,Q) = 0.5 · [D(P||M) + D(Q||M)] where M = 0.5·(P+Q)
+    ///
+    /// Symmetric, always finite, bounded [0, 1] bits (log₂ base).
+    /// Reaches 1 bit only when P and Q have fully disjoint support.
+    /// Preferred over raw KL for comparing creature trait distributions
+    /// because it never blows up and its square root is a proper distance metric.
     pub fn jensen_shannon(p_probs: &[f64], q_probs: &[f64]) -> f64 {
         assert_eq!(
             p_probs.len(),
             q_probs.len(),
             "P and Q must have same length"
         );
 
-        // Calculate mixture distribution M = 0.5*(P+Q)
-        let m_probs: Vec<f64> = p_probs
+        let m: Vec<f64> = p_probs
             .iter()
             .zip(q_probs)
             .map(|(&p, &q)| 0.5 * (p + q))
             .collect();
 
-        let kl_pm = Self::divergence(p_probs, &m_probs);
-        let kl_qm = Self::divergence(q_probs, &m_probs);
-
-        0.5 * (kl_pm + kl_qm)
+        // M[i] >= 0.5 · max(P[i], Q[i]), so KL(P||M) and KL(Q||M) are always finite.
+        0.5 * (Self::divergence(p_probs, &m) + Self::divergence(q_probs, &m))
     }
 
-    /// Calculate cross-entropy H(P,Q) = -Σ P(i) log₂(Q(i))
-    /// Useful for ML loss functions and distribution comparison
+    /// H(P,Q) = -Σ P(i) log₂(Q(i))
+    ///
+    /// Cross-entropy: bits to encode P-distributed events with a Q-optimal code.
+    /// H(P,Q) = H(P) + D(P||Q). Returns +∞ when Q has no mass where P has mass.
    pub fn cross_entropy(p_probs: &[f64], q_probs: &[f64]) -> f64 {
         assert_eq!(
             p_probs.len(),
             q_probs.len(),
             "P and Q must have same length"
         );
 
-        let mut cross_ent = 0.0;
+        let mut ce = 0.0;
         for (&p, &q) in p_probs.iter().zip(q_probs) {
             if p > 0.0 && q > 0.0 {
-                cross_ent -= p * q.log2();
-            } else if p > 0.0 && q == 0.0 {
+                ce -= p * q.log2();
+            } else if p > 0.0 {
                 return f64::INFINITY;
             }
         }
+        ce
+    }
+
+    /// TV(P,Q) = 0.5 · Σ|P(i) - Q(i)|
+    ///
+    /// Total variation distance: the maximum probability gap any single event
+    /// can have between P and Q. Symmetric, bounded [0, 1], no log required.
+    /// Fastest way to ask "how different are these two distributions?" when you
+    /// do not need the information-theoretic interpretation of KL/JS.
+    pub fn total_variation(p_probs: &[f64], q_probs: &[f64]) -> f64 {
+        assert_eq!(
+            p_probs.len(),
+            q_probs.len(),
+            "P and Q must have same length"
+        );
+        0.5 * p_probs
+            .iter()
+            .zip(q_probs)
+            .map(|(&p, &q)| (p - q).abs())
+            .sum::<f64>()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn kl_identical_is_zero() {
+        let p = [0.5, 0.5];
+        assert!(KLDivergence::divergence(&p, &p).abs() < 1e-10);
+    }
+
+    #[test]
+    fn kl_disjoint_support_is_infinity() {
+        let p = [1.0, 0.0];
+        let q = [0.0, 1.0];
+        assert_eq!(KLDivergence::divergence(&p, &q), f64::INFINITY);
+    }
+
+    #[test]
+    fn kl_known_value() {
+        // D([0.5, 0.5] || [0.25, 0.75]) = 0.5*log2(2) + 0.5*log2(2/3) ≈ 0.2075
+        let p = [0.5, 0.5];
+        let q = [0.25, 0.75];
+        let kl = KLDivergence::divergence(&p, &q);
+        let expected = 0.5 * (0.5_f64 / 0.25).log2() + 0.5 * (0.5_f64 / 0.75).log2();
+        assert!((kl - expected).abs() < 1e-10, "got {}", kl);
+    }
+
+    #[test]
+    fn js_identical_is_zero() {
+        let p = [0.25, 0.25, 0.25, 0.25];
+        assert!(KLDivergence::jensen_shannon(&p, &p).abs() < 1e-10);
+    }
+
+    #[test]
+    fn js_disjoint_support_is_one_bit() {
+        let p = [1.0, 0.0];
+        let q = [0.0, 1.0];
+        let js = KLDivergence::jensen_shannon(&p, &q);
+        assert!((js - 1.0).abs() < 1e-10, "expected 1 bit, got {}", js);
+    }
+
+    #[test]
+    fn js_is_symmetric() {
+        let p = [0.7, 0.2, 0.1];
+        let q = [0.1, 0.5, 0.4];
+        let diff =
+            (KLDivergence::jensen_shannon(&p, &q) - KLDivergence::jensen_shannon(&q, &p)).abs();
+        assert!(diff < 1e-12, "JS must be symmetric, diff = {}", diff);
+    }
+
+    #[test]
+    fn js_bounded_between_zero_and_one() {
+        let p = [0.6, 0.3, 0.1];
+        let q = [0.1, 0.2, 0.7];
+        let js = KLDivergence::jensen_shannon(&p, &q);
+        assert!(js >= 0.0 && js <= 1.0, "JS out of [0,1]: {}", js);
+    }
+
+    #[test]
+    fn cross_entropy_of_self_equals_entropy() {
+        // H(P, P) = H(P). For fair coin P=[0.5,0.5], H = 1 bit.
+        let p = [0.5, 0.5];
+        let h = KLDivergence::cross_entropy(&p, &p);
+        assert!((h - 1.0).abs() < 1e-10, "expected 1 bit, got {}", h);
+    }
+
+    #[test]
+    fn total_variation_identical_is_zero() {
+        let p = [0.3, 0.3, 0.4];
+        assert!(KLDivergence::total_variation(&p, &p).abs() < 1e-12);
+    }
+
+    #[test]
+    fn total_variation_disjoint_is_one() {
+        let p = [1.0, 0.0];
+        let q = [0.0, 1.0];
+        assert!((KLDivergence::total_variation(&p, &q) - 1.0).abs() < 1e-12);
+    }
 
-        cross_ent
+    #[test]
+    fn total_variation_bounded() {
+        let p = [0.6, 0.4];
+        let q = [0.2, 0.8];
+        let tv = KLDivergence::total_variation(&p, &q);
+        assert!(tv >= 0.0 && tv <= 1.0, "TV must be in [0, 1], got {}", tv);
     }
 }
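
Taken together, the new surface is compact at the call site. A usage sketch against the signatures above; the `use` path is an assumption based on the crate layout (only the estimators.rs path is visible in this capture):

// Usage sketch; import path assumed, values taken from the tests above.
use information::measures::KLDivergence;

fn main() {
    let p = [0.5, 0.5];
    let q = [0.25, 0.75];

    let kl = KLDivergence::divergence(&p, &q); // ≈ 0.2075 bits, direction-sensitive
    let js = KLDivergence::jensen_shannon(&p, &q); // symmetric, always finite
    let tv = KLDivergence::total_variation(&p, &q); // 0.5 * (0.25 + 0.25) = 0.25
    let ce = KLDivergence::cross_entropy(&p, &q); // H(P) + D(P||Q) = 1.0 + kl

    assert!((KLDivergence::divergence(&q, &p) - kl).abs() > 1e-3); // not symmetric
    assert!((tv - 0.25).abs() < 1e-12);
    assert!((ce - (1.0 + kl)).abs() < 1e-10);
    println!("kl={kl:.4} js={js:.4} tv={tv:.4} ce={ce:.4}");
}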

crates/information/src/measures/estimators.rs

Lines changed: 100 additions & 4 deletions
@@ -1,17 +1,42 @@
 use super::mutual::MutualInfo;
 use super::shannon::Shannon;
 
-/// Trait for discrete entropy estimators.
+// ── Discrete estimator traits ─────────────────────────────────────────────────
+
+/// Compute entropy from a sequence of discrete integer labels (in bits).
 pub trait DiscreteEntropyEstimator {
     fn estimate_entropy_bits(&self, values: &[i32]) -> f64;
 }
 
-/// Trait for discrete mutual-information estimators.
+/// Compute mutual information between two discrete integer sequences (in bits).
 pub trait DiscreteMutualInformationEstimator {
     fn estimate_mutual_information_bits(&self, x_values: &[i32], y_values: &[i32]) -> f64;
 }
 
-/// Baseline Shannon entropy estimator.
+// ── Continuous estimator traits ───────────────────────────────────────────────
+//
+// Continuous data is represented as &[Vec<f32>] (N points × D dimensions),
+// matching the kNN-based API in shannon.rs and mutual/calculation.rs.
+//
+// LP / multiplayer usage: plug in the right estimator per signal type without
+// changing call sites. Discrete handles genome symbols and material-type counts;
+// continuous handles position density fields, velocity distributions, and any
+// other physical observable sampled from the MPM grid.
+
+/// Compute differential entropy from a continuous multivariate sample (in bits).
+pub trait ContinuousEntropyEstimator {
+    fn estimate_entropy_bits(&self, data: &[Vec<f32>]) -> f64;
+}
+
+/// Compute mutual information between two continuous multivariate samples (in bits).
+pub trait ContinuousMutualInformationEstimator {
+    fn estimate_mutual_information_bits(&self, x: &[Vec<f32>], y: &[Vec<f32>]) -> f64;
+}
+
+// ── Discrete implementations ──────────────────────────────────────────────────
+
+/// Baseline maximum-likelihood Shannon entropy estimator for discrete data.
+/// Biased downward for small samples; prefer Miller-Madow correction for n < 100.
 #[derive(Debug, Default, Clone, Copy)]
 pub struct ShannonEstimator;
 
@@ -21,7 +46,7 @@ impl DiscreteEntropyEstimator for ShannonEstimator {
     }
 }
 
-/// Baseline empirical mutual-information estimator.
+/// Baseline empirical mutual-information estimator for discrete data.
 #[derive(Debug, Default, Clone, Copy)]
 pub struct EmpiricalMutualInformationEstimator;
 
@@ -31,10 +56,52 @@ impl DiscreteMutualInformationEstimator for EmpiricalMutualInformationEstimator
     }
 }
 
+// ── Continuous implementations (k-NN based) ───────────────────────────────────
+
+/// Kraskov et al. 2004 k-NN entropy estimator for continuous multivariate data.
+/// `k` controls the neighbourhood size; k=3..5 works well for most LP use cases.
+#[derive(Debug, Clone, Copy)]
+pub struct KnnEntropyEstimator {
+    pub k: usize,
+}
+
+impl Default for KnnEntropyEstimator {
+    fn default() -> Self {
+        Self { k: 3 }
+    }
+}
+
+impl ContinuousEntropyEstimator for KnnEntropyEstimator {
+    fn estimate_entropy_bits(&self, data: &[Vec<f32>]) -> f64 {
+        Shannon::continuous_entropy(data, self.k)
+    }
+}
+
+/// Kraskov et al. 2004 k-NN mutual information estimator for continuous data.
+/// `k` controls the neighbourhood size; k=3..5 works well for most LP use cases.
+#[derive(Debug, Clone, Copy)]
+pub struct KnnMutualInformationEstimator {
+    pub k: usize,
+}
+
+impl Default for KnnMutualInformationEstimator {
+    fn default() -> Self {
+        Self { k: 3 }
+    }
+}
+
+impl ContinuousMutualInformationEstimator for KnnMutualInformationEstimator {
+    fn estimate_mutual_information_bits(&self, x: &[Vec<f32>], y: &[Vec<f32>]) -> f64 {
+        MutualInfo::continuous_knn(x, y, self.k)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    // ── discrete ─────────────────────────────────────────────────────────────
+
     #[test]
     fn shannon_entropy_for_fair_binary_is_one_bit() {
         let estimator = ShannonEstimator;
@@ -68,4 +135,33 @@ mod tests {
         let mi = estimator.estimate_mutual_information_bits(&x, &y);
         assert!(mi.abs() < 1e-9, "expected near 0, got {}", mi);
     }
+
+    // ── continuous ────────────────────────────────────────────────────────────
+
+    #[test]
+    fn knn_entropy_estimator_positive_for_spread_data() {
+        let estimator = KnnEntropyEstimator::default();
+        // 20 points spread across 1D — entropy should be clearly positive
+        let data: Vec<Vec<f32>> = (0..20).map(|i| vec![i as f32 * 0.5]).collect();
+        let h = estimator.estimate_entropy_bits(&data);
+        assert!(
+            h > 0.0,
+            "expected positive entropy for spread data, got {}",
+            h
+        );
+    }
+
+    #[test]
+    fn knn_mi_estimator_nonzero_for_correlated_data() {
+        let estimator = KnnMutualInformationEstimator::default();
+        // x and y perfectly correlated → MI should be positive
+        let x: Vec<Vec<f32>> = (0..20).map(|i| vec![i as f32]).collect();
+        let y: Vec<Vec<f32>> = (0..20).map(|i| vec![i as f32 * 2.0]).collect();
+        let mi = estimator.estimate_mutual_information_bits(&x, &y);
+        assert!(
+            mi > 0.0,
+            "expected positive MI for correlated data, got {}",
+            mi
+        );
+    }
 }
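
The point of the trait split is that call sites depend on a trait, not a backend: discrete and continuous estimators become drop-in components. A wiring sketch; the import path is an assumption from the file location above, and `grid_entropy` is an illustrative helper, not part of the commit:

// Wiring sketch; import path assumed from crates/information/src/measures/estimators.rs.
use information::measures::estimators::{
    ContinuousEntropyEstimator, DiscreteEntropyEstimator, KnnEntropyEstimator, ShannonEstimator,
};

/// Illustrative call site: swapping the k-NN backend for any other
/// ContinuousEntropyEstimator changes nothing here.
fn grid_entropy(estimator: &dyn ContinuousEntropyEstimator, samples: &[Vec<f32>]) -> f64 {
    estimator.estimate_entropy_bits(samples)
}

fn main() {
    // Discrete path: integer labels such as genome symbols or material types.
    let discrete = ShannonEstimator;
    let h_bits = discrete.estimate_entropy_bits(&[0, 1, 0, 1]); // fair binary: 1 bit
    assert!((h_bits - 1.0).abs() < 1e-9);

    // Continuous path: multivariate samples, e.g. positions from the grid.
    let knn = KnnEntropyEstimator { k: 5 }; // or ::default() for k = 3
    let samples: Vec<Vec<f32>> = (0..50).map(|i| vec![i as f32 * 0.1]).collect();
    println!("differential entropy ≈ {:.3} bits", grid_entropy(&knn, &samples));
}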
