77//! - off-diag cosine mean 0.64
88//! - effective rank (participation ratio) 1.82 out of 256
99//! - 43.76% of pairs with cos > 0.9
10- //! → degenerate null-context artifact, not a real semantic manifold
10+ //! → degenerate null-context artifact, not a real semantic manifold
1111//!
1212//! The DeepNSM matrix is a completely different source: 96-dimensional
1313//! distributional vectors from COCA subgenre frequencies (1-billion-word
4949use std:: fs;
5050use std:: path:: PathBuf ;
5151
52- use deepnsm:: DeepNsmEngine ;
5352use deepnsm:: spo:: WordDistanceMatrix ;
53+ use deepnsm:: DeepNsmEngine ;
5454
5555fn main ( ) {
5656 println ! ( "# Probe: DeepNSM Semantic Layer Sanity" ) ;
@@ -86,7 +86,11 @@ fn main() {
8686 let nonzero_diagonals: Vec < ( usize , u8 ) > = ( 0 ..k)
8787 . filter_map ( |i| {
8888 let d = dm. get ( i as u16 , i as u16 ) ;
89- if d != 0 { Some ( ( i, d) ) } else { None }
89+ if d != 0 {
90+ Some ( ( i, d) )
91+ } else {
92+ None
93+ }
9094 } )
9195 . take ( 5 )
9296 . collect ( ) ;
@@ -113,10 +117,14 @@ fn main() {
113117 // Convert to f64 for stats
114118 let n = off. len ( ) as f64 ;
115119 let mean: f64 = off. iter ( ) . map ( |& v| v as f64 ) . sum :: < f64 > ( ) / n;
116- let var: f64 = off. iter ( ) . map ( |& v| {
117- let diff = v as f64 - mean;
118- diff * diff
119- } ) . sum :: < f64 > ( ) / n;
120+ let var: f64 = off
121+ . iter ( )
122+ . map ( |& v| {
123+ let diff = v as f64 - mean;
124+ diff * diff
125+ } )
126+ . sum :: < f64 > ( )
127+ / n;
120128 let std_dev = var. sqrt ( ) ;
121129
122130 // Percentiles via sort
@@ -165,12 +173,20 @@ fn main() {
165173 // has no per-row distinguishing structure → degenerate.
166174 let row_sum_f64: Vec < f64 > = row_sum. iter ( ) . map ( |& s| s as f64 ) . collect ( ) ;
167175 let mean_rs = row_sum_f64. iter ( ) . sum :: < f64 > ( ) / k as f64 ;
168- let var_rs = row_sum_f64. iter ( ) . map ( |& s| {
169- let diff = s - mean_rs;
170- diff * diff
171- } ) . sum :: < f64 > ( ) / k as f64 ;
176+ let var_rs = row_sum_f64
177+ . iter ( )
178+ . map ( |& s| {
179+ let diff = s - mean_rs;
180+ diff * diff
181+ } )
182+ . sum :: < f64 > ( )
183+ / k as f64 ;
172184 let std_rs = var_rs. sqrt ( ) ;
173- let cv = if mean_rs. abs ( ) > 1e-9 { std_rs / mean_rs } else { 0.0 } ;
185+ let cv = if mean_rs. abs ( ) > 1e-9 {
186+ std_rs / mean_rs
187+ } else {
188+ 0.0
189+ } ;
174190 println ! ( "## Row-sum constancy (matrix isotropy proxy)" ) ;
175191 println ! ( "- mean row sum: {:.2}" , mean_rs) ;
176192 println ! ( "- std row sum: {:.2}" , std_rs) ;
@@ -186,17 +202,25 @@ fn main() {
186202 for i in 0 ..k {
187203 let mut best = u32:: MAX ;
188204 for j in 0 ..k {
189- if i == j { continue ; }
205+ if i == j {
206+ continue ;
207+ }
190208 let d = dm. get ( i as u16 , j as u16 ) as u32 ;
191- if d < best { best = d; }
209+ if d < best {
210+ best = d;
211+ }
192212 }
193213 nn_dist. push ( best) ;
194214 }
195215 let nn_mean: f64 = nn_dist. iter ( ) . map ( |& v| v as f64 ) . sum :: < f64 > ( ) / k as f64 ;
196- let nn_var: f64 = nn_dist. iter ( ) . map ( |& v| {
197- let diff = v as f64 - nn_mean;
198- diff * diff
199- } ) . sum :: < f64 > ( ) / k as f64 ;
216+ let nn_var: f64 = nn_dist
217+ . iter ( )
218+ . map ( |& v| {
219+ let diff = v as f64 - nn_mean;
220+ diff * diff
221+ } )
222+ . sum :: < f64 > ( )
223+ / k as f64 ;
200224 let nn_std = nn_var. sqrt ( ) ;
201225 println ! ( "## Nearest-neighbor distance (excluding self)" ) ;
202226 println ! ( "- mean: {:.2}" , nn_mean) ;
@@ -232,8 +256,10 @@ fn main() {
232256 println ! ( "| matrix size | 256×256 | {}×{} |" , k, k) ;
233257 println ! ( "| off-diag mean | 0.640 (cos) | {:.2} (u8 dist) |" , mean) ;
234258 println ! ( "| effective rank | 1.82 | see Python follow-up |" ) ;
235- println ! ( "| frac > 0.9 (cos) / high u8 | 43.76% | {:.2}% (top 10 bins) |" ,
236- top10 as f64 / n * 100.0 ) ;
259+ println ! (
260+ "| frac > 0.9 (cos) / high u8 | 43.76% | {:.2}% (top 10 bins) |" ,
261+ top10 as f64 / n * 100.0
262+ ) ;
237263 println ! ( "| nearest-neighbor similarity | 0.9407 (cos) | see std above |" ) ;
238264 println ! ( ) ;
239265
@@ -242,7 +268,10 @@ fn main() {
242268 println ! ( ) ;
243269 println ! ( "```python" ) ;
244270 println ! ( "import numpy as np" ) ;
245- println ! ( "d = np.fromfile('{}', dtype=np.uint8).reshape(4096, 4096).astype(np.float64)" , dump_path) ;
271+ println ! (
272+ "d = np.fromfile('{}', dtype=np.uint8).reshape(4096, 4096).astype(np.float64)" ,
273+ dump_path
274+ ) ;
246275 println ! ( "# Convert distance to similarity: normalize [0,255] → [0,1], invert" ) ;
247276 println ! ( "max_d = d.max()" ) ;
248277 println ! ( "sim = 1.0 - d / max(max_d, 1)" ) ;
0 commit comments