Skip to content

Commit 4d26776

Browse files
authored
Merge pull request #479 from AdaWorldAPI/claude/stoic-turing-M0Eiq
deepnsm: sentence-level AriGraph reader — left-corner state machine + P64 discrete substrate
2 parents efdb3b1 + 4f73044 commit 4d26776

35 files changed

Lines changed: 4762 additions & 204 deletions

.claude/board/TECH_DEBT.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2638,3 +2638,14 @@ W6 entropy-ledger reframe of `DEEPNSM-NSM-1`.
26382638
## TD-DEEPNSM-CLIPPY-195 — 12 pre-existing default-clippy lints in deepnsm (clippy 1.95 bump)
26392639

26402640
`cargo clippy --manifest-path crates/deepnsm/Cargo.toml --all-targets -- -D warnings` reports 12 errors across 7 files (codebook 2, encoder 4, similarity 2, disambiguator_glue/nsm_primes/parser/quantum_mode 1 each) — newer lints (`manual_repeat_n`, `uninlined_format_args`, …) that were clean when written and fire only under clippy 1.95.0. Pre-existing (not from the E-ENGLISH-BIFURCATES slice; `arcs.rs` is clean at pedantic+nursery). Tests unaffected (94+4+8+1 green). Fix = a separate mechanical sweep across the 7 files; deliberately NOT bundled into the feature slice (7-file scope creep). Surfaced 2026-05-31.
2641+
2642+
**Resolved 2026-06-09** (PR #479, branch `claude/stoic-turing-M0Eiq`, commit `bf95caa`):
2643+
hand-reviewed clippy sweep landed. `cargo clippy --manifest-path
2644+
crates/deepnsm/Cargo.toml --all-targets -- -D warnings` is now clean (exit 0).
2645+
Cleared the original 7-file set plus the lints in PR #479's new reader modules
2646+
(window / reader_state / crystal_neighborhood / sentence_transformer64 /
2647+
signed_crystal / codebook) surfaced by `--all-targets` — 22 lints across 13
2648+
files; 217 tests green. Fixes are hand-applied (NOT `clippy --fix`, which mangled
2649+
`reader_state.rs` into stranded-comment match guards). The CI clippy step for
2650+
deepnsm was promoted Tier-B advisory → Tier-A gating in
2651+
`.github/workflows/style.yml`.

.github/workflows/rust-test.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ env:
2020
RUST_BACKTRACE: "1"
2121
CARGO_INCREMENTAL: "0"
2222

23+
# Least-privilege: these jobs only read the repo (checkout + build + test).
24+
# Codecov upload uses its own token secret and is non-fatal (fail_ci_if_error: false).
25+
permissions:
26+
contents: read
27+
2328
jobs:
2429
test:
2530
runs-on: ubuntu-24.04
@@ -87,6 +92,11 @@ jobs:
8792
run: cargo test --manifest-path crates/lance-graph-contract/Cargo.toml --tests
8893
- name: Run contract doctests
8994
run: cargo test --manifest-path crates/lance-graph-contract/Cargo.toml --doc
95+
# deepnsm: standalone 0-dep codec crate, workspace-excluded, so the
96+
# lance-graph test steps above never reach it. ~217 lib + integration +
97+
# doctests, fast (no lance/datafusion/ndarray deps). Gating.
98+
- name: Run deepnsm tests
99+
run: cargo test --manifest-path crates/deepnsm/Cargo.toml
90100

91101
test-with-coverage:
92102
runs-on: ubuntu-24.04

.github/workflows/style.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ env:
1818
CARGO_TERM_COLOR: always
1919
RUSTFLAGS: "-C debuginfo=1 -C target-cpu=x86-64-v3"
2020

21+
# Least-privilege: these jobs only read the repo (checkout + build + lint).
22+
permissions:
23+
contents: read
24+
2125
jobs:
2226
# Clippy runs FIRST and is mandatory — logical soundness before syntax.
2327
# Discipline:
@@ -71,6 +75,12 @@ jobs:
7175
- name: Clippy lance-graph (advisory)
7276
continue-on-error: true
7377
run: cargo clippy --manifest-path crates/lance-graph/Cargo.toml --lib --tests -- -D warnings
78+
# Tier A (mandatory, gating): deepnsm is now clippy-clean — TD-DEEPNSM-CLIPPY-195
79+
# resolved 2026-06-09 by a hand-reviewed sweep. It's a standalone 0-dep codec
80+
# crate, workspace-excluded, so the lance-graph clippy steps don't cover it;
81+
# gate it explicitly (same posture as the contract crate) so it can't regress.
82+
- name: Clippy deepnsm (mandatory)
83+
run: cargo clippy --manifest-path crates/deepnsm/Cargo.toml --all-targets -- -D warnings
7484

7585
format:
7686
runs-on: ubuntu-24.04
@@ -94,8 +104,13 @@ jobs:
94104
- uses: actions-rust-lang/setup-rust-toolchain@v1
95105
with:
96106
components: rustfmt
97-
- name: Check formatting
107+
- name: Check formatting (lance-graph)
98108
run: cargo fmt --manifest-path crates/lance-graph/Cargo.toml -- --check
109+
# deepnsm is a standalone, workspace-excluded codec crate, so
110+
# `cargo fmt --all` never reaches it. It was brought to a rustfmt-clean
111+
# baseline in PR #479; check it explicitly so it can't silently drift.
112+
- name: Check formatting (deepnsm)
113+
run: cargo fmt --manifest-path crates/deepnsm/Cargo.toml -- --check
99114

100115
# typos / spell-check removed 2026-04-26: too many false positives on
101116
# technical jargon (NARS terms, codec acronyms, German loanwords used in

crates/deepnsm/Cargo.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,19 @@ version = "0.1.0"
44
edition = "2021"
55
license = "Apache-2.0"
66
publish = false
7-
8-
# Empty `[workspace]` so cargo treats this crate as standalone when invoked
9-
# via `--manifest-path` (deepnsm is `exclude`d from the parent workspace,
10-
# but in nested git-worktree directories cargo's auto-discovery would
11-
# otherwise walk further up and pick up the outer workspace root).
12-
[workspace]
137
description = """
148
DeepNSM: Distributional semantic transformer replacement.
159
4,096 words × 12 bits × 8MB distance matrix = complete semantic engine.
1610
O(1) per word, O(n) per sentence, deterministic, bit-reproducible.
1711
No GPU. No learned weights. Same decision boundaries as cosine.
1812
"""
1913

14+
# Empty `[workspace]` so cargo treats this crate as standalone when invoked
15+
# via `--manifest-path` (deepnsm is `exclude`d from the parent workspace,
16+
# but in nested git-worktree directories cargo's auto-discovery would
17+
# otherwise walk further up and pick up the outer workspace root).
18+
[workspace]
19+
2020
# Zero EXTERNAL (crates.io) dependencies — for supply-chain cleanness.
2121
# AdaWorldAPI path deps are mandatory and compile into the same binary.
2222
# ndarray is the canonical SIMD/BLAS/CLAM provider: ndarray::simd is a

crates/deepnsm/examples/probe_semantic_sanity.rs

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//! - off-diag cosine mean 0.64
88
//! - effective rank (participation ratio) 1.82 out of 256
99
//! - 43.76% of pairs with cos > 0.9
10-
//! → degenerate null-context artifact, not a real semantic manifold
10+
//! → degenerate null-context artifact, not a real semantic manifold
1111
//!
1212
//! The DeepNSM matrix is a completely different source: 96-dimensional
1313
//! distributional vectors from COCA subgenre frequencies (1-billion-word
@@ -49,8 +49,8 @@
4949
use std::fs;
5050
use std::path::PathBuf;
5151

52-
use deepnsm::DeepNsmEngine;
5352
use deepnsm::spo::WordDistanceMatrix;
53+
use deepnsm::DeepNsmEngine;
5454

5555
fn main() {
5656
println!("# Probe: DeepNSM Semantic Layer Sanity");
@@ -86,7 +86,11 @@ fn main() {
8686
let nonzero_diagonals: Vec<(usize, u8)> = (0..k)
8787
.filter_map(|i| {
8888
let d = dm.get(i as u16, i as u16);
89-
if d != 0 { Some((i, d)) } else { None }
89+
if d != 0 {
90+
Some((i, d))
91+
} else {
92+
None
93+
}
9094
})
9195
.take(5)
9296
.collect();
@@ -113,10 +117,14 @@ fn main() {
113117
// Convert to f64 for stats
114118
let n = off.len() as f64;
115119
let mean: f64 = off.iter().map(|&v| v as f64).sum::<f64>() / n;
116-
let var: f64 = off.iter().map(|&v| {
117-
let diff = v as f64 - mean;
118-
diff * diff
119-
}).sum::<f64>() / n;
120+
let var: f64 = off
121+
.iter()
122+
.map(|&v| {
123+
let diff = v as f64 - mean;
124+
diff * diff
125+
})
126+
.sum::<f64>()
127+
/ n;
120128
let std_dev = var.sqrt();
121129

122130
// Percentiles via sort
@@ -165,12 +173,20 @@ fn main() {
165173
// has no per-row distinguishing structure → degenerate.
166174
let row_sum_f64: Vec<f64> = row_sum.iter().map(|&s| s as f64).collect();
167175
let mean_rs = row_sum_f64.iter().sum::<f64>() / k as f64;
168-
let var_rs = row_sum_f64.iter().map(|&s| {
169-
let diff = s - mean_rs;
170-
diff * diff
171-
}).sum::<f64>() / k as f64;
176+
let var_rs = row_sum_f64
177+
.iter()
178+
.map(|&s| {
179+
let diff = s - mean_rs;
180+
diff * diff
181+
})
182+
.sum::<f64>()
183+
/ k as f64;
172184
let std_rs = var_rs.sqrt();
173-
let cv = if mean_rs.abs() > 1e-9 { std_rs / mean_rs } else { 0.0 };
185+
let cv = if mean_rs.abs() > 1e-9 {
186+
std_rs / mean_rs
187+
} else {
188+
0.0
189+
};
174190
println!("## Row-sum constancy (matrix isotropy proxy)");
175191
println!("- mean row sum: {:.2}", mean_rs);
176192
println!("- std row sum: {:.2}", std_rs);
@@ -186,17 +202,25 @@ fn main() {
186202
for i in 0..k {
187203
let mut best = u32::MAX;
188204
for j in 0..k {
189-
if i == j { continue; }
205+
if i == j {
206+
continue;
207+
}
190208
let d = dm.get(i as u16, j as u16) as u32;
191-
if d < best { best = d; }
209+
if d < best {
210+
best = d;
211+
}
192212
}
193213
nn_dist.push(best);
194214
}
195215
let nn_mean: f64 = nn_dist.iter().map(|&v| v as f64).sum::<f64>() / k as f64;
196-
let nn_var: f64 = nn_dist.iter().map(|&v| {
197-
let diff = v as f64 - nn_mean;
198-
diff * diff
199-
}).sum::<f64>() / k as f64;
216+
let nn_var: f64 = nn_dist
217+
.iter()
218+
.map(|&v| {
219+
let diff = v as f64 - nn_mean;
220+
diff * diff
221+
})
222+
.sum::<f64>()
223+
/ k as f64;
200224
let nn_std = nn_var.sqrt();
201225
println!("## Nearest-neighbor distance (excluding self)");
202226
println!("- mean: {:.2}", nn_mean);
@@ -232,8 +256,10 @@ fn main() {
232256
println!("| matrix size | 256×256 | {}×{} |", k, k);
233257
println!("| off-diag mean | 0.640 (cos) | {:.2} (u8 dist) |", mean);
234258
println!("| effective rank | 1.82 | see Python follow-up |");
235-
println!("| frac > 0.9 (cos) / high u8 | 43.76% | {:.2}% (top 10 bins) |",
236-
top10 as f64 / n * 100.0);
259+
println!(
260+
"| frac > 0.9 (cos) / high u8 | 43.76% | {:.2}% (top 10 bins) |",
261+
top10 as f64 / n * 100.0
262+
);
237263
println!("| nearest-neighbor similarity | 0.9407 (cos) | see std above |");
238264
println!();
239265

@@ -242,7 +268,10 @@ fn main() {
242268
println!();
243269
println!("```python");
244270
println!("import numpy as np");
245-
println!("d = np.fromfile('{}', dtype=np.uint8).reshape(4096, 4096).astype(np.float64)", dump_path);
271+
println!(
272+
"d = np.fromfile('{}', dtype=np.uint8).reshape(4096, 4096).astype(np.float64)",
273+
dump_path
274+
);
246275
println!("# Convert distance to similarity: normalize [0,255] → [0,1], invert");
247276
println!("max_d = d.max()");
248277
println!("sim = 1.0 - d / max(max_d, 1)");

crates/deepnsm/src/arcs.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,10 @@ mod tests {
6666
let ranks = [12_u16, 670, 2942];
6767
let (basin, literal) = t.split_arcs(&ranks);
6868
assert_eq!(basin.0, t.fingerprint, "basin arc IS the spine bundle");
69-
assert_eq!(literal.0, ranks, "literal arc carries the COCA ranks verbatim");
69+
assert_eq!(
70+
literal.0, ranks,
71+
"literal arc carries the COCA ranks verbatim"
72+
);
7073
}
7174

7275
#[test]

crates/deepnsm/src/arcuate.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,9 @@ mod tests {
166166
}
167167
let result = arc.disambiguate([fp(1.0), fp(-1.0)]);
168168
assert_eq!(result.candidate_count, 2, "both candidates evaluated");
169-
assert!(result.winner_index < 2, "a real winner over the ±5 evidence");
169+
assert!(
170+
result.winner_index < 2,
171+
"a real winner over the ±5 evidence"
172+
);
170173
}
171174
}

0 commit comments

Comments
 (0)