Skip to content

Commit 7046ca7

Browse files
substrate codec + compressed messaging + inline error hints
Four new builtins make substrate-aware code compression a first-class OMC primitive:

- omc_codec_encode(code, every_n) — sampled-token + substrate metadata payload
- omc_codec_decode_lookup(codec, library) — exact recovery by canonical-hash match
- omc_msg_sign_compressed(content, sender, kind, every_n)
- omc_msg_recover_compressed(msg, library) — verify + library lookup

The codec is alpha-rename invariant (canonical-hash addressing) and yields ~5–7x wire-payload reduction with lossless recovery for in-library content. Substrate signatures are preserved across compression.

Tested: 7 cases in test_codec.omc, 6 in test_compressed_messaging.omc. All 13 pass. Existing test_introspection unaffected (13 pass).

Inline-error UX: the undefined-function error now carries the suggested function's signature inline, so the LLM iteration loop no longer needs a separate omc_help round-trip after a typo.

Empirical results from experiments/seed_expansion/ (PyTorch):

- v3 structural-features 40/10 split: 0/10 held-out exact
- v4 token-sampled (N=2, N=3): 0/10 held-out exact
- Closed-set v2 still 100% exact — the substrate-keyed lookup is sound; open-set generalization needs scale.

See FINDINGS.md for full extrapolation of what shipped vs. what remains data-budget bound.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent fb62424 commit 7046ca7

13 files changed

Lines changed: 1183 additions & 6 deletions

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Tandem demo: REPLY side (Hermes responding to Claude).
2+
# Reads a substrate-signed message from Claude, verifies it,
3+
# then sends back a signed response with sender_id=28765 and kind=2.
4+
5+
# Debug helper: prints `label`, the literal " = ", and `to_string(v)`,
# joined into one string by concat_many.
fn show(label, v) { print(concat_many(label, " = ", to_string(v))); }
6+
7+
# Reply-side entry point of the demo.
# Reads the incoming wire text from a file, deserializes it, and runs
# omc_msg_verify on it. The verification fields are printed; if the
# result's "valid" field equals 1, the verified content is echoed back as
# a newly signed message written to the outgoing file. Otherwise only a
# mismatch notice is printed and nothing is written.
fn main() {
8+
// Sender id and message kind passed to omc_msg_sign for the response.
h HERMES_ID = 28765;
9+
h KIND_RESPONSE = 2;
10+
11+
// Read the incoming wire from Claude
// NOTE(review): absolute, machine-specific path; the demo only works where
// this directory exists.
12+
h wire = read_file("/home/thearchitect/omc_channel/from_claude.json");
13+
h msg = omc_msg_deserialize(wire);
14+
h check = omc_msg_verify(msg);
15+
16+
// Dump every field of the verification result that this demo reads.
show("valid signature? ", dict_get(check, "valid"));
17+
show("sender_id ", dict_get(check, "sender_id"));
18+
show("kind ", dict_get(check, "kind"));
19+
show("expected_hash ", dict_get(check, "expected_hash"));
20+
show("actual_hash ", dict_get(check, "actual_hash"));
21+
show("drift_resonance ", dict_get(check, "drift_resonance"));
22+
show("drift_him ", dict_get(check, "drift_him"));
23+
24+
// Respond only when the verifier reports valid == 1.
if dict_get(check, "valid") == 1 {
25+
print("");
26+
print("=== Signature valid — content trustworthy ===");
27+
h payload = dict_get(check, "content"); // extract verified content
28+
print("Payload:");
29+
print(payload);
30+
31+
// Now sign a response (we echo back the same payload for demo)
32+
h resp_msg = omc_msg_sign(payload, HERMES_ID, KIND_RESPONSE);
33+
show("resp packed ID ", dict_get(resp_msg, "packed"));
34+
show("resp content_hash ", dict_get(resp_msg, "content_hash"));
35+
show("resp resonance ", dict_get(resp_msg, "resonance"));
36+
show("resp attractor ", dict_get(resp_msg, "attractor"));
37+
38+
// Serialize the signed response and write it to the outgoing file.
h resp_wire = omc_msg_serialize(resp_msg);
39+
write_file("/home/thearchitect/omc_channel/from_hermes.json", resp_wire);
40+
print("");
41+
print("Wrote /home/thearchitect/omc_channel/from_hermes.json");
42+
// NOTE(review): this status text repeats the values of KIND_RESPONSE and
// HERMES_ID as literals; keep it in sync if the constants change.
print("Hermes response sent (kind=2, sender_id=28765).");
43+
} else {
44+
print("");
45+
print("=== SIGNATURE MISMATCH — will not respond ===");
46+
}
47+
}
48+
# Top-level call that runs the reply demo.
main();

examples/tests/test_codec.omc

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Substrate-keyed codec — round-trip on a known library.
2+
3+
# Equality assertion for tests.
# When `actual != expected`, records a failure through test_record_failure.
# The failure text is `msg` followed by ": expected <expected> got <actual>",
# with both values rendered by to_string. Does nothing when they are equal.
fn assert_eq(actual, expected, msg) {
4+
if actual != expected {
5+
test_record_failure(msg + ": expected " + to_string(expected) + " got " + to_string(actual));
6+
}
7+
}
8+
9+
# Truthiness assertion for tests: records `msg` through test_record_failure
# when `!cond` holds; does nothing otherwise.
fn assert_true(cond, msg) { if !cond { test_record_failure(msg); } }
10+
11+
# Encodes a one-line function with a second argument of 3 and checks,
# through dict_has, that the resulting dict carries each of the six keys
# listed below. Only key presence is checked, not the values.
fn test_encode_returns_required_fields() {
12+
h codec = omc_codec_encode("fn f(x) { return x; }", 3);
13+
assert_true(dict_has(codec, "sampled_tokens"), "has sampled_tokens");
14+
assert_true(dict_has(codec, "content_hash"), "has content_hash");
15+
assert_true(dict_has(codec, "attractor"), "has attractor");
16+
assert_true(dict_has(codec, "dist"), "has dist");
17+
assert_true(dict_has(codec, "original_tok_count"), "has original_tok_count");
18+
assert_true(dict_has(codec, "compression_ratio"), "has compression_ratio");
19+
}
20+
21+
# Checks that the "compression_ratio" field of the encoded dict is strictly
# greater than 1.0 for a one-line function encoded with a second argument
# of 3.
# NOTE(review): the test name says "positive", but the bound asserted is
# > 1.0, not > 0.
fn test_compression_ratio_positive() {
22+
h codec = omc_codec_encode("fn f(x) { return x; }", 3);
23+
assert_true(dict_get(codec, "compression_ratio") > 1.0, "ratio > 1");
24+
}
25+
26+
# Round-trip on a three-entry library: encodes the entry fetched with
# arr_get(lib, 1) and expects omc_codec_decode_lookup to return the exact
# source text "fn b() { return 2; }" (the second entry of `lib`).
fn test_lookup_finds_known() {
27+
h lib = [
28+
"fn a() { return 1; }",
29+
"fn b() { return 2; }",
30+
"fn c() { return 3; }",
31+
];
32+
h codec = omc_codec_encode(arr_get(lib, 1), 3);
33+
h decoded = omc_codec_decode_lookup(codec, lib);
34+
assert_eq(decoded, "fn b() { return 2; }", "library lookup");
35+
}
36+
37+
# Encodes a function that is not in the one-entry library and expects
# omc_codec_decode_lookup to return null.
fn test_lookup_misses_unknown() {
38+
h lib = ["fn a() { return 1; }"];
39+
h codec = omc_codec_encode("fn z() { return 99; }", 3);
40+
h decoded = omc_codec_decode_lookup(codec, lib);
41+
assert_true(decoded == null, "miss returns null");
42+
}
43+
44+
# The library holds `fn f(x) { return x; }`; the encoded source is the same
# function with its parameter renamed to `y`. Expects the lookup to return
# the library's text (the `x` version), not the encoded `y` version.
fn test_alpha_equivalent_lookup_succeeds() {
45+
# Canonical hash is invariant under rename, so encoding one
46+
# alpha-equivalent form and looking up the other still hits.
47+
h lib = ["fn f(x) { return x; }"];
48+
h codec = omc_codec_encode("fn f(y) { return y; }", 3);
49+
h decoded = omc_codec_decode_lookup(codec, lib);
50+
assert_eq(decoded, "fn f(x) { return x; }", "alpha-rename lookup hits");
51+
}
52+
53+
# Encodes the same source twice, with second arguments 2 and 4, and compares
# the lengths of the two "sampled_tokens" arrays.
# NOTE(review): the assertion only requires n2 > n4; the "roughly twice"
# ratio mentioned in the comment below is not checked.
fn test_encode_n_2_keeps_more() {
54+
# Same source, N=2 keeps roughly twice as many tokens as N=4.
55+
h c2 = omc_codec_encode("fn add(x, y) { return x + y + 1; }", 2);
56+
h c4 = omc_codec_encode("fn add(x, y) { return x + y + 1; }", 4);
57+
h n2 = arr_len(dict_get(c2, "sampled_tokens"));
58+
h n4 = arr_len(dict_get(c4, "sampled_tokens"));
59+
assert_true(n2 > n4, "N=2 keeps more than N=4");
60+
}
61+
62+
# Unknown-function error now includes signature hint inline.
63+
# Calls the misspelled name `arr_softmx` inside try/catch, stores the caught
# value in `err`, and asserts that re_match reports 1 for both
# "did you mean" and "signature" against it.
fn test_unknown_fn_inlines_signature_hint() {
64+
# Starts as "" and is only overwritten in the catch branch; presumably both
# assertions fail if the call does not raise — confirm re_match on "".
h err = "";
65+
try {
66+
h _ = arr_softmx([1.0]); # typo
67+
} catch e {
68+
err = e;
69+
}
70+
assert_true(re_match("did you mean", err) == 1, "did-you-mean present");
71+
assert_true(re_match("signature", err) == 1, "inline signature hint present");
72+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Compressed substrate-signed messaging round-trip.
2+
3+
# Equality assertion for tests.
# When `actual != expected`, records a failure through test_record_failure.
# The failure text is `msg` followed by ": expected <expected> got <actual>",
# with both values rendered by to_string. Does nothing when they are equal.
fn assert_eq(actual, expected, msg) {
4+
if actual != expected {
5+
test_record_failure(msg + ": expected " + to_string(expected) + " got " + to_string(actual));
6+
}
7+
}
8+
9+
# Truthiness assertion for tests: records `msg` through test_record_failure
# when `!cond` holds; does nothing otherwise.
fn assert_true(cond, msg) { if !cond { test_record_failure(msg); } }
10+
11+
# Signs a one-line function with omc_msg_sign_compressed (arguments: the
# source text, 18173, 1, 3) and checks that the resulting message dict has
# the "sampled_tokens", "content_hash" and "compression_ratio" keys, and
# that "compression_ratio" is strictly greater than 1.0.
fn test_compressed_sign_includes_codec() {
12+
h msg = omc_msg_sign_compressed("fn f(x) { return x; }", 18173, 1, 3);
13+
assert_true(dict_has(msg, "sampled_tokens"), "carries sampled_tokens");
14+
assert_true(dict_has(msg, "content_hash"), "carries content_hash");
15+
assert_true(dict_has(msg, "compression_ratio"), "has ratio");
16+
assert_true(dict_get(msg, "compression_ratio") > 1.0, "compression > 1");
17+
}
18+
19+
# Signs source text identical to the second entry of a three-entry library
# and expects omc_msg_recover_compressed to return that exact text.
fn test_recover_in_library() {
20+
h lib = [
21+
"fn a() { return 1; }",
22+
"fn b(x) { return x * 2; }",
23+
"fn c(x, y) { return x + y; }",
24+
];
25+
h msg = omc_msg_sign_compressed("fn b(x) { return x * 2; }", 18173, 1, 3);
26+
h recovered = omc_msg_recover_compressed(msg, lib);
27+
assert_eq(recovered, "fn b(x) { return x * 2; }", "recovered exact");
28+
}
29+
30+
# The signed source uses parameter `y`; the library entry is the same
# function with parameter `x`. Expects recovery to return the library's
# text (the `x` version).
fn test_recover_alpha_equivalent() {
31+
# Sender renames params; receiver's library has the original name.
32+
h lib = ["fn b(x) { return x * 2; }"];
33+
h msg = omc_msg_sign_compressed("fn b(y) { return y * 2; }", 18173, 1, 3);
34+
h recovered = omc_msg_recover_compressed(msg, lib);
35+
# Recovers the library version (canonical-hash invariant under rename).
36+
assert_eq(recovered, "fn b(x) { return x * 2; }", "alpha-eq recovery");
37+
}
38+
39+
# Signs a function that is not in the one-entry library and expects
# omc_msg_recover_compressed to return null.
fn test_recover_miss() {
40+
h lib = ["fn a() { return 1; }"];
41+
h msg = omc_msg_sign_compressed("fn nope() { return 99; }", 18173, 1, 3);
42+
h recovered = omc_msg_recover_compressed(msg, lib);
43+
assert_true(recovered == null, "miss → null");
44+
}
45+
46+
# Signs an empty-bodied function and checks, through dict_has, that the
# message dict has the "resonance", "him_score", "attractor" and "packed"
# keys. Only key presence is checked, not the values.
fn test_substrate_metadata_present() {
47+
h msg = omc_msg_sign_compressed("fn f() {}", 18173, 1, 3);
48+
assert_true(dict_has(msg, "resonance"), "has resonance");
49+
assert_true(dict_has(msg, "him_score"), "has him");
50+
assert_true(dict_has(msg, "attractor"), "has attractor");
51+
assert_true(dict_has(msg, "packed"), "has packed");
52+
}
53+
54+
# End-to-end path: sign compressed, serialize, deserialize, then recover
# against a one-entry library. The signed source names its parameters
# (a, b) while the library entry names them (x, y); the test expects the
# library's text back.
fn test_wire_serialize_roundtrip() {
55+
# Sign compressed, serialize to wire, deserialize, recover via library.
56+
h lib = ["fn add(x, y) { return x + y; }"];
57+
h msg = omc_msg_sign_compressed("fn add(a, b) { return a + b; }", 18173, 1, 3);
58+
h wire = omc_msg_serialize(msg);
59+
h received = omc_msg_deserialize(wire);
60+
h recovered = omc_msg_recover_compressed(received, lib);
61+
assert_eq(recovered, "fn add(x, y) { return x + y; }", "wire roundtrip");
62+
}

0 commit comments

Comments
 (0)