Skip to content

Commit 84a4a16

Browse files
Phase 3: codec ↔ package manager
Two new builtins make registry-installed packages a first-class codec library:

- omc_registry_codec_library() -> string[]
  Scans omc_modules/ for *.omc files and returns every top-level fn definition as a separate string entry. Empty array if omc_modules/ doesn't exist (graceful no-op).
- omc_msg_recover_from_registry(msg: dict) -> string|null
  Convenience for omc_msg_recover_compressed(msg, omc_registry_codec_library()). Walks installed packages, returns the matching canonical source or null.

Brace-aware fn extractor (extract_top_level_fns) tracks # line comments + "..."/'...' string literals so braces inside literals don't throw off depth counting.

Demos:
- examples/demos/llm_tandem_registry.omc: writes a synthetic stats package to omc_modules/, signs an alpha-renamed copy of pkg_mean, recovers the original via registry. Verified end-to-end (3 fns extracted, canonical-hash match, returns library's parameter name 'vs' not sender's 'xs').
- examples/tests/test_codec_registry.omc: 3 tests covering the graceful-no-omc_modules path.

README + INDEX.md updated. 177 OMC tests total, all green.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 8133b97 commit 84a4a16

6 files changed

Lines changed: 242 additions & 2 deletions

File tree

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ fn coherent_loop(n) {
250250
| [`examples/tests/test_heal_pass.omc`](examples/tests/test_heal_pass.omc) | 16 tests for the self-healing compiler's heal classes + per-class pragmas |
251251
| [`examples/tests/test_codec.omc`](examples/tests/test_codec.omc) | 7 tests for `omc_codec_encode/decode_lookup` — alpha-rename invariant library recovery + inline error-hint UX check |
252252
| [`examples/tests/test_compressed_messaging.omc`](examples/tests/test_compressed_messaging.omc) | 6 tests: substrate-signed wire payloads carrying codec output, alpha-equivalent recovery, JSON round-trip |
253+
| [`examples/tests/test_codec_registry.omc`](examples/tests/test_codec_registry.omc) | 3 tests: `omc_registry_codec_library` + `omc_msg_recover_from_registry` graceful no-op when omc_modules/ doesn't exist |
254+
| [`examples/demos/llm_tandem_registry.omc`](examples/demos/llm_tandem_registry.omc) | End-to-end: synthetic package in omc_modules/, signs renamed copy, recovers via registry — alpha-rename invariant |
253255
| [`experiments/seed_expansion/FINDINGS.md`](experiments/seed_expansion/FINDINGS.md) | Empirical writeup: substrate-keyed codec works (lossless on in-library content); open-set ML stays data-budget bound at 40 samples — honest |
254256

255257
---
@@ -269,7 +271,7 @@ fn coherent_loop(n) {
269271
| `experiments/substrate_primitives/` | Empirical comparison of substrate vs native vs OMC search |
270272
| `examples/lib/` | `substrate.omc`, `harmonic_anomaly`, `harmonic_clustering`, `harmonic_recommend`, np/pd/sklearn/torch/requests/sqlite |
271273
| `examples/datascience/` | Real-data demos with honest numbers |
272-
| `examples/tests/` | `test_substrate_primitives.omc` (57), `test_new_builtins.omc` (70), `test_harmonic_libs.omc` (18), `test_heal_pass.omc` (16), `test_codec.omc` (7), `test_compressed_messaging.omc` (6) **174 total** |
274+
| `examples/tests/` | `test_substrate_primitives.omc` (57), `test_new_builtins.omc` (70), `test_harmonic_libs.omc` (18), `test_heal_pass.omc` (16), `test_codec.omc` (7), `test_compressed_messaging.omc` (6), `test_codec_registry.omc` (3) **177 total** |
273275
| `docs/` | Substrate audit, JIT benchmarks, anomaly-detection comparisons |
274276
| `registry/` | Central package registry (sha256-verified) |
275277

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Tandem demo: codec recovery via the installed registry library.
2+
#
3+
# Phase 3: instead of passing an explicit library array to
4+
# omc_msg_recover_compressed, the receiver calls
5+
# omc_msg_recover_from_registry(msg) — which automatically scans
6+
# omc_modules/ (the package-manager install directory) for matching
7+
# canonical-hash content.
8+
#
9+
# This demo:
10+
# 1. Synthesizes a tiny "stats" package and writes it to
11+
# omc_modules/stats_pkg.omc.
12+
# 2. Signs an alpha-renamed copy of one of its functions.
13+
# 3. Recovers the original via omc_msg_recover_from_registry.
14+
# 4. Shows that the recovered source matches the package's
15+
# canonical form, not the sender's renamed form.
16+
#
17+
# Run with:
18+
# ./target/release/omnimcode-standalone examples/demos/llm_tandem_registry.omc
19+
20+
fn show(label, v) { print(concat_many(label, " = ", to_string(v))); }
21+
22+
fn main() {
23+
# 1. Write a synthetic registry package.
24+
h pkg = concat_many(
25+
"# Synthetic stats package for demo purposes.\n",
26+
"fn pkg_mean(vs) { h n = arr_len(vs); h s = 0.0; h i = 0; while i < n { s = s + arr_get(vs, i); i = i + 1; } return s / n; }\n",
27+
"fn pkg_sum(vs) { h n = arr_len(vs); h s = 0.0; h i = 0; while i < n { s = s + arr_get(vs, i); i = i + 1; } return s; }\n",
28+
"fn pkg_max(vs) { h n = arr_len(vs); h m = arr_get(vs, 0); h i = 1; while i < n { if arr_get(vs, i) > m { m = arr_get(vs, i); } i = i + 1; } return m; }\n"
29+
);
30+
write_file("omc_modules/stats_pkg.omc", pkg);
31+
print("[setup] Wrote synthetic package to omc_modules/stats_pkg.omc");
32+
33+
# 2. Inspect what the registry library now contains.
34+
h lib = omc_registry_codec_library();
35+
show("registry library size", arr_len(lib));
36+
37+
# 3. Sender writes a RENAMED variant of pkg_mean (xs instead of vs).
38+
h sender_src = "fn pkg_mean(xs) { h n = arr_len(xs); h s = 0.0; h i = 0; while i < n { s = s + arr_get(xs, i); i = i + 1; } return s / n; }";
39+
h msg = omc_msg_sign_compressed(sender_src, 18173, 1, 3);
40+
show("sender content_hash ", dict_get(msg, "content_hash"));
41+
42+
# 4. Receiver recovers via registry (no explicit library passed).
43+
h recovered = omc_msg_recover_from_registry(msg);
44+
if recovered == null {
45+
print("[FAIL] registry lookup MISSED — canonical hash not in any installed package");
46+
} else {
47+
print("[OK] registry lookup hit");
48+
print("Recovered (registry's canonical form, with 'vs' parameter):");
49+
print(recovered);
50+
print("");
51+
print("Sender used 'xs', registry entry uses 'vs'.");
52+
print("Canonical hash matched because rename is invariant.");
53+
}
54+
}
55+
56+
main();

examples/tests/INDEX.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Test suite index
22

3-
**65 test files, 997 `fn test_*` functions, all green under `omnimcode-standalone --test FILE`.**
3+
**66 test files, 1000 `fn test_*` functions, all green under `omnimcode-standalone --test FILE`.**
44

55
This is a map of what's covered, organized by surface area — not an
66
exhaustive doc. Run any file with `--test FILE` to see the actual
@@ -33,6 +33,7 @@ exercise many sub-assertions inside one `test_*` body, so the
3333
|---|--:|---|
3434
| `test_codec.omc` | 7 | `omc_codec_encode/decode_lookup` — alpha-rename invariant library recovery + inline error-hint UX check |
3535
| `test_compressed_messaging.omc` | 6 | `omc_msg_sign_compressed/recover` — substrate-signed wire payloads carrying codec output, JSON round-trip |
36+
| `test_codec_registry.omc` | 3 | `omc_registry_codec_library` / `omc_msg_recover_from_registry` — graceful no-op when omc_modules/ absent |
3637
| `test_substrate_messaging.omc` | 10 | The base substrate-signed messaging protocol (pre-codec) — `omc_msg_sign / verify / serialize` |
3738
| `test_canonical.omc` | 15 | AST canonicalization — the LLM-reach-for semantic-equivalence layer |
3839
| `test_canonical_extras.omc` | 11 | More canonical / structural-equivalence cases |
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Codec ↔ registry integration.
2+
#
3+
# These tests exercise omc_registry_codec_library and
4+
# omc_msg_recover_from_registry. They run in any working directory:
5+
# if omc_modules/ doesn't exist, the lookup gracefully returns
6+
# empty / null without erroring.
7+
8+
fn assert_eq(actual, expected, msg) {
9+
if actual != expected {
10+
test_record_failure(msg + ": expected " + to_string(expected) + " got " + to_string(actual));
11+
}
12+
}
13+
14+
fn assert_true(cond, msg) { if !cond { test_record_failure(msg); } }
15+
16+
fn test_registry_library_returns_array() {
17+
h lib = omc_registry_codec_library();
18+
assert_true(arr_len(lib) >= 0, "returns an array (possibly empty)");
19+
}
20+
21+
fn test_recover_from_registry_handles_unknown() {
22+
# If omc_modules/ doesn't exist or doesn't contain this fn, returns null.
23+
h msg = omc_msg_sign_compressed("fn nonexistent_xyz_123() { return 42; }", 18173, 1, 3);
24+
h r = omc_msg_recover_from_registry(msg);
25+
assert_true(r == null, "unknown fn → null");
26+
}
27+
28+
fn test_recover_from_registry_returns_null_or_string() {
29+
h msg = omc_msg_sign_compressed("fn add(x, y) { return x + y; }", 18173, 1, 3);
30+
h r = omc_msg_recover_from_registry(msg);
31+
# Either null (not in registry) or a string (matched). No error.
32+
h ok = false;
33+
if r == null { ok = true; }
34+
else { ok = str_len(r) > 0; }
35+
assert_true(ok, "null or non-empty string");
36+
}

omnimcode-core/src/docs.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,20 @@ pub const BUILTINS: &[BuiltinDoc] = &[
11371137
example: "omc_msg_recover_compressed(msg, shared_library) // recovered source",
11381138
unique_to_omc: true,
11391139
},
1140+
BuiltinDoc {
1141+
name: "omc_registry_codec_library", category: "messaging",
1142+
signature: "() -> string[]",
1143+
description: "Scan omc_modules/ for installed registry packages and return every top-level fn definition as a separate string. Suitable as the library arg to omc_codec_decode_lookup / omc_msg_recover_compressed. Empty array if omc_modules/ doesn't exist.",
1144+
example: "omc_registry_codec_library() // [\"fn mean(xs) {...}\", ...]",
1145+
unique_to_omc: true,
1146+
},
1147+
BuiltinDoc {
1148+
name: "omc_msg_recover_from_registry", category: "messaging",
1149+
signature: "(msg: dict) -> string|null",
1150+
description: "Convenience for omc_msg_recover_compressed(msg, omc_registry_codec_library()). Returns the matching registry-package fn source, or null if no installed package contains it.",
1151+
example: "omc_msg_recover_from_registry(msg) // recovered registry-fn source or null",
1152+
unique_to_omc: true,
1153+
},
11401154
BuiltinDoc {
11411155
name: "omc_prompt_agent", category: "messaging",
11421156
signature: "(target_id: int, prompt: string, sender_id: int, channel?: string) -> int",

omnimcode-core/src/interpreter.rs

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8332,6 +8332,69 @@ impl Interpreter {
83328332
}
83338333
Ok(Value::Null)
83348334
}
8335+
// omc_registry_codec_library() -> string[]
8336+
// Scan omc_modules/ for installed registry packages, extract
8337+
// each top-level fn definition as a separate string entry.
8338+
// The returned array is suitable as the library argument to
8339+
// omc_codec_decode_lookup / omc_msg_recover_compressed.
8340+
"omc_registry_codec_library" => {
8341+
let dir = std::path::Path::new("omc_modules");
8342+
if !dir.is_dir() {
8343+
return Ok(Value::Array(HArray::from_vec(vec![])));
8344+
}
8345+
let mut entries: Vec<Value> = Vec::new();
8346+
if let Ok(rd) = std::fs::read_dir(dir) {
8347+
for ent in rd.flatten() {
8348+
let p = ent.path();
8349+
if p.extension().and_then(|s| s.to_str()) != Some("omc") {
8350+
continue;
8351+
}
8352+
if let Ok(src) = std::fs::read_to_string(&p) {
8353+
for fn_src in extract_top_level_fns(&src) {
8354+
entries.push(Value::String(fn_src));
8355+
}
8356+
}
8357+
}
8358+
}
8359+
Ok(Value::Array(HArray::from_vec(entries)))
8360+
}
8361+
// omc_msg_recover_from_registry(msg) -> string|null
8362+
// Convenience: omc_msg_recover_compressed(msg,
8363+
// omc_registry_codec_library()). Returns the matching
8364+
// library entry's canonical source, or null on miss.
8365+
"omc_msg_recover_from_registry" => {
8366+
if args.is_empty() {
8367+
return Err("omc_msg_recover_from_registry requires (msg: dict)".to_string());
8368+
}
8369+
let v = self.eval_expr(&args[0])?;
8370+
let target_hash = if let Value::Dict(d) = v {
8371+
d.borrow().get("content_hash").map(|x| x.to_int()).unwrap_or(0)
8372+
} else {
8373+
return Err("omc_msg_recover_from_registry: msg must be a dict".to_string());
8374+
};
8375+
let dir = std::path::Path::new("omc_modules");
8376+
if !dir.is_dir() {
8377+
return Ok(Value::Null);
8378+
}
8379+
if let Ok(rd) = std::fs::read_dir(dir) {
8380+
for ent in rd.flatten() {
8381+
let p = ent.path();
8382+
if p.extension().and_then(|s| s.to_str()) != Some("omc") {
8383+
continue;
8384+
}
8385+
if let Ok(src) = std::fs::read_to_string(&p) {
8386+
for fn_src in extract_top_level_fns(&src) {
8387+
let canon = crate::canonical::canonicalize(&fn_src)
8388+
.unwrap_or_else(|_| fn_src.clone());
8389+
if crate::tokenizer::fnv1a_64(canon.as_bytes()) == target_hash {
8390+
return Ok(Value::String(fn_src));
8391+
}
8392+
}
8393+
}
8394+
}
8395+
}
8396+
Ok(Value::Null)
8397+
}
83358398
"omc_find_similar" => {
83368399
// omc_find_similar(query, corpus[]) → [{index, distance}, ...]
83378400
// ranked closest-first by canonical-hash distance.
@@ -10607,6 +10670,74 @@ impl Interpreter {
1060710670
/// (arr_push, arr_set, dict_set, dict_del) is already handled by
1060810671
/// dedicated opcodes in the compiler, so it never reaches
1060910672
/// vm_call_builtin in the first place.
10673+
/// Walk `src` and return every top-level `fn NAME(...) { ... }` as a
/// separate string, in source order.
///
/// The scan is byte-based and brace-aware:
/// - `#` line comments and `"..."` / `'...'` string literals (with
///   backslash escapes) are skipped everywhere — at top level, in the fn
///   header, and in the fn body — so braces or `fn ` text inside them are
///   never counted.
/// - `fn ` is only recognised at the start of input or directly after
///   ASCII whitespace.
/// - Nested fns are not returned separately; they remain part of the
///   enclosing fn's text.
/// - Scanning stops at the first fn whose body is missing or whose braces
///   never balance; fns found before it are still returned.
///
/// Used by omc_registry_codec_library and omc_msg_recover_from_registry.
// NOTE(review): this fn appears to have been inserted between the `///`
// comment of `vm_fast_dispatch` and its definition, so the preceding
// `///` lines about vm_call_builtin now attach to this fn — confirm and
// move one of them.
fn extract_top_level_fns(src: &str) -> Vec<String> {
    // If `bytes[at]` opens a `#` line comment or a quoted string literal,
    // return the index at which scanning should resume: the terminating
    // newline (or end of input) for a comment, one past the closing quote
    // (or end of input if unterminated) for a string. Otherwise `None`.
    fn skip_comment_or_string(bytes: &[u8], at: usize) -> Option<usize> {
        match *bytes.get(at)? {
            b'#' => {
                let rest = &bytes[at..];
                let len = rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
                Some(at + len)
            }
            quote @ (b'"' | b'\'') => {
                let mut i = at + 1;
                while i < bytes.len() && bytes[i] != quote {
                    // A backslash escapes the following byte, so an
                    // escaped quote does not terminate the literal.
                    i += if bytes[i] == b'\\' { 2 } else { 1 };
                }
                Some((i + 1).min(bytes.len()))
            }
            _ => None,
        }
    }

    let bytes = src.as_bytes();
    let n = bytes.len();
    let mut out = Vec::new();
    let mut i = 0usize;
    while i < n {
        if let Some(next) = skip_comment_or_string(bytes, i) {
            i = next;
            continue;
        }
        // Recognize `fn ` only at start-of-input or after whitespace.
        let at_boundary = i == 0 || bytes[i - 1].is_ascii_whitespace();
        if !(at_boundary && bytes[i..].starts_with(b"fn ")) {
            i += 1;
            continue;
        }

        // Scan header and body in one pass. Comments and strings are
        // skipped in the header too, so a brace inside them cannot be
        // mistaken for the start of the body.
        let fn_start = i;
        let mut depth = 0usize;
        let mut fn_end = None;
        let mut k = fn_start + 3;
        while k < n {
            if let Some(next) = skip_comment_or_string(bytes, k) {
                k = next;
                continue;
            }
            match bytes[k] {
                b'{' => depth += 1,
                // A `}` before the body has opened is ignored.
                b'}' if depth > 0 => {
                    depth -= 1;
                    if depth == 0 {
                        fn_end = Some(k + 1);
                        break;
                    }
                }
                _ => {}
            }
            k += 1;
        }

        match fn_end {
            Some(end) => {
                // `fn_start` sits on ASCII `f` and `end` is one past ASCII
                // `}`, so both are valid UTF-8 char boundaries.
                out.push(src[fn_start..end].to_string());
                i = end;
            }
            // Missing or unbalanced body: the rest of the input was
            // consumed looking for it, so nothing more can be extracted.
            None => break,
        }
    }
    out
}
10740+
1061010741
fn vm_fast_dispatch(name: &str, args: &[Value]) -> Option<Result<Value, String>> {
1061110742
match (name, args.len()) {
1061210743
// ---- string ops ----

0 commit comments

Comments
 (0)