Skip to content

Commit 5e054b2

Browse files
LLM ergonomics batch: ||/&&, --test-all + --json, std/test, signature
search, onboarding token Five wins for both human and LLM authors of OMC, addressing the specific friction points encountered while writing OMC code today. 1. Lexer: `||` and `&&` now alias keyword `or` and `and`. Single `|`/`&` still parse as bit-OR/AND. Every C-family programmer (and every LLM trained on them) reaches for `||` first; the silent BitOr parse error was the largest single footgun. 2. CLI: --test-all DIR runs every test_*.omc under DIR with a one-line-per-file pass/fail summary + overall total. Default DIR is `examples/tests/`. With --json, both --test and --test-all emit JSONL per-test results + a summary line for machine consumption (CI, LLM iteration loops). Verified: 66 files / 1000 tests / 997 pass / 3 fail (heal_pass requires OMC_HEAL=1 — surfaced as a real diagnostic). 3. examples/lib/test.omc: canonical assert_eq, assert_ne, assert_true, assert_false, assert_near, assert_throws, assert_throws_with, assert_contains, assert_len. The 64-copy assert_eq cluster (largest in the codebase per omc-grep) can now be deduplicated as files are touched. test_stdlib_test.omc exercises 9/9 ok. 4. omc_find_by_signature(pattern, max?) builtin: case-insensitive substring match against every builtin's signature string. Lets LLMs discover by intent ("-> float[]", "string, int") instead of name-guessing. Returns [{name, signature, category, description}, ...]. 5. LLM-onboarding token (examples/tools/gen_onboarding_token.omc): produces docs/onboarding_token.json — a 77.6 KB substrate- signed envelope containing: - 41.7 KB bootstrap_pack (the existing markdown intro) - manifest of 193 lib fns: {lib, name, canonical_hash, attractor, signature} - kind=7 substrate-signed envelope so receivers can verify integrity without a shared key The consumer demo (examples/tools/consume_onboarding_token.omc) verifies the envelope signature, parses the bundle, prints manifest samples, and demonstrates downstream omc_find_by_signature discovery. 
One artifact you can drop into a fresh LLM's context and it knows OMC. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 20196db commit 5e054b2

9 files changed

Lines changed: 640 additions & 13 deletions

File tree

docs/onboarding_token.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

examples/lib/test.omc

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# std/test — canonical assertion helpers for OMC test suites.
2+
#
3+
# Before this library, every test file under examples/tests/ defined
4+
# its own assert_eq / assert_true. omc-grep counted 64 copies of
5+
# assert_eq alone (the largest cluster in the codebase).
6+
#
7+
# To use:
8+
# from "lib/test.omc" import assert_eq, assert_true, assert_near, assert_throws
9+
#
10+
# Or import the whole module:
11+
# import "lib/test.omc"
12+
#
13+
# Conventions:
14+
# - msg is the diagnostic shown when an assertion fails
15+
# - test_record_failure is the OMC test-runner hook that flags a fail
16+
# - assertions short-circuit (return) after recording so a single
17+
# bad assertion doesn't cascade into noise
18+
19+
# Record a test failure unless `actual` equals `expected`.
#   actual   - value produced by the code under test
#   expected - value it is compared against with `==`
#   msg      - prefix of the diagnostic passed to test_record_failure
fn assert_eq(actual, expected, msg) {
    # Nothing to report when the two values compare equal.
    if actual == expected {
        return;
    }
    h report = msg + ": expected " + to_string(expected) + " got " + to_string(actual);
    test_record_failure(report);
}
24+
25+
# Record a test failure when `actual` compares equal to `unexpected`.
#   actual     - value produced by the code under test
#   unexpected - value it must differ from
#   msg        - prefix of the diagnostic passed to test_record_failure
fn assert_ne(actual, unexpected, msg) {
    h collided = (actual == unexpected);
    if collided {
        # Only the forbidden value is printed; `actual` is equal to it.
        h report = msg + ": expected != " + to_string(unexpected) + " but got equal";
        test_record_failure(report);
    }
}
30+
31+
# Record a test failure when `cond` is not truthy.
#   cond - condition expected to hold
#   msg  - diagnostic passed verbatim to test_record_failure
fn assert_true(cond, msg) {
    h violated = !cond;
    if violated {
        test_record_failure(msg);
    }
}
36+
37+
# Record a test failure when `cond` holds.
#   cond - condition expected NOT to hold
#   msg  - prefix of the diagnostic passed to test_record_failure
fn assert_false(cond, msg) {
    if cond {
        # The message is built only on the failing path.
        h report = msg + ": expected false, got true";
        test_record_failure(report);
    }
}
42+
43+
# Float equality within an absolute tolerance.
#   actual, expected - numbers to compare
#   eps              - maximum allowed |actual - expected|. This argument is
#                      required (there is no default); 1e-9 is tight for
#                      typical FP work, pass a looser eps for ML/stats tests.
#   msg              - prefix of the diagnostic passed to test_record_failure
fn assert_near(actual, expected, eps, msg) {
    h abs_diff = actual - expected;
    if abs_diff < 0.0 { abs_diff = 0.0 - abs_diff; }
    # Written as !(<=) rather than (>): assuming IEEE-754 comparison
    # semantics, every ordered comparison against NaN is false, so a NaN
    # difference would slip through a plain `>` check and pass silently.
    if !(abs_diff <= eps) {
        test_record_failure(msg + ": expected ~" + to_string(expected) + " (eps=" + to_string(eps) + ") got " + to_string(actual));
    }
}
53+
54+
# Assert that calling the zero-argument function `body_fn` raises an error.
# Any error counts, so tests stay decoupled from exact error messages.
#   body_fn - nullary function expected to fail
#   msg     - prefix of the diagnostic passed to test_record_failure
fn assert_throws(body_fn, msg) {
    # Assume a clean run until the catch branch proves otherwise.
    h completed = true;
    try {
        body_fn();
    } catch e {
        completed = false;
    }
    if completed {
        test_record_failure(msg + ": expected to throw, completed normally");
    }
}
68+
69+
# Like assert_throws, but the caught error must also match
# `expected_substr`. Use to pin down one specific failure mode.
#   body_fn         - nullary function expected to fail
#   expected_substr - text the error must contain
#   msg             - prefix of the diagnostic passed to test_record_failure
# NOTE(review): the check goes through re_match, so expected_substr is
# presumably interpreted as a pattern rather than a literal — confirm
# before passing text that contains regex metacharacters.
fn assert_throws_with(body_fn, expected_substr, msg) {
    h completed = true;
    h caught = "";
    try {
        body_fn();
    } catch e {
        completed = false;
        caught = e;
    }
    if completed {
        # No error at all: report that and skip the content check.
        test_record_failure(msg + ": expected to throw, completed normally");
    } elif re_match(expected_substr, caught) != 1 {
        test_record_failure(msg + ": error did not contain `" + expected_substr + "`; got: " + caught);
    }
}
88+
89+
# Assert that `haystack` contains `needle`.
#   haystack - string searched
#   needle   - text expected to occur in it
#   msg      - prefix of the diagnostic passed to test_record_failure
# NOTE(review): implemented with re_match, so `needle` is presumably
# treated as a pattern, not a literal — confirm for special characters.
fn assert_contains(haystack, needle, msg) {
    h found = re_match(needle, haystack);
    if found != 1 {
        h report = msg + ": expected `" + needle + "` in `" + haystack + "`";
        test_record_failure(report);
    }
}
95+
96+
# Assert that array `arr` has exactly `expected_len` elements.
#   arr          - array whose length is measured with arr_len
#   expected_len - required element count
#   msg          - prefix of the diagnostic passed to test_record_failure
fn assert_len(arr, expected_len, msg) {
    h measured = arr_len(arr);
    if measured == expected_len {
        return;
    }
    test_record_failure(msg + ": expected len " + to_string(expected_len) + " got " + to_string(measured));
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Tests for examples/lib/test.omc — the canonical assertion helpers.
2+
# (Meta-test: tests that the test library does what it says.)
3+
4+
from "examples/lib/test.omc" import assert_eq, assert_ne, assert_true, assert_false, assert_near, assert_throws, assert_throws_with, assert_contains, assert_len;
5+
6+
# assert_eq must stay silent for equal ints, equal strings and a
# computed array length.
fn test_assert_eq_pass() {
    h sum = 1 + 1;
    assert_eq(sum, 2, "math");

    h greeting = "hi";
    assert_eq(greeting, "hi", "string");

    h count = arr_len([1, 2, 3]);
    assert_eq(count, 3, "len");
}
11+
12+
# assert_ne must stay silent for distinct ints and distinct strings.
fn test_assert_ne_pass() {
    h left_num = 1;
    h right_num = 2;
    assert_ne(left_num, right_num, "diff");

    h left_str = "a";
    h right_str = "b";
    assert_ne(left_str, right_str, "str diff");
}
16+
17+
# assert_true must stay silent for a literal, an equality result and
# a length comparison.
fn test_assert_true_pass() {
    assert_true(true, "literal true");

    h same = (1 == 1);
    assert_true(same, "eq result");

    h has_items = (arr_len([1]) > 0);
    assert_true(has_items, "nonempty");
}
22+
23+
# assert_false must stay silent for a literal false and a failed equality.
fn test_assert_false_pass() {
    assert_false(false, "literal false");

    h mismatch = (1 == 2);
    assert_false(mismatch, "neq result");
}
27+
28+
# assert_near must tolerate classic FP drift at a tight eps and a
# small real difference at a loose eps.
fn test_assert_near_pass() {
    h drifted = 0.1 + 0.2;
    assert_near(drifted, 0.3, 1e-9, "fp drift");

    h loose = 0.001;
    assert_near(1.0, 1.0001, loose, "loose eps");
}
32+
33+
# assert_throws must stay silent when the closure raises via error().
fn test_assert_throws_pass() {
    assert_throws(
        fn() {
            h _ = error("boom");
        },
        "should throw"
    );
}
36+
37+
# assert_throws_with must stay silent when the raised error text
# contains the requested fragment.
fn test_assert_throws_with_pass() {
    h fragment = "specific";
    assert_throws_with(
        fn() {
            h _ = error("specific failure mode");
        },
        fragment,
        "matches substring"
    );
}
44+
45+
# assert_contains must stay silent when the needle occurs in the haystack.
fn test_assert_contains_pass() {
    h haystack = "hello world";
    h needle = "world";
    assert_contains(haystack, needle, "substring");
}
48+
49+
# assert_len must stay silent for a three-element and an empty array.
fn test_assert_len_pass() {
    h triple = [1, 2, 3];
    assert_len(triple, 3, "array len");

    h nothing = [];
    assert_len(nothing, 0, "empty");
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Consume an OMC onboarding token.
2+
#
3+
# This is the receiver-side flow for a new-to-OMC LLM (or any agent
4+
# that wasn't pre-loaded with the language).
5+
#
6+
# Steps:
7+
# 1. Read the substrate-signed envelope from docs/onboarding_token.json
8+
# 2. Verify the substrate signature (content_hash must equal the
9+
# recomputed canonical hash of the inner content — defends
10+
# against bundle tampering, no shared key needed)
11+
# 3. Parse the inner bundle
12+
# 4. Print a summary so a downstream LLM knows what's available
13+
# 5. Demonstrate discovery: query the builtins by signature with
14+
# omc_find_by_signature and print the matching entries
15+
#
16+
# Run from the OMC repo root:
17+
# omnimcode-standalone examples/tools/consume_onboarding_token.omc
18+
19+
# Receiver-side walkthrough: load the onboarding token, verify its
# substrate signature, summarise the bundle, and run one discovery query.
# Prints progress to stdout; stops early if verification fails.
fn main() {
    # Step 1: read the serialized envelope from disk.
    h raw = read_file("docs/onboarding_token.json");
    h envelope = omc_msg_deserialize(raw);

    # Step 2: verify the substrate signature before trusting the content.
    h verdict = omc_msg_verify(envelope);
    h is_valid = dict_get(verdict, "valid");
    if is_valid != 1 {
        print("FAIL: envelope signature invalid — bundle was tampered with.");
        h want_hash = to_string(dict_get(verdict, "expected_hash"));
        print(concat_many(" expected_hash = ", want_hash));
        h have_hash = to_string(dict_get(verdict, "actual_hash"));
        print(concat_many(" actual_hash = ", have_hash));
        return;
    }
    print("[OK] substrate envelope signature verified");
    h content_hash = to_string(dict_get(envelope, "content_hash"));
    print(concat_many(" content_hash = ", content_hash));
    h attractor = to_string(dict_get(envelope, "attractor"));
    print(concat_many(" attractor = ", attractor));

    # Step 3: parse the inner bundle carried by the verified envelope.
    h payload = dict_get(verdict, "content");
    h bundle = json_parse(payload);

    # Step 4: print a summary of what the bundle offers.
    print("");
    h version = dict_get(bundle, "version");
    print(concat_many("Bundle version : ", version));
    h pack_bytes = to_string(dict_get(bundle, "bootstrap_bytes"));
    print(concat_many("Bootstrap pack : ", pack_bytes, " bytes"));
    h fn_total = to_string(dict_get(bundle, "fn_count"));
    print(concat_many("Library fns : ", fn_total));

    # Show at most the first five manifest entries.
    h manifest = dict_get(bundle, "lib_manifest");
    print("");
    print("Sample manifest entries (first 5):");
    h sample_end = arr_len(manifest);
    if sample_end > 5 { sample_end = 5; }
    h idx = 0;
    while idx < sample_end {
        h entry = arr_get(manifest, idx);
        h lib = dict_get(entry, "lib");
        h sig = dict_get(entry, "signature");
        h hash_text = to_string(dict_get(entry, "canonical_hash"));
        print(concat_many(" ", lib, " ", sig, " hash=", hash_text));
        idx = idx + 1;
    }

    # Step 5: show that an LLM can ask "do you have fns returning float[]?"
    print("");
    print("LLM discovery query: omc_find_by_signature(\"-> float[]\", 3)");
    h hits = omc_find_by_signature("-> float[]", 3);
    h hit_total = arr_len(hits);
    h pos = 0;
    while pos < hit_total {
        h hit = arr_get(hits, pos);
        h hit_name = dict_get(hit, "name");
        h hit_sig = dict_get(hit, "signature");
        print(concat_many(" ", hit_name, " :: ", hit_sig));
        pos = pos + 1;
    }

    print("");
    print("Onboarding complete. A downstream LLM now knows:");
    print(" * the language's bootstrap pack (categories, idioms, gotchas)");
    print(" * every standard-library fn's canonical address");
    print(" * how to discover builtins by signature");
    print(" * how to verify the bundle came from the expected substrate");
}
77+
78+
main();
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# Generate the OMC onboarding bundle.
2+
#
3+
# Output: a single JSON file an LLM can ingest cold and immediately
4+
# be productive in OMC. The bundle is substrate-signed (integrity
5+
# verifiable without a shared key) and references registry/library
6+
# functions by canonical hash (alpha-rename invariant).
7+
#
8+
# Contents:
9+
# * version — build identifier
10+
# * bootstrap_pack — the ~42KB markdown intro from omc_bootstrap_pack()
11+
# * lib_manifest — for every fn in examples/lib/ :
12+
# {lib, name, canonical_hash, attractor, signature}
13+
# * substrate_envelope — substrate-signed wrapper over the above
14+
# (sender=0 = kernel-level signing)
15+
#
16+
# Run from the OMC repo root:
17+
# omnimcode-standalone examples/tools/gen_onboarding_token.omc > docs/onboarding_token.json
18+
#
19+
# Or pipe directly into another tool. The output is one line of JSON.
20+
21+
# Scan the source file at `path` and return an array holding the full
# text ("fn NAME(...) { ... }") of every function definition found.
#
# A definition is recognised as "fn " with nothing but spaces between it
# and the start of its line. Accepting "fn " after ANY space (the earlier
# rule) also matched prose inside comments — e.g. "# ... nullary fn
# body_fn raises ..." — which produced a bogus entry that swallowed the
# real definition following the comment.
#
# Known limitation: brace matching is purely textual, so a "{" or "}"
# inside a string literal or comment is counted like any other brace.
#
#   path    - file to read with read_file
#   returns - array of source snippets, in file order; functions nested
#             inside an extracted function are not listed separately
fn extract_fns_from_file(path) {
    h src = read_file(path);
    h fns = [];
    h n = str_len(src);
    h i = 0;
    while i < n {
        # Decide whether position i starts a definition.
        h is_def = false;
        if str_slice(src, i, i + 3) == "fn " {
            is_def = true;
            # Walk back to the start of the line; only indentation
            # (spaces) may sit between the line start and "fn ".
            h p = i;
            while p > 0 {
                h prev = str_slice(src, p - 1, p);
                if prev == "\n" { break; }
                if prev != " " {
                    is_def = false;
                    break;
                }
                p = p - 1;
            }
        }

        if is_def {
            # Find the opening brace of the body.
            h j = i;
            while j < n && str_slice(src, j, j + 1) != "{" { j = j + 1; }
            if j >= n { i = i + 1; }
            else {
                # Track brace depth until the body closes.
                h depth = 0;
                h k = j;
                while k < n {
                    h c = str_slice(src, k, k + 1);
                    if c == "{" { depth = depth + 1; }
                    elif c == "}" {
                        depth = depth - 1;
                        # k is advanced past the closing brace so the
                        # slice below includes it.
                        if depth == 0 { k = k + 1; break; }
                    }
                    k = k + 1;
                }
                if depth == 0 {
                    arr_push(fns, str_slice(src, i, k));
                    # Resume after the extracted function.
                    i = k;
                } else {
                    # Unbalanced braces: skip this candidate.
                    i = i + 1;
                }
            }
        } else {
            i = i + 1;
        }
    }
    return fns;
}
59+
60+
# Extract NAME from a snippet of the form "fn NAME(...)".
#   src     - function source that begins with "fn " (3 characters)
#   returns - the characters after "fn " up to the first "(" or space
fn fn_name_of(src) {
    # Drop the leading "fn " keyword.
    h rest = str_slice(src, 3, str_len(src));
    h limit = str_len(rest);
    h cut = 0;
    while cut < limit {
        h ch = str_slice(rest, cut, cut + 1);
        if ch == "(" || ch == " " { break; }
        cut = cut + 1;
    }
    return str_slice(rest, 0, cut);
}
71+
72+
# Return only the "fn NAME(...)" header of a function snippet.
#   src     - function source text
#   returns - everything before the first "{", passed through str_trim
#             (the whole text when no "{" is present)
fn signature_first_line(src) {
    h limit = str_len(src);
    h brace_at = 0;
    while brace_at < limit {
        if str_slice(src, brace_at, brace_at + 1) == "{" { break; }
        brace_at = brace_at + 1;
    }
    h header = str_slice(src, 0, brace_at);
    return str_trim(header);
}
78+
79+
# Build the library manifest: one dict per function found in the listed
# library files, with keys lib, name, canonical_hash, attractor, signature.
#   returns - array of entry dicts, ordered by library then by position
fn build_lib_manifest() {
    h lib_paths = [
        "examples/lib/np.omc",
        "examples/lib/pd.omc",
        "examples/lib/sklearn.omc",
        "examples/lib/torch.omc",
        "examples/lib/requests.omc",
        "examples/lib/sqlite.omc",
        "examples/lib/substrate.omc",
        "examples/lib/harmonic_anomaly.omc",
        "examples/lib/harmonic_clustering.omc",
        "examples/lib/harmonic_recommend.omc",
        "examples/lib/test.omc",
    ];
    h manifest = [];
    h lib_idx = 0;
    while lib_idx < arr_len(lib_paths) {
        h lib_path = arr_get(lib_paths, lib_idx);
        h sources = extract_fns_from_file(lib_path);
        h fn_idx = 0;
        while fn_idx < arr_len(sources) {
            h snippet = arr_get(sources, fn_idx);
            h record = dict_new();
            dict_set(record, "lib", lib_path);
            dict_set(record, "name", fn_name_of(snippet));
            try {
                h hashed = omc_canonical_hash(snippet);
                dict_set(record, "canonical_hash", dict_get(hashed, "raw"));
                dict_set(record, "attractor", dict_get(hashed, "attractor"));
            } catch e {
                # Hashing can fail for fns that do not parse in isolation
                # (e.g. closures referencing the module's imports). Store
                # zeros instead; name + signature still aid discovery.
                dict_set(record, "canonical_hash", 0);
                dict_set(record, "attractor", 0);
            }
            dict_set(record, "signature", signature_first_line(snippet));
            arr_push(manifest, record);
            fn_idx = fn_idx + 1;
        }
        lib_idx = lib_idx + 1;
    }
    return manifest;
}
124+
125+
# Assemble the onboarding bundle, substrate-sign it, and print the
# serialized envelope as a single line on stdout.
fn main() {
    h intro = omc_bootstrap_pack();
    h lib_fns = build_lib_manifest();

    # Keys are inserted in a fixed order: kind, version, bootstrap_pack,
    # lib_manifest, fn_count, bootstrap_bytes.
    h bundle = dict_new();
    dict_set(bundle, "kind", "omc_onboarding_token_v1");
    dict_set(bundle, "version", "2026.05.16");
    dict_set(bundle, "bootstrap_pack", intro);
    dict_set(bundle, "lib_manifest", lib_fns);
    dict_set(bundle, "fn_count", arr_len(lib_fns));
    dict_set(bundle, "bootstrap_bytes", str_len(intro));

    # Sign the JSON text of the whole bundle (sender=0, kind=7 for
    # onboarding) so receivers can verify integrity with omc_msg_verify
    # and then json_parse the content.
    h signed = omc_msg_sign(json_stringify(bundle), 0, 7);
    print(omc_msg_serialize(signed));
}
145+
146+
main();

0 commit comments

Comments
 (0)