Skip to content

Commit 1157b64

Browse files
omc-kernel: store any content (omc_fn, json, prose, blob)
Generalize the kernel from code-only to substrate-keyed storage of arbitrary content.

New `put` subcommand:

    omc-kernel put FILE [--kind KIND]

KIND selects the canonicalizer:

    omc_fn — alpha-rename-invariant OMC canonical form (default of ingest)
    json   — recursive key-sort + re-serialize (semantically equal JSON objects collapse to the same hash regardless of key order)
    prose  — raw fnv1a over bytes (exact-text dedup, no canonicalization)
    blob   — alias for prose

Stdout = canonical hash (hex) so put output pipes into fetch/stat. Sidecar metadata now carries kind + addressing description so `stat` explains how the hash was derived.

End-to-end test (in this commit):

    $ echo '{"b":2,"a":1}' | omc-kernel put /dev/stdin --kind json
    20ebc03bdc71de7b
    $ echo '{ "a":1, "b":2 }' | omc-kernel put /dev/stdin --kind json
    20ebc03bdc71de7b    # same hash — JSON canonicalization collapses key order
    $ echo "hello world" | omc-kernel put /dev/stdin --kind prose
    <hash>
    $ echo "hello world" | omc-kernel put /dev/stdin --kind prose
    <same hash>         # raw-byte dedup

This is goal 1 of the four-item kernel-as-content-DAG plan: the substrate-keyed persistent memory layer now stores any canonicalizable content, not just OMC code. The address scheme is identical so kernel + codec + sign/verify all extend naturally.

Existing ingest behavior unchanged (defaults to omc_fn kind in sidecar metadata for clarity).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 148cec4 commit 1157b64

1 file changed

Lines changed: 125 additions & 7 deletions

File tree

omnimcode-cli/src/bin/omc_kernel.rs

Lines changed: 125 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,98 @@ fn parse_hex_hash(s: &str) -> Option<i64> {
107107
// Subcommands
108108
// --------------------------------------------------------------------
109109

110+
/// Canonicalize a JSON string: parse, recursively sort dict keys,
111+
/// re-serialize. Used by `put` with --kind json so two semantically-
112+
/// equal JSON blobs (different key order) collapse to the same hash.
113+
fn canonicalize_json(s: &str) -> Option<String> {
114+
use serde_json::Value;
115+
fn sort_keys(v: Value) -> Value {
116+
match v {
117+
Value::Object(m) => {
118+
let mut entries: Vec<(String, Value)> = m.into_iter().collect();
119+
entries.sort_by(|a, b| a.0.cmp(&b.0));
120+
let mapped: serde_json::Map<String, Value> = entries
121+
.into_iter()
122+
.map(|(k, v)| (k, sort_keys(v)))
123+
.collect();
124+
Value::Object(mapped)
125+
}
126+
Value::Array(a) => Value::Array(a.into_iter().map(sort_keys).collect()),
127+
other => other,
128+
}
129+
}
130+
serde_json::from_str::<Value>(s)
131+
.ok()
132+
.map(sort_keys)
133+
.and_then(|v| serde_json::to_string(&v).ok())
134+
}
135+
136+
/// Store an arbitrary content blob keyed by canonical hash.
137+
/// `kind` selects the canonicalizer:
138+
/// * "omc_fn" : canonicalize as OMC source (the default, same as ingest)
139+
/// * "json" : sort-keys + re-serialize
140+
/// * "prose" : raw bytes (fnv1a of content), no canonicalization
141+
/// * "blob" : alias for "prose"
142+
fn cmd_put(path: &str, kind: &str) -> ExitCode {
143+
let Ok(content) = std::fs::read_to_string(path) else {
144+
eprintln!("put: cannot read: {}", path);
145+
return ExitCode::from(1);
146+
};
147+
if let Err(e) = ensure_store() {
148+
eprintln!("put: cannot create store: {}", e);
149+
return ExitCode::from(1);
150+
}
151+
let (canonical_form, addressing) = match kind {
152+
"omc_fn" => {
153+
let canon = canonical::canonicalize(&content).unwrap_or_else(|_| content.clone());
154+
(canon, "alpha-rename-invariant OMC canonical form")
155+
}
156+
"json" => match canonicalize_json(&content) {
157+
Some(c) => (c, "key-sorted JSON canonical form"),
158+
None => {
159+
eprintln!("put: --kind json but content does not parse as JSON");
160+
return ExitCode::from(2);
161+
}
162+
},
163+
"prose" | "blob" => (content.clone(), "raw bytes (no canonicalization)"),
164+
other => {
165+
eprintln!("put: unknown --kind {} (use omc_fn, json, prose, blob)", other);
166+
return ExitCode::from(2);
167+
}
168+
};
169+
let hash = tokenizer::fnv1a_64(canonical_form.as_bytes());
170+
let store_path = store_path_for(hash);
171+
let already_present = store_path.exists();
172+
if !already_present {
173+
if let Err(e) = std::fs::write(&store_path, &content) {
174+
eprintln!("put: write failed for {}: {}", store_path.display(), e);
175+
return ExitCode::from(1);
176+
}
177+
let (attractor, dist) = phi_pi_fib::nearest_attractor_with_dist(hash);
178+
let meta = serde_json::json!({
179+
"canonical_hash": hash.to_string(),
180+
"attractor": attractor.to_string(),
181+
"attractor_distance": dist.to_string(),
182+
"source_bytes": content.len(),
183+
"canonical_bytes": canonical_form.len(),
184+
"kind": kind,
185+
"addressing": addressing,
186+
"origin_file": path,
187+
});
188+
let _ = std::fs::write(meta_path_for(hash), meta.to_string());
189+
}
190+
// Stdout = the canonical hash (hex) so callers can pipe.
191+
println!("{:016x}", hash as u64);
192+
eprintln!(
193+
"put: {} ({} bytes, kind={}, addressing={})",
194+
if already_present { "exists" } else { "stored" },
195+
content.len(),
196+
kind,
197+
addressing
198+
);
199+
ExitCode::SUCCESS
200+
}
201+
110202
fn cmd_ingest(dir: &str) -> ExitCode {
111203
let root = Path::new(dir);
112204
if !root.is_dir() {
@@ -160,6 +252,8 @@ fn cmd_ingest(dir: &str) -> ExitCode {
160252
"attractor_distance": dist.to_string(),
161253
"source_bytes": fn_src.len(),
162254
"canonical_bytes": canon.len(),
255+
"kind": "omc_fn",
256+
"addressing": "alpha-rename-invariant OMC canonical form",
163257
"fn_name": extract_fn_name(&fn_src),
164258
"origin_file": p.display().to_string(),
165259
});
@@ -379,13 +473,16 @@ fn print_usage() {
379473
eprintln!("omc-kernel — content-addressed store keyed by canonical hash");
380474
eprintln!();
381475
eprintln!("Usage:");
382-
eprintln!(" omc-kernel ingest DIR extract every fn from DIR's .omc files, store");
383-
eprintln!(" omc-kernel fetch HASH retrieve stored fn by canonical hash (hex)");
384-
eprintln!(" omc-kernel stat HASH substrate metadata for stored fn");
385-
eprintln!(" omc-kernel ls list stored hashes + fn names");
386-
eprintln!(" omc-kernel sign FILE sign OMC source to a substrate-signed wire msg");
387-
eprintln!(" omc-kernel verify verify a wire msg from stdin, recover via store");
388-
eprintln!(" omc-kernel demo ingest examples/lib/, alpha-rename recovery demo");
476+
eprintln!(" omc-kernel ingest DIR extract every fn from DIR's .omc files, store");
477+
eprintln!(" omc-kernel put FILE [--kind K] store arbitrary content (kinds: omc_fn,");
478+
eprintln!(" json, prose, blob). Default: prose.");
479+
eprintln!(" Stdout = canonical hash for piping.");
480+
eprintln!(" omc-kernel fetch HASH retrieve stored entry by canonical hash (hex)");
481+
eprintln!(" omc-kernel stat HASH substrate metadata (kind, attractor, bytes)");
482+
eprintln!(" omc-kernel ls list stored hashes + first-line summary");
483+
eprintln!(" omc-kernel sign FILE sign OMC source to a substrate-signed wire msg");
484+
eprintln!(" omc-kernel verify verify a wire msg from stdin, recover via store");
485+
eprintln!(" omc-kernel demo ingest examples/lib/, alpha-rename recovery demo");
389486
eprintln!();
390487
eprintln!("Env:");
391488
eprintln!(" OMC_KERNEL_ROOT override store location (default: ~/.omc/kernel)");
@@ -406,6 +503,27 @@ fn main() -> ExitCode {
406503
}
407504
cmd_ingest(&args[2])
408505
}
506+
"put" => {
507+
// omc-kernel put FILE [--kind KIND]
508+
// KIND ∈ {omc_fn, json, prose, blob}; default = prose (raw bytes).
509+
if args.len() < 3 {
510+
eprintln!("put: missing FILE");
511+
return ExitCode::from(2);
512+
}
513+
let path = &args[2];
514+
let mut kind = "prose";
515+
let mut i = 3;
516+
while i < args.len() {
517+
if args[i] == "--kind" && i + 1 < args.len() {
518+
kind = args[i + 1].as_str();
519+
i += 2;
520+
} else {
521+
eprintln!("put: unknown arg `{}`", args[i]);
522+
return ExitCode::from(2);
523+
}
524+
}
525+
cmd_put(path, kind)
526+
}
409527
"fetch" => {
410528
if args.len() < 3 {
411529
eprintln!("fetch: missing HASH");

0 commit comments

Comments
 (0)