Skip to content

Commit bbb9898

Browse files
committed
B-unlock-yaml-style: YAML robustness + Instrument variant + persistence + 144-cell verb table
1 parent eeddfe9 commit bbb9898

3 files changed

Lines changed: 351 additions & 22 deletions

File tree

crates/lance-graph-contract/src/grammar/tekamolo.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
66
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
77
pub enum TekamoloSlot {
8-
Temporal, // when
9-
Kausal, // why / because
10-
Modal, // how / in what manner
11-
Lokal, // where
8+
Temporal, // when
9+
Kausal, // why / because
10+
Modal, // how / in what manner
11+
Lokal, // where
12+
Instrument, // by what means / with what
1213
}
1314

1415
/// Slot fillers as lightweight token-index pairs. Downstream crates carry

crates/lance-graph-contract/src/grammar/thinking_styles.rs

Lines changed: 226 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -200,18 +200,29 @@ impl GrammarStyleAwareness {
200200
/// Best NARS inference given current awareness — either the YAML
201201
/// primary (if its truth is healthy) or the highest-ranked NARS
202202
/// parameter we've accumulated evidence for.
203+
///
204+
/// **Bootstrap behaviour:** at zero observations the primary's
205+
/// truth is the neutral seed `(f=0.5, c=0.01)`. We keep `prior.primary`
206+
/// as long as `f >= 0.5` — strict-greater would have flipped to the
207+
/// fallback at bootstrap, which is wrong (the prior hasn't been
208+
/// contradicted yet). We additionally require `c > 0.05` for the
209+
/// "still healthy" branch so a single positive observation alone
210+
/// can't paint over a long-established negative truth.
203211
pub fn top_nars_inference(&self, prior: &GrammarStyleConfig) -> NarsInference {
204-
let primary_key = ParamKey::NarsPrimary(prior.nars.primary);
205-
let primary_truth = self
212+
let primary = prior.nars.primary;
213+
if let Some(t) = self
206214
.param_truths
207-
.get(&primary_key)
215+
.get(&ParamKey::NarsPrimary(primary))
208216
.copied()
209-
.unwrap_or(TruthValue::new(0.5, 0.01));
210-
211-
// If the primary still looks healthy (f > 0.5 AND any confidence),
212-
// keep using it — awareness has not yet contradicted the prior.
213-
if primary_truth.frequency > 0.5 {
214-
return prior.nars.primary;
217+
.or(Some(TruthValue::new(0.5, 0.01)))
218+
{
219+
// `>= 0.5` keeps the prior at bootstrap (neutral seed).
220+
// `c > 0.05` demands more than the neutral seed's confidence
221+
// (0.01), so the zero-observation case is admitted explicitly
222+
// via `parse_count == 0` rather than by the confidence test.
223+
if t.frequency >= 0.5 && (t.confidence > 0.05 || self.parse_count == 0) {
224+
return primary;
225+
}
215226
}
216227

217228
// Otherwise pick the NARS parameter with the highest expected
@@ -278,6 +289,45 @@ impl GrammarStyleAwareness {
278289
coverage: prior.coverage,
279290
}
280291
}
292+
293+
/// Snapshot for cross-session persistence (E6 from PR #279 outlook).
294+
/// META-AGENT: when serde feature lands, derive Serialize on Snapshot.
295+
pub fn snapshot(&self) -> AwarenessSnapshot {
296+
let mut pairs: Vec<_> = self.param_truths.iter().map(|(k, v)| (k.clone(), *v)).collect();
297+
pairs.sort_by_key(|(k, _)| format!("{:?}", k)); // stable for diff
298+
AwarenessSnapshot {
299+
style: self.style,
300+
param_truths: pairs,
301+
recent_success: self.recent_success,
302+
parse_count: self.parse_count,
303+
}
304+
}
305+
306+
/// Restore from a snapshot. New + restore is the basic persistence cycle.
307+
pub fn restore(snap: AwarenessSnapshot) -> Self {
308+
Self {
309+
style: snap.style,
310+
param_truths: snap.param_truths.into_iter().collect(),
311+
recent_success: snap.recent_success,
312+
parse_count: snap.parse_count,
313+
}
314+
}
315+
}
316+
317+
/// Snapshot of [`GrammarStyleAwareness`] suitable for serialization.
318+
/// Serde-friendly: just the truth map + counters. No transient fields.
319+
///
320+
/// `param_truths` is a `Vec` (not the live `HashMap`) so the wire format
321+
/// has a stable iteration order for diffs — `snapshot()` sorts by the
322+
/// `Debug` representation of the key. When the `serde` feature lands,
323+
/// derive `Serialize` / `Deserialize` here without touching the runtime
324+
/// `GrammarStyleAwareness` shape.
325+
#[derive(Debug, Clone)]
326+
pub struct AwarenessSnapshot {
327+
pub style: ThinkingStyle,
328+
pub param_truths: Vec<(ParamKey, TruthValue)>, // Vec for stable ordering
329+
pub recent_success: TruthValue,
330+
pub parse_count: u64,
281331
}
282332

283333
// ---------------------------------------------------------------------------
@@ -406,6 +456,51 @@ pub fn config_from_pairs(pairs: &[(String, String)]) -> Result<GrammarStyleConfi
406456
})
407457
}
408458

459+
/// Strip an inline `#` comment from a YAML line.
///
/// A `#` starts a comment only when it is the first character of the line
/// or is immediately preceded by a space or tab, so values such as
/// `label: section#2` survive intact. The *earliest* such `#` wins,
/// whichever whitespace character precedes it.
///
/// Returns the part of `line` before the whitespace character that
/// precedes the comment marker (trailing whitespace further left is kept
/// for the caller to trim), `""` when the line's first character is `#`,
/// and `line` unchanged when it holds no comment. Quoted scalars aren't
/// supported by this loader, so a `#` inside `"..."` gets no special
/// treatment.
fn strip_inline_comment(line: &str) -> &str {
    let bytes = line.as_bytes();
    // Scan bytes rather than chars: `#`, space and tab are ASCII, so any
    // index found here is guaranteed to be a UTF-8 character boundary.
    let cut = bytes.iter().enumerate().find_map(|(i, &b)| {
        if b != b'#' {
            return None;
        }
        match i.checked_sub(1) {
            // `#` in column 0: the whole line is a comment.
            None => Some(0),
            // Whitespace-preceded `#`: cut at the whitespace character.
            Some(prev) if matches!(bytes[prev], b' ' | b'\t') => Some(prev),
            // `#` glued to a value (`section#2`): part of the scalar.
            Some(_) => None,
        }
    });
    match cut {
        Some(end) => &line[..end],
        None => line,
    }
}
477+
478+
/// Parse a YAML flow-map line of the form
/// `key: { k1: v1, k2: v2, … }`
/// into `Vec<(qualified_key, value)>` pairs (`qualified_key = "key.k1"`).
///
/// Entries are split on *top-level* commas only, so a bracketed value such
/// as `[1, 2]` or `{ x: 1, y: 2 }` is kept intact as the entry's value text
/// instead of being torn apart. Entries without a `:` are ignored. Keys and
/// values are whitespace-trimmed.
///
/// Returns `None` for any line that isn't a flow map — no `:`, the text
/// after the first `:` is not wrapped in `{ … }`, or the brackets inside
/// the braces are unbalanced (e.g. the scalar `{host}:{port}`). Callers
/// fall back to the regular scalar / block-map path. An empty map
/// (`key: {}`) yields `Some` of an empty `Vec`.
fn parse_flow_map(line: &str) -> Option<Vec<(String, String)>> {
    let (outer_key, rest) = line.trim().split_once(':')?;
    let outer_key = outer_key.trim();
    let inner = rest.trim().strip_prefix('{')?.strip_suffix('}')?;

    let mut pairs = Vec::new();
    let mut push_entry = |entry: &str| {
        if let Some((k, v)) = entry.split_once(':') {
            pairs.push((format!("{}.{}", outer_key, k.trim()), v.trim().to_string()));
        }
    };

    // Track bracket depth so only commas at depth 0 separate entries.
    let mut depth = 0usize;
    let mut entry_start = 0usize;
    for (idx, ch) in inner.char_indices() {
        match ch {
            '{' | '[' => depth += 1,
            // A closer with nothing open means the outer braces did not
            // delimit a single map; treat the line as a non-flow-map.
            '}' | ']' => depth = depth.checked_sub(1)?,
            ',' if depth == 0 => {
                push_entry(&inner[entry_start..idx]);
                entry_start = idx + 1; // `,` is one byte wide
            }
            _ => {}
        }
    }
    if depth != 0 {
        return None;
    }
    push_entry(&inner[entry_start..]);
    Some(pairs)
}
503+
409504
/// Flatten a YAML document into `(dotted.path, value)` pairs. List items
410505
/// repeat the key (`tekamolo.priority` appears once per slot).
411506
fn collect_yaml_pairs(yaml: &str) -> Result<Vec<(String, String)>, String> {
@@ -415,12 +510,9 @@ fn collect_yaml_pairs(yaml: &str) -> Result<Vec<(String, String)>, String> {
415510
let mut active_list_key: Option<(usize, String)> = None;
416511

417512
for (lineno, raw_line) in yaml.lines().enumerate() {
418-
// Strip comments (only when not inside a quoted scalar — our config
419-
// files don't use quoted scalars, so plain split is safe).
420-
let line_no_comment = match raw_line.find('#') {
421-
Some(idx) => &raw_line[..idx],
422-
None => raw_line,
423-
};
513+
// Strip comments only when `#` is preceded by whitespace or starts
514+
// the line — preserves values like `section#2`.
515+
let line_no_comment = strip_inline_comment(raw_line);
424516
let trimmed = line_no_comment.trim_end();
425517
if trimmed.trim().is_empty() {
426518
continue;
@@ -449,6 +541,26 @@ fn collect_yaml_pairs(yaml: &str) -> Result<Vec<(String, String)>, String> {
449541
path_stack.pop();
450542
}
451543

544+
// Flow-map tolerance: `key: { k1: v1, k2: v2 }` absorbed before
545+
// we try the scalar split (which would otherwise see the `{` as
546+
// a value and fail downstream type-parsing). Compute the dotted
547+
// outer key with the current path prefix so nested flow maps
548+
// still flatten correctly.
549+
if let Some(flow_pairs) = parse_flow_map(body) {
550+
// `parse_flow_map` returned `(outer.k, v)` pairs (possibly none,
551+
// e.g. for `key: {}`); re-prefix with the current path stack.
552+
let prefix: Vec<&str> = path_stack.iter().map(|(_, k)| k.as_str()).collect();
553+
for (k, v) in flow_pairs {
554+
let dotted = if prefix.is_empty() {
555+
k
556+
} else {
557+
format!("{}.{}", prefix.join("."), k)
558+
};
559+
out.push((dotted, v));
560+
}
561+
continue;
562+
}
563+
452564
// Split `key: value` (value may be empty for parent maps).
453565
let (key_raw, value_raw) = match body.split_once(':') {
454566
Some((k, v)) => (k.trim(), v.trim()),
@@ -579,9 +691,11 @@ fn parse_tekamolo_slot(s: &str) -> Result<TekamoloSlot, String> {
579691
"kausal" => TekamoloSlot::Kausal,
580692
"modal" => TekamoloSlot::Modal,
581693
"lokal" => TekamoloSlot::Lokal,
582-
// `instrument` is in the spec's enum sketch but absent from the
583-
// canonical `TekamoloSlot`; map to Modal as the closest fit.
584-
"instrument" => TekamoloSlot::Modal,
694+
// `Instrument` is a distinct slot ("by what means / with what")
695+
// from `Modal` ("how / in what manner"). Per-slot logic that
696+
// differentiates the two will land on top of this scaffold —
697+
// for now, downstream matchers should treat them as siblings.
698+
"instrument" => TekamoloSlot::Instrument,
585699
other => return Err(format!("unknown tekamolo slot: {other}")),
586700
})
587701
}
@@ -1034,6 +1148,100 @@ nars: { primary: Deduction, fallback: Abduction }
10341148
}
10351149
}
10361150

1151+
// -- YAML robustness -----------------------------------------------------
1152+
1153+
#[test]
1154+
fn inline_comment_with_hash_in_value_preserved() {
1155+
// YAML `label: section#2` — `#` inside a value (no preceding space)
1156+
// must NOT be treated as a comment marker.
1157+
let yaml = "label: section#2\n";
1158+
let pairs = collect_yaml_pairs(yaml).expect("collect failed");
1159+
let val = pairs.iter().find(|(k, _)| k == "label").map(|(_, v)| v.as_str());
1160+
assert_eq!(val, Some("section#2"));
1161+
}
1162+
1163+
#[test]
1164+
fn inline_comment_at_end_stripped() {
1165+
// YAML `key: value # trailing` — `#` after whitespace is a comment.
1166+
let yaml = "key: value # trailing\n";
1167+
let pairs = collect_yaml_pairs(yaml).expect("collect failed");
1168+
let val = pairs.iter().find(|(k, _)| k == "key").map(|(_, v)| v.as_str());
1169+
assert_eq!(val, Some("value"));
1170+
}
1171+
1172+
#[test]
1173+
fn flow_map_parses_inline() {
1174+
// Flow map at top level: `nars: { primary: Deduction, fallback: Abduction }`
1175+
// expands to `nars.primary` and `nars.fallback` pairs.
1176+
let yaml = "nars: { primary: Deduction, fallback: Abduction }\n";
1177+
let pairs = collect_yaml_pairs(yaml).expect("collect failed");
1178+
let primary = pairs.iter().find(|(k, _)| k == "nars.primary").map(|(_, v)| v.as_str());
1179+
let fallback = pairs.iter().find(|(k, _)| k == "nars.fallback").map(|(_, v)| v.as_str());
1180+
assert_eq!(primary, Some("Deduction"));
1181+
assert_eq!(fallback, Some("Abduction"));
1182+
}
1183+
1184+
// -- TekamoloSlot::Instrument --------------------------------------------
1185+
1186+
#[test]
1187+
fn parse_tekamolo_slot_instrument_distinct_from_modal() {
1188+
// The `instrument` string must now parse to `Instrument` (not `Modal`).
1189+
assert_eq!(
1190+
parse_tekamolo_slot("instrument").ok(),
1191+
Some(TekamoloSlot::Instrument)
1192+
);
1193+
assert_ne!(TekamoloSlot::Instrument, TekamoloSlot::Modal);
1194+
}
1195+
1196+
// -- top_nars_inference threshold ----------------------------------------
1197+
1198+
#[test]
1199+
fn bootstrap_returns_primary_not_fallback() {
1200+
// Fresh awareness (zero observations) must keep `prior.nars.primary`
1201+
// — the prior has not been contradicted yet, so we should not flip
1202+
// to `fallback`.
1203+
let prior = base_prior();
1204+
let a = GrammarStyleAwareness::bootstrap(prior.style);
1205+
let inf = a.top_nars_inference(&prior);
1206+
assert_eq!(inf, prior.nars.primary);
1207+
}
1208+
1209+
// -- Persistence stub (E6) ----------------------------------------------
1210+
1211+
#[test]
1212+
fn snapshot_then_restore_round_trips() {
1213+
let prior = base_prior();
1214+
let mut a = GrammarStyleAwareness::bootstrap(prior.style);
1215+
// Apply a mix of 10 outcomes touching multiple ParamKey variants.
1216+
for i in 0..10 {
1217+
let key = if i % 2 == 0 {
1218+
ParamKey::NarsPrimary(NarsInference::Deduction)
1219+
} else {
1220+
ParamKey::MorphologyTable(MorphologyTableId::FinnishCase)
1221+
};
1222+
let outcome = if i % 3 == 0 {
1223+
ParseOutcome::LocalSuccess
1224+
} else {
1225+
ParseOutcome::EscalatedAndLLMDisagreed
1226+
};
1227+
a.revise(key, outcome);
1228+
}
1229+
1230+
let snap = a.snapshot();
1231+
let restored = GrammarStyleAwareness::restore(snap);
1232+
1233+
assert_eq!(restored.style, a.style);
1234+
assert_eq!(restored.parse_count, a.parse_count);
1235+
assert!((restored.recent_success.frequency - a.recent_success.frequency).abs() < 1e-6);
1236+
assert!((restored.recent_success.confidence - a.recent_success.confidence).abs() < 1e-6);
1237+
assert_eq!(restored.param_truths.len(), a.param_truths.len());
1238+
for (k, v) in a.param_truths.iter() {
1239+
let r = restored.param_truths.get(k).copied().expect("missing key after restore");
1240+
assert!((r.frequency - v.frequency).abs() < 1e-6);
1241+
assert!((r.confidence - v.confidence).abs() < 1e-6);
1242+
}
1243+
}
1244+
10371245
#[test]
10381246
fn effective_config_preserves_prior_shape_for_empty_awareness() {
10391247
// Empty awareness: every collection-shaped policy slot must round-trip

0 commit comments

Comments
 (0)