From 6103c2e24cbd3dfe7f0adc7fcaffb7aed263187e Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 17:49:06 +0000 Subject: [PATCH 1/2] fix(provenance): accept structured source frontmatter in learning audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The provenance audit's learningFrontmatter decoded source as a bare string and failed with "cannot unmarshal !!map into string" whenever a learning used a structured value — for example, the 2026-04-19 orchestrator-compression learning set source to a session+evidence map. Dream's INGEST stage then listed the file under degraded every run and never counted its source toward MissingSources. Switch Source to a flexibleString wrapper whose UnmarshalYAML accepts scalar, map, or sequence nodes. Structured values decode to a non-empty sentinel so the existing "missing source" predicate still works; scalar values are preserved verbatim. Surfaced during the 2026-04-20 nightly dream run. --- .../standards/references/test-pyramid.md | 2 +- cli/internal/provenance/provenance.go | 48 +++++++++++++++++-- cli/internal/provenance/provenance_test.go | 43 +++++++++++++++++ 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/cli/embedded/skills/standards/references/test-pyramid.md b/cli/embedded/skills/standards/references/test-pyramid.md index 50f963bdb..d967cf5ab 100644 --- a/cli/embedded/skills/standards/references/test-pyramid.md +++ b/cli/embedded/skills/standards/references/test-pyramid.md @@ -57,7 +57,7 @@ The Traditional Pyramid The AI-Native Shape │ regression guards, L0 for contracts, L3 for subsystems. 
│ │ │ │ L0: Contract — from SPEC.md alone │ -│ L1: Unit — always write for regression safety │ +│ L1: Unit — always write for regression safety │ │ L2: Integration — DEFAULT for all agent-written tests │ │ L3: Component — agent writes, human defines scenarios │ ├───────────────────────────────────────────────────────────┤ diff --git a/cli/internal/provenance/provenance.go b/cli/internal/provenance/provenance.go index 0d75f87e9..e9fc8e86a 100644 --- a/cli/internal/provenance/provenance.go +++ b/cli/internal/provenance/provenance.go @@ -45,12 +45,50 @@ type AuditReport struct { // fields the audit cares about. Additional fields in the source are // ignored. type learningFrontmatter struct { - Title string `yaml:"title"` - SourceBead string `yaml:"source_bead"` - Source string `yaml:"source"` - Date string `yaml:"date"` + Title string `yaml:"title"` + SourceBead string `yaml:"source_bead"` + Source flexibleString `yaml:"source"` + Date string `yaml:"date"` } +// flexibleString holds a learning frontmatter value that may be either a +// plain YAML scalar or a structured map/sequence. Older learnings used +// a bare string such as `source: retro-quick`; newer ones occasionally +// use a map like `source: {session: ..., evidence: [...]}` to carry +// provenance context. Either shape should be honored by the audit; the +// canonical "missing source" check only asks whether the value is empty. +type flexibleString string + +// UnmarshalYAML accepts scalar, map, or sequence nodes. Scalars decode +// to their literal value. Maps and sequences decode to a non-empty +// sentinel so the caller's MissingSources check does not mark the +// field as empty; the exact serialization is not interpreted further. 
+func (f *flexibleString) UnmarshalYAML(node *yaml.Node) error { + if node == nil { + *f = "" + return nil + } + switch node.Kind { + case yaml.ScalarNode: + *f = flexibleString(node.Value) + return nil + case yaml.MappingNode, yaml.SequenceNode: + *f = "<structured>" // non-empty sentinel: a structured source still counts as present + return nil + case yaml.AliasNode: + if node.Alias != nil { + return f.UnmarshalYAML(node.Alias) + } + *f = "" + return nil + default: + *f = "" + return nil + } +} + +func (f flexibleString) String() string { return string(f) } + // Audit scans .agents/learnings/ under cwd and returns an // AuditReport. Never prints to stdout/stderr. Soft-fails on // individual file read/parse errors; returns a hard error only @@ -102,7 +140,7 @@ func Audit(cwd string) (*AuditReport, error) { } // Missing source: empty source_bead AND empty source fields. - if strings.TrimSpace(fm.SourceBead) == "" && strings.TrimSpace(fm.Source) == "" { + if strings.TrimSpace(fm.SourceBead) == "" && strings.TrimSpace(fm.Source.String()) == "" { report.MissingSources++ } diff --git a/cli/internal/provenance/provenance_test.go b/cli/internal/provenance/provenance_test.go index 20d22e45b..bb7780d18 100644 --- a/cli/internal/provenance/provenance_test.go +++ b/cli/internal/provenance/provenance_test.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "testing" "time" ) @@ -513,3 +514,45 @@ func TestProvenanceAudit_NoAgentsDir(t *testing.T) { report.StaleCitations, report.MissingSources) } } + +// Regression: a learning whose frontmatter uses a structured (map or +// sequence) `source:` value must still parse and must not be flagged as +// a missing source. Pre-fix the YAML decoder returned +// "cannot unmarshal !!map into string" and the file was skipped. 
+func TestProvenanceAudit_StructuredSourceAccepted(t *testing.T) { + cwd := t.TempDir() + learnings := filepath.Join(cwd, ".agents", "learnings") + if err := os.MkdirAll(learnings, 0o755); err != nil { + t.Fatal(err) + } + + recent := time.Now().AddDate(0, 0, -1).Format("2006-01-02") + mapSource := "---\ntitle: Structured\ndate: " + recent + "\nsource:\n session: 2026-01-01 demo\n evidence:\n - a.md\n - b.md\n---\n\nbody\n" + seqSource := "---\ntitle: Sequence\ndate: " + recent + "\nsource:\n - a.md\n - b.md\n---\n\nbody\n" + stringSource := "---\ntitle: Scalar\ndate: " + recent + "\nsource: retro-quick\n---\n\nbody\n" + + mustWrite := func(name, body string) { + if err := os.WriteFile(filepath.Join(learnings, name), []byte(body), 0o644); err != nil { + t.Fatal(err) + } + } + mustWrite("map-source.md", mapSource) + mustWrite("seq-source.md", seqSource) + mustWrite("scalar-source.md", stringSource) + + report, err := Audit(cwd) + if err != nil { + t.Fatalf("Audit: %v", err) + } + if report == nil { + t.Fatal("nil report") + } + for _, note := range report.Degraded { + if strings.Contains(note, "yaml:") { + t.Errorf("unexpected YAML degraded entry: %q", note) + } + } + if report.MissingSources != 0 { + t.Errorf("expected 0 missing sources for structured+scalar sources, got %d", report.MissingSources) + } +} From 8778fa97aafa09a4650ff509df57eb41c72f9f55 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 24 Apr 2026 17:53:23 +0000 Subject: [PATCH 2/2] fix(codex): convert remaining Skill() invocations to $skill notation Codex mirror SKILL.md files still carried Claude-native Skill(skill="...", args="...") calls in their executable DAG blocks, which codex-skill-api does not recognize as delegated sub-skill invocations. Rewrite all 11 occurrences in design, implement, red-team, and validation to the canonical $ form (e.g. $vibe recent, $test coverage --quick, $post-mortem ). 
Prose occurrences inside shared/references/strict-delegation-contract.md are left intact: they document the Claude-native shape. Regenerate skills-codex hashes so the manifest and per-skill generated markers match the new content; parity audit and codex-native lint pass. Addresses the low-severity council finding tracked under the 2026-04-19-rpi-dag-hardening epic in .agents/rpi/next-work.jsonl. --- skills-codex/.agentops-manifest.json | 14 +++++++------- skills-codex/crank/.agentops-generated.json | 2 +- skills-codex/design/.agentops-generated.json | 2 +- skills-codex/design/SKILL.md | 2 +- skills-codex/implement/.agentops-generated.json | 2 +- skills-codex/implement/SKILL.md | 2 +- skills-codex/quickstart/.agentops-generated.json | 2 +- skills-codex/red-team/.agentops-generated.json | 2 +- skills-codex/red-team/SKILL.md | 2 +- skills-codex/standards/.agentops-generated.json | 2 +- skills-codex/validation/.agentops-generated.json | 2 +- skills-codex/validation/SKILL.md | 16 ++++++++-------- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/skills-codex/.agentops-manifest.json b/skills-codex/.agentops-manifest.json index 55b696161..179330f97 100644 --- a/skills-codex/.agentops-manifest.json +++ b/skills-codex/.agentops-manifest.json @@ -692,7 +692,7 @@ "name": "crank", "source_skill": "skills/crank", "source_hash": "928d1301b7f3bf12607e779cdec279924b5b29d681faffd01e6819651def92e7", - "generated_hash": "63340a4214c757133c74ede362ed165cb55e32e1ca1909f0329d2155dc6e5695" + "generated_hash": "2a0078618d49f2048978451334d01c363202836165802eb0f13268dec5f9ffa5" }, { "name": "deps", @@ -704,7 +704,7 @@ "name": "design", "source_skill": "skills/design", "source_hash": "", - "generated_hash": "0623a63763fc0737ecb29686b689531574966933160d1fb37f963606e7e5f66f" + "generated_hash": "f3089a1425339cf222d714f2451216ba683474765f56bed4c644cbf76f168b92" }, { "name": "discovery", @@ -776,7 +776,7 @@ "name": "implement", "source_skill": "skills/implement", "source_hash": 
"2ab197d1810fd3eb56f73aac4645c02edc01aa41e524064b12b993c6861ebc32", - "generated_hash": "69fa694eaad00ead6eebce2590e981eca07660fba16887eb28f4e833e93eaf21" + "generated_hash": "aa7f88ad80c6fb371f277c90d2a9e45985f481cb5319e9e87ef137ea4aabea9f" }, { "name": "inject", @@ -884,7 +884,7 @@ "name": "quickstart", "source_skill": "skills/quickstart", "source_hash": "68a6463ab7dded893cbdd27d7697c70d871ed4149b5c7cbe8bf20369c1e0a813", - "generated_hash": "92759426fabae70bba9045ff448df760e9a7470b8395310018a8e78957949b20" + "generated_hash": "3069919895447f39b60d3f19a37bd96ec4d5b2f869e6266f2ad07c15870241b6" }, { "name": "ratchet", @@ -908,7 +908,7 @@ "name": "red-team", "source_skill": "skills/red-team", "source_hash": "e7ae62c4a820cf3b7c93e1e0e5913e046013d9dfe8ff48ffa9f85294376bb037", - "generated_hash": "0b59c335567b529586535737af1d8ab5b70e3d73c39bf9b1da1ac54cf73796a5" + "generated_hash": "345e71a7c9d165615b9f10d4c1826e1b3203e36e5aee03cdaa64ea682556bb3a" }, { "name": "refactor", @@ -986,7 +986,7 @@ "name": "standards", "source_skill": "skills/standards", "source_hash": "837a4c5f624c5de7dbe37193fb1671a97d2f4c9381355c4b3745e247b632577a", - "generated_hash": "33afeca1a2dc5ba35f1fec148563937f4730d83f17b414cf71e2c40cfab5b68b" + "generated_hash": "a0eeea87ffddd155c82fc10eee522d3d36cfd054067b03af72da292c08cd3ed3" }, { "name": "status", @@ -1028,7 +1028,7 @@ "name": "validation", "source_skill": "skills/validation", "source_hash": "2e71f8dbe94c65971f0dff7f82ac83396649ff036bbea388060174945f660c5c", - "generated_hash": "5cd2d80a2406d05f0eff3a7276ac14d46d00e66d8e0d5785c784bcaa0a8de463" + "generated_hash": "4c1bf07ba20d26e8c82d40272dcfbd520241f9033454c4bf2730a59defaf8d40" }, { "name": "vibe", diff --git a/skills-codex/crank/.agentops-generated.json b/skills-codex/crank/.agentops-generated.json index 318b0f25c..ecbed8720 100644 --- a/skills-codex/crank/.agentops-generated.json +++ b/skills-codex/crank/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/crank", "layout": 
"modular", "source_hash": "928d1301b7f3bf12607e779cdec279924b5b29d681faffd01e6819651def92e7", - "generated_hash": "63340a4214c757133c74ede362ed165cb55e32e1ca1909f0329d2155dc6e5695" + "generated_hash": "2a0078618d49f2048978451334d01c363202836165802eb0f13268dec5f9ffa5" } diff --git a/skills-codex/design/.agentops-generated.json b/skills-codex/design/.agentops-generated.json index 080982fab..3f5c62fb8 100644 --- a/skills-codex/design/.agentops-generated.json +++ b/skills-codex/design/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/design", "layout": "modular", "source_hash": "", - "generated_hash": "0623a63763fc0737ecb29686b689531574966933160d1fb37f963606e7e5f66f" + "generated_hash": "f3089a1425339cf222d714f2451216ba683474765f56bed4c644cbf76f168b92" } diff --git a/skills-codex/design/SKILL.md b/skills-codex/design/SKILL.md index 9e7395ca3..dec9c68d4 100644 --- a/skills-codex/design/SKILL.md +++ b/skills-codex/design/SKILL.md @@ -63,7 +63,7 @@ Compute the average score across all five dimensions. Invoke council with the product preset. See [references/product-council-preset.md](references/product-council-preset.md) for judge configuration. ``` -Skill(skill="council", args="--preset=product validate design alignment for: ") +$council --preset=product validate design alignment for: ``` Pass the alignment matrix from Step 2 as context to the council judges. 
diff --git a/skills-codex/implement/.agentops-generated.json b/skills-codex/implement/.agentops-generated.json index 501c4e3d7..720a1cd0a 100644 --- a/skills-codex/implement/.agentops-generated.json +++ b/skills-codex/implement/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/implement", "layout": "modular", "source_hash": "2ab197d1810fd3eb56f73aac4645c02edc01aa41e524064b12b993c6861ebc32", - "generated_hash": "69fa694eaad00ead6eebce2590e981eca07660fba16887eb28f4e833e93eaf21" + "generated_hash": "aa7f88ad80c6fb371f277c90d2a9e45985f481cb5319e9e87ef137ea4aabea9f" } diff --git a/skills-codex/implement/SKILL.md b/skills-codex/implement/SKILL.md index ecf115b48..25f5e3535 100644 --- a/skills-codex/implement/SKILL.md +++ b/skills-codex/implement/SKILL.md @@ -199,7 +199,7 @@ Or use `$test ` to auto-generate test candidates, then hand-refine. If skip conditions above are NOT met AND `--no-lifecycle` is NOT set: ``` -Skill(skill="test", args="generate --quick") +$test generate --quick ``` The generated test request must preserve the selected `test_levels` and BF expectations from Step 3.6. Review the generated tests. Adjust as needed (tests are MUTABLE in this context). If `$test` fails to produce useful output or is unavailable, fall back to manual test writing in Step 3.6 above. 
diff --git a/skills-codex/quickstart/.agentops-generated.json b/skills-codex/quickstart/.agentops-generated.json index eff1a40fe..8acac7d17 100644 --- a/skills-codex/quickstart/.agentops-generated.json +++ b/skills-codex/quickstart/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/quickstart", "layout": "modular", "source_hash": "68a6463ab7dded893cbdd27d7697c70d871ed4149b5c7cbe8bf20369c1e0a813", - "generated_hash": "92759426fabae70bba9045ff448df760e9a7470b8395310018a8e78957949b20" + "generated_hash": "3069919895447f39b60d3f19a37bd96ec4d5b2f869e6266f2ad07c15870241b6" } diff --git a/skills-codex/red-team/.agentops-generated.json b/skills-codex/red-team/.agentops-generated.json index c38c91434..0abfb0bf9 100644 --- a/skills-codex/red-team/.agentops-generated.json +++ b/skills-codex/red-team/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/red-team", "layout": "modular", "source_hash": "e7ae62c4a820cf3b7c93e1e0e5913e046013d9dfe8ff48ffa9f85294376bb037", - "generated_hash": "0b59c335567b529586535737af1d8ab5b70e3d73c39bf9b1da1ac54cf73796a5" + "generated_hash": "345e71a7c9d165615b9f10d4c1826e1b3203e36e5aee03cdaa64ea682556bb3a" } diff --git a/skills-codex/red-team/SKILL.md b/skills-codex/red-team/SKILL.md index 8b0f358e2..fb110ab2c 100644 --- a/skills-codex/red-team/SKILL.md +++ b/skills-codex/red-team/SKILL.md @@ -194,7 +194,7 @@ When the same finding appears from multiple personas: Run council with red-team preset to review and consolidate all findings: ``` -Skill(skill="council", args="--preset=red-team [--quick] validate .agents/red-team/") +$council --preset=red-team [--quick] validate .agents/red-team/ ``` Use `--quick` by default. Use full council (omit `--quick`) when `--deep` flag is set. 
diff --git a/skills-codex/standards/.agentops-generated.json b/skills-codex/standards/.agentops-generated.json index dbe44cc99..4b7345125 100644 --- a/skills-codex/standards/.agentops-generated.json +++ b/skills-codex/standards/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/standards", "layout": "modular", "source_hash": "837a4c5f624c5de7dbe37193fb1671a97d2f4c9381355c4b3745e247b632577a", - "generated_hash": "33afeca1a2dc5ba35f1fec148563937f4730d83f17b414cf71e2c40cfab5b68b" + "generated_hash": "a0eeea87ffddd155c82fc10eee522d3d36cfd054067b03af72da292c08cd3ed3" } diff --git a/skills-codex/validation/.agentops-generated.json b/skills-codex/validation/.agentops-generated.json index eec3b9019..5216437aa 100644 --- a/skills-codex/validation/.agentops-generated.json +++ b/skills-codex/validation/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/validation", "layout": "modular", "source_hash": "2e71f8dbe94c65971f0dff7f82ac83396649ff036bbea388060174945f660c5c", - "generated_hash": "5cd2d80a2406d05f0eff3a7276ac14d46d00e66d8e0d5785c784bcaa0a8de463" + "generated_hash": "4c1bf07ba20d26e8c82d40272dcfbd520241f9033454c4bf2730a59defaf8d40" } diff --git a/skills-codex/validation/SKILL.md b/skills-codex/validation/SKILL.md index 99eb44799..b0e31c2a4 100644 --- a/skills-codex/validation/SKILL.md +++ b/skills-codex/validation/SKILL.md @@ -50,7 +50,7 @@ Skip silently if ao is unavailable or returns no results. **Run every step in order. Do not stop between steps.** ``` -STEP 1 ── Skill(skill="vibe", args="recent [--quick]") +STEP 1 ── $vibe recent [--quick] Use --quick for fast/standard. Full council for full. PASS/WARN? → continue FAIL? → write summary, output FAIL, stop @@ -75,20 +75,20 @@ STEP 1.7 ── Lifecycle Checks (advisory except critical dependency findings) On budget expiry: skip remaining sub-steps, write [TIME-BOXED]. 
a) if lifecycle tier >= minimal AND test_framework_detected: - Skill(skill="test", args="coverage --quick") + $test coverage --quick Append coverage delta to phase summary. b) if lifecycle tier >= standard AND dependency_manifest_exists: - Skill(skill="deps", args="vuln --quick") + $deps vuln --quick CRITICAL vulns (CVSS >= 9.0): **FAIL** (block shipping). Opt-out: `--allow-critical-deps` for acknowledged risk acceptance. Non-critical: advisory note only. c) if lifecycle tier >= standard: - Skill(skill="review", args="--diff --quick") + $review --diff --quick Append review findings to summary as advisory. d) if lifecycle tier == full AND modified_files_touch_hot_path: - Skill(skill="perf", args="profile --quick") + $perf profile --quick Append perf findings to summary as advisory. Hot path detection: modified files match benchmark files or patterns (handler, middleware, router, parser, engine, @@ -123,15 +123,15 @@ STEP 1.8 ── Stage 4: Behavioral Validation (holdout scenarios + agent-built FAIL? → write summary, output FAIL, stop STEP 2 ── if epic_id: - Skill(skill="post-mortem", args=" [--quick]") + $post-mortem [--quick] else: - Skill(skill="post-mortem", args="recent [--quick]") + $post-mortem recent [--quick] Use --quick for fast/standard. Full council for full. PASS/WARN? → continue FAIL? → write summary, output FAIL, stop STEP 3 ── if not --no-retro: - Skill(skill="retro") + $retro STEP 4 ── if not --no-forge AND ao available: if [ -n "${CODEX_THREAD_ID:-}" ] || [ "${CODEX_INTERNAL_ORIGINATOR_OVERRIDE:-}" = "Codex Desktop" ]; then