Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/embedded/skills/standards/references/test-pyramid.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ The Traditional Pyramid The AI-Native Shape
│ regression guards, L0 for contracts, L3 for subsystems. │
│ │
│ L0: Contract — from SPEC.md alone │
│ L1: Unit — always write for regression safety
│ L1: Unit — always write for regression safety │
│ L2: Integration — DEFAULT for all agent-written tests │
│ L3: Component — agent writes, human defines scenarios │
├───────────────────────────────────────────────────────────┤
Expand Down
48 changes: 43 additions & 5 deletions cli/internal/provenance/provenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,50 @@ type AuditReport struct {
// fields the audit cares about. Additional fields in the source are
// ignored.
type learningFrontmatter struct {
Title string `yaml:"title"`
SourceBead string `yaml:"source_bead"`
Source string `yaml:"source"`
Date string `yaml:"date"`
Title string `yaml:"title"`
SourceBead string `yaml:"source_bead"`
Source flexibleString `yaml:"source"`
Date string `yaml:"date"`
}

// flexibleString is the decoded form of a learning frontmatter value that
// may appear in YAML either as a plain scalar or as a structured
// map/sequence. Older learnings used a bare string such as
// `source: retro-quick`; newer ones occasionally use a map like
// `source: {session: ..., evidence: [...]}` to carry provenance context.
// Both shapes must be accepted by the audit. The structured contents are
// not preserved: the "missing source" check only asks whether the decoded
// value is empty, so a string is a sufficient representation.
type flexibleString string

// UnmarshalYAML decodes a `source:`-style frontmatter node of any shape
// into f. It always returns nil, so an unexpected node shape can never make
// the surrounding frontmatter fail to parse.
//
// Decoding rules:
//   - Scalar nodes decode to their literal text.
//   - Non-empty mapping and sequence nodes decode to the opaque sentinel
//     "<structured>"; their contents are not interpreted further. The
//     sentinel only exists so that an emptiness check on the value sees
//     "something was provided".
//   - Empty mapping and sequence nodes (`source: {}`, `source: []`) decode
//     to "": they carry no provenance, so they must not be mistaken for a
//     present source.
//   - Alias nodes are resolved to the node they point at and decoded by the
//     same rules; an alias with no target decodes to "".
//   - A nil node, or a node of any other kind, decodes to "".
func (f *flexibleString) UnmarshalYAML(node *yaml.Node) error {
	// Start from the empty value; only the cases below that actually carry
	// content overwrite it.
	*f = ""

	// Resolve aliases first so the switch only ever sees a concrete node
	// (or nil for a dangling alias).
	for node != nil && node.Kind == yaml.AliasNode {
		node = node.Alias
	}
	if node == nil {
		return nil
	}

	switch node.Kind {
	case yaml.ScalarNode:
		*f = flexibleString(node.Value)
	case yaml.MappingNode, yaml.SequenceNode:
		// Content holds the child nodes; an empty collection is equivalent
		// to "no source given" and stays "".
		if len(node.Content) > 0 {
			*f = "<structured>"
		}
	}
	return nil
}

// String returns the decoded value as a plain string: either the literal
// scalar text from the frontmatter or the sentinel recorded for a
// structured value.
func (f flexibleString) String() string {
	return string(f)
}

// Audit scans .agents/learnings/ under cwd and returns an
// AuditReport. Never prints to stdout/stderr. Soft-fails on
// individual file read/parse errors; returns a hard error only
Expand Down Expand Up @@ -102,7 +140,7 @@ func Audit(cwd string) (*AuditReport, error) {
}

// Missing source: empty source_bead AND empty source fields.
if strings.TrimSpace(fm.SourceBead) == "" && strings.TrimSpace(fm.Source) == "" {
if strings.TrimSpace(fm.SourceBead) == "" && strings.TrimSpace(fm.Source.String()) == "" {
report.MissingSources++
}

Expand Down
43 changes: 43 additions & 0 deletions cli/internal/provenance/provenance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
Expand Down Expand Up @@ -513,3 +514,45 @@ func TestProvenanceAudit_NoAgentsDir(t *testing.T) {
report.StaleCitations, report.MissingSources)
}
}

// Regression guard for structured `source:` frontmatter. Learnings whose
// `source:` is a map or a sequence must parse cleanly and must be counted
// as having a source, exactly like a plain scalar `source:`. Before the
// fix the YAML decoder failed with "cannot unmarshal !!map into string"
// and the file was skipped.
func TestProvenanceAudit_StructuredSourceAccepted(t *testing.T) {
	root := t.TempDir()
	dir := filepath.Join(root, ".agents", "learnings")
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatal(err)
	}

	// Date the fixtures one day back from now.
	recent := time.Now().AddDate(0, 0, -1).Format("2006-01-02")

	// One fixture per accepted `source:` shape: map, sequence, scalar.
	fixtures := []struct {
		file    string
		content string
	}{
		{
			file:    "map-source.md",
			content: "---\ntitle: Structured\ndate: " + recent + "\nsource:\n session: 2026-01-01 demo\n evidence:\n - a.md\n - b.md\n---\n\nbody\n",
		},
		{
			file:    "seq-source.md",
			content: "---\ntitle: Sequence\ndate: " + recent + "\nsource:\n - a.md\n - b.md\n---\n\nbody\n",
		},
		{
			file:    "scalar-source.md",
			content: "---\ntitle: Scalar\ndate: " + recent + "\nsource: retro-quick\n---\n\nbody\n",
		},
	}
	for _, fx := range fixtures {
		target := filepath.Join(dir, fx.file)
		if err := os.WriteFile(target, []byte(fx.content), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	report, err := Audit(root)
	if err != nil {
		t.Fatalf("Audit: %v", err)
	}
	if report == nil {
		t.Fatal("nil report")
	}

	// No file may have been degraded because of a YAML decoding error.
	for _, entry := range report.Degraded {
		if strings.Contains(entry, "yaml:") {
			t.Errorf("unexpected YAML degraded entry: %q", entry)
		}
	}

	// Every fixture supplies a source in some shape, so none is missing.
	if got := report.MissingSources; got != 0 {
		t.Errorf("expected 0 missing sources for structured+scalar sources, got %d", got)
	}
}
14 changes: 7 additions & 7 deletions skills-codex/.agentops-manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@
"name": "crank",
"source_skill": "skills/crank",
"source_hash": "928d1301b7f3bf12607e779cdec279924b5b29d681faffd01e6819651def92e7",
"generated_hash": "63340a4214c757133c74ede362ed165cb55e32e1ca1909f0329d2155dc6e5695"
"generated_hash": "2a0078618d49f2048978451334d01c363202836165802eb0f13268dec5f9ffa5"
},
{
"name": "deps",
Expand All @@ -704,7 +704,7 @@
"name": "design",
"source_skill": "skills/design",
"source_hash": "",
"generated_hash": "0623a63763fc0737ecb29686b689531574966933160d1fb37f963606e7e5f66f"
"generated_hash": "f3089a1425339cf222d714f2451216ba683474765f56bed4c644cbf76f168b92"
},
{
"name": "discovery",
Expand Down Expand Up @@ -776,7 +776,7 @@
"name": "implement",
"source_skill": "skills/implement",
"source_hash": "2ab197d1810fd3eb56f73aac4645c02edc01aa41e524064b12b993c6861ebc32",
"generated_hash": "69fa694eaad00ead6eebce2590e981eca07660fba16887eb28f4e833e93eaf21"
"generated_hash": "aa7f88ad80c6fb371f277c90d2a9e45985f481cb5319e9e87ef137ea4aabea9f"
},
{
"name": "inject",
Expand Down Expand Up @@ -884,7 +884,7 @@
"name": "quickstart",
"source_skill": "skills/quickstart",
"source_hash": "68a6463ab7dded893cbdd27d7697c70d871ed4149b5c7cbe8bf20369c1e0a813",
"generated_hash": "92759426fabae70bba9045ff448df760e9a7470b8395310018a8e78957949b20"
"generated_hash": "3069919895447f39b60d3f19a37bd96ec4d5b2f869e6266f2ad07c15870241b6"
},
{
"name": "ratchet",
Expand All @@ -908,7 +908,7 @@
"name": "red-team",
"source_skill": "skills/red-team",
"source_hash": "e7ae62c4a820cf3b7c93e1e0e5913e046013d9dfe8ff48ffa9f85294376bb037",
"generated_hash": "0b59c335567b529586535737af1d8ab5b70e3d73c39bf9b1da1ac54cf73796a5"
"generated_hash": "345e71a7c9d165615b9f10d4c1826e1b3203e36e5aee03cdaa64ea682556bb3a"
},
{
"name": "refactor",
Expand Down Expand Up @@ -986,7 +986,7 @@
"name": "standards",
"source_skill": "skills/standards",
"source_hash": "837a4c5f624c5de7dbe37193fb1671a97d2f4c9381355c4b3745e247b632577a",
"generated_hash": "33afeca1a2dc5ba35f1fec148563937f4730d83f17b414cf71e2c40cfab5b68b"
"generated_hash": "a0eeea87ffddd155c82fc10eee522d3d36cfd054067b03af72da292c08cd3ed3"
},
{
"name": "status",
Expand Down Expand Up @@ -1028,7 +1028,7 @@
"name": "validation",
"source_skill": "skills/validation",
"source_hash": "2e71f8dbe94c65971f0dff7f82ac83396649ff036bbea388060174945f660c5c",
"generated_hash": "5cd2d80a2406d05f0eff3a7276ac14d46d00e66d8e0d5785c784bcaa0a8de463"
"generated_hash": "4c1bf07ba20d26e8c82d40272dcfbd520241f9033454c4bf2730a59defaf8d40"
},
{
"name": "vibe",
Expand Down
2 changes: 1 addition & 1 deletion skills-codex/crank/.agentops-generated.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"source_skill": "skills/crank",
"layout": "modular",
"source_hash": "928d1301b7f3bf12607e779cdec279924b5b29d681faffd01e6819651def92e7",
"generated_hash": "63340a4214c757133c74ede362ed165cb55e32e1ca1909f0329d2155dc6e5695"
"generated_hash": "2a0078618d49f2048978451334d01c363202836165802eb0f13268dec5f9ffa5"
}
2 changes: 1 addition & 1 deletion skills-codex/design/.agentops-generated.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"source_skill": "skills/design",
"layout": "modular",
"source_hash": "",
"generated_hash": "0623a63763fc0737ecb29686b689531574966933160d1fb37f963606e7e5f66f"
"generated_hash": "f3089a1425339cf222d714f2451216ba683474765f56bed4c644cbf76f168b92"
}
2 changes: 1 addition & 1 deletion skills-codex/design/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Compute the average score across all five dimensions.
Invoke council with the product preset. See [references/product-council-preset.md](references/product-council-preset.md) for judge configuration.

```
Skill(skill="council", args="--preset=product validate design alignment for: <goal>")
$council --preset=product validate design alignment for: <goal>
```

Pass the alignment matrix from Step 2 as context to the council judges.
Expand Down
2 changes: 1 addition & 1 deletion skills-codex/implement/.agentops-generated.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"source_skill": "skills/implement",
"layout": "modular",
"source_hash": "2ab197d1810fd3eb56f73aac4645c02edc01aa41e524064b12b993c6861ebc32",
"generated_hash": "69fa694eaad00ead6eebce2590e981eca07660fba16887eb28f4e833e93eaf21"
"generated_hash": "aa7f88ad80c6fb371f277c90d2a9e45985f481cb5319e9e87ef137ea4aabea9f"
}
2 changes: 1 addition & 1 deletion skills-codex/implement/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ Or use `$test <feature>` to auto-generate test candidates, then hand-refine.
If skip conditions above are NOT met AND `--no-lifecycle` is NOT set:

```
Skill(skill="test", args="generate <feature-scope> --quick")
$test generate <feature-scope> --quick
```

The generated test request must preserve the selected `test_levels` and BF expectations from Step 3.6. Review the generated tests. Adjust as needed (tests are MUTABLE in this context). If `$test` fails to produce useful output or is unavailable, fall back to manual test writing in Step 3.6 above.
Expand Down
2 changes: 1 addition & 1 deletion skills-codex/quickstart/.agentops-generated.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"source_skill": "skills/quickstart",
"layout": "modular",
"source_hash": "68a6463ab7dded893cbdd27d7697c70d871ed4149b5c7cbe8bf20369c1e0a813",
"generated_hash": "92759426fabae70bba9045ff448df760e9a7470b8395310018a8e78957949b20"
"generated_hash": "3069919895447f39b60d3f19a37bd96ec4d5b2f869e6266f2ad07c15870241b6"
}
2 changes: 1 addition & 1 deletion skills-codex/red-team/.agentops-generated.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"source_skill": "skills/red-team",
"layout": "modular",
"source_hash": "e7ae62c4a820cf3b7c93e1e0e5913e046013d9dfe8ff48ffa9f85294376bb037",
"generated_hash": "0b59c335567b529586535737af1d8ab5b70e3d73c39bf9b1da1ac54cf73796a5"
"generated_hash": "345e71a7c9d165615b9f10d4c1826e1b3203e36e5aee03cdaa64ea682556bb3a"
}
2 changes: 1 addition & 1 deletion skills-codex/red-team/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ When the same finding appears from multiple personas:
Run council with red-team preset to review and consolidate all findings:

```
Skill(skill="council", args="--preset=red-team [--quick] validate .agents/red-team/")
$council --preset=red-team [--quick] validate .agents/red-team/
```

Use `--quick` by default. Use full council (omit `--quick`) when `--deep` flag is set.
Expand Down
2 changes: 1 addition & 1 deletion skills-codex/standards/.agentops-generated.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"source_skill": "skills/standards",
"layout": "modular",
"source_hash": "837a4c5f624c5de7dbe37193fb1671a97d2f4c9381355c4b3745e247b632577a",
"generated_hash": "33afeca1a2dc5ba35f1fec148563937f4730d83f17b414cf71e2c40cfab5b68b"
"generated_hash": "a0eeea87ffddd155c82fc10eee522d3d36cfd054067b03af72da292c08cd3ed3"
}
2 changes: 1 addition & 1 deletion skills-codex/validation/.agentops-generated.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"source_skill": "skills/validation",
"layout": "modular",
"source_hash": "2e71f8dbe94c65971f0dff7f82ac83396649ff036bbea388060174945f660c5c",
"generated_hash": "5cd2d80a2406d05f0eff3a7276ac14d46d00e66d8e0d5785c784bcaa0a8de463"
"generated_hash": "4c1bf07ba20d26e8c82d40272dcfbd520241f9033454c4bf2730a59defaf8d40"
}
16 changes: 8 additions & 8 deletions skills-codex/validation/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ Skip silently if ao is unavailable or returns no results.
**Run every step in order. Do not stop between steps.**

```
STEP 1 ── Skill(skill="vibe", args="recent [--quick]")
STEP 1 ── $vibe recent [--quick]
Use --quick for fast/standard. Full council for full.
PASS/WARN? → continue
FAIL? → write summary, output <promise>FAIL</promise>, stop
Expand All @@ -75,20 +75,20 @@ STEP 1.7 ── Lifecycle Checks (advisory except critical dependency findings)
On budget expiry: skip remaining sub-steps, write [TIME-BOXED].

a) if lifecycle tier >= minimal AND test_framework_detected:
Skill(skill="test", args="coverage --quick")
$test coverage --quick
Append coverage delta to phase summary.

b) if lifecycle tier >= standard AND dependency_manifest_exists:
Skill(skill="deps", args="vuln --quick")
$deps vuln --quick
CRITICAL vulns (CVSS >= 9.0): **FAIL** (block shipping). Opt-out: `--allow-critical-deps` for acknowledged risk acceptance.
Non-critical: advisory note only.

c) if lifecycle tier >= standard:
Skill(skill="review", args="--diff --quick")
$review --diff --quick
Append review findings to summary as advisory.

d) if lifecycle tier == full AND modified_files_touch_hot_path:
Skill(skill="perf", args="profile --quick")
$perf profile --quick
Append perf findings to summary as advisory.
Hot path detection: modified files match benchmark files
or patterns (handler, middleware, router, parser, engine,
Expand Down Expand Up @@ -123,15 +123,15 @@ STEP 1.8 ── Stage 4: Behavioral Validation (holdout scenarios + agent-built
FAIL? → write summary, output <promise>FAIL</promise>, stop

STEP 2 ── if epic_id:
Skill(skill="post-mortem", args="<epic-id> [--quick]")
$post-mortem <epic-id> [--quick]
else:
Skill(skill="post-mortem", args="recent [--quick]")
$post-mortem recent [--quick]
Use --quick for fast/standard. Full council for full.
PASS/WARN? → continue
FAIL? → write summary, output <promise>FAIL</promise>, stop

STEP 3 ── if not --no-retro:
Skill(skill="retro")
$retro

STEP 4 ── if not --no-forge AND ao available:
if [ -n "${CODEX_THREAD_ID:-}" ] || [ "${CODEX_INTERNAL_ORIGINATOR_OVERRIDE:-}" = "Codex Desktop" ]; then
Expand Down