Skip to content

Commit ce4b5d4

Browse files
authored
fix(skills): reject invalid frontmatter YAML (#896)
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 59e865c commit ce4b5d4

2 files changed

Lines changed: 121 additions & 3 deletions

File tree

scripts/validate_skills.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,31 @@
44
from __future__ import annotations
55

66
import argparse
7+
import re
78
from pathlib import Path
89

910

11+
PLAIN_SCALAR_MAPPING_VALUE = re.compile(r":(?:\s|$)")
12+
13+
14+
def strip_matching_quotes(value: str) -> str:
15+
if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}:
16+
return value[1:-1]
17+
return value
18+
19+
20+
def validate_plain_scalar(path: Path, line_number: int, key: str, value: str) -> None:
21+
"""Catch invalid plain-scalar YAML that Codex rejects while loading skills."""
22+
stripped = value.strip()
23+
if not stripped or stripped[0] in {'"', "'"} or stripped in {"|", ">", "|-", ">-", "|+", ">+"}:
24+
return
25+
if PLAIN_SCALAR_MAPPING_VALUE.search(stripped):
26+
raise SystemExit(
27+
f"{path}:{line_number}: invalid YAML frontmatter for {key!r}: "
28+
"unquoted ':' followed by whitespace; quote the value"
29+
)
30+
31+
1032
def parse_frontmatter(path: Path) -> dict[str, str]:
1133
"""Extract top-level frontmatter keys from a Markdown file.
1234
@@ -15,22 +37,26 @@ def parse_frontmatter(path: Path) -> dict[str, str]:
1537
skipped, so nested blocks (a schema note's `schema:`/`settings:` children) can't
1638
overwrite a top-level key like `type` or `entity` via last-write-wins. It does
1739
not interpret block scalars or multi-line values; callers rely on single-line
18-
top-level fields (name, description, type, entity).
40+
top-level fields (name, description, type, entity). Keep the Codex-facing YAML
41+
guard here dependency-free so package checks work under bare `python3`.
1942
"""
2043
lines = path.read_text().splitlines()
2144
if not lines or lines[0] != "---":
2245
raise SystemExit(f"{path}: missing YAML frontmatter")
2346

2447
frontmatter: dict[str, str] = {}
25-
for line in lines[1:]:
48+
for line_number, line in enumerate(lines[1:], start=2):
2649
if line == "---":
2750
break
2851
if line[:1] in (" ", "\t"): # nested key — not a top-level field
2952
continue
3053
if ":" not in line:
3154
continue
3255
key, value = line.split(":", 1)
33-
frontmatter[key.strip()] = value.strip().strip('"')
56+
key = key.strip()
57+
value = value.strip()
58+
validate_plain_scalar(path, line_number, key, value)
59+
frontmatter[key] = strip_matching_quotes(value)
3460
else:
3561
raise SystemExit(f"{path}: unclosed YAML frontmatter")
3662

tests/ci/test_validate_skills.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
5+
from scripts.validate_skills import parse_frontmatter
6+
7+
8+
def test_parse_frontmatter_rejects_unquoted_mapping_colon(tmp_path: Path) -> None:
9+
skill = tmp_path / "SKILL.md"
10+
skill.write_text(
11+
"\n".join(
12+
[
13+
"---",
14+
"name: bm-qa",
15+
"description: Use when validating fixes. Drives the full loop: map issue to commit.",
16+
"---",
17+
"# Skill",
18+
"",
19+
]
20+
),
21+
encoding="utf-8",
22+
)
23+
24+
with pytest.raises(SystemExit, match="invalid YAML"):
25+
parse_frontmatter(skill)
26+
27+
28+
def test_parse_frontmatter_allows_url_colons_in_plain_values(tmp_path: Path) -> None:
29+
skill = tmp_path / "SKILL.md"
30+
skill.write_text(
31+
"\n".join(
32+
[
33+
"---",
34+
"name: memory-notes",
35+
"description: See https://docs.basicmemory.com for usage.",
36+
"---",
37+
"# Skill",
38+
"",
39+
]
40+
),
41+
encoding="utf-8",
42+
)
43+
44+
frontmatter = parse_frontmatter(skill)
45+
46+
assert frontmatter["description"] == "See https://docs.basicmemory.com for usage."
47+
48+
49+
def test_parse_frontmatter_strips_matching_single_quotes(tmp_path: Path) -> None:
50+
skill = tmp_path / "SKILL.md"
51+
skill.write_text(
52+
"\n".join(
53+
[
54+
"---",
55+
"name: memory-notes",
56+
"description: 'Use when values contain mapping-like text: safely.'",
57+
"---",
58+
"# Skill",
59+
"",
60+
]
61+
),
62+
encoding="utf-8",
63+
)
64+
65+
frontmatter = parse_frontmatter(skill)
66+
67+
assert frontmatter["description"] == "Use when values contain mapping-like text: safely."
68+
69+
70+
def test_parse_frontmatter_keeps_nested_fields_nested(tmp_path: Path) -> None:
71+
schema = tmp_path / "schema.md"
72+
schema.write_text(
73+
"\n".join(
74+
[
75+
"---",
76+
"type: schema",
77+
"entity: Task",
78+
"schema:",
79+
" type: object",
80+
"---",
81+
"# Task",
82+
"",
83+
]
84+
),
85+
encoding="utf-8",
86+
)
87+
88+
frontmatter = parse_frontmatter(schema)
89+
90+
assert frontmatter["type"] == "schema"
91+
assert frontmatter["entity"] == "Task"
92+
assert frontmatter["schema"] == ""

0 commit comments

Comments
 (0)