From a45fa079e88e62e747e34c21ed5be98473cf73ec Mon Sep 17 00:00:00 2001 From: phernandez Date: Fri, 5 Jun 2026 12:30:55 -0500 Subject: [PATCH 1/2] fix(skills): reject invalid frontmatter YAML Signed-off-by: phernandez --- scripts/validate_skills.py | 26 +++++++++++++++-- tests/ci/test_validate_skills.py | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 tests/ci/test_validate_skills.py diff --git a/scripts/validate_skills.py b/scripts/validate_skills.py index 753e6088..dbbb6290 100644 --- a/scripts/validate_skills.py +++ b/scripts/validate_skills.py @@ -4,9 +4,25 @@ from __future__ import annotations import argparse +import re from pathlib import Path +PLAIN_SCALAR_MAPPING_VALUE = re.compile(r":(?:\s|$)") + + +def validate_plain_scalar(path: Path, line_number: int, key: str, value: str) -> None: + """Catch invalid plain-scalar YAML that Codex rejects while loading skills.""" + stripped = value.strip() + if not stripped or stripped[0] in {'"', "'"} or stripped in {"|", ">", "|-", ">-", "|+", ">+"}: + return + if PLAIN_SCALAR_MAPPING_VALUE.search(stripped): + raise SystemExit( + f"{path}:{line_number}: invalid YAML frontmatter for {key!r}: " + "unquoted ':' followed by whitespace; quote the value" + ) + + def parse_frontmatter(path: Path) -> dict[str, str]: """Extract top-level frontmatter keys from a Markdown file. @@ -15,14 +31,15 @@ def parse_frontmatter(path: Path) -> dict[str, str]: skipped, so nested blocks (a schema note's `schema:`/`settings:` children) can't overwrite a top-level key like `type` or `entity` via last-write-wins. It does not interpret block scalars or multi-line values; callers rely on single-line - top-level fields (name, description, type, entity). + top-level fields (name, description, type, entity). Keep the Codex-facing YAML + guard here dependency-free so package checks work under bare `python3`. """ lines = path.read_text().splitlines() if not lines or lines[0] != "---": raise SystemExit(f"{path}: missing YAML frontmatter") frontmatter: dict[str, str] = {} - for line in lines[1:]: + for line_number, line in enumerate(lines[1:], start=2): if line == "---": break if line[:1] in (" ", "\t"): # nested key — not a top-level field @@ -30,7 +47,10 @@ def parse_frontmatter(path: Path) -> dict[str, str]: if ":" not in line: continue key, value = line.split(":", 1) - frontmatter[key.strip()] = value.strip().strip('"') + key = key.strip() + value = value.strip() + validate_plain_scalar(path, line_number, key, value) + frontmatter[key] = value.strip('"') else: raise SystemExit(f"{path}: unclosed YAML frontmatter") diff --git a/tests/ci/test_validate_skills.py b/tests/ci/test_validate_skills.py new file mode 100644 index 00000000..10ef274c --- /dev/null +++ b/tests/ci/test_validate_skills.py @@ -0,0 +1,49 @@ +from pathlib import Path + +import pytest + +from scripts.validate_skills import parse_frontmatter + + +def test_parse_frontmatter_rejects_unquoted_mapping_colon(tmp_path: Path) -> None: + skill = tmp_path / "SKILL.md" + skill.write_text( + "\n".join( + [ + "---", + "name: bm-qa", + "description: Use when validating fixes. Drives the full loop: map issue to commit.", + "---", + "# Skill", + "", + ] + ), + encoding="utf-8", + ) + + with pytest.raises(SystemExit, match="invalid YAML"): + parse_frontmatter(skill) + + +def test_parse_frontmatter_keeps_nested_fields_nested(tmp_path: Path) -> None: + schema = tmp_path / "schema.md" + schema.write_text( + "\n".join( + [ + "---", + "type: schema", + "entity: Task", + "schema:", + " type: object", + "---", + "# Task", + "", + ] + ), + encoding="utf-8", + ) + + frontmatter = parse_frontmatter(schema) + + assert frontmatter["type"] == "schema" + assert frontmatter["entity"] == "Task" From c816645f29a3db39d7fb195341233a70478d37a5 Mon Sep 17 00:00:00 2001 From: phernandez Date: Fri, 5 Jun 2026 12:35:01 -0500 Subject: [PATCH 2/2] test(skills): cover frontmatter edge cases Signed-off-by: phernandez --- scripts/validate_skills.py | 8 +++++- tests/ci/test_validate_skills.py | 43 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/scripts/validate_skills.py b/scripts/validate_skills.py index dbbb6290..25abf903 100644 --- a/scripts/validate_skills.py +++ b/scripts/validate_skills.py @@ -11,6 +11,12 @@ PLAIN_SCALAR_MAPPING_VALUE = re.compile(r":(?:\s|$)") +def strip_matching_quotes(value: str) -> str: + if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}: + return value[1:-1] + return value + + def validate_plain_scalar(path: Path, line_number: int, key: str, value: str) -> None: """Catch invalid plain-scalar YAML that Codex rejects while loading skills.""" stripped = value.strip() @@ -50,7 +56,7 @@ def parse_frontmatter(path: Path) -> dict[str, str]: key = key.strip() value = value.strip() validate_plain_scalar(path, line_number, key, value) - frontmatter[key] = value.strip('"') + frontmatter[key] = strip_matching_quotes(value) else: raise SystemExit(f"{path}: unclosed YAML frontmatter") diff --git a/tests/ci/test_validate_skills.py b/tests/ci/test_validate_skills.py index 10ef274c..251048cc 100644 --- a/tests/ci/test_validate_skills.py +++ b/tests/ci/test_validate_skills.py @@ -25,6 +25,48 @@ def test_parse_frontmatter_rejects_unquoted_mapping_colon(tmp_path: Path) -> Non parse_frontmatter(skill) +def test_parse_frontmatter_allows_url_colons_in_plain_values(tmp_path: Path) -> None: + skill = tmp_path / "SKILL.md" + skill.write_text( + "\n".join( + [ + "---", + "name: memory-notes", + "description: See https://docs.basicmemory.com for usage.", + "---", + "# Skill", + "", + ] + ), + encoding="utf-8", + ) + + frontmatter = parse_frontmatter(skill) + + assert frontmatter["description"] == "See https://docs.basicmemory.com for usage." + + +def test_parse_frontmatter_strips_matching_single_quotes(tmp_path: Path) -> None: + skill = tmp_path / "SKILL.md" + skill.write_text( + "\n".join( + [ + "---", + "name: memory-notes", + "description: 'Use when values contain mapping-like text: safely.'", + "---", + "# Skill", + "", + ] + ), + encoding="utf-8", + ) + + frontmatter = parse_frontmatter(skill) + + assert frontmatter["description"] == "Use when values contain mapping-like text: safely." + + def test_parse_frontmatter_keeps_nested_fields_nested(tmp_path: Path) -> None: schema = tmp_path / "schema.md" schema.write_text( @@ -47,3 +89,4 @@ def test_parse_frontmatter_keeps_nested_fields_nested(tmp_path: Path) -> None: assert frontmatter["type"] == "schema" assert frontmatter["entity"] == "Task" + assert frontmatter["schema"] == ""