Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions scripts/validate_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,31 @@
from __future__ import annotations

import argparse
import re
from pathlib import Path


# Matches a ':' that is immediately followed by whitespace or by the end of the
# string. A bare ':' inside a token (e.g. the one in "https://") does not match.
PLAIN_SCALAR_MAPPING_VALUE = re.compile(r":(?:\s|$)")


def strip_matching_quotes(value: str) -> str:
    """Return *value* without its enclosing quotes when both ends match.

    A value is unwrapped only when it is at least two characters long and its
    first and last characters are the same quote character (``"`` or ``'``).
    Anything else, including a lone quote or mismatched quotes, is returned
    unchanged.
    """
    if len(value) < 2:
        return value
    opening, closing = value[0], value[-1]
    if opening == closing and opening in ('"', "'"):
        return value[1:-1]
    return value


def validate_plain_scalar(path: Path, line_number: int, key: str, value: str) -> None:
    """Catch invalid plain-scalar YAML that Codex rejects while loading skills.

    A plain (unquoted) scalar may not contain ``:`` followed by whitespace or
    end-of-value, because YAML reads that as the start of a nested mapping.

    The check is deliberately skipped for values this lightweight guard cannot
    judge or that are valid YAML by construction:

    * empty values and values that open with a quote character;
    * bare block-scalar indicators (``|``, ``>``, with optional ``-``/``+``);
    * flow collections (``{...}`` / ``[...]``), where ``key: value`` pairs are
      legal YAML;
    * values that consist only of a comment.

    A trailing ``# comment`` is removed before checking, so a ``: `` that only
    appears inside the comment is not reported.

    Args:
        path: File being validated; used only in the error message.
        line_number: 1-based line number of the field; used only in the message.
        key: Frontmatter key whose value is being checked.
        value: Raw text that followed ``key:`` on the line.

    Raises:
        SystemExit: If the plain scalar contains ``:`` followed by whitespace
            or end-of-value.
    """
    stripped = value.strip()
    # Empty, explicitly quoted, or comment-only values carry no plain scalar.
    if not stripped or stripped[0] in {'"', "'", "#"}:
        return
    # A bare block-scalar header: the content lives on the following lines.
    if stripped in {"|", ">", "|-", ">-", "|+", ">+"}:
        return
    # Flow mappings/sequences legitimately contain "key: value" pairs.
    if stripped[0] in {"{", "["}:
        return
    # In YAML a '#' preceded by whitespace starts a comment; only the text
    # before it is part of the scalar.
    plain = re.split(r"\s#", stripped, maxsplit=1)[0]
    if PLAIN_SCALAR_MAPPING_VALUE.search(plain):
        raise SystemExit(
            f"{path}:{line_number}: invalid YAML frontmatter for {key!r}: "
            "unquoted ':' followed by whitespace; quote the value"
        )


def parse_frontmatter(path: Path) -> dict[str, str]:
"""Extract top-level frontmatter keys from a Markdown file.

Expand All @@ -15,22 +37,26 @@ def parse_frontmatter(path: Path) -> dict[str, str]:
skipped, so nested blocks (a schema note's `schema:`/`settings:` children) can't
overwrite a top-level key like `type` or `entity` via last-write-wins. It does
not interpret block scalars or multi-line values; callers rely on single-line
top-level fields (name, description, type, entity).
top-level fields (name, description, type, entity). Keep the Codex-facing YAML
guard here dependency-free so package checks work under bare `python3`.
"""
lines = path.read_text().splitlines()
if not lines or lines[0] != "---":
raise SystemExit(f"{path}: missing YAML frontmatter")

frontmatter: dict[str, str] = {}
for line in lines[1:]:
for line_number, line in enumerate(lines[1:], start=2):
if line == "---":
break
if line[:1] in (" ", "\t"): # nested key — not a top-level field
continue
if ":" not in line:
continue
key, value = line.split(":", 1)
frontmatter[key.strip()] = value.strip().strip('"')
key = key.strip()
value = value.strip()
validate_plain_scalar(path, line_number, key, value)
frontmatter[key] = strip_matching_quotes(value)
else:
raise SystemExit(f"{path}: unclosed YAML frontmatter")

Expand Down
92 changes: 92 additions & 0 deletions tests/ci/test_validate_skills.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from pathlib import Path

import pytest

from scripts.validate_skills import parse_frontmatter


def test_parse_frontmatter_rejects_unquoted_mapping_colon(tmp_path: Path) -> None:
    """An unquoted description containing ': ' aborts with an 'invalid YAML' error."""
    document_lines = (
        "---",
        "name: bm-qa",
        "description: Use when validating fixes. Drives the full loop: map issue to commit.",
        "---",
        "# Skill",
        "",
    )
    skill_file = tmp_path / "SKILL.md"
    skill_file.write_text("\n".join(document_lines), encoding="utf-8")

    with pytest.raises(SystemExit, match="invalid YAML"):
        parse_frontmatter(skill_file)


def test_parse_frontmatter_allows_url_colons_in_plain_values(tmp_path: Path) -> None:
    """A ':' not followed by whitespace (as in a URL) is accepted in a plain value."""
    document_lines = (
        "---",
        "name: memory-notes",
        "description: See https://docs.basicmemory.com for usage.",
        "---",
        "# Skill",
        "",
    )
    skill_file = tmp_path / "SKILL.md"
    skill_file.write_text("\n".join(document_lines), encoding="utf-8")

    parsed = parse_frontmatter(skill_file)

    assert parsed["description"] == "See https://docs.basicmemory.com for usage."


def test_parse_frontmatter_strips_matching_single_quotes(tmp_path: Path) -> None:
    """A single-quoted value may contain ': ' and is returned without its quotes."""
    document_lines = (
        "---",
        "name: memory-notes",
        "description: 'Use when values contain mapping-like text: safely.'",
        "---",
        "# Skill",
        "",
    )
    skill_file = tmp_path / "SKILL.md"
    skill_file.write_text("\n".join(document_lines), encoding="utf-8")

    parsed = parse_frontmatter(skill_file)

    assert parsed["description"] == "Use when values contain mapping-like text: safely."


def test_parse_frontmatter_keeps_nested_fields_nested(tmp_path: Path) -> None:
    """An indented 'type' under 'schema:' does not replace the top-level 'type'."""
    document_lines = (
        "---",
        "type: schema",
        "entity: Task",
        "schema:",
        "  type: object",
        "---",
        "# Task",
        "",
    )
    schema_file = tmp_path / "schema.md"
    schema_file.write_text("\n".join(document_lines), encoding="utf-8")

    parsed = parse_frontmatter(schema_file)

    # The parent key of the nested block is recorded with an empty value.
    expected = {"type": "schema", "entity": "Task", "schema": ""}
    assert {field: parsed[field] for field in expected} == expected
Loading