From ccfb2b07019f7979f94e75b3d299f949f4162141 Mon Sep 17 00:00:00 2001 From: armel Date: Mon, 27 Apr 2026 09:44:33 +0400 Subject: [PATCH 1/2] fix(workflow-builder): walk steps-c/ and recognize step-NN naming in prepass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit prepass-workflow-integrity.py only walked the skill root and matched the legacy `^\d+-` filename pattern. That made it miss every BMad skill that uses the modern `steps-c/step-NN-.md` layout (enforced by `tools/validate-skills.js` STEP-04 in downstream modules like skill-forge): the walker found zero numbered files at the skill root, then matched the SKILL.md references against an empty actual-files set, producing 5+ false-positive `critical/missing-stage` findings on every quality scan of every affected skill. Three coordinated changes to fix it: - New `discover_step_files()` helper walks the skill root AND the conventional stage subdirectories (`steps-c/`, `steps-a/`, `steps-b/`, `prompts/`). Walks recursively (`rglob`) inside each to pick up nested layouts like `steps-c/sub/` for sub-step files (used by skill-forge's create-skill workflow, among others). - Filename regex broadened from `^\d+-` to `^(?:step-)?\d+\w*-` so it accepts both legacy `01-foo.md` and modern `step-01b-foo.md` (letter suffixes for sub-stages). - Reference-matching regex broadened from `(?:prompts/)?(\d+-…)` to allow any of the four stage-subdirectory prefixes plus optional nested path segments, so `steps-c/sub/step-02b-...` references resolve correctly. Both the SKILL.md side and the filesystem side now produce posix-style relative paths from the skill root, so cross-platform comparison is byte-identical. - Stage-numbering check updated to match the broader filename pattern and to dedupe by integer (so `1, 1b, 2, 3` no longer reports a phantom gap — `1b` is a sub-stage of `1`, not a missing number). Verified against the bmad-module-skill-forge SKF-* skill suite (7 skills): all now show 0 critical findings, 0 missing-stage entries, 0 orphaned-stage entries, and the correct stage count. Remaining high/medium findings on each skill are real workflow content issues, not the false-positive class fixed here. Tests: 11 passing in skills/bmad-workflow-builder/scripts/tests/ test_prepass_workflow_integrity.py covering discovery for all four stage-subdir conventions, the legacy root layout, nested sub-directories, letter-suffix sub-stages, the well-formed "1, 1b, 2, 3" sequence (no phantom gap), plus regression coverage asserting that genuinely missing or orphaned stages are still flagged. --- .../scripts/prepass-workflow-integrity.py | 92 +++++--- .../tests/test_prepass_workflow_integrity.py | 221 ++++++++++++++++++ 2 files changed, 283 insertions(+), 30 deletions(-) create mode 100644 skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py diff --git a/skills/bmad-workflow-builder/scripts/prepass-workflow-integrity.py b/skills/bmad-workflow-builder/scripts/prepass-workflow-integrity.py index d72aa75..a44ef83 100755 --- a/skills/bmad-workflow-builder/scripts/prepass-workflow-integrity.py +++ b/skills/bmad-workflow-builder/scripts/prepass-workflow-integrity.py @@ -207,24 +207,59 @@ def find_template_artifacts(filepath: Path, rel_path: str) -> list[dict]: return findings +STAGE_FILE_PATTERN = re.compile(r'^(?:step-)?\d+\w*-') +STAGE_SUBDIRS = ('steps-c', 'steps-a', 'steps-b', 'prompts') + + +def discover_step_files(skill_path: Path) -> list[Path]: + """Return Path objects for numbered step files anywhere they live. + + Walks the skill root (legacy layout), the conventional stage subdirectories + (`steps-c/`, `steps-a/`, `steps-b/`, `prompts/`), AND any nested + subdirectories underneath them (e.g. `steps-c/sub/` used by + skf-create-skill for sub-step files). Recognizes both the legacy + `NN-.md` filename pattern and the modern `step-NN-.md` + pattern (e.g. `step-01b-...`) enforced by `tools/validate-skills.js` + STEP-04. + """ + found: list[Path] = [] + for f in skill_path.iterdir(): + if f.is_file() and f.suffix == '.md' and f.name != 'SKILL.md' and STAGE_FILE_PATTERN.match(f.name): + found.append(f) + for subdir_name in STAGE_SUBDIRS: + subdir = skill_path / subdir_name + if not subdir.is_dir(): + continue + for f in subdir.rglob('*.md'): + if f.is_file() and STAGE_FILE_PATTERN.match(f.name): + found.append(f) + return found + + +def _rel_step_path(skill_path: Path, step_file: Path) -> str: + """Step file path relative to the skill root, with forward slashes for cross-platform comparison.""" + return step_file.relative_to(skill_path).as_posix() + + def cross_reference_stages(skill_path: Path, skill_content: str) -> tuple[dict, list[dict]]: - """Cross-reference stage files between SKILL.md and numbered prompt files at skill root.""" + """Cross-reference stage files between SKILL.md and numbered prompt files.""" findings = [] - # Get actual numbered prompt files at skill root (exclude SKILL.md) - actual_files = set() - for f in skill_path.iterdir(): - if f.is_file() and f.suffix == '.md' and f.name != 'SKILL.md' and re.match(r'^\d+-', f.name): - actual_files.add(f.name) + actual_paths = discover_step_files(skill_path) + actual_files = {_rel_step_path(skill_path, p) for p in actual_paths} - # Find stage references in SKILL.md — look for both old prompts/ style and new root style + # Match references with optional stage-subdir prefix (steps-c/, steps-a/, + # steps-b/, prompts/) plus optional nested subdirectories (e.g. + # `steps-c/sub/step-02b-...`), and either the legacy `NN-...` or modern + # `step-NN[suffix]-...` filename pattern. referenced = set() - # Match `prompts/XX-name.md` (legacy) or bare `XX-name.md` references - ref_pattern = re.compile(r'(?:prompts/)?(\d+-[^\s)`]+\.md)') + ref_pattern = re.compile( + r'((?:steps-c/|steps-a/|steps-b/|prompts/)(?:[^\s)`/]+/)*(?:step-)?\d+\w*-[^\s)`/]+\.md' + r'|(? tuple[dict, 'issue': f'Referenced stage file does not exist: {f}', }) - # Orphaned files (exist but not referenced) orphaned = actual_files - referenced for f in sorted(orphaned): findings.append({ @@ -242,16 +276,19 @@ def cross_reference_stages(skill_path: Path, skill_content: str) -> tuple[dict, 'issue': f'Stage file exists but not referenced in SKILL.md: {f}', }) - # Stage numbering check + # Stage numbering check — extract leading integer from each step file's basename. + # Letter suffixes (e.g. step-01b) are sub-stages of the integer base and don't + # break sequence; "1, 1b, 2, 3" is well-formed. Group by integer for the check. numbered = [] - for f in sorted(actual_files): - m = re.match(r'^(\d+)-(.+)\.md$', f) + num_pattern = re.compile(r'^(?:step-)?(\d+)\w*-(.+)\.md$') + for rel in sorted(actual_files): + basename = rel.split('/')[-1] + m = num_pattern.match(basename) if m: - numbered.append((int(m.group(1)), f)) + numbered.append((int(m.group(1)), rel)) if numbered: - numbered.sort() - nums = [n[0] for n in numbered] + nums = sorted({n[0] for n in numbered}) expected = list(range(nums[0], nums[0] + len(nums))) if nums != expected: gaps = set(expected) - set(nums) @@ -278,18 +315,14 @@ def check_prompt_basics(skill_path: Path) -> tuple[list[dict], list[dict]]: findings = [] prompt_details = [] - # Look for numbered prompt files at skill root - prompt_files = sorted( - f for f in skill_path.iterdir() - if f.is_file() and f.suffix == '.md' and f.name != 'SKILL.md' and re.match(r'^\d+-', f.name) - ) + prompt_files = sorted(discover_step_files(skill_path), key=lambda p: _rel_step_path(skill_path, p)) if not prompt_files: return prompt_details, findings for f in prompt_files: content = f.read_text(encoding='utf-8') - rel_path = f.name - detail = {'file': f.name, 'has_config_header': False, 'has_progression': False} + rel_path = _rel_step_path(skill_path, f) + detail = {'file': rel_path, 'has_config_header': False, 'has_progression': False} # Config header check if '{communication_language}' in content or '{document_output_language}' in content: @@ -334,7 +367,9 @@ def check_prompt_basics(skill_path: Path) -> tuple[list[dict], list[dict]]: def detect_workflow_type(skill_content: str, has_prompts: bool) -> str: """Detect workflow type from SKILL.md content.""" - has_stage_refs = bool(re.search(r'(?:prompts/)?\d+-\S+\.md', skill_content)) + has_stage_refs = bool( + re.search(r'(?:steps-c/|steps-a/|steps-b/|prompts/)?(?:step-)?\d+\w*-\S+\.md', skill_content) + ) has_routing = bool(re.search(r'(?i)(rout|stage|branch|path)', skill_content)) if has_stage_refs or (has_prompts and has_routing): @@ -389,10 +424,7 @@ def scan_workflow_integrity(skill_path: Path) -> dict: }) # Workflow type - has_prompts = any( - f.is_file() and f.suffix == '.md' and f.name != 'SKILL.md' and re.match(r'^\d+-', f.name) - for f in skill_path.iterdir() - ) + has_prompts = bool(discover_step_files(skill_path)) workflow_type = detect_workflow_type(skill_content, has_prompts) # Stage cross-reference diff --git a/skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py b/skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py new file mode 100644 index 0000000..b0c02fb --- /dev/null +++ b/skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.9" +# /// +"""Tests for prepass-workflow-integrity.py. + +Focuses on the discovery + cross-reference behaviour for the modern +`steps-c/step-NN-.md` layout (and nested `steps-c/sub/` subdirs) +that the prior version of the script could not see — producing +false-positive `critical/missing-stage` findings on every skill that +used the layout. +""" + +from __future__ import annotations + +import sys +import tempfile +from importlib.util import module_from_spec, spec_from_file_location +from pathlib import Path + +import pytest + +# Load the script as a module +_script_path = Path(__file__).resolve().parent.parent / 'prepass-workflow-integrity.py' +_spec = spec_from_file_location('prepass_workflow_integrity', _script_path) +_mod = module_from_spec(_spec) +_spec.loader.exec_module(_mod) + + +@pytest.fixture +def tmp_skill(): + """Yield (skill_path: Path) for a temp skill dir cleaned up at the end.""" + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +def _write(p: Path, content: str = "stub") -> None: + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content, encoding='utf-8') + + +# ─── discover_step_files ───────────────────────────────────────────────────── + + +def test_discovers_steps_c_layout(tmp_skill): + """The modern `steps-c/step-NN-.md` layout is found.""" + _write(tmp_skill / 'SKILL.md') + _write(tmp_skill / 'steps-c' / 'step-01-detect.md') + _write(tmp_skill / 'steps-c' / 'step-02-write.md') + + found = {p.name for p in _mod.discover_step_files(tmp_skill)} + assert found == {'step-01-detect.md', 'step-02-write.md'} + + +def test_discovers_legacy_root_layout(tmp_skill): + """The legacy `NN-.md` at skill root still works.""" + _write(tmp_skill / 'SKILL.md') + _write(tmp_skill / '01-foo.md') + _write(tmp_skill / '02-bar.md') + + found = {p.name for p in _mod.discover_step_files(tmp_skill)} + assert found == {'01-foo.md', '02-bar.md'} + + +def test_discovers_nested_sub_directory(tmp_skill): + """Nested `steps-c/sub/` (e.g. for sub-step files) is walked.""" + _write(tmp_skill / 'SKILL.md') + _write(tmp_skill / 'steps-c' / 'step-01-main.md') + _write(tmp_skill / 'steps-c' / 'sub' / 'step-01b-detail.md') + _write(tmp_skill / 'steps-c' / 'sub' / 'step-01c-more.md') + + found = {str(p.relative_to(tmp_skill)).replace('\\', '/') for p in _mod.discover_step_files(tmp_skill)} + assert found == { + 'steps-c/step-01-main.md', + 'steps-c/sub/step-01b-detail.md', + 'steps-c/sub/step-01c-more.md', + } + + +def test_recognizes_letter_suffix_step_numbers(tmp_skill): + """`step-01b-...` (sub-stage with letter suffix) is recognized.""" + _write(tmp_skill / 'SKILL.md') + _write(tmp_skill / 'steps-c' / 'step-01-main.md') + _write(tmp_skill / 'steps-c' / 'step-01b-detail.md') + _write(tmp_skill / 'steps-c' / 'step-01c-extra.md') + + found = {p.name for p in _mod.discover_step_files(tmp_skill)} + assert found == {'step-01-main.md', 'step-01b-detail.md', 'step-01c-extra.md'} + + +def test_skips_skill_md_and_non_numbered_md(tmp_skill): + _write(tmp_skill / 'SKILL.md') + _write(tmp_skill / 'README.md') + _write(tmp_skill / 'steps-c' / 'README.md') + _write(tmp_skill / 'steps-c' / 'step-01-yes.md') + + found = {p.name for p in _mod.discover_step_files(tmp_skill)} + assert found == {'step-01-yes.md'} + + +def test_supports_steps_a_and_steps_b_subdirs(tmp_skill): + """Other conventional stage subdirs (steps-a, steps-b, prompts) work too.""" + _write(tmp_skill / 'SKILL.md') + _write(tmp_skill / 'steps-a' / 'step-01-a.md') + _write(tmp_skill / 'steps-b' / 'step-01-b.md') + _write(tmp_skill / 'prompts' / '01-legacy.md') + + found = {p.name for p in _mod.discover_step_files(tmp_skill)} + assert found == {'step-01-a.md', 'step-01-b.md', '01-legacy.md'} + + +# ─── cross_reference_stages ───────────────────────────────────────────────── + + +_SKILL_MD_STEPS_C = """--- +name: example +description: Example. Use when testing. +--- + +# Example + +## Stages + +| # | Step | File | +|---|------|------| +| 1 | Detect | steps-c/step-01-detect.md | +| 2 | Write | steps-c/step-02-write.md | + +## On Activation + +Load `steps-c/step-01-detect.md`. +""" + + +_SKILL_MD_NESTED_SUB = """--- +name: example +description: Example with nested sub. Use when testing. +--- + +# Example + +## Stages + +| # | Step | File | +|---|------|------| +| 1 | Main | steps-c/step-01-main.md | +| 1b | Sub-step | steps-c/sub/step-01b-detail.md | + +## On Activation + +Begin at `steps-c/step-01-main.md`. +""" + + +def test_cross_reference_no_false_positive_critical_for_steps_c(tmp_skill): + """The bug this PR fixes: steps-c references must not produce missing-stage criticals.""" + _write(tmp_skill / 'SKILL.md', _SKILL_MD_STEPS_C) + _write(tmp_skill / 'steps-c' / 'step-01-detect.md') + _write(tmp_skill / 'steps-c' / 'step-02-write.md') + + summary, findings = _mod.cross_reference_stages(tmp_skill, _SKILL_MD_STEPS_C) + missing = [f for f in findings if f.get('category') == 'missing-stage'] + orphaned = [f for f in findings if f.get('category') == 'naming' and 'orphaned' in f.get('issue', '').lower() or 'not referenced' in f.get('issue', '').lower()] + assert missing == [], f"unexpected missing-stage findings: {missing}" + assert orphaned == [], f"unexpected orphaned-stage findings: {orphaned}" + assert summary['total_stages'] == 2 + + +def test_cross_reference_resolves_nested_sub_paths(tmp_skill): + """`steps-c/sub/step-NN-...` references resolve against nested files.""" + _write(tmp_skill / 'SKILL.md', _SKILL_MD_NESTED_SUB) + _write(tmp_skill / 'steps-c' / 'step-01-main.md') + _write(tmp_skill / 'steps-c' / 'sub' / 'step-01b-detail.md') + + summary, findings = _mod.cross_reference_stages(tmp_skill, _SKILL_MD_NESTED_SUB) + missing = [f for f in findings if f.get('category') == 'missing-stage'] + assert missing == [], f"unexpected missing-stage findings: {missing}" + assert 'steps-c/sub/step-01b-detail.md' in summary['actual'] + + +def test_letter_suffix_does_not_create_phantom_numbering_gap(tmp_skill): + """1, 1b, 2, 3 is a well-formed sequence (1b is a sub-stage of 1, not a gap).""" + _write(tmp_skill / 'SKILL.md', _SKILL_MD_NESTED_SUB) + _write(tmp_skill / 'steps-c' / 'step-01-main.md') + _write(tmp_skill / 'steps-c' / 'sub' / 'step-01b-detail.md') + + summary, findings = _mod.cross_reference_stages(tmp_skill, _SKILL_MD_NESTED_SUB) + naming_gap_findings = [ + f for f in findings + if f.get('category') == 'naming' and 'gap' in f.get('issue', '').lower() + ] + assert naming_gap_findings == [], f"unexpected gap finding: {naming_gap_findings}" + + +def test_genuinely_missing_stage_is_still_caught(tmp_skill): + """Sanity: when SKILL.md references a file that truly doesn't exist, we still flag it.""" + skill_md = _SKILL_MD_STEPS_C # references step-01 and step-02 + _write(tmp_skill / 'SKILL.md', skill_md) + _write(tmp_skill / 'steps-c' / 'step-01-detect.md') + # step-02 NOT created + + _summary, findings = _mod.cross_reference_stages(tmp_skill, skill_md) + missing = [f for f in findings if f.get('category') == 'missing-stage'] + assert any('step-02-write.md' in f['issue'] for f in missing), \ + f"genuinely missing file was not flagged: {findings}" + + +def test_genuinely_orphaned_stage_is_still_caught(tmp_skill): + """Sanity: when a step file exists but isn't referenced, we still flag it.""" + _write(tmp_skill / 'SKILL.md', _SKILL_MD_STEPS_C) + _write(tmp_skill / 'steps-c' / 'step-01-detect.md') + _write(tmp_skill / 'steps-c' / 'step-02-write.md') + _write(tmp_skill / 'steps-c' / 'step-99-orphan.md') + + _summary, findings = _mod.cross_reference_stages(tmp_skill, _SKILL_MD_STEPS_C) + orphan_msgs = [ + f['issue'] for f in findings + if f.get('category') == 'naming' and 'not referenced' in f.get('issue', '').lower() + ] + assert any('step-99-orphan.md' in msg for msg in orphan_msgs), \ + f"orphaned file was not flagged: {findings}" From 0155cf9b62ddd7d4e1d57af4be866348677e94a2 Mon Sep 17 00:00:00 2001 From: armel Date: Mon, 27 Apr 2026 10:25:43 +0400 Subject: [PATCH 2/2] test(workflow-builder): fix orphan-detection assertion (CodeRabbit RUF021) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit caught two compounding bugs on PR #82's test_cross_reference_no_false_positive_critical_for_steps_c assertion: 1. Operator precedence (RUF021). The expression category == 'naming' and 'orphaned' in issue or 'not referenced' in issue evaluated as (category == 'naming' and 'orphaned' in issue) or ('not referenced' in issue) because Python's `and` binds tighter than `or`. The right-hand disjunct matched ANY finding whose issue text contained "not referenced" — regardless of category. A non-orphan finding (e.g. a hypothetical frontmatter check that mentioned "not referenced" in its message) would have slipped into the orphan list and made the assertion stricter than intended in one direction while still permitting genuine orphan misses to pass in another. 2. Dead substring. The actual orphan finding text per prepass-workflow-integrity.py:276 is exactly Stage file exists but not referenced in SKILL.md: The word "orphaned" never appears in it. The `'orphaned' in issue.lower()` clause is therefore dead code that could never match. The assertion was effectively only the un-parenthesised OR branch from problem 1. Fix: replace the bug-prone expression with a canonical-prefix match against the exact string the script emits. The new check matches the finding shape directly: category == 'naming' AND issue.startswith('Stage file exists but not referenced in SKILL.md') This is precise (no category-leaks), readable (no precedence trap), and maintainable (locked to the prefix the producer actually uses — easier to grep for if either side changes). Also updates the matching positive-direction check in test_genuinely_orphaned_stage_is_still_caught for symmetry, so both tests share one definition of "what an orphan finding looks like". That assertion was not buggy (no `or`, and "not referenced" substring does appear in the finding text), but standardising the pattern across both tests prevents a future contributor from copy-pasting the older `'not referenced' in issue.lower()` shape into a new test that adds another condition with `or` and trips the same precedence trap. All 11 tests in test_prepass_workflow_integrity.py still pass. Refs: CodeRabbit comment on PR #82 --- .../tests/test_prepass_workflow_integrity.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py b/skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py index b0c02fb..b6d2a48 100644 --- a/skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py +++ b/skills/bmad-workflow-builder/scripts/tests/test_prepass_workflow_integrity.py @@ -160,7 +160,18 @@ def test_cross_reference_no_false_positive_critical_for_steps_c(tmp_skill): summary, findings = _mod.cross_reference_stages(tmp_skill, _SKILL_MD_STEPS_C) missing = [f for f in findings if f.get('category') == 'missing-stage'] - orphaned = [f for f in findings if f.get('category') == 'naming' and 'orphaned' in f.get('issue', '').lower() or 'not referenced' in f.get('issue', '').lower()] + # Orphan findings have category 'naming' AND an `issue` text that begins + # with the canonical "Stage file exists but not referenced in SKILL.md" + # prefix that prepass-workflow-integrity.py emits at line 276. Match the + # exact shape — the prior `'orphaned' in issue` substring check was dead + # code (the script never uses that word) and the `or 'not referenced'` + # disjunct was un-parenthesised, so it would have matched any finding + # regardless of category. CodeRabbit caught both bugs on PR #82. + orphaned = [ + f for f in findings + if f.get('category') == 'naming' + and f.get('issue', '').startswith('Stage file exists but not referenced in SKILL.md') + ] assert missing == [], f"unexpected missing-stage findings: {missing}" assert orphaned == [], f"unexpected orphaned-stage findings: {orphaned}" assert summary['total_stages'] == 2 @@ -213,9 +224,12 @@ def test_genuinely_orphaned_stage_is_still_caught(tmp_skill): _write(tmp_skill / 'steps-c' / 'step-99-orphan.md') _summary, findings = _mod.cross_reference_stages(tmp_skill, _SKILL_MD_STEPS_C) + # Use the same canonical-prefix match as test_cross_reference_no_false_positive_critical_for_steps_c + # so both tests share one definition of "what an orphan finding looks like". orphan_msgs = [ f['issue'] for f in findings - if f.get('category') == 'naming' and 'not referenced' in f.get('issue', '').lower() + if f.get('category') == 'naming' + and f.get('issue', '').startswith('Stage file exists but not referenced in SKILL.md') ] assert any('step-99-orphan.md' in msg for msg in orphan_msgs), \ f"orphaned file was not flagged: {findings}"