Lexus2016 · Lexus2016 · Jun 27, 2026 · Jun 27, 2026
diff --git a/scripts/evolution_test_shard.py b/scripts/evolution_test_shard.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python3
+"""Map edited source files to the most relevant test shard for pre-PR validation.
+
+This is the deterministic, import-safe helper behind the evolution
+implementation stage's pre-PR local-test gate (issue #580).  Given a list of
+changed file paths (relative to the repo root), it returns:
+
+  * a concrete pytest invocation that exercises the affected code,
+  * the list of test files selected,
+  * the heuristic used.
+
+Why a separate script?
+- Keeps the skill's decision logic in deterministic, unit-testable Python.
+- Avoids adding new core model tools; the skill simply runs this via the
+  existing ``terminal`` toolset.
+- The mapping can evolve (more languages, finer heuristics) without touching
+  the skill prose.
+
+Heuristic
+---------
+1. Tests live under ``tests/`` and mirror the source layout, or are named
+   ``test_<module>.py`` / ``test_<module>_<suffix>.py`` next to the module.
+2. For each changed source file under the repo root, look for:
+   a) a test file at the mirrored path ``tests/<dir>/test_<stem>.py``,
+   b) any ``test_*<stem>*.py`` file in the same directory as the source,
+   c) any ``test_*<stem>*.py`` file in ``tests/<dir>/``.
+3. If no concrete test file is found for any changed source file, fall back
+   to the affected directories under ``tests/`` (e.g. ``tests/agent/`` for
+   ``agent/foo.py``).  Directory fallbacks are only run when no concrete test
+   file was selected at all.
+4. Deduplicate and return the smallest useful command.
+
+CLI
+---
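+    evolution_test_shard.py [<changed-file> ...]
+
+With no arguments, the changed files are read from
+``git diff --name-only HEAD``.  The repository root is taken from the
+``EVOLUTION_REPO_DIR`` environment variable and defaults to the current
+working directory.
+
+Prints JSON and exits 0 when a shard is found, 1 when nothing maps.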
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Sequence, Tuple
+
+
+def _split_path_parts(path: str) -> List[str]:
+    """Split ``path`` into components, accepting ``/`` and ``\\`` separators.
+
+    Empty segments (leading, trailing, or doubled separators) and ``.``
+    (current-directory) segments are dropped, so ``./agent/foo.py`` and
+    ``agent/foo.py`` yield the same parts.  ``..`` segments are kept.
+    """
+    # "." must not survive: callers treat any segment starting with "." as a
+    # hidden directory, which would reject every "./"-prefixed path.
+    return [p for p in path.replace("\\", "/").split("/") if p and p != "."]
+
+
+def _stem(path: str) -> str:
+    """Return the final component of ``path`` without its last suffix.
+
+    Backslashes are normalised to ``/`` first, the same separator handling
+    ``_split_path_parts`` applies, so ``agent\\foo.py`` yields ``foo`` on
+    every platform instead of ``agent\\foo`` on POSIX.
+    """
+    return Path(path.replace("\\", "/")).stem
+
+
+def _is_source_file(path: str) -> bool:
+    """Report whether ``path`` names a Python source file needing a test mapping.
+
+    A path qualifies only if its suffix is ``.py`` and none of its components
+    is ``tests``, ``__pycache__``, or starts with a dot.
+    """
+    if Path(path).suffix != ".py":
+        return False
+    excluded = ("tests", "__pycache__")
+    for segment in _split_path_parts(path):
+        # Hidden entries, test trees, and bytecode caches are never sources.
+        if segment.startswith(".") or segment in excluded:
+            return False
+    return True
+
+
+def _mirrored_test(path: str) -> Optional[str]:
+    """Map a source path to its mirrored test path under ``tests/``.
+
+    ``agent/foo.py`` becomes ``tests/agent/test_foo.py``.  Returns ``None``
+    when ``path`` has no components.  The result is not checked for existence.
+    """
+    segments = _split_path_parts(path)
+    if len(segments) == 0:
+        return None
+    *parent_dirs, _filename = segments
+    test_name = f"test_{_stem(path)}.py"
+    return os.path.join("tests", *parent_dirs, test_name)
+
+
+def _tests_in_dir(directory: Path, stem: str, repo_root: Path) -> List[str]:
+    """List ``test_*<stem>*.py`` entries directly inside ``directory``.
+
+    Matches are returned as sorted strings relative to ``repo_root``.  A
+    missing or non-directory ``directory`` yields an empty list.
+    """
+    if not directory.is_dir():
+        return []
+    matches = [
+        str(hit.relative_to(repo_root))
+        for hit in directory.glob(f"test_*{stem}*.py")
+    ]
+    matches.sort()
+    return matches
+
+
+def _collect_candidates(path: str, repo_root: Path) -> Tuple[List[str], List[str]]:
+    """Return ``(test_files, fallback_dirs)`` for one source path.
+
+    ``test_files`` are existing test files, relative to ``repo_root``, found
+    via the mirrored path, a ``test_*<stem>*.py`` glob in the source's own
+    directory, and the same glob in the mirrored ``tests/`` directory.  The
+    list may contain duplicates; callers are expected to deduplicate.
+
+    ``fallback_dirs`` are broad test directories to use when no specific test
+    file is found: ``tests/<first-component>`` and, one level deeper,
+    ``tests/<first>/<second>``, each included only if it exists.
+    """
+    stem = _stem(path)
+    parts = _split_path_parts(path)
+    # Directory components of the source path; empty for a top-level file.
+    dir_parts = parts[:-1]
+
+    candidates: List[str] = []
+
+    # 1. Mirrored layout.
+    mirrored = _mirrored_test(path)
+    if mirrored and (repo_root / mirrored).is_file():
+        candidates.append(mirrored)
+
+    # 2. Tests in the source's own directory (rare, but allowed for scripts).
+    #    ``joinpath`` with no arguments returns ``repo_root`` itself, so a
+    #    top-level file searches the repo root instead of raising
+    #    ``TypeError`` from a zero-argument ``os.path.join``.
+    if parts:
+        src_dir = repo_root.joinpath(*dir_parts)
+        candidates.extend(_tests_in_dir(src_dir, stem, repo_root))
+
+    # 3. Tests under the mirrored directory (``tests/`` itself for a
+    #    top-level file).
+    test_dir = repo_root.joinpath("tests", *dir_parts)
+    candidates.extend(_tests_in_dir(test_dir, stem, repo_root))
+
+    # Fallback directories.
+    fallbacks: List[str] = []
+    if parts:
+        # If the source is in a package, map to tests/<package>/.
+        candidate_dir = repo_root / "tests" / parts[0]
+        if candidate_dir.is_dir():
+            fallbacks.append(str(candidate_dir.relative_to(repo_root)))
+        # One level deeper if available.
+        if len(parts) > 1:
+            deeper = repo_root / "tests" / parts[0] / parts[1]
+            if deeper.is_dir():
+                fallbacks.append(str(deeper.relative_to(repo_root)))
+
+    return candidates, fallbacks
+
+
+def _uniq(seq: Sequence[str]) -> List[str]:
+    """Return the items of ``seq`` with repeats removed, first occurrence kept."""
+    # dict keys keep insertion order, so this is an order-preserving dedupe.
+    return list(dict.fromkeys(seq))
+
+
+def build_shard(
+    changed_files: Sequence[str],
+    repo_root: Optional[Path] = None,
+) -> Dict[str, Any]:
+    """Build a test shard from a list of changed file paths.
+
+    Args:
+        changed_files: Changed paths, relative to the repo root.
+        repo_root: Repository root; defaults to the current working directory.
+
+    Returns a dict with keys:
+      - command: list of argv for the test runner (empty when nothing maps),
+      - test_files: list of concrete test files selected,
+      - fallback_dirs: broad test dirs found for the changed sources; they are
+        only part of ``command`` when no concrete test file was selected,
+      - heuristic: human-readable summary of how the shard was built,
+      - changed: the source files that contributed (for a test-only change,
+        every changed file as given).
+    """
+    repo_root = repo_root or Path.cwd()
+    changed_files = [str(f) for f in changed_files]
+
+    source_files = [p for p in changed_files if _is_source_file(p)]
+    # If the PR only touches tests, run those directly.
+    if not source_files:
+        test_only = [
+            p
+            for p in changed_files
+            if p.endswith(".py") and "tests" in _split_path_parts(p)
+        ]
+        if test_only:
+            return {
+                "command": ["python", "-m", "pytest", *test_only, "-q"],
+                "test_files": test_only,
+                "fallback_dirs": [],
+                "heuristic": "test-only change: run the edited test files",
+                "changed": changed_files,
+            }
+
+    test_files: List[str] = []
+    fallback_dirs: List[str] = []
+
+    for path in source_files:
+        cands, falls = _collect_candidates(path, repo_root)
+        test_files.extend(cands)
+        fallback_dirs.extend(falls)
+
+    # Deduplicate while preserving order.
+    test_files = _uniq(test_files)
+    fallback_dirs = _uniq(fallback_dirs)
+
+    # Filter out paths that don't exist in this repo_root (e.g. a source file's
+    # own directory may have produced pattern candidates that don't exist here).
+    test_files = [t for t in test_files if (repo_root / t).is_file()]
+    fallback_dirs = [d for d in fallback_dirs if (repo_root / d).is_dir()]
+
+    # If we have concrete test files for a source directory, don't also schedule
+    # the whole directory as a fallback.  The key is the first two components
+    # of each test path, joined through ``Path`` so it uses the same native
+    # separator as the ``fallback_dirs`` entries it is compared against (a
+    # hard-coded "/" never matches them on Windows).
+    covered_dirs = {
+        str(Path(*Path(t).parts[:2]))
+        for t in test_files
+        if len(Path(t).parts) >= 2
+    }
+    fallback_dirs = [d for d in fallback_dirs if d not in covered_dirs]
+
+    # Prefer concrete test files; fall back to directory shards only when needed.
+    targets = test_files or fallback_dirs
+    if not targets:
+        return {
+            "command": [],
+            "test_files": [],
+            "fallback_dirs": [],
+            "heuristic": "no mapped tests found",
+            "changed": source_files,
+        }
+
+    if test_files:
+        heuristic = f"mirrored/nearby tests for {len(source_files)} source file(s)"
+    else:
+        heuristic = f"directory fallback for {len(source_files)} source file(s)"
+
+    return {
+        "command": ["python", "-m", "pytest", *targets, "-q"],
+        "test_files": test_files,
+        "fallback_dirs": fallback_dirs,
+        "heuristic": heuristic,
+        "changed": source_files,
+    }
+
+
+def _find_changed_files(git_root: Path) -> List[str]:
+    """List the paths printed by ``git diff --name-only HEAD`` for ``git_root``.
+
+    Best-effort: returns an empty list if running git raises any exception
+    (including the 30-second timeout) or git exits non-zero.  Blank lines are
+    dropped and surrounding whitespace is stripped from each path.
+    """
+    import subprocess
+
+    git_cmd = ["git", "-C", str(git_root), "diff", "--name-only", "HEAD"]
+    try:
+        result = subprocess.run(
+            git_cmd,
+            capture_output=True,
+            text=True,
+            check=False,
+            timeout=30,
+        )
+    except Exception:
+        return []
+    if result.returncode != 0:
+        return []
+    stripped = (line.strip() for line in result.stdout.splitlines())
+    return [name for name in stripped if name]
+
+
+def main(argv: List[str]) -> int:
+    """CLI entry point: print the shard as JSON and return the exit status.
+
+    ``argv[1:]`` are the changed files; when none are given they are inferred
+    from git.  The repo root comes from ``EVOLUTION_REPO_DIR``, defaulting to
+    the current working directory.  Returns 0 when the shard has a command
+    and 1 otherwise.
+    """
+    default_root = str(Path.cwd())
+    repo_root = Path(os.environ.get("EVOLUTION_REPO_DIR", default_root))
+
+    # Explicit paths win; with none given, infer from git diff.
+    explicit = argv[1:]
+    changed_files = explicit if explicit else _find_changed_files(repo_root)
+
+    shard = build_shard(changed_files, repo_root)
+    print(json.dumps(shard, indent=2))
+    if shard["command"]:
+        return 0
+    return 1
+
+
+if __name__ == "__main__":
+    # Propagate main()'s return value as the process exit status.
+    sys.exit(main(sys.argv))
@@ -238,10 +238,25 @@ final_priority = base_priority + community*0.1 + age*0.15 + compatibility*0.2 +
     capability) must NOT be selected for monolithic implementation — that is how
     big work fails at the merge gate, so the agent learns to avoid hard tasks
     (the opposite of "best at any level"). Instead, **decompose it first**: open
-    linked child issues (label `needs-decomposition` on the parent, reference the
-    parent in each child), each a shippable slice with `effort ≤ 0.6`, then let
-    those children compete normally. Select children, not the monolith. This is
-    how the agent takes on complexity without choking on it.
+    linked child issues, label the parent `needs-split` (the canonical
+    decomposition label), reference the parent in each child, and make each
+    child a shippable slice with `effort ≤ 0.6`, then let those children compete
+    normally. Select children, not the monolith. This is how the agent takes on
+    complexity without choking on it.
+
+    After creating child issues, update the parent issue with the decomposition so
+    the owner can see the plan at a glance:
+    ```bash
+    gh label create needs-split --repo Lexus2016/hermes-agent-evolution \
+      --color d4c5f9 \
+      --description "Wanted, but exceeds one cycle — needs decomposition" 2>/dev/null || true
+    gh issue edit <parent#> --repo Lexus2016/hermes-agent-evolution \
+      --add-label needs-split 2>/dev/null || true
+    gh issue comment <parent#> --repo Lexus2016/hermes-agent-evolution \
+      --body $'Decomposed into child issues:\n- #<child1>\n- #<child2>'
+    ```
+    A `needs-split` parent without child issues is NOT ready for implementation
+    and will be skipped by the implementation gate (see evolution-implementation
+    step 1c).
 
 6c. **Realized-impact feedback — don't evolve blind (goal 3).** Read the sidecar
     `~/.hermes/profiles/user1/evolution/realized-impact.txt` (one

@@ -26,6 +26,21 @@ Implement selected issues, create versions, and self-update.
     with `"skipped": "stale analysis input (<date>) — upstream stage failed"`
     and STOP. Acting on outdated decisions is worse than skipping a cycle.
 
+1c. **Mandatory decomposition gate — NEVER select an issue for implementation
+    if it is flagged `needs-split` and has no decomposed child issues.** After
+    loading the selection and before branching, hydrate each selected issue's
+    labels and comments. If an issue carries the `needs-split` label, query
+    GitHub for child issues (open or closed) that reference this issue by number
+    in their title or body, or carry a parent-link label. If none exist, SKIP it,
+    keep the issue OPEN with the `needs-split` label, and log the reason. This
+    makes the analysis stage's decomposition rule blocking rather than advisory.
+
+    ```bash
+    # Example child-issue check (heuristic: title/body references #N or a parent label)
+    gh issue list --repo Lexus2016/hermes-agent-evolution --state all \
+      --search "#<N>" --json number,title,labels
+    ```
+
 1a0. **`next-increment` issues — CONTINUE a multi-phase roadmap feature.** If a
     selected issue is labelled `next-increment`, a PRIOR increment already MERGED
     and integration left a continuation brief in the comments listing what REMAINS
@@ -219,6 +234,11 @@ gh pr create --base main --head evolution/issue-123-feature-name \
 #   gh pr create --base main --head evolution/issue-123-feature-name \
 #     --title "feat: <feature name> — increment 1 of #123" \
 #     --body $'First coherent slice of #123.\n\nDeferred (next increment):\n- step 2 ...\n- step 3 ...'
+
+# Decomposition gate — when a selected issue was skipped because it is flagged
+# `needs-split` and has no child issues, do NOT create a branch/PR. Leave the
+# issue open with the `needs-split` label and record the skip in the
+# implementation report under `skipped` with reason `needs-decomposition`.
 ```
 
 Once the PR is open, flip the issue to `accepted` so the owner sees — straight
@@ -264,6 +284,44 @@ CI and been merged into `main`, with built-in backup + auto-rollback. The skill
 call `git pull` and does NOT restart the gateway itself — otherwise the agent
 would update itself in the middle of its own work.
 
+## Output
+
+After each run, append a Markdown report to
+`~/.hermes/profiles/user1/evolution/implementation/YYYY-MM-DD.md` with the
+following structure:
+
+```markdown
+# Evolution Implementation Report — 2026-06-27
+
+## Summary
+- Selected issues: 3
+- Implemented: 1
+- Skipped: 1
+- Rejected: 1
+
+## Implemented
+- #580: Pre-PR local test runner for the targeted change
+  - PR: #575
+  - Branch: `evolution/issue-580-test-shard`
+  - Files: `scripts/evolution_test_shard.py`, `tests/scripts/test_evolution_test_shard.py`
+  - Checks: lint ✓, format ✓, targeted tests ✓
+
+## Skipped
+- #579: Mandatory small-slice decomposition before implementation selection
+  - Reason: `needs-decomposition` — not a code change, requires skill-policy
+    revision. Defer to a dedicated skill-editing cycle with owner review.
+
+## Rejected
+- #578: Closed-PR postmortem miner
+  - Reason: `out-of-scope` — no closed-PR mining infrastructure exists in the
+    current repo; would require GitHub API pagination and persistent storage that
+    outstrips a single-cycle change.
+```
+
+The report is append-only (one file per calendar day) so multiple runs in the same
+day accumulate rather than overwrite. Use `## Run HH:MM` sub-headings if a report
+already exists.
+
 ## Safety — enforced by the gate, not by self-assessment
 
 There used to be a checklist here that the agent "ticked for itself" — that is not protection.