Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions scripts/evolution_backlog_gate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""Generation backlog gate — throttle FEATURE proposals when the board is full.

The evolution pipeline generates ~25 issues/day (research + issues +
introspection), but the processing chain lands only a few per day, so without a
cap the open backlog grows without bound (the recurring "too many unprocessed
issues" complaint).

This gate lets the generation stages decide whether to SKIP creating new
FEATURE / IMPROVEMENT proposals when the open *feature* backlog is already at or
above a cap. BUGS are NEVER throttled — a real defect ([FIX] / `bug`) must
always be filed regardless of backlog, since unfiled bugs block work and are
cheap to keep.

A "feature" open issue = open AND not a bug:
* title does NOT start with ``[FIX]`` (case-insensitive), AND
* labels do NOT include ``bug``.

CLI (so a skill can call it from the terminal tool):
    evolution_backlog_gate.py check            # exit 0 = OK to create features,
                                               # exit 1 = THROTTLE (skip features)
    evolution_backlog_gate.py check --cap 30   # override the cap

Prints a one-line JSON summary on stdout either way:
    {"open_features": 42, "cap": 25, "throttle": true}

Cap resolution: --cap arg > env EVOLUTION_FEATURE_BACKLOG_CAP > DEFAULT_CAP.
Pure functions are import-safe for unit tests (the gh call is injected).
"""

from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
from typing import Any, Callable, Dict, List, Tuple

# Fallback cap on open feature-like issues; resolve_cap() returns it when
# neither the --cap argument nor EVOLUTION_FEATURE_BACKLOG_CAP gives a usable
# integer.
DEFAULT_CAP = 25

# GitHub repository (owner/name) passed to `gh issue list --repo`.
# Repo is resolved the same way the rest of the evolution tooling does.
_REPO = "Lexus2016/hermes-agent-evolution"


def resolve_cap(arg_cap: int | None = None) -> int:
    """Pick the effective feature-backlog cap.

    Precedence: an explicit ``arg_cap`` wins, then the
    ``EVOLUTION_FEATURE_BACKLOG_CAP`` environment variable, then
    ``DEFAULT_CAP``. An unset, blank, or non-integer environment value falls
    through to the default.
    """
    if arg_cap is not None:
        return arg_cap

    raw = os.environ.get("EVOLUTION_FEATURE_BACKLOG_CAP", "").strip()
    if not raw:
        return DEFAULT_CAP

    try:
        parsed = int(raw)
    except ValueError:
        # A malformed override must not break the gate; use the default.
        return DEFAULT_CAP
    return parsed


def is_bug(issue: Dict[str, Any]) -> bool:
    """Return True when an issue is a bug/[FIX] (never throttled).

    An issue counts as a bug when either:

    * its title, ignoring leading whitespace and case, starts with ``[FIX]``; or
    * one of its labels is named ``bug`` (case-insensitive). Labels may be
      ``{"name": ...}`` dicts or plain strings.

    Malformed data (a non-string title, a label dict without a usable
    ``"name"``, a non-string label) is ignored rather than raising, so a
    single odd record cannot crash the gate.
    """
    title = issue.get("title")
    if isinstance(title, str) and title.lstrip().upper().startswith("[FIX]"):
        return True

    for lbl in issue.get("labels") or []:
        name = lbl.get("name") if isinstance(lbl, dict) else lbl
        # A missing/null name (or a non-string label) can never be "bug".
        if isinstance(name, str) and name.lower() == "bug":
            return True
    return False


def count_open_features(issues: List[Dict[str, Any]]) -> int:
    """Return how many of ``issues`` are feature-like, i.e. not bugs per ``is_bug``."""
    total = 0
    for entry in issues:
        if is_bug(entry):
            continue
        total += 1
    return total


def should_throttle(open_features: int, cap: int) -> bool:
    """Return True when the feature backlog has reached or exceeded ``cap``."""
    backlog_is_full = cap <= open_features
    return backlog_is_full


def _default_runner(cmd: List[str]) -> Tuple[int, str]:
    """Run ``cmd`` and return ``(exit_code, stdout)``.

    The command runs without a shell, with captured text output and a
    30-second timeout. A failure to launch or finish is reported as a non-zero
    exit code with empty output instead of an exception:

    * ``127`` when the executable cannot be started (e.g. ``gh`` is not
      installed or not executable);
    * ``124`` when the command exceeds the timeout.

    Why: an uncaught exception would end the script with exit status 1, which
    the CLI contract defines as THROTTLE. Returning a non-zero code lets the
    caller take its documented fail-open path.
    """
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except subprocess.TimeoutExpired:
        return 124, ""
    except OSError:
        # Covers FileNotFoundError / PermissionError when the binary is unusable.
        return 127, ""
    return proc.returncode, (proc.stdout or "")


def fetch_open_issues(
    runner: Callable[[List[str]], Tuple[int, str]] | None = None,
) -> List[Dict[str, Any]] | None:
    """List open issues through the ``gh`` CLI.

    ``runner`` takes the command as a list and returns ``(exit_code, stdout)``;
    when omitted, ``_default_runner`` is used. The query asks for at most 300
    open issues with the ``number``, ``title`` and ``labels`` fields.

    Returns the decoded JSON list, or ``None`` when the command exits
    non-zero, the output is not valid JSON, or the JSON is not a list. Callers
    treat ``None`` as "count unavailable" and fail open.
    """
    run = runner if runner else _default_runner
    gh_cmd = [
        "gh", "issue", "list", "--repo", _REPO,
        "--state", "open", "--limit", "300",
        "--json", "number,title,labels",
    ]
    status, payload = run(gh_cmd)
    if status != 0:
        return None

    try:
        parsed = json.loads(payload)
    except (ValueError, TypeError):
        return None

    if isinstance(parsed, list):
        return parsed
    return None


def evaluate(
    cap: int,
    runner: Callable[[List[str]], Tuple[int, str]] | None = None,
) -> Dict[str, Any]:
    """Compute the gate decision for the given ``cap``.

    Returns a dict with ``open_features``, ``cap`` and ``throttle``. When the
    issue list cannot be read, the gate fails OPEN: ``open_features`` is
    ``None``, ``throttle`` is ``False`` and a ``note`` explains why. Generation
    is never blocked just because the count could not be read.
    """
    open_issues = fetch_open_issues(runner)
    if open_issues is not None:
        feature_count = count_open_features(open_issues)
        return {
            "open_features": feature_count,
            "cap": cap,
            "throttle": should_throttle(feature_count, cap),
        }

    return {
        "open_features": None,
        "cap": cap,
        "throttle": False,
        "note": "gh unavailable; defaulting to no throttle",
    }


def main(argv: List[str] | None = None) -> int:
    """CLI entry point: print the gate decision as one JSON line.

    ``argv`` is the argument list to parse (``None`` lets argparse read
    ``sys.argv``). Returns the process exit status: ``1`` means THROTTLE (skip
    features) and ``0`` means it is OK to create features.

    The gate fails OPEN on unexpected errors. An uncaught exception would make
    the interpreter exit with status 1, which callers cannot tell apart from
    THROTTLE. Any error raised while evaluating is therefore reported on
    stderr and turned into a no-throttle result with exit status 0.
    """
    parser = argparse.ArgumentParser(
        description="Throttle FEATURE proposals when the open backlog is full "
                    "(bugs are never throttled)."
    )
    parser.add_argument("action", choices=["check"], help="check the gate")
    parser.add_argument("--cap", type=int, default=None,
                        help=f"feature-backlog cap (default {DEFAULT_CAP} / "
                             f"env EVOLUTION_FEATURE_BACKLOG_CAP)")
    args = parser.parse_args(argv)

    cap = resolve_cap(args.cap)
    try:
        result = evaluate(cap)
    except Exception as exc:  # CLI boundary: never let a crash read as THROTTLE
        print(f"evolution_backlog_gate: {exc!r}", file=sys.stderr)
        result = {"open_features": None, "cap": cap, "throttle": False,
                  "note": "gate error; defaulting to no throttle"}

    print(json.dumps(result))
    # exit 1 = THROTTLE (skip features), 0 = OK to create features.
    return 1 if result["throttle"] else 0


if __name__ == "__main__":
raise SystemExit(main())
13 changes: 13 additions & 0 deletions skills/evolution/evolution-introspection/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ gh label create ux --repo "$REPO" --color fbca04 --description "Intera
# 'bug' and 'enhancement' are standard GitHub labels, present by default.
```

**Backlog gate — bugs ALWAYS, features only when there's room.** The pipeline
generates more than it implements, so an uncapped backlog keeps causing the
recurring "too many unprocessed issues" problem. Consult the generation gate
before creating issues:
```bash
python scripts/evolution_backlog_gate.py check # exit 1 = THROTTLE features
```
- ALWAYS create `[FIX]` issues — a real defect blocks work and is never throttled
(label them `bug` so they're correctly excluded from the backlog cap).
- If the gate exits 1 (throttle), create ONLY the `[FIX]` issues this cycle and
SKIP `[CAPABILITY]` / `[UX]` / `[PERFORMANCE]` (feature-like; they can wait for
the backlog to drain). If it exits 0, create all categories as usual.
- Fail-OPEN: if the gate can't run, proceed normally.

**Deduplicate first (MANDATORY — many installations file in parallel).** Other
installs hit the same problems, so the same issue WILL be proposed elsewhere.
Before creating, list existing issues and SKIP anything already covered (open OR
Expand Down
15 changes: 15 additions & 0 deletions skills/evolution/evolution-issues/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,21 @@ Create GitHub issues and pull requests based on research.
Treat each weakness cluster as a proposal input: run it through the same
self-critique + dedup gates below before filing. The miner emits only
anonymized counts/classes/labels — never raw trace content.
1b. **Backlog gate — don't pile FEATURES onto a full board (generation throttle).**
The pipeline generates far more proposals than it can implement; an uncapped
open backlog keeps causing the recurring "too many unprocessed issues" problem.
BEFORE filing any `[FEATURE]` / `[IMPROVEMENT]` / `[REPLACEMENT]` proposals this
cycle, consult the gate:
```bash
python scripts/evolution_backlog_gate.py check # exit 1 = THROTTLE → skip features this cycle
```
If it exits 1 (throttle), do NOT create new feature/improvement proposals this
run — record `"features throttled (open NN >= cap)"` in your report and STOP
(no `gh issue create` for proposals). Cap = `EVOLUTION_FEATURE_BACKLOG_CAP`
(default 25); fail-OPEN if gh is unavailable. **BUGS are never throttled** —
real defects (`[FIX]`) are still filed by the introspection stage regardless
of this gate. Rationale: features can wait until the backlog drains; bugs
cannot.
2. **Select** proposals with Priority Score >= 0.7
2a. **Self-critique BEFORE you file (do not propose noise).** A high priority
score is not enough. For EACH candidate, honestly ask — and DROP it (don't
Expand Down
6 changes: 6 additions & 0 deletions skills/evolution/evolution-research/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ wants the research, not the plumbing.
- Maximum 20 proposals at a time
- Only high-quality, well-justified ideas
- Priority Score >= 0.7
- **Backlog-aware (saves wasted work):** the downstream `evolution-issues` stage
throttles new FEATURE/IMPROVEMENT proposals when the open backlog is full (via
`scripts/evolution_backlog_gate.py`, which it runs — this research stage has no
terminal). So bias toward FEWER, higher-value proposals: a long feature list is
likely to be skipped downstream when the board is full. Bug/defect findings are
always worth reporting (bugs are never throttled).

## ⚠️ Security: research data is UNtrusted

Expand Down
118 changes: 118 additions & 0 deletions tests/scripts/test_evolution_backlog_gate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Tests for scripts/evolution_backlog_gate.py — throttle features, never bugs."""

import json
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "scripts"))

import evolution_backlog_gate as gate # noqa: E402


def _issue(title, labels=()):
    """Build a minimal issue dict with ``title`` and ``{"name": ...}`` label objects."""
    label_objs = []
    for label_name in labels:
        label_objs.append({"name": label_name})
    return {"title": title, "labels": label_objs}


class TestIsBug:
    """Classification of a single issue as bug vs. feature-like."""

    def test_fix_title_is_bug(self):
        candidate = _issue("[FIX] tool crashes")
        assert gate.is_bug(candidate) is True

    def test_fix_title_case_insensitive(self):
        candidate = _issue("[fix] lowercase")
        assert gate.is_bug(candidate) is True

    def test_bug_label_is_bug(self):
        candidate = _issue("something broken", labels=["bug"])
        assert gate.is_bug(candidate) is True

    def test_feature_is_not_bug(self):
        candidate = _issue("[FEATURE] new thing", labels=["enhancement"])
        assert gate.is_bug(candidate) is False

    def test_improvement_is_not_bug(self):
        candidate = _issue("[IMPROVEMENT] x", labels=["proposal"])
        assert gate.is_bug(candidate) is False


class TestCounting:
    """Feature counting and the throttle threshold."""

    def test_counts_only_features(self):
        features = [
            _issue("[FEATURE] a", ["proposal"]),
            _issue("[IMPROVEMENT] b", ["enhancement"]),
            _issue("[REPLACEMENT] d", ["proposal"]),
        ]
        bugs = [
            _issue("[FIX] c"),           # bug by title — excluded
            _issue("broken", ["bug"]),   # bug by label — excluded
        ]
        # Same board as before: two features, the two bugs, then one feature.
        board = features[:2] + bugs + features[2:]
        assert gate.count_open_features(board) == 3

    def test_should_throttle_at_and_above_cap(self):
        cap = 25
        assert gate.should_throttle(cap, cap) is True
        assert gate.should_throttle(cap + 1, cap) is True
        assert gate.should_throttle(cap - 1, cap) is False


class TestCapResolution:
    """Cap precedence: --cap argument > environment variable > default."""

    def test_arg_wins(self, monkeypatch):
        monkeypatch.setenv("EVOLUTION_FEATURE_BACKLOG_CAP", "10")
        resolved = gate.resolve_cap(30)
        assert resolved == 30

    def test_env_used_when_no_arg(self, monkeypatch):
        monkeypatch.setenv("EVOLUTION_FEATURE_BACKLOG_CAP", "10")
        resolved = gate.resolve_cap(None)
        assert resolved == 10

    def test_default_when_nothing(self, monkeypatch):
        monkeypatch.delenv("EVOLUTION_FEATURE_BACKLOG_CAP", raising=False)
        resolved = gate.resolve_cap(None)
        assert resolved == gate.DEFAULT_CAP

    def test_bad_env_falls_back(self, monkeypatch):
        monkeypatch.setenv("EVOLUTION_FEATURE_BACKLOG_CAP", "notanint")
        resolved = gate.resolve_cap(None)
        assert resolved == gate.DEFAULT_CAP


class TestEvaluate:
    """End-to-end gate decision with an injected ``gh`` runner."""

    def _runner(self, issues, rc=0):
        # Fake runner: ignores the command and replies with the canned issues.
        return lambda cmd: (rc, json.dumps(issues))

    def test_throttles_when_over_cap(self):
        board = [_issue(f"[FEATURE] {i}", ["proposal"]) for i in range(30)]
        decision = gate.evaluate(25, runner=self._runner(board))
        assert decision["throttle"] is True and decision["open_features"] == 30

    def test_ok_when_under_cap(self):
        board = [_issue(f"[FEATURE] {i}", ["proposal"]) for i in range(5)]
        decision = gate.evaluate(25, runner=self._runner(board))
        assert decision["throttle"] is False and decision["open_features"] == 5

    def test_bugs_do_not_count_toward_cap(self):
        bugs = [_issue(f"[FIX] bug {i}") for i in range(40)]
        board = bugs + [_issue("[FEATURE] one", ["proposal"])]
        decision = gate.evaluate(25, runner=self._runner(board))
        # 40 bugs + 1 feature → only 1 feature → not throttled
        assert decision["open_features"] == 1 and decision["throttle"] is False

    def test_fails_open_when_gh_errors(self):
        def failing_gh(cmd):
            return 1, "error: not authenticated"

        decision = gate.evaluate(25, runner=failing_gh)
        assert decision["throttle"] is False  # never block on a failed count

    def test_fails_open_on_garbage(self):
        def garbage_gh(cmd):
            return 0, "not json"

        decision = gate.evaluate(25, runner=garbage_gh)
        assert decision["throttle"] is False


class TestCLI:
    """Exit status and stdout JSON of ``main`` with the gh runner patched out."""

    def test_exit_1_when_throttled(self, capsys, monkeypatch):
        board = [_issue(f"[FEATURE] {i}", ["proposal"]) for i in range(30)]
        monkeypatch.setattr(gate, "_default_runner", lambda cmd: (0, json.dumps(board)))
        exit_code = gate.main(["check", "--cap", "25"])
        printed = json.loads(capsys.readouterr().out)
        assert exit_code == 1 and printed["throttle"] is True

    def test_exit_0_when_ok(self, capsys, monkeypatch):
        board = [_issue("[FEATURE] one", ["proposal"])]
        monkeypatch.setattr(gate, "_default_runner", lambda cmd: (0, json.dumps(board)))
        exit_code = gate.main(["check", "--cap", "25"])
        printed = json.loads(capsys.readouterr().out)
        assert exit_code == 0 and printed["throttle"] is False
Loading