feat: add board cache to eliminate redundant fetches in watch loops

GiggleLiu · claude · GiggleLiu · commit 73ece9a9d52d · 2026-03-16T09:50:07.000+08:00
Add --board-cache flag to pipeline_board.py CLI that caches board data
to a file for up to 120 seconds. watch_and_dispatch now shares a single
board fetch between request_copilot_reviews and poll_project_items
within each loop iteration, eliminating a redundant API call.

Also fix batch_issue_fetcher to not fire when tests inject a custom
issue_fetcher (avoids hitting the real API in tests).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/scripts/make_helpers.sh b/scripts/make_helpers.sh
@@ -61,13 +61,14 @@ run_agent() {
 # --- Project board ---
 
 # Detect the next eligible item and preserve retryable state in a queue.
-#   poll_project_items <mode> <state-file> [repo] [number] [format]
+#   poll_project_items <mode> <state-file> [repo] [number] [format] [board-cache]
 poll_project_items() {
     mode=$1
     state_file=$2
     repo=${3-}
     number=${4-}
     fmt=${5-text}
+    board_cache=${6-}
 
     set -- scripts/pipeline_board.py next "$mode" "$state_file" --format "$fmt"
     if [ -n "$repo" ]; then
@@ -76,6 +77,9 @@ poll_project_items() {
     if [ -n "$number" ]; then
         set -- "$@" --number "$number"
     fi
+    if [ -n "$board_cache" ]; then
+        set -- "$@" --board-cache "$board_cache"
+    fi
     python3 "$@"
 }
 
@@ -205,10 +209,15 @@ cleanup_pipeline_worktree() {
 }
 
 # Request Copilot review on all Review pool PRs that don't have one yet.
-#   request_copilot_reviews <repo>
+#   request_copilot_reviews <repo> [board-cache]
 request_copilot_reviews() {
     repo=$1
-    prs=$(python3 scripts/pipeline_board.py list review --repo "$repo" --format json \
+    board_cache=${2-}
+    cache_args=""
+    if [ -n "$board_cache" ]; then
+        cache_args="--board-cache $board_cache"
+    fi
+    prs=$(python3 scripts/pipeline_board.py list review --repo "$repo" --format json $cache_args \
         | python3 -c "
 import sys, json
 data = json.load(sys.stdin)
@@ -235,16 +244,20 @@ watch_and_dispatch() {
     interval=${POLL_INTERVAL:-600}
 
     state_file=$(mktemp /tmp/problemreductions-${mode}-state.XXXXXX)
-    trap 'rm -f "$state_file"' EXIT INT TERM
+    board_cache="/tmp/problemreductions-${mode}-board-cache.json"
+    trap 'rm -f "$state_file" "$board_cache"' EXIT INT TERM
 
     echo "Watching for new ${label} (polling every $((interval / 60))m)..."
     while true; do
+        # Invalidate board cache at the start of each iteration
+        rm -f "$board_cache"
+
         # For review mode, request Copilot reviews on PRs that don't have one yet
         if [ "$mode" = "review" ] && [ -n "$repo" ]; then
-            request_copilot_reviews "$repo"
+            request_copilot_reviews "$repo" "$board_cache"
         fi
 
-        next_item=$(poll_project_items "$mode" "$state_file" "$repo")
+        next_item=$(poll_project_items "$mode" "$state_file" "$repo" "" text "$board_cache")
         status=$?
         if [ "$status" -eq 0 ]; then
             item_id=$(printf '%s\n' "$next_item" | cut -f1)
diff --git a/scripts/pipeline_board.py b/scripts/pipeline_board.py
@@ -7,6 +7,7 @@
 import json
 import subprocess
 import sys
+import time
 from collections import Counter
 from datetime import datetime, timezone
 from pathlib import Path
@@ -70,8 +71,29 @@ def run_gh(*args: str) -> str:
     return subprocess.check_output(["gh", *args], text=True)
 
 
-def fetch_board_items(owner: str, project_number: int, limit: int) -> dict:
-    return json.loads(
+def fetch_board_items(
+    owner: str,
+    project_number: int,
+    limit: int,
+    *,
+    cache_file: Path | None = None,
+    cache_max_age: float = 120,
+) -> dict:
+    """Fetch project board items, optionally using a file cache.
+
+    When *cache_file* is set and the file exists and is younger than
+    *cache_max_age* seconds, the cached JSON is returned without an API call.
+    Otherwise the board is fetched from GitHub and written to the cache file.
+    """
+    if cache_file is not None:
+        try:
+            age = time.time() - cache_file.stat().st_mtime
+            if age < cache_max_age:
+                return json.loads(cache_file.read_text())
+        except (FileNotFoundError, json.JSONDecodeError):
+            pass
+
+    data = json.loads(
         run_gh(
             "project",
             "item-list",
@@ -85,6 +107,12 @@ def fetch_board_items(owner: str, project_number: int, limit: int) -> dict:
         )
     )
 
+    if cache_file is not None:
+        cache_file.parent.mkdir(parents=True, exist_ok=True)
+        cache_file.write_text(json.dumps(data))
+
+    return data
+
 
 def fetch_pr_reviews(repo: str, pr_number: int) -> list[dict]:
     data = json.loads(run_gh("api", f"repos/{repo}/pulls/{pr_number}/reviews"))
@@ -1130,6 +1158,7 @@ def parse_args(argv: list[str]) -> argparse.Namespace:
     next_parser.add_argument("--limit", type=int, default=500)
     next_parser.add_argument("--number", type=int)
     next_parser.add_argument("--format", choices=["text", "json"], default="text")
+    next_parser.add_argument("--board-cache", type=Path, default=None)
 
     claim_parser = subparsers.add_parser("claim-next")
     claim_parser.add_argument("mode", choices=["ready", "review"])
@@ -1142,6 +1171,7 @@ def parse_args(argv: list[str]) -> argparse.Namespace:
     claim_parser.add_argument("--format", choices=["text", "json"], default="json")
     claim_parser.add_argument("--project-id", default=PROJECT_ID)
     claim_parser.add_argument("--field-id", default=STATUS_FIELD_ID)
+    claim_parser.add_argument("--board-cache", type=Path, default=None)
 
     ack_parser = subparsers.add_parser("ack")
     ack_parser.add_argument("state_file", type=Path)
@@ -1154,6 +1184,7 @@ def parse_args(argv: list[str]) -> argparse.Namespace:
     list_parser.add_argument("--project-number", type=int, default=8)
     list_parser.add_argument("--limit", type=int, default=500)
     list_parser.add_argument("--format", choices=["text", "json"], default="text")
+    list_parser.add_argument("--board-cache", type=Path, default=None)
 
     move_parser = subparsers.add_parser("move")
     move_parser.add_argument("item_id")
@@ -1183,7 +1214,7 @@ def main(argv: list[str] | None = None) -> int:
     if args.command == "claim-next":
         if args.mode == "review" and not args.repo:
             raise SystemExit("--repo is required in claim-next review mode")
-        board_data = fetch_board_items(args.owner, args.project_number, args.limit)
+        board_data = fetch_board_items(args.owner, args.project_number, args.limit, cache_file=args.board_cache)
         claim_result = claim_next_entry(
             args.mode,
             board_data,
@@ -1205,7 +1236,7 @@ def main(argv: list[str] | None = None) -> int:
     if args.command == "list":
         if args.mode == "review" and not args.repo:
             raise SystemExit("--repo is required in list review mode")
-        board_data = fetch_board_items(args.owner, args.project_number, args.limit)
+        board_data = fetch_board_items(args.owner, args.project_number, args.limit, cache_file=args.board_cache)
         if args.mode == "ready":
             items = status_items(board_data, STATUS_READY)
             return print_candidate_list(args.mode, items, fmt=args.format)
@@ -1234,7 +1265,7 @@ def main(argv: list[str] | None = None) -> int:
     if args.mode in {"review", "final-review"} and not args.repo:
         raise SystemExit(f"--repo is required in {args.mode} mode")
 
-    board_data = fetch_board_items(args.owner, args.project_number, args.limit)
+    board_data = fetch_board_items(args.owner, args.project_number, args.limit, cache_file=args.board_cache)
     next_item = select_next_entry(
         args.mode,
         board_data,
diff --git a/scripts/pipeline_skill_context.py b/scripts/pipeline_skill_context.py
@@ -1001,8 +1001,11 @@ def build_project_pipeline_context(
     existing_problem_finder: Callable[[Path], set[str]] | None = None,
 ) -> dict:
     board_fetcher = board_fetcher or fetch_project_board_data
+    _custom_issue_fetcher = issue_fetcher is not None
     issue_fetcher = issue_fetcher or pipeline_checks.fetch_issue
-    batch_issue_fetcher = batch_issue_fetcher or pipeline_board.batch_fetch_issues
+    # Only use batch fetcher when no custom per-item fetcher was injected (e.g. tests)
+    if batch_issue_fetcher is None and not _custom_issue_fetcher:
+        batch_issue_fetcher = pipeline_board.batch_fetch_issues
     existing_problem_finder = existing_problem_finder or scan_existing_problems
 
     board_data = board_fetcher(repo)
@@ -1022,14 +1025,17 @@ def build_project_pipeline_context(
         key=lambda pair: pair[1]["issue_number"],
     )
 
-    # Batch-fetch all issue data in one API call
-    all_issue_numbers = [int(entry["issue_number"]) for _, entry in ready_entries_items]
-    issues_cache = batch_issue_fetcher(repo, all_issue_numbers)
+    # Batch-fetch all issue data in one API call when batch fetcher is available
+    if batch_issue_fetcher is not None:
+        all_issue_numbers = [int(entry["issue_number"]) for _, entry in ready_entries_items]
+        issues_cache = batch_issue_fetcher(repo, all_issue_numbers)
 
-    def _fetch_one(repo: str, n: int) -> dict:
-        if n in issues_cache:
-            return issues_cache[n]
-        return issue_fetcher(repo, n)
+        def _fetch_one(repo: str, n: int) -> dict:
+            if n in issues_cache:
+                return issues_cache[n]
+            return issue_fetcher(repo, n)
+    else:
+        _fetch_one = issue_fetcher
 
     ready_issues = [
         classify_project_issue(