Skip to content

Commit a86c8b6

Browse files
chore: cleanup dead code, add 9 EP benchmark config
- Remove unused _build_improvement_prompt, improve_tool, run_feedback_loop
- Fix mislabeled "missed files" line in improvement prompt
- Remove unused imports
- Add config.yaml with 9 EPs across 4 operators
- EP #1964 excluded (openshift/must-gather is Shell, not Go)

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 03406df commit a86c8b6

4 files changed

Lines changed: 79 additions & 315 deletions

File tree

benchmark/benchmark.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,14 @@ async def cmd_measure(args: argparse.Namespace) -> None:
161161
results: list[BenchmarkResult] = []
162162
for i, case in enumerate(config.cases, 1):
163163
logger.info("\n>>> EP %d of %d <<<", i, len(config.cases))
164-
result = await run_single_ep(case, config, output_dir, "measure", force=args.force)
165-
if result:
166-
results.append(result)
164+
try:
165+
result = await run_single_ep(case, config, output_dir, "measure", force=args.force)
166+
if result:
167+
results.append(result)
168+
except Exception:
169+
ep_num = _ep_num(case.ep_url)
170+
logger.error("EP #%s failed, skipping: %s", ep_num, __import__("traceback").format_exc())
171+
continue
167172

168173
if results:
169174
logger.info("\n" + "=" * 60)
@@ -250,9 +255,14 @@ async def cmd_verify(args: argparse.Namespace) -> None:
250255
results: list[BenchmarkResult] = []
251256
for i, case in enumerate(config.cases, 1):
252257
logger.info("\n>>> EP %d of %d <<<", i, len(config.cases))
253-
result = await run_single_ep(case, config, output_dir, "verify", force=args.force)
254-
if result:
255-
results.append(result)
258+
try:
259+
result = await run_single_ep(case, config, output_dir, "verify", force=args.force)
260+
if result:
261+
results.append(result)
262+
except Exception:
263+
ep_num = _ep_num(case.ep_url)
264+
logger.error("EP #%s failed, skipping: %s", ep_num, __import__("traceback").format_exc())
265+
continue
256266

257267
if results:
258268
logger.info("\n" + "=" * 60)

benchmark/config.yaml

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,52 @@
11
benchmark_cases:
2+
- ep_url: "https://github.com/openshift/enhancements/pull/1863"
3+
repo_url: "https://github.com/openshift/zero-trust-workload-identity-manager"
4+
description: "SPIRE federation support"
5+
implementation_prs: [68, 82]
6+
27
- ep_url: "https://github.com/openshift/enhancements/pull/1834"
38
repo_url: "https://github.com/openshift/external-secrets-operator"
4-
description: "External Secrets Operator enhancement"
9+
description: "Network Policy for ESO"
510
implementation_prs: [67, 74]
611

12+
- ep_url: "https://github.com/openshift/enhancements/pull/1898"
13+
repo_url: "https://github.com/openshift/external-secrets-operator"
14+
description: "ESO install-time customizations"
15+
implementation_prs: [91, 94, 97, 106, 111]
16+
17+
- ep_url: "https://github.com/openshift/enhancements/pull/1914"
18+
repo_url: "https://github.com/openshift/cert-manager-operator"
19+
description: "TrustManager support in cert-manager"
20+
implementation_prs: [362, 371, 379]
21+
22+
- ep_url: "https://github.com/openshift/enhancements/pull/1923"
23+
repo_url: "https://github.com/openshift/must-gather-operator"
24+
description: "since/sinceTime fields in MustGather CR"
25+
implementation_prs: [323]
26+
27+
- ep_url: "https://github.com/openshift/enhancements/pull/1903"
28+
repo_url: "https://github.com/openshift/must-gather-operator"
29+
description: "Remove proxy config from MustGather CR"
30+
implementation_prs: [313]
31+
32+
- ep_url: "https://github.com/openshift/enhancements/pull/1824"
33+
repo_url: "https://github.com/openshift/zero-trust-workload-identity-manager"
34+
description: "OIDC discovery provider route"
35+
implementation_prs: [33]
36+
37+
- ep_url: "https://github.com/openshift/enhancements/pull/1906"
38+
repo_url: "https://github.com/openshift/must-gather-operator"
39+
description: "Custom image option in MustGather spec"
40+
implementation_prs: [322]
41+
42+
- ep_url: "https://github.com/openshift/enhancements/pull/1839"
43+
repo_url: "https://github.com/openshift/must-gather-operator"
44+
description: "PV to persist must-gather data"
45+
implementation_prs: [294]
46+
47+
# EP #1964 excluded: openshift/must-gather is Shell-based, not a Go operator
48+
749
settings:
8-
tools_to_benchmark:
9-
- api-generate
10-
- api-implement
11-
iterations: 3
12-
output_dir: "benchmark/results"
13-
parallel: false
1450
model: "claude-opus-4-6"
1551
effort: "max"
52+
output_dir: "benchmark/results"

benchmark/isolate.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,20 @@
1515
logger = logging.getLogger(__name__)
1616

1717

def _run(cmd: list[str], retries: int = 3, **kwargs) -> subprocess.CompletedProcess:
    """Run *cmd*, retrying on a non-zero exit status with a growing delay.

    Output is always captured as text. Between failed attempts the function
    sleeps ``5 * attempt_number`` seconds (5s, 10s, ...) and logs the first
    200 characters of the command's stderr.

    Args:
        cmd: Argument vector handed to ``subprocess.run`` (no shell involved).
        retries: Total number of attempts. Values below 1 are treated as 1 so
            the command is always executed at least once.
        **kwargs: Forwarded unchanged to ``subprocess.run``.

    Returns:
        The ``CompletedProcess`` of the first attempt that exits with status 0.

    Raises:
        subprocess.CalledProcessError: If every attempt exits non-zero; the
            error carries the return code, stdout and stderr of the last
            attempt (same exception type ``check=True`` would raise).
    """
    # Imported once per call rather than on every failed loop iteration.
    import time

    logger.debug("Running: %s", " ".join(cmd))

    # Guard against retries <= 0, which would otherwise skip the loop entirely
    # and leave `result` unbound.
    attempts = max(1, retries)

    for attempt in range(1, attempts + 1):
        result = subprocess.run(cmd, capture_output=True, text=True, **kwargs)
        if result.returncode == 0:
            return result
        if attempt < attempts:
            wait = 5 * attempt
            logger.warning("Command failed (attempt %d/%d), retrying in %ds: %s",
                           attempt, attempts, wait, result.stderr[:200])
            time.sleep(wait)

    # Every attempt failed: surface the last failure exactly as
    # CompletedProcess.check_returncode() would.
    raise subprocess.CalledProcessError(
        result.returncode, result.args, result.stdout, result.stderr
    )
2132

2233

2334
def _gh_json(args: list[str]) -> dict | list:

0 commit comments

Comments
 (0)