Skip to content

Commit c5b29a9

Browse files
sjarmak and claude committed
fix: add SDLC/MCP-unique suite prefixes to promote_run.py
DIR_PREFIX_TO_SUITE was missing all current suite prefixes (ccb_debug_, ccb_feature_, ccb_fix_, ccb_test_, ccb_mcp_*, etc.), causing "could not infer benchmark" errors that blocked promotion of valid runs. Also adds a selection-file fallback lookup for task IDs, and checks the longest prefixes first so that ccb_mcp_crossrepo_ does not match before ccb_mcp_crossrepo_tracing_.

23 staging batches promoted. Gap analysis re-run: 27 tasks remain (was 26 — ansible-abc-imports-fix-001 surfaced as a gap after promotion).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1ad9c96 commit c5b29a9

File tree

9 files changed

+165
-42
lines changed

9 files changed

+165
-42
lines changed

configs/variance_reruns/variance_gap_all_sdlc.json

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
{
22
"metadata": {
3-
"title": "Variance all gaps: 26 tasks (8 need 2 passes, 18 need 1)",
4-
"description": "Targeted variance rerun. Generated by variance_gap_analysis.py.",
3+
"title": "Variance rerun: ALL SDLC gap tasks (27 tasks)",
4+
"description": "Combined rerun config for all SDLC tasks with < 3 paired passes.",
55
"generated_date": "2026-03-01",
6-
"total_tasks": 26,
7-
"note": "Run this config ONCE. Then run wave1 config once more to complete the 8 tasks that need 2 total additional passes. Total: 26*2 + 8*2 = 68 sandbox runs."
6+
"total_tasks": 27
87
},
98
"methodology": {
109
"sdlc_suites": [
@@ -15,11 +14,11 @@
1514
]
1615
},
1716
"statistics": {
18-
"total_tasks": 26,
17+
"total_tasks": 27,
1918
"per_suite": {
2019
"ccb_debug": 2,
2120
"ccb_feature": 4,
22-
"ccb_fix": 5,
21+
"ccb_fix": 6,
2322
"ccb_test": 15
2423
}
2524
},
@@ -112,6 +111,20 @@
112111
"repo": "microsoft/vscode",
113112
"mcp_benefit_score": 0.87
114113
},
114+
{
115+
"task_id": "ansible-abc-imports-fix-001",
116+
"benchmark": "ccb_fix",
117+
"task_dir": "ccb_fix/ansible-abc-imports-fix-001",
118+
"language": "python",
119+
"difficulty": "medium",
120+
"current_bl_runs": 3,
121+
"current_mcp_runs": 2,
122+
"current_paired": 2,
123+
"runs_needed": 1,
124+
"sdlc_phase": "fix",
125+
"repo": "ansible/ansible",
126+
"mcp_benefit_score": 0.75
127+
},
115128
{
116129
"task_id": "flink-window-late-data-fix-001",
117130
"benchmark": "ccb_fix",
@@ -134,7 +147,7 @@
134147
"language": "javascript",
135148
"difficulty": "medium",
136149
"current_bl_runs": 2,
137-
"current_mcp_runs": 4,
150+
"current_mcp_runs": 3,
138151
"current_paired": 2,
139152
"runs_needed": 1,
140153
"sdlc_phase": "fix",

configs/variance_reruns/variance_gap_ccb_fix.json

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
{
22
"metadata": {
3-
"title": "Variance rerun: ccb_fix gap tasks (5 tasks, target 3 pairs)",
3+
"title": "Variance rerun: ccb_fix gap tasks (6 tasks, target 3 pairs)",
44
"description": "Targeted rerun for ccb_fix tasks with < 3 paired passes. Generated by variance_gap_analysis.py.",
55
"generated_date": "2026-03-01",
6-
"total_tasks": 5,
6+
"total_tasks": 6,
77
"max_concurrency_needed": 1,
88
"note": "Run with --concurrency 1 to fill all gaps in one batch. Or run with --concurrency 1 multiple times."
99
},
@@ -13,12 +13,26 @@
1313
]
1414
},
1515
"statistics": {
16-
"total_tasks": 5,
16+
"total_tasks": 6,
1717
"per_suite": {
18-
"ccb_fix": 5
18+
"ccb_fix": 6
1919
}
2020
},
2121
"tasks": [
22+
{
23+
"task_id": "ansible-abc-imports-fix-001",
24+
"benchmark": "ccb_fix",
25+
"task_dir": "ccb_fix/ansible-abc-imports-fix-001",
26+
"language": "python",
27+
"difficulty": "medium",
28+
"current_bl_runs": 3,
29+
"current_mcp_runs": 2,
30+
"current_paired": 2,
31+
"runs_needed": 1,
32+
"sdlc_phase": "fix",
33+
"repo": "ansible/ansible",
34+
"mcp_benefit_score": 0.75
35+
},
2236
{
2337
"task_id": "flink-window-late-data-fix-001",
2438
"benchmark": "ccb_fix",
@@ -41,7 +55,7 @@
4155
"language": "javascript",
4256
"difficulty": "medium",
4357
"current_bl_runs": 2,
44-
"current_mcp_runs": 4,
58+
"current_mcp_runs": 3,
4559
"current_paired": 2,
4660
"runs_needed": 1,
4761
"sdlc_phase": "fix",

configs/variance_reruns/variance_gap_daytona.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"metadata": {
3-
"title": "Variance all gaps (Daytona): 22 tasks",
3+
"title": "Variance gap: 22 Daytona-compatible tasks",
44
"description": "Targeted variance rerun. Generated by variance_gap_analysis.py.",
55
"generated_date": "2026-03-01",
66
"total_tasks": 22,
7-
"note": "Run ONCE, then run wave1_daytona.json once more. 6 need 2 passes, 16 need 1. Total: 56 sandbox runs."
7+
"note": "Run ONCE for all tasks. Then run wave1_daytona.json once more for tasks needing 2 passes."
88
},
99
"methodology": {
1010
"sdlc_suites": [
@@ -353,4 +353,4 @@
353353
"mcp_benefit_score": 0.72
354354
}
355355
]
356-
}
356+
}

configs/variance_reruns/variance_gap_local.json

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"metadata": {
3-
"title": "Variance gaps (local Docker): 4 sweap-images tasks",
3+
"title": "Variance gap: 5 sweap-images tasks (local Docker only)",
44
"description": "Targeted variance rerun. Generated by variance_gap_analysis.py.",
55
"generated_date": "2026-03-01",
6-
"total_tasks": 4,
7-
"note": "Run with LOCAL Docker (no Daytona). These tasks use jefzda/sweap-images. Run TWICE for tasks needing 2 passes."
6+
"total_tasks": 5,
7+
"note": "Daytona-incompatible. Run on local Docker. Tasks needing 2 passes must be run twice."
88
},
99
"methodology": {
1010
"sdlc_suites": [
@@ -13,10 +13,10 @@
1313
]
1414
},
1515
"statistics": {
16-
"total_tasks": 4,
16+
"total_tasks": 5,
1717
"per_suite": {
1818
"ccb_debug": 2,
19-
"ccb_fix": 2
19+
"ccb_fix": 3
2020
}
2121
},
2222
"tasks": [
@@ -48,14 +48,28 @@
4848
"repo": "tutanota/tutanota",
4949
"mcp_benefit_score": 0.75
5050
},
51+
{
52+
"task_id": "ansible-abc-imports-fix-001",
53+
"benchmark": "ccb_fix",
54+
"task_dir": "ccb_fix/ansible-abc-imports-fix-001",
55+
"language": "python",
56+
"difficulty": "medium",
57+
"current_bl_runs": 3,
58+
"current_mcp_runs": 2,
59+
"current_paired": 2,
60+
"runs_needed": 1,
61+
"sdlc_phase": "fix",
62+
"repo": "ansible/ansible",
63+
"mcp_benefit_score": 0.75
64+
},
5165
{
5266
"task_id": "nodebb-plugin-validate-fix-001",
5367
"benchmark": "ccb_fix",
5468
"task_dir": "ccb_fix/nodebb-plugin-validate-fix-001",
5569
"language": "javascript",
5670
"difficulty": "medium",
5771
"current_bl_runs": 2,
58-
"current_mcp_runs": 4,
72+
"current_mcp_runs": 3,
5973
"current_paired": 2,
6074
"runs_needed": 1,
6175
"sdlc_phase": "fix",
@@ -78,4 +92,4 @@
7892
"mcp_benefit_score": 0.85
7993
}
8094
]
81-
}
95+
}

configs/variance_reruns/wave1_daytona.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"metadata": {
3-
"title": "Variance wave 1 (Daytona): 6 tasks needing 2+ passes",
3+
"title": "Wave 1 Daytona: 6 tasks needing 2+ passes",
44
"description": "Targeted variance rerun. Generated by variance_gap_analysis.py.",
55
"generated_date": "2026-03-01",
66
"total_tasks": 6,
7-
"note": "Run this config TWICE to fill gaps. Daytona-compatible only."
7+
"note": "Run this config TWICE on Daytona to fill the gap."
88
},
99
"methodology": {
1010
"sdlc_suites": [
@@ -111,4 +111,4 @@
111111
"mcp_benefit_score": 0.72
112112
}
113113
]
114-
}
114+
}

configs/variance_reruns/wave1_need2.json

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
{
22
"metadata": {
3-
"title": "Variance wave 1: tasks needing 2+ passes (8 tasks)",
3+
"title": "All tasks needing 2+ passes: 8 tasks",
44
"description": "Targeted variance rerun. Generated by variance_gap_analysis.py.",
55
"generated_date": "2026-03-01",
6-
"total_tasks": 8,
7-
"note": "Run this config TWICE (--concurrency 1, two separate invocations) to fill all gaps. These 8 tasks have only 1 paired pass each."
6+
"total_tasks": 8
87
},
98
"methodology": {
109
"sdlc_suites": [
@@ -141,4 +140,4 @@
141140
"mcp_benefit_score": 0.72
142141
}
143142
]
144-
}
143+
}

configs/variance_reruns/wave2_daytona.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"metadata": {
3-
"title": "Variance wave 2 (Daytona): 16 tasks needing 1 pass",
3+
"title": "Wave 2 Daytona: 16 tasks needing 1 pass",
44
"description": "Targeted variance rerun. Generated by variance_gap_analysis.py.",
55
"generated_date": "2026-03-01",
66
"total_tasks": 16,
7-
"note": "Run this config ONCE to fill gaps. Daytona-compatible only."
7+
"note": "Run this config ONCE on Daytona."
88
},
99
"methodology": {
1010
"sdlc_suites": [
@@ -263,4 +263,4 @@
263263
"mcp_benefit_score": 0.72
264264
}
265265
]
266-
}
266+
}

configs/variance_reruns/wave2_need1.json

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
{
22
"metadata": {
3-
"title": "Variance wave 2: tasks needing 1 pass (18 tasks)",
3+
"title": "All tasks needing 1 pass: 19 tasks",
44
"description": "Targeted variance rerun. Generated by variance_gap_analysis.py.",
55
"generated_date": "2026-03-01",
6-
"total_tasks": 18,
7-
"note": "Run this config ONCE to fill all gaps. These 18 tasks have 2 paired passes and need exactly 1 more."
6+
"total_tasks": 19
87
},
98
"methodology": {
109
"sdlc_suites": [
@@ -14,10 +13,10 @@
1413
]
1514
},
1615
"statistics": {
17-
"total_tasks": 18,
16+
"total_tasks": 19,
1817
"per_suite": {
1918
"ccb_feature": 1,
20-
"ccb_fix": 5,
19+
"ccb_fix": 6,
2120
"ccb_test": 12
2221
}
2322
},
@@ -37,6 +36,20 @@
3736
"repo": "kubernetes/kubernetes",
3837
"mcp_benefit_score": 0.88
3938
},
39+
{
40+
"task_id": "ansible-abc-imports-fix-001",
41+
"benchmark": "ccb_fix",
42+
"task_dir": "ccb_fix/ansible-abc-imports-fix-001",
43+
"language": "python",
44+
"difficulty": "medium",
45+
"current_bl_runs": 3,
46+
"current_mcp_runs": 2,
47+
"current_paired": 2,
48+
"runs_needed": 1,
49+
"sdlc_phase": "fix",
50+
"repo": "ansible/ansible",
51+
"mcp_benefit_score": 0.75
52+
},
4053
{
4154
"task_id": "flink-window-late-data-fix-001",
4255
"benchmark": "ccb_fix",
@@ -59,7 +72,7 @@
5972
"language": "javascript",
6073
"difficulty": "medium",
6174
"current_bl_runs": 2,
62-
"current_mcp_runs": 4,
75+
"current_mcp_runs": 3,
6376
"current_paired": 2,
6477
"runs_needed": 1,
6578
"sdlc_phase": "fix",
@@ -292,4 +305,4 @@
292305
"mcp_benefit_score": 0.72
293306
}
294307
]
295-
}
308+
}

0 commit comments

Comments
 (0)