Skip to content

Commit a9b36f2

Browse files
committed
feat: add MCP ablation taskpack experiment config
1 parent 35dd557 commit a9b36f2

File tree

2 files changed

+413
-0
lines changed

2 files changed

+413
-0
lines changed
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
{
2+
"description": "MCP ablation/preamble experiment pack (v1), balanced across regressions, improvements, and large-repo anchors",
3+
"created": "2026-02-17",
4+
"total_tasks": 13,
5+
"intended_category": "experimental",
6+
"notes": [
7+
"Use paired baseline + sourcegraph_full on same task set",
8+
"Do not publish to official until verifier integrity and pairing checks pass",
9+
"Task pack emphasizes new benchmark suites with observed MCP divergence"
10+
],
11+
"tasks": [
12+
{
13+
"task_id": "navprove-qb-url-001",
14+
"benchmark": "ccb_navprove",
15+
"sdlc_phase": "Debugging",
16+
"language": "python",
17+
"difficulty": "hard",
18+
"category": "navigation_verified",
19+
"repo": "qutebrowser/qutebrowser",
20+
"mcp_benefit_score": 0.83,
21+
"mcp_breakdown": {
22+
"context_complexity": 0.85,
23+
"cross_file_deps": 0.8,
24+
"semantic_search_potential": 0.9,
25+
"task_category_weight": 0.8
26+
},
27+
"selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
28+
"task_dir": "ccb_navprove/navprove-qb-url-001",
29+
"experiment_role": "mcp_regression_flip"
30+
},
31+
{
32+
"task_id": "navprove-qb-bookmark-001",
33+
"benchmark": "ccb_navprove",
34+
"sdlc_phase": "Debugging",
35+
"language": "python",
36+
"difficulty": "hard",
37+
"category": "navigation_verified",
38+
"repo": "qutebrowser/qutebrowser",
39+
"mcp_benefit_score": 0.83,
40+
"mcp_breakdown": {
41+
"context_complexity": 0.85,
42+
"cross_file_deps": 0.8,
43+
"semantic_search_potential": 0.9,
44+
"task_category_weight": 0.8
45+
},
46+
"selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
47+
"task_dir": "ccb_navprove/navprove-qb-bookmark-001",
48+
"experiment_role": "mcp_regression_flip"
49+
},
50+
{
51+
"task_id": "onboard-handoff-002",
52+
"benchmark": "ccb_onboarding",
53+
"sdlc_phase": "Requirements & Discovery",
54+
"language": "go",
55+
"difficulty": "hard",
56+
"category": "team_handoff",
57+
"repo": "hashicorp/terraform",
58+
"task_dir": "ccb_onboarding/onboard-handoff-002",
59+
"selection_rationale": "Team handoff for IaC provider framework",
60+
"mcp_benefit_score": 0.86,
61+
"mcp_breakdown": {
62+
"context_complexity": 0.9,
63+
"cross_file_deps": 0.85,
64+
"semantic_search_potential": 0.9,
65+
"task_category_weight": 0.8
66+
},
67+
"experiment_role": "mcp_regression_partial"
68+
},
69+
{
70+
"task_id": "onboard-workflow-002",
71+
"benchmark": "ccb_onboarding",
72+
"sdlc_phase": "Requirements & Discovery",
73+
"language": "java",
74+
"difficulty": "hard",
75+
"category": "workflow_discovery",
76+
"repo": "apache/kafka",
77+
"task_dir": "ccb_onboarding/onboard-workflow-002",
78+
"selection_rationale": "Developer workflow discovery in Gradle-based Java project",
79+
"mcp_benefit_score": 0.77,
80+
"mcp_breakdown": {
81+
"context_complexity": 0.8,
82+
"cross_file_deps": 0.7,
83+
"semantic_search_potential": 0.85,
84+
"task_category_weight": 0.75
85+
},
86+
"experiment_role": "mcp_regression_partial"
87+
},
88+
{
89+
"task_id": "onboard-orient-001",
90+
"benchmark": "ccb_onboarding",
91+
"sdlc_phase": "Requirements & Discovery",
92+
"language": "go",
93+
"difficulty": "hard",
94+
"category": "codebase_orientation",
95+
"repo": "cilium/cilium",
96+
"task_dir": "ccb_onboarding/onboard-orient-001",
97+
"selection_rationale": "Codebase orientation in large networking project",
98+
"mcp_benefit_score": 0.83,
99+
"mcp_breakdown": {
100+
"context_complexity": 0.85,
101+
"cross_file_deps": 0.8,
102+
"semantic_search_potential": 0.9,
103+
"task_category_weight": 0.8
104+
},
105+
"experiment_role": "mcp_improvement_partial"
106+
},
107+
{
108+
"task_id": "docgen-arch-003",
109+
"benchmark": "ccb_docgen",
110+
"sdlc_phase": "Documentation",
111+
"language": "go",
112+
"difficulty": "hard",
113+
"category": "architecture_doc",
114+
"repo": "hashicorp/terraform",
115+
"task_dir": "ccb_docgen/docgen-arch-003",
116+
"selection_rationale": "Architecture documentation for IaC plan/apply pipeline",
117+
"mcp_benefit_score": 0.87,
118+
"mcp_breakdown": {
119+
"context_complexity": 0.9,
120+
"cross_file_deps": 0.85,
121+
"semantic_search_potential": 0.9,
122+
"task_category_weight": 0.85
123+
},
124+
"experiment_role": "mcp_regression_partial"
125+
},
126+
{
127+
"task_id": "docgen-api-003",
128+
"benchmark": "ccb_docgen",
129+
"sdlc_phase": "Documentation",
130+
"language": "java",
131+
"difficulty": "hard",
132+
"category": "api_reference",
133+
"repo": "apache/kafka",
134+
"task_dir": "ccb_docgen/docgen-api-003",
135+
"selection_rationale": "API reference generation for distributed streaming platform",
136+
"mcp_benefit_score": 0.85,
137+
"mcp_breakdown": {
138+
"context_complexity": 0.85,
139+
"cross_file_deps": 0.8,
140+
"semantic_search_potential": 0.9,
141+
"task_category_weight": 0.85
142+
},
143+
"experiment_role": "mcp_improvement_partial"
144+
},
145+
{
146+
"task_id": "nlqa-debug-002",
147+
"benchmark": "ccb_nlqa",
148+
"sdlc_phase": "Debugging",
149+
"language": "go",
150+
"difficulty": "hard",
151+
"category": "debug_root_cause",
152+
"repo": "cilium/cilium",
153+
"task_dir": "ccb_nlqa/nlqa-debug-002",
154+
"selection_rationale": "Debug root cause analysis in eBPF networking codebase",
155+
"mcp_benefit_score": 0.79,
156+
"mcp_breakdown": {
157+
"context_complexity": 0.8,
158+
"cross_file_deps": 0.75,
159+
"semantic_search_potential": 0.85,
160+
"task_category_weight": 0.8
161+
},
162+
"experiment_role": "mcp_regression_partial"
163+
},
164+
{
165+
"task_id": "nlqa-debug-001",
166+
"benchmark": "ccb_nlqa",
167+
"sdlc_phase": "Debugging",
168+
"language": "typescript",
169+
"difficulty": "hard",
170+
"category": "debug_root_cause",
171+
"repo": "microsoft/vscode",
172+
"task_dir": "ccb_nlqa/nlqa-debug-001",
173+
"selection_rationale": "Debug root cause analysis in large TypeScript codebase",
174+
"mcp_benefit_score": 0.79,
175+
"mcp_breakdown": {
176+
"context_complexity": 0.8,
177+
"cross_file_deps": 0.75,
178+
"semantic_search_potential": 0.85,
179+
"task_category_weight": 0.8
180+
},
181+
"experiment_role": "mcp_improvement_partial"
182+
},
183+
{
184+
"task_id": "sec-cve-002",
185+
"benchmark": "ccb_security",
186+
"sdlc_phase": "Requirements & Discovery",
187+
"language": "cpp",
188+
"difficulty": "hard",
189+
"category": "cve_triage",
190+
"repo": "envoyproxy/envoy",
191+
"mcp_benefit_score": 0.88,
192+
"mcp_breakdown": {
193+
"context_complexity": 0.95,
194+
"cross_file_deps": 0.85,
195+
"semantic_search_potential": 0.85,
196+
"task_category_weight": 0.9
197+
},
198+
"selection_rationale": "CVE triage in large C++ proxy codebase requires navigating HTTP/2 connection management across many source files",
199+
"task_dir": "ccb_security/sec-cve-002",
200+
"experiment_role": "mcp_regression_partial"
201+
},
202+
{
203+
"task_id": "sec-reach-002",
204+
"benchmark": "ccb_security",
205+
"sdlc_phase": "Requirements & Discovery",
206+
"language": "cpp",
207+
"difficulty": "hard",
208+
"category": "reachability",
209+
"repo": "envoyproxy/envoy",
210+
"mcp_benefit_score": 0.88,
211+
"mcp_breakdown": {
212+
"context_complexity": 0.85,
213+
"cross_file_deps": 0.88,
214+
"semantic_search_potential": 0.9,
215+
"task_category_weight": 0.9
216+
},
217+
"selection_rationale": "UNREACHABLE vulnerability calibration task - agent must identify admin-only endpoint not exposed to external traffic. Requires understanding Envoy's dual-interface architecture (admin vs data plane).",
218+
"task_dir": "ccb_security/sec-reach-002",
219+
"experiment_role": "mcp_improvement_partial"
220+
},
221+
{
222+
"task_id": "big-code-k8s-001",
223+
"benchmark": "ccb_largerepo",
224+
"sdlc_phase": "Implementation (feature)",
225+
"language": "go",
226+
"difficulty": "hard",
227+
"category": "big_code_feature",
228+
"repo": "kubernetes/kubernetes",
229+
"mcp_benefit_score": 0.895,
230+
"mcp_breakdown": {
231+
"context_complexity": 0.95,
232+
"cross_file_deps": 0.8,
233+
"semantic_search_potential": 0.9,
234+
"task_category_weight": 0.95
235+
},
236+
"selection_rationale": "All ccb_largerepo tasks selected (small benchmark)",
237+
"task_dir": "ccb_largerepo/big-code-k8s-001",
238+
"experiment_role": "large_repo_anchor"
239+
},
240+
{
241+
"task_id": "big-code-vsc-001",
242+
"benchmark": "ccb_largerepo",
243+
"sdlc_phase": "Implementation (feature)",
244+
"language": "typescript",
245+
"difficulty": "hard",
246+
"category": "big_code_feature",
247+
"repo": "microsoft/vscode",
248+
"mcp_benefit_score": 0.895,
249+
"mcp_breakdown": {
250+
"context_complexity": 0.95,
251+
"cross_file_deps": 0.8,
252+
"semantic_search_potential": 0.9,
253+
"task_category_weight": 0.95
254+
},
255+
"selection_rationale": "All ccb_largerepo tasks selected (small benchmark)",
256+
"task_dir": "ccb_largerepo/big-code-vsc-001",
257+
"experiment_role": "large_repo_anchor"
258+
}
259+
]
260+
}

0 commit comments

Comments
 (0)