sourcegraph
diff --git a/configs/experiments/mcp_ablation_taskpack_v1.json b/configs/experiments/mcp_ablation_taskpack_v1.json
--- /dev/null
+++ b/configs/experiments/mcp_ablation_taskpack_v1.json
@@ -0,0 +1,260 @@
+{
+  "description": "MCP ablation/preamble experiment pack (v1), balanced across regressions, improvements, and large-repo anchors",
+  "created": "2026-02-17",
+  "total_tasks": 13,
+  "intended_category": "experimental",
+  "notes": [
+    "Use paired baseline + sourcegraph_full on same task set",
+    "Do not publish to official until verifier integrity and pairing checks pass",
+    "Task pack emphasizes new benchmark suites with observed MCP divergence"
+  ],
+  "tasks": [
+    {
+      "task_id": "navprove-qb-url-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "python",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "qutebrowser/qutebrowser",
+      "mcp_benefit_score": 0.83,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.8,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.8
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-qb-url-001",
+      "experiment_role": "mcp_regression_flip"
+    },
+    {
+      "task_id": "navprove-qb-bookmark-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "python",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "qutebrowser/qutebrowser",
+      "mcp_benefit_score": 0.83,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.8,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.8
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-qb-bookmark-001",
+      "experiment_role": "mcp_regression_flip"
+    },
+    {
+      "task_id": "onboard-handoff-002",
+      "benchmark": "ccb_onboarding",
+      "sdlc_phase": "Requirements & Discovery",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "team_handoff",
+      "repo": "hashicorp/terraform",
+      "task_dir": "ccb_onboarding/onboard-handoff-002",
+      "selection_rationale": "Team handoff for IaC provider framework",
+      "mcp_benefit_score": 0.86,
+      "mcp_breakdown": {
+        "context_complexity": 0.9,
+        "cross_file_deps": 0.85,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.8
+      },
+      "experiment_role": "mcp_regression_partial"
+    },
+    {
+      "task_id": "onboard-workflow-002",
+      "benchmark": "ccb_onboarding",
+      "sdlc_phase": "Requirements & Discovery",
+      "language": "java",
+      "difficulty": "hard",
+      "category": "workflow_discovery",
+      "repo": "apache/kafka",
+      "task_dir": "ccb_onboarding/onboard-workflow-002",
+      "selection_rationale": "Developer workflow discovery in Gradle-based Java project",
+      "mcp_benefit_score": 0.77,
+      "mcp_breakdown": {
+        "context_complexity": 0.8,
+        "cross_file_deps": 0.7,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.75
+      },
+      "experiment_role": "mcp_regression_partial"
+    },
+    {
+      "task_id": "onboard-orient-001",
+      "benchmark": "ccb_onboarding",
+      "sdlc_phase": "Requirements & Discovery",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "codebase_orientation",
+      "repo": "cilium/cilium",
+      "task_dir": "ccb_onboarding/onboard-orient-001",
+      "selection_rationale": "Codebase orientation in large networking project",
+      "mcp_benefit_score": 0.83,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.8,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.8
+      },
+      "experiment_role": "mcp_improvement_partial"
+    },
+    {
+      "task_id": "docgen-arch-003",
+      "benchmark": "ccb_docgen",
+      "sdlc_phase": "Documentation",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "architecture_doc",
+      "repo": "hashicorp/terraform",
+      "task_dir": "ccb_docgen/docgen-arch-003",
+      "selection_rationale": "Architecture documentation for IaC plan/apply pipeline",
+      "mcp_benefit_score": 0.87,
+      "mcp_breakdown": {
+        "context_complexity": 0.9,
+        "cross_file_deps": 0.85,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.85
+      },
+      "experiment_role": "mcp_regression_partial"
+    },
+    {
+      "task_id": "docgen-api-003",
+      "benchmark": "ccb_docgen",
+      "sdlc_phase": "Documentation",
+      "language": "java",
+      "difficulty": "hard",
+      "category": "api_reference",
+      "repo": "apache/kafka",
+      "task_dir": "ccb_docgen/docgen-api-003",
+      "selection_rationale": "API reference generation for distributed streaming platform",
+      "mcp_benefit_score": 0.85,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.8,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.85
+      },
+      "experiment_role": "mcp_improvement_partial"
+    },
+    {
+      "task_id": "nlqa-debug-002",
+      "benchmark": "ccb_nlqa",
+      "sdlc_phase": "Debugging",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "debug_root_cause",
+      "repo": "cilium/cilium",
+      "task_dir": "ccb_nlqa/nlqa-debug-002",
+      "selection_rationale": "Debug root cause analysis in eBPF networking codebase",
+      "mcp_benefit_score": 0.79,
+      "mcp_breakdown": {
+        "context_complexity": 0.8,
+        "cross_file_deps": 0.75,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.8
+      },
+      "experiment_role": "mcp_regression_partial"
+    },
+    {
+      "task_id": "nlqa-debug-001",
+      "benchmark": "ccb_nlqa",
+      "sdlc_phase": "Debugging",
+      "language": "typescript",
+      "difficulty": "hard",
+      "category": "debug_root_cause",
+      "repo": "microsoft/vscode",
+      "task_dir": "ccb_nlqa/nlqa-debug-001",
+      "selection_rationale": "Debug root cause analysis in large TypeScript codebase",
+      "mcp_benefit_score": 0.79,
+      "mcp_breakdown": {
+        "context_complexity": 0.8,
+        "cross_file_deps": 0.75,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.8
+      },
+      "experiment_role": "mcp_improvement_partial"
+    },
+    {
+      "task_id": "sec-cve-002",
+      "benchmark": "ccb_security",
+      "sdlc_phase": "Requirements & Discovery",
+      "language": "cpp",
+      "difficulty": "hard",
+      "category": "cve_triage",
+      "repo": "envoyproxy/envoy",
+      "mcp_benefit_score": 0.88,
+      "mcp_breakdown": {
+        "context_complexity": 0.95,
+        "cross_file_deps": 0.85,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.9
+      },
+      "selection_rationale": "CVE triage in large C++ proxy codebase requires navigating HTTP/2 connection management across many source files",
+      "task_dir": "ccb_security/sec-cve-002",
+      "experiment_role": "mcp_regression_partial"
+    },
+    {
+      "task_id": "sec-reach-002",
+      "benchmark": "ccb_security",
+      "sdlc_phase": "Requirements & Discovery",
+      "language": "cpp",
+      "difficulty": "hard",
+      "category": "reachability",
+      "repo": "envoyproxy/envoy",
+      "mcp_benefit_score": 0.88,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.88,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.9
+      },
+      "selection_rationale": "UNREACHABLE vulnerability calibration task - agent must identify admin-only endpoint not exposed to external traffic. Requires understanding Envoy's dual-interface architecture (admin vs data plane).",
+      "task_dir": "ccb_security/sec-reach-002",
+      "experiment_role": "mcp_improvement_partial"
+    },
+    {
+      "task_id": "big-code-k8s-001",
+      "benchmark": "ccb_largerepo",
+      "sdlc_phase": "Implementation (feature)",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "big_code_feature",
+      "repo": "kubernetes/kubernetes",
+      "mcp_benefit_score": 0.895,
+      "mcp_breakdown": {
+        "context_complexity": 0.95,
+        "cross_file_deps": 0.8,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.95
+      },
+      "selection_rationale": "All ccb_largerepo tasks selected (small benchmark)",
+      "task_dir": "ccb_largerepo/big-code-k8s-001",
+      "experiment_role": "large_repo_anchor"
+    },
+    {
+      "task_id": "big-code-vsc-001",
+      "benchmark": "ccb_largerepo",
+      "sdlc_phase": "Implementation (feature)",
+      "language": "typescript",
+      "difficulty": "hard",
+      "category": "big_code_feature",
+      "repo": "microsoft/vscode",
+      "mcp_benefit_score": 0.895,
+      "mcp_breakdown": {
+        "context_complexity": 0.95,
+        "cross_file_deps": 0.8,
+        "semantic_search_potential": 0.9,
+        "task_category_weight": 0.95
+      },
+      "selection_rationale": "All ccb_largerepo tasks selected (small benchmark)",
+      "task_dir": "ccb_largerepo/big-code-vsc-001",
+      "experiment_role": "large_repo_anchor"
+    }
+  ]
+}