sourcegraph
diff --git a/AGENTS.md b/AGENTS.md
Lines changed: 7 additions & 6 deletions
diff --git a/CLAUDE.md b/CLAUDE.md
Lines changed: 7 additions & 6 deletions
diff --git a/configs/_common.sh b/configs/_common.sh
Lines changed: 66 additions & 7 deletions
diff --git a/configs/codex_2config.sh b/configs/codex_2config.sh
Lines changed: 4 additions & 4 deletions
diff --git a/configs/copilot_2config.sh b/configs/copilot_2config.sh
Lines changed: 4 additions & 4 deletions
diff --git a/configs/cursor_2config.sh b/configs/cursor_2config.sh
Lines changed: 4 additions & 4 deletions
diff --git a/configs/gemini_2config.sh b/configs/gemini_2config.sh
Lines changed: 4 additions & 4 deletions
diff --git a/configs/openhands_2config.sh b/configs/openhands_2config.sh
Lines changed: 4 additions & 4 deletions
diff --git a/configs/run_selected_tasks.sh b/configs/run_selected_tasks.sh
Lines changed: 4 additions & 4 deletions
@@ -44,12 +44,13 @@ Use these defaults unless there is a task-specific reason not to.
 - Planning/prioritization: `whats-next`
 
 ## Evaluation Configs
-Two configs per task: **Baseline** (full local code, no MCP) and **MCP-Full**
-(local source truncated, Sourcegraph MCP enabled). MCP-Full uses
-`Dockerfile.sg_only` so the agent cannot read source locally and must discover
-code via MCP tools. The verifier restores the full repo before scoring.
-See `docs/CONFIGS.md` for the full environment model, tool lists, and how to
-add sg_only support to new tasks.
+Config names encode three dimensions: `{agent}-{source}-{verifier}`.
+Standard pairing: **baseline-local-direct** (full local code, no MCP) and
+**mcp-remote-direct** (source deleted, Sourcegraph MCP). Artifact evaluation
+uses **baseline-local-artifact** + **mcp-remote-artifact** (review.json output).
+MCP configs use `Dockerfile.sg_only` or `Dockerfile.artifact_only` so the
+agent must discover code via MCP tools. The verifier restores the full repo
+before scoring. See `docs/CONFIGS.md` for the full config matrix.
 
 ## Standard Workflow
 0. **Before commit or push:** Run `python3 scripts/repo_health.py` (or `--quick`). Fix any failures so main stays clean and drift is caught early (see `docs/REPO_HEALTH.md`).
 
@@ -47,12 +47,13 @@ Use these defaults unless there is a task-specific reason not to.
 - Planning/prioritization: `whats-next`
 
 ## Evaluation Configs
-Two configs per task: **Baseline** (full local code, no MCP) and **MCP-Full**
-(local source truncated, Sourcegraph MCP enabled). MCP-Full uses
-`Dockerfile.sg_only` so the agent cannot read source locally and must discover
-code via MCP tools. The verifier restores the full repo before scoring.
-See `docs/CONFIGS.md` for the full environment model, tool lists, and how to
-add sg_only support to new tasks.
+Config names encode three dimensions: `{agent}-{source}-{verifier}`.
+Standard pairing: **baseline-local-direct** (full local code, no MCP) and
+**mcp-remote-direct** (source deleted, Sourcegraph MCP). Artifact evaluation
+uses **baseline-local-artifact** + **mcp-remote-artifact** (review.json output).
+MCP configs use `Dockerfile.sg_only` or `Dockerfile.artifact_only` so the
+agent must discover code via MCP tools. The verifier restores the full repo
+before scoring. See `docs/CONFIGS.md` for the full config matrix.
 
 ## Standard Workflow
 0. **Before commit or push:** Run `python3 scripts/repo_health.py` (or `--quick`). Fix any failures so main stays clean and drift is caught early (see `docs/REPO_HEALTH.md`).
 
@@ -35,6 +35,59 @@ load_credentials() {
     fi
 }
 
+# ============================================
+# CONFIG NAME MAPPING
+# ============================================
+# Three-dimensional config names: {agent}-{source}-{verifier}
+#   agent:    baseline (no MCP) | mcp (Sourcegraph MCP)
+#   source:   local (full source) | remote (source deleted)
+#   verifier: direct (git changes) | artifact (review.json)
+#
+# These map to internal Harbor mcp_type values via config_to_mcp_type().
+# Legacy names (baseline, sourcegraph_full, artifact_full) and the raw mcp_type
+# "none" are accepted for backward compatibility with existing run directories.
+
+VERIFIER_MODE="direct"
+SOURCE_ACCESS="local"
+
+# Map composite config name → internal mcp_type for Harbor.
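+# Side effects: sets VERIFIER_MODE and SOURCE_ACCESS globals, but only in the calling shell; they are lost when invoked as $(config_to_mcp_type ...), which runs in a subshell.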
+config_to_mcp_type() {
+    local config_name="$1"
+    case "$config_name" in
+        baseline-local-direct)
+            VERIFIER_MODE="direct"; SOURCE_ACCESS="local"; echo "none" ;;
+        mcp-remote-direct)
+            VERIFIER_MODE="direct"; SOURCE_ACCESS="remote"; echo "sourcegraph_full" ;;
+        baseline-local-artifact)
+            VERIFIER_MODE="artifact"; SOURCE_ACCESS="local"; echo "none" ;;
+        mcp-remote-artifact)
+            VERIFIER_MODE="artifact"; SOURCE_ACCESS="remote"; echo "artifact_full" ;;
+        # Legacy names
+        baseline)
+            VERIFIER_MODE="direct"; SOURCE_ACCESS="local"; echo "none" ;;
+        sourcegraph_full)
+            VERIFIER_MODE="direct"; SOURCE_ACCESS="remote"; echo "sourcegraph_full" ;;
+        artifact_full)
+            VERIFIER_MODE="artifact"; SOURCE_ACCESS="remote"; echo "artifact_full" ;;
+        none)
+            VERIFIER_MODE="direct"; SOURCE_ACCESS="local"; echo "none" ;;
+        *)
+            echo "WARNING: Unknown config name: $config_name" >&2
+            VERIFIER_MODE="direct"; SOURCE_ACCESS="local"; echo "$config_name" ;;
+    esac
+}
+
+# Derive the baseline config name that pairs with a given FULL_CONFIG:
+# *-artifact and legacy artifact_full pair with baseline-local-artifact; all others with baseline-local-direct.
+baseline_config_for() {
+    local full="$1"
+    case "$full" in
+        *-artifact|artifact_full) echo "baseline-local-artifact" ;;
+        *)                        echo "baseline-local-direct" ;;
+    esac
+}
+
 # ============================================
 # VERIFIER DEBUG MODE
 # ============================================
@@ -934,8 +987,7 @@ run_canary_then_batch() {
 #   run_paired_configs TASK_IDS _my_run_fn "$JOBS_BASE"
 #
 # The run function must accept: $1=task_id $2=task_home $3=config_mode $4=mcp_type $5=jobs_base
-# It is responsible for creating the jobs_subdir (e.g., baseline/ or sourcegraph_full/) and
-# launching harbor.
+# It is responsible for creating the jobs_subdir and launching harbor.
 #
 # This launches 2 containers per task (1 baseline + 1 MCP) simultaneously, so the total
 # concurrent containers is 2x the number of tasks. PARALLEL_JOBS limits total concurrent PIDs.
@@ -952,18 +1004,25 @@ run_paired_configs() {
     echo "Paired execution: $num_tasks tasks x 2 configs"
     echo "========================================"
     echo ""
-    local full_config="${FULL_CONFIG:-sourcegraph_full}"
-    echo "Each task launches baseline + ${full_config} simultaneously."
+    local full_config="${FULL_CONFIG:-mcp-remote-direct}"
+    local bl_config
+    bl_config=$(baseline_config_for "$full_config")
+    echo "Each task launches ${bl_config} + ${full_config} simultaneously."
     echo "Total concurrent containers: up to $((num_tasks * 2)) (limited by PARALLEL_JOBS=$PARALLEL_JOBS)"
     echo ""
 
-    mkdir -p "${jobs_base}/baseline" "${jobs_base}/${full_config}"
+    mkdir -p "${jobs_base}/${bl_config}" "${jobs_base}/${full_config}"
+
+    # Resolve mcp_type values once. $(...) runs in a subshell, so VERIFIER_MODE/SOURCE_ACCESS are not updated in this shell.
+    local bl_mcp full_mcp
+    bl_mcp=$(config_to_mcp_type "$bl_config")
+    full_mcp=$(config_to_mcp_type "$full_config")
 
     # Build paired task list: each task gets two entries
     local paired_ids=()
     for task_id in "${_paired_task_ids[@]}"; do
-        paired_ids+=("${task_id}|baseline|none")
-        paired_ids+=("${task_id}|${full_config}|${full_config}")
+        paired_ids+=("${task_id}|${bl_config}|${bl_mcp}")
+        paired_ids+=("${task_id}|${full_config}|${full_mcp}")
     done
 
     # Wrapper command function that splits the paired ID
 
@@ -2,8 +2,8 @@
 # Codex Harness 2-Config Runner
 #
 # Runs selected tasks across 2 configurations:
-#   1. Baseline (BASELINE_MCP_TYPE=none)
-#   2. MCP-Full (BASELINE_MCP_TYPE=sourcegraph_full)
+#   1. baseline-local-direct (BASELINE_MCP_TYPE=none)
+#   2. mcp-remote-direct (BASELINE_MCP_TYPE=sourcegraph_full)
 #
 # Usage:
 #   ./configs/codex_2config.sh [OPTIONS]
@@ -201,11 +201,11 @@ run_mode() {
 }
 
 if [ "$RUN_BASELINE" = true ]; then
-    run_mode "baseline" "none"
+    run_mode "baseline-local-direct" "none"
 fi
 
 if [ "$RUN_FULL" = true ]; then
-    run_mode "sourcegraph_full" "sourcegraph_full"
+    run_mode "mcp-remote-direct" "sourcegraph_full"
 fi
 
 print_validation_summary "$JOBS_BASE"
 
@@ -2,8 +2,8 @@
 # Copilot Harness 2-Config Runner
 #
 # Runs selected tasks across 2 configurations:
-#   1. Baseline (BASELINE_MCP_TYPE=none)
-#   2. MCP-Full (BASELINE_MCP_TYPE=sourcegraph_full)
+#   1. baseline-local-direct (BASELINE_MCP_TYPE=none)
+#   2. mcp-remote-direct (BASELINE_MCP_TYPE=sourcegraph_full)
 #
 # Usage:
 #   ./configs/copilot_2config.sh [OPTIONS]
@@ -201,11 +201,11 @@ run_mode() {
 }
 
 if [ "$RUN_BASELINE" = true ]; then
-    run_mode "baseline" "none"
+    run_mode "baseline-local-direct" "none"
 fi
 
 if [ "$RUN_FULL" = true ]; then
-    run_mode "sourcegraph_full" "sourcegraph_full"
+    run_mode "mcp-remote-direct" "sourcegraph_full"
 fi
 
 print_validation_summary "$JOBS_BASE"
 
@@ -2,8 +2,8 @@
 # Cursor Harness 2-Config Runner
 #
 # Runs selected tasks across 2 configurations:
-#   1. Baseline (BASELINE_MCP_TYPE=none)
-#   2. MCP-Full (BASELINE_MCP_TYPE=sourcegraph_full)
+#   1. baseline-local-direct (BASELINE_MCP_TYPE=none)
+#   2. mcp-remote-direct (BASELINE_MCP_TYPE=sourcegraph_full)
 #
 # Usage:
 #   ./configs/cursor_2config.sh [OPTIONS]
@@ -201,11 +201,11 @@ run_mode() {
 }
 
 if [ "$RUN_BASELINE" = true ]; then
-    run_mode "baseline" "none"
+    run_mode "baseline-local-direct" "none"
 fi
 
 if [ "$RUN_FULL" = true ]; then
-    run_mode "sourcegraph_full" "sourcegraph_full"
+    run_mode "mcp-remote-direct" "sourcegraph_full"
 fi
 
 print_validation_summary "$JOBS_BASE"
 
@@ -2,8 +2,8 @@
 # Gemini Harness 2-Config Runner
 #
 # Runs selected tasks across 2 configurations:
-#   1. Baseline (BASELINE_MCP_TYPE=none)
-#   2. MCP-Full (BASELINE_MCP_TYPE=sourcegraph_full)
+#   1. baseline-local-direct (BASELINE_MCP_TYPE=none)
+#   2. mcp-remote-direct (BASELINE_MCP_TYPE=sourcegraph_full)
 #
 # Usage:
 #   ./configs/gemini_2config.sh [OPTIONS]
@@ -201,11 +201,11 @@ run_mode() {
 }
 
 if [ "$RUN_BASELINE" = true ]; then
-    run_mode "baseline" "none"
+    run_mode "baseline-local-direct" "none"
 fi
 
 if [ "$RUN_FULL" = true ]; then
-    run_mode "sourcegraph_full" "sourcegraph_full"
+    run_mode "mcp-remote-direct" "sourcegraph_full"
 fi
 
 print_validation_summary "$JOBS_BASE"
 
@@ -2,8 +2,8 @@
 # OpenHands Harness 2-Config Runner
 #
 # Runs selected tasks across 2 configurations:
-#   1. Baseline (BASELINE_MCP_TYPE=none)
-#   2. MCP-Full (BASELINE_MCP_TYPE=sourcegraph_full)
+#   1. baseline-local-direct (BASELINE_MCP_TYPE=none)
+#   2. mcp-remote-direct (BASELINE_MCP_TYPE=sourcegraph_full)
 #
 # Usage:
 #   ./configs/openhands_2config.sh [OPTIONS]
@@ -211,11 +211,11 @@ run_mode() {
 }
 
 if [ "$RUN_BASELINE" = true ]; then
-    run_mode "baseline" "none"
+    run_mode "baseline-local-direct" "none"
 fi
 
 if [ "$RUN_FULL" = true ]; then
-    run_mode "sourcegraph_full" "sourcegraph_full"
+    run_mode "mcp-remote-direct" "sourcegraph_full"
 fi
 
 print_validation_summary "$JOBS_BASE"
 
@@ -16,7 +16,7 @@
 #   --selection-file PATH           Use alternate selection file (default: selected_benchmark_tasks.json)
 #   --use-case-category CATEGORY    Filter by MCP-unique use case category (A-J), only valid with --selection-file
 #   --baseline-only                 Run only baseline (no MCP)
-#   --full-only                     Run only MCP-Full (sourcegraph_full)
+#   --full-only                     Run only MCP-Full (mcp-remote-direct)
 #   --model MODEL                   Override model (default: claude-opus-4-6)
 #   --concurrency N                 Concurrent tasks (default: 2)
 #   --category CATEGORY             Run category (default: staging)
@@ -199,7 +199,7 @@ echo "Source:        $SELECTION_FILE"
 echo "Model:         $MODEL"
 echo "Total tasks:   $TOTAL_TASKS"
 echo "Concurrency:   $CONCURRENCY"
-echo "Configs:       baseline=$RUN_BASELINE sourcegraph_full=$RUN_FULL"
+echo "Configs:       baseline-local-direct=$RUN_BASELINE mcp-remote-direct=$RUN_FULL"
 echo "Skip done:     $SKIP_COMPLETED"
 [ -n "$USE_CASE_CATEGORY_FILTER" ] && echo "Category:      $USE_CASE_CATEGORY_FILTER"
 echo ""
@@ -324,10 +324,10 @@ run_benchmark() {
 # ============================================
 for bm in $(echo "${!BENCHMARK_COUNTS[@]}" | tr ' ' '\n' | sort); do
     if [ "$RUN_BASELINE" = true ]; then
-        run_benchmark "$bm" "baseline" "none"
+        run_benchmark "$bm" "baseline-local-direct" "none"
     fi
     if [ "$RUN_FULL" = true ]; then
-        run_benchmark "$bm" "sourcegraph_full" "sourcegraph_full"
+        run_benchmark "$bm" "mcp-remote-direct" "sourcegraph_full"
     fi
 done