braintrustdata
diff --git a/‎src/eval.rs‎
Lines changed: 88 additions & 23 deletions b/‎src/eval.rs‎
Lines changed: 88 additions & 23 deletions
diff --git a/‎tests/eval_fixtures.rs‎
Lines changed: 84 additions & 0 deletions b/‎tests/eval_fixtures.rs‎
Lines changed: 84 additions & 0 deletions
@@ -704,29 +704,52 @@ fn build_eval_plans(
     python_runner_override: Option<&str>,
 ) -> Result<Vec<EvalPlan>> {
     let partitions = partition_files_by_language(files)?;
-    partitions
-        .into_iter()
-        .map(|(language, files)| {
-            let plan_runner = match language {
-                EvalLanguage::JavaScript => js_runner_override,
-                EvalLanguage::Python => python_runner_override,
-            };
-            let show_js_hint = language == EvalLanguage::JavaScript && js_runner_override.is_none();
-            let has_ts_files = language == EvalLanguage::JavaScript && has_ts_eval_files(&files);
-            let retry_policy = if show_js_hint && has_ts_files {
-                RetryPolicy::Allow
-            } else {
-                RetryPolicy::Disallow
-            };
-            Ok(EvalPlan {
-                language,
-                files,
-                runner_override: plan_runner.map(ToOwned::to_owned),
-                show_js_hint,
-                retry_policy,
-            })
-        })
-        .collect()
+    let mut plans = Vec::new();
+    for (language, files) in partitions {
+        match language {
+            EvalLanguage::JavaScript => {
+                let show_js_hint = js_runner_override.is_none();
+                let has_ts_files = has_ts_eval_files(&files);
+                let retry_policy = if show_js_hint && has_ts_files {
+                    RetryPolicy::Allow
+                } else {
+                    RetryPolicy::Disallow
+                };
+                plans.push(EvalPlan {
+                    language,
+                    files,
+                    runner_override: js_runner_override.map(ToOwned::to_owned),
+                    show_js_hint,
+                    retry_policy,
+                });
+            }
+            EvalLanguage::Python => {
+                if let Some(override_) = python_runner_override {
+                    // User explicitly specified a runner; one plan for all Python files.
+                    plans.push(EvalPlan {
+                        language,
+                        files,
+                        runner_override: Some(override_.to_owned()),
+                        show_js_hint: false,
+                        retry_policy: RetryPolicy::Disallow,
+                    });
+                } else {
+                    // Group by nearest venv; one plan per group so each file runs with
+                    // the Python interpreter that owns its packages.
+                    for (group_files, venv_python) in group_python_files_by_venv(&files) {
+                        plans.push(EvalPlan {
+                            language: EvalLanguage::Python,
+                            files: group_files,
+                            runner_override: venv_python,
+                            show_js_hint: false,
+                            retry_policy: RetryPolicy::Disallow,
+                        });
+                    }
+                }
+            }
+        }
+    }
+    Ok(plans)
 }
 
 async fn run_eval_plan_once(
@@ -2733,6 +2756,48 @@ fn set_node_heap_size_env(command: &mut Command) {
     command.env("NODE_OPTIONS", merged);
 }
 
+/// Build the path where a virtualenv rooted at `venv` keeps its Python interpreter.
+///
+/// On Windows targets this is `<venv>/Scripts/python.exe`; on every other target it
+/// is `<venv>/bin/python`. The path is only constructed, never checked for existence.
+fn venv_python_path(venv: &Path) -> PathBuf {
+    let (bin_dir, executable) = if cfg!(windows) {
+        ("Scripts", "python.exe")
+    } else {
+        ("bin", "python")
+    };
+    venv.join(bin_dir).join(executable)
+}
+
+/// Locate the Python interpreter of the virtualenv nearest to `file`.
+///
+/// `file` is canonicalized, then its directory and each ancestor directory are
+/// inspected, closest first. In every directory the venv folder names `.venv` and
+/// `venv` are tried in that order, and the first interpreter path (as built by
+/// `venv_python_path`) that exists as a regular file is returned.
+///
+/// Returns `None` when `file` cannot be canonicalized, when the canonical path has
+/// no parent directory, or when no interpreter is found anywhere up the tree.
+fn find_venv_for_file(file: &str) -> Option<PathBuf> {
+    const VENV_NAMES: &[&str] = &[".venv", "venv"];
+
+    let resolved = Path::new(file).canonicalize().ok()?;
+    let start = resolved.parent()?;
+
+    // `ancestors` yields `start` itself first and then each parent in turn, so a
+    // venv next to the file wins over one further up the tree.
+    start
+        .ancestors()
+        .flat_map(|dir| {
+            VENV_NAMES
+                .iter()
+                .map(move |name| venv_python_path(&dir.join(name)))
+        })
+        .find(|python| python.is_file())
+}
+
+/// Partition `files` by the venv interpreter that `find_venv_for_file` reports for each.
+///
+/// Each returned pair is `(files_in_group, interpreter_path)`. Groups appear in the
+/// order their first member occurs in `files`, and files keep their input order
+/// within a group. Files for which no interpreter is found share a single group
+/// whose second element is `None`.
+fn group_python_files_by_venv(files: &[String]) -> Vec<(Vec<String>, Option<String>)> {
+    let mut buckets: Vec<(Vec<String>, Option<String>)> = Vec::new();
+    for path in files {
+        let interpreter = find_venv_for_file(path).map(|p| p.to_string_lossy().into_owned());
+        // Linear scan keeps first-seen ordering of the groups.
+        match buckets.iter().position(|(_, key)| *key == interpreter) {
+            Some(idx) => buckets[idx].0.push(path.clone()),
+            None => buckets.push((vec![path.clone()], interpreter)),
+        }
+    }
+    buckets
+}
 fn eval_runner_cache_dir() -> PathBuf {
     let root = std::env::var_os("XDG_CACHE_HOME")
         .map(PathBuf::from)
 
@@ -1602,3 +1602,87 @@ fn python_can_import_braintrust(python: &str) -> bool {
         .map(|status| status.success())
         .unwrap_or(false)
 }
+
+/// Run `uv sync` with `dir` as the working directory.
+///
+/// Panics if the `uv` process cannot be started or if it exits unsuccessfully.
+fn uv_sync(dir: &Path) {
+    let mut uv = Command::new("uv");
+    uv.arg("sync").current_dir(dir);
+    let exit = uv.status().expect("uv sync failed to run");
+    assert!(exit.success(), "uv sync failed in {}", dir.display());
+}
+
+/// Runs a single `bt eval` over two Python eval files that live in separate fixture
+/// directories (`multi-venv-a` and `multi-venv-b`), each prepared by its own `uv sync`.
+/// The fixtures are expected to pin distinct Python versions and to assert that
+/// version from inside the eval file (fixture contents are not visible from this
+/// file), so a successful exit shows that every file ran under its own `.venv`.
+#[test]
+fn eval_python_two_files_two_venvs() {
+    let _guard = test_lock();
+
+    assert!(command_exists("uv"), "uv is required for this test but is not installed");
+
+    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    let bt_path = bt_binary_path(&root);
+
+    let mut fixtures_py = root.clone();
+    fixtures_py.extend(["tests", "evals", "py"]);
+    let dir_a = fixtures_py.join("multi-venv-a");
+    let dir_b = fixtures_py.join("multi-venv-b");
+
+    // Create both virtualenvs before invoking bt so each file has one to discover.
+    uv_sync(&dir_a);
+    uv_sync(&dir_b);
+
+    let mut bt = Command::new(&bt_path);
+    bt.args(["eval", "--jsonl"])
+        .arg(dir_a.join("eval_a.py"))
+        .arg(dir_b.join("eval_b.py"))
+        .current_dir(&root)
+        // Drop any inherited venv so bt has to pick an interpreter per file itself.
+        .env_remove("VIRTUAL_ENV")
+        .env("BT_EVAL_LOCAL", "1")
+        .env("BRAINTRUST_API_KEY", "local");
+    let output = bt.output().expect("run bt eval");
+
+    assert!(
+        output.status.success(),
+        "bt should run each file with its own versioned venv.\nstdout:\n{}\nstderr:\n{}",
+        String::from_utf8_lossy(&output.stdout),
+        String::from_utf8_lossy(&output.stderr),
+    );
+}
+
+/// Checks that `bt eval` picks up the `.venv` sitting next to an eval file when
+/// `VIRTUAL_ENV` is absent from the environment.
+/// Reuses the `multi-venv-a` fixture; its `pyproject.toml` is expected to pin
+/// Python 3.11 and its eval to assert that version (fixture contents are not visible
+/// from this file), which keeps the outcome independent of the system Python.
+#[test]
+fn eval_python_venv_adjacent_auto_discovered() {
+    let _guard = test_lock();
+
+    assert!(command_exists("uv"), "uv is required for this test but is not installed");
+
+    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    let bt_path = bt_binary_path(&root);
+
+    let mut dir_a = root.clone();
+    dir_a.extend(["tests", "evals", "py", "multi-venv-a"]);
+
+    // `uv sync` materializes the fixture's `.venv`; the run below can only succeed
+    // if bt then launches the eval with that environment's interpreter.
+    uv_sync(&dir_a);
+
+    let mut bt = Command::new(&bt_path);
+    bt.args(["eval", "--jsonl"])
+        .arg(dir_a.join("eval_a.py"))
+        .current_dir(&root)
+        // No activated venv is leaked into the child process.
+        .env_remove("VIRTUAL_ENV")
+        .env("BT_EVAL_LOCAL", "1")
+        .env("BRAINTRUST_API_KEY", "local");
+    let output = bt.output().expect("run bt eval");
+
+    assert!(
+        output.status.success(),
+        "bt should find the .venv adjacent to the eval file without VIRTUAL_ENV being set.\nstdout:\n{}\nstderr:\n{}",
+        String::from_utf8_lossy(&output.stdout),
+        String::from_utf8_lossy(&output.stderr),
+    );
+}