Skip to content

Commit dca599a

Browse files
ibolmoviadezo1er
authored and committed
sending up local
1 parent 2bfad84 commit dca599a

13 files changed

Lines changed: 6181 additions & 23 deletions

File tree

src/eval.rs

Lines changed: 88 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -704,29 +704,52 @@ fn build_eval_plans(
704704
python_runner_override: Option<&str>,
705705
) -> Result<Vec<EvalPlan>> {
706706
let partitions = partition_files_by_language(files)?;
707-
partitions
708-
.into_iter()
709-
.map(|(language, files)| {
710-
let plan_runner = match language {
711-
EvalLanguage::JavaScript => js_runner_override,
712-
EvalLanguage::Python => python_runner_override,
713-
};
714-
let show_js_hint = language == EvalLanguage::JavaScript && js_runner_override.is_none();
715-
let has_ts_files = language == EvalLanguage::JavaScript && has_ts_eval_files(&files);
716-
let retry_policy = if show_js_hint && has_ts_files {
717-
RetryPolicy::Allow
718-
} else {
719-
RetryPolicy::Disallow
720-
};
721-
Ok(EvalPlan {
722-
language,
723-
files,
724-
runner_override: plan_runner.map(ToOwned::to_owned),
725-
show_js_hint,
726-
retry_policy,
727-
})
728-
})
729-
.collect()
707+
let mut plans = Vec::new();
708+
for (language, files) in partitions {
709+
match language {
710+
EvalLanguage::JavaScript => {
711+
let show_js_hint = js_runner_override.is_none();
712+
let has_ts_files = has_ts_eval_files(&files);
713+
let retry_policy = if show_js_hint && has_ts_files {
714+
RetryPolicy::Allow
715+
} else {
716+
RetryPolicy::Disallow
717+
};
718+
plans.push(EvalPlan {
719+
language,
720+
files,
721+
runner_override: js_runner_override.map(ToOwned::to_owned),
722+
show_js_hint,
723+
retry_policy,
724+
});
725+
}
726+
EvalLanguage::Python => {
727+
if let Some(override_) = python_runner_override {
728+
// User explicitly specified a runner; one plan for all Python files.
729+
plans.push(EvalPlan {
730+
language,
731+
files,
732+
runner_override: Some(override_.to_owned()),
733+
show_js_hint: false,
734+
retry_policy: RetryPolicy::Disallow,
735+
});
736+
} else {
737+
// Group by nearest venv; one plan per group so each file runs with
738+
// the Python interpreter that owns its packages.
739+
for (group_files, venv_python) in group_python_files_by_venv(&files) {
740+
plans.push(EvalPlan {
741+
language: EvalLanguage::Python,
742+
files: group_files,
743+
runner_override: venv_python,
744+
show_js_hint: false,
745+
retry_policy: RetryPolicy::Disallow,
746+
});
747+
}
748+
}
749+
}
750+
}
751+
}
752+
Ok(plans)
730753
}
731754

732755
async fn run_eval_plan_once(
@@ -2733,6 +2756,48 @@ fn set_node_heap_size_env(command: &mut Command) {
27332756
command.env("NODE_OPTIONS", merged);
27342757
}
27352758

2759+
/// Build the path of the Python interpreter inside the virtual environment `venv`.
///
/// On Windows builds the interpreter is `Scripts/python.exe`; on every other
/// target it is `bin/python`. The path is only constructed here — nothing on
/// disk is inspected, so callers must check for existence themselves.
fn venv_python_path(venv: &Path) -> PathBuf {
    // Pick the platform-specific (directory, executable) pair once, then join.
    let (bin_dir, executable) = if cfg!(windows) {
        ("Scripts", "python.exe")
    } else {
        ("bin", "python")
    };
    venv.join(bin_dir).join(executable)
}
2766+
2767+
/// Walk up from `file`'s directory checking common venv directory names.
2768+
fn find_venv_for_file(file: &str) -> Option<PathBuf> {
2769+
let start = Path::new(file)
2770+
.canonicalize()
2771+
.ok()
2772+
.and_then(|p| p.parent().map(PathBuf::from))?;
2773+
2774+
const VENV_NAMES: &[&str] = &[".venv", "venv"];
2775+
let mut current = Some(start.as_path());
2776+
while let Some(dir) = current {
2777+
for name in VENV_NAMES {
2778+
let python = venv_python_path(&dir.join(name));
2779+
if python.is_file() {
2780+
return Some(python);
2781+
}
2782+
}
2783+
current = dir.parent();
2784+
}
2785+
None
2786+
}
2787+
2788+
/// Group Python files by their nearest venv, preserving input order.
2789+
/// Files with no discoverable venv are grouped together under `None`.
2790+
fn group_python_files_by_venv(files: &[String]) -> Vec<(Vec<String>, Option<String>)> {
2791+
let mut groups: Vec<(Option<String>, Vec<String>)> = Vec::new();
2792+
for file in files {
2793+
let venv_python = find_venv_for_file(file).map(|p| p.to_string_lossy().to_string());
2794+
match groups.iter_mut().find(|(k, _)| *k == venv_python) {
2795+
Some(group) => group.1.push(file.clone()),
2796+
None => groups.push((venv_python, vec![file.clone()])),
2797+
}
2798+
}
2799+
groups.into_iter().map(|(python, files)| (files, python)).collect()
2800+
}
27362801
fn eval_runner_cache_dir() -> PathBuf {
27372802
let root = std::env::var_os("XDG_CACHE_HOME")
27382803
.map(PathBuf::from)

tests/eval_fixtures.rs

Lines changed: 84 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1602,3 +1602,87 @@ fn python_can_import_braintrust(python: &str) -> bool {
16021602
.map(|status| status.success())
16031603
.unwrap_or(false)
16041604
}
1605+
1606+
/// Run `uv sync` with `dir` as the working directory.
///
/// # Panics
///
/// Panics if the `uv` process cannot be started, or if it exits with a
/// non-success status.
fn uv_sync(dir: &Path) {
    let status = Command::new("uv")
        .arg("sync")
        .current_dir(dir)
        .status()
        .expect("uv sync failed to run");
    assert!(status.success(), "uv sync failed in {}", dir.display());
}
1614+
1615+
/// Two eval files each with their own .venv pinned to distinct Python versions.
1616+
/// Each eval asserts its Python version, so if bt uses the wrong venv the assertion fails.
1617+
/// This proves bt selected the correct Python per file, not just any venv.
1618+
#[test]
1619+
fn eval_python_two_files_two_venvs() {
1620+
let _guard = test_lock();
1621+
1622+
assert!(command_exists("uv"), "uv is required for this test but is not installed");
1623+
1624+
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1625+
let bt_path = bt_binary_path(&root);
1626+
let fixtures_py = root.join("tests").join("evals").join("py");
1627+
1628+
let dir_a = fixtures_py.join("multi-venv-a");
1629+
let dir_b = fixtures_py.join("multi-venv-b");
1630+
1631+
// Each fixture has a pyproject.toml pinning a distinct Python version.
1632+
// uv sync creates the .venv; the eval files assert their respective version.
1633+
uv_sync(&dir_a);
1634+
uv_sync(&dir_b);
1635+
1636+
let output = Command::new(&bt_path)
1637+
.args(["eval", "--jsonl"])
1638+
.arg(dir_a.join("eval_a.py"))
1639+
.arg(dir_b.join("eval_b.py"))
1640+
.current_dir(&root)
1641+
.env_remove("VIRTUAL_ENV")
1642+
.env("BT_EVAL_LOCAL", "1")
1643+
.env("BRAINTRUST_API_KEY", "local")
1644+
.output()
1645+
.expect("run bt eval");
1646+
1647+
assert!(
1648+
output.status.success(),
1649+
"bt should run each file with its own versioned venv.\nstdout:\n{}\nstderr:\n{}",
1650+
String::from_utf8_lossy(&output.stdout),
1651+
String::from_utf8_lossy(&output.stderr),
1652+
);
1653+
}
1654+
1655+
/// bt auto-discovers the .venv adjacent to the eval file without VIRTUAL_ENV being set.
1656+
/// Uses the multi-venv-a fixture (pyproject.toml pins Python 3.11; eval asserts the version)
1657+
/// so the test is self-contained and doesn't depend on the system Python environment.
1658+
#[test]
1659+
fn eval_python_venv_adjacent_auto_discovered() {
1660+
let _guard = test_lock();
1661+
1662+
assert!(command_exists("uv"), "uv is required for this test but is not installed");
1663+
1664+
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1665+
let bt_path = bt_binary_path(&root);
1666+
let dir_a = root.join("tests").join("evals").join("py").join("multi-venv-a");
1667+
1668+
// uv sync creates the .venv from pyproject.toml (requires-python = "==3.11.*").
1669+
// The eval asserts sys.version_info == (3, 11), so if bt picks any other Python the test fails.
1670+
uv_sync(&dir_a);
1671+
1672+
let output = Command::new(&bt_path)
1673+
.args(["eval", "--jsonl"])
1674+
.arg(dir_a.join("eval_a.py"))
1675+
.current_dir(&root)
1676+
.env_remove("VIRTUAL_ENV")
1677+
.env("BT_EVAL_LOCAL", "1")
1678+
.env("BRAINTRUST_API_KEY", "local")
1679+
.output()
1680+
.expect("run bt eval");
1681+
1682+
assert!(
1683+
output.status.success(),
1684+
"bt should find the .venv adjacent to the eval file without VIRTUAL_ENV being set.\nstdout:\n{}\nstderr:\n{}",
1685+
String::from_utf8_lossy(&output.stdout),
1686+
String::from_utf8_lossy(&output.stderr),
1687+
);
1688+
}

0 commit comments

Comments
 (0)