Rollup merge of #154865 - sunshowers:binary-search-test, r=Mark-Simulacrum

jhpratt · web-flow · commit d8048a2d1c7f · 2026-04-18T20:07:50.000-04:00
libtest: use binary search for --exact test filtering

When `--exact` is passed in, use binary search for O(f log n) lookups instead of an O(n) linear scan, under the assumption that f &lt;&lt; n (which is true for the most relevant cases).

This is important for Miri, where the interpreted execution makes the linear scan very expensive.

I measured this against a repo with 1000 empty tests, running `cargo +stage1 miri nextest run test_00` (100 tests) under hyperfine:

* Before (linear scan): 49.7s ± 0.6s
* After (binary search): 41.9s ± 0.2s  (-15.7%)

I also tried a few other variations (particularly swapping matching tests to the front of the list + truncating the list), but the index + swap_remove approach proved to be the fastest.

Questions:

- [ ] To be conservative, I've assumed that test_main can potentially receive an unsorted list of tests. Is this assumption correct?
diff --git a/compiler/rustc_builtin_macros/src/test_harness.rs b/compiler/rustc_builtin_macros/src/test_harness.rs
@@ -370,6 +370,8 @@ fn mk_tests_slice(cx: &TestCtxt<'_>, sp: Span) -> Box<ast::Expr> {
     let ecx = &cx.ext_cx;
 
     let mut tests = cx.test_cases.clone();
+    // Note that this sort is load-bearing: the libtest harness uses binary search to find tests by
+    // name.
     tests.sort_by(|a, b| a.name.as_str().cmp(b.name.as_str()));
 
     ecx.expr_array_ref(
diff --git a/library/test/src/console.rs b/library/test/src/console.rs
@@ -16,7 +16,7 @@ use super::helpers::metrics::MetricMap;
 use super::options::{Options, OutputFormat};
 use super::test_result::TestResult;
 use super::time::{TestExecTime, TestSuiteExecTime};
-use super::types::{NamePadding, TestDesc, TestDescAndFn};
+use super::types::{NamePadding, TestDesc, TestDescAndFn, TestList};
 use super::{filter_tests, run_tests, term};
 
 /// Generic wrapper over stdout.
@@ -170,7 +170,7 @@ impl ConsoleTestState {
 }
 
 // List the tests to console, and optionally to logfile. Filters are honored.
-pub(crate) fn list_tests_console(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> io::Result<()> {
+pub(crate) fn list_tests_console(opts: &TestOpts, tests: TestList) -> io::Result<()> {
     let output = match term::stdout() {
         None => OutputLocation::Raw(io::stdout().lock()),
         Some(t) => OutputLocation::Pretty(t),
@@ -186,7 +186,7 @@ pub(crate) fn list_tests_console(opts: &TestOpts, tests: Vec<TestDescAndFn>) ->
     let mut st = ConsoleTestDiscoveryState::new(opts)?;
 
     out.write_discovery_start()?;
-    for test in filter_tests(opts, tests).into_iter() {
+    for test in filter_tests(opts, tests) {
         use crate::TestFn::*;
 
         let TestDescAndFn { desc, testfn } = test;
@@ -307,8 +307,9 @@ pub(crate) fn get_formatter(opts: &TestOpts, max_name_len: usize) -> Box<dyn Out
 
 /// A simple console test runner.
 /// Runs provided tests reporting process and results to the stdout.
-pub fn run_tests_console(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> io::Result<bool> {
+pub fn run_tests_console(opts: &TestOpts, tests: TestList) -> io::Result<bool> {
     let max_name_len = tests
+        .tests
         .iter()
         .max_by_key(|t| len_if_padded(t))
         .map(|t| t.desc.name.as_slice().len())
diff --git a/library/test/src/lib.rs b/library/test/src/lib.rs
@@ -49,7 +49,7 @@ pub mod test {
     pub use crate::time::{TestExecTime, TestTimeOptions};
     pub use crate::types::{
         DynTestFn, DynTestName, StaticBenchFn, StaticTestFn, StaticTestName, TestDesc,
-        TestDescAndFn, TestId, TestName, TestType,
+        TestDescAndFn, TestId, TestList, TestListOrder, TestName, TestType,
     };
     pub use crate::{assert_test_result, filter_tests, run_test, test_main, test_main_static};
 }
@@ -106,6 +106,16 @@ pub fn test_main_with_exit_callback<F: FnOnce()>(
     tests: Vec<TestDescAndFn>,
     options: Option<Options>,
     exit_callback: F,
+) {
+    let tests = TestList::new(tests, TestListOrder::Unsorted);
+    test_main_inner(args, tests, options, exit_callback)
+}
+
+fn test_main_inner<F: FnOnce()>(
+    args: &[String],
+    tests: TestList,
+    options: Option<Options>,
+    exit_callback: F,
 ) {
     let mut opts = match cli::parse_opts(args) {
         Some(Ok(o)) => o,
@@ -180,7 +190,9 @@ pub fn test_main_with_exit_callback<F: FnOnce()>(
 pub fn test_main_static(tests: &[&TestDescAndFn]) {
     let args = env::args().collect::<Vec<_>>();
     let owned_tests: Vec<_> = tests.iter().map(make_owned_test).collect();
-    test_main(&args, owned_tests, None)
+    // Tests are sorted by name at compile time by mk_tests_slice.
+    let tests = TestList::new(owned_tests, TestListOrder::Sorted);
+    test_main_inner(&args, tests, None, || {})
 }
 
 /// A variant optimized for invocation with a static test vector.
@@ -229,7 +241,9 @@ pub fn test_main_static_abort(tests: &[&TestDescAndFn]) {
 
     let args = env::args().collect::<Vec<_>>();
     let owned_tests: Vec<_> = tests.iter().map(make_owned_test).collect();
-    test_main(&args, owned_tests, Some(Options::new().panic_abort(true)))
+    // Tests are sorted by name at compile time by mk_tests_slice.
+    let tests = TestList::new(owned_tests, TestListOrder::Sorted);
+    test_main_inner(&args, tests, Some(Options::new().panic_abort(true)), || {})
 }
 
 /// Clones static values for putting into a dynamic vector, which test_main()
@@ -298,7 +312,7 @@ impl FilteredTests {
 
 pub fn run_tests<F>(
     opts: &TestOpts,
-    tests: Vec<TestDescAndFn>,
+    tests: TestList,
     mut notify_about_test_event: F,
 ) -> io::Result<()>
 where
@@ -334,7 +348,7 @@ where
         timeout: Instant,
     }
 
-    let tests_len = tests.len();
+    let tests_len = tests.tests.len();
 
     let mut filtered = FilteredTests { tests: Vec::new(), benches: Vec::new(), next_id: 0 };
 
@@ -512,25 +526,48 @@ where
     Ok(())
 }
 
-pub fn filter_tests(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> Vec<TestDescAndFn> {
+pub fn filter_tests(opts: &TestOpts, tests: TestList) -> Vec<TestDescAndFn> {
+    let TestList { tests, order } = tests;
     let mut filtered = tests;
-    let matches_filter = |test: &TestDescAndFn, filter: &str| {
-        let test_name = test.desc.name.as_slice();
-
-        match opts.filter_exact {
-            true => test_name == filter,
-            false => test_name.contains(filter),
-        }
-    };
 
-    // Remove tests that don't match the test filter
+    // Remove tests that don't match the test filter.
     if !opts.filters.is_empty() {
-        filtered.retain(|test| opts.filters.iter().any(|filter| matches_filter(test, filter)));
+        if opts.filter_exact && order == TestListOrder::Sorted {
+            // Let's say that `f` is the number of filters and `n` is the number
+            // of tests.
+            //
+            // The test array is sorted by name (guaranteed by the caller via
+            // TestListOrder::Sorted), so use binary search for O(f log n)
+            // exact-match lookups instead of an O(n) linear scan.
+            //
+            // This is important for Miri, where the interpreted execution makes
+            // the linear scan very expensive.
+            filtered = filter_exact_match(filtered, &opts.filters);
+        } else {
+            filtered.retain(|test| {
+                let test_name = test.desc.name.as_slice();
+                opts.filters.iter().any(|filter| {
+                    if opts.filter_exact {
+                        test_name == filter.as_str()
+                    } else {
+                        test_name.contains(filter.as_str())
+                    }
+                })
+            });
+        }
     }
 
     // Skip tests that match any of the skip filters
+    //
+    // After exact positive filtering above, the filtered set is small, so a
+    // linear scan is acceptable even under Miri.
     if !opts.skip.is_empty() {
-        filtered.retain(|test| !opts.skip.iter().any(|sf| matches_filter(test, sf)));
+        filtered.retain(|test| {
+            let name = test.desc.name.as_slice();
+            !opts.skip.iter().any(|sf| {
+                if opts.filter_exact { name == sf.as_str() } else { name.contains(sf.as_str()) }
+            })
+        });
     }
 
     // Excludes #[should_panic] tests
@@ -553,6 +590,30 @@ pub fn filter_tests(opts: &TestOpts, tests: Vec<TestDescAndFn>) -> Vec<TestDescA
     filtered
 }
 
+/// Extract tests whose names exactly match one of the given `filters`, using
+/// binary search on the (assumed sorted) test list.
+fn filter_exact_match(mut tests: Vec<TestDescAndFn>, filters: &[String]) -> Vec<TestDescAndFn> {
+    // Binary search for each filter in the sorted test list.
+    let mut indexes: Vec<usize> = filters
+        .iter()
+        .filter_map(|f| tests.binary_search_by(|t| t.desc.name.as_slice().cmp(f.as_str())).ok())
+        .collect();
+    indexes.sort_unstable();
+    indexes.dedup();
+
+    // Extract matching tests. Process indexes in descending order so that
+    // swap_remove (which replaces the removed element with the last) does not
+    // invalidate indexes we haven't visited yet.
+    let mut result = Vec::with_capacity(indexes.len());
+    for &idx in indexes.iter().rev() {
+        result.push(tests.swap_remove(idx));
+    }
+    // Reverse to restore the original sorted order, since we extracted the
+    // matching tests in descending index order.
+    result.reverse();
+    result
+}
+
 pub fn convert_benchmarks_to_tests(tests: Vec<TestDescAndFn>) -> Vec<TestDescAndFn> {
     // convert benchmarks to tests, if we're not benchmarking them
     tests
diff --git a/library/test/src/tests.rs b/library/test/src/tests.rs
@@ -477,7 +477,7 @@ fn filter_for_ignored_option() {
     opts.run_tests = true;
     opts.run_ignored = RunIgnored::Only;
 
-    let tests = one_ignored_one_unignored_test();
+    let tests = TestList::new(one_ignored_one_unignored_test(), TestListOrder::Unsorted);
     let filtered = filter_tests(&opts, tests);
 
     assert_eq!(filtered.len(), 1);
@@ -494,7 +494,7 @@ fn run_include_ignored_option() {
     opts.run_tests = true;
     opts.run_ignored = RunIgnored::Yes;
 
-    let tests = one_ignored_one_unignored_test();
+    let tests = TestList::new(one_ignored_one_unignored_test(), TestListOrder::Unsorted);
     let filtered = filter_tests(&opts, tests);
 
     assert_eq!(filtered.len(), 2);
@@ -527,16 +527,16 @@ fn exclude_should_panic_option() {
         testfn: DynTestFn(Box::new(move || Ok(()))),
     });
 
-    let filtered = filter_tests(&opts, tests);
+    let filtered = filter_tests(&opts, TestList::new(tests, TestListOrder::Unsorted));
 
     assert_eq!(filtered.len(), 2);
     assert!(filtered.iter().all(|test| test.desc.should_panic == ShouldPanic::No));
 }
 
 #[test]
 fn exact_filter_match() {
-    fn tests() -> Vec<TestDescAndFn> {
-        ["base", "base::test", "base::test1", "base::test2"]
+    fn tests() -> TestList {
+        let tests = ["base", "base::test", "base::test1", "base::test2"]
             .into_iter()
             .map(|name| TestDescAndFn {
                 desc: TestDesc {
@@ -555,7 +555,8 @@ fn exact_filter_match() {
                 },
                 testfn: DynTestFn(Box::new(move || Ok(()))),
             })
-            .collect()
+            .collect();
+        TestList::new(tests, TestListOrder::Sorted)
     }
 
     let substr =
@@ -908,7 +909,8 @@ fn test_dyn_bench_returning_err_fails_when_run_as_test() {
         }
         Ok(())
     };
-    run_tests(&TestOpts { run_tests: true, ..TestOpts::new() }, vec![desc], notify).unwrap();
+    let tests = TestList::new(vec![desc], TestListOrder::Unsorted);
+    run_tests(&TestOpts { run_tests: true, ..TestOpts::new() }, tests, notify).unwrap();
     let result = rx.recv().unwrap().result;
     assert_eq!(result, TrFailed);
 }
diff --git a/library/test/src/types.rs b/library/test/src/types.rs
@@ -284,3 +284,30 @@ impl TestDescAndFn {
         }
     }
 }
+
+/// Whether a [`TestList`]'s tests are known to be sorted by name.
+///
+/// When tests are sorted, `filter_tests` can use binary search for `--exact`
+/// matches instead of a linear scan.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum TestListOrder {
+    /// Tests are sorted by name. This is guaranteed for tests generated by
+    /// `rustc --test` (see `mk_tests_slice` in
+    /// `compiler/rustc_builtin_macros/src/test_harness.rs`).
+    Sorted,
+    /// Test order is unknown; binary search must not be used.
+    Unsorted,
+}
+
+/// A list of tests, tagged with whether they are sorted by name.
+#[derive(Debug)]
+pub struct TestList {
+    pub tests: Vec<TestDescAndFn>,
+    pub order: TestListOrder,
+}
+
+impl TestList {
+    pub fn new(tests: Vec<TestDescAndFn>, order: TestListOrder) -> Self {
+        Self { tests, order }
+    }
+}