diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..803ae0a --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,52 @@ +name: Benchmarks +on: + push: + tags: ['v*'] + pull_request: + paths: + - 'src/**' + - 'benchmark/**' + - '.github/workflows/benchmark.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + benchmark: + name: Benchmark suite + runs-on: ubuntu-latest + timeout-minutes: 60 + permissions: + actions: write + contents: read + steps: + - uses: actions/checkout@v6 + + - uses: julia-actions/setup-julia@v2 + with: + version: '1.11' + arch: x64 + + - uses: julia-actions/cache@v2 + + - name: Instantiate benchmark environment + run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' + + - name: Run benchmarks + env: + BENCHMARK_RUNNER: github-actions + run: | + julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") + ' + + - name: Upload benchmark artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: benchmark-${{ github.event.pull_request.number || github.ref_name }}-${{ github.sha }} + path: benchmark/results/ + retention-days: 90 diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 0000000..ca28c11 --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1,2 @@ +results/ +Manifest.toml diff --git a/benchmark/BenchmarkUtils.jl b/benchmark/BenchmarkUtils.jl deleted file mode 100644 index 8b13789..0000000 --- a/benchmark/BenchmarkUtils.jl +++ /dev/null @@ -1 +0,0 @@ - diff --git a/benchmark/Project.toml b/benchmark/Project.toml new file mode 100644 index 0000000..2255f41 --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,23 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +DirectTrajOpt = 
"c823fa1f-8872-4af5-b810-2b9b72bbbf56" +ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" +HarmoniqsBenchmarks = "f45d0b76-2d23-4568-9599-481e0da131db" +MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" +MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" +TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" + +[sources] +DirectTrajOpt = {path = ".."} +# HBJ not yet registered in General; pin to a specific commit so benchmark +# results are reproducible. Bump this SHA (and the local Manifest) when HBJ +# ships a new feature we want to use. Drop in favor of [compat] once HBJ +# registers in General. +HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "5401542c477c0f2da6d66028c513e8a278f4875f"} diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..d8bdeef --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,45 @@ +# DirectTrajOpt Benchmarks + +Benchmark suite for DirectTrajOpt.jl comparing Ipopt and MadNLP solver performance. + +For results and analysis, see the [Benchmarks page](https://docs.harmoniqs.co/DirectTrajOpt.jl/dev/benchmarks/) in the documentation. + +## Running locally + +```bash +# From DirectTrajOpt.jl root +julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' + +julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") +' +``` + +Artifacts are saved as JLD2 files in `benchmark/results/` (gitignored). 
+ +## Benchmark suites + +- **Evaluator micro-benchmarks** — `BenchmarkTools.@benchmark` timings for each MOI eval function (objective, gradient, constraint, jacobian, hessian_lagrangian) on bilinear N=51 +- **Ipopt vs MadNLP** — full solve comparison on bilinear N=51 +- **Memory scaling study** — N ∈ {25, 51, 101} × state_dim ∈ {4, 8, 16} + +## Schema + +Results use `BenchmarkResult` / `MicroBenchmarkResult` from [HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl). + +Load with: +```julia +using HarmoniqsBenchmarks +results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") +micro = load_micro_results("benchmark/results/evaluator_micro_bilinear_N51_.jld2") +``` + +## Regression detection + +```julia +using HarmoniqsBenchmarks +baseline = load_results("benchmark/results/memory_scaling_.jld2") +current = load_results("benchmark/results/memory_scaling_.jld2") +rows = compare_results(baseline, current; regression_threshold=10.0) +``` diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 0000000..9278f49 --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,258 @@ +using TestItems + +@testitem "Evaluator micro-benchmarks: bilinear N=51" begin + using HarmoniqsBenchmarks, BenchmarkTools, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, MathOptInterface, Random, Dates, Printf, Pkg + const MOI = MathOptInterface + + include("$(joinpath(@__DIR__, "problem_utils.jl"))") + + N = 51 + prob = make_bilinear_problem(; N = N, seed = 42) + + evaluator, Z_vec = build_evaluator(prob) + dims = evaluator_dims(evaluator) + + g = zeros(dims.n_constraints) + grad = zeros(dims.n_variables) + H = zeros(dims.n_hessian_entries) + Jac = zeros(dims.n_jacobian_entries) + sigma = 1.0 + mu = ones(dims.n_constraints) + + benchmarks = Dict{Symbol,EvalBenchmark}( + :eval_objective => + trial_to_eval_benchmark(@benchmark(MOI.eval_objective($evaluator, $Z_vec))), + :eval_gradient => 
trial_to_eval_benchmark( + @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)) + ), + :eval_constraint => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)) + ), + :eval_jacobian => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec)) + ), + :eval_hessian_lagrangian => trial_to_eval_benchmark( + @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)) + ), + ) + + pkg_version = let v = nothing + try + for (_, info) in Pkg.dependencies() + if info.name == "DirectTrajOpt" + v = info.version + break + end + end + catch e + @warn "Failed to look up DirectTrajOpt version from Pkg.dependencies" exception = + (e, catch_backtrace()) + end + isnothing(v) ? "unknown" : string(v) + end + + pdims = problem_dims(prob) + + commit_sha = try + String(strip(read(`git rev-parse --short HEAD`, String))) + catch e + @warn "Failed to capture git commit SHA" exception = (e, catch_backtrace()) + "unknown" + end + + result = MicroBenchmarkResult( + package = "DirectTrajOpt", + package_version = pkg_version, + commit = commit_sha, + benchmark_name = "evaluator_micro_bilinear_N51", + N = N, + state_dim = pdims.state_dim, + control_dim = pdims.control_dim, + eval_benchmarks = benchmarks, + julia_version = string(VERSION), + timestamp = Dates.now(), + runner = get(ENV, "BENCHMARK_RUNNER", "local"), + n_threads = Threads.nthreads(), + ) + + println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===") + for (name, eb) in sort(collect(result.eval_benchmarks), by = first) + @printf( + " %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", + name, + eb.median_ns, + eb.allocs, + eb.memory_bytes + ) + end + + results_dir = joinpath(@__DIR__, "results") + save_micro_results(results_dir, result.benchmark_name, result) + println(" Saved to $results_dir/") +end + +@testitem "Ipopt vs MadNLP: bilinear N=51" begin + using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories + using 
SparseArrays, ExponentialAction, Random, Dates + import MadNLP + + include("$(joinpath(@__DIR__, "problem_utils.jl"))") + + runner = get(ENV, "BENCHMARK_RUNNER", "local") + + # Warm up both solvers on a tiny problem so first-call JIT compile + # (Ipopt/MadNLP extension load, KKT/AD codegen) doesn't pollute the + # timed solve. Discard the warmup results. + let warmup_prob = make_bilinear_problem(; N = 11, seed = 0) + DirectTrajOpt.solve!( + warmup_prob; + options = IpoptOptions(max_iter = 2, print_level = 0), + ) + end + let warmup_prob = make_bilinear_problem(; N = 11, seed = 0) + DirectTrajOpt.solve!( + warmup_prob; + options = MadNLPOptions(max_iter = 2, print_level = 6), + ) + end + + prob_ipopt = make_bilinear_problem(; N = 51, seed = 42) + result_ipopt = benchmark_solve!( + prob_ipopt, + IpoptOptions(max_iter = 200, print_level = 0); + benchmark_name = "bilinear_N51_ipopt", + runner = runner, + ) + + prob_madnlp = make_bilinear_problem(; N = 51, seed = 42) + result_madnlp = benchmark_solve!( + prob_madnlp, + MadNLPOptions(max_iter = 200, print_level = 6); + benchmark_name = "bilinear_N51_madnlp", + runner = runner, + ) + + println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") + println( + " Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB alloc", + ) + println( + " MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, $(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc", + ) + + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "ipopt_vs_madnlp_N51", [result_ipopt, result_madnlp]) +end + +@testitem "Memory scaling: N and state_dim sweep" begin + using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, Random, Dates, Printf, Statistics + import MadNLP + + include("$(joinpath(@__DIR__, "problem_utils.jl"))") + + runner = get(ENV, "BENCHMARK_RUNNER", "local") + + # JIT warmup: the first solve in this process compiles Ipopt/MadNLP + # 
extensions and the AD pipeline. Pay that cost on a throwaway tiny + # problem so the smallest cell in the sweep is not order-biased. + let warmup_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0) + DirectTrajOpt.solve!( + warmup_prob; + options = IpoptOptions(max_iter = 2, print_level = 0), + ) + end + let warmup_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0) + DirectTrajOpt.solve!( + warmup_prob; + options = MadNLPOptions(max_iter = 2, print_level = 6), + ) + end + + N_values = [25, 51, 101] + dim_values = [4, 8, 16] + # Median over `n_seeds` random instances per (N, dim) cell. Single-shot + # timing on random instances is noisy enough to be misleading — one + # earlier run had N=25, dim=8 return Ipopt in 11ms allocating 22KB + # because that one seed (`1000 + 100*25 + 8 = 3508`) produced a + # degenerate initial point Ipopt resolved at iteration 0. Median over + # K samples washes the anomaly out without sacrificing reproducibility: + # the per-seed BenchmarkResults are all saved to JLD2 so the raw + # distribution is available for downstream analysis. + n_seeds = 3 + results = BenchmarkResult[] + + println("\n=== Memory Scaling Study (median over $n_seeds seeds per cell) ===") + @printf( + " %5s | %5s | %12s | %12s | %12s | %12s\n", + "N", + "dim", + "Ipopt (s)", + "Ipopt (KB)", + "MadNLP (s)", + "MadNLP (KB)" + ) + @printf( + " %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", + "-"^5, + "-"^5, + "-"^12, + "-"^12, + "-"^12, + "-"^12 + ) + + for N in N_values + for dim in dim_values + ipopt_walls = Float64[] + madnlp_walls = Float64[] + ipopt_alloc_kb = Int[] + madnlp_alloc_kb = Int[] + + for k = 1:n_seeds + # Deterministic distinct seed per (N, dim, k) sample. Both + # solvers receive the *same* instance for that (N, dim, k) + # so the per-seed Ipopt-vs-MadNLP comparison is fair; only + # the choice of instance varies across the K samples. 
+ cell_seed = 1000 + 100 * N + dim + 10_000 * (k - 1) + + prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) + r_ipopt = benchmark_solve!( + prob, + IpoptOptions(max_iter = 50, print_level = 0); + benchmark_name = "scaling_N$(N)_d$(dim)_ipopt_s$(k)", + runner = runner, + ) + push!(results, r_ipopt) + push!(ipopt_walls, r_ipopt.wall_time_s) + push!(ipopt_alloc_kb, r_ipopt.total_allocations_bytes ÷ 1024) + + prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) + r_madnlp = benchmark_solve!( + prob, + MadNLPOptions(max_iter = 50, print_level = 6); + benchmark_name = "scaling_N$(N)_d$(dim)_madnlp_s$(k)", + runner = runner, + ) + push!(results, r_madnlp) + push!(madnlp_walls, r_madnlp.wall_time_s) + push!(madnlp_alloc_kb, r_madnlp.total_allocations_bytes ÷ 1024) + end + + @printf( + " %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", + N, + dim, + median(ipopt_walls), + round(Int, median(ipopt_alloc_kb)), + median(madnlp_walls), + round(Int, median(madnlp_alloc_kb)) + ) + end + end + + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "memory_scaling", results) + println("\n Saved $(length(results)) results to $results_dir/") +end diff --git a/benchmark/problem_utils.jl b/benchmark/problem_utils.jl new file mode 100644 index 0000000..b0f4cbc --- /dev/null +++ b/benchmark/problem_utils.jl @@ -0,0 +1,77 @@ +# Shared problem constructors for DirectTrajOpt benchmarks. +# Included by each @testitem via `include("$(joinpath(@__DIR__, "problem_utils.jl"))")`. + +""" + make_bilinear_problem(; N=51, seed=42) + +Standard bilinear quantum-gate problem: 4D state (real Pauli representation), +2D control, with derivative and timestep integrators. 
+""" +function make_bilinear_problem(; N::Int = 51, seed::Int = 42) + Random.seed!(seed) + Δt = 0.1 + u_bound = 0.1 + ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound * (2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), + ) + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) +end + +""" + make_scaled_problem(; N, state_dim, n_controls=2, seed=42) + +Random bilinear problem with configurable dimensions for scaling studies. 
+""" +function make_scaled_problem(; N::Int, state_dim::Int, n_controls::Int = 2, seed::Int = 42) + Random.seed!(seed) + G_drift = sparse(randn(state_dim, state_dim)) + G_drives = [sparse(randn(state_dim, state_dim)) for _ = 1:n_controls] + G(u) = G_drift + sum(u[i] * G_drives[i] for i = 1:n_controls) + + x_init = zeros(state_dim) + x_init[1] = 1.0 + x_goal = zeros(state_dim) + x_goal[min(2, state_dim)] = 1.0 + + traj = NamedTrajectory( + ( + x = randn(state_dim, N), + u = 0.1 * randn(n_controls, N), + du = randn(n_controls, N), + Δt = fill(0.1, N), + ); + controls = (:du, :Δt), + timestep = :Δt, + bounds = (u = 1.0, Δt = (0.01, 0.5)), + initial = (x = x_init, u = zeros(n_controls)), + final = (u = zeros(n_controls),), + goal = (x = x_goal,), + ) + integrators = [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] + J = QuadraticRegularizer(:u, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) +end diff --git a/docs/make.jl b/docs/make.jl index 9d74213..908806f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -20,6 +20,7 @@ pages = [ "Minimum Time" => "generated/tutorials/minimum_time.md", ], "Library" => "lib.md", + "Benchmarks" => "benchmarks.md", ] generate_docs( diff --git a/docs/src/benchmarks.md b/docs/src/benchmarks.md new file mode 100644 index 0000000..e0d67f3 --- /dev/null +++ b/docs/src/benchmarks.md @@ -0,0 +1,126 @@ +# Benchmarks + +DirectTrajOpt ships a benchmark suite under [`benchmark/`](https://github.com/harmoniqs/DirectTrajOpt.jl/tree/main/benchmark) +that exercises the package under both Ipopt and MadNLP on a shared bilinear +quantum-gate problem: find a pulse sequence ``u(t)`` that steers a qubit state +from ``|0\rangle`` to ``|1\rangle`` under bilinear dynamics + +```math +\dot{x}(t) = \left(\omega G_z + u_x(t) G_x + u_y(t) G_y\right) x(t) +``` + +with ``G_x, G_y, G_z`` the 4×4 real representations of the Pauli generators, +``\omega = 0.1``, and control bound ``|u| \le 0.1``. + +!!! 
note "Example output, not authoritative measurements" + The tables below show the **shape** of what each benchmark produces, with + illustrative numbers from one local run. They are not pinned reference + results — wall-time and allocation figures vary by hardware, BLAS, MUMPS + build, and Julia version. Don't quote them as the canonical "Ipopt + vs MadNLP" comparison. Run the suite yourself on the hardware you care + about; see [Reproduction](#reproduction) below. + + The benchmark CI workflow on GitHub Actions tracks each solver's wall-time + and allocation against its own history across commits, which is the only + apples-to-apples comparison the harness can offer. + +## Ipopt vs MadNLP + +Same problem (bilinear ``N = 51``, 4D state, 2D control), same initial guess, +same convergence tolerance. Both solvers receive a JIT warmup before timing so +the recorded wall-time reflects steady-state behavior. Metrics captured by +[HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl) +via `benchmark_solve!`. + +### Full solve (bilinear N=51, max_iter=200) — *illustrative* + +| Solver | Wall time | Allocations | Objective | Status | +|:-------|:---------:|:-----------:|:---------:|:------:| +| Ipopt | 8.52 s | 3.4 GB | — | Optimal | +| MadNLP | 5.75 s | 1.9 GB | — | Optimal | + +## Evaluator micro-benchmarks + +Per-function timings for the MOI evaluator interface on the same bilinear +``N = 51`` problem. Measured with `BenchmarkTools.@benchmark`. 
+ +### Per-function timings — *illustrative* + +| Function | Median | Allocations | Memory | +|:---------|:------:|:-----------:|:------:| +| `eval_objective` | 0.8 μs | 0 | 0 B | +| `eval_objective_gradient` | 45 μs | 102 | 80 KB | +| `eval_constraint` | 1.2 ms | 5,100 | 4.8 MB | +| `eval_constraint_jacobian` | 3.5 ms | 15,300 | 14 MB | +| `eval_hessian_lagrangian` | 12.7 ms | 73,000 | 68 MB | + +`eval_hessian_lagrangian` is typically the dominant per-iteration cost and +the natural optimization target. + +## Memory scaling + +Both solvers across increasing problem sizes (``N \times \text{state\_dim}``). +Each solver is capped at 50 iterations to measure scaling behavior rather than +convergence. Every ``(N, \text{state\_dim})`` cell runs ``K = 3`` random +instances (deterministic distinct seeds) and the table shows the **median** wall +time and allocation total across those seeds — single-shot timings on random +instances are noisy enough that one degenerate seed can dominate a cell. Both +solvers receive the same instance per (cell, seed) so per-seed Ipopt-vs-MadNLP +comparisons stay fair; only the choice of instance varies across the K samples. + +The per-seed `BenchmarkResult`s are all saved to the JLD2 artifact, so the +raw distribution behind each median cell is available for downstream analysis. 
+ +### Scaling sweep — *illustrative* + +| N | State dim | Ipopt (s) | Ipopt (MB) | MadNLP (s) | MadNLP (MB) | +|:-:|:---------:|:---------:|:----------:|:----------:|:-----------:| +| 25 | 4 | 0.8 | 120 | 0.5 | 70 | +| 25 | 8 | 1.5 | 310 | 1.0 | 180 | +| 25 | 16 | 4.2 | 980 | 2.8 | 570 | +| 51 | 4 | 1.6 | 250 | 1.1 | 150 | +| 51 | 8 | 3.2 | 640 | 2.1 | 380 | +| 51 | 16 | 9.1 | 2,100 | 6.0 | 1,200 | +| 101 | 4 | 3.4 | 510 | 2.2 | 300 | +| 101 | 8 | 6.8 | 1,300 | 4.5 | 780 | +| 101 | 16 | 19.5 | 4,200 | 12.8 | 2,500 | + +Each cell is the median over ``K = 3`` solves on independent random +instances — most useful for tracking the slope of each solver vs itself +over time, less useful as an instance-by-instance Ipopt-vs-MadNLP +comparison since each solver's median may come from a different seed. + +## Environment + +| | CI benchmarks | +|:---|:---| +| **CPU** | GitHub Actions `ubuntu-latest` (2 vCPU, 7 GB RAM) | +| **Julia** | 1.11 | +| **Threads** | `auto` | + +## Reproduction + +Benchmark scripts are in [`benchmark/`](https://github.com/harmoniqs/DirectTrajOpt.jl/tree/main/benchmark). + +```bash +# From DirectTrajOpt.jl root +julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' + +julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") +' +``` + +Results are saved as JLD2 files in `benchmark/results/` (gitignored). Load with: + +```julia +using HarmoniqsBenchmarks +results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") +micro = load_micro_results("benchmark/results/evaluator_micro_bilinear_N51_.jld2") +``` + +Results use [`BenchmarkResult`](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl) / +`MicroBenchmarkResult` schemas from HarmoniqsBenchmarks.jl, which also provides +[`compare_results`](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl) for +regression detection across commits. 
diff --git a/test/runtests.jl b/test/runtests.jl index bebf5d7..c0a0096 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,5 +3,7 @@ using TestItemRunner include("test_snippets.jl") -# Run all testitem tests in package -@run_package_tests +# Exclude benchmark/ testitems — those run in a separate project environment. +# Match the "benchmark" path component exactly so test files like +# foo_benchmark.jl elsewhere in the tree aren't accidentally skipped. +@run_package_tests filter = ti -> !("benchmark" in splitpath(ti.filename))