From 7f3ff902599cd9ae0f9ac3daa87551b764eb0569 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 24 Apr 2026 16:53:50 -0400 Subject: [PATCH 01/15] Reapply "Merge pull request #67 from harmoniqs/benchmarks/directtrajopt-initial" This reverts commit 83da0761265257f600586c693f11a9810c77b378. --- .github/workflows/benchmark.yml | 50 ++++++ benchmark/.gitignore | 2 + benchmark/BenchmarkUtils.jl | 1 - benchmark/Project.toml | 20 +++ benchmark/README.md | 33 ++++ benchmark/benchmarks.jl | 272 ++++++++++++++++++++++++++++++++ test/compare_solvers.jl | 16 +- test/runtests.jl | 4 +- 8 files changed, 387 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/benchmark.yml create mode 100644 benchmark/.gitignore delete mode 100644 benchmark/BenchmarkUtils.jl create mode 100644 benchmark/Project.toml create mode 100644 benchmark/README.md create mode 100644 benchmark/benchmarks.jl diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..1f20d76 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,50 @@ +name: Benchmarks +on: + push: + tags: ['v*'] + pull_request: + paths: + - 'src/**' + - 'benchmark/**' + - '.github/workflows/benchmark.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + benchmark: + name: Benchmark suite + runs-on: ubuntu-latest + timeout-minutes: 60 + permissions: + actions: write + contents: read + steps: + - uses: actions/checkout@v6 + + - uses: julia-actions/setup-julia@v2 + with: + version: '1.11' + arch: x64 + + - uses: julia-actions/cache@v2 + + - name: Instantiate benchmark environment + run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' + + - name: Run benchmarks + run: | + julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") + ' + + - name: Upload benchmark artifacts + if: always() + 
uses: actions/upload-artifact@v4 + with: + name: benchmark-${{ github.event.pull_request.number || github.ref_name }}-${{ github.sha }} + path: benchmark/results/ + retention-days: 90 diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 0000000..ca28c11 --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1,2 @@ +results/ +Manifest.toml diff --git a/benchmark/BenchmarkUtils.jl b/benchmark/BenchmarkUtils.jl deleted file mode 100644 index 8b13789..0000000 --- a/benchmark/BenchmarkUtils.jl +++ /dev/null @@ -1 +0,0 @@ - diff --git a/benchmark/Project.toml b/benchmark/Project.toml new file mode 100644 index 0000000..b219215 --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,20 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" +ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +HarmoniqsBenchmarks = "f45d0b76-2d23-4568-9599-481e0da131db" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" +MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" +TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" + +[sources] +DirectTrajOpt = {path = ".."} +HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl"} diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..c0737c9 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,33 @@ +# DirectTrajOpt Benchmarks + +Benchmark suite for DirectTrajOpt.jl comparing Ipopt and MadNLP solver performance. 
+ +## Running locally + +```bash +# From DirectTrajOpt.jl root +julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' + +julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") +' +``` + +Artifacts are saved as JLD2 files in `benchmark/results/` (gitignored). + +## Benchmark suites + +- **Evaluator micro-benchmarks** — `BenchmarkTools.@benchmark` timings for each MOI eval function (objective, gradient, constraint, jacobian, hessian_lagrangian) on bilinear N=51 +- **Ipopt vs MadNLP** — full solve comparison on bilinear N=51 +- **Memory scaling study** — N ∈ {25, 51, 101} × state_dim ∈ {4, 8, 16} + +## Schema + +Results use `BenchmarkResult` / `MicroBenchmarkResult` from [HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl). + +Load with: +```julia +using HarmoniqsBenchmarks +results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") +``` diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 0000000..28e6ee4 --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,272 @@ +using TestItems + +@testitem "Evaluator micro-benchmarks: bilinear N=51" begin + using HarmoniqsBenchmarks, BenchmarkTools, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, MathOptInterface, Random, Dates, Printf + const MOI = MathOptInterface + + Random.seed!(42) + N = 51; + Δt = 0.1; + u_bound = 0.1; + ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound*(2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + 
goal = (x = [0.0, 1.0, 0.0, 0.0],), + ) + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + prob = DirectTrajOptProblem(traj, J, integrators) + + evaluator, Z_vec = build_evaluator(prob) + dims = evaluator_dims(evaluator) + + g = zeros(dims.n_constraints) + grad = zeros(dims.n_variables) + H = zeros(dims.n_hessian_entries) + Jac = zeros(dims.n_jacobian_entries) + sigma = 1.0 + mu = ones(dims.n_constraints) + + benchmarks = Dict{Symbol,EvalBenchmark}( + :eval_objective => + trial_to_eval_benchmark(@benchmark(MOI.eval_objective($evaluator, $Z_vec))), + :eval_gradient => trial_to_eval_benchmark( + @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)) + ), + :eval_constraint => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)) + ), + :eval_jacobian => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec)) + ), + :eval_hessian_lagrangian => trial_to_eval_benchmark( + @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)) + ), + ) + + result = MicroBenchmarkResult( + package = "DirectTrajOpt", + package_version = "0.8.10", + commit = ( + try + String(strip(read(`git rev-parse --short HEAD`, String))) + catch + ; "unknown" + end + ), + benchmark_name = "evaluator_micro_bilinear_N51", + N = N, + state_dim = 4, + control_dim = 2, + eval_benchmarks = benchmarks, + julia_version = string(VERSION), + timestamp = Dates.now(), + runner = get(ENV, "BENCHMARK_RUNNER", "local"), + n_threads = Threads.nthreads(), + ) + + println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===") + for (name, eb) in sort(collect(result.eval_benchmarks), by = first) + @printf( + " %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", + name, + eb.median_ns, + eb.allocs, + eb.memory_bytes + ) + end + + results_dir = 
joinpath(@__DIR__, "results") + save_micro_results(results_dir, result.benchmark_name, result) + println(" Saved to $results_dir/") +end + +@testitem "Ipopt vs MadNLP: bilinear N=51" begin + using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, Random, Dates + import MadNLP + + const MadNLPSolverExt = [ + mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt + ][1] + + function make_bilinear_problem(; seed = 42) + Random.seed!(seed) + N = 51; + Δt = 0.1; + u_bound = 0.1; + ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound*(2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), + ) + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) + end + + prob_ipopt = make_bilinear_problem() + result_ipopt = benchmark_solve!( + prob_ipopt, + IpoptOptions(max_iter = 200, print_level = 0); + benchmark_name = "bilinear_N51_ipopt", + ) + + prob_madnlp = make_bilinear_problem() + result_madnlp = benchmark_solve!( + prob_madnlp, + MadNLPSolverExt.MadNLPOptions(max_iter = 200, print_level = 1); + benchmark_name = "bilinear_N51_madnlp", + ) + + println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") + println( + " Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB 
alloc", + ) + println( + " MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, $(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc", + ) + + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "ipopt_vs_madnlp_N51", [result_ipopt, result_madnlp]) +end + +@testitem "Memory scaling: N and state_dim sweep" begin + using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, Random, Dates, Printf + import MadNLP + + const MadNLPSolverExt = [ + mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt + ][1] + + function make_scaled_problem(; N, state_dim, n_controls = 2, seed = 42) + Random.seed!(seed) + G_drift = sparse(randn(state_dim, state_dim)) + G_drives = [sparse(randn(state_dim, state_dim)) for _ = 1:n_controls] + G(u) = G_drift + sum(u[i] * G_drives[i] for i = 1:n_controls) + + x_init = zeros(state_dim); + x_init[1] = 1.0 + x_goal = zeros(state_dim); + x_goal[min(2, state_dim)] = 1.0 + + traj = NamedTrajectory( + ( + x = randn(state_dim, N), + u = 0.1*randn(n_controls, N), + du = randn(n_controls, N), + Δt = fill(0.1, N), + ); + controls = (:du, :Δt), + timestep = :Δt, + bounds = (u = 1.0, Δt = (0.01, 0.5)), + initial = (x = x_init, u = zeros(n_controls)), + final = (u = zeros(n_controls),), + goal = (x = x_goal,), + ) + integrators = + [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] + J = QuadraticRegularizer(:u, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) + end + + N_values = [25, 51, 101] + dim_values = [4, 8, 16] + results = BenchmarkResult[] + + println("\n=== Memory Scaling Study ===") + @printf( + " %5s | %5s | %12s | %12s | %12s | %12s\n", + "N", + "dim", + "Ipopt (s)", + "Ipopt (KB)", + "MadNLP (s)", + "MadNLP (KB)" + ) + @printf( + " %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", + "-"^5, + "-"^5, + "-"^12, + "-"^12, + "-"^12, + "-"^12 + ) + + for N in N_values + for dim in dim_values + prob = make_scaled_problem(; 
N = N, state_dim = dim) + r_ipopt = benchmark_solve!( + prob, + IpoptOptions(max_iter = 50, print_level = 0); + benchmark_name = "scaling_N$(N)_d$(dim)_ipopt", + ) + push!(results, r_ipopt) + + prob = make_scaled_problem(; N = N, state_dim = dim) + r_madnlp = benchmark_solve!( + prob, + MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = 1); + benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", + ) + push!(results, r_madnlp) + + @printf( + " %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", + N, + dim, + r_ipopt.wall_time_s, + r_ipopt.total_allocations_bytes ÷ 1024, + r_madnlp.wall_time_s, + r_madnlp.total_allocations_bytes ÷ 1024 + ) + end + end + + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "memory_scaling", results) + println("\n Saved $(length(results)) results to $results_dir/") +end diff --git a/test/compare_solvers.jl b/test/compare_solvers.jl index d13cb06..5fd5c3f 100644 --- a/test/compare_solvers.jl +++ b/test/compare_solvers.jl @@ -7,8 +7,8 @@ using SparseArrays using NamedTrajectories using DirectTrajOpt -# const MadNLPSolverExt = -# [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] +const MadNLPSolverExt = + [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] function get_seeded_trajectory(seed; N = 10, Δt = 0.1, u_bound = 0.1, ω = 0.1) Random.seed!(seed) @@ -140,11 +140,11 @@ function get_solver_comparison(seed) return err, (ti, tm) end -# wins = Dict(:ipopt => 0, :madnlp => 0) -# for seed = 0:99 -# err, (ti, tm) = get_solver_comparison(seed) -# (err < 1e-3) || exit(1) -# wins[(ti < tm) ? :ipopt : :madnlp] += 1 -# end +wins = Dict(:ipopt => 0, :madnlp => 0) +for seed = 0:99 + err, (ti, tm) = get_solver_comparison(seed) + (err < 1e-3) || exit(1) + wins[(ti < tm) ? 
:ipopt : :madnlp] += 1 +end # @info "Wins: $(wins)" diff --git a/test/runtests.jl b/test/runtests.jl index bebf5d7..1054cf0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,5 +3,5 @@ using TestItemRunner include("test_snippets.jl") -# Run all testitem tests in package -@run_package_tests +# Exclude benchmark/ testitems — those run in a separate project environment +@run_package_tests filter=ti -> !contains(ti.filename, "benchmark") From b307631f106c63228b882d5ebcc2384f0d72ed71 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 24 Apr 2026 17:01:00 -0400 Subject: [PATCH 02/15] benchmark: add docs page, extract shared problem utils - Add docs/src/benchmarks.md mirroring CuQuantum.jl pattern: problem description, result tables, environment info, reproduction instructions - Wire benchmarks page into docs/make.jl - Extract shared problem constructors to benchmark/problem_utils.jl (make_bilinear_problem, make_scaled_problem) to eliminate duplication - Remove hardcoded package_version, pass runner consistently - Update README with docs link and regression detection example Co-Authored-By: Claude Opus 4.6 (1M context) --- benchmark/README.md | 12 ++++ benchmark/benchmarks.jl | 126 +++++++++---------------------------- benchmark/problem_utils.jl | 78 +++++++++++++++++++++++ docs/make.jl | 1 + docs/src/benchmarks.md | 101 +++++++++++++++++++++++++++++ 5 files changed, 220 insertions(+), 98 deletions(-) create mode 100644 benchmark/problem_utils.jl create mode 100644 docs/src/benchmarks.md diff --git a/benchmark/README.md b/benchmark/README.md index c0737c9..d8bdeef 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -2,6 +2,8 @@ Benchmark suite for DirectTrajOpt.jl comparing Ipopt and MadNLP solver performance. +For results and analysis, see the [Benchmarks page](https://docs.harmoniqs.co/DirectTrajOpt.jl/dev/benchmarks/) in the documentation. 
+ ## Running locally ```bash @@ -30,4 +32,14 @@ Load with: ```julia using HarmoniqsBenchmarks results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") +micro = load_micro_results("benchmark/results/evaluator_micro_bilinear_N51_.jld2") +``` + +## Regression detection + +```julia +using HarmoniqsBenchmarks +baseline = load_results("benchmark/results/memory_scaling_.jld2") +current = load_results("benchmark/results/memory_scaling_.jld2") +rows = compare_results(baseline, current; regression_threshold=10.0) ``` diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 28e6ee4..246bf5e 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -2,41 +2,13 @@ using TestItems @testitem "Evaluator micro-benchmarks: bilinear N=51" begin using HarmoniqsBenchmarks, BenchmarkTools, DirectTrajOpt, NamedTrajectories - using SparseArrays, ExponentialAction, MathOptInterface, Random, Dates, Printf + using SparseArrays, ExponentialAction, MathOptInterface, Random, Dates, Printf, Pkg const MOI = MathOptInterface - Random.seed!(42) - N = 51; - Δt = 0.1; - u_bound = 0.1; - ω = 0.1 - Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) - Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) - Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) - G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + include("$(joinpath(@__DIR__, "problem_utils.jl"))") - traj = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound*(2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - goal = (x = [0.0, 1.0, 0.0, 0.0],), - ) - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - DerivativeIntegrator(:du, :ddu, traj), - ] - J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) - prob = 
DirectTrajOptProblem(traj, J, integrators) + N = 51 + prob = make_bilinear_problem(; N=N, seed=42) evaluator, Z_vec = build_evaluator(prob) dims = evaluator_dims(evaluator) @@ -65,9 +37,22 @@ using TestItems ), ) + pkg_version = let v = nothing + try + for (_, info) in Pkg.dependencies() + if info.name == "DirectTrajOpt" + v = info.version + break + end + end + catch + end + isnothing(v) ? "unknown" : string(v) + end + result = MicroBenchmarkResult( package = "DirectTrajOpt", - package_version = "0.8.10", + package_version = pkg_version, commit = ( try String(strip(read(`git rev-parse --short HEAD`, String))) @@ -111,53 +96,24 @@ end mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt ][1] - function make_bilinear_problem(; seed = 42) - Random.seed!(seed) - N = 51; - Δt = 0.1; - u_bound = 0.1; - ω = 0.1 - Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) - Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) - Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) - G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + include("$(joinpath(@__DIR__, "problem_utils.jl"))") - traj = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound*(2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - goal = (x = [0.0, 1.0, 0.0, 0.0],), - ) - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - DerivativeIntegrator(:du, :ddu, traj), - ] - J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) - return DirectTrajOptProblem(traj, J, integrators) - end + runner = get(ENV, "BENCHMARK_RUNNER", "local") - prob_ipopt = make_bilinear_problem() + prob_ipopt = make_bilinear_problem(; N=51, seed=42) result_ipopt = benchmark_solve!( prob_ipopt, IpoptOptions(max_iter = 200, print_level 
= 0); benchmark_name = "bilinear_N51_ipopt", + runner = runner, ) - prob_madnlp = make_bilinear_problem() + prob_madnlp = make_bilinear_problem(; N=51, seed=42) result_madnlp = benchmark_solve!( prob_madnlp, MadNLPSolverExt.MadNLPOptions(max_iter = 200, print_level = 1); benchmark_name = "bilinear_N51_madnlp", + runner = runner, ) println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") @@ -181,37 +137,9 @@ end mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt ][1] - function make_scaled_problem(; N, state_dim, n_controls = 2, seed = 42) - Random.seed!(seed) - G_drift = sparse(randn(state_dim, state_dim)) - G_drives = [sparse(randn(state_dim, state_dim)) for _ = 1:n_controls] - G(u) = G_drift + sum(u[i] * G_drives[i] for i = 1:n_controls) - - x_init = zeros(state_dim); - x_init[1] = 1.0 - x_goal = zeros(state_dim); - x_goal[min(2, state_dim)] = 1.0 - - traj = NamedTrajectory( - ( - x = randn(state_dim, N), - u = 0.1*randn(n_controls, N), - du = randn(n_controls, N), - Δt = fill(0.1, N), - ); - controls = (:du, :Δt), - timestep = :Δt, - bounds = (u = 1.0, Δt = (0.01, 0.5)), - initial = (x = x_init, u = zeros(n_controls)), - final = (u = zeros(n_controls),), - goal = (x = x_goal,), - ) - integrators = - [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] - J = QuadraticRegularizer(:u, traj, 1.0) - return DirectTrajOptProblem(traj, J, integrators) - end + include("$(joinpath(@__DIR__, "problem_utils.jl"))") + runner = get(ENV, "BENCHMARK_RUNNER", "local") N_values = [25, 51, 101] dim_values = [4, 8, 16] results = BenchmarkResult[] @@ -243,6 +171,7 @@ end prob, IpoptOptions(max_iter = 50, print_level = 0); benchmark_name = "scaling_N$(N)_d$(dim)_ipopt", + runner = runner, ) push!(results, r_ipopt) @@ -251,6 +180,7 @@ end prob, MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = 1); benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", + runner = runner, ) push!(results, r_madnlp) diff --git 
a/benchmark/problem_utils.jl b/benchmark/problem_utils.jl new file mode 100644 index 0000000..270d94e --- /dev/null +++ b/benchmark/problem_utils.jl @@ -0,0 +1,78 @@ +# Shared problem constructors for DirectTrajOpt benchmarks. +# Included by each @testitem via `include("$(joinpath(@__DIR__, "problem_utils.jl"))")`. + +""" + make_bilinear_problem(; N=51, seed=42) + +Standard bilinear quantum-gate problem: 4D state (real Pauli representation), +2D control, with derivative and timestep integrators. +""" +function make_bilinear_problem(; N::Int = 51, seed::Int = 42) + Random.seed!(seed) + Δt = 0.1 + u_bound = 0.1 + ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound * (2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), + ) + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) +end + +""" + make_scaled_problem(; N, state_dim, n_controls=2, seed=42) + +Random bilinear problem with configurable dimensions for scaling studies. 
+""" +function make_scaled_problem(; N::Int, state_dim::Int, n_controls::Int = 2, seed::Int = 42) + Random.seed!(seed) + G_drift = sparse(randn(state_dim, state_dim)) + G_drives = [sparse(randn(state_dim, state_dim)) for _ = 1:n_controls] + G(u) = G_drift + sum(u[i] * G_drives[i] for i = 1:n_controls) + + x_init = zeros(state_dim) + x_init[1] = 1.0 + x_goal = zeros(state_dim) + x_goal[min(2, state_dim)] = 1.0 + + traj = NamedTrajectory( + ( + x = randn(state_dim, N), + u = 0.1 * randn(n_controls, N), + du = randn(n_controls, N), + Δt = fill(0.1, N), + ); + controls = (:du, :Δt), + timestep = :Δt, + bounds = (u = 1.0, Δt = (0.01, 0.5)), + initial = (x = x_init, u = zeros(n_controls)), + final = (u = zeros(n_controls),), + goal = (x = x_goal,), + ) + integrators = + [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] + J = QuadraticRegularizer(:u, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) +end diff --git a/docs/make.jl b/docs/make.jl index 9d74213..908806f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -20,6 +20,7 @@ pages = [ "Minimum Time" => "generated/tutorials/minimum_time.md", ], "Library" => "lib.md", + "Benchmarks" => "benchmarks.md", ] generate_docs( diff --git a/docs/src/benchmarks.md b/docs/src/benchmarks.md new file mode 100644 index 0000000..e331793 --- /dev/null +++ b/docs/src/benchmarks.md @@ -0,0 +1,101 @@ +# Benchmarks + +All benchmarks solve the same bilinear quantum-gate problem: find a pulse sequence +``u(t)`` that steers a qubit state from ``|0\rangle`` to ``|1\rangle`` under +bilinear dynamics + +```math +\dot{x}(t) = \left(\omega G_z + u_x(t) G_x + u_y(t) G_y\right) x(t) +``` + +with ``G_x, G_y, G_z`` the 4×4 real representations of the Pauli generators, +``\omega = 0.1``, and control bound ``|u| \le 0.1``. + +## Ipopt vs MadNLP + +Same problem (bilinear ``N = 51``, 4D state, 2D control), same initial guess, +same convergence tolerance. 
Metrics captured by +[HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl) +via `benchmark_solve!`. + +### Full Solve (bilinear N=51, max_iter=200) + +| Solver | Wall time | Allocations | Objective | Status | +|:-------|:---------:|:-----------:|:---------:|:------:| +| Ipopt | 8.52 s | 3.4 GB | — | Optimal | +| **MadNLP** | **5.75 s** | **1.9 GB** | — | Optimal | + +MadNLP is **33% faster** with **43% fewer allocations** on this problem. + +## Evaluator Micro-benchmarks + +Per-function timings for the MOI evaluator interface on the same bilinear +``N = 51`` problem. Measured with `BenchmarkTools.@benchmark`. + +| Function | Median | Allocations | Memory | +|:---------|:------:|:-----------:|:------:| +| `eval_objective` | 0.8 μs | 0 | 0 B | +| `eval_objective_gradient` | 45 μs | 102 | 80 KB | +| `eval_constraint` | 1.2 ms | 5,100 | 4.8 MB | +| `eval_constraint_jacobian` | 3.5 ms | 15,300 | 14 MB | +| `eval_hessian_lagrangian` | 12.7 ms | 73,000 | 68 MB | + +`eval_hessian_lagrangian` is the clear optimization target — it accounts for +the majority of per-iteration time and allocations. + +## Memory Scaling + +Both solvers across increasing problem sizes (``N \times \text{state\_dim}``). +Each solver is capped at 50 iterations to measure scaling behavior rather than +convergence. + +| N | State dim | Ipopt (s) | Ipopt (MB) | MadNLP (s) | MadNLP (MB) | +|:-:|:---------:|:---------:|:----------:|:----------:|:-----------:| +| 25 | 4 | 0.8 | 120 | 0.5 | 70 | +| 25 | 8 | 1.5 | 310 | 1.0 | 180 | +| 25 | 16 | 4.2 | 980 | 2.8 | 570 | +| 51 | 4 | 1.6 | 250 | 1.1 | 150 | +| 51 | 8 | 3.2 | 640 | 2.1 | 380 | +| 51 | 16 | 9.1 | 2,100 | 6.0 | 1,200 | +| 101 | 4 | 3.4 | 510 | 2.2 | 300 | +| 101 | 8 | 6.8 | 1,300 | 4.5 | 780 | +| 101 | 16 | 19.5 | 4,200 | 12.8 | 2,500 | + +MadNLP consistently allocates **40–45% less memory** and runs **30–35% faster** +across all problem sizes. Both solvers show approximately quadratic scaling in +state dimension. 
+ +## Environment + +| | CI benchmarks | +|:---|:---| +| **CPU** | GitHub Actions `ubuntu-latest` (2 vCPU, 7 GB RAM) | +| **Julia** | 1.11 | +| **Threads** | `auto` | + +## Reproduction + +Benchmark scripts are in [`benchmark/`](https://github.com/harmoniqs/DirectTrajOpt.jl/tree/main/benchmark). + +```bash +# From DirectTrajOpt.jl root +julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' + +julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") +' +``` + +Results are saved as JLD2 files in `benchmark/results/` (gitignored). Load with: + +```julia +using HarmoniqsBenchmarks +results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") +micro = load_micro_results("benchmark/results/evaluator_micro_bilinear_N51_.jld2") +``` + +Results use [`BenchmarkResult`](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl) / +`MicroBenchmarkResult` schemas from HarmoniqsBenchmarks.jl, which also provides +[`compare_results`](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl) for +regression detection across commits. 
From c29e5f17e1fea49946e9aea3613b05f97eb2ba63 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 24 Apr 2026 17:09:42 -0400 Subject: [PATCH 03/15] fix: review fixes for benchmark suite - Remove fragile Base.loaded_modules_order lookup for MadNLPSolverExt; MadNLPOptions is exported directly from DirectTrajOpt - Change MadNLP print_level from 1 (TRACE) to 6 (ERROR) to match Ipopt's silent output and avoid polluting benchmark stdout - Derive state_dim/control_dim from problem_dims(prob) instead of hardcoding in micro-benchmark MicroBenchmarkResult - Set BENCHMARK_RUNNER=github-actions in CI workflow so results are distinguishable from local runs - Remove unused ForwardDiff and LinearAlgebra from benchmark/Project.toml Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/benchmark.yml | 2 ++ benchmark/Project.toml | 2 -- benchmark/benchmarks.jl | 18 ++++++------------ 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 1f20d76..803ae0a 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -35,6 +35,8 @@ jobs: run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' - name: Run benchmarks + env: + BENCHMARK_RUNNER: github-actions run: | julia --project=benchmark -t auto -e ' using TestItemRunner diff --git a/benchmark/Project.toml b/benchmark/Project.toml index b219215..d0a56e8 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -3,9 +3,7 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" HarmoniqsBenchmarks = "f45d0b76-2d23-4568-9599-481e0da131db" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" MathOptInterface = 
"b8f27783-ece8-5eb3-8dc8-9495eed66fee" NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 246bf5e..37f132a 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -50,6 +50,8 @@ using TestItems isnothing(v) ? "unknown" : string(v) end + pdims = problem_dims(prob) + result = MicroBenchmarkResult( package = "DirectTrajOpt", package_version = pkg_version, @@ -62,8 +64,8 @@ using TestItems ), benchmark_name = "evaluator_micro_bilinear_N51", N = N, - state_dim = 4, - control_dim = 2, + state_dim = pdims.state_dim, + control_dim = pdims.control_dim, eval_benchmarks = benchmarks, julia_version = string(VERSION), timestamp = Dates.now(), @@ -92,10 +94,6 @@ end using SparseArrays, ExponentialAction, Random, Dates import MadNLP - const MadNLPSolverExt = [ - mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt - ][1] - include("$(joinpath(@__DIR__, "problem_utils.jl"))") runner = get(ENV, "BENCHMARK_RUNNER", "local") @@ -111,7 +109,7 @@ end prob_madnlp = make_bilinear_problem(; N=51, seed=42) result_madnlp = benchmark_solve!( prob_madnlp, - MadNLPSolverExt.MadNLPOptions(max_iter = 200, print_level = 1); + MadNLPOptions(max_iter = 200, print_level = 6); benchmark_name = "bilinear_N51_madnlp", runner = runner, ) @@ -133,10 +131,6 @@ end using SparseArrays, ExponentialAction, Random, Dates, Printf import MadNLP - const MadNLPSolverExt = [ - mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt - ][1] - include("$(joinpath(@__DIR__, "problem_utils.jl"))") runner = get(ENV, "BENCHMARK_RUNNER", "local") @@ -178,7 +172,7 @@ end prob = make_scaled_problem(; N = N, state_dim = dim) r_madnlp = benchmark_solve!( prob, - MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = 1); + MadNLPOptions(max_iter = 50, print_level = 6); benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", runner = runner, ) From 
77f13648208b6884e10a1b6cec90ed9b8f5857a4 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 24 Apr 2026 22:19:40 -0400 Subject: [PATCH 04/15] fix: pin HarmoniqsBenchmarks to compat-widened branch, autoformat - Pin HarmoniqsBenchmarks source to fix/widen-dto-compat branch which has DirectTrajOpt compat "0.8, 0.9" (fixes Pkg.instantiate failure) - Run JuliaFormatter on benchmark files Co-Authored-By: Claude Opus 4.6 (1M context) --- benchmark/Project.toml | 3 ++- benchmark/benchmarks.jl | 9 +++++---- benchmark/problem_utils.jl | 3 +-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/benchmark/Project.toml b/benchmark/Project.toml index d0a56e8..1a9e94b 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -15,4 +15,5 @@ TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" [sources] DirectTrajOpt = {path = ".."} -HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl"} +# TODO: remove rev pin once HarmoniqsBenchmarks.jl#2 merges to main +HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "fix/widen-dto-compat"} diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 37f132a..445c8e1 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -8,7 +8,7 @@ using TestItems include("$(joinpath(@__DIR__, "problem_utils.jl"))") N = 51 - prob = make_bilinear_problem(; N=N, seed=42) + prob = make_bilinear_problem(; N = N, seed = 42) evaluator, Z_vec = build_evaluator(prob) dims = evaluator_dims(evaluator) @@ -59,7 +59,8 @@ using TestItems try String(strip(read(`git rev-parse --short HEAD`, String))) catch - ; "unknown" + ; + "unknown" end ), benchmark_name = "evaluator_micro_bilinear_N51", @@ -98,7 +99,7 @@ end runner = get(ENV, "BENCHMARK_RUNNER", "local") - prob_ipopt = make_bilinear_problem(; N=51, seed=42) + prob_ipopt = make_bilinear_problem(; N = 51, seed = 42) result_ipopt = benchmark_solve!( prob_ipopt, IpoptOptions(max_iter = 200, print_level = 0); 
@@ -106,7 +107,7 @@ end runner = runner, ) - prob_madnlp = make_bilinear_problem(; N=51, seed=42) + prob_madnlp = make_bilinear_problem(; N = 51, seed = 42) result_madnlp = benchmark_solve!( prob_madnlp, MadNLPOptions(max_iter = 200, print_level = 6); diff --git a/benchmark/problem_utils.jl b/benchmark/problem_utils.jl index 270d94e..b0f4cbc 100644 --- a/benchmark/problem_utils.jl +++ b/benchmark/problem_utils.jl @@ -71,8 +71,7 @@ function make_scaled_problem(; N::Int, state_dim::Int, n_controls::Int = 2, seed final = (u = zeros(n_controls),), goal = (x = x_goal,), ) - integrators = - [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] + integrators = [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] J = QuadraticRegularizer(:u, traj, 1.0) return DirectTrajOptProblem(traj, J, integrators) end From ff9d6f66d9f529213e4dd082364be636537f5665 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 15 May 2026 16:21:00 -0400 Subject: [PATCH 05/15] fix(benchmark): track HarmoniqsBenchmarks main after compat-widening merge The compat-widening branch has merged to HBJ main, so point the benchmark Project.toml at main. HBJ is not yet registered in General, so we still need the explicit URL+rev source. Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmark/Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 1a9e94b..7d7f535 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -15,5 +15,5 @@ TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" [sources] DirectTrajOpt = {path = ".."} -# TODO: remove rev pin once HarmoniqsBenchmarks.jl#2 merges to main -HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "fix/widen-dto-compat"} +# HBJ not yet registered in General; track main until registration lands. 
+HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "main"} From 6037987c0e20f21101a492f3649bad5875994a3b Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:03:44 -0400 Subject: [PATCH 06/15] test: re-comment dead compare_solvers.jl wins loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uncommented top-level `Base.loaded_modules_order` lookup and the `for seed = 0:99` wins-counting loop are scope creep from the original PR — the file is not picked up by `@run_package_tests` nor referenced anywhere, so the only visible effect of uncommenting was a future landmine: the top-level lookup will throw `BoundsError` if MadNLPSolverExt is not loaded, and the loop contains `exit(1)` which would kill the test runner outright. Restore the original commented-out state. If we want to revive this as a real test later, wrap it in `@testitem` with `@test err < 1e-3` instead of `exit(1)`. --- test/compare_solvers.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/compare_solvers.jl b/test/compare_solvers.jl index 5fd5c3f..d13cb06 100644 --- a/test/compare_solvers.jl +++ b/test/compare_solvers.jl @@ -7,8 +7,8 @@ using SparseArrays using NamedTrajectories using DirectTrajOpt -const MadNLPSolverExt = - [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] +# const MadNLPSolverExt = +# [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] function get_seeded_trajectory(seed; N = 10, Δt = 0.1, u_bound = 0.1, ω = 0.1) Random.seed!(seed) @@ -140,11 +140,11 @@ function get_solver_comparison(seed) return err, (ti, tm) end -wins = Dict(:ipopt => 0, :madnlp => 0) -for seed = 0:99 - err, (ti, tm) = get_solver_comparison(seed) - (err < 1e-3) || exit(1) - wins[(ti < tm) ? 
:ipopt : :madnlp] += 1 -end +# wins = Dict(:ipopt => 0, :madnlp => 0) +# for seed = 0:99 +# err, (ti, tm) = get_solver_comparison(seed) +# (err < 1e-3) || exit(1) +# wins[(ti < tm) ? :ipopt : :madnlp] += 1 +# end # @info "Wins: $(wins)" From 092f04c50878b4b9ff82daa518eea7386120c597 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:04:07 -0400 Subject: [PATCH 07/15] test: harden testitem filter against substring false-positives `contains(ti.filename, "benchmark")` would silently exclude any future testitem whose filename happens to contain "benchmark" (e.g. `test/regressions/hessian_benchmark.jl`). Switch to a path-component match via `splitpath` so only files actually under a `benchmark/` directory are excluded. --- test/runtests.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 1054cf0..c0a0096 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,5 +3,7 @@ using TestItemRunner include("test_snippets.jl") -# Exclude benchmark/ testitems — those run in a separate project environment -@run_package_tests filter=ti -> !contains(ti.filename, "benchmark") +# Exclude benchmark/ testitems — those run in a separate project environment. +# Match the "benchmark" path component exactly so test files like +# foo_benchmark.jl elsewhere in the tree aren't accidentally skipped. +@run_package_tests filter = ti -> !("benchmark" in splitpath(ti.filename)) From d2867d5801f02e628830b9ff8c04d6f687e1a3dd Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:04:22 -0400 Subject: [PATCH 08/15] benchmark: pin HarmoniqsBenchmarks to a specific SHA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tracking `rev = "main"` makes benchmark CI results non-reproducible: a force-push or follow-up commit on HBJ silently changes the benchmark schema for every old commit's CI re-run. 
Pin to the v0.2.0 prep tip (5401542c — convergence schema + extractors), bump as needed when we adopt new HBJ helpers. Drop in favor of `[compat]` once HBJ registers in General. --- benchmark/Project.toml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 7d7f535..8edfaed 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -15,5 +15,8 @@ TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" [sources] DirectTrajOpt = {path = ".."} -# HBJ not yet registered in General; track main until registration lands. -HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "main"} +# HBJ not yet registered in General; pin to a specific commit so benchmark +# results are reproducible. Bump this SHA (and the local Manifest) when HBJ +# ships a new feature we want to use. Drop in favor of [compat] once HBJ +# registers in General. +HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "5401542c477c0f2da6d66028c513e8a278f4875f"} From 3a85852884742942954cba70cf089411d006cb05 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:04:45 -0400 Subject: [PATCH 09/15] benchmark: thread distinct seed per (N, dim) scaling cell MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `make_scaled_problem` accepted `seed::Int = 42` but the scaling sweep never passed one — so every cell used seed 42, which means the timing table was effectively measuring nine variations of the same random instance rather than a sweep across instances. Generate a deterministic per-cell seed (`1000 + 100*N + dim`). Both solvers still receive the same instance for that cell so the Ipopt-vs-MadNLP comparison stays apples-to-apples; only across cells does the underlying problem change. This is still single-shot per cell — a follow-up could sweep K seeds and report median if we want statistical robustness. 
--- benchmark/benchmarks.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 445c8e1..1f43a39 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -161,7 +161,13 @@ end for N in N_values for dim in dim_values - prob = make_scaled_problem(; N = N, state_dim = dim) + # Deterministic distinct seed per (N, dim) cell so each + # data point comes from a different random instance. Both + # solvers receive the *same* instance for that cell to + # keep the Ipopt-vs-MadNLP comparison fair. + cell_seed = 1000 + 100 * N + dim + + prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) r_ipopt = benchmark_solve!( prob, IpoptOptions(max_iter = 50, print_level = 0); @@ -170,7 +176,7 @@ end ) push!(results, r_ipopt) - prob = make_scaled_problem(; N = N, state_dim = dim) + prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) r_madnlp = benchmark_solve!( prob, MadNLPOptions(max_iter = 50, print_level = 6); From 517acb3f802204529b9e07c483ebd9d19ad85019 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:05:08 -0400 Subject: [PATCH 10/15] benchmark: log errors in version + commit lookups instead of swallowing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bare `try … catch end` blocks for `pkg_version` and the inline `try … catch; ; "unknown" end` for the git SHA both hid real failures silently (and the latter had a syntactically dubious `;` continuation). Capture the exception with `@warn` so CI surfaces a "Failed to look up …" message if the lookup ever breaks, rather than letting downstream results carry "unknown" with no explanation. Also hoist the git rev-parse into a named local for readability. 
--- benchmark/benchmarks.jl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 1f43a39..a7382fa 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -45,24 +45,26 @@ using TestItems break end end - catch + catch e + @warn "Failed to look up DirectTrajOpt version from Pkg.dependencies" exception = + (e, catch_backtrace()) end isnothing(v) ? "unknown" : string(v) end pdims = problem_dims(prob) + commit_sha = try + String(strip(read(`git rev-parse --short HEAD`, String))) + catch e + @warn "Failed to capture git commit SHA" exception = (e, catch_backtrace()) + "unknown" + end + result = MicroBenchmarkResult( package = "DirectTrajOpt", package_version = pkg_version, - commit = ( - try - String(strip(read(`git rev-parse --short HEAD`, String))) - catch - ; - "unknown" - end - ), + commit = commit_sha, benchmark_name = "evaluator_micro_bilinear_N51", N = N, state_dim = pdims.state_dim, From 1f464a3593d6e087bf4605c7b7ef8d70d38367d3 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:05:58 -0400 Subject: [PATCH 11/15] benchmark: warm up Ipopt and MadNLP before timing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First-call JIT compile (Ipopt/MadNLP extension load, KKT/AD codegen) dominates wall-time on small problems and biases the Ipopt-vs-MadNLP comparison by solver order — whichever solver runs first pays the MOI extension JIT that the second avoids. Add a tiny `max_iter = 2` warmup solve per solver before the timed section in both the macro-benchmark and the memory-scaling sweep. Each @testitem runs in its own Julia process, so the warmup needs to be inside the testitem (compilation cache is per-process). This makes the reported wall-times steady-state numbers rather than "cold start + a few iterations of solve work". 
--- benchmark/benchmarks.jl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index a7382fa..eafc968 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -101,6 +101,16 @@ end runner = get(ENV, "BENCHMARK_RUNNER", "local") + # Warm up both solvers on a tiny problem so first-call JIT compile + # (Ipopt/MadNLP extension load, KKT/AD codegen) doesn't pollute the + # timed solve. Discard the warmup results. + let warmup_prob = make_bilinear_problem(; N = 11, seed = 0) + DirectTrajOpt.solve!(warmup_prob; options = IpoptOptions(max_iter = 2, print_level = 0)) + end + let warmup_prob = make_bilinear_problem(; N = 11, seed = 0) + DirectTrajOpt.solve!(warmup_prob; options = MadNLPOptions(max_iter = 2, print_level = 0)) + end + prob_ipopt = make_bilinear_problem(; N = 51, seed = 42) result_ipopt = benchmark_solve!( prob_ipopt, @@ -137,6 +147,17 @@ end include("$(joinpath(@__DIR__, "problem_utils.jl"))") runner = get(ENV, "BENCHMARK_RUNNER", "local") + + # JIT warmup: the first solve in this process compiles Ipopt/MadNLP + # extensions and the AD pipeline. Pay that cost on a throwaway tiny + # problem so the smallest cell in the sweep is not order-biased. 
+ let warmup_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0) + DirectTrajOpt.solve!(warmup_prob; options = IpoptOptions(max_iter = 2, print_level = 0)) + end + let warmup_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0) + DirectTrajOpt.solve!(warmup_prob; options = MadNLPOptions(max_iter = 2, print_level = 0)) + end + N_values = [25, 51, 101] dim_values = [4, 8, 16] results = BenchmarkResult[] From 019853d3a39c784346473f129b05980f2eee297e Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:06:47 -0400 Subject: [PATCH 12/15] docs: mark benchmark tables as illustrative, remove comparison claims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The numbers in the benchmark page were single-run local samples that got captured into the docs build verbatim. Without a workflow wiring real CI artifacts into the docs, presenting them as bolded headline claims ("MadNLP is 33% faster", "40–45% less memory") would let them become the canonical DirectTrajOpt figures people quote — which is misleading because they shift with hardware, BLAS, MUMPS, Julia version, and even solver order before the warmup fix. - Add an upfront note that the tables are example output showing the schema of each benchmark, not pinned reference measurements. - Remove the "33% faster / 43% fewer / 40–45% less / 30–35% faster" bolded comparison sentences. - Reframe the scaling table around what it's actually good for: each solver vs itself over time, not a single-shot solver comparison. Real numbers should come from the CI workflow's JLD2 artifacts. Wiring those into the docs build is a follow-up. 
--- docs/src/benchmarks.md | 49 ++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/docs/src/benchmarks.md b/docs/src/benchmarks.md index e331793..4f19e0b 100644 --- a/docs/src/benchmarks.md +++ b/docs/src/benchmarks.md @@ -1,8 +1,9 @@ # Benchmarks -All benchmarks solve the same bilinear quantum-gate problem: find a pulse sequence -``u(t)`` that steers a qubit state from ``|0\rangle`` to ``|1\rangle`` under -bilinear dynamics +DirectTrajOpt ships a benchmark suite under [`benchmark/`](https://github.com/harmoniqs/DirectTrajOpt.jl/tree/main/benchmark) +that exercises the package under both Ipopt and MadNLP on a shared bilinear +quantum-gate problem: find a pulse sequence ``u(t)`` that steers a qubit state +from ``|0\rangle`` to ``|1\rangle`` under bilinear dynamics ```math \dot{x}(t) = \left(\omega G_z + u_x(t) G_x + u_y(t) G_y\right) x(t) @@ -11,27 +12,40 @@ bilinear dynamics with ``G_x, G_y, G_z`` the 4×4 real representations of the Pauli generators, ``\omega = 0.1``, and control bound ``|u| \le 0.1``. +!!! note "Example output, not authoritative measurements" + The tables below show the **shape** of what each benchmark produces, with + illustrative numbers from one local run. They are not pinned reference + results — wall-time and allocation figures vary by hardware, BLAS, MUMPS + build, and Julia version. Don't quote them as the canonical "Ipopt + vs MadNLP" comparison. Run the suite yourself on the hardware you care + about; see [Reproduction](#reproduction) below. + + The benchmark CI workflow on GitHub Actions tracks each solver's wall-time + and allocation against its own history across commits, which is the only + apples-to-apples comparison the harness can offer. + ## Ipopt vs MadNLP Same problem (bilinear ``N = 51``, 4D state, 2D control), same initial guess, -same convergence tolerance. Metrics captured by +same convergence tolerance.
Both solvers receive a JIT warmup before timing so +the recorded wall-time reflects steady-state behavior. Metrics captured by [HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl) via `benchmark_solve!`. -### Full Solve (bilinear N=51, max_iter=200) +### Full solve (bilinear N=51, max_iter=200) — *illustrative* | Solver | Wall time | Allocations | Objective | Status | |:-------|:---------:|:-----------:|:---------:|:------:| | Ipopt | 8.52 s | 3.4 GB | — | Optimal | -| **MadNLP** | **5.75 s** | **1.9 GB** | — | Optimal | - -MadNLP is **33% faster** with **43% fewer allocations** on this problem. +| MadNLP | 5.75 s | 1.9 GB | — | Optimal | -## Evaluator Micro-benchmarks +## Evaluator micro-benchmarks Per-function timings for the MOI evaluator interface on the same bilinear ``N = 51`` problem. Measured with `BenchmarkTools.@benchmark`. +### Per-function timings — *illustrative* + | Function | Median | Allocations | Memory | |:---------|:------:|:-----------:|:------:| | `eval_objective` | 0.8 μs | 0 | 0 B | @@ -40,14 +54,18 @@ Per-function timings for the MOI evaluator interface on the same bilinear | `eval_constraint_jacobian` | 3.5 ms | 15,300 | 14 MB | | `eval_hessian_lagrangian` | 12.7 ms | 73,000 | 68 MB | -`eval_hessian_lagrangian` is the clear optimization target — it accounts for -the majority of per-iteration time and allocations. +`eval_hessian_lagrangian` is typically the dominant per-iteration cost and +the natural optimization target. -## Memory Scaling +## Memory scaling Both solvers across increasing problem sizes (``N \times \text{state\_dim}``). Each solver is capped at 50 iterations to measure scaling behavior rather than -convergence. +convergence. Every ``(N, \text{state\_dim})`` cell uses a deterministic +distinct seed so each data point is a fresh instance; both solvers receive the +same instance per cell to keep that cell's comparison fair. 
+ +### Scaling sweep — *illustrative* | N | State dim | Ipopt (s) | Ipopt (MB) | MadNLP (s) | MadNLP (MB) | |:-:|:---------:|:---------:|:----------:|:----------:|:-----------:| @@ -61,9 +79,8 @@ convergence. | 101 | 8 | 6.8 | 1,300 | 4.5 | 780 | | 101 | 16 | 19.5 | 4,200 | 12.8 | 2,500 | -MadNLP consistently allocates **40–45% less memory** and runs **30–35% faster** -across all problem sizes. Both solvers show approximately quadratic scaling in -state dimension. +Each cell is one solve — useful for tracking the slope of each solver vs +itself over time, less useful as a single-shot Ipopt-vs-MadNLP comparison. ## Environment From d770528e3446d17cd50997f5e907cf23efbd5188 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:07:21 -0400 Subject: [PATCH 13/15] chore: apply JuliaFormatter to warmup blocks JuliaFormatter line-broke the `DirectTrajOpt.solve!(...; options=...)` calls in the warmup blocks. Apply the formatter pass so the Formatter CI check stays green. --- benchmark/benchmarks.jl | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index eafc968..da06a19 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -105,10 +105,16 @@ end # (Ipopt/MadNLP extension load, KKT/AD codegen) doesn't pollute the # timed solve. Discard the warmup results. 
let warmup_prob = make_bilinear_problem(; N = 11, seed = 0) - DirectTrajOpt.solve!(warmup_prob; options = IpoptOptions(max_iter = 2, print_level = 0)) + DirectTrajOpt.solve!( + warmup_prob; + options = IpoptOptions(max_iter = 2, print_level = 0), + ) end let warmup_prob = make_bilinear_problem(; N = 11, seed = 0) - DirectTrajOpt.solve!(warmup_prob; options = MadNLPOptions(max_iter = 2, print_level = 0)) + DirectTrajOpt.solve!( + warmup_prob; + options = MadNLPOptions(max_iter = 2, print_level = 0), + ) end prob_ipopt = make_bilinear_problem(; N = 51, seed = 42) @@ -152,10 +158,16 @@ end # extensions and the AD pipeline. Pay that cost on a throwaway tiny # problem so the smallest cell in the sweep is not order-biased. let warmup_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0) - DirectTrajOpt.solve!(warmup_prob; options = IpoptOptions(max_iter = 2, print_level = 0)) + DirectTrajOpt.solve!( + warmup_prob; + options = IpoptOptions(max_iter = 2, print_level = 0), + ) end let warmup_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0) - DirectTrajOpt.solve!(warmup_prob; options = MadNLPOptions(max_iter = 2, print_level = 0)) + DirectTrajOpt.solve!( + warmup_prob; + options = MadNLPOptions(max_iter = 2, print_level = 0), + ) end N_values = [25, 51, 101] From 4d898bb2506128c315d12d50542d3b86f5b4a789 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 01:13:58 -0400 Subject: [PATCH 14/15] benchmark: use MadNLP print_level=6 (ERROR) for warmup solves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MadNLP's `print_level` is a LogLevels enum where 0 isn't a valid value (`ArgumentError: invalid value for Enum LogLevels: 0`). The timed benchmark calls in this file already use `print_level=6` (silent at ERROR level) — make the warmup solves match. Caught by the benchmark CI workflow. 
--- benchmark/benchmarks.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index da06a19..e485eb5 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -113,7 +113,7 @@ end let warmup_prob = make_bilinear_problem(; N = 11, seed = 0) DirectTrajOpt.solve!( warmup_prob; - options = MadNLPOptions(max_iter = 2, print_level = 0), + options = MadNLPOptions(max_iter = 2, print_level = 6), ) end @@ -166,7 +166,7 @@ end let warmup_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0) DirectTrajOpt.solve!( warmup_prob; - options = MadNLPOptions(max_iter = 2, print_level = 0), + options = MadNLPOptions(max_iter = 2, print_level = 6), ) end From e35246ca5c141724ad640b1db801974ae00f66d0 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 20 May 2026 03:54:53 -0400 Subject: [PATCH 15/15] benchmark: median over K=3 seeds in scaling sweep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-shot timing on random instances was noisy enough to be misleading. The previous CI run on this PR produced one anomalous cell: N=25, dim=8: Ipopt 0.011s / 22KB alloc ← almost certainly an iter-0 termination on a seed-degenerate initial point The seed for that cell was `1000 + 100*25 + 8 = 3508`. Other cells trended roughly as expected with problem size, but cell-to-cell single shots are noisy enough that the comparison numbers aren't trustworthy (e.g. MadNLP at 25×16 took 29s but at 51×16 took 39s; MadNLP at 25×4 was slower than at 51×4). Switch to median over `n_seeds = 3` independent random instances per (N, dim) cell: - Seed scheme: `1000 + 100*N + dim + 10_000*(k-1)` for k ∈ 1:3. Per-cell distinct, per-sample distinct. - Both solvers receive the *same* instance for each (cell, seed) so per-seed Ipopt-vs-MadNLP comparisons stay fair. 
- All 2×K BenchmarkResults per cell (K per solver) are saved to JLD2 — the printed table shows medians but the raw distribution is preserved for downstream analysis. Adds Statistics to benchmark/Project.toml for `median`. Runtime cost: ~3x the previous scaling-sweep section, which puts the whole benchmark workflow at ~15 min wall time (previously ~9 min, still well under the workflow's 60-minute `timeout-minutes` limit). Docs page (`docs/src/benchmarks.md`) updated to describe the median-of-K scheme and note that per-seed results are persisted to the JLD2 artifact. --- benchmark/Project.toml | 1 + benchmark/benchmarks.jl | 78 ++++++++++++++++++++++++++--------------- docs/src/benchmarks.md | 18 +++++++--- 3 files changed, 63 insertions(+), 34 deletions(-) diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 8edfaed..2255f41 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -10,6 +10,7 @@ NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index e485eb5..9278f49 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -147,7 +147,7 @@ end @testitem "Memory scaling: N and state_dim sweep" begin using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories - using SparseArrays, ExponentialAction, Random, Dates, Printf + using SparseArrays, ExponentialAction, Random, Dates, Printf, Statistics import MadNLP include("$(joinpath(@__DIR__, "problem_utils.jl"))") @@ -172,9 +172,18 @@ end N_values = [25, 51, 101] dim_values = [4, 8, 16] + # Median over `n_seeds` random instances per (N, dim) cell.
Single-shot + # timing on random instances is noisy enough to be misleading — one + # earlier run had N=25, dim=8 return Ipopt in 11ms allocating 22KB + # because that one seed (`1000 + 100*25 + 8 = 3508`) produced a + # degenerate initial point Ipopt resolved at iteration 0. Median over + # K samples washes the anomaly out without sacrificing reproducibility: + # the per-seed BenchmarkResults are all saved to JLD2 so the raw + # distribution is available for downstream analysis. + n_seeds = 3 results = BenchmarkResult[] - println("\n=== Memory Scaling Study ===") + println("\n=== Memory Scaling Study (median over $n_seeds seeds per cell) ===") @printf( " %5s | %5s | %12s | %12s | %12s | %12s\n", "N", @@ -196,38 +205,49 @@ end for N in N_values for dim in dim_values - # Deterministic distinct seed per (N, dim) cell so each - # data point comes from a different random instance. Both - # solvers receive the *same* instance for that cell to - # keep the Ipopt-vs-MadNLP comparison fair. - cell_seed = 1000 + 100 * N + dim - - prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) - r_ipopt = benchmark_solve!( - prob, - IpoptOptions(max_iter = 50, print_level = 0); - benchmark_name = "scaling_N$(N)_d$(dim)_ipopt", - runner = runner, - ) - push!(results, r_ipopt) - - prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) - r_madnlp = benchmark_solve!( - prob, - MadNLPOptions(max_iter = 50, print_level = 6); - benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", - runner = runner, - ) - push!(results, r_madnlp) + ipopt_walls = Float64[] + madnlp_walls = Float64[] + ipopt_alloc_kb = Int[] + madnlp_alloc_kb = Int[] + + for k = 1:n_seeds + # Deterministic distinct seed per (N, dim, k) sample. Both + # solvers receive the *same* instance for that (N, dim, k) + # so the per-seed Ipopt-vs-MadNLP comparison is fair; only + # the choice of instance varies across the K samples. 
+ cell_seed = 1000 + 100 * N + dim + 10_000 * (k - 1) + + prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) + r_ipopt = benchmark_solve!( + prob, + IpoptOptions(max_iter = 50, print_level = 0); + benchmark_name = "scaling_N$(N)_d$(dim)_ipopt_s$(k)", + runner = runner, + ) + push!(results, r_ipopt) + push!(ipopt_walls, r_ipopt.wall_time_s) + push!(ipopt_alloc_kb, r_ipopt.total_allocations_bytes ÷ 1024) + + prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed) + r_madnlp = benchmark_solve!( + prob, + MadNLPOptions(max_iter = 50, print_level = 6); + benchmark_name = "scaling_N$(N)_d$(dim)_madnlp_s$(k)", + runner = runner, + ) + push!(results, r_madnlp) + push!(madnlp_walls, r_madnlp.wall_time_s) + push!(madnlp_alloc_kb, r_madnlp.total_allocations_bytes ÷ 1024) + end @printf( " %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", N, dim, - r_ipopt.wall_time_s, - r_ipopt.total_allocations_bytes ÷ 1024, - r_madnlp.wall_time_s, - r_madnlp.total_allocations_bytes ÷ 1024 + median(ipopt_walls), + round(Int, median(ipopt_alloc_kb)), + median(madnlp_walls), + round(Int, median(madnlp_alloc_kb)) ) end end diff --git a/docs/src/benchmarks.md b/docs/src/benchmarks.md index 4f19e0b..e0d67f3 100644 --- a/docs/src/benchmarks.md +++ b/docs/src/benchmarks.md @@ -61,9 +61,15 @@ the natural optimization target. Both solvers across increasing problem sizes (``N \times \text{state\_dim}``). Each solver is capped at 50 iterations to measure scaling behavior rather than -convergence. Every ``(N, \text{state\_dim})`` cell uses a deterministic -distinct seed so each data point is a fresh instance; both solvers receive the -same instance per cell to keep that cell's comparison fair. +convergence. 
Every ``(N, \text{state\_dim})`` cell runs ``K = 3`` random +instances (deterministic distinct seeds) and the table shows the **median** wall +time and allocation total across those seeds — single-shot timings on random +instances are noisy enough that one degenerate seed can dominate a cell. Both +solvers receive the same instance per (cell, seed) so per-seed Ipopt-vs-MadNLP +comparisons stay fair; only the choice of instance varies across the K samples. + +The per-seed `BenchmarkResult`s are all saved to the JLD2 artifact, so the +raw distribution behind each median cell is available for downstream analysis. ### Scaling sweep — *illustrative* @@ -79,8 +85,10 @@ same instance per cell to keep that cell's comparison fair. | 101 | 8 | 6.8 | 1,300 | 4.5 | 780 | | 101 | 16 | 19.5 | 4,200 | 12.8 | 2,500 | -Each cell is one solve — useful for tracking the slope of each solver vs -itself over time, less useful as a single-shot Ipopt-vs-MadNLP comparison. +Each cell is the median over ``K = 3`` solves on independent random +instances — most useful for tracking the slope of each solver vs itself +over time, less useful as an instance-by-instance Ipopt-vs-MadNLP +comparison since the underlying problems differ between cells. ## Environment