From 0dc263a70754e703de4810cfecfbddcd821850b8 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 15 Apr 2026 01:00:39 -0400 Subject: [PATCH 01/13] Add benchmark environment with evaluator and solver @testitems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sets up benchmark/ with Project.toml, .gitignore, and three @testitem benchmarks: evaluator micro-benchmarks, Ipopt vs MadNLP comparison, and memory scaling sweep (N × state_dim grid). Co-Authored-By: Claude Opus 4.6 (1M context) --- benchmark/.gitignore | 1 + benchmark/Project.toml | 16 +++++ benchmark/benchmarks.jl | 155 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 benchmark/.gitignore create mode 100644 benchmark/Project.toml create mode 100644 benchmark/benchmarks.jl diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 0000000..fbca225 --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1 @@ +results/ diff --git a/benchmark/Project.toml b/benchmark/Project.toml new file mode 100644 index 0000000..9782442 --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,16 @@ +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" +ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +HarmoniqsBenchmarks = "f45d0b76-2d23-4568-9599-481e0da131db" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" +MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" +TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff 
--git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 0000000..a7ecc6f --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,155 @@ +using TestItems + +@testitem "Evaluator micro-benchmarks: bilinear N=51" begin + using HarmoniqsBenchmarks, BenchmarkTools, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, MathOptInterface, Random, Dates, Printf + const MOI = MathOptInterface + + Random.seed!(42) + N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + (x=2rand(4,N).-1, u=u_bound*(2rand(2,N).-1), du=randn(2,N), ddu=randn(2,N), Δt=fill(Δt,N)); + controls=(:ddu,:Δt), timestep=:Δt, bounds=(u=u_bound, Δt=(0.01,0.5)), + initial=(x=[1.0,0.0,0.0,0.0], u=zeros(2)), final=(u=zeros(2),), + goal=(x=[0.0,1.0,0.0,0.0],), + ) + integrators = [BilinearIntegrator(G,:x,:u,traj), DerivativeIntegrator(:u,:du,traj), DerivativeIntegrator(:du,:ddu,traj)] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + prob = DirectTrajOptProblem(traj, J, integrators) + + evaluator, Z_vec = build_evaluator(prob) + dims = evaluator_dims(evaluator) + + g = zeros(dims.n_constraints) + grad = zeros(dims.n_variables) + H = zeros(dims.n_hessian_entries) + Jac = zeros(dims.n_jacobian_entries) + sigma = 1.0 + mu = ones(dims.n_constraints) + + benchmarks = Dict{Symbol,EvalBenchmark}( + :eval_objective => trial_to_eval_benchmark(@benchmark(MOI.eval_objective($evaluator, $Z_vec))), + :eval_gradient => trial_to_eval_benchmark(@benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec))), + :eval_constraint => trial_to_eval_benchmark(@benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec))), + :eval_jacobian => trial_to_eval_benchmark(@benchmark(MOI.eval_constraint_jacobian($evaluator, 
$Jac, $Z_vec))), + :eval_hessian_lagrangian => trial_to_eval_benchmark(@benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu))), + ) + + result = MicroBenchmarkResult( + package="DirectTrajOpt", package_version="0.8.10", + commit=(try String(strip(read(`git rev-parse --short HEAD`, String))) catch; "unknown" end), + benchmark_name="evaluator_micro_bilinear_N51", N=N, state_dim=4, control_dim=2, + eval_benchmarks=benchmarks, julia_version=string(VERSION), + timestamp=Dates.now(), runner=get(ENV, "BENCHMARK_RUNNER", "local"), n_threads=Threads.nthreads(), + ) + + println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===") + for (name, eb) in sort(collect(result.eval_benchmarks), by=first) + @printf(" %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", name, eb.median_ns, eb.allocs, eb.memory_bytes) + end + + results_dir = joinpath(@__DIR__, "results") + save_micro_results(results_dir, result.benchmark_name, result) + println(" Saved to $results_dir/") +end + +@testitem "Ipopt vs MadNLP: bilinear N=51" begin + using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, Random, Dates + import MadNLP + + const MadNLPSolverExt = [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] + + function make_bilinear_problem(; seed=42) + Random.seed!(seed) + N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + (x=2rand(4,N).-1, u=u_bound*(2rand(2,N).-1), du=randn(2,N), ddu=randn(2,N), Δt=fill(Δt,N)); + controls=(:ddu,:Δt), timestep=:Δt, bounds=(u=u_bound, Δt=(0.01,0.5)), + initial=(x=[1.0,0.0,0.0,0.0], u=zeros(2)), final=(u=zeros(2),), + goal=(x=[0.0,1.0,0.0,0.0],), + ) + integrators = [BilinearIntegrator(G,:x,:u,traj), 
DerivativeIntegrator(:u,:du,traj), DerivativeIntegrator(:du,:ddu,traj)] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) + end + + prob_ipopt = make_bilinear_problem() + result_ipopt = benchmark_solve!(prob_ipopt, IpoptOptions(max_iter=200, print_level=0); benchmark_name="bilinear_N51_ipopt") + + prob_madnlp = make_bilinear_problem() + result_madnlp = benchmark_solve!(prob_madnlp, MadNLPSolverExt.MadNLPOptions(max_iter=200, print_level=1); benchmark_name="bilinear_N51_madnlp") + + println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") + println(" Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB alloc") + println(" MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, $(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc") + + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "ipopt_vs_madnlp_N51", [result_ipopt, result_madnlp]) +end + +@testitem "Memory scaling: N and state_dim sweep" begin + using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories + using SparseArrays, ExponentialAction, Random, Dates, Printf + import MadNLP + + const MadNLPSolverExt = [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] + + function make_scaled_problem(; N, state_dim, n_controls=2, seed=42) + Random.seed!(seed) + G_drift = sparse(randn(state_dim, state_dim)) + G_drives = [sparse(randn(state_dim, state_dim)) for _ in 1:n_controls] + G(u) = G_drift + sum(u[i] * G_drives[i] for i in 1:n_controls) + + x_init = zeros(state_dim); x_init[1] = 1.0 + x_goal = zeros(state_dim); x_goal[min(2,state_dim)] = 1.0 + + traj = NamedTrajectory( + (x=randn(state_dim,N), u=0.1*randn(n_controls,N), du=randn(n_controls,N), Δt=fill(0.1,N)); + controls=(:du,:Δt), timestep=:Δt, bounds=(u=1.0, Δt=(0.01,0.5)), + initial=(x=x_init, u=zeros(n_controls)), final=(u=zeros(n_controls),), + goal=(x=x_goal,), + ) + 
integrators = [BilinearIntegrator(G,:x,:u,traj), DerivativeIntegrator(:u,:du,traj)] + J = QuadraticRegularizer(:u, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) + end + + N_values = [25, 51, 101] + dim_values = [4, 8, 16] + results = BenchmarkResult[] + + println("\n=== Memory Scaling Study ===") + @printf(" %5s | %5s | %12s | %12s | %12s | %12s\n", "N", "dim", "Ipopt (s)", "Ipopt (KB)", "MadNLP (s)", "MadNLP (KB)") + @printf(" %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", "-"^5, "-"^5, "-"^12, "-"^12, "-"^12, "-"^12) + + for N in N_values + for dim in dim_values + prob = make_scaled_problem(; N=N, state_dim=dim) + r_ipopt = benchmark_solve!(prob, IpoptOptions(max_iter=50, print_level=0); benchmark_name="scaling_N$(N)_d$(dim)_ipopt") + push!(results, r_ipopt) + + prob = make_scaled_problem(; N=N, state_dim=dim) + r_madnlp = benchmark_solve!(prob, MadNLPSolverExt.MadNLPOptions(max_iter=50, print_level=1); benchmark_name="scaling_N$(N)_d$(dim)_madnlp") + push!(results, r_madnlp) + + @printf(" %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", + N, dim, r_ipopt.wall_time_s, r_ipopt.total_allocations_bytes ÷ 1024, + r_madnlp.wall_time_s, r_madnlp.total_allocations_bytes ÷ 1024) + end + end + + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "memory_scaling", results) + println("\n Saved $(length(results)) results to $results_dir/") +end From 354cabc0f3ad4e0a50befebedce7b1536a77db63 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 15 Apr 2026 16:35:31 -0400 Subject: [PATCH 02/13] docs: add benchmarking spec and implementation plan Specs for: - Overall HarmoniqsBenchmarks.jl architecture - Altissimo GPU benchmarks (3-way: Ipopt CPU / MadNLP-GPU / Altissimo-GPU) - Implementation plan for DirectTrajOpt + HarmoniqsBenchmarks tasks --- .../2026-04-15-benchmarking-infrastructure.md | 1620 +++++++++++++++++ ...6-04-15-altissimo-gpu-benchmarks-design.md | 198 ++ .../specs/2026-04-15-benchmarking-design.md | 383 ++++ 3 files changed, 2201 
insertions(+) create mode 100644 docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md create mode 100644 docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md create mode 100644 docs/superpowers/specs/2026-04-15-benchmarking-design.md diff --git a/docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md b/docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md new file mode 100644 index 0000000..99dee3f --- /dev/null +++ b/docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md @@ -0,0 +1,1620 @@ +# HarmoniqsBenchmarks.jl + DirectTrajOpt Benchmark Suite — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Create a shared benchmarking package (`HarmoniqsBenchmarks.jl`) and wire up the first benchmark suite in DirectTrajOpt.jl comparing Ipopt vs MadNLP, with micro-benchmarks, full-solve benchmarks, and memory scaling studies. + +**Architecture:** HarmoniqsBenchmarks.jl provides schema types, a profiling harness, and JLD2 storage/comparison. DirectTrajOpt.jl's `benchmark/` directory contains `@testitem`-based benchmarks that use the shared harness. Both Ipopt and MadNLP benchmarks use the same shared `Evaluator` (in `src/solvers/evaluator.jl`), so micro-benchmarks are solver-agnostic while macro-benchmarks compare the two solver backends. 
+ +**Tech Stack:** Julia 1.11+, BenchmarkTools.jl, JLD2.jl, TestItems/TestItemRunner, MathOptInterface + +**Spec:** `docs/superpowers/specs/2026-04-15-benchmarking-design.md` + +--- + +## File Structure + +### New repo: `HarmoniqsBenchmarks.jl` (at `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/`) + +| File | Responsibility | +|------|---------------| +| `Project.toml` | Package metadata + deps (BenchmarkTools, JLD2, Dates, DirectTrajOpt, MathOptInterface, NamedTrajectories) | +| `src/HarmoniqsBenchmarks.jl` | Module definition + exports | +| `src/schema.jl` | `BenchmarkResult`, `MicroBenchmarkResult`, `EvalBenchmark` structs | +| `src/harness.jl` | `build_evaluator`, `benchmark_solve!`, GC/allocation capture | +| `src/storage.jl` | `save_results`, `save_micro_results`, `load_results`, `load_micro_results` | +| `src/report.jl` | `compare_results` — diff tables + regression flagging | +| `test/runtests.jl` | Tests for all of the above | + +### Modified repo: `DirectTrajOpt.jl` (benchmark directory) + +| File | Responsibility | +|------|---------------| +| `benchmark/Project.toml` | Benchmark env deps (HarmoniqsBenchmarks, BenchmarkTools, TestItems, MadNLP) | +| `benchmark/benchmarks.jl` | `@testitem` definitions: micro, macro, scaling | +| `benchmark/.gitignore` | Ignore `results/` directory | + +--- + +## Task 1: Create HarmoniqsBenchmarks.jl Project Skeleton + +**Files:** +- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/Project.toml` +- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` + +- [ ] **Step 1: Initialize the package directory** + +```bash +mkdir -p /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src +mkdir -p /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +git init +``` + +- [ ] **Step 2: Create Project.toml** + +```toml +name = "HarmoniqsBenchmarks" +uuid = "GENERATE_UUID" +version = "0.1.0" +authors = ["harmoniqs contributors"] + 
+[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" + +[compat] +BenchmarkTools = "1.6" +Dates = "1.10, 1.11, 1.12" +DirectTrajOpt = "0.8" +JLD2 = "0.5" +MathOptInterface = "1.49" +NamedTrajectories = "0.8" +julia = "1.10, 1.11, 1.12" +``` + +Generate the UUID with: `using UUIDs; uuid4()` + +- [ ] **Step 3: Create module stub** + +```julia +# src/HarmoniqsBenchmarks.jl +module HarmoniqsBenchmarks + +end +``` + +- [ ] **Step 4: Dev-install dependencies and verify the package loads** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e ' + using Pkg + Pkg.develop(path="../DirectTrajOpt.jl") + Pkg.develop(path="../NamedTrajectories.jl") + Pkg.instantiate() + using HarmoniqsBenchmarks + println("Package loads OK") +' +``` + +Expected: "Package loads OK" + +- [ ] **Step 5: Commit** + +```bash +git add Project.toml src/HarmoniqsBenchmarks.jl +git commit -m "feat: initialize HarmoniqsBenchmarks.jl package skeleton" +``` + +--- + +## Task 2: Implement Schema Types + +**Files:** +- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/schema.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` +- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` + +- [ ] **Step 1: Write tests for schema types** + +```julia +# test/runtests.jl +using Test +using HarmoniqsBenchmarks +using Dates + +@testset "HarmoniqsBenchmarks" begin + +@testset "Schema" begin + @testset "EvalBenchmark construction" begin + eb = EvalBenchmark( + times_ns = [100.0, 110.0, 105.0], + gctimes_ns = [0.0, 0.0, 5.0], + memory_bytes = 1024, + allocs = 3, + ) + @test eb.median_ns == 105.0 + @test eb.min_ns == 100.0 + @test 104.0 < 
eb.mean_ns < 106.0 + end + + @testset "BenchmarkResult construction" begin + r = BenchmarkResult( + package = "DirectTrajOpt", + package_version = "0.8.10", + commit = "abc1234", + benchmark_name = "test_bench", + N = 51, + state_dim = 4, + control_dim = 2, + n_constraints = 200, + n_variables = 765, + wall_time_s = 1.5, + iterations = 42, + objective_value = 0.001, + constraint_violation = 1e-8, + solver_status = :Optimal, + solver = "ipopt", + total_allocations_bytes = 1_000_000, + total_allocs_count = 500, + gc_time_ns = 10_000, + gc_count = 2, + gc_full_count = 0, + solver_options = Dict{Symbol,Any}(:tol => 1e-8, :max_iter => 1000), + julia_version = string(VERSION), + timestamp = now(), + runner = "local", + n_threads = 1, + ) + @test r.package == "DirectTrajOpt" + @test r.solver_status == :Optimal + end + + @testset "MicroBenchmarkResult construction" begin + eb = EvalBenchmark( + times_ns = [100.0], + gctimes_ns = [0.0], + memory_bytes = 0, + allocs = 0, + ) + mr = MicroBenchmarkResult( + package = "DirectTrajOpt", + package_version = "0.8.10", + commit = "abc1234", + benchmark_name = "micro_test", + N = 51, + state_dim = 4, + control_dim = 2, + eval_benchmarks = Dict{Symbol,EvalBenchmark}( + :eval_objective => eb, + ), + julia_version = string(VERSION), + timestamp = now(), + runner = "local", + n_threads = 1, + ) + @test mr.eval_benchmarks[:eval_objective].min_ns == 100.0 + end +end + +end # HarmoniqsBenchmarks testset +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. 
-e 'using Pkg; Pkg.test()' +``` + +Expected: FAIL — `EvalBenchmark` not defined + +- [ ] **Step 3: Implement schema types** + +```julia +# src/schema.jl +using Dates +using Statistics: median, mean + +struct EvalBenchmark + times_ns::Vector{Float64} + gctimes_ns::Vector{Float64} + memory_bytes::Int + allocs::Int + # Derived stats (computed at construction) + median_ns::Float64 + min_ns::Float64 + mean_ns::Float64 +end + +function EvalBenchmark(; + times_ns::Vector{Float64}, + gctimes_ns::Vector{Float64}, + memory_bytes::Int, + allocs::Int, +) + return EvalBenchmark( + times_ns, + gctimes_ns, + memory_bytes, + allocs, + median(times_ns), + minimum(times_ns), + mean(times_ns), + ) +end + +Base.@kwdef struct BenchmarkResult + # Identity + package::String + package_version::String + commit::String + benchmark_name::String + # Problem dimensions + N::Int + state_dim::Int + control_dim::Int + n_constraints::Int + n_variables::Int + # Solve metrics + wall_time_s::Float64 + iterations::Int + objective_value::Float64 + constraint_violation::Float64 + solver_status::Symbol + solver::String + # Memory & allocations + total_allocations_bytes::Int + total_allocs_count::Int + gc_time_ns::Int + gc_count::Int + gc_full_count::Int + # Solver options snapshot + solver_options::Dict{Symbol,Any} + # Metadata + julia_version::String + timestamp::DateTime + runner::String + n_threads::Int +end + +Base.@kwdef struct MicroBenchmarkResult + package::String + package_version::String + commit::String + benchmark_name::String + N::Int + state_dim::Int + control_dim::Int + eval_benchmarks::Dict{Symbol,EvalBenchmark} + julia_version::String + timestamp::DateTime + runner::String + n_threads::Int +end +``` + +- [ ] **Step 4: Update module to include schema and export types** + +```julia +# src/HarmoniqsBenchmarks.jl +module HarmoniqsBenchmarks + +export EvalBenchmark, BenchmarkResult, MicroBenchmarkResult + +include("schema.jl") + +end +``` + +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cd 
/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: All tests PASS + +- [ ] **Step 6: Commit** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +git add src/schema.jl src/HarmoniqsBenchmarks.jl test/runtests.jl +git commit -m "feat: add BenchmarkResult, MicroBenchmarkResult, EvalBenchmark schema types" +``` + +--- + +## Task 3: Implement JLD2 Storage + +**Files:** +- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/storage.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` + +- [ ] **Step 1: Add storage tests** + +Append to `test/runtests.jl`, inside the top-level `@testset "HarmoniqsBenchmarks"`: + +```julia +@testset "Storage" begin + mktempdir() do dir + r = BenchmarkResult( + package = "DirectTrajOpt", + package_version = "0.8.10", + commit = "abc1234", + benchmark_name = "storage_test", + N = 51, state_dim = 4, control_dim = 2, + n_constraints = 200, n_variables = 765, + wall_time_s = 1.5, iterations = 42, + objective_value = 0.001, constraint_violation = 1e-8, + solver_status = :Optimal, solver = "ipopt", + total_allocations_bytes = 1_000_000, total_allocs_count = 500, + gc_time_ns = 10_000, gc_count = 2, gc_full_count = 0, + solver_options = Dict{Symbol,Any}(:tol => 1e-8), + julia_version = string(VERSION), + timestamp = now(), runner = "local", n_threads = 1, + ) + + path = save_results(dir, "test_bench", [r]) + @test isfile(path) + @test endswith(path, ".jld2") + + loaded = load_results(path) + @test length(loaded) == 1 + @test loaded[1].package == "DirectTrajOpt" + @test loaded[1].wall_time_s == 1.5 + @test loaded[1].solver_options[:tol] == 1e-8 + end + + mktempdir() do dir + eb = EvalBenchmark( + times_ns = [100.0, 110.0], + gctimes_ns = [0.0, 0.0], + memory_bytes = 512, allocs = 1, + ) + mr = MicroBenchmarkResult( + package = "DirectTrajOpt", 
+ package_version = "0.8.10", + commit = "abc1234", + benchmark_name = "micro_storage_test", + N = 51, state_dim = 4, control_dim = 2, + eval_benchmarks = Dict(:eval_objective => eb), + julia_version = string(VERSION), + timestamp = now(), runner = "local", n_threads = 1, + ) + + path = save_micro_results(dir, "micro_test", mr) + @test isfile(path) + + loaded = load_micro_results(path) + @test loaded.benchmark_name == "micro_storage_test" + @test loaded.eval_benchmarks[:eval_objective].min_ns == 100.0 + end +end +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: FAIL — `save_results` not defined + +- [ ] **Step 3: Implement storage functions** + +```julia +# src/storage.jl +using JLD2 + +""" + save_results(dir, name, results::Vector{BenchmarkResult}) -> String + +Save benchmark results to a JLD2 file in `dir`. Returns the file path. +""" +function save_results(dir::String, name::String, results::Vector{BenchmarkResult}) + mkpath(dir) + commit = isempty(results) ? "unknown" : results[1].commit + filename = "$(name)_$(commit).jld2" + path = joinpath(dir, filename) + JLD2.jldsave(path; results=results) + return path +end + +""" + load_results(path) -> Vector{BenchmarkResult} + +Load benchmark results from a JLD2 file. +""" +function load_results(path::String) + return JLD2.load(path, "results") +end + +""" + save_micro_results(dir, name, result::MicroBenchmarkResult) -> String + +Save micro-benchmark results to a JLD2 file in `dir`. Returns the file path. +""" +function save_micro_results(dir::String, name::String, result::MicroBenchmarkResult) + mkpath(dir) + filename = "$(name)_$(result.commit).jld2" + path = joinpath(dir, filename) + JLD2.jldsave(path; result=result) + return path +end + +""" + load_micro_results(path) -> MicroBenchmarkResult + +Load micro-benchmark results from a JLD2 file. 
+""" +function load_micro_results(path::String) + return JLD2.load(path, "result") +end +``` + +- [ ] **Step 4: Update module** + +```julia +# src/HarmoniqsBenchmarks.jl +module HarmoniqsBenchmarks + +export EvalBenchmark, BenchmarkResult, MicroBenchmarkResult +export save_results, load_results, save_micro_results, load_micro_results + +include("schema.jl") +include("storage.jl") + +end +``` + +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: All tests PASS + +- [ ] **Step 6: Commit** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +git add src/storage.jl src/HarmoniqsBenchmarks.jl test/runtests.jl +git commit -m "feat: add JLD2 save/load for BenchmarkResult and MicroBenchmarkResult" +``` + +--- + +## Task 4: Implement build_evaluator Harness + +**Files:** +- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/harness.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` + +- [ ] **Step 1: Add test for build_evaluator** + +Append to `test/runtests.jl`, inside top-level testset: + +```julia +@testset "Harness" begin + using DirectTrajOpt + using NamedTrajectories + using SparseArrays + using ExponentialAction + using MathOptInterface + MOI = MathOptInterface + + # Build a simple bilinear problem (same as DirectTrajOpt test_utils.jl) + N = 10; Δt = 0.1; u_bound = 0.1; ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound * (2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = 
:Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), + ) + + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] + + J = QuadraticRegularizer(:u, traj, 1.0) + prob = DirectTrajOptProblem(traj, J, integrators) + + @testset "build_evaluator returns evaluator and Z vector" begin + evaluator, Z_vec = build_evaluator(prob) + @test evaluator isa MOI.AbstractNLPEvaluator + @test length(Z_vec) == traj.dim * traj.N + traj.global_dim + + # Verify eval functions are callable + obj = MOI.eval_objective(evaluator, Z_vec) + @test obj isa Float64 + @test isfinite(obj) + end + + @testset "evaluator_dims returns correct sizes" begin + evaluator, Z_vec = build_evaluator(prob) + dims = evaluator_dims(evaluator) + @test dims.n_constraints == evaluator.n_constraints + @test dims.n_variables == length(Z_vec) + @test dims.n_jacobian_entries == length(evaluator.jacobian_structure) + @test dims.n_hessian_entries == length(evaluator.hessian_structure) + end +end +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: FAIL — `build_evaluator` not defined + +- [ ] **Step 3: Implement build_evaluator and evaluator_dims** + +```julia +# src/harness.jl +using DirectTrajOpt +using NamedTrajectories +using MathOptInterface +const MOI = MathOptInterface + +""" + build_evaluator(prob::DirectTrajOptProblem; eval_hessian=true) -> (evaluator, Z_vec) + +Extract a MOI evaluator and the initial decision variable vector from a +DirectTrajOptProblem. Used for micro-benchmarking individual eval functions. 
+ +Returns: +- `evaluator`: An `MOI.AbstractNLPEvaluator` ready for `MOI.eval_*` calls +- `Z_vec`: The flat decision variable vector `[trajectory_data; global_data]` +""" +function build_evaluator(prob::DirectTrajOpt.Problems.DirectTrajOptProblem; eval_hessian::Bool=true) + evaluator = DirectTrajOpt.Solvers.Evaluator(prob; eval_hessian=eval_hessian, verbose=false) + traj = prob.trajectory + Z_vec = vcat(collect(traj.datavec), collect(traj.global_data)) + return evaluator, Z_vec +end + +""" + evaluator_dims(evaluator) -> NamedTuple + +Return key dimensions of the evaluator for buffer pre-allocation. +""" +function evaluator_dims(evaluator::DirectTrajOpt.Solvers.Evaluator) + return ( + n_constraints = evaluator.n_constraints, + n_variables = evaluator.trajectory.dim * evaluator.trajectory.N + evaluator.trajectory.global_dim, + n_jacobian_entries = length(evaluator.jacobian_structure), + n_hessian_entries = length(evaluator.hessian_structure), + ) +end +``` + +- [ ] **Step 4: Update module** + +```julia +# src/HarmoniqsBenchmarks.jl +module HarmoniqsBenchmarks + +export EvalBenchmark, BenchmarkResult, MicroBenchmarkResult +export save_results, load_results, save_micro_results, load_micro_results +export build_evaluator, evaluator_dims + +include("schema.jl") +include("storage.jl") +include("harness.jl") + +end +``` + +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: All tests PASS + +- [ ] **Step 6: Commit** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +git add src/harness.jl src/HarmoniqsBenchmarks.jl test/runtests.jl +git commit -m "feat: add build_evaluator and evaluator_dims harness functions" +``` + +--- + +## Task 5: Implement benchmark_solve! 
Harness + +**Files:** +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/harness.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` + +- [ ] **Step 1: Add test for benchmark_solve!** + +Append inside the `@testset "Harness"` block in `test/runtests.jl`: + +```julia +@testset "benchmark_solve! captures metrics" begin + # Rebuild a fresh problem (solve! mutates in place) + traj2 = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound * (2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), + ) + integrators2 = [ + BilinearIntegrator(G, :x, :u, traj2), + DerivativeIntegrator(:u, :du, traj2), + DerivativeIntegrator(:du, :ddu, traj2), + ] + J2 = QuadraticRegularizer(:u, traj2, 1.0) + prob2 = DirectTrajOptProblem(traj2, J2, integrators2) + + result = benchmark_solve!( + prob2, IpoptOptions(max_iter=10, print_level=0); + benchmark_name = "test_solve", + ) + + @test result isa BenchmarkResult + @test result.package == "DirectTrajOpt" + @test result.solver == "ipopt" + @test result.wall_time_s > 0.0 + @test result.iterations >= -1 # -1 is the placeholder until the harness extracts the iteration count + @test result.total_allocations_bytes >= 0 + @test result.gc_count >= 0 + @test result.N == N + @test result.state_dim == 4 + @test haskey(result.solver_options, :max_iter) + @test result.solver_options[:max_iter] == 10 +end +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: FAIL — `benchmark_solve!` not defined + +- [ ] **Step 3: Implement benchmark_solve!** + +Append to `src/harness.jl`: + +```julia +using Dates + +""" + benchmark_solve!(prob, options; benchmark_name, runner="local", kwargs...) 
-> BenchmarkResult + +Run `solve!(prob; options, kwargs...)` and capture timing, memory, GC stats, and solver options. +""" +function benchmark_solve!( + prob::DirectTrajOpt.Problems.DirectTrajOptProblem, + options::DirectTrajOpt.Solvers.AbstractSolverOptions; + benchmark_name::String = "unnamed", + runner::String = "local", + verbose::Bool = false, + kwargs..., +) + traj = prob.trajectory + + # Capture problem dimensions before solve + n_vars = traj.dim * traj.N + traj.global_dim + state_dim = _infer_state_dim(prob) + control_dim = _infer_control_dim(prob) + n_constraints_total = _count_constraints(prob, options) + + # Snapshot solver options + opts_snapshot = Dict{Symbol,Any}() + for name in fieldnames(typeof(options)) + opts_snapshot[name] = getfield(options, name) + end + + # GC baseline + GC.gc() + gc_before = Base.gc_num() + + # Timed solve + timed = @timed solve!(prob; options=options, verbose=verbose, kwargs...) + + gc_after = Base.gc_num() + + # Compute GC deltas + gc_time = timed.gctime # in seconds, convert to ns + gc_count_delta = gc_after.pause - gc_before.pause + gc_full_delta = gc_after.full_sweep - gc_before.full_sweep + + # Package version from Project.toml + pkg_version = _get_package_version("DirectTrajOpt") + commit = _get_git_commit() + + return BenchmarkResult( + package = "DirectTrajOpt", + package_version = pkg_version, + commit = commit, + benchmark_name = benchmark_name, + N = traj.N, + state_dim = state_dim, + control_dim = control_dim, + n_constraints = n_constraints_total, + n_variables = n_vars, + wall_time_s = timed.time, + iterations = -1, # TODO: extract from solver output when available + objective_value = NaN, # TODO: extract from solver + constraint_violation = NaN, + solver_status = :Unknown, + solver = _solver_name(options), + total_allocations_bytes = timed.bytes, + total_allocs_count = -1, # @timed doesn't give count; use gc_num delta + gc_time_ns = round(Int, timed.gctime * 1e9), + gc_count = gc_count_delta, + gc_full_count 
= gc_full_delta, + solver_options = opts_snapshot, + julia_version = string(VERSION), + timestamp = now(), + runner = runner, + n_threads = Threads.nthreads(), + ) +end + +# --- helpers --- + +function _solver_name(options::DirectTrajOpt.Solvers.AbstractSolverOptions) + name = string(typeof(options).name.name) + if occursin("Ipopt", name) + return "ipopt" + elseif occursin("MadNLP", name) + return "madnlp" + else + return lowercase(name) + end +end + +function _infer_state_dim(prob) + traj = prob.trajectory + # Heuristic: look for common state variable names + for name in [:x, :ψ̃, :Ũ⃗, :ρ̃] + if haskey(traj.dims, name) + return traj.dims[name] + end + end + # Fallback: first non-control component + return first(values(traj.dims)) +end + +function _infer_control_dim(prob) + traj = prob.trajectory + total = 0 + for name in traj.control_names + if name != traj.timestep_name + total += traj.dims[name] + end + end + return total +end + +function _count_constraints(prob, options) + n_dynamics = sum(integrator.dim for integrator in prob.integrators; init=0) + n_nonlinear = sum( + c.dim for c in prob.constraints + if c isa DirectTrajOpt.Constraints.AbstractNonlinearConstraint; + init=0 + ) + return n_dynamics * (prob.trajectory.N - 1) + n_nonlinear +end + +function _get_package_version(pkg_name::String) + try + deps = Pkg.dependencies() + for (_, info) in deps + if info.name == pkg_name + return string(info.version) + end + end + catch + end + return "unknown" +end + +function _get_git_commit() + try + return strip(read(`git rev-parse --short HEAD`, String)) + catch + return "unknown" + end +end +``` + +- [ ] **Step 4: Add `Pkg` import to harness.jl** + +Add at the top of `src/harness.jl`: + +```julia +import Pkg +``` + +- [ ] **Step 5: Update module exports** + +In `src/HarmoniqsBenchmarks.jl`, add to exports: + +```julia +export benchmark_solve! 
+``` + +- [ ] **Step 6: Run tests to verify they pass** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: All tests PASS + +- [ ] **Step 7: Commit** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +git add src/harness.jl src/HarmoniqsBenchmarks.jl test/runtests.jl +git commit -m "feat: add benchmark_solve! harness with GC stats and options snapshot" +``` + +--- + +## Task 6: Implement BenchmarkTools→EvalBenchmark Conversion + +**Files:** +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/harness.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` + +- [ ] **Step 1: Add test for trial_to_eval_benchmark** + +Append inside `@testset "Harness"`: + +```julia +@testset "trial_to_eval_benchmark extracts data from BenchmarkTools.Trial" begin + using BenchmarkTools + trial = @benchmark 1 + 1 + eb = trial_to_eval_benchmark(trial) + @test eb isa EvalBenchmark + @test length(eb.times_ns) > 0 + @test eb.min_ns > 0.0 + @test eb.memory_bytes >= 0 + @test eb.allocs >= 0 +end +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: FAIL — `trial_to_eval_benchmark` not defined + +- [ ] **Step 3: Implement trial_to_eval_benchmark** + +Append to `src/harness.jl`: + +```julia +using BenchmarkTools + +""" + trial_to_eval_benchmark(trial::BenchmarkTools.Trial) -> EvalBenchmark + +Convert a BenchmarkTools.Trial to an EvalBenchmark, extracting raw timing data. +""" +function trial_to_eval_benchmark(trial::BenchmarkTools.Trial) + return EvalBenchmark( + times_ns = Float64.(trial.times), + gctimes_ns = Float64.(trial.gctimes), + memory_bytes = trial.memory, + allocs = trial.allocs, + ) +end +``` + +- [ ] **Step 4: Export the function** + +Add `trial_to_eval_benchmark` to exports in `src/HarmoniqsBenchmarks.jl`. 
+ +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: All tests PASS + +- [ ] **Step 6: Commit** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +git add src/harness.jl src/HarmoniqsBenchmarks.jl test/runtests.jl +git commit -m "feat: add trial_to_eval_benchmark for BenchmarkTools integration" +``` + +--- + +## Task 7: Implement compare_results Reporter + +**Files:** +- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/report.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` +- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` + +- [ ] **Step 1: Add test for compare_results** + +Append to `test/runtests.jl`, inside top-level testset: + +```julia +@testset "Report" begin + @testset "compare_results detects regressions" begin + baseline = BenchmarkResult( + package="DirectTrajOpt", package_version="0.8.9", + commit="aaa1111", benchmark_name="test", + N=51, state_dim=4, control_dim=2, + n_constraints=200, n_variables=765, + wall_time_s=1.0, iterations=50, + objective_value=0.001, constraint_violation=1e-8, + solver_status=:Optimal, solver="ipopt", + total_allocations_bytes=1_000_000, total_allocs_count=500, + gc_time_ns=10_000, gc_count=2, gc_full_count=0, + solver_options=Dict{Symbol,Any}(), + julia_version=string(VERSION), timestamp=now(), + runner="local", n_threads=1, + ) + + # 20% regression in wall time + current = BenchmarkResult( + package="DirectTrajOpt", package_version="0.8.10", + commit="bbb2222", benchmark_name="test", + N=51, state_dim=4, control_dim=2, + n_constraints=200, n_variables=765, + wall_time_s=1.2, iterations=50, + objective_value=0.001, constraint_violation=1e-8, + solver_status=:Optimal, solver="ipopt", + total_allocations_bytes=900_000, total_allocs_count=450, + gc_time_ns=10_000, gc_count=2, gc_full_count=0, + 
solver_options=Dict{Symbol,Any}(), + julia_version=string(VERSION), timestamp=now(), + runner="local", n_threads=1, + ) + + comparison = compare_results([baseline], [current]) + @test length(comparison) == 1 + row = comparison[1] + @test row.benchmark_name == "test" + @test row.wall_time_pct_change > 15.0 # 20% regression + @test row.alloc_bytes_pct_change < 0.0 # 10% improvement + @test row.has_regression == true # wall time regressed >10% + end +end +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. -e 'using Pkg; Pkg.test()' +``` + +Expected: FAIL — `compare_results` not defined + +- [ ] **Step 3: Implement compare_results** + +```julia +# src/report.jl + +struct ComparisonRow + benchmark_name::String + solver::String + N::Int + state_dim::Int + # Wall time + baseline_wall_s::Float64 + current_wall_s::Float64 + wall_time_pct_change::Float64 + # Allocations + baseline_alloc_bytes::Int + current_alloc_bytes::Int + alloc_bytes_pct_change::Float64 + # Regression flag + has_regression::Bool +end + +""" + compare_results(baseline, current; regression_threshold=10.0) -> Vector{ComparisonRow} + +Compare two sets of BenchmarkResults by matching on `benchmark_name`. +Returns comparison rows with percent changes and regression flags. + +A regression is flagged when wall_time or allocations increase by more than +`regression_threshold` percent. 
+""" +function compare_results( + baseline::Vector{BenchmarkResult}, + current::Vector{BenchmarkResult}; + regression_threshold::Float64 = 10.0, +) + baseline_by_name = Dict(r.benchmark_name => r for r in baseline) + rows = ComparisonRow[] + + for r in current + b = get(baseline_by_name, r.benchmark_name, nothing) + isnothing(b) && continue + + wall_pct = _pct_change(b.wall_time_s, r.wall_time_s) + alloc_pct = _pct_change(Float64(b.total_allocations_bytes), Float64(r.total_allocations_bytes)) + has_regression = wall_pct > regression_threshold || alloc_pct > regression_threshold + + push!(rows, ComparisonRow( + r.benchmark_name, r.solver, r.N, r.state_dim, + b.wall_time_s, r.wall_time_s, wall_pct, + b.total_allocations_bytes, r.total_allocations_bytes, alloc_pct, + has_regression, + )) + end + + return rows +end + +function _pct_change(old::Float64, new::Float64) + old == 0.0 && return new == 0.0 ? 0.0 : 100.0 + return (new - old) / abs(old) * 100.0 +end +``` + +- [ ] **Step 4: Update module** + +Add exports to `src/HarmoniqsBenchmarks.jl`: + +```julia +export compare_results, ComparisonRow +``` + +And add the include: + +```julia +include("report.jl") +``` + +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +julia --project=. 
-e 'using Pkg; Pkg.test()' +``` + +Expected: All tests PASS + +- [ ] **Step 6: Commit** + +```bash +cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl +git add src/report.jl src/HarmoniqsBenchmarks.jl test/runtests.jl +git commit -m "feat: add compare_results reporter with regression detection" +``` + +--- + +## Task 8: Set Up DirectTrajOpt.jl Benchmark Environment + +**Files:** +- Create: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/Project.toml` +- Create: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/.gitignore` +- Create: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` + +- [ ] **Step 1: Create benchmark directory** + +```bash +mkdir -p /home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/results +``` + +- [ ] **Step 2: Create .gitignore** + +``` +# benchmark/.gitignore +results/ +``` + +- [ ] **Step 3: Create benchmark/Project.toml** + +```toml +[deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" +ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +HarmoniqsBenchmarks = "INSERT_UUID" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" +MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" +TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" +``` + +Replace `INSERT_UUID` with the UUID generated in Task 1. 
+ +- [ ] **Step 4: Instantiate the benchmark environment** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +julia --project=benchmark -e ' + using Pkg + Pkg.develop(path=".") + Pkg.develop(path="../HarmoniqsBenchmarks.jl") + Pkg.develop(path="../NamedTrajectories.jl") + Pkg.instantiate() + using HarmoniqsBenchmarks + println("Benchmark env OK") +' +``` + +Expected: "Benchmark env OK" + +- [ ] **Step 5: Create benchmarks.jl stub** + +```julia +# benchmark/benchmarks.jl +using TestItems +``` + +- [ ] **Step 6: Commit** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +git add benchmark/Project.toml benchmark/.gitignore benchmark/benchmarks.jl +git commit -m "feat: add benchmark/ environment for HarmoniqsBenchmarks integration" +``` + +--- + +## Task 9: Write Evaluator Micro-benchmarks + +**Files:** +- Modify: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` + +- [ ] **Step 1: Write the micro-benchmark @testitem** + +```julia +# benchmark/benchmarks.jl +using TestItems + +@testitem "Evaluator micro-benchmarks: bilinear N=51" begin + using HarmoniqsBenchmarks + using BenchmarkTools + using DirectTrajOpt + using NamedTrajectories + using SparseArrays + using ExponentialAction + using MathOptInterface + const MOI = MathOptInterface + using Dates + + # Build a deterministic bilinear problem + Random.seed!(42) + N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 + Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound * (2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), + 
) + + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + prob = DirectTrajOptProblem(traj, J, integrators) + + evaluator, Z_vec = build_evaluator(prob) + dims = evaluator_dims(evaluator) + + # Pre-allocate buffers + g = zeros(dims.n_constraints) + grad = zeros(dims.n_variables) + H = zeros(dims.n_hessian_entries) + Jac = zeros(dims.n_jacobian_entries) + sigma = 1.0 + mu = ones(dims.n_constraints) + + # Run benchmarks + benchmarks = Dict{Symbol,EvalBenchmark}( + :eval_objective => trial_to_eval_benchmark( + @benchmark(MOI.eval_objective($evaluator, $Z_vec)) + ), + :eval_gradient => trial_to_eval_benchmark( + @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)) + ), + :eval_constraint => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)) + ), + :eval_jacobian => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec)) + ), + :eval_hessian_lagrangian => trial_to_eval_benchmark( + @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)) + ), + ) + + result = MicroBenchmarkResult( + package = "DirectTrajOpt", + package_version = "0.8.10", + commit = try strip(read(`git rev-parse --short HEAD`, String)) catch; "unknown" end, + benchmark_name = "evaluator_micro_bilinear_N51", + N = N, state_dim = 4, control_dim = 2, + eval_benchmarks = benchmarks, + julia_version = string(VERSION), + timestamp = now(), + runner = get(ENV, "BENCHMARK_RUNNER", "local"), + n_threads = Threads.nthreads(), + ) + + # Print summary + println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===") + for (name, eb) in sort(collect(result.eval_benchmarks), by=first) + Printf = Base.Printf + @Printf.printf(" %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", + name, eb.median_ns, eb.allocs, eb.memory_bytes) + end + + # Save + 
results_dir = joinpath(@__DIR__, "results") + save_micro_results(results_dir, result.benchmark_name, result) + println(" Saved to $results_dir/") +end +``` + +- [ ] **Step 2: Run the micro-benchmark to verify it works** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +julia --project=benchmark -e ' + using TestItemRunner + @run_package_tests(filter=ti -> occursin("micro", ti.name), benchmark) +' +``` + +Expected: Benchmark runs, prints timing table, saves JLD2 to `benchmark/results/` + +- [ ] **Step 3: Verify the JLD2 output is loadable** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +julia --project=benchmark -e ' + using HarmoniqsBenchmarks + files = filter(f -> endswith(f, ".jld2"), readdir("benchmark/results", join=true)) + @assert length(files) >= 1 "Expected at least one JLD2 file" + result = load_micro_results(files[1]) + println("Loaded: $(result.benchmark_name)") + println("Functions benchmarked: $(keys(result.eval_benchmarks))") +' +``` + +Expected: Loads successfully, shows function names + +- [ ] **Step 4: Commit** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +git add benchmark/benchmarks.jl +git commit -m "feat: add evaluator micro-benchmarks with BenchmarkTools" +``` + +--- + +## Task 10: Write Ipopt vs MadNLP Macro-benchmarks + +**Files:** +- Modify: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` + +- [ ] **Step 1: Append the macro-benchmark @testitem** + +Append to `benchmark/benchmarks.jl`: + +```julia +@testitem "Ipopt vs MadNLP: bilinear N=51" begin + using HarmoniqsBenchmarks + using DirectTrajOpt + using NamedTrajectories + using SparseArrays + using ExponentialAction + import MadNLP + using Dates + + # Resolve MadNLPOptions from the extension + const MadNLPSolverExt = [ + mod for mod in reverse(Base.loaded_modules_order) + if Symbol(mod) == :MadNLPSolverExt + ][1] + + function make_bilinear_problem(; seed=42) + Random.seed!(seed) + N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 + Gx = 
sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) + Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) + Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) + G(u) = ω * Gz + u[1] * Gx + u[2] * Gy + + traj = NamedTrajectory( + ( + x = 2rand(4, N) .- 1, + u = u_bound * (2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), + ) + + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] + J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) + end + + # Ipopt solve + prob_ipopt = make_bilinear_problem() + result_ipopt = benchmark_solve!( + prob_ipopt, + IpoptOptions(max_iter=200, print_level=0); + benchmark_name = "bilinear_N51_ipopt", + ) + + # MadNLP solve (fresh problem) + prob_madnlp = make_bilinear_problem() + result_madnlp = benchmark_solve!( + prob_madnlp, + MadNLPSolverExt.MadNLPOptions(max_iter=200, print_level=1); + benchmark_name = "bilinear_N51_madnlp", + ) + + # Print comparison + println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") + println(" Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB alloc") + println(" MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, $(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc") + + # Save + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "ipopt_vs_madnlp_N51", [result_ipopt, result_madnlp]) +end +``` + +- [ ] **Step 2: Run the macro-benchmark** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +julia --project=benchmark -e ' + using TestItemRunner + @run_package_tests(filter=ti -> occursin("Ipopt vs MadNLP", 
ti.name), benchmark) +' +``` + +Expected: Both solvers run, prints wall time and allocation comparison + +- [ ] **Step 3: Commit** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +git add benchmark/benchmarks.jl +git commit -m "feat: add Ipopt vs MadNLP macro-benchmark" +``` + +--- + +## Task 11: Write Memory Scaling Study + +**Files:** +- Modify: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` + +- [ ] **Step 1: Append the scaling study @testitem** + +Append to `benchmark/benchmarks.jl`: + +```julia +@testitem "Memory scaling: N and state_dim sweep" begin + using HarmoniqsBenchmarks + using DirectTrajOpt + using NamedTrajectories + using SparseArrays + using ExponentialAction + import MadNLP + using Dates, Printf + + const MadNLPSolverExt = [ + mod for mod in reverse(Base.loaded_modules_order) + if Symbol(mod) == :MadNLPSolverExt + ][1] + + function make_scaled_problem(; N, state_dim, n_controls=2, seed=42) + Random.seed!(seed) + + # Build random bilinear system at given state dimension + G_drift = sparse(randn(state_dim, state_dim)) + G_drives = [sparse(randn(state_dim, state_dim)) for _ in 1:n_controls] + G(u) = G_drift + sum(u[i] * G_drives[i] for i in 1:n_controls) + + x_init = zeros(state_dim); x_init[1] = 1.0 + x_goal = zeros(state_dim); x_goal[2] = 1.0 + + traj = NamedTrajectory( + ( + x = randn(state_dim, N), + u = 0.1 * randn(n_controls, N), + du = randn(n_controls, N), + Δt = fill(0.1, N), + ); + controls = (:du, :Δt), + timestep = :Δt, + bounds = (u = 1.0, Δt = (0.01, 0.5)), + initial = (x = x_init, u = zeros(n_controls)), + final = (u = zeros(n_controls),), + goal = (x = x_goal,), + ) + + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + ] + J = QuadraticRegularizer(:u, traj, 1.0) + return DirectTrajOptProblem(traj, J, integrators) + end + + N_values = [25, 51, 101] + dim_values = [4, 8, 16] + results = BenchmarkResult[] + + println("\n=== Memory Scaling Study ===") + 
@printf(" %5s | %5s | %12s | %12s | %12s | %12s\n", + "N", "dim", "Ipopt (s)", "Ipopt (KB)", "MadNLP (s)", "MadNLP (KB)") + @printf(" %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", + "-"^5, "-"^5, "-"^12, "-"^12, "-"^12, "-"^12) + + for N in N_values + for dim in dim_values + # Ipopt + prob = make_scaled_problem(; N=N, state_dim=dim) + r_ipopt = benchmark_solve!( + prob, IpoptOptions(max_iter=50, print_level=0); + benchmark_name = "scaling_N$(N)_d$(dim)_ipopt", + ) + push!(results, r_ipopt) + + # MadNLP + prob = make_scaled_problem(; N=N, state_dim=dim) + r_madnlp = benchmark_solve!( + prob, MadNLPSolverExt.MadNLPOptions(max_iter=50, print_level=1); + benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", + ) + push!(results, r_madnlp) + + @printf(" %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", + N, dim, + r_ipopt.wall_time_s, r_ipopt.total_allocations_bytes ÷ 1024, + r_madnlp.wall_time_s, r_madnlp.total_allocations_bytes ÷ 1024) + end + end + + # Save all results + results_dir = joinpath(@__DIR__, "results") + save_results(results_dir, "memory_scaling", results) + println("\n Saved $(length(results)) results to $results_dir/") +end +``` + +- [ ] **Step 2: Run the scaling study** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +julia --project=benchmark -e ' + using TestItemRunner + @run_package_tests(filter=ti -> occursin("Memory scaling", ti.name), benchmark) +' +``` + +Expected: Table printed with wall times and allocations for each (N, dim) combination + +- [ ] **Step 3: Commit** + +```bash +cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl +git add benchmark/benchmarks.jl +git commit -m "feat: add memory scaling study benchmark (N x state_dim sweep)" +``` + +--- + +## Verification Checklist + +After all tasks are complete: + +- [ ] `cd HarmoniqsBenchmarks.jl && julia --project=. 
-e 'using Pkg; Pkg.test()'` — all tests pass +- [ ] `cd DirectTrajOpt.jl && julia --project=benchmark -e 'using TestItemRunner; @run_package_tests(benchmark)'` — all three benchmark @testitems run +- [ ] `ls DirectTrajOpt.jl/benchmark/results/` — contains `.jld2` files for each benchmark +- [ ] Load and compare results: + ```julia + using HarmoniqsBenchmarks + results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") + println("Ipopt: $(results[1].wall_time_s)s, MadNLP: $(results[2].wall_time_s)s") + ``` + +--- + +## Follow-up Plans (Not in Scope) + +- **Piccolissimo benchmark suite** — migrate existing `benchmark/complex_vs_real_ode.jl` and `constraint_comparison.jl` to use HarmoniqsBenchmarks schema +- **Demo-repo problem generators** — clone bosonic-demo, nv-center-demo, atoms-demo, ions, fluxonium-demo, gkp-stanford and extract system Hamiltonians +- **CI workflows** — `.github/workflows/benchmark.yml` for DirectTrajOpt and other packages +- **Allocation profiling spike** — parallel worktree experiments with Profile.Allocs, AllocCheck.jl, --track-allocation +- **Aggregator repo** — `harmoniqs-benchmarks` with cross-package comparison tables diff --git a/docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md b/docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md new file mode 100644 index 0000000..50b7959 --- /dev/null +++ b/docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md @@ -0,0 +1,198 @@ +# Altissimo GPU Benchmark Suite — Design + +**Date:** 2026-04-15 +**Status:** Design (follow-up to HarmoniqsBenchmarks.jl core plan) +**Depends on:** HarmoniqsBenchmarks.jl (schema, harness, storage) +**Reference:** `gpu_benchmark.py` (Colab notebook from Raghav, T4 results) + +## Context + +Altissimo.jl is a GPU-accelerated augmented Lagrangian optimizer for quantum trajectory optimization. 
It uses matrix-free JVP/VJP callbacks, making it GPU-compatible where Ipopt (which requires sparse Jacobians/Hessians) is CPU-only. Raghav demonstrated 4.5x GPU speedup at 1024 state dim on a T4. This benchmark suite formalizes those measurements and tracks them across versions. + +Three benchmark categories, matching the existing Colab notebook structure: + +1. **Ipopt vs Altissimo (CPU)** — real quantum gate optimization +2. **Altissimo CPU vs GPU scaling** — structured optimization at increasing state dim +3. **cuDensityMat vs cuSPARSE** — Liouvillian operator action for open-system trajectory optimization + +## Benchmark 0: Three-Way Solver Comparison (Ipopt vs MadNLP-GPU vs Altissimo-GPU) + +The harmoniqs org maintains a MadNLP.jl fork with `MadNLPGPU` (in `lib/MadNLPGPU/`), which uses CUDSS for GPU-accelerated sparse KKT system solves. This enables a three-way comparison at increasing problem sizes: + +| Solver | Method | Linear Algebra | GPU? | +|--------|--------|---------------|------| +| Ipopt | Interior-point | MUMPS/Pardiso (sparse, CPU) | No | +| MadNLP + MadNLPGPU | Interior-point | CUDSS (sparse, GPU) | Yes | +| Altissimo | Augmented Lagrangian | Matrix-free JVP/VJP (GPU) | Yes | + +**Hypothesis:** At small state dims (sd < 256), Ipopt wins due to mature sparse factorization. At medium dims (256 ≤ sd < 1024), MadNLP-GPU may win due to GPU-accelerated CUDSS. At large dims (sd ≥ 1024), Altissimo wins due to matrix-free scaling (no sparse assembly). + +**Problem:** Same quantum-control-structured problem as Benchmark 2 below, swept across sd ∈ {64, 128, 256, 512, 1024, 2048}. For MadNLP-GPU, the problem requires Jacobian/Hessian sparsity (MOI interface), so it uses the same evaluator as Ipopt but with GPU-side linear solves. 
+ +**Dependencies:** +- `MadNLPGPU` from `harmoniqs/MadNLP.jl` (lib/MadNLPGPU) +- `CUDA.jl` + `CUDSS.jl` for GPU linear algebra +- DirectTrajOpt MadNLP extension for evaluator hookup + +**Metrics:** Wall time, iterations, convergence quality, total allocations, GPU memory usage, speedup vs Ipopt baseline. + +**Note:** MadNLP-GPU requires the KKT system to fit in GPU memory. For very large problems, the sparse Jacobian/Hessian may exceed VRAM, which is exactly where Altissimo's matrix-free approach has the advantage. + +--- + +## Benchmark 1: Ipopt vs Altissimo (CPU) — Quantum Gate Optimization + +Directly comparable: same X gate problem, same initial conditions, both on CPU. + +**Problem setup** (from Colab Part 2): +- System: 1 qubit, H_drift = 0.5 σ_z, drives = [σ_x, σ_y], bounds = [1.0, 1.0] +- Gate: X gate, T=10.0, N=100 +- Integrator: HermitianExponentialIntegrator +- Template: SmoothPulseProblem(Q=100.0, R=1e-2, ddu_bound=1.0, Δt_bounds=(0.05, 0.15)) +- Deep copy for identical initial conditions + +**Metrics:** +- Wall time (s) +- Fidelity (infidelity = 1 - fidelity) +- Total allocations (bytes) +- GC time + +**Altissimo configuration** (reference values): +```julia +AltissimoOptions( + search_direction = :LBFGS, + lbfgs_memory = 50, + line_search = :StrongWolfe, + ls_max_evals = 100, + max_outer_iter = 20, + max_inner_iter = 500, + inner_tol = 1e-8, + ρ_init = 100.0, + ρ_max = 1e8, + polish = true, + polish_stall_min_iters = 10, + polish_δ_w = 1e-6, + polish_δ_c = 1e-8, +) +``` + +**Integration with HarmoniqsBenchmarks:** Both produce `BenchmarkResult` with `solver="ipopt"` / `solver="altissimo"`. The `solver_options` field captures the full AltissimoOptions snapshot. + +## Benchmark 2: Altissimo CPU vs GPU Scaling + +The core scaling benchmark. Uses a quantum-control-structured problem (NOT a real quantum system) to isolate solver scaling behavior from physics complexity. 
+ +**Problem structure** (from Colab Part 3): +- Decision vector: z = [x_1; ...; x_N; u_1; ...; u_{N-1}] +- Dynamics: x_{k+1} = Φ(u_k) x_k, where Φ(u) = A + Σⱼ uⱼ Cⱼ +- A is orthogonal (norm-preserving, like unitary evolution) +- Coupling scaled: ‖Cⱼ‖_spectral ≈ 0.4 independent of state_dim (σ_c = 0.2/√sd) +- Target generated by forward simulation with known controls → guaranteed feasible +- Objective: ½|x_N - x_target|² + (α/2) Σ|u_k|² +- All callbacks GPU-native: cuBLAS matvec, broadcast, dot (no scalar indexing) + +**Sweep configurations** (from Colab): + +| state_dim | n_drives | N | n_vars | n_eq | +|-----------|----------|----|----------|----------| +| 512 | 2 | 20 | 10,278 | 10,240 | +| 1024 | 2 | 20 | 20,518 | 20,480 | +| 2048 | 2 | 20 | 40,998 | 40,960 | +| 4096 | 2 | 20 | 81,958 | 81,920 | + +**Metrics per (state_dim, device) pair:** +- Wall time (s) — after JIT warmup +- Objective value at convergence +- Constraint violation ‖c‖ +- Converged (bool) +- GPU speedup = CPU_time / GPU_time + +**Key implementation details:** +- JIT warmup run before timed run +- `CUDA.synchronize()` before and after timed run for accurate GPU timing +- `build_callbacks()` returns obj!, grad!, hvp!, eq!, eq_jvp!, eq_vjp! +- Optimizer: `Altissimo.LBFGS` with `Altissimo.StrongWolfe` line search +- `initialize_z!` does forward propagation with u=0 for feasible init + +**Schema extension:** Add to `BenchmarkResult`: +- `device::String` — "cpu" or "gpu" +- `gpu_name::String` — e.g. "Tesla T4", "A100" (from `CUDA.name(CUDA.device())`) +- `gpu_memory_bytes::Int` — VRAM (from `CUDA.totalmem`) + +OR: encode these in `solver_options` dict to avoid schema changes: +```julia +solver_options[:device] = "gpu" +solver_options[:gpu_name] = CUDA.name(CUDA.device()) +solver_options[:gpu_memory_bytes] = CUDA.totalmem(CUDA.device()) +``` + +Recommended: use `solver_options` dict to avoid breaking the schema for CPU-only packages. 
+ +## Benchmark 3: cuDensityMat vs cuSPARSE — Liouvillian Operator + +This measures the fundamental operation for open-system trajectory optimization: applying a Liouvillian superoperator to a density matrix. + +**System:** M coupled cavities with Fock truncation d=3, Hilbert space D = 3^M. +- Hamiltonian: H(t) = Σᵢ δᵢ(t) aᵢ†aᵢ + Σᵢ Kᵢ aᵢ†aᵢ†aᵢaᵢ + Σ⟨i,j⟩ gᵢⱼ(t)(aᵢ†aⱼ + h.c.) +- Collapse operators: √κ aᵢ (photon loss) +- Liouvillian: L = -i(H⊗I - I⊗Hᵀ) + Σₖ (Cₖ⊗Cₖ* - ½(Cₖ†Cₖ⊗I + I⊗Cₖᵀ Cₖ*)) + +**Sweep:** + +| M | D | ρ elements (D²) | cuDensityMat | cuSPARSE | Dense CPU | +|---|------|-----------------|-------------|----------|-----------| +| 2 | 9 | 81 | 0.27 ms | 0.039 ms | 0.003 ms | +| 4 | 81 | 6,561 | 1.22 ms | 0.048 ms | 31.8 ms | +| 6 | 729 | 531,441 | 6.45 ms | 0.90 ms | infeasible| +| 8 | 6561 | 43,046,721 | 620 ms | infeasible| infeasible| + +**Batched evolution** (trajectory optimization workload): + +| M | D | Batch | Batched | Sequential | Speedup | +|---|----|-------|-----------|------------|---------| +| 2 | 9 | 256 | 0.38 ms | 70.1 ms | 186x | +| 4 | 81 | 256 | 8.05 ms | 280.7 ms | 35x | + +**Key insight:** cuSPARSE beats cuDensityMat for M ≤ 6 (tensor-network contraction overhead at small D). cuDensityMat wins at M=8+ where sparse Liouvillian can't be materialized (~50-70 GB). Batched evolution is critical for trajectory optimization (35-186x speedup). + +**Integration note:** This benchmark depends on CuQuantum.jl (harmoniqs org). The cuDensityMat portion requires the NVIDIA cuQuantum SDK and should run exclusively on EC2 GPU runners. 
+ +## CI Runner Requirements + +| Benchmark | Runner | GPU Required | +|-----------|--------|-------------| +| Ipopt vs Altissimo (CPU) | `ubuntu-latest` (free) | No | +| 3-way solver (Ipopt/MadNLP-GPU/Altissimo) | `[self-hosted, gpu]` (EC2) | Yes (T4 minimum, CUDSS for MadNLP) | +| Altissimo CPU vs GPU scaling | `[self-hosted, gpu]` (EC2) | Yes (T4 minimum) | +| cuSPARSE / cuDensityMat | `[self-hosted, gpu]` (EC2) | Yes (A100 recommended for M=8) | + +## Where Benchmarks Live + +- **Benchmark 1** (Ipopt vs Altissimo CPU): In `Piccolissimo.jl/benchmark/` since it uses `SmoothPulseProblem` + `HermitianExponentialIntegrator` +- **Benchmark 2** (GPU scaling): In `Altissimo.jl/benchmark/` since it's Altissimo-specific with CUDA callbacks +- **Benchmark 3** (Liouvillian): In `CuQuantum.jl/benchmark/` or `Piccolissimo.jl/benchmark/` (TBD based on where cuDensityMat integration lands) + +All use `HarmoniqsBenchmarks.jl` schema for consistent artifact format. + +## Adaptation for HarmoniqsBenchmarks Schema + +The Colab notebook uses ad-hoc timing (`@elapsed`, `CUDA.@elapsed`). 
To integrate with HarmoniqsBenchmarks: + +**Benchmark 2 adaptation:** +- Wrap `run_one()` to return a `BenchmarkResult` instead of a NamedTuple +- Add `solver_options` dict with Altissimo config + device info +- Replace manual `time()` calls with `@timed` for allocation tracking +- Save JLD2 artifacts instead of printing tables + +**Benchmark 3 adaptation:** +- Create a `LiouvillianBenchmarkResult` (or use a new `MicroBenchmarkResult` variant) +- Key fields: M, D, D², nnz(L), method (:cusparse, :cudensitymat, :cpu_dense), time_ms, memory_bytes +- Batched results include batch_size and sequential/batched comparison + +## Implementation Notes + +- The `apply_Phi!` / `apply_Phi_t!` pattern from the notebook should be extracted into Altissimo's callback builder, not reimplemented in benchmarks +- `CUDA.synchronize()` is critical for accurate GPU timing — always call before starting and after stopping the timer +- JIT warmup run is mandatory — first Julia/CUDA execution compiles kernels +- Memory estimation before large allocations: check `CUDA.totalmem()` and skip if would exceed 80% VRAM +- The coupling scaling fix (σ_c = 0.2/√sd) is essential for well-conditioned problems at large state dim — without it, ‖C‖ ~ 0.1√sd makes convergence erratic diff --git a/docs/superpowers/specs/2026-04-15-benchmarking-design.md b/docs/superpowers/specs/2026-04-15-benchmarking-design.md new file mode 100644 index 0000000..bb9f943 --- /dev/null +++ b/docs/superpowers/specs/2026-04-15-benchmarking-design.md @@ -0,0 +1,383 @@ +# HarmoniqsBenchmarks.jl — Cross-Package Benchmarking Infrastructure + +**Date:** 2026-04-15 +**Status:** Design + +## Context + +The harmoniqs quantum optimal control stack (DirectTrajOpt, Piccolo, Piccolissimo, Altissimo, Intonato) needs a unified benchmarking system to: + +- Compare Ipopt vs MadNLP solver performance on the DirectTrajOpt `feat/madnlp-integration` branch +- Collect statistically robust histograms of key evaluator functions 
(eval_hessian_lagrangian, eval_constraint_jacobian, etc.) for regression detection +- Profile memory usage and allocations in MadNLP and across all packages, understanding how memory scales with knot points (N), state dimension, and control dimension +- Track allocations in the optimization hot path to drive them toward zero +- Publish version-tagged JLD2 artifacts so labs and enterprises can evaluate problem-size scaling + +This is driven by all three active workstreams needing memory/performance benchmarks (MadNLP integration, Altissimo GPU scaling at 1024 state dim, Intonato convergence tracking). + +## Architecture + +**Approach:** Shared `HarmoniqsBenchmarks.jl` package + per-package `benchmark/` directories + central aggregator repo. + +- `HarmoniqsBenchmarks.jl` — lightweight Julia package (own repo in harmoniqs org) providing schema, profiling harness, problem generators, and reporters +- Each downstream package (DirectTrajOpt, Piccolo, Piccolissimo, Altissimo, Intonato) has a `benchmark/` directory with `@testitem`-based benchmarks using `HarmoniqsBenchmarks` +- Central `harmoniqs-benchmarks` repo aggregates artifacts and generates cross-package comparison tables +- Artifacts are JLD2 files stored in CI (GitHub Actions artifact upload), not a live dashboard + +## Schema + +### BenchmarkResult + +```julia +struct BenchmarkResult + # Identity + package::String # "DirectTrajOpt", "Piccolissimo", etc. 
+ package_version::String # semver tag + commit::String # short SHA + benchmark_name::String # "cz_gate_ipopt", "madnlp_scaling_N101_d16" + + # Problem dimensions + N::Int # knot points + state_dim::Int # state vector dimension + control_dim::Int # number of controls + n_constraints::Int # total nonlinear constraints + n_variables::Int # total NLP variables + + # Solve metrics + wall_time_s::Float64 + iterations::Int + objective_value::Float64 + constraint_violation::Float64 + solver_status::Symbol # :Optimal, :MaxIter, :Infeasible + solver::String # "ipopt", "madnlp", "altissimo" + + # Memory & allocations + total_allocations_bytes::Int + total_allocs_count::Int # number of allocation events + peak_memory_bytes::Int + + # GC stats + gc_time_ns::Int + gc_count::Int + gc_full_count::Int + + # Solver options snapshot + solver_options::Dict{Symbol,Any} + + # Metadata + julia_version::String + timestamp::DateTime + runner::String # "github-actions", "ec2-gpu", "local" + n_threads::Int +end +``` + +### MicroBenchmarkResult + +```julia +struct MicroBenchmarkResult + # Identity (same as above) + package::String + package_version::String + commit::String + benchmark_name::String + + # Problem dimensions + N::Int + state_dim::Int + control_dim::Int + + # Per-function BenchmarkTools results + # Each value is a serialized BenchmarkTools.Trial containing: + # times (ns), gctimes (ns), memory (bytes), allocs (count) + eval_benchmarks::Dict{Symbol, Any} + # Keys: :eval_objective, :eval_gradient, :eval_constraint, + # :eval_jacobian, :eval_hessian_lagrangian + + # Metadata + julia_version::String + timestamp::DateTime + runner::String + n_threads::Int +end +``` + +## Benchmarking Layers + +### Layer 1: Micro-benchmarks (Eval Function Histograms) + +Use `BenchmarkTools.@benchmark` on individual MOI evaluator methods. This gives statistically robust distributions with proper warmup, plus allocation counts per call. 
+ +```julia +@testitem "Evaluator micro-benchmarks: CZ N=51" begin + using HarmoniqsBenchmarks, BenchmarkTools, Piccolissimo, Piccolo + + prob = build_cz_problem(N=51) + evaluator, Z_vec = build_evaluator(prob) + + # Pre-allocate output buffers + g = zeros(n_constraints(evaluator)) + grad = zeros(n_variables(evaluator)) + H = zeros(n_hessian_entries(evaluator)) + J = zeros(n_jacobian_entries(evaluator)) + sigma = 1.0 + mu = ones(n_constraints(evaluator)) + + benchmarks = Dict( + :eval_objective => @benchmark(MOI.eval_objective($evaluator, $Z_vec)), + :eval_gradient => @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)), + :eval_constraint => @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)), + :eval_jacobian => @benchmark(MOI.eval_constraint_jacobian($evaluator, $J, $Z_vec)), + :eval_hessian_lagrangian => @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)), + ) + + save_micro_results("cz_N51_ipopt", benchmarks; prob) +end +``` + +**Regression detection:** Compare median times and allocation counts across versions. A >10% regression in any eval function on the same problem size flags for review. + +### Layer 2: Macro-benchmarks (Full Solves) + +Use `@timed` for wall clock + total allocations on `solve!`. Full optimization is not repeatable in the BenchmarkTools sense (each call modifies the problem), so we capture single-run metrics. + +```julia +@testitem "CZ gate Ipopt vs MadNLP" begin + using HarmoniqsBenchmarks, Piccolissimo, Piccolo + + prob = build_cz_problem(N=51) + result_ipopt = benchmark_solve!(prob, IpoptOptions()) + + prob = build_cz_problem(N=51) # fresh problem + result_madnlp = benchmark_solve!(prob, MadNLPOptions()) + + save_results("cz_gate_comparison", [result_ipopt, result_madnlp]) +end +``` + +### Layer 3: Scaling Studies + +Parameterized sweeps over problem dimensions to characterize memory and time growth. 
+ +```julia +@testitem "MadNLP memory scaling" begin + using HarmoniqsBenchmarks, Piccolissimo, Piccolo + + results = BenchmarkResult[] + for N in [25, 51, 101, 201, 401] + for state_dim in [4, 8, 16, 32, 64] + prob = build_bilinear_problem(; N, state_dim, n_controls=2) + r = benchmark_solve!(prob, MadNLPOptions()) + push!(results, r) + end + end + save_results("madnlp_memory_scaling", results) +end +``` + +### Layer 4: Allocation Profiling + +Tools for tracking down and eliminating allocations in the optimization hot path. + +**Profile.Allocs** — captures per-allocation stack traces during a solve: +```julia +@testitem "Allocation profile: CZ solve" begin + using HarmoniqsBenchmarks, Profile, Piccolissimo, Piccolo + + prob = build_cz_problem(N=51) + Profile.Allocs.clear() + Profile.Allocs.@profile sample_rate=1.0 solve!(prob) + alloc_results = Profile.Allocs.fetch() + + save_alloc_profile("cz_N51_alloc_profile", alloc_results) + # Visualize locally: using PProf; PProf.Allocs.pprof(alloc_results) +end +``` + +**AllocCheck.jl** — compile-time zero-allocation enforcement for evaluator hot paths. Can be added as an optional CI check: +```julia +@testitem "Zero-allocation check: evaluator methods" begin + using AllocCheck, DirectTrajOpt + + # These should be allocation-free once optimized + @check_allocs MOI.eval_constraint(ev::Evaluator, g::Vector{Float64}, Z::Vector{Float64}) + @check_allocs MOI.eval_constraint_jacobian(ev::Evaluator, J::Vector{Float64}, Z::Vector{Float64}) + @check_allocs MOI.eval_hessian_lagrangian(ev::Evaluator, H::Vector{Float64}, Z::Vector{Float64}, s::Float64, m::Vector{Float64}) +end +``` + +**Per-line tracking** (local development, not CI): +```bash +julia --track-allocation=user --project=benchmark benchmark/benchmarks.jl +# Generates .mem files with per-line allocation counts +``` + +**Implementation note:** The best allocation profiling approach for the evaluator hot path is TBD. 
During implementation, spike all three approaches (`Profile.Allocs`, `AllocCheck.jl`, `--track-allocation`) in parallel worktrees against a representative problem (e.g. CZ N=51) to determine which gives the most actionable results for tracking down and eliminating allocations in the MOI eval methods. + +## Problem Generators + +Deterministic, parameterized problem constructors for reproducibility. + +### DirectTrajOpt level +- `build_bilinear_problem(; N=51, state_dim=4, n_controls=2, seed=42)` — random Hermitian system matrices, bilinear integrator + quadratic regularizer +- `build_constrained_problem(; N=51, state_dim=4, n_nonlinear=3, seed=42)` — adds nonlinear knot-point constraints + +### Piccolo/Piccolissimo level +- `build_cz_problem(; N=51, integrator=:hermitian_exp)` — 2-qubit CZ gate, exchange-only system (4-level), matches spin-qubit-demo +- `build_cnot_problem(; N=101, integrator=:hermitian_exp)` — 2-qubit CNOT with 3 EDSR drives +- `build_transmon_problem(; levels=3, N=51)` — single-qubit X gate on multi-level transmon + +### Altissimo level +- `build_polish_problem(; N=51, state_dim=4)` — pre-solved Ipopt problem ready for Altissimo refinement +- `build_gpu_scaling_problem(; state_dim=1024)` — large-state-dim problem for GPU benchmarking + +### Intonato level +- `build_qilc_problem(; N=101, n_paulis=15, J_mismatch=1.3)` — QILC calibration loop with simulated experiment, matches spin-qubit-demo pattern + +### Demo-repo-derived problems + +The harmoniqs org has several hardware-platform demo repos that provide real-world benchmark problems. 
During implementation, clone and extract representative problem configurations from: + +| Repo | Platform | Typical Dimensions | Key Benchmark | +|------|----------|-------------------|---------------| +| `spin-qubit-demo` | Silicon spin qubits | N=51-101, 4-level, 1-3 drives | CZ, CNOT, QILC calibration | +| `bosonic-demo` | Bosonic cavity QED | Higher Hilbert space dims | Cavity control | +| `nv-center-demo` | NV centers | Spin-1 + nuclear spins | Dark matter sensing pulses | +| `atoms-demo` | Neutral atoms | Rydberg levels | Multi-qubit gates | +| `ions` | Trapped ions | Motional modes + qubits | MS gate, individual addressing | +| `fluxonium-demo` | Fluxonium qubits | Multi-level transmon-like | Single-qubit gates | +| `gkp-stanford` | GKP states | Bosonic Fock space | State preparation | + +These provide the "enterprise-scale" problem suite that demonstrates what problem sizes each solver can handle. Extract the system Hamiltonians and problem parameters from each demo, wrap them as generators in `HarmoniqsBenchmarks.problems/`. + +All generators use `Random.seed!(seed)` for determinism. + +## Harness Functions + +### build_evaluator(prob) -> (evaluator, Z_vec) + +Extracts the MOI evaluator and initial decision variable vector from a `DirectTrajOptProblem`. Used for micro-benchmarks so individual eval functions can be called directly. + +### benchmark_solve!(prob, options; kwargs...) -> BenchmarkResult + +```julia +function benchmark_solve!(prob, options; kwargs...) + GC.gc() + gc_before = Base.gc_num() + + timed = @timed solve!(prob; options, kwargs...) + + gc_after = Base.gc_num() + + return BenchmarkResult( + # ... populate from prob metadata, timed, gc delta, options snapshot + ) +end +``` + +### save_results(name, results) / save_micro_results(name, benchmarks) + +Write JLD2 to `benchmark/results/{name}_{commit}.jld2`. 
+ +### compare_results(baseline_path, current_path) -> ComparisonTable + +Load two result sets and produce a diff table with percent changes, flagging regressions. + +## CI Workflow + +### Per-package: `.github/workflows/benchmark.yml` + +```yaml +name: Benchmarks +on: + push: + tags: ['v*'] + workflow_dispatch: + inputs: + baseline_tag: + description: 'Tag to compare against' + required: false + +jobs: + benchmark: + runs-on: ubuntu-latest # free for OSS + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: '1.11' + - name: Instantiate benchmark env + run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' + - name: Run benchmarks + run: julia --project=benchmark -t auto -e ' + using TestItemRunner + @run_package_tests(benchmark) + ' + - uses: actions/upload-artifact@v4 + with: + name: benchmark-${{ github.ref_name }}-${{ github.sha }} + path: benchmark/results/ + retention-days: 365 + + # GPU/large-scale benchmarks (Altissimo, large N) + benchmark-gpu: + if: contains(github.repository, 'Altissimo') || github.event_name == 'workflow_dispatch' + runs-on: [self-hosted, gpu] # EC2 runners from CuQuantum.jl setup + steps: + # same as above but with CUDA-enabled Julia +``` + +### Central aggregator: `harmoniqs-benchmarks` repo + +Triggered by workflow_dispatch or cron. Downloads latest artifacts from each package repo, generates comparison tables, stores historical archive. 
+ +## Package Structure + +``` +HarmoniqsBenchmarks.jl/ + src/ + HarmoniqsBenchmarks.jl # module + exports + schema.jl # BenchmarkResult, MicroBenchmarkResult + harness.jl # benchmark_solve!, build_evaluator + storage.jl # save/load JLD2, save_alloc_profile + report.jl # compare_results, regression detection + problems/ + bilinear.jl # DirectTrajOpt-level generators + quantum_gates.jl # Piccolo/Piccolissimo-level generators + polish.jl # Altissimo-level generators + qilc.jl # Intonato-level generators + Project.toml # deps: BenchmarkTools, JLD2, Dates + README.md + +# Per downstream package: +DirectTrajOpt.jl/ + benchmark/ + Project.toml # [deps] HarmoniqsBenchmarks, BenchmarkTools, TestItems, ... + benchmarks.jl # @testitems: micro, macro, scaling + results/ # .gitignored JLD2 output +``` + +## Verification + +1. **Unit test the harness:** `benchmark_solve!` returns a valid `BenchmarkResult` with all fields populated +2. **Run micro-benchmarks locally:** Confirm BenchmarkTools produces histograms for each eval function +3. **Run scaling sweep:** Verify memory grows as expected with N and state_dim +4. **CI dry run:** Trigger workflow_dispatch on DirectTrajOpt, confirm artifact upload +5. **Cross-package comparison:** Run aggregator on two package artifacts, verify comparison table output +6. 
**Allocation profiling:** Run Profile.Allocs on a solve, verify PProf flamegraph renders + +## Scope + +**In scope (this design):** +- HarmoniqsBenchmarks.jl package creation +- DirectTrajOpt benchmark suite (Ipopt vs MadNLP, scaling, micro-benchmarks, allocation profiling) +- Piccolissimo benchmark suite (integrate existing benchmarks + new scaling) +- CI workflows for DirectTrajOpt and Piccolissimo +- Aggregator script in harmoniqs-benchmarks repo + +**Future work:** +- Altissimo GPU benchmarks (requires CUDA runner validation) +- Intonato convergence benchmarks (requires stable Phase 5) +- Piccolo template benchmarks +- AllocCheck CI gates (after hot paths are optimized) +- Automated regression comments on PRs From 7fe44c572d074368d55e97caee63443b925b0d55 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 15 Apr 2026 18:55:37 -0400 Subject: [PATCH 03/13] ci: add benchmark workflow and README, remove stale files - Add .github/workflows/benchmark.yml that runs on PRs touching src/ or benchmark/ - Uses Pkg.add(url=...) 
to install HarmoniqsBenchmarks (unregistered) - Uploads JLD2 artifacts for 90 days - Add benchmark/README.md with run instructions - Remove empty BenchmarkUtils.jl leftover - Ignore Manifest.toml (regenerated on each CI run) --- .github/workflows/benchmark.yml | 56 +++++++++++++++++++++++++++++++++ benchmark/.gitignore | 1 + benchmark/BenchmarkUtils.jl | 1 - benchmark/README.md | 37 ++++++++++++++++++++++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/benchmark.yml delete mode 100644 benchmark/BenchmarkUtils.jl create mode 100644 benchmark/README.md diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..ac64298 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,56 @@ +name: Benchmarks +on: + push: + tags: ['v*'] + pull_request: + paths: + - 'src/**' + - 'benchmark/**' + - '.github/workflows/benchmark.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + benchmark: + name: Benchmark suite + runs-on: ubuntu-latest + timeout-minutes: 60 + permissions: + actions: write + contents: read + steps: + - uses: actions/checkout@v6 + + - uses: julia-actions/setup-julia@v2 + with: + version: '1.11' + arch: x64 + + - uses: julia-actions/cache@v2 + + - name: Install HarmoniqsBenchmarks (unregistered) and dev-install DirectTrajOpt + run: | + julia --project=benchmark -e ' + using Pkg + Pkg.develop(path=".") + Pkg.add(url="https://github.com/harmoniqs/HarmoniqsBenchmarks.jl") + Pkg.instantiate() + ' + + - name: Run benchmarks + run: | + julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") + ' + + - name: Upload benchmark artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: benchmark-${{ github.ref_name }}-${{ github.sha }} + path: benchmark/results/ + retention-days: 90 diff --git 
a/benchmark/.gitignore b/benchmark/.gitignore index fbca225..ca28c11 100644 --- a/benchmark/.gitignore +++ b/benchmark/.gitignore @@ -1 +1,2 @@ results/ +Manifest.toml diff --git a/benchmark/BenchmarkUtils.jl b/benchmark/BenchmarkUtils.jl deleted file mode 100644 index 8b13789..0000000 --- a/benchmark/BenchmarkUtils.jl +++ /dev/null @@ -1 +0,0 @@ - diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..534cec0 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,37 @@ +# DirectTrajOpt Benchmarks + +Benchmark suite for DirectTrajOpt.jl comparing Ipopt and MadNLP solver performance. + +## Running locally + +```bash +# From DirectTrajOpt.jl root +julia --project=benchmark -e ' + using Pkg + Pkg.add(url="https://github.com/harmoniqs/HarmoniqsBenchmarks.jl") + Pkg.instantiate() +' + +julia --project=benchmark -t auto -e ' + using TestItemRunner + TestItemRunner.run_tests("benchmark/") +' +``` + +Artifacts are saved as JLD2 files in `benchmark/results/` (gitignored). + +## Benchmark suites + +- **Evaluator micro-benchmarks** — `BenchmarkTools.@benchmark` timings for each MOI eval function (objective, gradient, constraint, jacobian, hessian_lagrangian) on bilinear N=51 +- **Ipopt vs MadNLP** — full solve comparison on bilinear N=51 +- **Memory scaling study** — N ∈ {25, 51, 101} × state_dim ∈ {4, 8, 16} + +## Schema + +Results use `BenchmarkResult` / `MicroBenchmarkResult` from [HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl). 
+ +Load with: +```julia +using HarmoniqsBenchmarks +results = load_results("benchmark/results/ipopt_vs_madnlp_N51_{commit}.jld2") +``` From 3a5003c061675cb7f882ad5e13d064827c8c53ab Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 15 Apr 2026 19:01:52 -0400 Subject: [PATCH 04/13] benchmark: use [sources] in Project.toml instead of Pkg.add in CI Uses Julia 1.11+ [sources] section to resolve: - DirectTrajOpt from local path (parent dir) - HarmoniqsBenchmarks from public GitHub URL CI workflow simplified to just Pkg.instantiate (no manual Pkg.add needed). --- .github/workflows/benchmark.yml | 10 ++-------- benchmark/Project.toml | 8 ++++++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ac64298..e4bdf37 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -31,14 +31,8 @@ jobs: - uses: julia-actions/cache@v2 - - name: Install HarmoniqsBenchmarks (unregistered) and dev-install DirectTrajOpt - run: | - julia --project=benchmark -e ' - using Pkg - Pkg.develop(path=".") - Pkg.add(url="https://github.com/harmoniqs/HarmoniqsBenchmarks.jl") - Pkg.instantiate() - ' + - name: Instantiate benchmark environment + run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' - name: Run benchmarks run: | diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 9782442..b219215 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -1,5 +1,6 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" @@ -8,9 +9,12 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" NamedTrajectories = 
"538bc3a1-5ab9-4fc3-b776-35ca1e893e08" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[sources] +DirectTrajOpt = {path = ".."} +HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl"} From 6d9fa524c4681ab13807a2106495be5507b2c673 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Wed, 15 Apr 2026 20:44:50 -0400 Subject: [PATCH 05/13] ci: sanitize artifact name (PR refs contain / which is invalid) --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index e4bdf37..1f20d76 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -45,6 +45,6 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: benchmark-${{ github.ref_name }}-${{ github.sha }} + name: benchmark-${{ github.event.pull_request.number || github.ref_name }}-${{ github.sha }} path: benchmark/results/ retention-days: 90 From 11720e35256255840f5e84254420db17d45846c3 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Thu, 16 Apr 2026 02:04:35 -0400 Subject: [PATCH 06/13] chore: move specs/plans to separate PR, fix stale README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove docs/superpowers/ (specs and plans) from this PR to keep the diff focused on benchmarks and MadNLP integration. Fix stale Pkg.add instruction in benchmark README — deps resolve via [sources] now. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- benchmark/README.md | 6 +- .../2026-04-15-benchmarking-infrastructure.md | 1620 ----------------- ...6-04-15-altissimo-gpu-benchmarks-design.md | 198 -- .../specs/2026-04-15-benchmarking-design.md | 383 ---- 4 files changed, 1 insertion(+), 2206 deletions(-) delete mode 100644 docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md delete mode 100644 docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md delete mode 100644 docs/superpowers/specs/2026-04-15-benchmarking-design.md diff --git a/benchmark/README.md b/benchmark/README.md index 534cec0..c0737c9 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -6,11 +6,7 @@ Benchmark suite for DirectTrajOpt.jl comparing Ipopt and MadNLP solver performan ```bash # From DirectTrajOpt.jl root -julia --project=benchmark -e ' - using Pkg - Pkg.add(url="https://github.com/harmoniqs/HarmoniqsBenchmarks.jl") - Pkg.instantiate() -' +julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' julia --project=benchmark -t auto -e ' using TestItemRunner diff --git a/docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md b/docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md deleted file mode 100644 index 99dee3f..0000000 --- a/docs/superpowers/plans/2026-04-15-benchmarking-infrastructure.md +++ /dev/null @@ -1,1620 +0,0 @@ -# HarmoniqsBenchmarks.jl + DirectTrajOpt Benchmark Suite — Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Create a shared benchmarking package (`HarmoniqsBenchmarks.jl`) and wire up the first benchmark suite in DirectTrajOpt.jl comparing Ipopt vs MadNLP, with micro-benchmarks, full-solve benchmarks, and memory scaling studies. 
- -**Architecture:** HarmoniqsBenchmarks.jl provides schema types, a profiling harness, and JLD2 storage/comparison. DirectTrajOpt.jl's `benchmark/` directory contains `@testitem`-based benchmarks that use the shared harness. Both Ipopt and MadNLP benchmarks use the same shared `Evaluator` (in `src/solvers/evaluator.jl`), so micro-benchmarks are solver-agnostic while macro-benchmarks compare the two solver backends. - -**Tech Stack:** Julia 1.11+, BenchmarkTools.jl, JLD2.jl, TestItems/TestItemRunner, MathOptInterface - -**Spec:** `docs/superpowers/specs/2026-04-15-benchmarking-design.md` - ---- - -## File Structure - -### New repo: `HarmoniqsBenchmarks.jl` (at `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/`) - -| File | Responsibility | -|------|---------------| -| `Project.toml` | Package metadata + deps (BenchmarkTools, JLD2, Dates, DirectTrajOpt, MathOptInterface, NamedTrajectories) | -| `src/HarmoniqsBenchmarks.jl` | Module definition + exports | -| `src/schema.jl` | `BenchmarkResult`, `MicroBenchmarkResult`, `EvalBenchmark` structs | -| `src/harness.jl` | `build_evaluator`, `benchmark_solve!`, GC/allocation capture | -| `src/storage.jl` | `save_results`, `save_micro_results`, `load_results`, `load_micro_results` | -| `src/report.jl` | `compare_results` — diff tables + regression flagging | -| `test/runtests.jl` | Tests for all of the above | - -### Modified repo: `DirectTrajOpt.jl` (benchmark directory) - -| File | Responsibility | -|------|---------------| -| `benchmark/Project.toml` | Benchmark env deps (HarmoniqsBenchmarks, BenchmarkTools, TestItems, MadNLP) | -| `benchmark/benchmarks.jl` | `@testitem` definitions: micro, macro, scaling | -| `benchmark/.gitignore` | Ignore `results/` directory | - ---- - -## Task 1: Create HarmoniqsBenchmarks.jl Project Skeleton - -**Files:** -- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/Project.toml` -- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` - -- [ ] 
**Step 1: Initialize the package directory** - -```bash -mkdir -p /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src -mkdir -p /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -git init -``` - -- [ ] **Step 2: Create Project.toml** - -```toml -name = "HarmoniqsBenchmarks" -uuid = "GENERATE_UUID" -version = "0.1.0" -authors = ["harmoniqs contributors"] - -[deps] -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" -JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" -NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" - -[compat] -BenchmarkTools = "1.6" -Dates = "1.10, 1.11, 1.12" -DirectTrajOpt = "0.8" -JLD2 = "0.5" -MathOptInterface = "1.49" -NamedTrajectories = "0.8" -julia = "1.10, 1.11, 1.12" -``` - -Generate the UUID with: `using UUIDs; uuid4()` - -- [ ] **Step 3: Create module stub** - -```julia -# src/HarmoniqsBenchmarks.jl -module HarmoniqsBenchmarks - -end -``` - -- [ ] **Step 4: Dev-install dependencies and verify the package loads** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. 
-e ' - using Pkg - Pkg.develop(path="../DirectTrajOpt.jl") - Pkg.develop(path="../NamedTrajectories.jl") - Pkg.instantiate() - using HarmoniqsBenchmarks - println("Package loads OK") -' -``` - -Expected: "Package loads OK" - -- [ ] **Step 5: Commit** - -```bash -git add Project.toml src/HarmoniqsBenchmarks.jl -git commit -m "feat: initialize HarmoniqsBenchmarks.jl package skeleton" -``` - ---- - -## Task 2: Implement Schema Types - -**Files:** -- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/schema.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` -- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` - -- [ ] **Step 1: Write tests for schema types** - -```julia -# test/runtests.jl -using Test -using HarmoniqsBenchmarks -using Dates - -@testset "HarmoniqsBenchmarks" begin - -@testset "Schema" begin - @testset "EvalBenchmark construction" begin - eb = EvalBenchmark( - times_ns = [100.0, 110.0, 105.0], - gctimes_ns = [0.0, 0.0, 5.0], - memory_bytes = 1024, - allocs = 3, - ) - @test eb.median_ns == 105.0 - @test eb.min_ns == 100.0 - @test 104.0 < eb.mean_ns < 106.0 - end - - @testset "BenchmarkResult construction" begin - r = BenchmarkResult( - package = "DirectTrajOpt", - package_version = "0.8.10", - commit = "abc1234", - benchmark_name = "test_bench", - N = 51, - state_dim = 4, - control_dim = 2, - n_constraints = 200, - n_variables = 765, - wall_time_s = 1.5, - iterations = 42, - objective_value = 0.001, - constraint_violation = 1e-8, - solver_status = :Optimal, - solver = "ipopt", - total_allocations_bytes = 1_000_000, - total_allocs_count = 500, - gc_time_ns = 10_000, - gc_count = 2, - gc_full_count = 0, - solver_options = Dict{Symbol,Any}(:tol => 1e-8, :max_iter => 1000), - julia_version = string(VERSION), - timestamp = now(), - runner = "local", - n_threads = 1, - ) - @test r.package == "DirectTrajOpt" - @test r.solver_status == :Optimal - end - - @testset 
"MicroBenchmarkResult construction" begin - eb = EvalBenchmark( - times_ns = [100.0], - gctimes_ns = [0.0], - memory_bytes = 0, - allocs = 0, - ) - mr = MicroBenchmarkResult( - package = "DirectTrajOpt", - package_version = "0.8.10", - commit = "abc1234", - benchmark_name = "micro_test", - N = 51, - state_dim = 4, - control_dim = 2, - eval_benchmarks = Dict{Symbol,EvalBenchmark}( - :eval_objective => eb, - ), - julia_version = string(VERSION), - timestamp = now(), - runner = "local", - n_threads = 1, - ) - @test mr.eval_benchmarks[:eval_objective].min_ns == 100.0 - end -end - -end # HarmoniqsBenchmarks testset -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: FAIL — `EvalBenchmark` not defined - -- [ ] **Step 3: Implement schema types** - -```julia -# src/schema.jl -using Dates -using Statistics: median, mean - -struct EvalBenchmark - times_ns::Vector{Float64} - gctimes_ns::Vector{Float64} - memory_bytes::Int - allocs::Int - # Derived stats (computed at construction) - median_ns::Float64 - min_ns::Float64 - mean_ns::Float64 -end - -function EvalBenchmark(; - times_ns::Vector{Float64}, - gctimes_ns::Vector{Float64}, - memory_bytes::Int, - allocs::Int, -) - return EvalBenchmark( - times_ns, - gctimes_ns, - memory_bytes, - allocs, - median(times_ns), - minimum(times_ns), - mean(times_ns), - ) -end - -struct BenchmarkResult - # Identity - package::String - package_version::String - commit::String - benchmark_name::String - # Problem dimensions - N::Int - state_dim::Int - control_dim::Int - n_constraints::Int - n_variables::Int - # Solve metrics - wall_time_s::Float64 - iterations::Int - objective_value::Float64 - constraint_violation::Float64 - solver_status::Symbol - solver::String - # Memory & allocations - total_allocations_bytes::Int - total_allocs_count::Int - gc_time_ns::Int - gc_count::Int - gc_full_count::Int - # Solver options 
snapshot - solver_options::Dict{Symbol,Any} - # Metadata - julia_version::String - timestamp::DateTime - runner::String - n_threads::Int -end - -struct MicroBenchmarkResult - package::String - package_version::String - commit::String - benchmark_name::String - N::Int - state_dim::Int - control_dim::Int - eval_benchmarks::Dict{Symbol,EvalBenchmark} - julia_version::String - timestamp::DateTime - runner::String - n_threads::Int -end -``` - -- [ ] **Step 4: Update module to include schema and export types** - -```julia -# src/HarmoniqsBenchmarks.jl -module HarmoniqsBenchmarks - -export EvalBenchmark, BenchmarkResult, MicroBenchmarkResult - -include("schema.jl") - -end -``` - -- [ ] **Step 5: Run tests to verify they pass** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: All tests PASS - -- [ ] **Step 6: Commit** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -git add src/schema.jl src/HarmoniqsBenchmarks.jl test/runtests.jl -git commit -m "feat: add BenchmarkResult, MicroBenchmarkResult, EvalBenchmark schema types" -``` - ---- - -## Task 3: Implement JLD2 Storage - -**Files:** -- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/storage.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` - -- [ ] **Step 1: Add storage tests** - -Append to `test/runtests.jl`, inside the top-level `@testset "HarmoniqsBenchmarks"`: - -```julia -@testset "Storage" begin - mktempdir() do dir - r = BenchmarkResult( - package = "DirectTrajOpt", - package_version = "0.8.10", - commit = "abc1234", - benchmark_name = "storage_test", - N = 51, state_dim = 4, control_dim = 2, - n_constraints = 200, n_variables = 765, - wall_time_s = 1.5, iterations = 42, - objective_value = 0.001, constraint_violation = 1e-8, - solver_status = :Optimal, solver = "ipopt", - 
total_allocations_bytes = 1_000_000, total_allocs_count = 500, - gc_time_ns = 10_000, gc_count = 2, gc_full_count = 0, - solver_options = Dict{Symbol,Any}(:tol => 1e-8), - julia_version = string(VERSION), - timestamp = now(), runner = "local", n_threads = 1, - ) - - path = save_results(dir, "test_bench", [r]) - @test isfile(path) - @test endswith(path, ".jld2") - - loaded = load_results(path) - @test length(loaded) == 1 - @test loaded[1].package == "DirectTrajOpt" - @test loaded[1].wall_time_s == 1.5 - @test loaded[1].solver_options[:tol] == 1e-8 - end - - mktempdir() do dir - eb = EvalBenchmark( - times_ns = [100.0, 110.0], - gctimes_ns = [0.0, 0.0], - memory_bytes = 512, allocs = 1, - ) - mr = MicroBenchmarkResult( - package = "DirectTrajOpt", - package_version = "0.8.10", - commit = "abc1234", - benchmark_name = "micro_storage_test", - N = 51, state_dim = 4, control_dim = 2, - eval_benchmarks = Dict(:eval_objective => eb), - julia_version = string(VERSION), - timestamp = now(), runner = "local", n_threads = 1, - ) - - path = save_micro_results(dir, "micro_test", mr) - @test isfile(path) - - loaded = load_micro_results(path) - @test loaded.benchmark_name == "micro_storage_test" - @test loaded.eval_benchmarks[:eval_objective].min_ns == 100.0 - end -end -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: FAIL — `save_results` not defined - -- [ ] **Step 3: Implement storage functions** - -```julia -# src/storage.jl -using JLD2 - -""" - save_results(dir, name, results::Vector{BenchmarkResult}) -> String - -Save benchmark results to a JLD2 file in `dir`. Returns the file path. -""" -function save_results(dir::String, name::String, results::Vector{BenchmarkResult}) - mkpath(dir) - commit = isempty(results) ? 
"unknown" : results[1].commit - filename = "$(name)_$(commit).jld2" - path = joinpath(dir, filename) - JLD2.jldsave(path; results=results) - return path -end - -""" - load_results(path) -> Vector{BenchmarkResult} - -Load benchmark results from a JLD2 file. -""" -function load_results(path::String) - return JLD2.load(path, "results") -end - -""" - save_micro_results(dir, name, result::MicroBenchmarkResult) -> String - -Save micro-benchmark results to a JLD2 file in `dir`. Returns the file path. -""" -function save_micro_results(dir::String, name::String, result::MicroBenchmarkResult) - mkpath(dir) - filename = "$(name)_$(result.commit).jld2" - path = joinpath(dir, filename) - JLD2.jldsave(path; result=result) - return path -end - -""" - load_micro_results(path) -> MicroBenchmarkResult - -Load micro-benchmark results from a JLD2 file. -""" -function load_micro_results(path::String) - return JLD2.load(path, "result") -end -``` - -- [ ] **Step 4: Update module** - -```julia -# src/HarmoniqsBenchmarks.jl -module HarmoniqsBenchmarks - -export EvalBenchmark, BenchmarkResult, MicroBenchmarkResult -export save_results, load_results, save_micro_results, load_micro_results - -include("schema.jl") -include("storage.jl") - -end -``` - -- [ ] **Step 5: Run tests to verify they pass** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. 
-e 'using Pkg; Pkg.test()' -``` - -Expected: All tests PASS - -- [ ] **Step 6: Commit** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -git add src/storage.jl src/HarmoniqsBenchmarks.jl test/runtests.jl -git commit -m "feat: add JLD2 save/load for BenchmarkResult and MicroBenchmarkResult" -``` - ---- - -## Task 4: Implement build_evaluator Harness - -**Files:** -- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/harness.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` - -- [ ] **Step 1: Add test for build_evaluator** - -Append to `test/runtests.jl`, inside top-level testset: - -```julia -@testset "Harness" begin - using DirectTrajOpt - using NamedTrajectories - using SparseArrays - using ExponentialAction - using MathOptInterface - const MOI = MathOptInterface - - # Build a simple bilinear problem (same as DirectTrajOpt test_utils.jl) - N = 10; Δt = 0.1; u_bound = 0.1; ω = 0.1 - Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) - Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) - Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) - G(u) = ω * Gz + u[1] * Gx + u[2] * Gy - - traj = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound * (2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - goal = (x = [0.0, 1.0, 0.0, 0.0],), - ) - - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - DerivativeIntegrator(:du, :ddu, traj), - ] - - J = QuadraticRegularizer(:u, traj, 1.0) - prob = DirectTrajOptProblem(traj, J, integrators) - - @testset "build_evaluator returns evaluator and Z vector" begin - evaluator, Z_vec = build_evaluator(prob) - @test 
evaluator isa MOI.AbstractNLPEvaluator - @test length(Z_vec) == traj.dim * traj.N + traj.global_dim - - # Verify eval functions are callable - obj = MOI.eval_objective(evaluator, Z_vec) - @test obj isa Float64 - @test isfinite(obj) - end - - @testset "evaluator_dims returns correct sizes" begin - evaluator, Z_vec = build_evaluator(prob) - dims = evaluator_dims(evaluator) - @test dims.n_constraints == evaluator.n_constraints - @test dims.n_variables == length(Z_vec) - @test dims.n_jacobian_entries == length(evaluator.jacobian_structure) - @test dims.n_hessian_entries == length(evaluator.hessian_structure) - end -end -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: FAIL — `build_evaluator` not defined - -- [ ] **Step 3: Implement build_evaluator and evaluator_dims** - -```julia -# src/harness.jl -using DirectTrajOpt -using NamedTrajectories -using MathOptInterface -const MOI = MathOptInterface - -""" - build_evaluator(prob::DirectTrajOptProblem; eval_hessian=true) -> (evaluator, Z_vec) - -Extract a MOI evaluator and the initial decision variable vector from a -DirectTrajOptProblem. Used for micro-benchmarking individual eval functions. - -Returns: -- `evaluator`: An `MOI.AbstractNLPEvaluator` ready for `MOI.eval_*` calls -- `Z_vec`: The flat decision variable vector `[trajectory_data; global_data]` -""" -function build_evaluator(prob::DirectTrajOpt.Problems.DirectTrajOptProblem; eval_hessian::Bool=true) - evaluator = DirectTrajOpt.Solvers.Evaluator(prob; eval_hessian=eval_hessian, verbose=false) - traj = prob.trajectory - Z_vec = vcat(collect(traj.datavec), collect(traj.global_data)) - return evaluator, Z_vec -end - -""" - evaluator_dims(evaluator) -> NamedTuple - -Return key dimensions of the evaluator for buffer pre-allocation. 
-""" -function evaluator_dims(evaluator::DirectTrajOpt.Solvers.Evaluator) - return ( - n_constraints = evaluator.n_constraints, - n_variables = evaluator.trajectory.dim * evaluator.trajectory.N + evaluator.trajectory.global_dim, - n_jacobian_entries = length(evaluator.jacobian_structure), - n_hessian_entries = length(evaluator.hessian_structure), - ) -end -``` - -- [ ] **Step 4: Update module** - -```julia -# src/HarmoniqsBenchmarks.jl -module HarmoniqsBenchmarks - -export EvalBenchmark, BenchmarkResult, MicroBenchmarkResult -export save_results, load_results, save_micro_results, load_micro_results -export build_evaluator, evaluator_dims - -include("schema.jl") -include("storage.jl") -include("harness.jl") - -end -``` - -- [ ] **Step 5: Run tests to verify they pass** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: All tests PASS - -- [ ] **Step 6: Commit** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -git add src/harness.jl src/HarmoniqsBenchmarks.jl test/runtests.jl -git commit -m "feat: add build_evaluator and evaluator_dims harness functions" -``` - ---- - -## Task 5: Implement benchmark_solve! Harness - -**Files:** -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/harness.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` - -- [ ] **Step 1: Add test for benchmark_solve!** - -Append inside the `@testset "Harness"` block in `test/runtests.jl`: - -```julia -@testset "benchmark_solve! captures metrics" begin - # Rebuild a fresh problem (solve! 
mutates in place) - traj2 = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound * (2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - goal = (x = [0.0, 1.0, 0.0, 0.0],), - ) - integrators2 = [ - BilinearIntegrator(G, :x, :u, traj2), - DerivativeIntegrator(:u, :du, traj2), - DerivativeIntegrator(:du, :ddu, traj2), - ] - J2 = QuadraticRegularizer(:u, traj2, 1.0) - prob2 = DirectTrajOptProblem(traj2, J2, integrators2) - - result = benchmark_solve!( - prob2, IpoptOptions(max_iter=10, print_level=0); - benchmark_name = "test_solve", - ) - - @test result isa BenchmarkResult - @test result.package == "DirectTrajOpt" - @test result.solver == "ipopt" - @test result.wall_time_s > 0.0 - @test result.iterations >= 0 - @test result.total_allocations_bytes >= 0 - @test result.gc_count >= 0 - @test result.N == N - @test result.state_dim == 4 - @test haskey(result.solver_options, :max_iter) - @test result.solver_options[:max_iter] == 10 -end -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: FAIL — `benchmark_solve!` not defined - -- [ ] **Step 3: Implement benchmark_solve!** - -Append to `src/harness.jl`: - -```julia -using Dates - -""" - benchmark_solve!(prob, options; benchmark_name, runner="local", kwargs...) -> BenchmarkResult - -Run `solve!(prob; options, kwargs...)` and capture timing, memory, GC stats, and solver options. 
-""" -function benchmark_solve!( - prob::DirectTrajOpt.Problems.DirectTrajOptProblem, - options::DirectTrajOpt.Solvers.AbstractSolverOptions; - benchmark_name::String = "unnamed", - runner::String = "local", - verbose::Bool = false, - kwargs..., -) - traj = prob.trajectory - - # Capture problem dimensions before solve - n_vars = traj.dim * traj.N + traj.global_dim - state_dim = _infer_state_dim(prob) - control_dim = _infer_control_dim(prob) - n_constraints_total = _count_constraints(prob, options) - - # Snapshot solver options - opts_snapshot = Dict{Symbol,Any}() - for name in fieldnames(typeof(options)) - opts_snapshot[name] = getfield(options, name) - end - - # GC baseline - GC.gc() - gc_before = Base.gc_num() - - # Timed solve - timed = @timed solve!(prob; options=options, verbose=verbose, kwargs...) - - gc_after = Base.gc_num() - - # Compute GC deltas - gc_time = timed.gctime # in seconds, convert to ns - gc_count_delta = gc_after.pause - gc_before.pause - gc_full_delta = gc_after.full_sweep - gc_before.full_sweep - - # Package version from Project.toml - pkg_version = _get_package_version("DirectTrajOpt") - commit = _get_git_commit() - - return BenchmarkResult( - package = "DirectTrajOpt", - package_version = pkg_version, - commit = commit, - benchmark_name = benchmark_name, - N = traj.N, - state_dim = state_dim, - control_dim = control_dim, - n_constraints = n_constraints_total, - n_variables = n_vars, - wall_time_s = timed.time, - iterations = -1, # TODO: extract from solver output when available - objective_value = NaN, # TODO: extract from solver - constraint_violation = NaN, - solver_status = :Unknown, - solver = _solver_name(options), - total_allocations_bytes = timed.bytes, - total_allocs_count = -1, # @timed doesn't give count; use gc_num delta - gc_time_ns = round(Int, timed.gctime * 1e9), - gc_count = gc_count_delta, - gc_full_count = gc_full_delta, - solver_options = opts_snapshot, - julia_version = string(VERSION), - timestamp = now(), - runner = 
runner, - n_threads = Threads.nthreads(), - ) -end - -# --- helpers --- - -function _solver_name(options::DirectTrajOpt.Solvers.AbstractSolverOptions) - name = string(typeof(options).name.name) - if occursin("Ipopt", name) - return "ipopt" - elseif occursin("MadNLP", name) - return "madnlp" - else - return lowercase(name) - end -end - -function _infer_state_dim(prob) - traj = prob.trajectory - # Heuristic: look for common state variable names - for name in [:x, :ψ̃, :Ũ⃗, :ρ̃] - if haskey(traj.dims, name) - return traj.dims[name] - end - end - # Fallback: first non-control component - return first(values(traj.dims)) -end - -function _infer_control_dim(prob) - traj = prob.trajectory - total = 0 - for name in traj.control_names - if name != traj.timestep_name - total += traj.dims[name] - end - end - return total -end - -function _count_constraints(prob, options) - n_dynamics = sum(integrator.dim for integrator in prob.integrators; init=0) - n_nonlinear = sum( - c.dim for c in prob.constraints - if c isa DirectTrajOpt.Constraints.AbstractNonlinearConstraint; - init=0 - ) - return n_dynamics * (prob.trajectory.N - 1) + n_nonlinear -end - -function _get_package_version(pkg_name::String) - try - deps = Pkg.dependencies() - for (_, info) in deps - if info.name == pkg_name - return string(info.version) - end - end - catch - end - return "unknown" -end - -function _get_git_commit() - try - return strip(read(`git rev-parse --short HEAD`, String)) - catch - return "unknown" - end -end -``` - -- [ ] **Step 4: Add `Pkg` import to harness.jl** - -Add at the top of `src/harness.jl`: - -```julia -import Pkg -``` - -- [ ] **Step 5: Update module exports** - -In `src/HarmoniqsBenchmarks.jl`, add to exports: - -```julia -export benchmark_solve! -``` - -- [ ] **Step 6: Run tests to verify they pass** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. 
-e 'using Pkg; Pkg.test()' -``` - -Expected: All tests PASS - -- [ ] **Step 7: Commit** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -git add src/harness.jl src/HarmoniqsBenchmarks.jl test/runtests.jl -git commit -m "feat: add benchmark_solve! harness with GC stats and options snapshot" -``` - ---- - -## Task 6: Implement BenchmarkTools→EvalBenchmark Conversion - -**Files:** -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/harness.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` - -- [ ] **Step 1: Add test for trial_to_eval_benchmark** - -Append inside `@testset "Harness"`: - -```julia -@testset "trial_to_eval_benchmark extracts data from BenchmarkTools.Trial" begin - using BenchmarkTools - trial = @benchmark 1 + 1 - eb = trial_to_eval_benchmark(trial) - @test eb isa EvalBenchmark - @test length(eb.times_ns) > 0 - @test eb.min_ns > 0.0 - @test eb.memory_bytes >= 0 - @test eb.allocs >= 0 -end -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: FAIL — `trial_to_eval_benchmark` not defined - -- [ ] **Step 3: Implement trial_to_eval_benchmark** - -Append to `src/harness.jl`: - -```julia -using BenchmarkTools - -""" - trial_to_eval_benchmark(trial::BenchmarkTools.Trial) -> EvalBenchmark - -Convert a BenchmarkTools.Trial to an EvalBenchmark, extracting raw timing data. -""" -function trial_to_eval_benchmark(trial::BenchmarkTools.Trial) - return EvalBenchmark( - times_ns = Float64.(trial.times), - gctimes_ns = Float64.(trial.gctimes), - memory_bytes = trial.memory, - allocs = trial.allocs, - ) -end -``` - -- [ ] **Step 4: Export the function** - -Add `trial_to_eval_benchmark` to exports in `src/HarmoniqsBenchmarks.jl`. - -- [ ] **Step 5: Run tests to verify they pass** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. 
-e 'using Pkg; Pkg.test()' -``` - -Expected: All tests PASS - -- [ ] **Step 6: Commit** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -git add src/harness.jl src/HarmoniqsBenchmarks.jl test/runtests.jl -git commit -m "feat: add trial_to_eval_benchmark for BenchmarkTools integration" -``` - ---- - -## Task 7: Implement compare_results Reporter - -**Files:** -- Create: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/report.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/src/HarmoniqsBenchmarks.jl` -- Modify: `/home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl/test/runtests.jl` - -- [ ] **Step 1: Add test for compare_results** - -Append to `test/runtests.jl`, inside top-level testset: - -```julia -@testset "Report" begin - @testset "compare_results detects regressions" begin - baseline = BenchmarkResult( - package="DirectTrajOpt", package_version="0.8.9", - commit="aaa1111", benchmark_name="test", - N=51, state_dim=4, control_dim=2, - n_constraints=200, n_variables=765, - wall_time_s=1.0, iterations=50, - objective_value=0.001, constraint_violation=1e-8, - solver_status=:Optimal, solver="ipopt", - total_allocations_bytes=1_000_000, total_allocs_count=500, - gc_time_ns=10_000, gc_count=2, gc_full_count=0, - solver_options=Dict{Symbol,Any}(), - julia_version=string(VERSION), timestamp=now(), - runner="local", n_threads=1, - ) - - # 20% regression in wall time - current = BenchmarkResult( - package="DirectTrajOpt", package_version="0.8.10", - commit="bbb2222", benchmark_name="test", - N=51, state_dim=4, control_dim=2, - n_constraints=200, n_variables=765, - wall_time_s=1.2, iterations=50, - objective_value=0.001, constraint_violation=1e-8, - solver_status=:Optimal, solver="ipopt", - total_allocations_bytes=900_000, total_allocs_count=450, - gc_time_ns=10_000, gc_count=2, gc_full_count=0, - solver_options=Dict{Symbol,Any}(), - julia_version=string(VERSION), timestamp=now(), - runner="local", n_threads=1, - ) - - comparison = 
compare_results([baseline], [current]) - @test length(comparison) == 1 - row = comparison[1] - @test row.benchmark_name == "test" - @test row.wall_time_pct_change > 15.0 # 20% regression - @test row.alloc_bytes_pct_change < 0.0 # 10% improvement - @test row.has_regression == true # wall time regressed >10% - end -end -``` - -- [ ] **Step 2: Run tests to verify they fail** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. -e 'using Pkg; Pkg.test()' -``` - -Expected: FAIL — `compare_results` not defined - -- [ ] **Step 3: Implement compare_results** - -```julia -# src/report.jl - -struct ComparisonRow - benchmark_name::String - solver::String - N::Int - state_dim::Int - # Wall time - baseline_wall_s::Float64 - current_wall_s::Float64 - wall_time_pct_change::Float64 - # Allocations - baseline_alloc_bytes::Int - current_alloc_bytes::Int - alloc_bytes_pct_change::Float64 - # Regression flag - has_regression::Bool -end - -""" - compare_results(baseline, current; regression_threshold=10.0) -> Vector{ComparisonRow} - -Compare two sets of BenchmarkResults by matching on `benchmark_name`. -Returns comparison rows with percent changes and regression flags. - -A regression is flagged when wall_time or allocations increase by more than -`regression_threshold` percent. 
-""" -function compare_results( - baseline::Vector{BenchmarkResult}, - current::Vector{BenchmarkResult}; - regression_threshold::Float64 = 10.0, -) - baseline_by_name = Dict(r.benchmark_name => r for r in baseline) - rows = ComparisonRow[] - - for r in current - b = get(baseline_by_name, r.benchmark_name, nothing) - isnothing(b) && continue - - wall_pct = _pct_change(b.wall_time_s, r.wall_time_s) - alloc_pct = _pct_change(Float64(b.total_allocations_bytes), Float64(r.total_allocations_bytes)) - has_regression = wall_pct > regression_threshold || alloc_pct > regression_threshold - - push!(rows, ComparisonRow( - r.benchmark_name, r.solver, r.N, r.state_dim, - b.wall_time_s, r.wall_time_s, wall_pct, - b.total_allocations_bytes, r.total_allocations_bytes, alloc_pct, - has_regression, - )) - end - - return rows -end - -function _pct_change(old::Float64, new::Float64) - old == 0.0 && return new == 0.0 ? 0.0 : 100.0 - return (new - old) / abs(old) * 100.0 -end -``` - -- [ ] **Step 4: Update module** - -Add exports to `src/HarmoniqsBenchmarks.jl`: - -```julia -export compare_results, ComparisonRow -``` - -And add the include: - -```julia -include("report.jl") -``` - -- [ ] **Step 5: Run tests to verify they pass** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -julia --project=. 
-e 'using Pkg; Pkg.test()' -``` - -Expected: All tests PASS - -- [ ] **Step 6: Commit** - -```bash -cd /home/jack/repos/harmoniqs/HarmoniqsBenchmarks.jl -git add src/report.jl src/HarmoniqsBenchmarks.jl test/runtests.jl -git commit -m "feat: add compare_results reporter with regression detection" -``` - ---- - -## Task 8: Set Up DirectTrajOpt.jl Benchmark Environment - -**Files:** -- Create: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/Project.toml` -- Create: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/.gitignore` -- Create: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` - -- [ ] **Step 1: Create benchmark directory** - -```bash -mkdir -p /home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/results -``` - -- [ ] **Step 2: Create .gitignore** - -``` -# benchmark/.gitignore -results/ -``` - -- [ ] **Step 3: Create benchmark/Project.toml** - -```toml -[deps] -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" -DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" -ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -HarmoniqsBenchmarks = "INSERT_UUID" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" -MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" -NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" -TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" -``` - -Replace `INSERT_UUID` with the UUID generated in Task 1. 
- -- [ ] **Step 4: Instantiate the benchmark environment** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -julia --project=benchmark -e ' - using Pkg - Pkg.develop(path=".") - Pkg.develop(path="../HarmoniqsBenchmarks.jl") - Pkg.develop(path="../NamedTrajectories.jl") - Pkg.instantiate() - using HarmoniqsBenchmarks - println("Benchmark env OK") -' -``` - -Expected: "Benchmark env OK" - -- [ ] **Step 5: Create benchmarks.jl stub** - -```julia -# benchmark/benchmarks.jl -using TestItems -``` - -- [ ] **Step 6: Commit** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -git add benchmark/Project.toml benchmark/.gitignore benchmark/benchmarks.jl -git commit -m "feat: add benchmark/ environment for HarmoniqsBenchmarks integration" -``` - ---- - -## Task 9: Write Evaluator Micro-benchmarks - -**Files:** -- Modify: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` - -- [ ] **Step 1: Write the micro-benchmark @testitem** - -```julia -# benchmark/benchmarks.jl -using TestItems - -@testitem "Evaluator micro-benchmarks: bilinear N=51" begin - using HarmoniqsBenchmarks - using BenchmarkTools - using DirectTrajOpt - using NamedTrajectories - using SparseArrays - using ExponentialAction - using MathOptInterface - const MOI = MathOptInterface - using Dates - - # Build a deterministic bilinear problem - Random.seed!(42) - N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 - Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) - Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) - Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) - G(u) = ω * Gz + u[1] * Gx + u[2] * Gy - - traj = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound * (2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - goal = (x = [0.0, 1.0, 0.0, 0.0],), - 
) - - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - DerivativeIntegrator(:du, :ddu, traj), - ] - J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) - prob = DirectTrajOptProblem(traj, J, integrators) - - evaluator, Z_vec = build_evaluator(prob) - dims = evaluator_dims(evaluator) - - # Pre-allocate buffers - g = zeros(dims.n_constraints) - grad = zeros(dims.n_variables) - H = zeros(dims.n_hessian_entries) - Jac = zeros(dims.n_jacobian_entries) - sigma = 1.0 - mu = ones(dims.n_constraints) - - # Run benchmarks - benchmarks = Dict{Symbol,EvalBenchmark}( - :eval_objective => trial_to_eval_benchmark( - @benchmark(MOI.eval_objective($evaluator, $Z_vec)) - ), - :eval_gradient => trial_to_eval_benchmark( - @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)) - ), - :eval_constraint => trial_to_eval_benchmark( - @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)) - ), - :eval_jacobian => trial_to_eval_benchmark( - @benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec)) - ), - :eval_hessian_lagrangian => trial_to_eval_benchmark( - @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)) - ), - ) - - result = MicroBenchmarkResult( - package = "DirectTrajOpt", - package_version = "0.8.10", - commit = try strip(read(`git rev-parse --short HEAD`, String)) catch; "unknown" end, - benchmark_name = "evaluator_micro_bilinear_N51", - N = N, state_dim = 4, control_dim = 2, - eval_benchmarks = benchmarks, - julia_version = string(VERSION), - timestamp = now(), - runner = get(ENV, "BENCHMARK_RUNNER", "local"), - n_threads = Threads.nthreads(), - ) - - # Print summary - println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===") - for (name, eb) in sort(collect(result.eval_benchmarks), by=first) - Printf = Base.Printf - @Printf.printf(" %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", - name, eb.median_ns, eb.allocs, eb.memory_bytes) - end - - # Save - 
results_dir = joinpath(@__DIR__, "results") - save_micro_results(results_dir, result.benchmark_name, result) - println(" Saved to $results_dir/") -end -``` - -- [ ] **Step 2: Run the micro-benchmark to verify it works** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -julia --project=benchmark -e ' - using TestItemRunner - @run_package_tests(filter=ti -> occursin("micro", ti.name), benchmark) -' -``` - -Expected: Benchmark runs, prints timing table, saves JLD2 to `benchmark/results/` - -- [ ] **Step 3: Verify the JLD2 output is loadable** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -julia --project=benchmark -e ' - using HarmoniqsBenchmarks - files = filter(f -> endswith(f, ".jld2"), readdir("benchmark/results", join=true)) - @assert length(files) >= 1 "Expected at least one JLD2 file" - result = load_micro_results(files[1]) - println("Loaded: $(result.benchmark_name)") - println("Functions benchmarked: $(keys(result.eval_benchmarks))") -' -``` - -Expected: Loads successfully, shows function names - -- [ ] **Step 4: Commit** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -git add benchmark/benchmarks.jl -git commit -m "feat: add evaluator micro-benchmarks with BenchmarkTools" -``` - ---- - -## Task 10: Write Ipopt vs MadNLP Macro-benchmarks - -**Files:** -- Modify: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` - -- [ ] **Step 1: Append the macro-benchmark @testitem** - -Append to `benchmark/benchmarks.jl`: - -```julia -@testitem "Ipopt vs MadNLP: bilinear N=51" begin - using HarmoniqsBenchmarks - using DirectTrajOpt - using NamedTrajectories - using SparseArrays - using ExponentialAction - import MadNLP - using Dates - - # Resolve MadNLPOptions from the extension - const MadNLPSolverExt = [ - mod for mod in reverse(Base.loaded_modules_order) - if Symbol(mod) == :MadNLPSolverExt - ][1] - - function make_bilinear_problem(; seed=42) - Random.seed!(seed) - N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 - Gx = 
sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) - Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) - Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) - G(u) = ω * Gz + u[1] * Gx + u[2] * Gy - - traj = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound * (2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - goal = (x = [0.0, 1.0, 0.0, 0.0],), - ) - - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - DerivativeIntegrator(:du, :ddu, traj), - ] - J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) - return DirectTrajOptProblem(traj, J, integrators) - end - - # Ipopt solve - prob_ipopt = make_bilinear_problem() - result_ipopt = benchmark_solve!( - prob_ipopt, - IpoptOptions(max_iter=200, print_level=0); - benchmark_name = "bilinear_N51_ipopt", - ) - - # MadNLP solve (fresh problem) - prob_madnlp = make_bilinear_problem() - result_madnlp = benchmark_solve!( - prob_madnlp, - MadNLPSolverExt.MadNLPOptions(max_iter=200, print_level=1); - benchmark_name = "bilinear_N51_madnlp", - ) - - # Print comparison - println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") - println(" Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB alloc") - println(" MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, $(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc") - - # Save - results_dir = joinpath(@__DIR__, "results") - save_results(results_dir, "ipopt_vs_madnlp_N51", [result_ipopt, result_madnlp]) -end -``` - -- [ ] **Step 2: Run the macro-benchmark** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -julia --project=benchmark -e ' - using TestItemRunner - @run_package_tests(filter=ti -> occursin("Ipopt vs MadNLP", 
ti.name), benchmark) -' -``` - -Expected: Both solvers run, prints wall time and allocation comparison - -- [ ] **Step 3: Commit** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -git add benchmark/benchmarks.jl -git commit -m "feat: add Ipopt vs MadNLP macro-benchmark" -``` - ---- - -## Task 11: Write Memory Scaling Study - -**Files:** -- Modify: `/home/jack/repos/harmoniqs/DirectTrajOpt.jl/benchmark/benchmarks.jl` - -- [ ] **Step 1: Append the scaling study @testitem** - -Append to `benchmark/benchmarks.jl`: - -```julia -@testitem "Memory scaling: N and state_dim sweep" begin - using HarmoniqsBenchmarks - using DirectTrajOpt - using NamedTrajectories - using SparseArrays - using ExponentialAction - import MadNLP - using Dates, Printf - - const MadNLPSolverExt = [ - mod for mod in reverse(Base.loaded_modules_order) - if Symbol(mod) == :MadNLPSolverExt - ][1] - - function make_scaled_problem(; N, state_dim, n_controls=2, seed=42) - Random.seed!(seed) - - # Build random bilinear system at given state dimension - G_drift = sparse(randn(state_dim, state_dim)) - G_drives = [sparse(randn(state_dim, state_dim)) for _ in 1:n_controls] - G(u) = G_drift + sum(u[i] * G_drives[i] for i in 1:n_controls) - - x_init = zeros(state_dim); x_init[1] = 1.0 - x_goal = zeros(state_dim); x_goal[2] = 1.0 - - traj = NamedTrajectory( - ( - x = randn(state_dim, N), - u = 0.1 * randn(n_controls, N), - du = randn(n_controls, N), - Δt = fill(0.1, N), - ); - controls = (:du, :Δt), - timestep = :Δt, - bounds = (u = 1.0, Δt = (0.01, 0.5)), - initial = (x = x_init, u = zeros(n_controls)), - final = (u = zeros(n_controls),), - goal = (x = x_goal,), - ) - - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - ] - J = QuadraticRegularizer(:u, traj, 1.0) - return DirectTrajOptProblem(traj, J, integrators) - end - - N_values = [25, 51, 101] - dim_values = [4, 8, 16] - results = BenchmarkResult[] - - println("\n=== Memory Scaling Study ===") - 
@printf(" %5s | %5s | %12s | %12s | %12s | %12s\n", - "N", "dim", "Ipopt (s)", "Ipopt (KB)", "MadNLP (s)", "MadNLP (KB)") - @printf(" %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", - "-"^5, "-"^5, "-"^12, "-"^12, "-"^12, "-"^12) - - for N in N_values - for dim in dim_values - # Ipopt - prob = make_scaled_problem(; N=N, state_dim=dim) - r_ipopt = benchmark_solve!( - prob, IpoptOptions(max_iter=50, print_level=0); - benchmark_name = "scaling_N$(N)_d$(dim)_ipopt", - ) - push!(results, r_ipopt) - - # MadNLP - prob = make_scaled_problem(; N=N, state_dim=dim) - r_madnlp = benchmark_solve!( - prob, MadNLPSolverExt.MadNLPOptions(max_iter=50, print_level=1); - benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", - ) - push!(results, r_madnlp) - - @printf(" %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", - N, dim, - r_ipopt.wall_time_s, r_ipopt.total_allocations_bytes ÷ 1024, - r_madnlp.wall_time_s, r_madnlp.total_allocations_bytes ÷ 1024) - end - end - - # Save all results - results_dir = joinpath(@__DIR__, "results") - save_results(results_dir, "memory_scaling", results) - println("\n Saved $(length(results)) results to $results_dir/") -end -``` - -- [ ] **Step 2: Run the scaling study** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -julia --project=benchmark -e ' - using TestItemRunner - @run_package_tests(filter=ti -> occursin("Memory scaling", ti.name), benchmark) -' -``` - -Expected: Table printed with wall times and allocations for each (N, dim) combination - -- [ ] **Step 3: Commit** - -```bash -cd /home/jack/repos/harmoniqs/DirectTrajOpt.jl -git add benchmark/benchmarks.jl -git commit -m "feat: add memory scaling study benchmark (N x state_dim sweep)" -``` - ---- - -## Verification Checklist - -After all tasks are complete: - -- [ ] `cd HarmoniqsBenchmarks.jl && julia --project=. 
-e 'using Pkg; Pkg.test()'` — all tests pass -- [ ] `cd DirectTrajOpt.jl && julia --project=benchmark -e 'using TestItemRunner; @run_package_tests(benchmark)'` — all three benchmark @testitems run -- [ ] `ls DirectTrajOpt.jl/benchmark/results/` — contains `.jld2` files for each benchmark -- [ ] Load and compare results: - ```julia - using HarmoniqsBenchmarks - results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") - println("Ipopt: $(results[1].wall_time_s)s, MadNLP: $(results[2].wall_time_s)s") - ``` - ---- - -## Follow-up Plans (Not in Scope) - -- **Piccolissimo benchmark suite** — migrate existing `benchmark/complex_vs_real_ode.jl` and `constraint_comparison.jl` to use HarmoniqsBenchmarks schema -- **Demo-repo problem generators** — clone bosonic-demo, nv-center-demo, atoms-demo, ions, fluxonium-demo, gkp-stanford and extract system Hamiltonians -- **CI workflows** — `.github/workflows/benchmark.yml` for DirectTrajOpt and other packages -- **Allocation profiling spike** — parallel worktree experiments with Profile.Allocs, AllocCheck.jl, --track-allocation -- **Aggregator repo** — `harmoniqs-benchmarks` with cross-package comparison tables diff --git a/docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md b/docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md deleted file mode 100644 index 50b7959..0000000 --- a/docs/superpowers/specs/2026-04-15-altissimo-gpu-benchmarks-design.md +++ /dev/null @@ -1,198 +0,0 @@ -# Altissimo GPU Benchmark Suite — Design - -**Date:** 2026-04-15 -**Status:** Design (follow-up to HarmoniqsBenchmarks.jl core plan) -**Depends on:** HarmoniqsBenchmarks.jl (schema, harness, storage) -**Reference:** `gpu_benchmark.py` (Colab notebook from Raghav, T4 results) - -## Context - -Altissimo.jl is a GPU-accelerated augmented Lagrangian optimizer for quantum trajectory optimization. 
It uses matrix-free JVP/VJP callbacks, making it GPU-compatible where Ipopt (which requires sparse Jacobians/Hessians) is CPU-only. Raghav demonstrated 4.5x GPU speedup at 1024 state dim on a T4. This benchmark suite formalizes those measurements and tracks them across versions. - -Three benchmark categories, matching the existing Colab notebook structure: - -1. **Ipopt vs Altissimo (CPU)** — real quantum gate optimization -2. **Altissimo CPU vs GPU scaling** — structured optimization at increasing state dim -3. **cuDensityMat vs cuSPARSE** — Liouvillian operator action for open-system trajectory optimization - -## Benchmark 0: Three-Way Solver Comparison (Ipopt vs MadNLP-GPU vs Altissimo-GPU) - -The harmoniqs org maintains a MadNLP.jl fork with `MadNLPGPU` (in `lib/MadNLPGPU/`), which uses CUDSS for GPU-accelerated sparse KKT system solves. This enables a three-way comparison at increasing problem sizes: - -| Solver | Method | Linear Algebra | GPU? | -|--------|--------|---------------|------| -| Ipopt | Interior-point | MUMPS/Pardiso (sparse, CPU) | No | -| MadNLP + MadNLPGPU | Interior-point | CUDSS (sparse, GPU) | Yes | -| Altissimo | Augmented Lagrangian | Matrix-free JVP/VJP (GPU) | Yes | - -**Hypothesis:** At small state dims (sd < 256), Ipopt wins due to mature sparse factorization. At medium dims (256-1024), MadNLP-GPU may win due to GPU-accelerated CUDSS. At large dims (1024+), Altissimo wins due to matrix-free scaling (no sparse assembly). - -**Problem:** Same quantum-control-structured problem as Benchmark 2 below, swept across sd ∈ {64, 128, 256, 512, 1024, 2048}. For MadNLP-GPU, the problem requires Jacobian/Hessian sparsity (MOI interface), so it uses the same evaluator as Ipopt but with GPU-side linear solves. 
- -**Dependencies:** -- `MadNLPGPU` from `harmoniqs/MadNLP.jl` (lib/MadNLPGPU) -- `CUDA.jl` + `CUDSS.jl` for GPU linear algebra -- DirectTrajOpt MadNLP extension for evaluator hookup - -**Metrics:** Wall time, iterations, convergence quality, total allocations, GPU memory usage, speedup vs Ipopt baseline. - -**Note:** MadNLP-GPU requires the KKT system to fit in GPU memory. For very large problems, the sparse Jacobian/Hessian may exceed VRAM, which is exactly where Altissimo's matrix-free approach has the advantage. - ---- - -## Benchmark 1: Ipopt vs Altissimo (CPU) — Quantum Gate Optimization - -Directly comparable: same X gate problem, same initial conditions, both on CPU. - -**Problem setup** (from Colab Part 2): -- System: 1 qubit, H_drift = 0.5 σ_z, drives = [σ_x, σ_y], bounds = [1.0, 1.0] -- Gate: X gate, T=10.0, N=100 -- Integrator: HermitianExponentialIntegrator -- Template: SmoothPulseProblem(Q=100.0, R=1e-2, ddu_bound=1.0, Δt_bounds=(0.05, 0.15)) -- Deep copy for identical initial conditions - -**Metrics:** -- Wall time (s) -- Fidelity (infidelity = 1 - fidelity) -- Total allocations (bytes) -- GC time - -**Altissimo configuration** (reference values): -```julia -AltissimoOptions( - search_direction = :LBFGS, - lbfgs_memory = 50, - line_search = :StrongWolfe, - ls_max_evals = 100, - max_outer_iter = 20, - max_inner_iter = 500, - inner_tol = 1e-8, - ρ_init = 100.0, - ρ_max = 1e8, - polish = true, - polish_stall_min_iters = 10, - polish_δ_w = 1e-6, - polish_δ_c = 1e-8, -) -``` - -**Integration with HarmoniqsBenchmarks:** Both produce `BenchmarkResult` with `solver="ipopt"` / `solver="altissimo"`. The `solver_options` field captures the full AltissimoOptions snapshot. - -## Benchmark 2: Altissimo CPU vs GPU Scaling - -The core scaling benchmark. Uses a quantum-control-structured problem (NOT a real quantum system) to isolate solver scaling behavior from physics complexity. 
- -**Problem structure** (from Colab Part 3): -- Decision vector: z = [x_1; ...; x_N; u_1; ...; u_{N-1}] -- Dynamics: x_{k+1} = Φ(u_k) x_k, where Φ(u) = A + Σⱼ uⱼ Cⱼ -- A is orthogonal (norm-preserving, like unitary evolution) -- Coupling scaled: ‖Cⱼ‖_spectral ≈ 0.4 independent of state_dim (σ_c = 0.2/√sd) -- Target generated by forward simulation with known controls → guaranteed feasible -- Objective: ½|x_N - x_target|² + (α/2) Σ|u_k|² -- All callbacks GPU-native: cuBLAS matvec, broadcast, dot (no scalar indexing) - -**Sweep configurations** (from Colab): - -| state_dim | n_drives | N | n_vars | n_eq | -|-----------|----------|----|----------|----------| -| 512 | 2 | 20 | 10,278 | 10,240 | -| 1024 | 2 | 20 | 20,518 | 20,480 | -| 2048 | 2 | 20 | 41,998 | 40,960 | -| 4096 | 2 | 20 | 81,958 | 81,920 | - -**Metrics per (state_dim, device) pair:** -- Wall time (s) — after JIT warmup -- Objective value at convergence -- Constraint violation ‖c‖ -- Converged (bool) -- GPU speedup = CPU_time / GPU_time - -**Key implementation details:** -- JIT warmup run before timed run -- `CUDA.synchronize()` before and after timed run for accurate GPU timing -- `build_callbacks()` returns obj!, grad!, hvp!, eq!, eq_jvp!, eq_vjp! -- Optimizer: `Altissimo.LBFGS` with `Altissimo.StrongWolfe` line search -- `initialize_z!` does forward propagation with u=0 for feasible init - -**Schema extension:** Add to `BenchmarkResult`: -- `device::String` — "cpu" or "gpu" -- `gpu_name::String` — e.g. "Tesla T4", "A100" (from `CUDA.name(CUDA.device())`) -- `gpu_memory_bytes::Int` — VRAM (from `CUDA.totalmem`) - -OR: encode these in `solver_options` dict to avoid schema changes: -```julia -solver_options[:device] = "gpu" -solver_options[:gpu_name] = CUDA.name(CUDA.device()) -solver_options[:gpu_memory_bytes] = CUDA.totalmem(CUDA.device()) -``` - -Recommended: use `solver_options` dict to avoid breaking the schema for CPU-only packages. 
- -## Benchmark 3: cuDensityMat vs cuSPARSE — Liouvillian Operator - -This measures the fundamental operation for open-system trajectory optimization: applying a Liouvillian superoperator to a density matrix. - -**System:** M coupled cavities with Fock truncation d=3, Hilbert space D = 3^M. -- Hamiltonian: H(t) = Σᵢ δᵢ(t) aᵢ†aᵢ + Σᵢ Kᵢ aᵢ†aᵢ†aᵢaᵢ + Σ⟨i,j⟩ gᵢⱼ(t)(aᵢ†aⱼ + h.c.) -- Collapse operators: √κ aᵢ (photon loss) -- Liouvillian: L = -i(H⊗I - I⊗Hᵀ) + Σₖ (Cₖ⊗Cₖ* - ½(Cₖ†Cₖ⊗I + I⊗Cₖᵀ Cₖ*)) - -**Sweep:** - -| M | D | ρ elements (D²) | cuDensityMat | cuSPARSE | Dense CPU | -|---|------|-----------------|-------------|----------|-----------| -| 2 | 9 | 81 | 0.27 ms | 0.039 ms | 0.003 ms | -| 4 | 81 | 6,561 | 1.22 ms | 0.048 ms | 31.8 ms | -| 6 | 729 | 531,441 | 6.45 ms | 0.90 ms | infeasible| -| 8 | 6561 | 43,046,721 | 620 ms | infeasible| infeasible| - -**Batched evolution** (trajectory optimization workload): - -| M | D | Batch | Batched | Sequential | Speedup | -|---|----|-------|-----------|------------|---------| -| 2 | 9 | 256 | 0.38 ms | 70.1 ms | 186x | -| 4 | 81 | 256 | 8.05 ms | 280.7 ms | 35x | - -**Key insight:** cuSPARSE beats cuDensityMat for M ≤ 6 (tensor-network contraction overhead at small D). cuDensityMat wins at M=8+ where sparse Liouvillian can't be materialized (~50-70 GB). Batched evolution is critical for trajectory optimization (35-186x speedup). - -**Integration note:** This benchmark depends on CuQuantum.jl (harmoniqs org). The cuDensityMat portion requires the NVIDIA cuQuantum SDK and should run exclusively on EC2 GPU runners. 
- -## CI Runner Requirements - -| Benchmark | Runner | GPU Required | -|-----------|--------|-------------| -| Ipopt vs Altissimo (CPU) | `ubuntu-latest` (free) | No | -| 3-way solver (Ipopt/MadNLP-GPU/Altissimo) | `[self-hosted, gpu]` (EC2) | Yes (T4 minimum, CUDSS for MadNLP) | -| Altissimo CPU vs GPU scaling | `[self-hosted, gpu]` (EC2) | Yes (T4 minimum) | -| cuSPARSE / cuDensityMat | `[self-hosted, gpu]` (EC2) | Yes (A100 recommended for M=8) | - -## Where Benchmarks Live - -- **Benchmark 1** (Ipopt vs Altissimo CPU): In `Piccolissimo.jl/benchmark/` since it uses `SmoothPulseProblem` + `HermitianExponentialIntegrator` -- **Benchmark 2** (GPU scaling): In `Altissimo.jl/benchmark/` since it's Altissimo-specific with CUDA callbacks -- **Benchmark 3** (Liouvillian): In `CuQuantum.jl/benchmark/` or `Piccolissimo.jl/benchmark/` (TBD based on where cuDensityMat integration lands) - -All use `HarmoniqsBenchmarks.jl` schema for consistent artifact format. - -## Adaptation for HarmoniqsBenchmarks Schema - -The Colab notebook uses ad-hoc timing (`@elapsed`, `CUDA.@elapsed`). 
To integrate with HarmoniqsBenchmarks: - -**Benchmark 2 adaptation:** -- Wrap `run_one()` to return a `BenchmarkResult` instead of a NamedTuple -- Add `solver_options` dict with Altissimo config + device info -- Replace manual `time()` calls with `@timed` for allocation tracking -- Save JLD2 artifacts instead of printing tables - -**Benchmark 3 adaptation:** -- Create a `LiouvillianBenchmarkResult` (or use a new `MicroBenchmarkResult` variant) -- Key fields: M, D, D², nnz(L), method (:cusparse, :cudensitymat, :cpu_dense), time_ms, memory_bytes -- Batched results include batch_size and sequential/batched comparison - -## Implementation Notes - -- The `apply_Phi!` / `apply_Phi_t!` pattern from the notebook should be extracted into Altissimo's callback builder, not reimplemented in benchmarks -- `CUDA.synchronize()` is critical for accurate GPU timing — always call before starting and after stopping the timer -- JIT warmup run is mandatory — first Julia/CUDA execution compiles kernels -- Memory estimation before large allocations: check `CUDA.totalmem()` and skip if would exceed 80% VRAM -- The coupling scaling fix (σ_c = 0.2/√sd) is essential for well-conditioned problems at large state dim — without it, ‖C‖ ~ 0.1√sd makes convergence erratic diff --git a/docs/superpowers/specs/2026-04-15-benchmarking-design.md b/docs/superpowers/specs/2026-04-15-benchmarking-design.md deleted file mode 100644 index bb9f943..0000000 --- a/docs/superpowers/specs/2026-04-15-benchmarking-design.md +++ /dev/null @@ -1,383 +0,0 @@ -# HarmoniqsBenchmarks.jl — Cross-Package Benchmarking Infrastructure - -**Date:** 2026-04-15 -**Status:** Design - -## Context - -The harmoniqs quantum optimal control stack (DirectTrajOpt, Piccolo, Piccolissimo, Altissimo, Intonato) needs a unified benchmarking system to: - -- Compare Ipopt vs MadNLP solver performance on the DirectTrajOpt `feat/madnlp-integration` branch -- Collect statistically robust histograms of key evaluator functions 
(eval_hessian_lagrangian, eval_constraint_jacobian, etc.) for regression detection -- Profile memory usage and allocations in MadNLP and across all packages, understanding how memory scales with knot points (N), state dimension, and control dimension -- Track allocations in the optimization hot path to drive them toward zero -- Publish version-tagged JLD2 artifacts so labs and enterprises can evaluate problem-size scaling - -This is driven by all three active workstreams needing memory/performance benchmarks (MadNLP integration, Altissimo GPU scaling at 1024 state dim, Intonato convergence tracking). - -## Architecture - -**Approach:** Shared `HarmoniqsBenchmarks.jl` package + per-package `benchmark/` directories + central aggregator repo. - -- `HarmoniqsBenchmarks.jl` — lightweight Julia package (own repo in harmoniqs org) providing schema, profiling harness, problem generators, and reporters -- Each downstream package (DirectTrajOpt, Piccolo, Piccolissimo, Altissimo, Intonato) has a `benchmark/` directory with `@testitem`-based benchmarks using `HarmoniqsBenchmarks` -- Central `harmoniqs-benchmarks` repo aggregates artifacts and generates cross-package comparison tables -- Artifacts are JLD2 files stored in CI (GitHub Actions artifact upload), not a live dashboard - -## Schema - -### BenchmarkResult - -```julia -struct BenchmarkResult - # Identity - package::String # "DirectTrajOpt", "Piccolissimo", etc. 
- package_version::String # semver tag - commit::String # short SHA - benchmark_name::String # "cz_gate_ipopt", "madnlp_scaling_N101_d16" - - # Problem dimensions - N::Int # knot points - state_dim::Int # state vector dimension - control_dim::Int # number of controls - n_constraints::Int # total nonlinear constraints - n_variables::Int # total NLP variables - - # Solve metrics - wall_time_s::Float64 - iterations::Int - objective_value::Float64 - constraint_violation::Float64 - solver_status::Symbol # :Optimal, :MaxIter, :Infeasible - solver::String # "ipopt", "madnlp", "altissimo" - - # Memory & allocations - total_allocations_bytes::Int - total_allocs_count::Int # number of allocation events - peak_memory_bytes::Int - - # GC stats - gc_time_ns::Int - gc_count::Int - gc_full_count::Int - - # Solver options snapshot - solver_options::Dict{Symbol,Any} - - # Metadata - julia_version::String - timestamp::DateTime - runner::String # "github-actions", "ec2-gpu", "local" - n_threads::Int -end -``` - -### MicroBenchmarkResult - -```julia -struct MicroBenchmarkResult - # Identity (same as above) - package::String - package_version::String - commit::String - benchmark_name::String - - # Problem dimensions - N::Int - state_dim::Int - control_dim::Int - - # Per-function BenchmarkTools results - # Each value is a serialized BenchmarkTools.Trial containing: - # times (ns), gctimes (ns), memory (bytes), allocs (count) - eval_benchmarks::Dict{Symbol, Any} - # Keys: :eval_objective, :eval_gradient, :eval_constraint, - # :eval_jacobian, :eval_hessian_lagrangian - - # Metadata - julia_version::String - timestamp::DateTime - runner::String - n_threads::Int -end -``` - -## Benchmarking Layers - -### Layer 1: Micro-benchmarks (Eval Function Histograms) - -Use `BenchmarkTools.@benchmark` on individual MOI evaluator methods. This gives statistically robust distributions with proper warmup, plus allocation counts per call. 
- -```julia -@testitem "Evaluator micro-benchmarks: CZ N=51" begin - using HarmoniqsBenchmarks, BenchmarkTools, Piccolissimo, Piccolo - - prob = build_cz_problem(N=51) - evaluator, Z_vec = build_evaluator(prob) - - # Pre-allocate output buffers - g = zeros(n_constraints(evaluator)) - grad = zeros(n_variables(evaluator)) - H = zeros(n_hessian_entries(evaluator)) - J = zeros(n_jacobian_entries(evaluator)) - sigma = 1.0 - mu = ones(n_constraints(evaluator)) - - benchmarks = Dict( - :eval_objective => @benchmark(MOI.eval_objective($evaluator, $Z_vec)), - :eval_gradient => @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)), - :eval_constraint => @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)), - :eval_jacobian => @benchmark(MOI.eval_constraint_jacobian($evaluator, $J, $Z_vec)), - :eval_hessian_lagrangian => @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)), - ) - - save_micro_results("cz_N51_ipopt", benchmarks; prob) -end -``` - -**Regression detection:** Compare median times and allocation counts across versions. A >10% regression in any eval function on the same problem size flags for review. - -### Layer 2: Macro-benchmarks (Full Solves) - -Use `@timed` for wall clock + total allocations on `solve!`. Full optimization is not repeatable in the BenchmarkTools sense (each call modifies the problem), so we capture single-run metrics. - -```julia -@testitem "CZ gate Ipopt vs MadNLP" begin - using HarmoniqsBenchmarks, Piccolissimo, Piccolo - - prob = build_cz_problem(N=51) - result_ipopt = benchmark_solve!(prob, IpoptOptions()) - - prob = build_cz_problem(N=51) # fresh problem - result_madnlp = benchmark_solve!(prob, MadNLPOptions()) - - save_results("cz_gate_comparison", [result_ipopt, result_madnlp]) -end -``` - -### Layer 3: Scaling Studies - -Parameterized sweeps over problem dimensions to characterize memory and time growth. 
- -```julia -@testitem "MadNLP memory scaling" begin - using HarmoniqsBenchmarks, Piccolissimo, Piccolo - - results = BenchmarkResult[] - for N in [25, 51, 101, 201, 401] - for state_dim in [4, 8, 16, 32, 64] - prob = build_bilinear_problem(; N, state_dim, n_controls=2) - r = benchmark_solve!(prob, MadNLPOptions()) - push!(results, r) - end - end - save_results("madnlp_memory_scaling", results) -end -``` - -### Layer 4: Allocation Profiling - -Tools for tracking down and eliminating allocations in the optimization hot path. - -**Profile.Allocs** — captures per-allocation stack traces during a solve: -```julia -@testitem "Allocation profile: CZ solve" begin - using HarmoniqsBenchmarks, Profile, Piccolissimo, Piccolo - - prob = build_cz_problem(N=51) - Profile.Allocs.clear() - Profile.Allocs.@profile sample_rate=1.0 solve!(prob) - alloc_results = Profile.Allocs.fetch() - - save_alloc_profile("cz_N51_alloc_profile", alloc_results) - # Visualize locally: using PProf; PProf.Allocs.pprof(alloc_results) -end -``` - -**AllocCheck.jl** — compile-time zero-allocation enforcement for evaluator hot paths. Can be added as an optional CI check: -```julia -@testitem "Zero-allocation check: evaluator methods" begin - using AllocCheck, DirectTrajOpt - - # These should be allocation-free once optimized - @check_allocs MOI.eval_constraint(ev::Evaluator, g::Vector{Float64}, Z::Vector{Float64}) - @check_allocs MOI.eval_constraint_jacobian(ev::Evaluator, J::Vector{Float64}, Z::Vector{Float64}) - @check_allocs MOI.eval_hessian_lagrangian(ev::Evaluator, H::Vector{Float64}, Z::Vector{Float64}, s::Float64, m::Vector{Float64}) -end -``` - -**Per-line tracking** (local development, not CI): -```bash -julia --track-allocation=user --project=benchmark benchmark/benchmarks.jl -# Generates .mem files with per-line allocation counts -``` - -**Implementation note:** The best allocation profiling approach for the evaluator hot path is TBD. 
During implementation, spike all three approaches (`Profile.Allocs`, `AllocCheck.jl`, `--track-allocation`) in parallel worktrees against a representative problem (e.g. CZ N=51) to determine which gives the most actionable results for tracking down and eliminating allocations in the MOI eval methods. - -## Problem Generators - -Deterministic, parameterized problem constructors for reproducibility. - -### DirectTrajOpt level -- `build_bilinear_problem(; N=51, state_dim=4, n_controls=2, seed=42)` — random Hermitian system matrices, bilinear integrator + quadratic regularizer -- `build_constrained_problem(; N=51, state_dim=4, n_nonlinear=3, seed=42)` — adds nonlinear knot-point constraints - -### Piccolo/Piccolissimo level -- `build_cz_problem(; N=51, integrator=:hermitian_exp)` — 2-qubit CZ gate, exchange-only system (4-level), matches spin-qubit-demo -- `build_cnot_problem(; N=101, integrator=:hermitian_exp)` — 2-qubit CNOT with 3 EDSR drives -- `build_transmon_problem(; levels=3, N=51)` — single-qubit X gate on multi-level transmon - -### Altissimo level -- `build_polish_problem(; N=51, state_dim=4)` — pre-solved Ipopt problem ready for Altissimo refinement -- `build_gpu_scaling_problem(; state_dim=1024)` — large-state-dim problem for GPU benchmarking - -### Intonato level -- `build_qilc_problem(; N=101, n_paulis=15, J_mismatch=1.3)` — QILC calibration loop with simulated experiment, matches spin-qubit-demo pattern - -### Demo-repo-derived problems - -The harmoniqs org has several hardware-platform demo repos that provide real-world benchmark problems. 
During implementation, clone and extract representative problem configurations from: - -| Repo | Platform | Typical Dimensions | Key Benchmark | -|------|----------|-------------------|---------------| -| `spin-qubit-demo` | Silicon spin qubits | N=51-101, 4-level, 1-3 drives | CZ, CNOT, QILC calibration | -| `bosonic-demo` | Bosonic cavity QED | Higher Hilbert space dims | Cavity control | -| `nv-center-demo` | NV centers | Spin-1 + nuclear spins | Dark matter sensing pulses | -| `atoms-demo` | Neutral atoms | Rydberg levels | Multi-qubit gates | -| `ions` | Trapped ions | Motional modes + qubits | MS gate, individual addressing | -| `fluxonium-demo` | Fluxonium qubits | Multi-level transmon-like | Single-qubit gates | -| `gkp-stanford` | GKP states | Bosonic Fock space | State preparation | - -These provide the "enterprise-scale" problem suite that demonstrates what problem sizes each solver can handle. Extract the system Hamiltonians and problem parameters from each demo, wrap them as generators in `HarmoniqsBenchmarks.problems/`. - -All generators use `Random.seed!(seed)` for determinism. - -## Harness Functions - -### build_evaluator(prob) -> (evaluator, Z_vec) - -Extracts the MOI evaluator and initial decision variable vector from a `DirectTrajOptProblem`. Used for micro-benchmarks so individual eval functions can be called directly. - -### benchmark_solve!(prob, options; kwargs...) -> BenchmarkResult - -```julia -function benchmark_solve!(prob, options; kwargs...) - GC.gc() - gc_before = Base.gc_num() - - timed = @timed solve!(prob; options, kwargs...) - - gc_after = Base.gc_num() - - return BenchmarkResult( - # ... populate from prob metadata, timed, gc delta, options snapshot - ) -end -``` - -### save_results(name, results) / save_micro_results(name, benchmarks) - -Write JLD2 to `benchmark/results/_.jld2`. 
- -### compare_results(baseline_path, current_path) -> ComparisonTable - -Load two result sets and produce a diff table with percent changes, flagging regressions. - -## CI Workflow - -### Per-package: `.github/workflows/benchmark.yml` - -```yaml -name: Benchmarks -on: - push: - tags: ['v*'] - workflow_dispatch: - inputs: - baseline_tag: - description: 'Tag to compare against' - required: false - -jobs: - benchmark: - runs-on: ubuntu-latest # free for OSS - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - with: - version: '1.11' - - name: Instantiate benchmark env - run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' - - name: Run benchmarks - run: julia --project=benchmark -t auto -e ' - using TestItemRunner - @run_package_tests(benchmark) - ' - - uses: actions/upload-artifact@v4 - with: - name: benchmark-${{ github.ref_name }}-${{ github.sha }} - path: benchmark/results/ - retention-days: 365 - - # GPU/large-scale benchmarks (Altissimo, large N) - benchmark-gpu: - if: contains(github.repository, 'Altissimo') || github.event_name == 'workflow_dispatch' - runs-on: [self-hosted, gpu] # EC2 runners from CuQuantum.jl setup - steps: - # same as above but with CUDA-enabled Julia -``` - -### Central aggregator: `harmoniqs-benchmarks` repo - -Triggered by workflow_dispatch or cron. Downloads latest artifacts from each package repo, generates comparison tables, stores historical archive. 
- -## Package Structure - -``` -HarmoniqsBenchmarks.jl/ - src/ - HarmoniqsBenchmarks.jl # module + exports - schema.jl # BenchmarkResult, MicroBenchmarkResult - harness.jl # benchmark_solve!, build_evaluator - storage.jl # save/load JLD2, save_alloc_profile - report.jl # compare_results, regression detection - problems/ - bilinear.jl # DirectTrajOpt-level generators - quantum_gates.jl # Piccolo/Piccolissimo-level generators - polish.jl # Altissimo-level generators - qilc.jl # Intonato-level generators - Project.toml # deps: BenchmarkTools, JLD2, Dates - README.md - -# Per downstream package: -DirectTrajOpt.jl/ - benchmark/ - Project.toml # [deps] HarmoniqsBenchmarks, BenchmarkTools, TestItems, ... - benchmarks.jl # @testitems: micro, macro, scaling - results/ # .gitignored JLD2 output -``` - -## Verification - -1. **Unit test the harness:** `benchmark_solve!` returns a valid `BenchmarkResult` with all fields populated -2. **Run micro-benchmarks locally:** Confirm BenchmarkTools produces histograms for each eval function -3. **Run scaling sweep:** Verify memory grows as expected with N and state_dim -4. **CI dry run:** Trigger workflow_dispatch on DirectTrajOpt, confirm artifact upload -5. **Cross-package comparison:** Run aggregator on two package artifacts, verify comparison table output -6. 
**Allocation profiling:** Run Profile.Allocs on a solve, verify PProf flamegraph renders - -## Scope - -**In scope (this design):** -- HarmoniqsBenchmarks.jl package creation -- DirectTrajOpt benchmark suite (Ipopt vs MadNLP, scaling, micro-benchmarks, allocation profiling) -- Piccolissimo benchmark suite (integrate existing benchmarks + new scaling) -- CI workflows for DirectTrajOpt and Piccolissimo -- Aggregator script in harmoniqs-benchmarks repo - -**Future work:** -- Altissimo GPU benchmarks (requires CUDA runner validation) -- Intonato convergence benchmarks (requires stable Phase 5) -- Piccolo template benchmarks -- AllocCheck CI gates (after hot paths are optimized) -- Automated regression comments on PRs From ba121d399f0e830545b867d1b6d615ac034f52bb Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Thu, 16 Apr 2026 02:33:54 -0400 Subject: [PATCH 07/13] fix: exclude benchmark/ testitems from test suite The benchmark @testitems require HarmoniqsBenchmarks which is only available in the benchmark/ project environment, not the test extras. Filter them out so `Pkg.test()` / julia-runtest CI doesn't pick them up. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- test/runtests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 9f95075..d57ccc9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,5 +2,5 @@ using DirectTrajOpt using TestItemRunner -# Run all testitem tests in package -@run_package_tests +# Exclude benchmark/ testitems — those run in a separate project environment +@run_package_tests filter=ti -> !contains(ti.filename, "benchmark") From 35ddb07257026f8b103689de9ae60fcf14946d1a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 17 Apr 2026 16:52:31 +0000 Subject: [PATCH 08/13] chore: autoformat --- benchmark/benchmarks.jl | 217 +++++++++++++++++++++++++++++++--------- test/compare_solvers.jl | 18 ++-- 2 files changed, 174 insertions(+), 61 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index a7ecc6f..28e6ee4 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -6,19 +6,35 @@ using TestItems const MOI = MathOptInterface Random.seed!(42) - N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 + N = 51; + Δt = 0.1; + u_bound = 0.1; + ω = 0.1 Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) G(u) = ω * Gz + u[1] * Gx + u[2] * Gy traj = NamedTrajectory( - (x=2rand(4,N).-1, u=u_bound*(2rand(2,N).-1), du=randn(2,N), ddu=randn(2,N), Δt=fill(Δt,N)); - controls=(:ddu,:Δt), timestep=:Δt, bounds=(u=u_bound, Δt=(0.01,0.5)), - initial=(x=[1.0,0.0,0.0,0.0], u=zeros(2)), final=(u=zeros(2),), - goal=(x=[0.0,1.0,0.0,0.0],), + ( + x = 2rand(4, N) .- 1, + u = u_bound*(2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 
0.0, 0.0],), ) - integrators = [BilinearIntegrator(G,:x,:u,traj), DerivativeIntegrator(:u,:du,traj), DerivativeIntegrator(:du,:ddu,traj)] + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) prob = DirectTrajOptProblem(traj, J, integrators) @@ -33,24 +49,52 @@ using TestItems mu = ones(dims.n_constraints) benchmarks = Dict{Symbol,EvalBenchmark}( - :eval_objective => trial_to_eval_benchmark(@benchmark(MOI.eval_objective($evaluator, $Z_vec))), - :eval_gradient => trial_to_eval_benchmark(@benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec))), - :eval_constraint => trial_to_eval_benchmark(@benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec))), - :eval_jacobian => trial_to_eval_benchmark(@benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec))), - :eval_hessian_lagrangian => trial_to_eval_benchmark(@benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu))), + :eval_objective => + trial_to_eval_benchmark(@benchmark(MOI.eval_objective($evaluator, $Z_vec))), + :eval_gradient => trial_to_eval_benchmark( + @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)) + ), + :eval_constraint => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)) + ), + :eval_jacobian => trial_to_eval_benchmark( + @benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec)) + ), + :eval_hessian_lagrangian => trial_to_eval_benchmark( + @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)) + ), ) result = MicroBenchmarkResult( - package="DirectTrajOpt", package_version="0.8.10", - commit=(try String(strip(read(`git rev-parse --short HEAD`, String))) catch; "unknown" end), - benchmark_name="evaluator_micro_bilinear_N51", N=N, state_dim=4, control_dim=2, - eval_benchmarks=benchmarks, julia_version=string(VERSION), - 
timestamp=Dates.now(), runner=get(ENV, "BENCHMARK_RUNNER", "local"), n_threads=Threads.nthreads(), + package = "DirectTrajOpt", + package_version = "0.8.10", + commit = ( + try + String(strip(read(`git rev-parse --short HEAD`, String))) + catch + ; "unknown" + end + ), + benchmark_name = "evaluator_micro_bilinear_N51", + N = N, + state_dim = 4, + control_dim = 2, + eval_benchmarks = benchmarks, + julia_version = string(VERSION), + timestamp = Dates.now(), + runner = get(ENV, "BENCHMARK_RUNNER", "local"), + n_threads = Threads.nthreads(), ) println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===") - for (name, eb) in sort(collect(result.eval_benchmarks), by=first) - @printf(" %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", name, eb.median_ns, eb.allocs, eb.memory_bytes) + for (name, eb) in sort(collect(result.eval_benchmarks), by = first) + @printf( + " %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", + name, + eb.median_ns, + eb.allocs, + eb.memory_bytes + ) end results_dir = joinpath(@__DIR__, "results") @@ -63,36 +107,66 @@ end using SparseArrays, ExponentialAction, Random, Dates import MadNLP - const MadNLPSolverExt = [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] + const MadNLPSolverExt = [ + mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt + ][1] - function make_bilinear_problem(; seed=42) + function make_bilinear_problem(; seed = 42) Random.seed!(seed) - N = 51; Δt = 0.1; u_bound = 0.1; ω = 0.1 + N = 51; + Δt = 0.1; + u_bound = 0.1; + ω = 0.1 Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) G(u) = ω * Gz + u[1] * Gx + u[2] * Gy traj = NamedTrajectory( - (x=2rand(4,N).-1, u=u_bound*(2rand(2,N).-1), du=randn(2,N), ddu=randn(2,N), Δt=fill(Δt,N)); - controls=(:ddu,:Δt), timestep=:Δt, bounds=(u=u_bound, Δt=(0.01,0.5)), - 
initial=(x=[1.0,0.0,0.0,0.0], u=zeros(2)), final=(u=zeros(2),), - goal=(x=[0.0,1.0,0.0,0.0],), + ( + x = 2rand(4, N) .- 1, + u = u_bound*(2rand(2, N) .- 1), + du = randn(2, N), + ddu = randn(2, N), + Δt = fill(Δt, N), + ); + controls = (:ddu, :Δt), + timestep = :Δt, + bounds = (u = u_bound, Δt = (0.01, 0.5)), + initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), + final = (u = zeros(2),), + goal = (x = [0.0, 1.0, 0.0, 0.0],), ) - integrators = [BilinearIntegrator(G,:x,:u,traj), DerivativeIntegrator(:u,:du,traj), DerivativeIntegrator(:du,:ddu,traj)] + integrators = [ + BilinearIntegrator(G, :x, :u, traj), + DerivativeIntegrator(:u, :du, traj), + DerivativeIntegrator(:du, :ddu, traj), + ] J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) return DirectTrajOptProblem(traj, J, integrators) end prob_ipopt = make_bilinear_problem() - result_ipopt = benchmark_solve!(prob_ipopt, IpoptOptions(max_iter=200, print_level=0); benchmark_name="bilinear_N51_ipopt") + result_ipopt = benchmark_solve!( + prob_ipopt, + IpoptOptions(max_iter = 200, print_level = 0); + benchmark_name = "bilinear_N51_ipopt", + ) prob_madnlp = make_bilinear_problem() - result_madnlp = benchmark_solve!(prob_madnlp, MadNLPSolverExt.MadNLPOptions(max_iter=200, print_level=1); benchmark_name="bilinear_N51_madnlp") + result_madnlp = benchmark_solve!( + prob_madnlp, + MadNLPSolverExt.MadNLPOptions(max_iter = 200, print_level = 1); + benchmark_name = "bilinear_N51_madnlp", + ) println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") - println(" Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB alloc") - println(" MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, $(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc") + println( + " Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB alloc", + ) + println( + " MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, 
$(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc", + ) results_dir = joinpath(@__DIR__, "results") save_results(results_dir, "ipopt_vs_madnlp_N51", [result_ipopt, result_madnlp]) @@ -103,24 +177,37 @@ end using SparseArrays, ExponentialAction, Random, Dates, Printf import MadNLP - const MadNLPSolverExt = [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] + const MadNLPSolverExt = [ + mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt + ][1] - function make_scaled_problem(; N, state_dim, n_controls=2, seed=42) + function make_scaled_problem(; N, state_dim, n_controls = 2, seed = 42) Random.seed!(seed) G_drift = sparse(randn(state_dim, state_dim)) - G_drives = [sparse(randn(state_dim, state_dim)) for _ in 1:n_controls] - G(u) = G_drift + sum(u[i] * G_drives[i] for i in 1:n_controls) + G_drives = [sparse(randn(state_dim, state_dim)) for _ = 1:n_controls] + G(u) = G_drift + sum(u[i] * G_drives[i] for i = 1:n_controls) - x_init = zeros(state_dim); x_init[1] = 1.0 - x_goal = zeros(state_dim); x_goal[min(2,state_dim)] = 1.0 + x_init = zeros(state_dim); + x_init[1] = 1.0 + x_goal = zeros(state_dim); + x_goal[min(2, state_dim)] = 1.0 traj = NamedTrajectory( - (x=randn(state_dim,N), u=0.1*randn(n_controls,N), du=randn(n_controls,N), Δt=fill(0.1,N)); - controls=(:du,:Δt), timestep=:Δt, bounds=(u=1.0, Δt=(0.01,0.5)), - initial=(x=x_init, u=zeros(n_controls)), final=(u=zeros(n_controls),), - goal=(x=x_goal,), + ( + x = randn(state_dim, N), + u = 0.1*randn(n_controls, N), + du = randn(n_controls, N), + Δt = fill(0.1, N), + ); + controls = (:du, :Δt), + timestep = :Δt, + bounds = (u = 1.0, Δt = (0.01, 0.5)), + initial = (x = x_init, u = zeros(n_controls)), + final = (u = zeros(n_controls),), + goal = (x = x_goal,), ) - integrators = [BilinearIntegrator(G,:x,:u,traj), DerivativeIntegrator(:u,:du,traj)] + integrators = + [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] J = 
QuadraticRegularizer(:u, traj, 1.0) return DirectTrajOptProblem(traj, J, integrators) end @@ -130,22 +217,52 @@ end results = BenchmarkResult[] println("\n=== Memory Scaling Study ===") - @printf(" %5s | %5s | %12s | %12s | %12s | %12s\n", "N", "dim", "Ipopt (s)", "Ipopt (KB)", "MadNLP (s)", "MadNLP (KB)") - @printf(" %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", "-"^5, "-"^5, "-"^12, "-"^12, "-"^12, "-"^12) + @printf( + " %5s | %5s | %12s | %12s | %12s | %12s\n", + "N", + "dim", + "Ipopt (s)", + "Ipopt (KB)", + "MadNLP (s)", + "MadNLP (KB)" + ) + @printf( + " %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", + "-"^5, + "-"^5, + "-"^12, + "-"^12, + "-"^12, + "-"^12 + ) for N in N_values for dim in dim_values - prob = make_scaled_problem(; N=N, state_dim=dim) - r_ipopt = benchmark_solve!(prob, IpoptOptions(max_iter=50, print_level=0); benchmark_name="scaling_N$(N)_d$(dim)_ipopt") + prob = make_scaled_problem(; N = N, state_dim = dim) + r_ipopt = benchmark_solve!( + prob, + IpoptOptions(max_iter = 50, print_level = 0); + benchmark_name = "scaling_N$(N)_d$(dim)_ipopt", + ) push!(results, r_ipopt) - prob = make_scaled_problem(; N=N, state_dim=dim) - r_madnlp = benchmark_solve!(prob, MadNLPSolverExt.MadNLPOptions(max_iter=50, print_level=1); benchmark_name="scaling_N$(N)_d$(dim)_madnlp") + prob = make_scaled_problem(; N = N, state_dim = dim) + r_madnlp = benchmark_solve!( + prob, + MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = 1); + benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", + ) push!(results, r_madnlp) - @printf(" %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", - N, dim, r_ipopt.wall_time_s, r_ipopt.total_allocations_bytes ÷ 1024, - r_madnlp.wall_time_s, r_madnlp.total_allocations_bytes ÷ 1024) + @printf( + " %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", + N, + dim, + r_ipopt.wall_time_s, + r_ipopt.total_allocations_bytes ÷ 1024, + r_madnlp.wall_time_s, + r_madnlp.total_allocations_bytes ÷ 1024 + ) end end diff --git a/test/compare_solvers.jl 
b/test/compare_solvers.jl index 77ac9a1..7dca12c 100644 --- a/test/compare_solvers.jl +++ b/test/compare_solvers.jl @@ -7,16 +7,12 @@ using SparseArrays using NamedTrajectories using DirectTrajOpt -const MadNLPSolverExt = [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] - -function get_seeded_trajectory(seed; - N = 10, - Δt = 0.1, - u_bound = 0.1, - ω = 0.1, -) +const MadNLPSolverExt = + [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] + +function get_seeded_trajectory(seed; N = 10, Δt = 0.1, u_bound = 0.1, ω = 0.1) Random.seed!(seed) - + Gx = sparse(Float64[ 0 0 0 1; 0 0 1 0; @@ -59,7 +55,7 @@ function get_seeded_trajectory(seed; ); controls = (:ddu, :Δt), timestep = :Δt, - bounds = (u = (-u_bound, u_bound), Δt = (1., 1.)), # timestep variability is a major source of error as in the "multiple comparisons problem" so we make them constant here + bounds = (u = (-u_bound, u_bound), Δt = (1.0, 1.0)), # timestep variability is a major source of error as in the "multiple comparisons problem" so we make them constant here initial = (x = x_init, u = zeros(2)), final = (u = zeros(2),), goal = (x = x_goal,), @@ -144,7 +140,7 @@ function get_solver_comparison(seed) return err, (ti, tm) end -wins = Dict(:ipopt => 0, :madnlp => 0,) +wins = Dict(:ipopt => 0, :madnlp => 0) for seed = 0:99 err, (ti, tm) = get_solver_comparison(seed) (err < 1e-3) || exit(1) From 1bf0416d7b421de69edf549d91a6adae1f74c031 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Thu, 23 Apr 2026 08:55:49 -0400 Subject: [PATCH 09/13] Add MadNLP pass-through fields (linear_solver, array_type, etc.) Restores pass-throughs consumed by MadNLP's MOI layer so users can select CUDSSSolver, CuArray, KKT variants, and cuDSS ordering through the MadNLPOptions struct (required for MadNLPGPU/cuDSS-on-GPU flows). set_options! now skips fields left as `nothing` so MadNLP's own defaults stand. 
--- ext/MadNLPSolverExt/solver.jl | 7 ++++++- src/solvers/madnlp_solver/options.jl | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ext/MadNLPSolverExt/solver.jl b/ext/MadNLPSolverExt/solver.jl index 514d29a..ca60e53 100644 --- a/ext/MadNLPSolverExt/solver.jl +++ b/ext/MadNLPSolverExt/solver.jl @@ -208,7 +208,12 @@ function DirectTrajOpt.set_options!(optimizer::AbstractOptimizer, options::MadNL if name in ignored_options continue end - # TODO: allow internal defaults, i.e. do not set the internal options dict unless the user actually specified the associated opt + # `nothing` means "use MadNLP's own default" — don't overwrite the optimizer's + # internal dict in that case. Applies to the pass-through fields + # (linear_solver, array_type, kkt_system, cudss_ordering). + if value === nothing + continue + end if name == :print_level optimizer.options[name] = MadNLP.LogLevels(value) elseif name == :hessian_approximation diff --git a/src/solvers/madnlp_solver/options.jl b/src/solvers/madnlp_solver/options.jl index 9a6fb73..53c10ae 100644 --- a/src/solvers/madnlp_solver/options.jl +++ b/src/solvers/madnlp_solver/options.jl @@ -7,6 +7,13 @@ export MadNLPOptions print_level::Int = 3 # (MadNLP.TRACE::MadNLP.LogLevels = 1, ..., MadNLP.ERROR::MadNLP.LogLevels = 6) hessian_approximation::String = "exact" # (exact = MadNLP.ExactHessian, compact_lbfgs = MadNLP.CompactLBFGS) # no other QN methods supported in conjunction with MadNLP.SparseCallback + # Pass-throughs consumed by MadNLP's MOI layer (not by MadNLP itself); + # leave as `nothing` to use MadNLP defaults. Only forwarded when non-nothing. + linear_solver::Any = nothing # e.g. MadNLPGPU.CUDSSSolver, MadNLP.LapackCPUSolver + array_type::Any = nothing # e.g. CUDA.CuArray for GPU + kkt_system::Any = nothing # e.g. MadNLP.SparseUnreducedKKTSystem + cudss_ordering::Any = nothing # e.g. 
MadNLPGPU.AMD_ORDERING + # # Only supported by DirectTrajOpt._solve, as an optional kwarg override of `hessian_approximation`; # # `hessian_approximation = eval_hessian ? "exact" : "compact_lbfgs"` # eval_hessian::Bool = true From 9239b1a1b2834c0ac69cb2e97f90f39dd78b3eb2 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Thu, 23 Apr 2026 08:58:35 -0400 Subject: [PATCH 10/13] Format MadNLPOptions per JuliaFormatter --- src/solvers/madnlp_solver/options.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solvers/madnlp_solver/options.jl b/src/solvers/madnlp_solver/options.jl index 53c10ae..6d3a382 100644 --- a/src/solvers/madnlp_solver/options.jl +++ b/src/solvers/madnlp_solver/options.jl @@ -9,9 +9,9 @@ export MadNLPOptions # Pass-throughs consumed by MadNLP's MOI layer (not by MadNLP itself); # leave as `nothing` to use MadNLP defaults. Only forwarded when non-nothing. - linear_solver::Any = nothing # e.g. MadNLPGPU.CUDSSSolver, MadNLP.LapackCPUSolver - array_type::Any = nothing # e.g. CUDA.CuArray for GPU - kkt_system::Any = nothing # e.g. MadNLP.SparseUnreducedKKTSystem + linear_solver::Any = nothing # e.g. MadNLPGPU.CUDSSSolver, MadNLP.LapackCPUSolver + array_type::Any = nothing # e.g. CUDA.CuArray for GPU + kkt_system::Any = nothing # e.g. MadNLP.SparseUnreducedKKTSystem cudss_ordering::Any = nothing # e.g. 
MadNLPGPU.AMD_ORDERING # # Only supported by DirectTrajOpt._solve, as an optional kwarg override of `hessian_approximation`; From 6b50bc76b4229c7261a6e798960ed1a87bb6e1d6 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 24 Apr 2026 16:46:20 -0400 Subject: [PATCH 11/13] chore: bump version to 0.8.11 Co-Authored-By: Claude Opus 4.6 (1M context) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index ad27bd3..fe86d12 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "DirectTrajOpt" uuid = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" -version = "0.8.10" +version = "0.8.11" authors = ["Aaron Trowbridge and contributors"] [deps] From 091c7b24bdae191a1f3a4893f520c06e3693f1d7 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 24 Apr 2026 16:47:43 -0400 Subject: [PATCH 12/13] Revert "chore: bump version to 0.8.11" This reverts commit 6b50bc76b4229c7261a6e798960ed1a87bb6e1d6. --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index fe86d12..ad27bd3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "DirectTrajOpt" uuid = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" -version = "0.8.11" +version = "0.8.10" authors = ["Aaron Trowbridge and contributors"] [deps] From 83da0761265257f600586c693f11a9810c77b378 Mon Sep 17 00:00:00 2001 From: Jack Champagne Date: Fri, 24 Apr 2026 16:53:14 -0400 Subject: [PATCH 13/13] Revert "Merge pull request #67 from harmoniqs/benchmarks/directtrajopt-initial" This reverts commit 19fa68ad630ccf30141aee73ac4aba2a9207dd1a, reversing changes made to 9239b1a1b2834c0ac69cb2e97f90f39dd78b3eb2. 
--- .github/workflows/benchmark.yml | 50 ------ Project.toml | 2 +- benchmark/.gitignore | 2 - benchmark/BenchmarkUtils.jl | 1 + benchmark/Project.toml | 20 --- benchmark/README.md | 33 ---- benchmark/benchmarks.jl | 272 -------------------------------- test/compare_solvers.jl | 16 +- test/runtests.jl | 4 +- 9 files changed, 12 insertions(+), 388 deletions(-) delete mode 100644 .github/workflows/benchmark.yml delete mode 100644 benchmark/.gitignore create mode 100644 benchmark/BenchmarkUtils.jl delete mode 100644 benchmark/Project.toml delete mode 100644 benchmark/README.md delete mode 100644 benchmark/benchmarks.jl diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index 1f20d76..0000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Benchmarks -on: - push: - tags: ['v*'] - pull_request: - paths: - - 'src/**' - - 'benchmark/**' - - '.github/workflows/benchmark.yml' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} - -jobs: - benchmark: - name: Benchmark suite - runs-on: ubuntu-latest - timeout-minutes: 60 - permissions: - actions: write - contents: read - steps: - - uses: actions/checkout@v6 - - - uses: julia-actions/setup-julia@v2 - with: - version: '1.11' - arch: x64 - - - uses: julia-actions/cache@v2 - - - name: Instantiate benchmark environment - run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' - - - name: Run benchmarks - run: | - julia --project=benchmark -t auto -e ' - using TestItemRunner - TestItemRunner.run_tests("benchmark/") - ' - - - name: Upload benchmark artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: benchmark-${{ github.event.pull_request.number || github.ref_name }}-${{ github.sha }} - path: benchmark/results/ - retention-days: 90 diff --git a/Project.toml b/Project.toml index ee36b38..796bc0e 100644 --- a/Project.toml 
+++ b/Project.toml @@ -1,6 +1,6 @@ name = "DirectTrajOpt" uuid = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" -version = "0.9.1" +version = "0.9.0" authors = ["Aaron Trowbridge and contributors"] [deps] diff --git a/benchmark/.gitignore b/benchmark/.gitignore deleted file mode 100644 index ca28c11..0000000 --- a/benchmark/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -results/ -Manifest.toml diff --git a/benchmark/BenchmarkUtils.jl b/benchmark/BenchmarkUtils.jl new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/benchmark/BenchmarkUtils.jl @@ -0,0 +1 @@ + diff --git a/benchmark/Project.toml b/benchmark/Project.toml deleted file mode 100644 index b219215..0000000 --- a/benchmark/Project.toml +++ /dev/null @@ -1,20 +0,0 @@ -[deps] -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56" -ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3" -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -HarmoniqsBenchmarks = "f45d0b76-2d23-4568-9599-481e0da131db" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" -MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" -NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" -TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" - -[sources] -DirectTrajOpt = {path = ".."} -HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl"} diff --git a/benchmark/README.md b/benchmark/README.md deleted file mode 100644 index c0737c9..0000000 --- a/benchmark/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# DirectTrajOpt Benchmarks - -Benchmark suite for DirectTrajOpt.jl comparing Ipopt and MadNLP solver performance. 
- -## Running locally - -```bash -# From DirectTrajOpt.jl root -julia --project=benchmark -e 'using Pkg; Pkg.instantiate()' - -julia --project=benchmark -t auto -e ' - using TestItemRunner - TestItemRunner.run_tests("benchmark/") -' -``` - -Artifacts are saved as JLD2 files in `benchmark/results/` (gitignored). - -## Benchmark suites - -- **Evaluator micro-benchmarks** — `BenchmarkTools.@benchmark` timings for each MOI eval function (objective, gradient, constraint, jacobian, hessian_lagrangian) on bilinear N=51 -- **Ipopt vs MadNLP** — full solve comparison on bilinear N=51 -- **Memory scaling study** — N ∈ {25, 51, 101} × state_dim ∈ {4, 8, 16} - -## Schema - -Results use `BenchmarkResult` / `MicroBenchmarkResult` from [HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl). - -Load with: -```julia -using HarmoniqsBenchmarks -results = load_results("benchmark/results/ipopt_vs_madnlp_N51_.jld2") -``` diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl deleted file mode 100644 index 28e6ee4..0000000 --- a/benchmark/benchmarks.jl +++ /dev/null @@ -1,272 +0,0 @@ -using TestItems - -@testitem "Evaluator micro-benchmarks: bilinear N=51" begin - using HarmoniqsBenchmarks, BenchmarkTools, DirectTrajOpt, NamedTrajectories - using SparseArrays, ExponentialAction, MathOptInterface, Random, Dates, Printf - const MOI = MathOptInterface - - Random.seed!(42) - N = 51; - Δt = 0.1; - u_bound = 0.1; - ω = 0.1 - Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) - Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) - Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) - G(u) = ω * Gz + u[1] * Gx + u[2] * Gy - - traj = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound*(2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - 
goal = (x = [0.0, 1.0, 0.0, 0.0],), - ) - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - DerivativeIntegrator(:du, :ddu, traj), - ] - J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) - prob = DirectTrajOptProblem(traj, J, integrators) - - evaluator, Z_vec = build_evaluator(prob) - dims = evaluator_dims(evaluator) - - g = zeros(dims.n_constraints) - grad = zeros(dims.n_variables) - H = zeros(dims.n_hessian_entries) - Jac = zeros(dims.n_jacobian_entries) - sigma = 1.0 - mu = ones(dims.n_constraints) - - benchmarks = Dict{Symbol,EvalBenchmark}( - :eval_objective => - trial_to_eval_benchmark(@benchmark(MOI.eval_objective($evaluator, $Z_vec))), - :eval_gradient => trial_to_eval_benchmark( - @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec)) - ), - :eval_constraint => trial_to_eval_benchmark( - @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec)) - ), - :eval_jacobian => trial_to_eval_benchmark( - @benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec)) - ), - :eval_hessian_lagrangian => trial_to_eval_benchmark( - @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu)) - ), - ) - - result = MicroBenchmarkResult( - package = "DirectTrajOpt", - package_version = "0.8.10", - commit = ( - try - String(strip(read(`git rev-parse --short HEAD`, String))) - catch - ; "unknown" - end - ), - benchmark_name = "evaluator_micro_bilinear_N51", - N = N, - state_dim = 4, - control_dim = 2, - eval_benchmarks = benchmarks, - julia_version = string(VERSION), - timestamp = Dates.now(), - runner = get(ENV, "BENCHMARK_RUNNER", "local"), - n_threads = Threads.nthreads(), - ) - - println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===") - for (name, eb) in sort(collect(result.eval_benchmarks), by = first) - @printf( - " %-25s median: %8.1f ns allocs: %d memory: %d bytes\n", - name, - eb.median_ns, - eb.allocs, - eb.memory_bytes - ) - end - - results_dir = 
joinpath(@__DIR__, "results") - save_micro_results(results_dir, result.benchmark_name, result) - println(" Saved to $results_dir/") -end - -@testitem "Ipopt vs MadNLP: bilinear N=51" begin - using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories - using SparseArrays, ExponentialAction, Random, Dates - import MadNLP - - const MadNLPSolverExt = [ - mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt - ][1] - - function make_bilinear_problem(; seed = 42) - Random.seed!(seed) - N = 51; - Δt = 0.1; - u_bound = 0.1; - ω = 0.1 - Gx = sparse(Float64[0 0 0 1; 0 0 1 0; 0 -1 0 0; -1 0 0 0]) - Gy = sparse(Float64[0 -1 0 0; 1 0 0 0; 0 0 0 -1; 0 0 1 0]) - Gz = sparse(Float64[0 0 1 0; 0 0 0 -1; -1 0 0 0; 0 1 0 0]) - G(u) = ω * Gz + u[1] * Gx + u[2] * Gy - - traj = NamedTrajectory( - ( - x = 2rand(4, N) .- 1, - u = u_bound*(2rand(2, N) .- 1), - du = randn(2, N), - ddu = randn(2, N), - Δt = fill(Δt, N), - ); - controls = (:ddu, :Δt), - timestep = :Δt, - bounds = (u = u_bound, Δt = (0.01, 0.5)), - initial = (x = [1.0, 0.0, 0.0, 0.0], u = zeros(2)), - final = (u = zeros(2),), - goal = (x = [0.0, 1.0, 0.0, 0.0],), - ) - integrators = [ - BilinearIntegrator(G, :x, :u, traj), - DerivativeIntegrator(:u, :du, traj), - DerivativeIntegrator(:du, :ddu, traj), - ] - J = QuadraticRegularizer(:u, traj, 1.0) + QuadraticRegularizer(:du, traj, 1.0) - return DirectTrajOptProblem(traj, J, integrators) - end - - prob_ipopt = make_bilinear_problem() - result_ipopt = benchmark_solve!( - prob_ipopt, - IpoptOptions(max_iter = 200, print_level = 0); - benchmark_name = "bilinear_N51_ipopt", - ) - - prob_madnlp = make_bilinear_problem() - result_madnlp = benchmark_solve!( - prob_madnlp, - MadNLPSolverExt.MadNLPOptions(max_iter = 200, print_level = 1); - benchmark_name = "bilinear_N51_madnlp", - ) - - println("\n=== Ipopt vs MadNLP: bilinear N=51 ===") - println( - " Ipopt: $(round(result_ipopt.wall_time_s, digits=3))s, $(result_ipopt.total_allocations_bytes ÷ 1024) KB 
alloc", - ) - println( - " MadNLP: $(round(result_madnlp.wall_time_s, digits=3))s, $(result_madnlp.total_allocations_bytes ÷ 1024) KB alloc", - ) - - results_dir = joinpath(@__DIR__, "results") - save_results(results_dir, "ipopt_vs_madnlp_N51", [result_ipopt, result_madnlp]) -end - -@testitem "Memory scaling: N and state_dim sweep" begin - using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories - using SparseArrays, ExponentialAction, Random, Dates, Printf - import MadNLP - - const MadNLPSolverExt = [ - mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt - ][1] - - function make_scaled_problem(; N, state_dim, n_controls = 2, seed = 42) - Random.seed!(seed) - G_drift = sparse(randn(state_dim, state_dim)) - G_drives = [sparse(randn(state_dim, state_dim)) for _ = 1:n_controls] - G(u) = G_drift + sum(u[i] * G_drives[i] for i = 1:n_controls) - - x_init = zeros(state_dim); - x_init[1] = 1.0 - x_goal = zeros(state_dim); - x_goal[min(2, state_dim)] = 1.0 - - traj = NamedTrajectory( - ( - x = randn(state_dim, N), - u = 0.1*randn(n_controls, N), - du = randn(n_controls, N), - Δt = fill(0.1, N), - ); - controls = (:du, :Δt), - timestep = :Δt, - bounds = (u = 1.0, Δt = (0.01, 0.5)), - initial = (x = x_init, u = zeros(n_controls)), - final = (u = zeros(n_controls),), - goal = (x = x_goal,), - ) - integrators = - [BilinearIntegrator(G, :x, :u, traj), DerivativeIntegrator(:u, :du, traj)] - J = QuadraticRegularizer(:u, traj, 1.0) - return DirectTrajOptProblem(traj, J, integrators) - end - - N_values = [25, 51, 101] - dim_values = [4, 8, 16] - results = BenchmarkResult[] - - println("\n=== Memory Scaling Study ===") - @printf( - " %5s | %5s | %12s | %12s | %12s | %12s\n", - "N", - "dim", - "Ipopt (s)", - "Ipopt (KB)", - "MadNLP (s)", - "MadNLP (KB)" - ) - @printf( - " %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n", - "-"^5, - "-"^5, - "-"^12, - "-"^12, - "-"^12, - "-"^12 - ) - - for N in N_values - for dim in dim_values - prob = make_scaled_problem(; 
N = N, state_dim = dim) - r_ipopt = benchmark_solve!( - prob, - IpoptOptions(max_iter = 50, print_level = 0); - benchmark_name = "scaling_N$(N)_d$(dim)_ipopt", - ) - push!(results, r_ipopt) - - prob = make_scaled_problem(; N = N, state_dim = dim) - r_madnlp = benchmark_solve!( - prob, - MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = 1); - benchmark_name = "scaling_N$(N)_d$(dim)_madnlp", - ) - push!(results, r_madnlp) - - @printf( - " %5d | %5d | %12.3f | %12d | %12.3f | %12d\n", - N, - dim, - r_ipopt.wall_time_s, - r_ipopt.total_allocations_bytes ÷ 1024, - r_madnlp.wall_time_s, - r_madnlp.total_allocations_bytes ÷ 1024 - ) - end - end - - results_dir = joinpath(@__DIR__, "results") - save_results(results_dir, "memory_scaling", results) - println("\n Saved $(length(results)) results to $results_dir/") -end diff --git a/test/compare_solvers.jl b/test/compare_solvers.jl index 5fd5c3f..d13cb06 100644 --- a/test/compare_solvers.jl +++ b/test/compare_solvers.jl @@ -7,8 +7,8 @@ using SparseArrays using NamedTrajectories using DirectTrajOpt -const MadNLPSolverExt = - [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] +# const MadNLPSolverExt = +# [mod for mod in reverse(Base.loaded_modules_order) if Symbol(mod) == :MadNLPSolverExt][1] function get_seeded_trajectory(seed; N = 10, Δt = 0.1, u_bound = 0.1, ω = 0.1) Random.seed!(seed) @@ -140,11 +140,11 @@ function get_solver_comparison(seed) return err, (ti, tm) end -wins = Dict(:ipopt => 0, :madnlp => 0) -for seed = 0:99 - err, (ti, tm) = get_solver_comparison(seed) - (err < 1e-3) || exit(1) - wins[(ti < tm) ? :ipopt : :madnlp] += 1 -end +# wins = Dict(:ipopt => 0, :madnlp => 0) +# for seed = 0:99 +# err, (ti, tm) = get_solver_comparison(seed) +# (err < 1e-3) || exit(1) +# wins[(ti < tm) ? 
:ipopt : :madnlp] += 1 +# end # @info "Wins: $(wins)" diff --git a/test/runtests.jl b/test/runtests.jl index d57ccc9..9f95075 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,5 +2,5 @@ using DirectTrajOpt using TestItemRunner -# Exclude benchmark/ testitems — those run in a separate project environment -@run_package_tests filter=ti -> !contains(ti.filename, "benchmark") +# Run all testitem tests in package +@run_package_tests