Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Runs the benchmark suite under benchmark/ and uploads benchmark/results/ as a
# workflow artifact. Triggered by version tags, by pull requests that touch the
# package sources / benchmarks / this workflow, and manually.
name: Benchmarks
on:
  push:
    tags: ['v*']
  pull_request:
    paths:
      - 'src/**'
      - 'benchmark/**'
      - '.github/workflows/benchmark.yml'
  workflow_dispatch:

# One run per workflow + ref; only pull-request runs are cancelled by a newer
# push, so tag and manual runs always finish.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}

jobs:
  benchmark:
    name: Benchmark suite
    runs-on: ubuntu-latest
    timeout-minutes: 60
    permissions:
      actions: write
      contents: read
    steps:
      # Artifact names may not contain '/' (or a few other characters), but a
      # branch name used with workflow_dispatch can (e.g. "feature/foo").
      # Replace anything outside [A-Za-z0-9._-] with '-' so the upload step
      # cannot fail on the name. PR numbers and v* tags pass through unchanged.
      # Runs first so the name exists even if a later step fails.
      - name: Compute artifact name
        id: artifact
        env:
          REF_LABEL: ${{ github.event.pull_request.number || github.ref_name }}
        run: |
          safe_label="$(printf '%s' "$REF_LABEL" | tr -c 'A-Za-z0-9._-' '-')"
          echo "name=benchmark-${safe_label}-${GITHUB_SHA}" >> "$GITHUB_OUTPUT"

      - uses: actions/checkout@v6

      - uses: julia-actions/setup-julia@v2
        with:
          version: '1.11'
          arch: x64

      - uses: julia-actions/cache@v2

      - name: Instantiate benchmark environment
        run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()'

      - name: Run benchmarks
        env:
          # Read by the benchmark items and recorded as the `runner` field.
          BENCHMARK_RUNNER: github-actions
        run: |
          julia --project=benchmark -t auto -e '
            using TestItemRunner
            TestItemRunner.run_tests("benchmark/")
          '

      # always(): keep whatever results were written even if a benchmark failed.
      - name: Upload benchmark artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.artifact.outputs.name }}
          path: benchmark/results/
          retention-days: 90
2 changes: 2 additions & 0 deletions benchmark/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
results/
Manifest.toml
1 change: 0 additions & 1 deletion benchmark/BenchmarkUtils.jl

This file was deleted.

23 changes: 23 additions & 0 deletions benchmark/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DirectTrajOpt = "c823fa1f-8872-4af5-b810-2b9b72bbbf56"
ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3"
HarmoniqsBenchmarks = "f45d0b76-2d23-4568-9599-481e0da131db"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
NamedTrajectories = "538bc3a1-5ab9-4fc3-b776-35ca1e893e08"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"

[sources]
DirectTrajOpt = {path = ".."}
# HarmoniqsBenchmarks.jl (HBJ) is not yet registered in General; pin it to a
# specific commit so benchmark results are reproducible. Bump this SHA (and
# regenerate the local Manifest) when HBJ ships a new feature we want to use.
# Replace this entry with a [compat] bound once HBJ is registered in General.
HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "5401542c477c0f2da6d66028c513e8a278f4875f"}
45 changes: 45 additions & 0 deletions benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# DirectTrajOpt Benchmarks

Benchmark suite for DirectTrajOpt.jl comparing Ipopt and MadNLP solver performance.

For results and analysis, see the [Benchmarks page](https://docs.harmoniqs.co/DirectTrajOpt.jl/dev/benchmarks/) in the documentation.

## Running locally

```bash
# From DirectTrajOpt.jl root
julia --project=benchmark -e 'using Pkg; Pkg.instantiate()'

julia --project=benchmark -t auto -e '
using TestItemRunner
TestItemRunner.run_tests("benchmark/")
'
```

Artifacts are saved as JLD2 files in `benchmark/results/` (gitignored).

## Benchmark suites

- **Evaluator micro-benchmarks** — `BenchmarkTools.@benchmark` timings for each MOI eval function (objective, gradient, constraint, jacobian, hessian_lagrangian) on bilinear N=51
- **Ipopt vs MadNLP** — full solve comparison on bilinear N=51
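- **Memory scaling study** — N ∈ {25, 51, 101} × state_dim ∈ {4, 8, 16}, each cell solved with both Ipopt and MadNLP on 3 seeded random instances (the printed table shows the per-cell median; every per-seed result is saved)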

## Schema

Results use `BenchmarkResult` / `MicroBenchmarkResult` from [HarmoniqsBenchmarks.jl](https://github.com/harmoniqs/HarmoniqsBenchmarks.jl).

Load with:
```julia
using HarmoniqsBenchmarks
results = load_results("benchmark/results/ipopt_vs_madnlp_N51_<sha>.jld2")
micro = load_micro_results("benchmark/results/evaluator_micro_bilinear_N51_<sha>.jld2")
```

## Regression detection

```julia
using HarmoniqsBenchmarks
baseline = load_results("benchmark/results/memory_scaling_<old_sha>.jld2")
current = load_results("benchmark/results/memory_scaling_<new_sha>.jld2")
rows = compare_results(baseline, current; regression_threshold=10.0)
```
258 changes: 258 additions & 0 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
using TestItems

# Micro-benchmarks each MOI evaluator callback (objective, gradient, constraint,
# Jacobian, Hessian of the Lagrangian) on the bilinear problem, prints a summary
# table, and saves a `MicroBenchmarkResult` under `benchmark/results/`.
@testitem "Evaluator micro-benchmarks: bilinear N=51" begin
    using HarmoniqsBenchmarks, BenchmarkTools, DirectTrajOpt, NamedTrajectories
    using SparseArrays, ExponentialAction, MathOptInterface, Random, Dates, Printf
    const MOI = MathOptInterface

    # Shared helpers for the benchmark items (presumably where
    # `make_bilinear_problem`, `build_evaluator`, `evaluator_dims` and
    # `problem_dims` come from — confirm against problem_utils.jl).
    include(joinpath(@__DIR__, "problem_utils.jl"))

    N = 51
    prob = make_bilinear_problem(; N = N, seed = 42)

    evaluator, Z_vec = build_evaluator(prob)
    dims = evaluator_dims(evaluator)

    # Output buffers for the in-place MOI callbacks, allocated once so the
    # benchmarks measure the callbacks rather than buffer allocation.
    g = zeros(dims.n_constraints)
    grad = zeros(dims.n_variables)
    H = zeros(dims.n_hessian_entries)
    Jac = zeros(dims.n_jacobian_entries)
    sigma = 1.0
    mu = ones(dims.n_constraints)

    # `$`-interpolation keeps BenchmarkTools from timing global-variable access.
    benchmarks = Dict{Symbol,EvalBenchmark}(
        :eval_objective =>
            trial_to_eval_benchmark(@benchmark(MOI.eval_objective($evaluator, $Z_vec))),
        :eval_gradient => trial_to_eval_benchmark(
            @benchmark(MOI.eval_objective_gradient($evaluator, $grad, $Z_vec))
        ),
        :eval_constraint => trial_to_eval_benchmark(
            @benchmark(MOI.eval_constraint($evaluator, $g, $Z_vec))
        ),
        :eval_jacobian => trial_to_eval_benchmark(
            @benchmark(MOI.eval_constraint_jacobian($evaluator, $Jac, $Z_vec))
        ),
        :eval_hessian_lagrangian => trial_to_eval_benchmark(
            @benchmark(MOI.eval_hessian_lagrangian($evaluator, $H, $Z_vec, $sigma, $mu))
        ),
    )

    # `pkgversion` (Julia >= 1.9) reports the version of the loaded package and
    # returns `nothing` when its Project.toml declares none. This replaces a
    # manual scan of `Pkg.dependencies()` and the dependency on `Pkg`.
    pkg_version = let v = pkgversion(DirectTrajOpt)
        isnothing(v) ? "unknown" : string(v)
    end

    pdims = problem_dims(prob)

    # Run git from this file's directory so the recorded SHA belongs to the
    # checkout that contains the benchmarks, whatever the caller's working
    # directory is. Best effort: fall back to "unknown" (with a warning) when
    # git is unavailable or the directory is not a repository.
    commit_sha = try
        String(strip(read(Cmd(`git rev-parse --short HEAD`; dir = @__DIR__), String)))
    catch e
        @warn "Failed to capture git commit SHA" exception = (e, catch_backtrace())
        "unknown"
    end

    result = MicroBenchmarkResult(
        package = "DirectTrajOpt",
        package_version = pkg_version,
        commit = commit_sha,
        # Derived from `N` so the name cannot drift from the problem size;
        # for N = 51 this is the same "evaluator_micro_bilinear_N51" as before.
        benchmark_name = "evaluator_micro_bilinear_N$(N)",
        N = N,
        state_dim = pdims.state_dim,
        control_dim = pdims.control_dim,
        eval_benchmarks = benchmarks,
        julia_version = string(VERSION),
        timestamp = Dates.now(),
        runner = get(ENV, "BENCHMARK_RUNNER", "local"),
        n_threads = Threads.nthreads(),
    )

    println("\n=== Evaluator Micro-benchmarks (bilinear N=$N) ===")
    # Sort by name so the table order is stable across runs (Dict order is not).
    for (name, eb) in sort(collect(result.eval_benchmarks), by = first)
        @printf(
            " %-25s median: %8.1f ns allocs: %d memory: %d bytes\n",
            name,
            eb.median_ns,
            eb.allocs,
            eb.memory_bytes
        )
    end

    results_dir = joinpath(@__DIR__, "results")
    save_micro_results(results_dir, result.benchmark_name, result)
    println(" Saved to $results_dir/")
end

# Full-solve comparison of Ipopt and MadNLP on the bilinear N=51 problem. Each
# solver gets its own freshly built problem (same seed); both results are
# printed and saved together under `benchmark/results/`.
@testitem "Ipopt vs MadNLP: bilinear N=51" begin
    using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories
    using SparseArrays, ExponentialAction, Random, Dates
    import MadNLP

    include(joinpath(@__DIR__, "problem_utils.jl"))

    runner_label = get(ENV, "BENCHMARK_RUNNER", "local")

    # Throwaway two-iteration solves on a tiny problem, Ipopt first and then
    # MadNLP, so first-call compilation (solver extension load, KKT/AD codegen)
    # is paid before the timed solves. The warmup problems are discarded.
    for warmup_options in (
        IpoptOptions(max_iter = 2, print_level = 0),
        MadNLPOptions(max_iter = 2, print_level = 6),
    )
        tiny_prob = make_bilinear_problem(; N = 11, seed = 0)
        DirectTrajOpt.solve!(tiny_prob; options = warmup_options)
    end

    # Timed Ipopt solve.
    ipopt_problem = make_bilinear_problem(; N = 51, seed = 42)
    ipopt_result = benchmark_solve!(
        ipopt_problem,
        IpoptOptions(max_iter = 200, print_level = 0);
        benchmark_name = "bilinear_N51_ipopt",
        runner = runner_label,
    )

    # Timed MadNLP solve on an identically seeded, independent problem.
    madnlp_problem = make_bilinear_problem(; N = 51, seed = 42)
    madnlp_result = benchmark_solve!(
        madnlp_problem,
        MadNLPOptions(max_iter = 200, print_level = 6);
        benchmark_name = "bilinear_N51_madnlp",
        runner = runner_label,
    )

    println("\n=== Ipopt vs MadNLP: bilinear N=51 ===")
    ipopt_secs = round(ipopt_result.wall_time_s, digits = 3)
    ipopt_kb = ipopt_result.total_allocations_bytes ÷ 1024
    println(" Ipopt: $(ipopt_secs)s, $(ipopt_kb) KB alloc")
    madnlp_secs = round(madnlp_result.wall_time_s, digits = 3)
    madnlp_kb = madnlp_result.total_allocations_bytes ÷ 1024
    println(" MadNLP: $(madnlp_secs)s, $(madnlp_kb) KB alloc")

    save_results(
        joinpath(@__DIR__, "results"),
        "ipopt_vs_madnlp_N51",
        [ipopt_result, madnlp_result],
    )
end

# Sweeps problem size (N × state_dim), solving every cell with both Ipopt and
# MadNLP on several seeded instances. Prints a table of per-cell medians and
# saves every individual result under `benchmark/results/`.
@testitem "Memory scaling: N and state_dim sweep" begin
    using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories
    using SparseArrays, ExponentialAction, Random, Dates, Printf, Statistics
    import MadNLP

    include(joinpath(@__DIR__, "problem_utils.jl"))

    runner_label = get(ENV, "BENCHMARK_RUNNER", "local")

    # JIT warmup: run both solvers (Ipopt, then MadNLP) for two iterations on a
    # tiny throwaway problem so compilation of the solver extensions and the AD
    # pipeline does not bias whichever sweep cell happens to run first.
    for warmup_options in (
        IpoptOptions(max_iter = 2, print_level = 0),
        MadNLPOptions(max_iter = 2, print_level = 6),
    )
        tiny_prob = make_scaled_problem(; N = 11, state_dim = 2, seed = 0)
        DirectTrajOpt.solve!(tiny_prob; options = warmup_options)
    end

    horizon_values = [25, 51, 101]
    state_dims = [4, 8, 16]
    # Each (N, dim) cell is summarised by the median over `n_seeds` random
    # instances, because a single random instance is too noisy to trust: an
    # earlier single-shot run of N=25, dim=8 (seed 1000 + 100*25 + 8 = 3508)
    # had Ipopt finish in 11ms with 22KB allocated, since that seed gave a
    # degenerate initial point resolved at iteration 0. The median suppresses
    # such outliers, and reproducibility is kept because seeds are
    # deterministic and every per-seed BenchmarkResult is saved to JLD2 for
    # downstream analysis of the raw distribution.
    n_seeds = 3
    all_results = BenchmarkResult[]

    println("\n=== Memory Scaling Study (median over $n_seeds seeds per cell) ===")
    @printf(
        " %5s | %5s | %12s | %12s | %12s | %12s\n",
        "N",
        "dim",
        "Ipopt (s)",
        "Ipopt (KB)",
        "MadNLP (s)",
        "MadNLP (KB)"
    )
    narrow_rule = "-"^5
    wide_rule = "-"^12
    @printf(
        " %5s-+-%5s-+-%12s-+-%12s-+-%12s-+-%12s\n",
        narrow_rule,
        narrow_rule,
        wide_rule,
        wide_rule,
        wide_rule,
        wide_rule
    )

    # N varies slowest, dim fastest — the same order as a nested N/dim loop.
    for N in horizon_values, dim in state_dims
        ipopt_times = Float64[]
        madnlp_times = Float64[]
        ipopt_kbs = Int[]
        madnlp_kbs = Int[]

        for k in 1:n_seeds
            # Deterministic, distinct seed for each (N, dim, k). Both solvers
            # are given a problem built from this same seed, so each per-seed
            # comparison is like-for-like; only the instance changes with k.
            cell_seed = 1000 + 100 * N + dim + 10_000 * (k - 1)

            ipopt_prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed)
            ipopt_run = benchmark_solve!(
                ipopt_prob,
                IpoptOptions(max_iter = 50, print_level = 0);
                benchmark_name = "scaling_N$(N)_d$(dim)_ipopt_s$(k)",
                runner = runner_label,
            )
            push!(all_results, ipopt_run)
            push!(ipopt_times, ipopt_run.wall_time_s)
            push!(ipopt_kbs, ipopt_run.total_allocations_bytes ÷ 1024)

            # Rebuild the problem from the same seed rather than reusing the
            # one Ipopt has just solved.
            madnlp_prob = make_scaled_problem(; N = N, state_dim = dim, seed = cell_seed)
            madnlp_run = benchmark_solve!(
                madnlp_prob,
                MadNLPOptions(max_iter = 50, print_level = 6);
                benchmark_name = "scaling_N$(N)_d$(dim)_madnlp_s$(k)",
                runner = runner_label,
            )
            push!(all_results, madnlp_run)
            push!(madnlp_times, madnlp_run.wall_time_s)
            push!(madnlp_kbs, madnlp_run.total_allocations_bytes ÷ 1024)
        end

        @printf(
            " %5d | %5d | %12.3f | %12d | %12.3f | %12d\n",
            N,
            dim,
            median(ipopt_times),
            round(Int, median(ipopt_kbs)),
            median(madnlp_times),
            round(Int, median(madnlp_kbs))
        )
    end

    results_dir = joinpath(@__DIR__, "results")
    save_results(results_dir, "memory_scaling", all_results)
    println("\n Saved $(length(all_results)) results to $results_dir/")
end
Loading
Loading