Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/alloc-profile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Runs the allocation-profile test item (benchmark/alloc_profile.jl) and
# uploads whatever lands in benchmark/results/allocs/ as a build artifact.
name: Alloc Profile
on:
  push:
    tags: ['v*']
  pull_request:
    # Only re-profile when the profile script, its helpers, its environment,
    # or this workflow itself changes.
    paths:
      - 'benchmark/alloc_profile.jl'
      - 'benchmark/problem_utils.jl'
      - 'benchmark/Project.toml'
      - '.github/workflows/alloc-profile.yml'
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  # Superseded PR runs are cancelled; tag and manual runs always finish.
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}

jobs:
  alloc-profile:
    name: Alloc profile (Ipopt + MadNLP)
    runs-on: ubuntu-latest
    # Profile.Allocs has high per-allocation overhead that doesn't scale down
    # with sample_rate — each Ipopt/MadNLP solve under sampling takes ~30-40
    # min on GH Actions runners even at max_iter=30. Two solves + startup =
    # ~75 min observed in practice; 90 min gives a comfortable cushion.
    timeout-minutes: 90
    permissions:
      # NOTE(review): presumably required so julia-actions/cache can prune
      # stale caches — confirm against that action's README.
      actions: write
      contents: read
    steps:
      - uses: actions/checkout@v6

      - uses: julia-actions/setup-julia@v2
        with:
          version: '1.11'
          arch: x64

      - uses: julia-actions/cache@v2

      - name: Instantiate benchmark environment
        run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()'

      - name: Run alloc profile
        env:
          BENCHMARK_RUNNER: github-actions
        run: |
          julia --project=benchmark -t auto -e '
            using TestItemRunner
            TestItemRunner.run_tests("benchmark/"; filter = ti -> occursin("alloc_profile", ti.filename))
          '

      # Artifact names may not contain '/', but `github.ref_name` does for
      # branches such as `feature/foo` (reachable via workflow_dispatch).
      # Replace every '/' with '-' so the upload step cannot fail on the name.
      # The ref is passed through `env` rather than interpolated into the
      # script so an unusual ref name cannot inject shell syntax.
      - name: Compute artifact name
        id: artifact
        if: always()
        env:
          REF_LABEL: ${{ github.event.pull_request.number || github.ref_name }}
        run: echo "name=alloc-profile-${REF_LABEL//\//-}-${GITHUB_SHA}" >> "$GITHUB_OUTPUT"

      - name: Upload alloc-profile artifacts
        # Upload even when the profile step failed or timed out, so partial
        # results remain available for inspection.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.artifact.outputs.name }}
          path: benchmark/results/allocs/
          retention-days: 90
7 changes: 5 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ jobs:
- name: Instantiate benchmark environment
run: julia --project=benchmark -e 'using Pkg; Pkg.instantiate()'

- name: Run benchmarks
- name: Run benchmarks (excluding alloc profile)
env:
BENCHMARK_RUNNER: github-actions
run: |
julia --project=benchmark -t auto -e '
using TestItemRunner
TestItemRunner.run_tests("benchmark/")
# Alloc profile testitem runs in `.github/workflows/alloc-profile.yml`
# because Profile.Allocs adds ~30-40min per solve regardless of
# max_iter, making it impractical to gate every PR on it.
TestItemRunner.run_tests("benchmark/"; filter = ti -> !occursin("alloc_profile", ti.filename))
'

- name: Upload benchmark artifacts
Expand Down
6 changes: 5 additions & 1 deletion benchmark/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,8 @@ DirectTrajOpt = {path = ".."}
# results are reproducible. Bump this SHA (and the local Manifest) when HBJ
# ships a new feature we want to use. Drop in favor of [compat] once HBJ
# registers in General.
HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "5401542c477c0f2da6d66028c513e8a278f4875f"}
#
# Bumped from 5401542c (v0.2.0 prep) to c38418cb (post-#12) to pick up the
# alloc profile analyzer (`top_alloc_types`, `report_alloc_profile`, …)
# used by `benchmark/alloc_profile.jl`.
HarmoniqsBenchmarks = {url = "https://github.com/harmoniqs/HarmoniqsBenchmarks.jl", rev = "c38418cb7f932f2ff9a9c6c6eacf9a11ff1018c1"}
89 changes: 89 additions & 0 deletions benchmark/alloc_profile.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
using TestItems

@testitem "Alloc profile: bilinear N=51 (Ipopt + MadNLP)" begin
    using HarmoniqsBenchmarks, DirectTrajOpt, NamedTrajectories
    using SparseArrays, ExponentialAction, Random, Dates
    import MadNLP

    # `make_bilinear_problem` and `problem_dims` are not defined in this file;
    # presumably they come from problem_utils.jl — confirm.
    include(joinpath(@__DIR__, "problem_utils.jl"))

    runner = get(ENV, "BENCHMARK_RUNNER", "local")

    # `Profile.Allocs` slows the solve dramatically — `sample_rate = 1.0` is
    # intractable for a full Ipopt/MadNLP run (>15 min on N=10 in early
    # experiments), and even `0.01` runs MadNLP at ~3000× slowdown vs the
    # un-profiled solve. `0.01` keeps the trace tractable while still giving
    # statistically useful per-frame breakdowns; combined with `max_iter = 30`
    # (representative per-iter allocation pattern — convergence isn't the
    # goal) the testitem completes well inside the workflow timeout. The
    # `1 / sample_rate` scaling applied by `report_alloc_profile` extrapolates
    # back to total bytes.
    sample_rate = 0.01

    # Horizon and seed of the profiled problem, shared by both solvers.
    profile_N = 51
    profile_seed = 42

    # JIT warmup so first-call compile of Ipopt/MadNLP extensions, KKT/AD
    # codegen, and the Profile.Allocs machinery itself doesn't dominate the
    # sampled trace. Each solver gets a fresh small problem; results are
    # discarded.
    for warmup_options in (
        IpoptOptions(max_iter = 2, print_level = 0),
        MadNLPOptions(max_iter = 2, print_level = 6),
    )
        warmup_prob = make_bilinear_problem(; N = 11, seed = 0)
        DirectTrajOpt.solve!(warmup_prob; options = warmup_options)
    end

    results_dir = joinpath(@__DIR__, "results", "allocs")
    pdims = problem_dims(make_bilinear_problem(; N = profile_N, seed = profile_seed))

    # Profile a single solver on a fresh bilinear problem, save the profile
    # under `results_dir`, and print a summary plus the detailed report.
    #
    # `make_options` is a zero-argument function returning the solver options.
    # It is called *inside* the profiled closure so that constructing the
    # options object is part of the sampled region, exactly as when the
    # options were built inline in the closure.
    function profile_solver(
        make_options;
        solver,
        benchmark_name,
        N,
        seed,
        pdims,
        sample_rate,
        runner,
        results_dir,
    )
        prob = make_bilinear_problem(; N = N, seed = seed)
        profile = benchmark_memory!(
            () -> DirectTrajOpt.solve!(prob; options = make_options());
            package = "DirectTrajOpt",
            solver = solver,
            benchmark_name = benchmark_name,
            N = N,
            state_dim = pdims.state_dim,
            control_dim = pdims.control_dim,
            sample_rate = sample_rate,
            warmup = false, # warmup is done explicitly before profiling
            runner = runner,
        )
        path = save_alloc_profile(results_dir, profile.benchmark_name, profile)
        println("\n=== Alloc profile: $solver (bilinear N=$N, sample_rate=$sample_rate) ===")
        println("  samples=$(profile.total_count)  total≈$(profile.total_bytes) B")
        println("  saved $path")
        report_alloc_profile(profile; k_types = 10, k_leaves = 15, k_frames = 15)
        return nothing
    end

    # Ipopt
    profile_solver(
        () -> IpoptOptions(max_iter = 30, print_level = 0);
        solver = "Ipopt",
        benchmark_name = "alloc_bilinear_N51_ipopt",
        N = profile_N,
        seed = profile_seed,
        pdims = pdims,
        sample_rate = sample_rate,
        runner = runner,
        results_dir = results_dir,
    )

    # MadNLP
    profile_solver(
        () -> MadNLPOptions(max_iter = 30, print_level = 6);
        solver = "MadNLP",
        benchmark_name = "alloc_bilinear_N51_madnlp",
        N = profile_N,
        seed = profile_seed,
        pdims = pdims,
        sample_rate = sample_rate,
        runner = runner,
        results_dir = results_dir,
    )
end
Loading