|
| 1 | +# Benchmark for PR #4032: caching strategies for moi_function on |
| 2 | +# GenericNonlinearExpr. |
| 3 | +# |
| 4 | +# Four modes are compared: |
| 5 | +# :none — master behaviour: NO cache. `add_constraint` runs |
| 6 | +# `check_belongs_to_model` and then `moi_function`, both |
| 7 | +# of which traverse the expression independently. |
| 8 | +# :per_call_oid — odow's actual PR snippet: a fresh `Dict{UInt64, ...}` |
| 9 | +# allocated INSIDE every call to `moi_function`, keyed by |
| 10 | +# `objectid(arg)` (so cache lookups are O(1)). |
| 11 | +# `check_belongs_to_model` still runs as a separate walk. |
| 12 | +# :model_struct — this branch as-is: cache stored on the model, keyed by |
| 13 | +# the JuMP `GenericNonlinearExpr` itself. Default `==`/ |
| 14 | +# `hash` on that struct is structural and walks the whole |
| 15 | +# sub-tree, so a cache hit still costs O(size) — which |
| 16 | +# can defeat the cache for deeply aliased DAGs. |
| 17 | +# :model_oid — model-level cache, but keyed by `objectid` like odow's |
| 18 | +# snippet. Shows what the branch could evolve to once |
| 19 | +# blegat's "use hash as keys" suggestion is applied. |
| 20 | +# |
| 21 | +# Run from the repo root, in an environment that has BenchmarkTools added on |
| 22 | +# top of dev'd JuMP (do NOT add BenchmarkTools to JuMP's own Project.toml): |
| 23 | +# |
| 24 | +# julia> using Pkg |
| 25 | +# julia> Pkg.activate(temp = true) |
| 26 | +# julia> Pkg.develop(path = ".") |
| 27 | +# julia> Pkg.add("BenchmarkTools") |
| 28 | +# julia> include("benchmark_cache.jl"); run_all() |
| 29 | +# |
| 30 | +# The script monkey-patches `JuMP.moi_function(::GenericNonlinearExpr, |
| 31 | +# ::GenericModel)` so the same JuMP build can be exercised under each mode |
| 32 | +# without rebuilding the package. |
| 33 | +# |
| 34 | +# Sample numbers from this branch (Julia 1.12, K=14, etc.): |
| 35 | +# |
| 36 | +# A: aliased tree (one big DAG, K=14) ~46 ms — all four modes within |
| 37 | +# 1% (MOI-tree alloc cost is |
| 38 | +# small vs. the rest of |
| 39 | +# add_constraint here). |
| 40 | +# B: many independent constraints (N=5000) none 84 / per_call_oid 86 / |
| 41 | +# model_struct 91 / model_oid 86 |
| 42 | +# → the *current branch* is the |
| 43 | +# slowest; struct keys hurt the |
| 44 | +# common case. Switching to |
| 45 | +# objectid keys fixes it. |
| 46 | +# C: shared big subexpr (N=200, M=200) model_struct 171 / model_oid 170 |
| 47 | +# vs none 191 / per_call_oid 195 |
| 48 | +# → 12–14% win for the model- |
| 49 | +# level cache. Per-call cannot |
| 50 | +# see the cross-constraint |
| 51 | +# sharing. |
| 52 | +# D: many aliased trees (M=200, K=8) all ~140 ms (per-constraint |
| 53 | +# sharing is small and JuMP's |
| 54 | +# `+` would flatten without the |
| 55 | +# explicit GenericNonlinearExpr |
| 56 | +# construction we use here). |
| 57 | +# |
| 58 | +# Takeaways: |
| 59 | +# * The model-level scope is the right call when subexpressions are shared |
| 60 | +# across constraints (scenario C). |
| 61 | +# * Using the JuMP struct as the cache key (current branch) is what costs |
| 62 | +# in scenario B — the deep `==`/`hash` adds overhead per node. Switching |
| 63 | +# to `objectid` keys (blegat's "use hash as keys" suggestion) recovers |
| 64 | +# most of it. |
| 65 | +# * Scenario A's exponential blow-up is more visible in MEMORY than in |
| 66 | +# TIME at K=14; bump K higher (or use the bench.jl example with |
| 67 | +# |R|*|S| ≈ 3600) to make the time gap obvious. |
| 68 | + |
| 69 | +using Printf |
| 70 | +using JuMP |
| 71 | +using BenchmarkTools |
| 72 | +import MathOptInterface as MOI |
| 73 | + |
| 74 | +const _G = JuMP.GenericNonlinearExpr |
| 75 | + |
| 76 | +# --------------------------------------------------------------------------- |
| 77 | +# Three implementations of the GenericNonlinearExpr → ScalarNonlinearFunction |
| 78 | +# walk. They share the same skeleton; only the cache differs. |
| 79 | +# --------------------------------------------------------------------------- |
| 80 | + |
| 81 | +function _moi_no_cache(f::_G{V}) where {V} |
| 82 | + ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args)) |
| 83 | + stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[] |
| 84 | + for i in length(f.args):-1:1 |
| 85 | + if f.args[i] isa _G{V} |
| 86 | + push!(stack, (ret, i, f.args[i])) |
| 87 | + else |
| 88 | + ret.args[i] = JuMP.moi_function(f.args[i]) |
| 89 | + end |
| 90 | + end |
| 91 | + while !isempty(stack) |
| 92 | + parent, i, arg = pop!(stack) |
| 93 | + child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args)) |
| 94 | + parent.args[i] = child |
| 95 | + for j in length(arg.args):-1:1 |
| 96 | + if arg.args[j] isa _G{V} |
| 97 | + push!(stack, (child, j, arg.args[j])) |
| 98 | + else |
| 99 | + child.args[j] = JuMP.moi_function(arg.args[j]) |
| 100 | + end |
| 101 | + end |
| 102 | + end |
| 103 | + return ret |
| 104 | +end |
| 105 | + |
| 106 | +# Cache walk parameterised by the `keyfn` that maps a JuMP NL expression to |
| 107 | +# the dict key. `keyfn = identity` matches the branch (struct keys, deep |
| 108 | +# hash). `keyfn = objectid` matches odow's snippet (UInt64 keys, O(1) hash). |
| 109 | +function _moi_with_cache(f::_G{V}, cache, keyfn::F) where {V,F} |
| 110 | + fk = keyfn(f) |
| 111 | + if haskey(cache, fk) |
| 112 | + return cache[fk] |
| 113 | + end |
| 114 | + ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args)) |
| 115 | + stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[] |
| 116 | + for i in length(f.args):-1:1 |
| 117 | + if f.args[i] isa _G{V} |
| 118 | + push!(stack, (ret, i, f.args[i])) |
| 119 | + else |
| 120 | + ret.args[i] = JuMP.moi_function(f.args[i]) |
| 121 | + end |
| 122 | + end |
| 123 | + while !isempty(stack) |
| 124 | + parent, i, arg = pop!(stack) |
| 125 | + argk = keyfn(arg) |
| 126 | + if haskey(cache, argk) |
| 127 | + parent.args[i] = cache[argk] |
| 128 | + continue |
| 129 | + end |
| 130 | + child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args)) |
| 131 | + parent.args[i] = child |
| 132 | + for j in length(arg.args):-1:1 |
| 133 | + if arg.args[j] isa _G{V} |
| 134 | + push!(stack, (child, j, arg.args[j])) |
| 135 | + else |
| 136 | + child.args[j] = JuMP.moi_function(arg.args[j]) |
| 137 | + end |
| 138 | + end |
| 139 | + cache[argk] = child |
| 140 | + end |
| 141 | + cache[fk] = ret |
| 142 | + return ret |
| 143 | +end |
| 144 | + |
| 145 | +# --------------------------------------------------------------------------- |
| 146 | +# Switching mechanism. We re-define JuMP's `moi_function(::GenericNonlinearExpr, |
| 147 | +# ::GenericModel)` so that `add_constraint` dispatches differently per mode. |
| 148 | +# |
| 149 | +# Modes :none and :per_call mirror what those PR options would actually |
| 150 | +# look like in production: a separate `check_belongs_to_model` walk, then |
| 151 | +# the (cached or uncached) build walk. |
| 152 | +# Mode :model_level matches the branch: a single integrated walk. |
| 153 | +# --------------------------------------------------------------------------- |
| 154 | + |
| 155 | +const BENCH_MODE = Ref(:model_struct) |
| 156 | + |
| 157 | +# Per-mode caches that need a UInt64 (objectid) key type rather than the |
| 158 | +# branch's `Dict{Any,...}`. We store them on the model as a side table so |
| 159 | +# they survive across constraints inside one build but are cleared between |
| 160 | +# builds (because each scenario builds a fresh `Model()`). |
| 161 | +const _OID_CACHE = IdDict{JuMP.GenericModel,Dict{UInt64,MOI.ScalarNonlinearFunction}}() |
| 162 | + |
| 163 | +function _oid_cache(model) |
| 164 | + get!(_OID_CACHE, model) do |
| 165 | + Dict{UInt64,MOI.ScalarNonlinearFunction}() |
| 166 | + end |
| 167 | +end |
| 168 | + |
| 169 | +@eval JuMP function moi_function( |
| 170 | + f::JuMP.GenericNonlinearExpr{V}, |
| 171 | + model::JuMP.GenericModel, |
| 172 | +) where {V} |
| 173 | + mode = Main.BENCH_MODE[] |
| 174 | + if mode === :none |
| 175 | + JuMP.check_belongs_to_model(f, model) |
| 176 | + return Main._moi_no_cache(f) |
| 177 | + elseif mode === :per_call_oid |
| 178 | + JuMP.check_belongs_to_model(f, model) |
| 179 | + cache = Dict{UInt64,$(MOI.ScalarNonlinearFunction)}() |
| 180 | + return Main._moi_with_cache(f, cache, objectid) |
| 181 | + elseif mode === :model_struct |
| 182 | + # branch behaviour: belongs-check is integrated; struct keys (deep hash) |
| 183 | + return Main._moi_with_cache(f, model.subexpressions, identity) |
| 184 | + else # :model_oid |
| 185 | + return Main._moi_with_cache(f, Main._oid_cache(model), objectid) |
| 186 | + end |
| 187 | +end |
| 188 | + |
| 189 | +# --------------------------------------------------------------------------- |
| 190 | +# Scenarios. Each `scenario_*` returns a 0-arg closure that builds a fresh |
| 191 | +# model from scratch, so each BenchmarkTools sample sees an empty |
| 192 | +# `model.subexpressions` cache. |
| 193 | +# --------------------------------------------------------------------------- |
| 194 | + |
| 195 | +# Scenario A — odow's case: ONE constraint with a deeply aliased binary tree. |
| 196 | +# `e_k = e_{k-1} + e_{k-1}`. With no cache this expands to 2^K leaves; with |
| 197 | +# either cache it stays linear in K. |
| 198 | +# |
| 199 | +# We build the `+` node directly via the GenericNonlinearExpr constructor so |
| 200 | +# JuMP's `+` operator overload does NOT flatten the n-ary sum. Without this, |
| 201 | +# `e + e` returns a flat `:+(args...)` of 2^K identical leaf references and |
| 202 | +# the master walk only iterates 2^K times instead of doing 2^K *recursive* |
| 203 | +# descents through K levels — which would mask the worst case the PR is |
| 204 | +# meant to fix. |
| 205 | +function scenario_aliased_tree(; K::Int = 14) |
| 206 | + V = JuMP.VariableRef |
| 207 | + return function () |
| 208 | + model = Model() |
| 209 | + @variable(model, x) |
| 210 | + e = sin(x) |
| 211 | + for _ in 1:K |
| 212 | + e = JuMP.GenericNonlinearExpr{V}(:+, e, e) |
| 213 | + end |
| 214 | + @constraint(model, e <= 1) |
| 215 | + return model |
| 216 | + end |
| 217 | +end |
| 218 | + |
| 219 | +# Scenario B — many small INDEPENDENT NL constraints. No subexpression |
| 220 | +# sharing within or across constraints. The model-level cache pays an |
| 221 | +# insertion + hash cost for every new node; the per-call cache pays an |
| 222 | +# extra Dict allocation per constraint; master pays nothing. |
| 223 | +# |
| 224 | +# This is the case blegat warned about in the PR thread: "we start |
| 225 | +# creating many small dictionaries if there are a lot of constraints, |
| 226 | +# but that's probably negligible". The benchmark tells us whether it |
| 227 | +# really is negligible. |
| 228 | +function scenario_many_independent(; N::Int = 5_000) |
| 229 | + return function () |
| 230 | + model = Model() |
| 231 | + @variable(model, x[1:N]) |
| 232 | + for i in 1:N |
| 233 | + @constraint(model, sin(x[i]) + cos(x[i]) * exp(x[i]) <= 1) |
| 234 | + end |
| 235 | + return model |
| 236 | + end |
| 237 | +end |
| 238 | + |
| 239 | +# Scenario C — many constraints sharing one big subexpression. |
| 240 | +# `big = sum(sin(x[i]) for i in 1:N)` appears once in each of M constraints. |
| 241 | +# The per-call cache cannot help here (each call sees `big` only once); only |
| 242 | +# the model-level cache deduplicates `big` across constraints. This is the |
| 243 | +# case that genuinely motivates the model-level cache. |
| 244 | +function scenario_shared_big(; N::Int = 200, M::Int = 200) |
| 245 | + return function () |
| 246 | + model = Model() |
| 247 | + @variable(model, x[1:N]) |
| 248 | + big = sum(sin(x[i]) for i in 1:N) |
| 249 | + for j in 1:M |
| 250 | + @constraint(model, big * x[j] <= 1) |
| 251 | + end |
| 252 | + return model |
| 253 | + end |
| 254 | +end |
| 255 | + |
| 256 | +# Scenario D — many constraints, each with internal aliasing (a smaller |
| 257 | +# binary tree per constraint, distinct leaf variable per constraint). |
| 258 | +# Both per-call and model-level caches help WITHIN each constraint; |
| 259 | +# across constraints there is nothing to share. This is the natural |
| 260 | +# territory of odow's per-call dict. |
| 261 | +function scenario_many_aliased(; M::Int = 200, K::Int = 8) |
| 262 | + V = JuMP.VariableRef |
| 263 | + return function () |
| 264 | + model = Model() |
| 265 | + @variable(model, x[1:M]) |
| 266 | + for i in 1:M |
| 267 | + e = sin(x[i]) |
| 268 | + for _ in 1:K |
| 269 | + e = JuMP.GenericNonlinearExpr{V}(:+, e, e) |
| 270 | + end |
| 271 | + @constraint(model, e <= 1) |
| 272 | + end |
| 273 | + return model |
| 274 | + end |
| 275 | +end |
| 276 | + |
| 277 | +# --------------------------------------------------------------------------- |
| 278 | +# Runner |
| 279 | +# --------------------------------------------------------------------------- |
| 280 | + |
| 281 | +const SCENARIOS = [ |
| 282 | + ("A: aliased tree (one big DAG, K=14)", scenario_aliased_tree(K = 14)), |
| 283 | + ("B: many independent constraints (N=5000)", scenario_many_independent(N = 5_000)), |
| 284 | + ("C: shared big subexpr (N=200, M=200)", scenario_shared_big(N = 200, M = 200)), |
| 285 | + ("D: many aliased trees (M=200, K=8)", scenario_many_aliased(M = 200, K = 8)), |
| 286 | +] |
| 287 | + |
| 288 | +const MODES = [:none, :per_call_oid, :model_struct, :model_oid] |
| 289 | + |
| 290 | +function run_all() |
| 291 | + println("Benchmarking PR #4032 cache strategies\n") |
| 292 | + for (label, build) in SCENARIOS |
| 293 | + println("="^72) |
| 294 | + println(label) |
| 295 | + println("="^72) |
| 296 | + # Warm up each mode (compile its method specializations) before |
| 297 | + # timing, so the first sample isn't biased by JIT. |
| 298 | + for m in MODES |
| 299 | + BENCH_MODE[] = m |
| 300 | + try |
| 301 | + build() |
| 302 | + catch err |
| 303 | + @warn "warm-up failed for mode $m" exception = err |
| 304 | + end |
| 305 | + end |
| 306 | + results = Dict{Symbol,Any}() |
| 307 | + for m in MODES |
| 308 | + BENCH_MODE[] = m |
| 309 | + b = @benchmark $build() samples = 5 evals = 1 seconds = 30 |
| 310 | + results[m] = b |
| 311 | + t_ms = minimum(b).time / 1e6 |
| 312 | + mem_mb = minimum(b).memory / 1024^2 |
| 313 | + allocs = minimum(b).allocs |
| 314 | + @printf(" %-13s %10.2f ms %9.2f MiB %12d allocs\n", |
| 315 | + string(m), t_ms, mem_mb, allocs) |
| 316 | + end |
| 317 | + # Ratios versus the branch's model_struct baseline so we can see |
| 318 | + # where each strategy wins, ties, or loses. |
| 319 | + base = minimum(results[:model_struct]).time |
| 320 | + println() |
| 321 | + for m in MODES |
| 322 | + r = minimum(results[m]).time / base |
| 323 | + @printf(" ratio(%-13s / model_struct) = %.2f\n", string(m), r) |
| 324 | + end |
| 325 | + println() |
| 326 | + end |
| 327 | +end |
| 328 | + |
| 329 | +if abspath(PROGRAM_FILE) == @__FILE__ |
| 330 | + run_all() |
| 331 | +end |
0 commit comments