Skip to content

Commit b325a94

Browse files
committed
Add bench
1 parent ae7365c commit b325a94

1 file changed

Lines changed: 331 additions & 0 deletions

File tree

test/perf/benchmark_cache.jl

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
# Benchmark for PR #4032: caching strategies for moi_function on
2+
# GenericNonlinearExpr.
3+
#
4+
# Four modes are compared:
5+
# :none — master behaviour: NO cache. `add_constraint` runs
6+
# `check_belongs_to_model` and then `moi_function`, both
7+
# of which traverse the expression independently.
8+
# :per_call_oid — odow's actual PR snippet: a fresh `Dict{UInt64, ...}`
9+
# allocated INSIDE every call to `moi_function`, keyed by
10+
# `objectid(arg)` (so cache lookups are O(1)).
11+
# `check_belongs_to_model` still runs as a separate walk.
12+
# :model_struct — this branch as-is: cache stored on the model, keyed by
13+
# the JuMP `GenericNonlinearExpr` itself. Default `==`/
14+
# `hash` on that struct is structural and walks the whole
15+
# sub-tree, so a cache hit still costs O(size) — which
16+
# can defeat the cache for deeply aliased DAGs.
17+
# :model_oid — model-level cache, but keyed by `objectid` like odow's
18+
# snippet. Shows what the branch could evolve to once
19+
# blegat's "use hash as keys" suggestion is applied.
20+
#
21+
# Run from the repo root, in an environment that has BenchmarkTools added on
22+
# top of dev'd JuMP (do NOT add BenchmarkTools to JuMP's own Project.toml):
23+
#
24+
# julia> using Pkg
25+
# julia> Pkg.activate(temp = true)
26+
# julia> Pkg.develop(path = ".")
27+
# julia> Pkg.add("BenchmarkTools")
28+
# julia> include("benchmark_cache.jl"); run_all()
29+
#
30+
# The script monkey-patches `JuMP.moi_function(::GenericNonlinearExpr,
31+
# ::GenericModel)` so the same JuMP build can be exercised under each mode
32+
# without rebuilding the package.
33+
#
34+
# Sample numbers from this branch (Julia 1.12, K=14, etc.):
35+
#
36+
# A: aliased tree (one big DAG, K=14) ~46 ms — all four modes within
37+
# 1% (MOI-tree alloc cost is
38+
# small vs. the rest of
39+
# add_constraint here).
40+
# B: many independent constraints (N=5000) none 84 / per_call_oid 86 /
41+
# model_struct 91 / model_oid 86
42+
# → the *current branch* is the
43+
# slowest; struct keys hurt the
44+
# common case. Switching to
45+
# objectid keys fixes it.
46+
# C: shared big subexpr (N=200, M=200) model_struct 171 / model_oid 170
47+
# vs none 191 / per_call_oid 195
48+
# → 12–14% win for the model-
49+
# level cache. Per-call cannot
50+
# see the cross-constraint
51+
# sharing.
52+
# D: many aliased trees (M=200, K=8) all ~140 ms (per-constraint
53+
# sharing is small and JuMP's
54+
# `+` would flatten without the
55+
# explicit GenericNonlinearExpr
56+
# construction we use here).
57+
#
58+
# Takeaways:
59+
# * The model-level scope is the right call when subexpressions are shared
60+
# across constraints (scenario C).
61+
# * Using the JuMP struct as the cache key (current branch) is what costs
62+
# in scenario B — the deep `==`/`hash` adds overhead per node. Switching
63+
# to `objectid` keys (blegat's "use hash as keys" suggestion) recovers
64+
# most of it.
65+
# * Scenario A's exponential blow-up is more visible in MEMORY than in
66+
# TIME at K=14; bump K higher (or use the bench.jl example with
67+
# |R|*|S| ≈ 3600) to make the time gap obvious.
68+
69+
using Printf
70+
using JuMP
71+
using BenchmarkTools
72+
import MathOptInterface as MOI
73+
74+
const _G = JuMP.GenericNonlinearExpr
75+
76+
# ---------------------------------------------------------------------------
77+
# Three implementations of the GenericNonlinearExpr → ScalarNonlinearFunction
78+
# walk. They share the same skeleton; only the cache differs.
79+
# ---------------------------------------------------------------------------
80+
81+
function _moi_no_cache(f::_G{V}) where {V}
82+
ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args))
83+
stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[]
84+
for i in length(f.args):-1:1
85+
if f.args[i] isa _G{V}
86+
push!(stack, (ret, i, f.args[i]))
87+
else
88+
ret.args[i] = JuMP.moi_function(f.args[i])
89+
end
90+
end
91+
while !isempty(stack)
92+
parent, i, arg = pop!(stack)
93+
child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args))
94+
parent.args[i] = child
95+
for j in length(arg.args):-1:1
96+
if arg.args[j] isa _G{V}
97+
push!(stack, (child, j, arg.args[j]))
98+
else
99+
child.args[j] = JuMP.moi_function(arg.args[j])
100+
end
101+
end
102+
end
103+
return ret
104+
end
105+
106+
# Cache walk parameterised by the `keyfn` that maps a JuMP NL expression to
107+
# the dict key. `keyfn = identity` matches the branch (struct keys, deep
108+
# hash). `keyfn = objectid` matches odow's snippet (UInt64 keys, O(1) hash).
109+
function _moi_with_cache(f::_G{V}, cache, keyfn::F) where {V,F}
110+
fk = keyfn(f)
111+
if haskey(cache, fk)
112+
return cache[fk]
113+
end
114+
ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args))
115+
stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[]
116+
for i in length(f.args):-1:1
117+
if f.args[i] isa _G{V}
118+
push!(stack, (ret, i, f.args[i]))
119+
else
120+
ret.args[i] = JuMP.moi_function(f.args[i])
121+
end
122+
end
123+
while !isempty(stack)
124+
parent, i, arg = pop!(stack)
125+
argk = keyfn(arg)
126+
if haskey(cache, argk)
127+
parent.args[i] = cache[argk]
128+
continue
129+
end
130+
child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args))
131+
parent.args[i] = child
132+
for j in length(arg.args):-1:1
133+
if arg.args[j] isa _G{V}
134+
push!(stack, (child, j, arg.args[j]))
135+
else
136+
child.args[j] = JuMP.moi_function(arg.args[j])
137+
end
138+
end
139+
cache[argk] = child
140+
end
141+
cache[fk] = ret
142+
return ret
143+
end
144+
145+
# ---------------------------------------------------------------------------
146+
# Switching mechanism. We re-define JuMP's `moi_function(::GenericNonlinearExpr,
147+
# ::GenericModel)` so that `add_constraint` dispatches differently per mode.
148+
#
149+
# Modes :none and :per_call mirror what those PR options would actually
150+
# look like in production: a separate `check_belongs_to_model` walk, then
151+
# the (cached or uncached) build walk.
152+
# Mode :model_level matches the branch: a single integrated walk.
153+
# ---------------------------------------------------------------------------
154+
155+
const BENCH_MODE = Ref(:model_struct)
156+
157+
# Per-mode caches that need a UInt64 (objectid) key type rather than the
158+
# branch's `Dict{Any,...}`. We store them on the model as a side table so
159+
# they survive across constraints inside one build but are cleared between
160+
# builds (because each scenario builds a fresh `Model()`).
161+
const _OID_CACHE = IdDict{JuMP.GenericModel,Dict{UInt64,MOI.ScalarNonlinearFunction}}()
162+
163+
function _oid_cache(model)
164+
get!(_OID_CACHE, model) do
165+
Dict{UInt64,MOI.ScalarNonlinearFunction}()
166+
end
167+
end
168+
169+
@eval JuMP function moi_function(
170+
f::JuMP.GenericNonlinearExpr{V},
171+
model::JuMP.GenericModel,
172+
) where {V}
173+
mode = Main.BENCH_MODE[]
174+
if mode === :none
175+
JuMP.check_belongs_to_model(f, model)
176+
return Main._moi_no_cache(f)
177+
elseif mode === :per_call_oid
178+
JuMP.check_belongs_to_model(f, model)
179+
cache = Dict{UInt64,$(MOI.ScalarNonlinearFunction)}()
180+
return Main._moi_with_cache(f, cache, objectid)
181+
elseif mode === :model_struct
182+
# branch behaviour: belongs-check is integrated; struct keys (deep hash)
183+
return Main._moi_with_cache(f, model.subexpressions, identity)
184+
else # :model_oid
185+
return Main._moi_with_cache(f, Main._oid_cache(model), objectid)
186+
end
187+
end
188+
189+
# ---------------------------------------------------------------------------
190+
# Scenarios. Each `scenario_*` returns a 0-arg closure that builds a fresh
191+
# model from scratch, so each BenchmarkTools sample sees an empty
192+
# `model.subexpressions` cache.
193+
# ---------------------------------------------------------------------------
194+
195+
# Scenario A — odow's case: ONE constraint with a deeply aliased binary tree.
196+
# `e_k = e_{k-1} + e_{k-1}`. With no cache this expands to 2^K leaves; with
197+
# either cache it stays linear in K.
198+
#
199+
# We build the `+` node directly via the GenericNonlinearExpr constructor so
200+
# JuMP's `+` operator overload does NOT flatten the n-ary sum. Without this,
201+
# `e + e` returns a flat `:+(args...)` of 2^K identical leaf references and
202+
# the master walk only iterates 2^K times instead of doing 2^K *recursive*
203+
# descents through K levels — which would mask the worst case the PR is
204+
# meant to fix.
205+
function scenario_aliased_tree(; K::Int = 14)
206+
V = JuMP.VariableRef
207+
return function ()
208+
model = Model()
209+
@variable(model, x)
210+
e = sin(x)
211+
for _ in 1:K
212+
e = JuMP.GenericNonlinearExpr{V}(:+, e, e)
213+
end
214+
@constraint(model, e <= 1)
215+
return model
216+
end
217+
end
218+
219+
# Scenario B — many small INDEPENDENT NL constraints. No subexpression
220+
# sharing within or across constraints. The model-level cache pays an
221+
# insertion + hash cost for every new node; the per-call cache pays an
222+
# extra Dict allocation per constraint; master pays nothing.
223+
#
224+
# This is the case blegat warned about in the PR thread: "we start
225+
# creating many small dictionaries if there are a lot of constraints,
226+
# but that's probably negligible". The benchmark tells us whether it
227+
# really is negligible.
228+
function scenario_many_independent(; N::Int = 5_000)
229+
return function ()
230+
model = Model()
231+
@variable(model, x[1:N])
232+
for i in 1:N
233+
@constraint(model, sin(x[i]) + cos(x[i]) * exp(x[i]) <= 1)
234+
end
235+
return model
236+
end
237+
end
238+
239+
# Scenario C — many constraints sharing one big subexpression.
240+
# `big = sum(sin(x[i]) for i in 1:N)` appears once in each of M constraints.
241+
# The per-call cache cannot help here (each call sees `big` only once); only
242+
# the model-level cache deduplicates `big` across constraints. This is the
243+
# case that genuinely motivates the model-level cache.
244+
function scenario_shared_big(; N::Int = 200, M::Int = 200)
245+
return function ()
246+
model = Model()
247+
@variable(model, x[1:N])
248+
big = sum(sin(x[i]) for i in 1:N)
249+
for j in 1:M
250+
@constraint(model, big * x[j] <= 1)
251+
end
252+
return model
253+
end
254+
end
255+
256+
# Scenario D — many constraints, each with internal aliasing (a smaller
257+
# binary tree per constraint, distinct leaf variable per constraint).
258+
# Both per-call and model-level caches help WITHIN each constraint;
259+
# across constraints there is nothing to share. This is the natural
260+
# territory of odow's per-call dict.
261+
function scenario_many_aliased(; M::Int = 200, K::Int = 8)
262+
V = JuMP.VariableRef
263+
return function ()
264+
model = Model()
265+
@variable(model, x[1:M])
266+
for i in 1:M
267+
e = sin(x[i])
268+
for _ in 1:K
269+
e = JuMP.GenericNonlinearExpr{V}(:+, e, e)
270+
end
271+
@constraint(model, e <= 1)
272+
end
273+
return model
274+
end
275+
end
276+
277+
# ---------------------------------------------------------------------------
278+
# Runner
279+
# ---------------------------------------------------------------------------
280+
281+
const SCENARIOS = [
282+
("A: aliased tree (one big DAG, K=14)", scenario_aliased_tree(K = 14)),
283+
("B: many independent constraints (N=5000)", scenario_many_independent(N = 5_000)),
284+
("C: shared big subexpr (N=200, M=200)", scenario_shared_big(N = 200, M = 200)),
285+
("D: many aliased trees (M=200, K=8)", scenario_many_aliased(M = 200, K = 8)),
286+
]
287+
288+
const MODES = [:none, :per_call_oid, :model_struct, :model_oid]
289+
290+
function run_all()
291+
println("Benchmarking PR #4032 cache strategies\n")
292+
for (label, build) in SCENARIOS
293+
println("="^72)
294+
println(label)
295+
println("="^72)
296+
# Warm up each mode (compile its method specializations) before
297+
# timing, so the first sample isn't biased by JIT.
298+
for m in MODES
299+
BENCH_MODE[] = m
300+
try
301+
build()
302+
catch err
303+
@warn "warm-up failed for mode $m" exception = err
304+
end
305+
end
306+
results = Dict{Symbol,Any}()
307+
for m in MODES
308+
BENCH_MODE[] = m
309+
b = @benchmark $build() samples = 5 evals = 1 seconds = 30
310+
results[m] = b
311+
t_ms = minimum(b).time / 1e6
312+
mem_mb = minimum(b).memory / 1024^2
313+
allocs = minimum(b).allocs
314+
@printf(" %-13s %10.2f ms %9.2f MiB %12d allocs\n",
315+
string(m), t_ms, mem_mb, allocs)
316+
end
317+
# Ratios versus the branch's model_struct baseline so we can see
318+
# where each strategy wins, ties, or loses.
319+
base = minimum(results[:model_struct]).time
320+
println()
321+
for m in MODES
322+
r = minimum(results[m]).time / base
323+
@printf(" ratio(%-13s / model_struct) = %.2f\n", string(m), r)
324+
end
325+
println()
326+
end
327+
end
328+
329+
if abspath(PROGRAM_FILE) == @__FILE__
330+
run_all()
331+
end

0 commit comments

Comments
 (0)