Skip to content

Commit 505cc36

Browse files
jack-champagneroot
authored and committed
bench: analyzer + coerce MadNLP.ERROR to Int
analyze_allocs.jl ranks alloc hotspots by scaled bytes across all profile JLD2s in benchmark/results/allocs/ with noise-frame filtering. Companion to the existing alloc_profile.jl driver. alloc_profile.jl: coerce MadNLP.ERROR to Int for MadNLPOptions — symmetric to Piccolissimo commit 472fe4d.
1 parent 8b329a5 commit 505cc36

2 files changed

Lines changed: 137 additions & 1 deletion

File tree

benchmark/alloc_profile.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ function main(; N = 10, sample_rate = 0.01)
117117

118118
madnlp_profile, madnlp_path = profile_solver(;
119119
solver_name = "MadNLP",
120-
options_ctor = () -> MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = MadNLP.ERROR),
120+
options_ctor = () -> MadNLPSolverExt.MadNLPOptions(max_iter = 50, print_level = Int(MadNLP.ERROR)),
121121
N = N,
122122
sample_rate = sample_rate,
123123
)

benchmark/analyze_allocs.jl

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
using HarmoniqsBenchmarks
2+
using Printf
3+
4+
# Fallback location of the profile JLD2 files, resolved relative to this script.
const DEFAULT_RESULTS_DIR = joinpath(@__DIR__, "results", "allocs")

"""
    results_dir()

Return the directory to scan: the first command-line argument when one was
given, otherwise `DEFAULT_RESULTS_DIR`.
"""
results_dir() = get(ARGS, 1, DEFAULT_RESULTS_DIR)
6+
7+
# Noise filters: substrings identifying frames that come from Profile.Allocs
# itself or from the Julia toplevel/runtime, which tell us nothing about
# user-code hot paths. A frame is considered noise when it contains any entry.
const NOISE_FRAME_PATTERNS = String[
    "Profile.Allocs", "gc-alloc-profiler", "gc-stock.c", "gc.c:",
    "jl_apply", "jl_toplevel_", "ijl_toplevel_", "jl_interpret_toplevel_thunk",
    "jl_repl_entrypoint", "interpreter.c", "_include(", "include_string(",
    "loading.jl", "client.jl", "_start() at sys.so", "ip:0x",
    "_start at ", " at Base.jl:", "true_main at jlapi.c", "__libc_start_main",
    "loader_exe.c", "jl_system_image_data", "macro expansion at Allocs.jl",
    "boot.jl:", "jl_f__call_latest",
]
36+
37+
# Substrings identifying wrapper frames (presumably the benchmark harness around
# the solver call — confirm against alloc_profile.jl). Frames containing any of
# these are skipped when looking for the first user frame and, optionally, when
# ranking frames.
const WRAPPER_FRAME_PATTERNS = String[
    "alloc_profile.jl", "benchmark_memory!", "HarmoniqsBenchmarks",
]
42+
43+
# Substrings of allocated-type names to ignore; samples whose type name contains
# any of these are dropped before aggregation.
const NOISE_TYPE_PATTERNS = String["Profile.Allocs"]
46+
47+
# `true` when frame string `f` contains at least one entry of NOISE_FRAME_PATTERNS.
_is_noise_frame(f) = any(occursin(pat, f) for pat in NOISE_FRAME_PATTERNS)

# `true` when type name `t` contains at least one entry of NOISE_TYPE_PATTERNS.
_is_noise_type(t) = any(occursin(pat, t) for pat in NOISE_TYPE_PATTERNS)
49+
50+
# `true` when frame string `f` contains at least one entry of WRAPPER_FRAME_PATTERNS.
_is_wrapper_frame(f) = any(occursin(pat, f) for pat in WRAPPER_FRAME_PATTERNS)

"""
    _first_user_frame(stack)

Return the first frame in `stack` that is neither a noise frame nor a wrapper
frame. When every frame is filtered out, fall back to the last frame of
`stack`, or to `"<empty>"` when `stack` has no frames at all.
"""
function _first_user_frame(stack)
    hit = findfirst(f -> !(_is_noise_frame(f) || _is_wrapper_frame(f)), stack)
    hit === nothing || return stack[hit]
    return isempty(stack) ? "<empty>" : stack[end]
end
60+
61+
"""
    top_frames(profile; k = 25, scale_to_total = true, drop_wrappers = true)

Print the `k` stack frames that carry the most allocated bytes. Every frame of
a sample's stack trace that survives filtering is credited with that sample's
bytes, once per sample.

Reads `profile.samples` (each sample exposing `type_name`, `stacktrace` and
`size_bytes`) and `profile.sample_rate`.

# Keywords
- `k`: maximum number of rows printed.
- `scale_to_total`: when `true`, printed bytes are multiplied by
  `1 / profile.sample_rate`; sample counts are never scaled.
- `drop_wrappers`: when `true`, frames matching `WRAPPER_FRAME_PATTERNS` are
  skipped in addition to noise frames.

Returns `nothing`; output goes to `stdout`.
"""
function top_frames(profile; k = 25, scale_to_total = true, drop_wrappers = true)
    by_frame = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        # A recursive call chain repeats the same frame inside one stack trace.
        # Visit each distinct frame once so a single sample is not counted (and
        # its bytes not added) several times for the same frame.
        for frame in unique(s.stacktrace)
            _is_noise_frame(frame) && continue
            drop_wrappers && _is_wrapper_frame(frame) && continue
            cnt, bytes = get(by_frame, frame, (0, 0))
            by_frame[frame] = (cnt + 1, bytes + s.size_bytes)
        end
    end
    # Largest byte totals first; a non-positive `k` yields no rows.
    ranked = first(
        sort!(collect(by_frame); by = entry -> entry.second[2], rev = true),
        max(k, 0),
    )
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` raises InexactError whenever 1 / sample_rate is not a whole
    # number (e.g. sample_rate = 0.3); `%g` prints whole factors without a
    # fractional part and still handles the fractional ones.
    scale_label = @sprintf("%g", scale)
    println("\nTop $(length(ranked)) user frames by allocated bytes (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "frame")
    for (frame, (cnt, bytes)) in ranked
        @printf " %-12s %-8d %s\n" _fmt_bytes(bytes * scale) cnt _truncate(frame, 140)
    end
    return nothing
end
80+
81+
"""
    top_leaf_callsites(profile; k = 25, scale_to_total = true)

Print the `k` call sites that carry the most allocated bytes, where a sample's
call site is the frame returned by `_first_user_frame` for its stack trace.
Each sample therefore contributes to exactly one row.

Reads `profile.samples` (each sample exposing `type_name`, `stacktrace` and
`size_bytes`) and `profile.sample_rate`.

# Keywords
- `k`: maximum number of rows printed.
- `scale_to_total`: when `true`, printed bytes are multiplied by
  `1 / profile.sample_rate`; sample counts are never scaled.

Returns `nothing`; output goes to `stdout`.
"""
function top_leaf_callsites(profile; k = 25, scale_to_total = true)
    by_leaf = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        leaf = _first_user_frame(s.stacktrace)
        cnt, bytes = get(by_leaf, leaf, (0, 0))
        by_leaf[leaf] = (cnt + 1, bytes + s.size_bytes)
    end
    # Largest byte totals first; a non-positive `k` yields no rows.
    ranked = first(
        sort!(collect(by_leaf); by = entry -> entry.second[2], rev = true),
        max(k, 0),
    )
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` raises InexactError whenever 1 / sample_rate is not a whole
    # number (e.g. sample_rate = 0.3); `%g` prints whole factors without a
    # fractional part and still handles the fractional ones.
    scale_label = @sprintf("%g", scale)
    println("\nTop $(length(ranked)) leaf call sites by allocated bytes (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "leaf")
    for (leaf, (cnt, bytes)) in ranked
        @printf " %-12s %-8d %s\n" _fmt_bytes(bytes * scale) cnt _truncate(leaf, 140)
    end
    return nothing
end
97+
98+
"""
    top_types(profile; k = 15, scale_to_total = true)

Print the `k` allocated types that carry the most bytes, aggregating samples by
their `type_name`. Samples whose type name matches `NOISE_TYPE_PATTERNS` are
ignored.

Reads `profile.samples` (each sample exposing `type_name` and `size_bytes`) and
`profile.sample_rate`.

# Keywords
- `k`: maximum number of rows printed.
- `scale_to_total`: when `true`, printed bytes are multiplied by
  `1 / profile.sample_rate`; sample counts are never scaled.

Returns `nothing`; output goes to `stdout`.
"""
function top_types(profile; k = 15, scale_to_total = true)
    by_type = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        cnt, bytes = get(by_type, s.type_name, (0, 0))
        by_type[s.type_name] = (cnt + 1, bytes + s.size_bytes)
    end
    # Largest byte totals first; a non-positive `k` yields no rows.
    ranked = first(
        sort!(collect(by_type); by = entry -> entry.second[2], rev = true),
        max(k, 0),
    )
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` raises InexactError whenever 1 / sample_rate is not a whole
    # number (e.g. sample_rate = 0.3); `%g` prints whole factors without a
    # fractional part and still handles the fractional ones.
    scale_label = @sprintf("%g", scale)
    println("\nTop $(length(ranked)) allocated types (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "type")
    for (t, (cnt, bytes)) in ranked
        @printf " %-12s %-8d %s\n" _fmt_bytes(bytes * scale) cnt _truncate(t, 120)
    end
    return nothing
end
113+
114+
"""
    _fmt_bytes(b)

Render the byte count `b` as a short human-readable string. Values of at least
2^30, 2^20 or 2^10 are shown with two decimals in GB, MB or KB respectively
(binary multiples); anything smaller is rounded to a whole number of bytes.
"""
function _fmt_bytes(b)
    gb, mb, kb = 1 << 30, 1 << 20, 1 << 10
    if b >= gb
        return @sprintf("%.2f GB", b / gb)
    elseif b >= mb
        return @sprintf("%.2f MB", b / mb)
    elseif b >= kb
        return @sprintf("%.2f KB", b / kb)
    end
    return @sprintf("%d B", round(Int, b))
end
118+
119+
"""
    _truncate(s, n)

Return `s` unchanged when it has at most `n` characters. Otherwise return its
first `n - 1` characters followed by `…`, so the result is exactly `n`
characters long and visibly marked as cut. Returns an empty string when a
longer `s` is asked to fit into `n < 1` characters.
"""
function _truncate(s, n)
    length(s) <= n && return s
    # `first(s, n - 1)` throws for a negative count; nothing fits anyway.
    n < 1 && return ""
    return string(first(s, n - 1), "…")
end
120+
121+
"""
    main()

Print an allocation report for every `*_allocs.jld2` file found directly under
`results_dir()`. For each file, the profile is loaded with
`load_alloc_profile`, a one-line summary is printed, and then the top types,
top leaf call sites and top frames are listed.

Prints a message and returns early when the directory does not exist or holds
no matching files.
"""
function main()
    dir = results_dir()
    # `readdir` throws on a missing directory; report it like the "no files" case.
    if !isdir(dir)
        println("results directory not found: $dir")
        return
    end
    files = sort(filter(f -> endswith(f, "_allocs.jld2"), readdir(dir; join = true)))
    if isempty(files)
        println("no *_allocs.jld2 files under $dir")
        return
    end
    for path in files
        profile = load_alloc_profile(path)
        println("=" ^ 100)
        println(basename(path))
        @printf(
            " solver=%s N=%d sample_rate=%g samples=%d total=%s\n",
            profile.solver,
            profile.N,
            profile.sample_rate,
            profile.total_count,
            _fmt_bytes(profile.total_bytes),
        )
        top_types(profile; k = 10)
        top_leaf_callsites(profile; k = 20)
        top_frames(profile; k = 20)
    end
    return
end

main()

0 commit comments

Comments
 (0)