|
| 1 | +using HarmoniqsBenchmarks |
| 2 | +using Printf |
| 3 | + |
# Directory scanned when no command-line argument is supplied.
const DEFAULT_RESULTS_DIR = joinpath(@__DIR__, "results", "allocs")

"""
    results_dir()

Return the directory to scan for allocation profiles: the first command-line
argument when one is given, otherwise `DEFAULT_RESULTS_DIR`.
"""
function results_dir()
    # `get` on an array falls back to the default when index 1 is out of bounds,
    # i.e. when ARGS is empty.
    return get(ARGS, 1, DEFAULT_RESULTS_DIR)
end
| 6 | + |
"""
    NOISE_FRAME_PATTERNS

Substrings marking stack frames that come from `Profile.Allocs` itself or from the
Julia toplevel/runtime. Such frames say nothing about user-code hot paths; a frame
containing any of these substrings is treated as noise.
"""
const NOISE_FRAME_PATTERNS = String[
    "Profile.Allocs", "gc-alloc-profiler", "gc-stock.c", "gc.c:",
    "jl_apply", "jl_toplevel_", "ijl_toplevel_",
    "jl_interpret_toplevel_thunk", "jl_repl_entrypoint", "interpreter.c",
    "_include(", "include_string(", "loading.jl", "client.jl",
    "_start() at sys.so", "ip:0x", "_start at ", " at Base.jl:",
    "true_main at jlapi.c", "__libc_start_main", "loader_exe.c",
    "jl_system_image_data", "macro expansion at Allocs.jl", "boot.jl:",
    "jl_f__call_latest",
]
| 36 | + |
# Substrings marking frames of the profiling/benchmark harness. Frames containing
# any of them are skipped when looking for the first user frame and, optionally,
# when ranking frames.
const WRAPPER_FRAME_PATTERNS = String[
    "alloc_profile.jl", "benchmark_memory!", "HarmoniqsBenchmarks",
]
| 42 | + |
# Substrings marking allocated type names that are excluded from every report.
const NOISE_TYPE_PATTERNS = String["Profile.Allocs"]
| 46 | + |
# Return `true` when frame string `f` contains any entry of `NOISE_FRAME_PATTERNS`.
function _is_noise_frame(f)
    return any(occursin(pat, f) for pat in NOISE_FRAME_PATTERNS)
end

# Return `true` when type name `t` contains any entry of `NOISE_TYPE_PATTERNS`.
function _is_noise_type(t)
    return any(occursin(pat, t) for pat in NOISE_TYPE_PATTERNS)
end
| 49 | + |
"""
    _first_user_frame(stack)

Return the first frame of `stack` that is neither a noise frame nor a wrapper
frame. When every frame is filtered out, return the last frame of `stack`, or the
placeholder `"<empty>"` for an empty stack.
"""
function _first_user_frame(stack)
    idx = findfirst(f -> !(_is_noise_frame(f) || _is_wrapper_frame(f)), stack)
    if idx !== nothing
        return stack[idx]
    elseif isempty(stack)
        return "<empty>"
    else
        return stack[end]
    end
end
| 58 | + |
# Return `true` when frame string `f` contains any entry of `WRAPPER_FRAME_PATTERNS`.
function _is_wrapper_frame(f)
    return any(occursin(pat, f) for pat in WRAPPER_FRAME_PATTERNS)
end
| 60 | + |
"""
    top_frames(profile; k = 25, scale_to_total = true, drop_wrappers = true)

Print the `k` stack frames with the most allocated bytes attributed to them.

Every sample's bytes are credited to each frame on its stack (an inclusive count),
after dropping noise types, noise frames and, when `drop_wrappers` is `true`,
wrapper frames.

# Arguments
- `profile`: object exposing `samples` (each with `type_name`, `size_bytes` and
  `stacktrace`, a collection of frame strings) and `sample_rate`.

# Keywords
- `k`: maximum number of rows to print.
- `scale_to_total`: multiply sampled bytes by `1 / profile.sample_rate`.
- `drop_wrappers`: skip frames matching `WRAPPER_FRAME_PATTERNS`.
"""
function top_frames(profile; k = 25, scale_to_total = true, drop_wrappers = true)
    by_frame = Dict{String, Tuple{Int, Int}}()
    seen = Set{String}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        empty!(seen)
        for frame in s.stacktrace
            _is_noise_frame(frame) && continue
            drop_wrappers && _is_wrapper_frame(frame) && continue
            # Recursion puts the same frame on one stack several times; credit the
            # sample to that frame once so its samples/bytes are not inflated.
            frame in seen && continue
            push!(seen, frame)
            cnt, bytes = get(by_frame, frame, (0, 0))
            by_frame[frame] = (cnt + 1, bytes + s.size_bytes)
        end
    end
    # Largest byte totals first; ties broken by frame text so the output does not
    # depend on Dict iteration order.
    sorted = sort!(collect(by_frame); by = entry -> (-entry[2][2], entry[1]))
    ranked = sorted[1:min(k, length(sorted))]
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` throws InexactError for rates such as 0.3; format the factor instead.
    scale_label = isinteger(scale) ? @sprintf("%.0f", scale) : @sprintf("%.4g", scale)
    println("\nTop $(length(ranked)) user frames by allocated bytes (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "frame")
    for (frame, (cnt, bytes)) in ranked
        @printf(" %-12s %-8d %s\n", _fmt_bytes(bytes * scale), cnt, _truncate(frame, 140))
    end
    return nothing
end
| 80 | + |
"""
    top_leaf_callsites(profile; k = 25, scale_to_total = true)

Print the `k` call sites with the most allocated bytes, where each sample is
attributed to exactly one frame: the one chosen by `_first_user_frame` for its
stack trace. Samples whose type name is a noise type are skipped.

# Arguments
- `profile`: object exposing `samples` (each with `type_name`, `size_bytes` and
  `stacktrace`) and `sample_rate`.

# Keywords
- `k`: maximum number of rows to print.
- `scale_to_total`: multiply sampled bytes by `1 / profile.sample_rate`.
"""
function top_leaf_callsites(profile; k = 25, scale_to_total = true)
    by_leaf = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        leaf = _first_user_frame(s.stacktrace)
        cnt, bytes = get(by_leaf, leaf, (0, 0))
        by_leaf[leaf] = (cnt + 1, bytes + s.size_bytes)
    end
    # Largest byte totals first; ties broken by frame text so the output does not
    # depend on Dict iteration order.
    sorted = sort!(collect(by_leaf); by = entry -> (-entry[2][2], entry[1]))
    ranked = sorted[1:min(k, length(sorted))]
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` throws InexactError for rates such as 0.3; format the factor instead.
    scale_label = isinteger(scale) ? @sprintf("%.0f", scale) : @sprintf("%.4g", scale)
    println("\nTop $(length(ranked)) leaf call sites by allocated bytes (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "leaf")
    for (leaf, (cnt, bytes)) in ranked
        @printf(" %-12s %-8d %s\n", _fmt_bytes(bytes * scale), cnt, _truncate(leaf, 140))
    end
    return nothing
end
| 97 | + |
"""
    top_types(profile; k = 15, scale_to_total = true)

Print the `k` allocated type names with the most allocated bytes. Samples whose
type name is a noise type are skipped.

# Arguments
- `profile`: object exposing `samples` (each with `type_name` and `size_bytes`)
  and `sample_rate`.

# Keywords
- `k`: maximum number of rows to print.
- `scale_to_total`: multiply sampled bytes by `1 / profile.sample_rate`.
"""
function top_types(profile; k = 15, scale_to_total = true)
    by_type = Dict{String, Tuple{Int, Int}}()
    for s in profile.samples
        _is_noise_type(s.type_name) && continue
        cnt, bytes = get(by_type, s.type_name, (0, 0))
        by_type[s.type_name] = (cnt + 1, bytes + s.size_bytes)
    end
    # Largest byte totals first; ties broken by type name so the output does not
    # depend on Dict iteration order.
    sorted = sort!(collect(by_type); by = entry -> (-entry[2][2], entry[1]))
    ranked = sorted[1:min(k, length(sorted))]
    scale = scale_to_total ? (1 / profile.sample_rate) : 1.0
    # `Int(scale)` throws InexactError for rates such as 0.3; format the factor instead.
    scale_label = isinteger(scale) ? @sprintf("%.0f", scale) : @sprintf("%.4g", scale)
    println("\nTop $(length(ranked)) allocated types (scaled ×$(scale_label)):")
    println(rpad(" bytes", 14), rpad("samples", 10), "type")
    for (t, (cnt, bytes)) in ranked
        @printf(" %-12s %-8d %s\n", _fmt_bytes(bytes * scale), cnt, _truncate(t, 120))
    end
    return nothing
end
| 113 | + |
"""
    _fmt_bytes(b)

Format the byte count `b` as a short string. Values of at least 2^30, 2^20 or 2^10
are shown with two decimals in "GB", "MB" or "KB" (binary multiples); smaller
values are rounded to a whole number of bytes.
"""
function _fmt_bytes(b)
    # Try the largest unit first and stop at the first threshold that `b` reaches.
    for (shift, unit) in ((30, "GB"), (20, "MB"), (10, "KB"))
        limit = 1 << shift
        if b >= limit
            return @sprintf("%.2f %s", b / limit, unit)
        end
    end
    return @sprintf("%d B", Int(round(b)))
end
| 118 | + |
# Shorten `s` to at most `n` characters; when it is longer, keep the first `n - 1`
# characters and append an ellipsis.
function _truncate(s, n)
    if length(s) > n
        return string(first(s, n - 1), "…")
    end
    return s
end
| 120 | + |
"""
    main()

Print an allocation report for every `*_allocs.jld2` file directly inside
`results_dir()`, in sorted path order. For each file it prints a summary line
followed by the top types, top leaf call sites and top frames.

Prints a message and returns early when the directory does not exist or contains
no matching file.
"""
function main()
    dir = results_dir()
    # `readdir` throws on a missing directory (the default one may not have been
    # created yet); report it the same way as the "no files" case instead.
    if !isdir(dir)
        println("results directory not found: $dir")
        return nothing
    end
    files = sort(filter(f -> endswith(f, "_allocs.jld2"), readdir(dir; join = true)))
    if isempty(files)
        println("no *_allocs.jld2 files under $dir")
        return nothing
    end
    for path in files
        # NOTE(review): `load_alloc_profile` is not defined in this file; presumably
        # it comes from HarmoniqsBenchmarks — confirm.
        profile = load_alloc_profile(path)
        println("=" ^ 100)
        println(basename(path))
        @printf(
            " solver=%s N=%d sample_rate=%g samples=%d total=%s\n",
            profile.solver,
            profile.N,
            profile.sample_rate,
            profile.total_count,
            _fmt_bytes(profile.total_bytes),
        )
        top_types(profile; k = 10)
        top_leaf_callsites(profile; k = 20)
        top_frames(profile; k = 20)
    end
    return nothing
end

# Run the report as soon as this file is executed or included.
main()
0 commit comments