Skip to content

Commit 5381881

Browse files
committed
feat: enable memory profiling
1 parent ef1d67e commit 5381881

10 files changed

Lines changed: 549 additions & 6 deletions

File tree

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ set(mi_sources
6767
src/options.c
6868
src/os.c
6969
src/page.c
70+
src/profile.c
7071
src/random.c
7172
src/segment.c
7273
src/segment-map.c
@@ -727,7 +728,7 @@ if (MI_BUILD_TESTS)
727728
enable_testing()
728729

729730
# static link tests
730-
foreach(TEST_NAME api api-fill stress)
731+
foreach(TEST_NAME api api-fill stress profile)
731732
add_executable(mimalloc-test-${TEST_NAME} test/test-${TEST_NAME}.c)
732733
target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines})
733734
target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags})

include/mimalloc/internal.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,5 +1161,43 @@ static inline void _mi_memzero_aligned(void* dst, size_t n) {
11611161
}
11621162
#endif
11631163

1164+
// ------------------------------------------------------------------
1165+
// Heap profiler fast-path hooks (profile.c / profile.h)
1166+
//
1167+
// Inlined here so that the common case (profiling disabled or no
1168+
// sample due) adds only a handful of instructions at each call site.
1169+
// The slow paths are noinline and defined in profile.c.
1170+
//
1171+
// Thread safety: see profile.h for the acquire/release protocol on
1172+
// _mi_profiler.enabled.
1173+
// ------------------------------------------------------------------
1174+
#include "mimalloc/profile.h"
1175+
1176+
// Fast-path allocation hook. Adds `size` to the bytes_since_sample counter in
// the heap's thread-local profiler state and calls _mi_profiler_on_alloc_slow()
// once that counter reaches next_threshold. Returns without doing anything
// else when the profiler is not enabled or when in_profiler is set.
static inline void _mi_profiler_on_alloc(mi_heap_t* heap, mi_page_t* page, void* ptr, size_t size) {
1177+
// Relaxed load: we only need to know whether to do any work. The acquire
1178+
// that synchronizes on_alloc/on_free/record_extra_bytes visibility is in
1179+
// the slow path, which is the first place we actually read those fields.
1180+
if mi_likely(!mi_atomic_load_relaxed(&_mi_profiler.enabled)) return;
1181+
mi_profiler_tld_t* ptld = &heap->tld->profiler;
1182+
if (ptld->in_profiler) return; // reentrancy guard: allocations made inside profiler code are not counted
1183+
ptld->bytes_since_sample += size; // accumulates requested_size, not usable_size
1184+
if mi_likely(ptld->bytes_since_sample < ptld->next_threshold) return;
1185+
_mi_profiler_on_alloc_slow(heap, page, ptr, size); // threshold reached: hand off to the slow path (profile.c)
1186+
}
1187+
1188+
// Fast-path hook for a free of `ptr` on `page`: only pages flagged with
// has_metadata are forwarded to _mi_profiler_on_free_local_slow().
static inline void _mi_profiler_on_free_local(mi_page_t* page, void* ptr) {
1189+
// No acquire load on enabled: has_metadata is only ever set by the owning thread
1190+
// after it observed enabled=true via the acquire load in the allocation slow path
1191+
// (_mi_profiler_on_alloc_slow; the inline _mi_profiler_on_alloc itself only does a
// relaxed load). If has_metadata is true, all profiler fields are already visible
// to this thread.
1192+
if mi_likely(!page->has_metadata) return;
1193+
_mi_profiler_on_free_local_slow(page, ptr);
1194+
}
1195+
1196+
// Fast-path hook for a list of blocks starting at `head` that is being
// collected on `page` (called from _mi_page_thread_free_collect in page.c).
// Only pages flagged with has_metadata reach the slow path.
static inline void _mi_profiler_on_free_collected(mi_page_t* page, mi_block_t* head) {
1197+
// No acquire load on enabled is needed here: has_metadata is the only flag
// consulted, same argument as _mi_profiler_on_free_local.
1198+
if mi_likely(!page->has_metadata) return;
1199+
_mi_profiler_on_free_collected_slow(page, head);
1200+
}
1201+
11641202

11651203
#endif

include/mimalloc/profile.h

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/* ----------------------------------------------------------------------------
2+
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
3+
This is free software; you can redistribute it and/or modify it under the
4+
terms of the MIT license. A copy of the license can be found in the file
5+
"LICENSE" at the root of this distribution.
6+
-----------------------------------------------------------------------------*/
7+
#pragma once
8+
#ifndef MIMALLOC_PROFILE_H
9+
#define MIMALLOC_PROFILE_H
10+
11+
#include <stddef.h>
12+
#include <stdint.h>
13+
#include <stdbool.h>
14+
#include "mimalloc/atomic.h"
15+
16+
// Forward declarations — full types come from types.h / internal.h.
17+
typedef struct mi_page_s mi_page_t;
18+
typedef struct mi_block_s mi_block_t;
19+
typedef struct mi_heap_s mi_heap_t;
20+
21+
// ------------------------------------------------------------------
22+
// Allocation record: one node per sampled live allocation, stored in
23+
// a singly-linked list at page->metadata. Opaque to callers; the
24+
// callbacks receive user_data directly.
25+
//
26+
// `ptr` — the sampled user pointer; used internally to match frees.
27+
// `user_data` — flexible array member for caller-owned metadata.
28+
// The number of bytes available is
29+
// _mi_profiler.record_extra_bytes, set at
30+
// mi_profiler_enable() time. Typical uses: a captured
31+
// stack trace, allocation size and weight for on_free,
32+
// a pointer to an external profiler node, or a timestamp.
33+
// The profiler does not initialize this region and never
34+
// reads it.
35+
// ------------------------------------------------------------------
36+
typedef struct mi_alloc_record_s {
37+
void* ptr; // the sampled user pointer; used internally to match frees
38+
struct mi_alloc_record_s* next; // next record in the singly-linked list headed at page->metadata
39+
char user_data[]; // caller-owned region; length = _mi_profiler.record_extra_bytes
40+
} mi_alloc_record_t;
41+
42+
// ------------------------------------------------------------------
43+
// User-supplied callbacks.
44+
//
45+
// on_alloc: called when a sample is taken.
46+
// `user_data` — caller-owned region (record_extra_bytes bytes);
47+
// may write anything here for use in on_free.
48+
// NULL if on_free is not set (record_extra_bytes must
49+
// be 0 in that case) or if record_extra_bytes is 0.
50+
// `ptr` — the sampled user pointer.
51+
// `requested_size` — size passed by the caller to malloc/calloc/etc.
52+
// `usable_size` — actual usable bytes after size-class rounding;
53+
// reflects true memory consumption.
54+
// `threshold` — the threshold (bytes) that triggered this sample.
55+
// `bytes_since_last_sample` — bytes accumulated since the last sample; the
56+
// statistical weight of this sample.
57+
// `heap_tag` — tag of the heap that made the allocation, set via
58+
// mi_heap_new_ex(). Zero for the default heap.
59+
// Returns the number of bytes to accumulate before the next sample.
60+
// Returning 0 causes the next allocation to be sampled immediately.
61+
//
62+
// on_free: called when a sampled allocation is freed.
63+
// `user_data` — the same region written during on_alloc.
64+
// `ptr` — the freed user pointer.
65+
// May be NULL if free-time notification is not needed.
66+
// ------------------------------------------------------------------
67+
typedef size_t (*mi_profiler_alloc_cb)(void* user_data, void* ptr, size_t requested_size, size_t usable_size, size_t threshold, size_t bytes_since_last_sample, uint8_t heap_tag);
68+
typedef void (*mi_profiler_free_cb)(void* user_data, void* ptr);
69+
70+
// ------------------------------------------------------------------
71+
// Global profiler configuration.
72+
//
73+
// Profiling is one-way: once enabled it cannot be disabled.
74+
//
75+
// `enabled` is _Atomic(bool) so that mi_profiler_enable() can be called
76+
// from any thread. The store uses release order; reads in the inline
77+
// fast-path hooks (in internal.h) use relaxed order (sufficient to decide
78+
// whether to do any work); the slow path uses acquire order to ensure
79+
// on_alloc, on_free, and record_extra_bytes are visible before they are read.
80+
// ------------------------------------------------------------------
81+
typedef struct mi_profiler_s {
82+
_Atomic(bool) enabled; // stored with release order by mi_profiler_enable(); one-way, never disabled again
83+
mi_profiler_alloc_cb on_alloc; // non-NULL when enabled=true
84+
mi_profiler_free_cb on_free; // may be NULL
85+
size_t record_extra_bytes; // bytes allocated after each mi_alloc_record_t for user_data
86+
} mi_profiler_t;
87+
88+
extern mi_profiler_t _mi_profiler;
89+
90+
// ------------------------------------------------------------------
91+
// Public API — must be called at most once. May be called from any
92+
// thread, before or after other threads have started. Each thread
93+
// samples its first allocation immediately; the on_alloc callback
94+
// controls all subsequent thresholds.
95+
// ------------------------------------------------------------------
96+
void mi_profiler_enable(size_t record_extra_bytes, mi_profiler_alloc_cb on_alloc, mi_profiler_free_cb on_free);
97+
98+
// ------------------------------------------------------------------
99+
// Slow-path implementations (defined in profile.c).
100+
// The inline fast-path wrappers are in internal.h so they have
101+
// access to the full type definitions they need.
102+
// ------------------------------------------------------------------
103+
void _mi_profiler_on_alloc_slow(mi_heap_t* heap, mi_page_t* page, void* ptr, size_t size); // reached from _mi_profiler_on_alloc once bytes_since_sample >= next_threshold
104+
void _mi_profiler_on_free_local_slow(mi_page_t* page, void* ptr); // reached from _mi_profiler_on_free_local when page->has_metadata is set
105+
void _mi_profiler_on_free_collected_slow(mi_page_t* page, mi_block_t* head); // reached from _mi_profiler_on_free_collected when page->has_metadata is set
106+
107+
#endif // MIMALLOC_PROFILE_H

include/mimalloc/types.h

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,9 @@ typedef uintptr_t mi_thread_free_t;
316316
// at least one block that will be added, or has already been added, to
317317
// the owning heap `thread_delayed_free` list. This guarantees that pages
318318
// will be freed correctly even if only other threads free blocks.
319+
// Forward declaration for the profiler record list stored in page->metadata.
320+
struct mi_alloc_record_s;
321+
319322
typedef struct mi_page_s {
320323
// "owned" by the segment
321324
uint32_t slice_count; // slices in this page (0 if not a page)
@@ -329,7 +332,9 @@ typedef struct mi_page_s {
329332
uint16_t reserved; // number of blocks reserved in memory
330333
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
331334
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
332-
uint8_t retire_expire:7; // expiration count for retired blocks
335+
uint8_t has_metadata:1; // `true` if page->metadata is non-NULL; on the same cache line as
336+
// the hot free fields to avoid a cache miss on every deallocation
337+
uint8_t retire_expire:6; // expiration count for retired blocks (max value is MI_RETIRE_CYCLES=16, so 6 bits suffices)
333338

334339
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
335340
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -351,7 +356,12 @@ typedef struct mi_page_s {
351356
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
352357

353358
// 64-bit 11 words, 32-bit 13 words, (+2 for secure)
354-
void* padding[1];
359+
// `metadata` is the head of a singly-linked list of profiler allocation
360+
// records (mi_alloc_record_t, defined in profile.h). It is zeroed when
361+
// the page is cleared (segment.c:mi_segment_page_clear) and must be
362+
// re-initialized when the page is reused. NULL when profiling is off or
363+
// no sampled allocations are live on this page.
364+
struct mi_alloc_record_s* metadata;
355365
} mi_page_t;
356366

357367

@@ -623,13 +633,21 @@ typedef struct mi_segments_tld_s {
623633
} mi_segments_tld_t;
624634

625635
// Thread local data
636+
// Per-thread profiler state (see include/mimalloc/profile.h)
637+
typedef struct mi_profiler_tld_s {
638+
size_t bytes_since_sample; // bytes allocated since the last sample (the fast-path hook adds each allocation's size here)
639+
size_t next_threshold; // sample when bytes_since_sample reaches this value
640+
bool in_profiler; // reentrancy guard: skip profiling inside profiler code
641+
} mi_profiler_tld_t;
642+
626643
// Thread local data
struct mi_tld_s {
627644
unsigned long long heartbeat; // monotonic heartbeat count
628645
bool recurse; // true if deferred was called; used to prevent infinite recursion.
629646
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
630647
mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
631648
mi_segments_tld_t segments; // segment tld
632649
mi_stats_t stats; // statistics
650+
mi_profiler_tld_t profiler; // heap profiler state (sampling counters and reentrancy guard)
633651
};
634652

635653

src/alloc.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
111111
#endif
112112
#endif
113113

114+
_mi_profiler_on_alloc(heap, page, block, size - MI_PADDING_SIZE);
114115
return block;
115116
}
116117

@@ -303,6 +304,10 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero,
303304
// mi_track_resize(p,size,newsize)
304305
// if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); }
305306
if (usable_post!=NULL) { *usable_post = mi_page_usable_block_size(page); }
307+
// TODO(profiler): if p has a live profiler record, notify the profiler of the
308+
// resize so it can update the recorded size. The allocate-and-copy path below
309+
// is handled correctly because it goes through mi_free + mi_heap_umalloc which
310+
// hit the existing on_free and on_alloc hooks.
306311
return p; // reallocation still fits and not more than 50% waste
307312
}
308313
void* newp = mi_heap_umalloc(heap,newsize,usable_post);

src/free.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
3434
// checks
3535
if mi_unlikely(mi_check_is_double_free(page, block)) return;
3636
if (!was_guarded) { mi_check_padding(page, block); }
37+
// Profiler hook fires before any page state (local_free, used) is modified,
38+
// so page is fully consistent. mi_record_free may call mi_free internally,
39+
// which is safe because in_profiler suppresses recursion and the page is
40+
// in a valid pre-free state.
41+
_mi_profiler_on_free_local(page, block);
3742
if (track_stats) { mi_stat_free(page, block); }
3843
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
3944
if (!mi_page_is_huge(page)) { // huge page content may be already decommitted

src/init.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const mi_page_t _mi_page_empty = {
2020
0, // reserved capacity
2121
{ 0 }, // flags
2222
false, // is_zero
23+
false, // has_metadata
2324
0, // retire_expire
2425
NULL, // free
2526
NULL, // local_free
@@ -34,7 +35,7 @@ const mi_page_t _mi_page_empty = {
3435
MI_ATOMIC_VAR_INIT(0), // xthread_free
3536
MI_ATOMIC_VAR_INIT(0), // xheap
3637
NULL, NULL
37-
, { 0 } // padding
38+
, NULL // metadata
3839
};
3940

4041
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@@ -140,7 +141,8 @@ mi_decl_cache_align static const mi_tld_t tld_empty = {
140141
false,
141142
NULL, NULL,
142143
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, tld_empty_stats }, // segments
143-
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL } // stats
144+
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats
145+
{ 0, 0, false } // profiler
144146
};
145147

146148
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
@@ -156,7 +158,8 @@ static mi_decl_cache_align mi_tld_t tld_main = {
156158
0, false,
157159
&_mi_heap_main, & _mi_heap_main,
158160
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, &tld_main.stats }, // segments
159-
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL } // stats
161+
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats
162+
{ 0, 0, false } // profiler
160163
};
161164

162165
mi_decl_cache_align mi_heap_t _mi_heap_main = {

src/page.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
206206
return; // the thread-free items cannot be freed
207207
}
208208

209+
_mi_profiler_on_free_collected(page, head);
210+
209211
// and append the current local free list
210212
mi_block_set_next(page,tail, page->local_free);
211213
page->local_free = head;

0 commit comments

Comments
 (0)