Hi,
I am currently implementing snmalloc support for an analytical database engine. Everything works fine and the performance is really good, but I am having trouble producing proper statistics for snmalloc:
Basically, I want something like resident memory and (de)commit information. Details like the allocation size distribution would also be helpful, but they are not essential.
So I mimicked the way snmalloc prints out its stats and wrote some code:
// Exports snmalloc allocator statistics as metrics via `set(name, value)`.
// Emits one group of metrics per used small sizeclass, one group per used
// large sizeclass, a handful of global counters, and three byte totals
// (current / total / max) accumulated across all classes.
{
    using namespace snmalloc;

    snmalloc::Stats stats;
    snmalloc::current_alloc_pool()->aggregate_stats(stats);

    size_t current = 0;
    size_t total = 0;
    size_t max = 0;

    // `static`, so the per-class high-water mark persists across invocations
    // of this block. The large-class counters only expose push/pop counts, so
    // the maximum has to be tracked here.
    static size_t large_alloc_max[NUM_LARGE_CLASSES]{0};

    for (sizeclass_t i = 0; i < NUM_SIZECLASSES; i++)
    {
        auto & sc = stats.sizeclass[i];
        if (sc.count.is_unused())
            continue;

        // Must run before `online_average` is read below.
        sc.addToRunningAverage();

        const auto size = sizeclass_to_size(i);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_current", size), sc.count.current);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_max", size), sc.count.max);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_total", size), sc.count.used);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_average_slab_usage", size), sc.online_average);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_average_wasted_space", size),
            (1.0 - sc.online_average) * sc.slab_count.max);

        current += sc.count.current * size;
        total += sc.count.used * size;
        max += sc.count.max * size;
    }

    for (uint8_t i = 0; i < NUM_LARGE_CLASSES; i++)
    {
        const auto pushed = stats.large_push_count[i];
        const auto popped = stats.large_pop_count[i];
        if ((pushed == 0) && (popped == 0))
            continue;

        const auto size = large_sizeclass_to_size(i);
        set(fmt::format("snmalloc.large_bucketed_stat_size_{}_push_count", size), pushed);
        set(fmt::format("snmalloc.large_bucketed_stat_size_{}_pop_count", size), popped);

        // The counters are unsigned: clamp at zero so that a snapshot in which
        // push > pop cannot wrap around to a huge value and permanently poison
        // the static high-water mark.
        const size_t large_alloc = (popped > pushed ? popped - pushed : 0) * size;
        large_alloc_max[i] = std::max(large_alloc_max[i], large_alloc);

        current += large_alloc;
        // NOTE(review): assumes pop counts allocations and push counts
        // deallocations (consistent with `pop - push` being the outstanding
        // count above), so the cumulative total uses the pop count.
        total += popped * size;
        max += large_alloc_max[i];
    }

    set("snmalloc.global_stat_remote_freed", stats.remote_freed);
    set("snmalloc.global_stat_remote_posted", stats.remote_posted);
    set("snmalloc.global_stat_remote_received", stats.remote_received);
    set("snmalloc.global_stat_superslab_pop_count", stats.superslab_pop_count);
    set("snmalloc.global_stat_superslab_push_count", stats.superslab_push_count);
    set("snmalloc.global_stat_segment_count", stats.segment_count);
    set("snmalloc.global_stat_current_size", current);
    set("snmalloc.global_stat_total_size", total);
    set("snmalloc.global_stat_max_size", max);
}
I am not sure, but maybe the above method would create too many entries in the summary?
Also, do you have any suggestions for creating more concise async metrics for the allocator?
Hi,
I am currently implementing snmalloc support for an analytical database engine. Everything works fine and the performance is really good, but I am having trouble producing proper statistics for snmalloc:
Basically, I want something like resident memory and (de)commit information. Details like the allocation size distribution would also be helpful, but they are not essential.
So I mimicked the way snmalloc prints out its stats and wrote some code:
// Exports snmalloc allocator statistics as metrics via `set(name, value)`.
// Emits one group of metrics per used small sizeclass, one group per used
// large sizeclass, a handful of global counters, and three byte totals
// (current / total / max) accumulated across all classes.
{
    using namespace snmalloc;

    snmalloc::Stats stats;
    snmalloc::current_alloc_pool()->aggregate_stats(stats);

    size_t current = 0;
    size_t total = 0;
    size_t max = 0;

    // `static`, so the per-class high-water mark persists across invocations
    // of this block. The large-class counters only expose push/pop counts, so
    // the maximum has to be tracked here.
    static size_t large_alloc_max[NUM_LARGE_CLASSES]{0};

    for (sizeclass_t i = 0; i < NUM_SIZECLASSES; i++)
    {
        auto & sc = stats.sizeclass[i];
        if (sc.count.is_unused())
            continue;

        // Must run before `online_average` is read below.
        sc.addToRunningAverage();

        const auto size = sizeclass_to_size(i);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_current", size), sc.count.current);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_max", size), sc.count.max);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_total", size), sc.count.used);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_average_slab_usage", size), sc.online_average);
        set(fmt::format("snmalloc.bucketed_stat_size_{}_average_wasted_space", size),
            (1.0 - sc.online_average) * sc.slab_count.max);

        current += sc.count.current * size;
        total += sc.count.used * size;
        max += sc.count.max * size;
    }

    for (uint8_t i = 0; i < NUM_LARGE_CLASSES; i++)
    {
        const auto pushed = stats.large_push_count[i];
        const auto popped = stats.large_pop_count[i];
        if ((pushed == 0) && (popped == 0))
            continue;

        const auto size = large_sizeclass_to_size(i);
        set(fmt::format("snmalloc.large_bucketed_stat_size_{}_push_count", size), pushed);
        set(fmt::format("snmalloc.large_bucketed_stat_size_{}_pop_count", size), popped);

        // The counters are unsigned: clamp at zero so that a snapshot in which
        // push > pop cannot wrap around to a huge value and permanently poison
        // the static high-water mark.
        const size_t large_alloc = (popped > pushed ? popped - pushed : 0) * size;
        large_alloc_max[i] = std::max(large_alloc_max[i], large_alloc);

        current += large_alloc;
        // NOTE(review): assumes pop counts allocations and push counts
        // deallocations (consistent with `pop - push` being the outstanding
        // count above), so the cumulative total uses the pop count.
        total += popped * size;
        max += large_alloc_max[i];
    }

    set("snmalloc.global_stat_remote_freed", stats.remote_freed);
    set("snmalloc.global_stat_remote_posted", stats.remote_posted);
    set("snmalloc.global_stat_remote_received", stats.remote_received);
    set("snmalloc.global_stat_superslab_pop_count", stats.superslab_pop_count);
    set("snmalloc.global_stat_superslab_push_count", stats.superslab_push_count);
    set("snmalloc.global_stat_segment_count", stats.segment_count);
    set("snmalloc.global_stat_current_size", current);
    set("snmalloc.global_stat_total_size", total);
    set("snmalloc.global_stat_max_size", max);
}
I don't know. but maybe the above method would create too many entries in the summary?
Also, do you have any suggestions for creating more concise async metrics for the allocator?