forked from microsoft/snmalloc
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlotsofthread.cc
More file actions
131 lines (114 loc) · 2.99 KB
/
lotsofthread.cc
File metadata and controls
131 lines (114 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/**
* This benchmark is based on
* https://github.com/microsoft/mimalloc/issues/1002#issuecomment-2630410617
*
 * It causes large batches of memory to be freed on a remote thread, and puts
 * many aspects of the backend under contention.
*
* The benchmark has a single freeing thread, and many allocating threads. The
* allocating threads communicate using a shared list of memory to free, which
* is protected by a mutex. This causes interesting batch behaviour which
* triggered a bug in the linux backend.
*/
#include <assert.h>
#include <atomic>
#include <mutex>
#include <stdio.h>
#include <stdlib.h>
#include <thread>
#include <vector>
using namespace std;
#include <test/snmalloc_testlib.h>
#define malloc snmalloc::libc::malloc
#define free snmalloc::libc::free
#define malloc_usable_size snmalloc::libc::malloc_usable_size
// Guards global_tofree_list; taken by every allocating thread when it
// publishes a batch, and by the freeing thread when it drains the list.
std::mutex global_tofree_list_mtx;
// Shared hand-off list: allocating threads push pointers here in batches,
// and the single freeing thread (freeloop) frees everything on it.
std::vector<void*> global_tofree_list;
// Set non-zero by main() after all allocating threads have joined;
// signals freeloop to drain one final time and exit.
std::atomic_int mustexit;
/**
 * Consumer loop run by the single freeing thread.
 *
 * Repeatedly takes the shared list lock, frees every pointer currently on
 * global_tofree_list, and clears it. Tracks the largest batch (in bytes,
 * as reported by malloc_usable_size) drained in a single pass and prints
 * each new maximum so the cross-thread batch behaviour is observable.
 *
 * Exits once `mustexit` is observed non-zero; the exit check happens after
 * a drain, so any batch published before the flag was set is still freed.
 */
void freeloop()
{
  size_t max_list_bytes = 0;
  while (1)
  {
    std::lock_guard<std::mutex> guard{global_tofree_list_mtx};
    size_t list_bytes = 0;
    for (auto& p : global_tofree_list)
    {
      list_bytes += malloc_usable_size(p);
      free(p);
    }
    global_tofree_list.clear();
    if (list_bytes > max_list_bytes)
    {
      // list_bytes is size_t (unsigned): the matching conversion is %zu.
      // %zd is the conversion for the signed ssize_t and is undefined
      // behaviour when handed an out-of-range unsigned value.
      printf("%zu bytes\n", list_bytes);
      max_list_bytes = list_bytes;
    }
    if (mustexit)
      return;
  }
}
/**
 * Producer loop run by each allocating thread.
 *
 * Allocates blocks of int whose element counts cycle through powers of
 * two (2^0 .. 2^19), writes to each block to force it to be touched, and
 * stashes the pointers locally. The stash is published to the shared
 * to-free list in batches (whenever it exceeds 100 entries, and once at
 * the end), so the freeing thread sees bursty, batched remote frees.
 */
void looper(size_t iterations)
{
  // Thread-local stash of pointers awaiting hand-off to the freer.
  std::vector<void*> pending;

  // Publish the whole stash to the shared list under the lock, then
  // empty the stash (clearing happens outside the critical section).
  auto publish_batch = [&]() {
    {
      std::lock_guard<std::mutex> hold{global_tofree_list_mtx};
      for (auto& ptr : pending)
        global_tofree_list.push_back(ptr);
    }
    pending.clear();
  };

  // Stash one pointer; publish as soon as the batch grows past 100.
  auto enqueue_free = [&](void* ptr) {
    pending.push_back(ptr);
    if (pending.size() > 100)
      publish_batch();
  };

  for (size_t round = 0; round < iterations; ++round)
  {
    // Element count cycles 1, 2, 4, ..., 2^19, then wraps.
    size_t elems = snmalloc::bits::one_at_bit(round % 20);
    for (size_t rep = 0; rep < 8; rep++)
    {
      auto block = (int*)malloc(elems * sizeof(int));
      if (block == nullptr)
        continue;
      *block = 1523;
      enqueue_free(block);
    }
  }
  publish_batch();
}
int main()
{
#ifdef SNMALLOC_THREAD_SANITIZER_ENABLED
size_t iterations = 50000;
#elif defined(__APPLE__) && !defined(SNMALLOC_APPLE_HAS_OS_SYNC_WAIT_ON_ADDRESS)
size_t iterations = 50000;
#elif defined(WIN32)
size_t iterations = 50000;
#else
size_t iterations = 200000;
#endif
#ifndef NDEBUG
// Debug builds run with full instrumentation enabled and are
// ~10x slower per iteration. The cross-thread batch behaviour
// this benchmark stresses is observable at much lower counts;
// reduce iterations so this test does not dominate Debug ctest
// wall-time. Release builds are unaffected.
iterations /= 10;
#endif
int threadcount = 8;
vector<thread> threads;
for (int i = 0; i < threadcount; ++i)
threads.emplace_back(looper, iterations);
std::thread freeloop_thread(freeloop);
for (auto& thread : threads)
{
thread.join();
}
mustexit.store(1);
freeloop_thread.join();
puts("Done!");
return 0;
}