Skip to content

Commit 4735549

Browse files
gonnet authored and xnnpack-bot committed
Use the new pthreadpool_parallelize_1d_dynamic strategy in the unary and binary ops, where appropriate.
PiperOrigin-RevId: 670177277
1 parent 847fb99 commit 4735549

10 files changed

Lines changed: 3935 additions & 1215 deletions

File tree

bench/latency.cc

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,23 @@ static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) {
4747
}
4848
pthreadpool_destroy(threadpool);
4949
}
50+
/* Registers the pthreadpool_parallelize_1d_tile_1d latency benchmark.
 * NOTE(review): the unchanged multi-line BENCHMARK(pthreadpool_parallelize_1d_tile_1d)
 * chain that follows the 1d_dynamic registration later in this hunk registers
 * the same function again, so this added one-liner looks like a duplicate
 * registration - confirm which of the two should stay. */
BENCHMARK(pthreadpool_parallelize_1d_tile_1d)->UseRealTime()->Apply(SetNumberOfThreads);
51+
52+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing, so the benchmark below spends no time inside the callback itself. */
static void compute_1d_dynamic(void*, size_t, size_t) {}
53+
54+
/* Latency benchmark for the 1D dynamic parallelization call.
 * state.range(0) is used as the thread count for the pool; the pool is created
 * once before the timed loop and destroyed once after it. */
static void pthreadpool_parallelize_1d_dynamic(benchmark::State& state) {
55+
const uint32_t threads = static_cast<uint32_t>(state.range(0));
56+
pthreadpool_t threadpool = pthreadpool_create(threads);
57+
while (state.KeepRunning()) {
58+
/* This call takes a different argument list than the benchmark function of
 * the same name, so it resolves to another overload - presumably the
 * pthreadpool library function; confirm against pthreadpool.h.
 * The trailing arguments are `threads` and 1 (presumably range and tile -
 * TODO confirm), followed by zero flags. */
pthreadpool_parallelize_1d_dynamic(threadpool, compute_1d_dynamic,
59+
nullptr /* context */, threads, 1,
60+
0 /* flags */);
61+
}
62+
pthreadpool_destroy(threadpool);
63+
}
64+
/* Registers the benchmark; the argument set comes from SetNumberOfThreads,
 * which is defined outside this hunk. */
BENCHMARK(pthreadpool_parallelize_1d_dynamic)
65+
->UseRealTime()
66+
->Apply(SetNumberOfThreads);
5067

5168
BENCHMARK(pthreadpool_parallelize_1d_tile_1d)
5269
->UseRealTime()
@@ -79,7 +96,19 @@ static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) {
7996
pthreadpool_destroy(threadpool);
8097
}
8198

82-
BENCHMARK(pthreadpool_parallelize_2d_tile_2d)
99+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing, so the benchmark below spends no time inside the callback itself. */
static void compute_2d_dynamic(void*, size_t, size_t, size_t, size_t) {}
100+
101+
/* Latency benchmark for the 2D dynamic parallelization call.
 * state.range(0) is used as the thread count for the pool; the pool is created
 * once before the timed loop and destroyed once after it. */
static void pthreadpool_parallelize_2d_dynamic(benchmark::State& state) {
102+
const uint32_t threads = static_cast<uint32_t>(state.range(0));
103+
pthreadpool_t threadpool = pthreadpool_create(threads);
104+
while (state.KeepRunning()) {
105+
/* This call takes a different argument list than the benchmark function of
 * the same name, so it resolves to another overload - presumably the
 * pthreadpool library function; confirm against pthreadpool.h.
 * The trailing arguments are 1, `threads`, 1, 1 (presumably range_i, range_j,
 * tile_i, tile_j - TODO confirm), followed by zero flags. */
pthreadpool_parallelize_2d_dynamic(threadpool, compute_2d_dynamic,
106+
nullptr /* context */, 1, threads, 1, 1,
107+
0 /* flags */);
108+
}
109+
pthreadpool_destroy(threadpool);
110+
}
111+
/* Registers the benchmark; the argument set comes from SetNumberOfThreads,
 * which is defined outside this hunk.
 * NOTE(review): this hunk removes the `BENCHMARK(pthreadpool_parallelize_2d_tile_2d)`
 * line and reuses its ->UseRealTime()->Apply(...) tail for the new benchmark,
 * so pthreadpool_parallelize_2d_tile_2d appears to be left without a
 * registration - confirm it is still registered somewhere. */
BENCHMARK(pthreadpool_parallelize_2d_dynamic)
83112
->UseRealTime()
84113
->Apply(SetNumberOfThreads);
85114

bench/throughput.cc

Lines changed: 93 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,29 @@ BENCHMARK(pthreadpool_parallelize_1d_tile_1d)
5454
->RangeMultiplier(10)
5555
->Range(10, 1000000);
5656

57-
static void compute_2d(void*, size_t, size_t) {}
57+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing, so the benchmark below spends no time inside the callback itself. */
static void compute_1d_dynamic(void*, size_t, size_t) {}
58+
59+
/* Throughput benchmark for the 1D dynamic parallelization call.
 * The pool is created with argument 2 and the thread count actually in use is
 * read back from the pool; state.range(0) supplies `items`. */
static void pthreadpool_parallelize_1d_dynamic(benchmark::State& state) {
60+
pthreadpool_t threadpool = pthreadpool_create(2);
61+
const size_t threads = pthreadpool_get_threads_count(threadpool);
62+
const size_t items = static_cast<size_t>(state.range(0));
63+
while (state.KeepRunning()) {
64+
/* This call takes a different argument list than the benchmark function of
 * the same name, so it resolves to another overload - presumably the
 * pthreadpool library function; confirm against pthreadpool.h.
 * The trailing arguments are items * threads and 1 (presumably range and
 * tile - TODO confirm), followed by zero flags. */
pthreadpool_parallelize_1d_dynamic(threadpool, compute_1d_dynamic,
65+
nullptr /* context */, items * threads,
66+
1, 0 /* flags */);
67+
}
68+
pthreadpool_destroy(threadpool);
69+
70+
/* Do not normalize by thread */
/* The reported count is iterations * items, although each call above was
 * given items * threads. */
71+
state.SetItemsProcessed(int64_t(state.iterations()) * items);
72+
}
73+
/* Registers the benchmark with arguments from 10 to 1000000 and a range
 * multiplier of 10. */
BENCHMARK(pthreadpool_parallelize_1d_dynamic)
74+
->UseRealTime()
75+
->RangeMultiplier(10)
76+
->Range(10, 1000000);
77+
78+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing. Presumably the callback for the pthreadpool_parallelize_2d
 * benchmark that follows - its use is outside this hunk; confirm. */
static void compute_2d(void*, size_t, size_t) {
79+
}
5880

5981
static void pthreadpool_parallelize_2d(benchmark::State& state) {
6082
pthreadpool_t threadpool = pthreadpool_create(2);
@@ -95,7 +117,29 @@ BENCHMARK(pthreadpool_parallelize_2d_tile_1d)
95117
->RangeMultiplier(10)
96118
->Range(10, 1000000);
97119

98-
static void compute_2d_tile_2d(void*, size_t, size_t, size_t, size_t) {}
120+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing, so the benchmark below spends no time inside the callback itself. */
static void compute_2d_tile_1d_dynamic(void*, size_t, size_t, size_t) {}
121+
122+
/* Throughput benchmark for the 2D, 1D-tiled dynamic parallelization call.
 * The pool is created with argument 2 and the thread count actually in use is
 * read back from the pool; state.range(0) supplies `items`. */
static void pthreadpool_parallelize_2d_tile_1d_dynamic(benchmark::State& state) {
123+
pthreadpool_t threadpool = pthreadpool_create(2);
124+
const size_t threads = pthreadpool_get_threads_count(threadpool);
125+
const size_t items = static_cast<size_t>(state.range(0));
126+
while (state.KeepRunning()) {
127+
/* This call takes a different argument list than the benchmark function of
 * the same name, so it resolves to another overload - presumably the
 * pthreadpool library function; confirm against pthreadpool.h.
 * The trailing arguments are `threads`, `items`, 1 (presumably range_i,
 * range_j, tile_j - TODO confirm), followed by zero flags. */
pthreadpool_parallelize_2d_tile_1d_dynamic(threadpool, compute_2d_tile_1d_dynamic,
128+
nullptr /* context */, threads, items,
129+
1, 0 /* flags */);
130+
}
131+
pthreadpool_destroy(threadpool);
132+
133+
/* Do not normalize by thread */
/* The reported count is iterations * items; the `threads` dimension passed to
 * each call above is not multiplied in. */
134+
state.SetItemsProcessed(int64_t(state.iterations()) * items);
135+
}
136+
/* Registers the benchmark with arguments from 10 to 1000000 and a range
 * multiplier of 10. */
BENCHMARK(pthreadpool_parallelize_2d_tile_1d_dynamic)
137+
->UseRealTime()
138+
->RangeMultiplier(10)
139+
->Range(10, 1000000);
140+
141+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing. Presumably the callback for the pthreadpool_parallelize_2d_tile_2d
 * benchmark that follows - its use is outside this hunk; confirm. */
static void compute_2d_tile_2d(void*, size_t, size_t, size_t, size_t) {
142+
}
99143

100144
static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) {
101145
pthreadpool_t threadpool = pthreadpool_create(2);
@@ -116,7 +160,29 @@ BENCHMARK(pthreadpool_parallelize_2d_tile_2d)
116160
->RangeMultiplier(10)
117161
->Range(10, 1000000);
118162

119-
static void compute_3d(void*, size_t, size_t, size_t) {}
163+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing, so the benchmark below spends no time inside the callback itself. */
static void compute_2d_dynamic(void*, size_t, size_t, size_t, size_t) {}
164+
165+
/* Throughput benchmark for the 2D dynamic parallelization call.
 * The pool is created with argument 2 and the thread count actually in use is
 * read back from the pool; state.range(0) supplies `items`. */
static void pthreadpool_parallelize_2d_dynamic(benchmark::State& state) {
166+
pthreadpool_t threadpool = pthreadpool_create(2);
167+
const size_t threads = pthreadpool_get_threads_count(threadpool);
168+
const size_t items = static_cast<size_t>(state.range(0));
169+
while (state.KeepRunning()) {
170+
/* This call takes a different argument list than the benchmark function of
 * the same name, so it resolves to another overload - presumably the
 * pthreadpool library function; confirm against pthreadpool.h.
 * The trailing arguments are `threads`, `items`, 1, 1 (presumably range_i,
 * range_j, tile_i, tile_j - TODO confirm), followed by zero flags. */
pthreadpool_parallelize_2d_dynamic(threadpool, compute_2d_dynamic,
171+
nullptr /* context */, threads, items, 1,
172+
1, 0 /* flags */);
173+
}
174+
pthreadpool_destroy(threadpool);
175+
176+
/* Do not normalize by thread */
/* The reported count is iterations * items; the `threads` dimension passed to
 * each call above is not multiplied in. */
177+
state.SetItemsProcessed(int64_t(state.iterations()) * items);
178+
}
179+
/* Registers the benchmark with arguments from 10 to 1000000 and a range
 * multiplier of 10. */
BENCHMARK(pthreadpool_parallelize_2d_dynamic)
180+
->UseRealTime()
181+
->RangeMultiplier(10)
182+
->Range(10, 1000000);
183+
184+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing. Presumably the callback for the pthreadpool_parallelize_3d
 * benchmark that follows - its use is outside this hunk; confirm. */
static void compute_3d(void*, size_t, size_t, size_t) {
185+
}
120186

121187
static void pthreadpool_parallelize_3d(benchmark::State& state) {
122188
pthreadpool_t threadpool = pthreadpool_create(2);
@@ -178,7 +244,30 @@ BENCHMARK(pthreadpool_parallelize_3d_tile_2d)
178244
->RangeMultiplier(10)
179245
->Range(10, 1000000);
180246

181-
static void compute_4d(void*, size_t, size_t, size_t, size_t) {}
247+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing, so the benchmark below spends no time inside the callback itself. */
static void compute_3d_tile_2d_dynamic(void*, size_t, size_t, size_t, size_t,
248+
size_t) {}
249+
250+
/* Throughput benchmark for the 3D, 2D-tiled dynamic parallelization call.
 * The pool is created with argument 2 and the thread count actually in use is
 * read back from the pool; state.range(0) supplies `items`. */
static void pthreadpool_parallelize_3d_tile_2d_dynamic(benchmark::State& state) {
251+
pthreadpool_t threadpool = pthreadpool_create(2);
252+
const size_t threads = pthreadpool_get_threads_count(threadpool);
253+
const size_t items = static_cast<size_t>(state.range(0));
254+
while (state.KeepRunning()) {
255+
/* This call takes a different argument list than the benchmark function of
 * the same name, so it resolves to another overload - presumably the
 * pthreadpool library function; confirm against pthreadpool.h.
 * The trailing arguments are 1, `threads`, `items`, 1, 1 (presumably range_i,
 * range_j, range_k, tile_j, tile_k - TODO confirm), followed by zero flags. */
pthreadpool_parallelize_3d_tile_2d_dynamic(threadpool, compute_3d_tile_2d_dynamic,
256+
nullptr /* context */, 1, threads,
257+
items, 1, 1, 0 /* flags */);
258+
}
259+
pthreadpool_destroy(threadpool);
260+
261+
/* Do not normalize by thread */
/* The reported count is iterations * items; the `threads` dimension passed to
 * each call above is not multiplied in. */
262+
state.SetItemsProcessed(int64_t(state.iterations()) * items);
263+
}
264+
/* Registers the benchmark with arguments from 10 to 1000000 and a range
 * multiplier of 10. */
BENCHMARK(pthreadpool_parallelize_3d_tile_2d_dynamic)
265+
->UseRealTime()
266+
->RangeMultiplier(10)
267+
->Range(10, 1000000);
268+
269+
/* Work item with an empty body: it ignores all of its arguments and does
 * nothing. Presumably the callback for the pthreadpool_parallelize_4d
 * benchmark that follows - its use is outside this hunk; confirm. */
static void compute_4d(void*, size_t, size_t, size_t, size_t) {
270+
}
182271

183272
static void pthreadpool_parallelize_4d(benchmark::State& state) {
184273
pthreadpool_t threadpool = pthreadpool_create(2);

0 commit comments

Comments
 (0)