-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparallel_algorithms.h
More file actions
56 lines (52 loc) · 2.3 KB
/
parallel_algorithms.h
File metadata and controls
56 lines (52 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#pragma once
#include <concepts>
#include <future>
#include <iterator>
template <std::input_iterator I, std::sentinel_for<I> S, class Pred>
requires std::indirect_unary_predicate<Pred, I>
I our_find_if(I first, S last, Pred pred) {
while (first != last) {
if (pred(*first)) {
break;
}
++first;
}
return first;
}
// this must be a random access iterator, otherwise we can't use fast traversal and the parallel algorithm would become useless
template <std::random_access_iterator I, std::sentinel_for<I> S, class Pred>
requires std::indirect_unary_predicate<Pred, I>
I parallel_find_if(I first, S last, Pred pred) {
static_assert(!std::is_same_v<S, std::unreachable_sentinel_t>,
"parallel_find_if does not support std::unreachable_sentinel_t as the sentinel type.");
// normally, this would check the length of the sequence and split it into chunks dynamically using an heuristic
// based on the length of the sequence and the available cpu cores.
// it should also accept a threadpool if the user already has one
auto chunk_size = std::distance(first, last) / 4;
auto limit1 = std::next(first, chunk_size);
auto limit2 = std::next(limit1, chunk_size);
auto limit3 = std::next(limit2, chunk_size);
// we pass non-everlapping subranges to each thread
auto thread1 = std::async(std::launch::async, our_find_if<I, S, Pred>, first, limit1, pred);
auto thread2 = std::async(std::launch::async, our_find_if<I, S, Pred>, limit1, limit2, pred);
auto thread3 = std::async(std::launch::async, our_find_if<I, S, Pred>, limit2, limit3, pred);
auto thread4 = std::async(std::launch::async, our_find_if<I, S, Pred>, limit3, last, pred);
// now we wait and ge the values from each thread
auto found1 = thread1.get();
if (found1 != limit1) {
// we found a value in the first subrange!
return found1;
}
// we didn't find a value in the first subrange
auto found2 = thread2.get();
if (found2 != limit2) {
return found2;
}
auto found3 = thread3.get();
if (found3 != limit3) {
return found3;
}
// we didn't find what we searched for in none of the first 3 subranges
// this means it's either in the last subrange, or we should return last
return thread4.get();
}