@@ -158,26 +158,80 @@ void parallelFor(IndexType start, IndexType end, const Function& func,
158158 }
159159}
160160
161+ template <typename IndexType, typename Function>
162+ void parallelRangeFor (IndexType start, IndexType end, const Function& func,
163+ ExecutionPolicy policy) {
164+ if (start > end) {
165+ return ;
166+ }
167+
168+ // Estimate number of threads in the pool
169+ static const unsigned int numThreadsHint =
170+ std::thread::hardware_concurrency ();
171+ const unsigned int numThreads =
172+ (policy == ExecutionPolicy::kParallel )
173+ ? (numThreadsHint == 0u ? 8u : numThreadsHint)
174+ : 1 ;
175+
176+ // Size of a slice for the range functions
177+ IndexType n = end - start + 1 ;
178+ IndexType slice =
179+ (IndexType)std::round (n / static_cast <double >(numThreads));
180+ slice = std::max (slice, IndexType (1 ));
181+
182+ // Create pool and launch jobs
183+ std::vector<std::thread> pool;
184+ pool.reserve (numThreads);
185+ IndexType i1 = start;
186+ IndexType i2 = std::min (start + slice, end);
187+ for (unsigned int i = 0 ; i + 1 < numThreads && i1 < end; ++i) {
188+ pool.emplace_back (func, i1, i2);
189+ i1 = i2;
190+ i2 = std::min (i2 + slice, end);
191+ }
192+ if (i1 < end) {
193+ pool.emplace_back (func, i1, end);
194+ }
195+
196+ // Wait for jobs to finish
197+ for (std::thread& t : pool) {
198+ if (t.joinable ()) {
199+ t.join ();
200+ }
201+ }
202+ }
203+
161204template <typename IndexType, typename Function>
162205void parallelFor (IndexType beginIndexX, IndexType endIndexX,
163206 IndexType beginIndexY, IndexType endIndexY,
164207 const Function& function, ExecutionPolicy policy) {
165208 parallelFor (beginIndexY, endIndexY,
166- [&](size_t j) {
209+ [&](IndexType j) {
167210 for (IndexType i = beginIndexX; i < endIndexX; ++i) {
168211 function (i, j);
169212 }
170213 },
171214 policy);
172215}
173216
217+ template <typename IndexType, typename Function>
218+ void parallelRangeFor (IndexType beginIndexX, IndexType endIndexX,
219+ IndexType beginIndexY, IndexType endIndexY,
220+ const Function& function, ExecutionPolicy policy) {
221+ parallelRangeFor (beginIndexY, endIndexY,
222+ [&](IndexType jBegin, IndexType jEnd) {
223+ function (beginIndexX, endIndexX, jBegin, jEnd);
224+ },
225+ policy);
226+ }
227+
174228template <typename IndexType, typename Function>
175229void parallelFor (IndexType beginIndexX, IndexType endIndexX,
176230 IndexType beginIndexY, IndexType endIndexY,
177231 IndexType beginIndexZ, IndexType endIndexZ,
178232 const Function& function, ExecutionPolicy policy) {
179233 parallelFor (beginIndexZ, endIndexZ,
180- [&](size_t k) {
234+ [&](IndexType k) {
181235 for (IndexType j = beginIndexY; j < endIndexY; ++j) {
182236 for (IndexType i = beginIndexX; i < endIndexX; ++i) {
183237 function (i, j, k);
@@ -187,6 +241,19 @@ void parallelFor(IndexType beginIndexX, IndexType endIndexX,
187241 policy);
188242}
189243
244+ template <typename IndexType, typename Function>
245+ void parallelRangeFor (IndexType beginIndexX, IndexType endIndexX,
246+ IndexType beginIndexY, IndexType endIndexY,
247+ IndexType beginIndexZ, IndexType endIndexZ,
248+ const Function& function, ExecutionPolicy policy) {
249+ parallelRangeFor (beginIndexZ, endIndexZ,
250+ [&](IndexType kBegin , IndexType kEnd ) {
251+ function (beginIndexX, endIndexX, beginIndexY,
252+ endIndexY, kBegin , kEnd );
253+ },
254+ policy);
255+ }
256+
190257template <typename IndexType, typename Value, typename Function,
191258 typename Reduce>
192259Value parallelReduce (IndexType start, IndexType end, const Value& identity,
0 commit comments