@@ -17,26 +17,26 @@ namespace {
1717using namespace celerity ;
1818using namespace celerity ::detail;
1919
// Debug-time consistency check for the result of a split.
// (This span is a diff: `-` lines are the pre-change chunk<3> version, `+` lines the post-change
// box<3> version of the same statement.)
// Walks `split`, asserting that each box does not intersect the union of the boxes seen before it,
// and finally asserts that region_difference(union, full_chunk) is empty.
// All checks are assert()s and the function returns void, so it reports nothing when NDEBUG is defined.
// NOTE(review): assuming region_difference(a, b) means "a minus b", the final assert shows that the
// union stays inside full_chunk, but not that it covers all of it - confirm whether that is intended.
20- [[maybe_unused]] void sanity_check_split (const chunk <3 >& full_chunk, const std::vector<chunk <3 >>& split) {
20+ [[maybe_unused]] void sanity_check_split (const box <3 >& full_chunk, const std::vector<box <3 >>& split) {
2121 region<3 > reconstructed_chunk;
2222 for (auto & chnk : split) {
// Disjointness: the current box must not overlap anything accumulated so far.
23- assert (region_intersection (reconstructed_chunk, box< 3 >( chnk) ).empty ());
24- reconstructed_chunk = region_union (box< 3 >( chnk) , reconstructed_chunk);
23+ assert (region_intersection (reconstructed_chunk, chnk).empty ());
24+ reconstructed_chunk = region_union (chnk, reconstructed_chunk);
2525 }
// Nothing of the accumulated union may remain after removing full_chunk from it.
26- assert (region_difference (reconstructed_chunk, box< 3 >( full_chunk) ).empty ());
26+ assert (region_difference (reconstructed_chunk, full_chunk).empty ());
2727}
2828
// Computes, for each of the first `Dims` dimensions, the two chunk extents used by an uneven split
// and how many chunks receive the larger one:
//  - small_chunk_size[d]: (extent of full_chunk in d) / actual_num_chunks[d] using integer division,
//    rounded down to a multiple of granularity[d]
//  - large_chunk_size[d]: small_chunk_size[d] + granularity[d]
//  - num_large_chunks[d]: the extent left over after giving every chunk the small size,
//    counted in units of granularity[d]
// Returns the tuple {small_chunk_size, large_chunk_size, num_large_chunks}.
// Nothing is validated here: actual_num_chunks[d] and granularity[d] are used as divisors,
// so callers must pass non-zero values for every d < Dims.
2929template <int Dims>
3030std::tuple<range<Dims>, range<Dims>, range<Dims>> compute_small_and_large_chunks (
31- const chunk <3 >& full_chunk, const range<3 >& granularity, const std::array<size_t , Dims>& actual_num_chunks) {
31+ const box <3 >& full_chunk, const range<3 >& granularity, const std::array<size_t , Dims>& actual_num_chunks) {
3232 range<Dims> small_chunk_size{zeros};
3333 range<Dims> large_chunk_size{zeros};
3434 range<Dims> num_large_chunks{zeros};
3535 for (int d = 0 ; d < Dims; ++d) {
// Integer division: any remainder is redistributed below in granularity-sized steps.
36- const size_t ideal_chunk_size = full_chunk.range [d] / actual_num_chunks[d];
36+ const size_t ideal_chunk_size = full_chunk.get_range () [d] / actual_num_chunks[d];
3737 small_chunk_size[d] = (ideal_chunk_size / granularity[d]) * granularity[d];
3838 large_chunk_size[d] = small_chunk_size[d] + granularity[d];
// Number of granularity-sized pieces not covered when every chunk has the small size.
39- num_large_chunks[d] = (full_chunk.range [d] - small_chunk_size[d] * actual_num_chunks[d]) / granularity[d];
39+ num_large_chunks[d] = (full_chunk.get_range () [d] - small_chunk_size[d] * actual_num_chunks[d]) / granularity[d];
4040 }
4141 return {small_chunk_size, large_chunk_size, num_large_chunks};
4242}
@@ -51,9 +51,9 @@ std::tuple<range<Dims>, range<Dims>, range<Dims>> compute_small_and_large_chunks
5151 * @returns The number of chunks that can be created in dimension 0 and dimension 1, respectively. These are at most
5252 * (f0, f1) or (f1, f0), however may be less if constrained by the split granularity.
5353 */
54- std::array<size_t , 2 > assign_split_factors_2d (const chunk <3 >& full_chunk, const range<3 >& granularity, const size_t factor, const size_t num_chunks) {
54+ std::array<size_t , 2 > assign_split_factors_2d (const box <3 >& full_chunk, const range<3 >& granularity, const size_t factor, const size_t num_chunks) {
5555 assert (num_chunks % factor == 0 );
56- const size_t max_chunks[2 ] = {full_chunk.range [0 ] / granularity[0 ], full_chunk.range [1 ] / granularity[1 ]};
56+ const size_t max_chunks[2 ] = {full_chunk.get_range () [0 ] / granularity[0 ], full_chunk.get_range () [1 ] / granularity[1 ]};
5757 const size_t f0 = factor;
5858 const size_t f1 = num_chunks / factor;
5959
@@ -71,12 +71,12 @@ std::array<size_t, 2> assign_split_factors_2d(const chunk<3>& full_chunk, const
7171
7272 // If domain is square(-ish), prefer splitting along slower dimension.
7373 // (These bounds have been chosen arbitrarily!)
74- const double squareishness = std::sqrt (full_chunk.range . size ()) / static_cast <double >(full_chunk.range [0 ]);
74+ const double squareishness = std::sqrt (full_chunk.get_area ()) / static_cast <double >(full_chunk.get_range () [0 ]);
7575 if (squareishness > 0.95 && squareishness < 1.05 ) { return (f0 >= f1) ? split_0_1 : split_1_0; }
7676
7777 // For non-square domains, prefer split that produces shorter edges (compare sum of circumferences)
78- const auto circ0 = full_chunk.range [0 ] / split_0_1[0 ] + full_chunk.range [1 ] / split_0_1[1 ];
79- const auto circ1 = full_chunk.range [0 ] / split_1_0[0 ] + full_chunk.range [1 ] / split_1_0[1 ];
78+ const auto circ0 = full_chunk.get_range () [0 ] / split_0_1[0 ] + full_chunk.get_range () [1 ] / split_0_1[1 ];
79+ const auto circ1 = full_chunk.get_range () [0 ] / split_1_0[0 ] + full_chunk.get_range () [1 ] / split_1_0[1 ];
8080 return circ0 < circ1 ? split_0_1 : split_1_0;
8181
8282 // TODO: Yet another heuristic we may want to consider is how even chunk sizes are,
@@ -87,28 +87,35 @@ std::array<size_t, 2> assign_split_factors_2d(const chunk<3>& full_chunk, const
8787
8888namespace celerity ::detail {
8989
// Splits `full_chunk` along dimension 0 into at most `num_chunks` pieces whose dimension-0 extents
// are multiples of granularity[0]; dimensions 1 and 2 keep the bounds of `full_chunk`.
// (This span is a diff: `-` lines are the pre-change chunk<3> version, `+` lines the post-change
// box<3> version. The lines between the trailing `#ifndef NDEBUG` and the closing brace are elided
// by the hunk boundary and are not described here.)
// In builds without NDEBUG, asserts that num_chunks > 0, that every granularity component is > 0 and
// that every extent of full_chunk is divisible by the matching granularity component.
// NOTE(review): if the dimension-0 extent is smaller than granularity[0] (e.g. an empty box),
// actual_num_chunks[0] becomes 0 and compute_small_and_large_chunks divides by it - confirm that
// callers never pass such a box.
90- std::vector<chunk <3 >> split_1d (const chunk <3 >& full_chunk, const range<3 >& granularity, const size_t num_chunks) {
90+ std::vector<box <3 >> split_1d (const box <3 >& full_chunk, const range<3 >& granularity, const size_t num_chunks) {
9191#ifndef NDEBUG
9292 assert (num_chunks > 0 );
9393 for (int d = 0 ; d < 3 ; ++d) {
9494 assert (granularity[d] > 0 );
95- assert (full_chunk.range [d] % granularity[d] == 0 );
95+ assert (full_chunk.get_range () [d] % granularity[d] == 0 );
9696 }
9797#endif
9898
9999 // Due to split granularity requirements or if num_workers > global_size[0],
100100 // we may not be able to create the requested number of chunks.
101- const std::array<size_t , 1 > actual_num_chunks = {std::min (num_chunks, full_chunk.range [0 ] / granularity[0 ])};
101+ const std::array<size_t , 1 > actual_num_chunks = {std::min (num_chunks, full_chunk.get_range () [0 ] / granularity[0 ])};
102102 const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<1 >(full_chunk, granularity, actual_num_chunks);
103103
// Pre-change: pre-filled the vector with copies of full_chunk and patched dimension 0 in place.
// Post-change: reserves capacity and appends each piece as a (min, max) box.
104- std::vector<chunk<3 >> result (actual_num_chunks[0 ], {full_chunk.offset , full_chunk.range , full_chunk.global_size });
104+ std::vector<box<3 >> result;
105+ result.reserve (actual_num_chunks[0 ]);
// Large pieces come first, placed back to back starting at full_chunk's minimum in dimension 0.
105106 for (auto i = 0u ; i < num_large_chunks[0 ]; ++i) {
106- result[i].range [0 ] = large_chunk_size[0 ];
107- result[i].offset [0 ] += i * large_chunk_size[0 ];
107+ id<3 > min = full_chunk.get_min ();
108+ id<3 > max = full_chunk.get_max ();
109+ min[0 ] += i * large_chunk_size[0 ];
110+ max[0 ] = min[0 ] + large_chunk_size[0 ];
111+ result.emplace_back (min, max);
108112 }
// Small pieces follow; their start is shifted past all large pieces plus the preceding small ones.
109113 for (auto i = num_large_chunks[0 ]; i < actual_num_chunks[0 ]; ++i) {
110- result[i].range [0 ] = small_chunk_size[0 ];
111- result[i].offset [0 ] += num_large_chunks[0 ] * large_chunk_size[0 ] + (i - num_large_chunks[0 ]) * small_chunk_size[0 ];
114+ id<3 > min = full_chunk.get_min ();
115+ id<3 > max = full_chunk.get_max ();
116+ min[0 ] += num_large_chunks[0 ] * large_chunk_size[0 ] + (i - num_large_chunks[0 ]) * small_chunk_size[0 ];
117+ max[0 ] = min[0 ] + small_chunk_size[0 ];
118+ result.emplace_back (min, max);
112119 }
113120
114121#ifndef NDEBUG
@@ -119,12 +126,12 @@ std::vector<chunk<3>> split_1d(const chunk<3>& full_chunk, const range<3>& granu
119126}
120127
121128// TODO: Make the split dimensions configurable for 3D chunks?
122- std::vector<chunk <3 >> split_2d (const chunk <3 >& full_chunk, const range<3 >& granularity, const size_t num_chunks) {
129+ std::vector<box <3 >> split_2d (const box <3 >& full_chunk, const range<3 >& granularity, const size_t num_chunks) {
123130#ifndef NDEBUG
124131 assert (num_chunks > 0 );
125132 for (int d = 0 ; d < 3 ; ++d) {
126133 assert (granularity[d] > 0 );
127- assert (full_chunk.range [d] % granularity[d] == 0 );
134+ assert (full_chunk.get_range () [d] % granularity[d] == 0 );
128135 }
129136#endif
130137
@@ -147,21 +154,23 @@ std::vector<chunk<3>> split_2d(const chunk<3>& full_chunk, const range<3>& granu
147154 const auto actual_num_chunks = best_chunk_counts;
148155 const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<2 >(full_chunk, granularity, actual_num_chunks);
149156
150- std::vector<chunk<3 >> result (actual_num_chunks[0 ] * actual_num_chunks[1 ], {full_chunk.offset , full_chunk.range , full_chunk.global_size });
151- id<3 > offset = full_chunk.offset ;
157+ std::vector<box<3 >> result;
158+ result.reserve (actual_num_chunks[0 ] * actual_num_chunks[1 ]);
159+ id<3 > offset = full_chunk.get_min ();
152160
153161 for (size_t j = 0 ; j < actual_num_chunks[0 ]; ++j) {
154162 range<2 > chunk_size = {(j < num_large_chunks[0 ]) ? large_chunk_size[0 ] : small_chunk_size[0 ], 0 };
155163 for (size_t i = 0 ; i < actual_num_chunks[1 ]; ++i) {
156164 chunk_size[1 ] = (i < num_large_chunks[1 ]) ? large_chunk_size[1 ] : small_chunk_size[1 ];
157- auto & chnk = result[j * actual_num_chunks[1 ] + i];
158- chnk.offset = offset;
159- chnk.range [0 ] = chunk_size[0 ];
160- chnk.range [1 ] = chunk_size[1 ];
165+ const id<3 > min = offset;
166+ id<3 > max = full_chunk.get_max ();
167+ max[0 ] = min[0 ] + chunk_size[0 ];
168+ max[1 ] = min[1 ] + chunk_size[1 ];
169+ result.emplace_back (min, max);
161170 offset[1 ] += chunk_size[1 ];
162171 }
163172 offset[0 ] += chunk_size[0 ];
164- offset[1 ] = full_chunk.offset [1 ];
173+ offset[1 ] = full_chunk.get_min () [1 ];
165174 }
166175
167176#ifndef NDEBUG
0 commit comments