@@ -87,94 +87,94 @@ std::array<size_t, 2> assign_split_factors_2d(const box<3>& full_chunk, const ra
8787
8888namespace celerity ::detail {
8989
90- std::vector<box<3 >> split_1d (const box<3 >& full_chunk , const range<3 >& granularity, const size_t num_chunks ) {
90+ std::vector<box<3 >> split_1d (const box<3 >& full_box , const range<3 >& granularity, const size_t num_boxs ) {
9191#ifndef NDEBUG
92- assert (num_chunks > 0 );
92+ assert (num_boxs > 0 );
9393 for (int d = 0 ; d < 3 ; ++d) {
9494 assert (granularity[d] > 0 );
95- assert (full_chunk .get_range ()[d] % granularity[d] == 0 );
95+ assert (full_box .get_range ()[d] % granularity[d] == 0 );
9696 }
9797#endif
9898
9999 // Due to split granularity requirements or if num_workers > global_size[0],
100- // we may not be able to create the requested number of chunks .
101- const std::array<size_t , 1 > actual_num_chunks = {std::min (num_chunks, full_chunk .get_range ()[0 ] / granularity[0 ])};
102- const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<1 >(full_chunk , granularity, actual_num_chunks );
100+ // we may not be able to create the requested number of boxs .
101+ const std::array<size_t , 1 > actual_num_boxs = {std::min (num_boxs, full_box .get_range ()[0 ] / granularity[0 ])};
102+ const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<1 >(full_box , granularity, actual_num_boxs );
103103
104104 std::vector<box<3 >> result;
105- result.reserve (actual_num_chunks [0 ]);
105+ result.reserve (actual_num_boxs [0 ]);
106106 for (auto i = 0u ; i < num_large_chunks[0 ]; ++i) {
107- id<3 > min = full_chunk .get_min ();
108- id<3 > max = full_chunk .get_max ();
107+ id<3 > min = full_box .get_min ();
108+ id<3 > max = full_box .get_max ();
109109 min[0 ] += i * large_chunk_size[0 ];
110110 max[0 ] = min[0 ] + large_chunk_size[0 ];
111111 result.emplace_back (min, max);
112112 }
113- for (auto i = num_large_chunks[0 ]; i < actual_num_chunks [0 ]; ++i) {
114- id<3 > min = full_chunk .get_min ();
115- id<3 > max = full_chunk .get_max ();
113+ for (auto i = num_large_chunks[0 ]; i < actual_num_boxs [0 ]; ++i) {
114+ id<3 > min = full_box .get_min ();
115+ id<3 > max = full_box .get_max ();
116116 min[0 ] += num_large_chunks[0 ] * large_chunk_size[0 ] + (i - num_large_chunks[0 ]) * small_chunk_size[0 ];
117117 max[0 ] = min[0 ] + small_chunk_size[0 ];
118118 result.emplace_back (min, max);
119119 }
120120
121121#ifndef NDEBUG
122- sanity_check_split (full_chunk , result);
122+ sanity_check_split (full_box , result);
123123#endif
124124
125125 return result;
126126}
127127
128128// TODO: Make the split dimensions configurable for 3D chunks?
129- std::vector<box<3 >> split_2d (const box<3 >& full_chunk , const range<3 >& granularity, const size_t num_chunks ) {
129+ std::vector<box<3 >> split_2d (const box<3 >& full_box , const range<3 >& granularity, const size_t num_boxs ) {
130130#ifndef NDEBUG
131- assert (num_chunks > 0 );
131+ assert (num_boxs > 0 );
132132 for (int d = 0 ; d < 3 ; ++d) {
133133 assert (granularity[d] > 0 );
134- assert (full_chunk .get_range ()[d] % granularity[d] == 0 );
134+ assert (full_box .get_range ()[d] % granularity[d] == 0 );
135135 }
136136#endif
137137
138- // Factorize num_chunks
139- // We start out with an initial guess of `factor = floor(sqrt(num_chunks ))` (the other one is implicitly given by `num_chunks / factor`),
138+ // Factorize num_boxs
139+ // We start out with an initial guess of `factor = floor(sqrt(num_boxs ))` (the other one is implicitly given by `num_boxs / factor`),
140140 // and work our way down, keeping track of the best factorization we've found so far, until we find a factorization that produces
141- // the requested number of chunks, or until we reach (1, num_chunks ), i.e., a 1D split.
142- size_t factor = std::floor (std::sqrt (num_chunks ));
141+ // the requested number of chunks, or until we reach (1, num_boxs ), i.e., a 1D split.
142+ size_t factor = std::floor (std::sqrt (num_boxs ));
143143 std::array<size_t , 2 > best_chunk_counts = {0 , 0 };
144144 while (factor >= 1 ) {
145- while (factor > 1 && num_chunks % factor != 0 ) {
145+ while (factor > 1 && num_boxs % factor != 0 ) {
146146 factor--;
147147 }
148- // The returned counts are at most (factor, num_chunks / factor), however may be less if constrained by the split granularity.
149- const auto chunk_counts = assign_split_factors_2d (full_chunk , granularity, factor, num_chunks );
148+ // The returned counts are at most (factor, num_boxs / factor), however may be less if constrained by the split granularity.
149+ const auto chunk_counts = assign_split_factors_2d (full_box , granularity, factor, num_boxs );
150150 if (chunk_counts[0 ] * chunk_counts[1 ] > best_chunk_counts[0 ] * best_chunk_counts[1 ]) { best_chunk_counts = chunk_counts; }
151- if (chunk_counts[0 ] * chunk_counts[1 ] == num_chunks ) { break ; }
151+ if (chunk_counts[0 ] * chunk_counts[1 ] == num_boxs ) { break ; }
152152 factor--;
153153 }
154154 const auto actual_num_chunks = best_chunk_counts;
155- const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<2 >(full_chunk , granularity, actual_num_chunks);
155+ const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<2 >(full_box , granularity, actual_num_chunks);
156156
157157 std::vector<box<3 >> result;
158158 result.reserve (actual_num_chunks[0 ] * actual_num_chunks[1 ]);
159- id<3 > offset = full_chunk .get_min ();
159+ id<3 > offset = full_box .get_min ();
160160
161161 for (size_t j = 0 ; j < actual_num_chunks[0 ]; ++j) {
162162 range<2 > chunk_size = {(j < num_large_chunks[0 ]) ? large_chunk_size[0 ] : small_chunk_size[0 ], 0 };
163163 for (size_t i = 0 ; i < actual_num_chunks[1 ]; ++i) {
164164 chunk_size[1 ] = (i < num_large_chunks[1 ]) ? large_chunk_size[1 ] : small_chunk_size[1 ];
165165 const id<3 > min = offset;
166- id<3 > max = full_chunk .get_max ();
166+ id<3 > max = full_box .get_max ();
167167 max[0 ] = min[0 ] + chunk_size[0 ];
168168 max[1 ] = min[1 ] + chunk_size[1 ];
169169 result.emplace_back (min, max);
170170 offset[1 ] += chunk_size[1 ];
171171 }
172172 offset[0 ] += chunk_size[0 ];
173- offset[1 ] = full_chunk .get_min ()[1 ];
173+ offset[1 ] = full_box .get_min ()[1 ];
174174 }
175175
176176#ifndef NDEBUG
177- sanity_check_split (full_chunk , result);
177+ sanity_check_split (full_box , result);
178178#endif
179179
180180 return result;
0 commit comments