Skip to content

Commit 54312af

Browse files
obrejank and tpadioleau authored
Feature: Add profiling label support to splines and parallel reduce (#1114)
* Added profiling labels on the allocations in spline builders and evaluators
* Added profiling labels support to parallel transforms
* Apply suggestion from @tpadioleau
* Format
* Use concepts instead of SFINAE
* Pass by copy for constructors
* Add Kevin to AUTHORS
* Format

---------

Co-authored-by: Thomas Padioleau <thomas.padioleau@cea.fr>
1 parent e6090ed commit 54312af

9 files changed

Lines changed: 271 additions & 10 deletions

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,4 @@ Padioleau Thomas - CEA (<thomas.padioleau@cea.fr>)
6464
## Minor contributions
6565

6666
Md. Sajid Khan
67+
Kevin Obrejan

src/ddc/kernels/fft.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ void impl(
223223
norm_coef = (backward_full_norm_coef(DiscreteDomain<DDimOut>(ddom_out)) * ...);
224224
}
225225

226-
ddc::parallel_transform(exec_space, out, ScaleFn<real_type_t<Tout>>(norm_coef));
226+
ddc::parallel_transform("ddc_fft", exec_space, out, ScaleFn<real_type_t<Tout>>(norm_coef));
227227
}
228228
}
229229

src/ddc/kernels/splines/spline_builder.hpp

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,13 +200,17 @@ class SplineBuilder
200200
// interpolator specific
201201
std::unique_ptr<ddc::detail::SplinesLinearProblem<exec_space>> m_matrix;
202202

203+
std::string m_label;
204+
203205
/// Calculate offset so that the matrix is diagonally dominant
204206
void compute_offset(interpolation_domain_type const& interpolation_domain, int& offset);
205207

206208
public:
207209
/**
208210
* @brief Build a SplineBuilder acting on interpolation_domain.
209211
*
212+
* @param label A label used to tag parallel regions and memory allocations for profiling.
213+
*
210214
* @param interpolation_domain The domain on which the interpolation points are defined.
211215
*
212216
* @param cols_per_chunk A parameter used by the slicer (internal to the solver) to define the size
@@ -221,12 +225,14 @@ class SplineBuilder
221225
* @see MatrixSparse
222226
*/
223227
explicit SplineBuilder(
228+
std::string label,
224229
interpolation_domain_type const& interpolation_domain,
225230
std::optional<std::size_t> cols_per_chunk = std::nullopt,
226231
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
227232
: m_interpolation_domain(interpolation_domain)
228233
, m_dx((ddc::discrete_space<BSplines>().rmax() - ddc::discrete_space<BSplines>().rmin())
229234
/ ddc::discrete_space<BSplines>().ncells())
235+
, m_label(std::move(label))
230236
{
231237
static_assert(
232238
((BcLower == BoundCond::PERIODIC) == (BcUpper == BoundCond::PERIODIC)),
@@ -252,6 +258,66 @@ class SplineBuilder
252258
preconditioner_max_block_size);
253259
}
254260

261+
/**
262+
* @brief Build a SplineBuilder acting on interpolation_domain.
263+
*
264+
* @param interpolation_domain The domain on which the interpolation points are defined.
265+
*
266+
* @param cols_per_chunk A parameter used by the slicer (internal to the solver) to define the size
267+
* of a chunk of right-hand sides of the linear problem to be computed in parallel (chunks are treated
268+
* by the linear solver one-after-the-other).
269+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
270+
*
271+
* @param preconditioner_max_block_size A parameter used by the slicer (internal to the solver) to
272+
* define the size of a block used by the Block-Jacobi preconditioner.
273+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
274+
*
275+
* @see MatrixSparse
276+
*/
277+
explicit SplineBuilder(
278+
interpolation_domain_type const& interpolation_domain,
279+
std::optional<std::size_t> cols_per_chunk = std::nullopt,
280+
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
281+
: SplineBuilder(
282+
"no-label",
283+
interpolation_domain,
284+
cols_per_chunk,
285+
preconditioner_max_block_size)
286+
{
287+
}
288+
289+
/**
290+
* @brief Build a SplineBuilder acting on the interpolation domain contained by batched_interpolation_domain.
291+
*
292+
* @param label A label used to tag parallel regions and memory allocations for profiling.
293+
*
294+
* @param batched_interpolation_domain The whole domain on which the interpolation points are defined.
295+
*
296+
* @param cols_per_chunk A parameter used by the slicer (internal to the solver) to define the size
297+
* of a chunk of right-hand sides of the linear problem to be computed in parallel (chunks are treated
298+
* by the linear solver one-after-the-other).
299+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
300+
*
301+
* @param preconditioner_max_block_size A parameter used by the slicer (internal to the solver) to
302+
* define the size of a block used by the Block-Jacobi preconditioner.
303+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
304+
*
305+
* @see MatrixSparse
306+
*/
307+
template <concepts::discrete_domain BatchedInterpolationDDom>
308+
explicit SplineBuilder(
309+
std::string label,
310+
BatchedInterpolationDDom const& batched_interpolation_domain,
311+
std::optional<std::size_t> cols_per_chunk = std::nullopt,
312+
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
313+
: SplineBuilder(
314+
std::move(label),
315+
interpolation_domain_type(batched_interpolation_domain),
316+
cols_per_chunk,
317+
preconditioner_max_block_size)
318+
{
319+
}
320+
255321
/**
256322
* @brief Build a SplineBuilder acting on the interpolation domain contained by batched_interpolation_domain.
257323
*
@@ -274,6 +340,7 @@ class SplineBuilder
274340
std::optional<std::size_t> cols_per_chunk = std::nullopt,
275341
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
276342
: SplineBuilder(
343+
"no-label",
277344
interpolation_domain_type(batched_interpolation_domain),
278345
cols_per_chunk,
279346
preconditioner_max_block_size)
@@ -938,11 +1005,12 @@ operator()(
9381005
// Allocate and fill a transposed version of spline in order to get dimension of interest as last dimension (optimal for GPU, necessary for Ginkgo). Also select only relevant rows in case of periodic boundaries
9391006
auto const& offset_proxy = m_offset;
9401007
ddc::Chunk spline_tr_alloc(
1008+
m_label + " > spline_tr (ddc::SplineBuilder::operator())",
9411009
batched_spline_tr_domain(batched_interpolation_domain),
9421010
ddc::KokkosAllocator<double, memory_space>());
9431011
ddc::ChunkSpan const spline_tr = spline_tr_alloc.span_view();
9441012
ddc::parallel_for_each(
945-
"ddc_splines_transpose_rhs",
1013+
m_label + " > ddc_splines_transpose_rhs",
9461014
exec_space(),
9471015
batch_domain(batched_interpolation_domain),
9481016
KOKKOS_LAMBDA(
@@ -961,7 +1029,7 @@ operator()(
9611029
m_matrix->solve(bcoef_section, false);
9621030
// Transpose back spline_tr into spline.
9631031
ddc::parallel_for_each(
964-
"ddc_splines_transpose_back_rhs",
1032+
m_label + " > ddc_splines_transpose_back_rhs",
9651033
exec_space(),
9661034
batch_domain(batched_interpolation_domain),
9671035
KOKKOS_LAMBDA(
@@ -975,7 +1043,7 @@ operator()(
9751043
// Duplicate the lower spline coefficients to the upper side in case of periodic boundaries
9761044
if (bsplines_type::is_periodic()) {
9771045
ddc::parallel_for_each(
978-
"ddc_splines_periodic_rows_duplicate_rhs",
1046+
m_label + " > ddc_splines_periodic_rows_duplicate_rhs",
9791047
exec_space(),
9801048
batch_domain(batched_interpolation_domain),
9811049
KOKKOS_LAMBDA(
@@ -1041,7 +1109,7 @@ SplineBuilder<ExecSpace, MemorySpace, BSplines, InterpolationDDim, BcLower, BcUp
10411109
// Allocate mirror with additional rows (cf. SplinesLinearProblem3x3Blocks documentation)
10421110
Kokkos::View<double**, Kokkos::LayoutRight, MemorySpace> const
10431111
integral_bsplines_mirror_with_additional_allocation(
1044-
"integral_bsplines_mirror_with_additional_allocation",
1112+
m_label + " > integral_bsplines_mirror_with_additional_allocation",
10451113
m_matrix->required_number_of_rhs_rows(),
10461114
1);
10471115

src/ddc/kernels/splines/spline_builder_2d.hpp

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <cassert>
88
#include <cstddef>
99
#include <optional>
10+
#include <string>
1011

1112
#include <ddc/ddc.hpp>
1213

@@ -179,8 +180,46 @@ class SplineBuilder2D
179180
private:
180181
builder_type1 m_spline_builder1;
181182
builder_type2 m_spline_builder2;
183+
std::string m_label;
182184

183185
public:
186+
/**
187+
* @brief Build a SplineBuilder2D acting on interpolation_domain.
188+
*
189+
* @param label A label used to tag parallel regions and memory allocations for profiling.
190+
*
191+
* @param interpolation_domain The domain on which the interpolation points are defined, without the batch dimensions.
192+
*
193+
* @param cols_per_chunk A parameter used by the slicer (internal to the solver) to define the size
194+
* of a chunk of right-hand-sides of the linear problem to be computed in parallel (chunks are treated
195+
* by the linear solver one-after-the-other).
196+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
197+
*
198+
* @param preconditioner_max_block_size A parameter used by the slicer (internal to the solver) to
199+
* define the size of a block used by the Block-Jacobi preconditioner.
200+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
201+
*
202+
* @see SplinesLinearProblemSparse
203+
*/
204+
explicit SplineBuilder2D(
205+
std::string const& label,
206+
interpolation_domain_type const& interpolation_domain,
207+
std::optional<std::size_t> cols_per_chunk = std::nullopt,
208+
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
209+
: m_spline_builder1(
210+
label,
211+
interpolation_domain,
212+
cols_per_chunk,
213+
preconditioner_max_block_size)
214+
, m_spline_builder2(
215+
label,
216+
interpolation_domain,
217+
cols_per_chunk,
218+
preconditioner_max_block_size)
219+
, m_label(label)
220+
{
221+
}
222+
184223
/**
185224
* @brief Build a SplineBuilder2D acting on interpolation_domain.
186225
*
@@ -201,8 +240,43 @@ class SplineBuilder2D
201240
interpolation_domain_type const& interpolation_domain,
202241
std::optional<std::size_t> cols_per_chunk = std::nullopt,
203242
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
204-
: m_spline_builder1(interpolation_domain, cols_per_chunk, preconditioner_max_block_size)
205-
, m_spline_builder2(interpolation_domain, cols_per_chunk, preconditioner_max_block_size)
243+
: SplineBuilder2D(
244+
"no-label",
245+
interpolation_domain,
246+
cols_per_chunk,
247+
preconditioner_max_block_size)
248+
{
249+
}
250+
251+
/**
252+
* @brief Build a SplineBuilder2D acting on the interpolation domain contained in batched_interpolation_domain.
253+
*
254+
* @param label A label used to tag parallel regions and memory allocations for profiling.
255+
*
256+
* @param batched_interpolation_domain The domain on which the interpolation points are defined.
257+
*
258+
* @param cols_per_chunk A parameter used by the slicer (internal to the solver) to define the size
259+
* of a chunk of right-hand-sides of the linear problem to be computed in parallel (chunks are treated
260+
* by the linear solver one-after-the-other).
261+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
262+
*
263+
* @param preconditioner_max_block_size A parameter used by the slicer (internal to the solver) to
264+
* define the size of a block used by the Block-Jacobi preconditioner.
265+
* This value is optional. If no value is provided then the default value is chosen by the requested solver.
266+
*
267+
* @see SplinesLinearProblemSparse
268+
*/
269+
template <concepts::discrete_domain BatchedInterpolationDDom>
270+
explicit SplineBuilder2D(
271+
std::string const& label,
272+
BatchedInterpolationDDom const& batched_interpolation_domain,
273+
std::optional<std::size_t> cols_per_chunk = std::nullopt,
274+
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
275+
: SplineBuilder2D(
276+
label,
277+
interpolation_domain_type(batched_interpolation_domain),
278+
cols_per_chunk,
279+
preconditioner_max_block_size)
206280
{
207281
}
208282

@@ -228,6 +302,7 @@ class SplineBuilder2D
228302
std::optional<std::size_t> cols_per_chunk = std::nullopt,
229303
std::optional<unsigned int> preconditioner_max_block_size = std::nullopt)
230304
: SplineBuilder2D(
305+
"no-label",
231306
interpolation_domain_type(batched_interpolation_domain),
232307
cols_per_chunk,
233308
preconditioner_max_block_size)
@@ -530,6 +605,7 @@ operator()(
530605
ddc::DiscreteVector<deriv_type2>(bsplines_type2::degree() / 2)));
531606

532607
ddc::Chunk spline1_deriv_min_alloc(
608+
m_label + " > spline1_deriv_min (ddc::SplineBuilder2D::operator())",
533609
m_spline_builder1.batched_spline_domain(batched_interpolation_deriv_domain),
534610
ddc::KokkosAllocator<double, MemorySpace>());
535611
auto spline1_deriv_min = spline1_deriv_min_alloc.span_view();
@@ -546,6 +622,7 @@ operator()(
546622

547623
// Spline1-approximate vals (to spline1)
548624
ddc::Chunk spline1_alloc(
625+
m_label + " > spline1 (ddc::SplineBuilder2D::operator())",
549626
m_spline_builder1.batched_spline_domain(batched_interpolation_domain),
550627
ddc::KokkosAllocator<double, MemorySpace>());
551628
ddc::ChunkSpan const spline1 = spline1_alloc.span_view();
@@ -554,6 +631,7 @@ operator()(
554631

555632
// Spline1-approximate derivs_max2 (to spline1_deriv_max)
556633
ddc::Chunk spline1_deriv_max_alloc(
634+
m_label + " > spline1_deriv_max (ddc::SplineBuilder2D::operator())",
557635
m_spline_builder1.batched_spline_domain(batched_interpolation_deriv_domain),
558636
ddc::KokkosAllocator<double, MemorySpace>());
559637
auto spline1_deriv_max = spline1_deriv_max_alloc.span_view();

0 commit comments

Comments
 (0)