diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 3b6148f9e9..1d09bfc587 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -30,6 +30,8 @@ using mdspan2_t = md::mdspan>; /// @brief Execute kernel over cells and accumulate result in a matrix. /// /// @tparam T Matrix/form scalar type. +/// @tparam LiftingMode If set true, only execute mat_set on cells with BCs in +/// column space. /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. @@ -60,7 +62,7 @@ using mdspan2_t = md::mdspan>; /// function mesh. /// @param cell_info1 Cell permutation information for the trial /// function mesh. -template +template void assemble_cells_matrix( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -101,6 +103,29 @@ void assemble_cells_matrix( std::int32_t cell0 = cells0[c]; std::int32_t cell1 = cells1[c]; + std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); + std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); + + // In "LiftingMode" only execute kernel if there are BCs on column space + if constexpr (LiftingMode) + { + auto has_bc = [&]() + { + for (std::int32_t dof : dofs1) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dof + k]) + return true; + } + } + return false; + }; + + if (!has_bc()) + continue; + } + // Get cell coordinates/geometry auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) @@ -115,38 +140,38 @@ void assemble_cells_matrix( P0(Ae, cell_info0, cell0, ndim1); // B = P0 \tilde{A} P1T(Ae, cell_info1, cell1, ndim0); // A = B P1_T - // Zero rows/columns for essential bcs - std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); - std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); - - if (!bc0.empty()) + // Do not clear BC rows/cols in "LiftingMode" + if constexpr (!LiftingMode) { - for (int i = 0; i < num_dofs0; ++i) + // Zero rows and columns for BCs + if (!bc0.empty()) { - for (int k = 0; k < bs0; ++k) + for (int i = 0; i < num_dofs0; ++i) { - if (bc0[bs0 * dofs0[i] + k]) + for (int k = 0; k < bs0; ++k) { - // Zero row bs0 * i + k - const int row = bs0 * i + k; - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + if (bc0[bs0 * dofs0[i] + k]) + { + // Zero row bs0 * i + k + const int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + } } } } - } - - if (!bc1.empty()) - { - for (int j = 0; j < num_dofs1; ++j) + if (!bc1.empty()) { - for (int k = 0; k < bs1; ++k) + for (int j = 0; j < num_dofs1; ++j) { - if (bc1[bs1 * dofs1[j] + k]) + for (int k = 0; k < bs1; ++k) { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; + if (bc1[bs1 * dofs1[j] + k]) + { + // Zero col bs1 * j + k + const int col = bs1 * j + k; + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; + } } } } @@ -168,6 +193,8 @@ void assemble_cells_matrix( /// from cell used to define the entity. /// /// @tparam T Matrix/form scalar type. +/// @tparam LiftingMode If set true, only execute mat_set on cells with BCs in +/// column space. /// @param[in] mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. @@ -198,7 +225,7 @@ void assemble_cells_matrix( /// function mesh. /// @param[in] perms Entity permutation integer. Empty if entity /// permutations are not required. -template +template void assemble_entities( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -236,6 +263,7 @@ void assemble_entities( const int ndim0 = bs0 * num_dofs0; const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); + assert(entities0.size() == entities.size()); assert(entities1.size() == entities.size()); for (std::size_t f = 0; f < entities.extent(0); ++f) @@ -248,6 +276,28 @@ void assemble_entities( std::int32_t cell0 = entities0(f, 0); std::int32_t cell1 = entities1(f, 0); + std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); + std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); + + // Check for BCs on column space + if constexpr (LiftingMode) + { + auto has_bc = [&]() + { + for (std::int32_t dof : dofs1) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dof + k]) + return true; + } + } + return false; + }; + if (!has_bc()) + continue; + } + // Get cell coordinates/geometry auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) @@ -263,36 +313,38 @@ void assemble_entities( P0(Ae, cell_info0, cell0, ndim1); P1T(Ae, cell_info1, cell1, ndim0); - // Zero rows/columns for essential bcs - std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); - std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); - if (!bc0.empty()) + // Don't clear rows/cols in LiftingMode + if constexpr (!LiftingMode) { - for (int i = 0; i < num_dofs0; ++i) + // Zero rows and columns for BCs + if (!bc0.empty()) { - for (int k = 0; k < bs0; ++k) + for (int i = 0; i < num_dofs0; ++i) { - if (bc0[bs0 * dofs0[i] + k]) + for (int k = 0; k < bs0; ++k) { - // Zero row bs0 * i + k - const int row = bs0 * i + k; - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + if (bc0[bs0 * dofs0[i] + k]) + { + // Zero row bs0 * i + k + const int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + } } } } - } - if (!bc1.empty()) - { - for (int j = 0; j < num_dofs1; ++j) + if (!bc1.empty()) { - for (int k = 0; k < bs1; ++k) + for (int j = 0; j < num_dofs1; ++j) { - if (bc1[bs1 * dofs1[j] + k]) + for (int k = 0; k < bs1; ++k) { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; + if (bc1[bs1 * dofs1[j] + k]) + { + // Zero column bs1 * j + k + const int col = bs1 * j + k; + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; + } } } } @@ -306,6 +358,8 @@ void assemble_entities( /// a matrix. /// /// @tparam T Matrix/form scalar type. +/// @tparam LiftingMode If set true, only execute mat_set on cells with BCs in +/// column space. /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. @@ -338,7 +392,7 @@ void assemble_entities( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_interior_facets( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -384,7 +438,7 @@ void assemble_interior_facets( const int num_cols = bs1 * 2 * dmap1_size; // Temporaries for joint dofmaps - std::vector Ae(num_rows * num_cols), be(num_rows); + std::vector Ae(num_rows * num_cols); std::vector dmapjoint0(2 * dmap0_size); std::vector dmapjoint1(2 * dmap1_size); assert(facets0.size() == facets.size()); @@ -433,6 +487,26 @@ void assemble_interior_facets( std::ranges::copy(dmap1_cell0, dmapjoint1.begin()); std::ranges::copy(dmap1_cell1, std::next(dmapjoint1.begin(), dmap1_size)); + // Check for BCs on column space + if constexpr (LiftingMode) + { + auto has_bc = [&]() + { + for (std::int32_t dof : dmapjoint1) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dof + k]) + return true; + } + } + return false; + }; + + if (!has_bc()) + continue; + } + // Tabulate tensor std::ranges::fill(Ae, 0); std::array perm = perms.empty() @@ -474,33 +548,37 @@ void assemble_interior_facets( } } - // Zero rows/columns for essential bcs - if (!bc0.empty()) + // Don't clear rows/cols in LiftingMode + if constexpr (!LiftingMode) { - for (std::size_t i = 0; i < dmapjoint0.size(); ++i) + // Zero rows and columns for BCs + if (!bc0.empty()) { - for (int k = 0; k < bs0; ++k) + for (std::size_t i = 0; i < dmapjoint0.size(); ++i) { - if (bc0[bs0 * dmapjoint0[i] + k]) + for (int k = 0; k < bs0; ++k) { - // Zero row bs0 * i + k - std::fill_n(std::next(Ae.begin(), num_cols * (bs0 * i + k)), - num_cols, 0); + if (bc0[bs0 * dmapjoint0[i] + k]) + { + // Zero row bs0 * i + k + int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), num_cols * row), num_cols, 0); + } } } } - } - if (!bc1.empty()) - { - for (std::size_t j = 0; j < dmapjoint1.size(); ++j) + if (!bc1.empty()) { - for (int k = 0; k < bs1; ++k) + for (std::size_t j = 0; j < dmapjoint1.size(); ++j) { - if (bc1[bs1 * dmapjoint1[j] + k]) + for (int k = 0; k < bs1; ++k) { - // Zero column bs1 * j + k - for (int m = 0; m < num_rows; ++m) - Ae[m * num_cols + bs1 * j + k] = 0; + if (bc1[bs1 * dmapjoint1[j] + k]) + { + // Zero column bs1 * j + k + for (int m = 0; m < num_rows; ++m) + Ae[m * num_cols + bs1 * j + k] = 0; + } } } } @@ -518,6 +596,8 @@ void assemble_interior_facets( /// /// @tparam T Scalar type. /// @tparam U Geometry type. +/// @tparam LiftingMode. Set to true to call the kernel only on cells with BCs +/// in bc1. /// @param[in] mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] a Bilinear form to assemble. @@ -528,7 +608,7 @@ void assemble_interior_facets( /// applied. /// @param bc1 Marker for columns with Dirichlet boundary conditions /// applied. -template +template void assemble_matrix( la::MatSet auto mat_set, const Form& a, md::mdspan, @@ -606,7 +686,7 @@ void assemble_matrix( std::span cells1 = a.domain_arg(IntegralType::cell, 1, i, cell_type_idx); auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); assert(cells.size() * cstride == coeffs.size()); - impl::assemble_cells_matrix( + impl::assemble_cells_matrix( mat_set, x_dofmap, x, cells, {dofs0, bs0, cells0}, P0, {dofs1, bs1, cells1}, P1T, bc0, bc1, fn, md::mdspan(coeffs.data(), cells.size(), cstride), constants, @@ -651,7 +731,7 @@ void assemble_matrix( std::span facets0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); std::span facets1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); assert((facets.size() / 4) * 2 * cstride == coeffs.size()); - impl::assemble_interior_facets( + impl::assemble_interior_facets( mat_set, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap0, bs0, @@ -696,7 +776,7 @@ void assemble_matrix( std::span e1 = a.domain_arg(itg_type, 1, i, 0); mdspanx2_t entities1(e1.data(), e1.size() / 2, 2); assert((entities.size() / 2) * cstride == coeffs.size()); - impl::assemble_entities( + impl::assemble_entities( mat_set, x_dofmap, x, entities, {dofs0, bs0, entities0}, P0, {dofs1, bs1, entities1}, P1T, bc0, bc1, fn, md::mdspan(coeffs.data(), entities.extent(0), cstride), constants, diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index db9f3a27b7..b045bd58fd 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -37,604 +37,6 @@ namespace dolfinx::fem::impl using mdspan2_t = md::mdspan>; /// @endcond -/// @brief Apply boundary condition lifting for cell integrals. -/// -/// @tparam T The scalar type. -/// @tparam _bs0 The block size of the form test function dof map. If -/// less than zero the block size is determined at runtime. If `_bs0` is -/// positive the block size is used as a compile-time constant, which -/// has performance benefits. -/// @tparam _bs1 The block size of the trial function dof map. -/// @param[in,out] b Vector to modify. -/// @param x_dofmap Dofmap for the mesh geometry. -/// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each cell. -/// @param[in] cells Cell indices to execute the kernel over. These are -/// the indices into the geometry dofmap `x_dofmap`. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. -/// @param[in] P0 Function that applies transformation `P_0 A` in-place -/// to the computed tensor `A` to transform its test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. -/// @param[in] P1T Function that applies transformation `A P_1^T` -/// in-place to to the computed tensor `A` to transform trial -/// degrees-of-freedom. -/// @param[in] constants Constant data in the kernel. -/// @param[in] coeffs Coefficient data in the kernel. It has shape -/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th -/// coefficient for cell `i`. -/// @param[in] cell_info0 Cell permutation information for the test -/// function mesh. -/// @param[in] cell_info1 Cell permutation information for the trial -/// function mesh. -/// @param[in] bc_values1 Value for entries with an applied boundary -/// condition. -/// @param[in] bc_markers1 Marker to identify which DOFs have boundary -/// conditions applied. -/// @param[in] x0 Vector used in the lifting. -/// @param[in] alpha Scaling to apply. -template ::value_type> - requires std::is_same_v::value_type, T> -void _lift_bc_cells( - V&& b, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - FEkernel auto kernel, std::span cells, - std::tuple> dofmap0, - fem::DofTransformKernel auto P0, - std::tuple> dofmap1, - fem::DofTransformKernel auto P1T, std::span constants, - md::mdspan> coeffs, - std::span cell_info0, - std::span cell_info1, std::span bc_values1, - std::span bc_markers1, std::span x0, T alpha) -{ - if (cells.empty()) - return; - - const auto [dmap0, bs0, cells0] = dofmap0; - const auto [dmap1, bs1, cells1] = dofmap1; - assert(_bs0 < 0 or _bs0 == bs0); - assert(_bs1 < 0 or _bs1 == bs1); - - const int num_rows = bs0 * dmap0.extent(1); - const int num_cols = bs1 * dmap1.extent(1); - - // Data structures used in bc application - std::vector> cdofs(3 * x_dofmap.extent(1)); - std::vector Ae(num_rows * num_cols), be(num_rows); - assert(cells0.size() == cells.size()); - assert(cells1.size() == cells.size()); - for (std::size_t index = 0; index < cells.size(); ++index) - { - // Cell index in integration domain mesh, test function mesh, and trial - // function mesh - std::int32_t c = cells[index]; - std::int32_t c0 = cells0[index]; - std::int32_t c1 = cells1[index]; - - // Get dof maps for cell - auto dofs1 = md::submdspan(dmap1, c1, md::full_extent); - - // Check if bc is applied to cell - bool has_bc = false; - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - assert(_bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[_bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - else - { - for (int k = 0; k < bs1; ++k) - { - assert(bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - } - - if (!has_bc) - continue; - - // Get cell coordinates/geometry - auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); - - // Size data structure for assembly - auto dofs0 = md::submdspan(dmap0, c0, md::full_extent); - - std::ranges::fill(Ae, 0); - kernel(Ae.data(), &coeffs(index, 0), constants.data(), cdofs.data(), - nullptr, nullptr, nullptr); - P0(Ae, cell_info0, c0, num_cols); - P1T(Ae, cell_info1, c1, num_rows); - - // Size data structure for assembly - std::ranges::fill(be, 0); - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - const std::int32_t jj = _bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // const T _x0 = 0; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + _bs1 * j + k] * alpha * (bc - _x0); - } - } - } - else - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - } - - for (std::size_t i = 0; i < dofs0.size(); ++i) - { - if constexpr (_bs0 > 0) - { - for (int k = 0; k < _bs0; ++k) - b[_bs0 * dofs0[i] + k] += be[_bs0 * i + k]; - } - else - { - for (int k = 0; k < bs0; ++k) - b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; - } - } - } -} - -/// @brief Apply lifting for exterior facet integrals. -/// -/// @tparam T Scalar type. -/// @param[in,out] b Vector to modify. -/// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. -/// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each entity. -/// @param[in] entities Entities to execute the kernel over, where for the -/// `i`th entity `enities(i, 0)` is the attached cell and `entities(i, 1)` -/// is the local index of the facet relative to the cell. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap -/// indices. See `facets` documentation for the dofmap indices layout. -/// @param[in] P0 Function that applies the transformation `P_0 A` -/// in-place to `A` to transform the test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data. -/// See `dofmap0` for a description. -/// @param[in] P1T Function that applies the transformation `A P_1^T` -/// in-place to `A` to transform the trial degrees-of-freedom. -/// @param[in] constants Constant coefficient data in the kernel. -/// @param[in] coeffs Coefficient data in the kernel. It has shape -/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th -/// coefficient for cell `i`. -/// @param[in] cell_info0 Cell permutation information for the test -/// function mesh. -/// @param[in] cell_info1 Cell permutation information for the trial -/// function mesh. -/// @param[in] bc_values1 Values for entries with an applied boundary -/// condition. -/// @param[in] bc_markers1 Marker to identify which DOFs have boundary -/// conditions applied. -/// @param[in] x0 The vector used in the lifting. -/// @param[in] alpha Scaling to apply. -/// @param[in] perms Facet permutation data, where `(cell_idx, -/// local_facet_idx)` is the permutation value for the facet attached to -/// the cell `cell_idx` with local index `local_facet_idx` relative to -/// the cell. Empty if facet permutations are not required. -template ::value_type> - requires std::is_same_v::value_type, T> -void _lift_bc_entities( - V&& b, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - FEkernel auto kernel, - md::mdspan> - entities, - std::tuple>> - dofmap0, - fem::DofTransformKernel auto P0, - std::tuple>> - dofmap1, - fem::DofTransformKernel auto P1T, std::span constants, - md::mdspan> coeffs, - std::span cell_info0, - std::span cell_info1, std::span bc_values1, - std::span bc_markers1, std::span x0, T alpha, - md::mdspan> perms) -{ - if (entities.empty()) - return; - - const auto [dmap0, bs0, entities0] = dofmap0; - const auto [dmap1, bs1, entities1] = dofmap1; - - const int num_rows = bs0 * dmap0.extent(1); - const int num_cols = bs1 * dmap1.extent(1); - - // Data structures used in bc application - std::vector> cdofs(3 * x_dofmap.extent(1)); - std::vector Ae(num_rows * num_cols), be(num_rows); - assert(entities0.size() == entities.size()); - assert(entities1.size() == entities.size()); - for (std::size_t index = 0; index < entities.extent(0); ++index) - { - // Cell in integration domain, test function and trial function - // meshes - std::int32_t cell = entities(index, 0); - std::int32_t cell0 = entities0(index, 0); - std::int32_t cell1 = entities1(index, 0); - - // Local entity index - std::int32_t local_entity = entities(index, 1); - - // Get dof maps for cell - auto dofs1 = md::submdspan(dmap1, cell1, md::full_extent); - - // Check if bc is applied to cell - bool has_bc = false; - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - if (bc_markers1[bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - - if (!has_bc) - continue; - - // Get cell coordinates/geometry - auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); - - // Size data structure for assembly - auto dofs0 = md::submdspan(dmap0, cell0, md::full_extent); - - // Permutations - std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_entity); - std::ranges::fill(Ae, 0); - kernel(Ae.data(), &coeffs(index, 0), constants.data(), cdofs.data(), - &local_entity, &perm, nullptr); - P0(Ae, cell_info0, cell0, num_cols); - P1T(Ae, cell_info1, cell1, num_rows); - - // Size data structure for assembly - std::ranges::fill(be, 0); - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dofs1[j] + k; - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - - for (std::size_t i = 0; i < dofs0.size(); ++i) - for (int k = 0; k < bs0; ++k) - b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; - } -} - -/// @brief Apply lifting for interior facet integrals. -/// -/// @tparam T Scalar type. -/// @param[in,out] b Vector to modify -/// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. -/// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each facet. -/// @param[in] facets Facets to execute the kernel over, where for the -/// `i`th facet `facets(i, 0, 0)` is the first attached cell and -/// `facets(i, 0, 1)` is the local index of the facet relative to the -/// first cell, and `facets(i, 1, 0)` is the second first attached cell -/// and `facets(i, 1, 1)` is the local index of the facet relative to -/// the second cell. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. See `facets` documentation for the dofmap indices layout. -/// Cells that don't exist in the test function domain should be -/// marked with -1 in the cell indices list. -/// @param[in] P0 Function that applies the transformation `P_0 A` -/// in-place to `A` to transform the test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data. -/// See `dofmap0` for a description. -/// @param[in] P1T Function that applies the transformation `A P_1^T` -/// in-place to `A` to transform the trial degrees-of-freedom. -/// @param[in] constants Constant coefficient data in the kernel. -/// @param[in] coeffs Coefficient data in the kernel. It has shape -/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th -/// coefficient for cell `i`. -/// @param[in] cell_info0 Cell permutation information for the test -/// function mesh. -/// @param[in] cell_info1 Cell permutation information for the trial -/// function mesh. -/// @param[in] bc_values1 Values for entries with an applied boundary -/// condition. -/// @param[in] bc_markers1 Marker to identify which DOFs have boundary -/// conditions applied. -/// @param[in] x0 Vector used in the lifting. -/// @param[in] alpha Scaling to apply -/// @param[in] perms Facet permutation data, where `(cell_idx, -/// local_facet_idx)` is the permutation value for the facet attached to -/// the cell `cell_idx` with local index `local_facet_idx` relative to -/// the cell. Empty if facet permutations are not required. -template ::value_type> - requires std::is_same_v::value_type, T> -void _lift_bc_interior_facets( - V&& b, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - FEkernel auto kernel, - md::mdspan> - facets, - std::tuple>> - dofmap0, - fem::DofTransformKernel auto P0, - std::tuple>> - dofmap1, - fem::DofTransformKernel auto P1T, std::span constants, - md::mdspan> - coeffs, - std::span cell_info0, - std::span cell_info1, std::span bc_values1, - std::span bc_markers1, std::span x0, T alpha, - md::mdspan> perms) -{ - if (facets.empty()) - return; - - const auto [dmap0, bs0, facets0] = dofmap0; - const auto [dmap1, bs1, facets1] = dofmap1; - - const int num_dofs0 = dmap0.extent(1); - const int num_dofs1 = dmap1.extent(1); - const int num_rows = bs0 * 2 * num_dofs0; - const int num_cols = bs1 * 2 * num_dofs1; - - // Data structures used in assembly - using X = scalar_value_t; - std::vector cdofs(2 * x_dofmap.extent(1) * 3); - std::span cdofs0(cdofs.data(), x_dofmap.extent(1) * 3); - std::span cdofs1(cdofs.data() + x_dofmap.extent(1) * 3, - x_dofmap.extent(1) * 3); - std::vector Ae(num_rows * num_cols), be(num_rows); - - // Temporaries for joint dofmaps - std::vector dmapjoint0(2 * num_dofs0); - std::vector dmapjoint1(2 * num_dofs1); - - assert(facets0.size() == facets.size()); - assert(facets1.size() == facets.size()); - for (std::size_t f = 0; f < facets.extent(0); ++f) - { - // Cells in integration domain, test function domain and trial - // function domain meshes - std::array cells{facets(f, 0, 0), facets(f, 1, 0)}; - std::array cells0{facets0(f, 0, 0), facets0(f, 1, 0)}; - std::array cells1{facets1(f, 0, 0), facets1(f, 1, 0)}; - - // Local facet indices - std::array local_facet = {facets(f, 0, 1), facets(f, 1, 1)}; - - // Get cell geometry - auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); - for (std::size_t i = 0; i < x_dofs0.size(); ++i) - std::copy_n(&x(x_dofs0[i], 0), 3, std::next(cdofs0.begin(), 3 * i)); - auto x_dofs1 = md::submdspan(x_dofmap, cells[1], md::full_extent); - for (std::size_t i = 0; i < x_dofs1.size(); ++i) - std::copy_n(&x(x_dofs1[i], 0), 3, std::next(cdofs1.begin(), 3 * i)); - - // Get dof maps for cells and pack - // When integrating over interfaces between two domains, the test function - // might only be defined on one side, so we check which cells exist in the - // test function domain - std::span dmap0_cell0 - = cells0[0] >= 0 - ? std::span(dmap0.data_handle() + cells0[0] * num_dofs0, - num_dofs0) - : std::span(); - std::span dmap0_cell1 - = cells0[1] >= 0 - ? std::span(dmap0.data_handle() + cells0[1] * num_dofs0, - num_dofs0) - : std::span(); - - std::ranges::copy(dmap0_cell0, dmapjoint0.begin()); - std::ranges::copy(dmap0_cell1, std::next(dmapjoint0.begin(), num_dofs0)); - - // Check which cells exist in the trial function domain - std::span dmap1_cell0 - = cells1[0] >= 0 - ? std::span(dmap1.data_handle() + cells1[0] * num_dofs1, - num_dofs1) - : std::span(); - std::span dmap1_cell1 - = cells1[1] >= 0 - ? std::span(dmap1.data_handle() + cells1[1] * num_dofs1, - num_dofs1) - : std::span(); - - std::ranges::copy(dmap1_cell0, dmapjoint1.begin()); - std::ranges::copy(dmap1_cell1, std::next(dmapjoint1.begin(), num_dofs1)); - - // Check if bc is applied to cell0 - bool has_bc = false; - for (std::size_t j = 0; j < dmap1_cell0.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - if (bc_markers1[bs1 * dmap1_cell0[j] + k]) - { - has_bc = true; - break; - } - } - } - - // Check if bc is applied to cell1 - for (std::size_t j = 0; j < dmap1_cell1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - if (bc_markers1[bs1 * dmap1_cell1[j] + k]) - { - has_bc = true; - break; - } - } - } - - if (!has_bc) - continue; - - // Tabulate tensor - std::ranges::fill(Ae, 0); - std::array perm = perms.empty() - ? std::array{0, 0} - : std::array{perms(cells[0], local_facet[0]), - perms(cells[1], local_facet[1])}; - kernel(Ae.data(), &coeffs(f, 0, 0), constants.data(), cdofs.data(), - local_facet.data(), perm.data(), nullptr); - - if (cells0[0] >= 0) - P0(Ae, cell_info0, cells0[0], num_cols); - if (cells0[1] >= 0) - { - std::span sub_Ae0(Ae.data() + bs0 * num_dofs0 * num_cols, - bs0 * num_dofs0 * num_cols); - P0(sub_Ae0, cell_info0, cells0[1], num_cols); - } - if (cells1[0] >= 0) - P1T(Ae, cell_info1, cells1[0], num_rows); - - if (cells1[1] >= 0) - { - for (int row = 0; row < num_rows; ++row) - { - // DOFs for dmap1 and cell1 are not stored contiguously in - // the block matrix, so each row needs a separate span access - std::span sub_Ae1(Ae.data() + row * num_cols + bs1 * num_dofs1, - bs1 * num_dofs1); - P1T(sub_Ae1, cell_info1, cells1[1], 1); - } - } - - std::ranges::fill(be, 0); - - // Compute b = b - A*b for cell0 - for (std::size_t j = 0; j < dmap1_cell0.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dmap1_cell0[j] + k; - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - - // Compute b = b - A*b for cell1 - const int offset = bs1 * num_dofs1; - for (std::size_t j = 0; j < dmap1_cell1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dmap1_cell1[j] + k; - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(offset + bs1 * j + k) * alpha * (bc - x0[jj]); - for (int m = 0; m < num_rows; ++m) - { - be[m] - -= Ae[m * num_cols + offset + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - } - - for (std::size_t i = 0; i < dmap0_cell0.size(); ++i) - for (int k = 0; k < bs0; ++k) - b[bs0 * dmap0_cell0[i] + k] += be[bs0 * i + k]; - - const int offset_be = bs0 * num_dofs0; - for (std::size_t i = 0; i < dmap0_cell1.size(); ++i) - for (int k = 0; k < bs0; ++k) - b[bs0 * dmap0_cell1[i] + k] += be[offset_be + bs0 * i + k]; - } -} - /// @brief Execute kernel over cells and accumulate result in vector. /// /// @tparam T Scalar type @@ -950,14 +352,80 @@ void assemble_interior_facets( } } +/// @brief Apply boundary condition lifting using the matrix assembler. +/// +/// @tparam T Scalar type. +/// @tparam U Geometry scalar type. +/// @tparam BS0 Compile-time block size for test function dofmap. Use -1 +/// for runtime block size. +/// @tparam BS1 Compile-time block size for trial function dofmap. Use -1 +/// for runtime block size. +/// @param[in,out] b Vector to modify. +/// @param[in] a The bilinear form. +/// @param[in] constants Constants that appear in `a`. +/// @param[in] coefficients Coefficients that appear in `a`. +/// @param[in] bc_values1 The boundary condition values. +/// @param[in] bc_markers1 Marker to identify which DOFs have boundary +/// conditions applied. +/// @param[in] x0 Vector used in the lifting. +/// @param[in] alpha Scaling to apply. +template + requires std::is_same_v::value_type, T> +void lift_bc_impl( + V&& b, const Form& a, std::span constants, + const std::map, + std::pair, int>>& coefficients, + std::span bc_values1, std::span bc_markers1, + std::span x0, T alpha) +{ + // Deduce runtime block sizes as fallback when compile-time sizes not given + const int bs0 = BS0 > 0 ? BS0 : a.function_spaces()[0]->dofmap()->bs(); + const int bs1 = BS1 > 0 ? BS1 : a.function_spaces()[1]->dofmap()->bs(); + + // Use default [=] capture for bs0, bs1 which may be compile-time constants + auto lifting_fn + = [=, &b, &bc_values1, &bc_markers1, + &x0](std::span rows, + std::span cols, std::span Ae) + { + const std::size_t nc = cols.size() * bs1; + for (std::size_t i = 0; i < cols.size(); ++i) + { + for (int k = 0; k < bs1; ++k) + { + const std::int32_t ii = cols[i] * bs1 + k; + if (bc_markers1[ii]) + { + const T x_bc = bc_values1[ii]; + const T _x0 = x0.empty() ? 0 : x0[ii]; + for (std::size_t j = 0; j < rows.size(); ++j) + { + for (int m = 0; m < bs0; ++m) + { + const std::int32_t jj = rows[j] * bs0 + m; + b[jj] -= Ae[(j * bs0 + m) * nc + (i * bs1 + k)] * alpha + * (x_bc - _x0); + } + } + } + } + } + }; + + // Repurpose the assemble_matrix assembler to work on the vector b instead. + // Use LiftingMode=true so the kernel is only called on cells that have + // BC-constrained DOFs in the column space. + assemble_matrix(lifting_fn, a, constants, coefficients, {}, + bc_markers1); +} + /// Modify RHS vector to account for boundary condition such that: /// /// b <- b - alpha * A.(x_bc - x0) /// /// @param[in,out] b The vector to be modified /// @param[in] a The bilinear form that generates A -/// @param[in] x_dofmap Mesh geometry dofmap -/// @param[in] x Mesh coordinates /// @param[in] constants Constants that appear in `a` /// @param[in] coefficients Coefficients that appear in `a` /// @param[in] bc_values1 The boundary condition 'values' @@ -969,159 +437,36 @@ void assemble_interior_facets( template ::value_type> requires std::is_same_v::value_type, T> -void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - std::span constants, +void lift_bc(V&& b, const Form& a, std::span constants, const std::map, std::pair, int>>& coefficients, std::span bc_values1, std::span bc_markers1, std::span x0, T alpha) { - // Integration domain mesh - std::shared_ptr> mesh = a.mesh(); - assert(mesh); - - // Test function mesh - auto mesh0 = a.function_spaces().at(0)->mesh(); - assert(mesh0); - - // Trial function mesh - auto mesh1 = a.function_spaces().at(1)->mesh(); - assert(mesh1); // Get dofmap for columns and rows of a assert(a.function_spaces().at(0)); assert(a.function_spaces().at(1)); - auto dofmap0 = a.function_spaces()[0]->dofmap()->map(); const int bs0 = a.function_spaces()[0]->dofmap()->bs(); - auto element0 = a.function_spaces()[0]->element(); - auto dofmap1 = a.function_spaces()[1]->dofmap()->map(); const int bs1 = a.function_spaces()[1]->dofmap()->bs(); - auto element1 = a.function_spaces()[1]->element(); - assert(element0); - - std::span cell_info0; - std::span cell_info1; - // TODO: Check for each element instead - if (element0->needs_dof_transformations() - or element1->needs_dof_transformations() or a.needs_facet_permutations()) - { - mesh0->topology_mutable()->create_entity_permutations(); - mesh1->topology_mutable()->create_entity_permutations(); - cell_info0 = std::span(mesh0->topology()->get_cell_permutation_info()); - cell_info1 = std::span(mesh1->topology()->get_cell_permutation_info()); - } - - fem::DofTransformKernel auto P0 - = element0->template dof_transformation_fn(doftransform::standard); - fem::DofTransformKernel auto P1T - = element1->template dof_transformation_right_fn( - doftransform::transpose); - for (int i = 0; i < a.num_integrals(IntegralType::cell, 0); ++i) - { - auto kernel = a.kernel(IntegralType::cell, i, 0); - assert(kernel); - auto& [_coeffs, cstride] = coefficients.at({IntegralType::cell, i}); - std::span cells = a.domain(IntegralType::cell, i, 0); - std::span cells0 = a.domain_arg(IntegralType::cell, 0, i, 0); - std::span cells1 = a.domain_arg(IntegralType::cell, 1, i, 0); - assert(_coeffs.size() == cells.size() * cstride); - auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); - if (bs0 == 1 and bs1 == 1) - { - _lift_bc_cells<1, 1>(b, x_dofmap, x, kernel, cells, - {dofmap0, bs0, cells0}, P0, {dofmap1, bs1, cells1}, - P1T, constants, coeffs, cell_info0, cell_info1, - bc_values1, bc_markers1, x0, alpha); - } - else if (bs0 == 3 and bs1 == 3) - { - _lift_bc_cells<3, 3>(b, x_dofmap, x, kernel, cells, - {dofmap0, bs0, cells0}, P0, {dofmap1, bs1, cells1}, - P1T, constants, coeffs, cell_info0, cell_info1, - bc_values1, bc_markers1, x0, alpha); - } - else - { - _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); - } - } + spdlog::debug("lifting: bs0={}, bs1={}", bs0, bs1); - md::mdspan> facet_perms; - if (a.needs_facet_permutations()) + if (bs0 == 1 && bs1 == 1) { - mesh::CellType cell_type = mesh->topology()->cell_type(); - int num_facets_per_cell - = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); - mesh->topology_mutable()->create_entity_permutations(); - const std::vector& p - = mesh->topology()->get_facet_permutations(); - facet_perms = md::mdspan(p.data(), p.size() / num_facets_per_cell, - num_facets_per_cell); + lift_bc_impl(std::forward(b), a, constants, coefficients, + bc_values1, bc_markers1, x0, alpha); } - - for (int i = 0; i < a.num_integrals(IntegralType::interior_facet, 0); ++i) + else if (bs0 == 3 && bs1 == 3) { - auto kernel = a.kernel(IntegralType::interior_facet, i, 0); - assert(kernel); - auto& [coeffs, cstride] - = coefficients.at({IntegralType::interior_facet, i}); - - using mdspanx22_t - = md::mdspan>; - using mdspanx2x_t - = md::mdspan>; - std::span f = a.domain(IntegralType::interior_facet, i, 0); - mdspanx22_t facets(f.data(), f.size() / 4, 2, 2); - std::span f0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); - mdspanx22_t facets0(f0.data(), f0.size() / 4, 2, 2); - std::span f1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); - mdspanx22_t facets1(f1.data(), f1.size() / 4, 2, 2); - _lift_bc_interior_facets( - b, x_dofmap, x, kernel, facets, {dofmap0, bs0, facets0}, P0, - {dofmap1, bs1, facets1}, P1T, constants, - mdspanx2x_t(coeffs.data(), facets.extent(0), 2, cstride), cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha, facet_perms); + lift_bc_impl(std::forward(b), a, constants, coefficients, + bc_values1, bc_markers1, x0, alpha); } - - for (auto itg_type : {fem::IntegralType::exterior_facet, - fem::IntegralType::vertex, fem::IntegralType::ridge}) + else { - md::mdspan> perms - = (itg_type == fem::IntegralType::exterior_facet) - ? facet_perms - : md::mdspan>{}; - - for (int i = 0; i < a.num_integrals(itg_type, 0); ++i) - { - auto kernel = a.kernel(itg_type, i, 0); - assert(kernel); - auto& [coeffs, cstride] = coefficients.at({itg_type, i}); - - using mdspanx2_t - = md::mdspan>; - std::span e = a.domain(itg_type, i, 0); - mdspanx2_t entities(e.data(), e.size() / 2, 2); - std::span e0 = a.domain_arg(itg_type, 0, i, 0); - mdspanx2_t entities0(e0.data(), e0.size() / 2, 2); - std::span e1 = a.domain_arg(itg_type, 1, i, 0); - mdspanx2_t entities1(e1.data(), e1.size() / 2, 2); - assert(coeffs.size() == entities.extent(0) * cstride); - _lift_bc_entities( - b, x_dofmap, x, kernel, entities, {dofmap0, bs0, entities0}, P0, - {dofmap1, bs1, entities1}, P1T, constants, - md::mdspan(coeffs.data(), entities.extent(0), cstride), cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha, perms); - } + lift_bc_impl(std::forward(b), a, constants, coefficients, + bc_values1, bc_markers1, x0, alpha); } } @@ -1176,16 +521,6 @@ void apply_lifting( std::vector bc_values1; if (a[j] and !bcs1[j].empty()) { - // Extract data from mesh - std::shared_ptr> mesh = a[j]->get().mesh(); - if (!mesh) - throw std::runtime_error("Unable to extract a mesh."); - mdspan2_t x_dofmap = mesh->geometry().dofmap(); - std::span _x = mesh->geometry().x(); - md::mdspan, - md::extents> - x(_x.data(), _x.size() / 3, 3); - assert(a[j]->get().function_spaces().at(0)); auto V1 = a[j]->get().function_spaces()[1]; assert(V1); @@ -1201,17 +536,12 @@ void apply_lifting( bc.get().set(bc_values1, std::nullopt, 1); } + std::span _x0; if (!x0.empty()) - { - lift_bc(b, a[j]->get(), x_dofmap, x, constants[j], coeffs[j], - std::span(bc_values1), bc_markers1, x0[j], alpha); - } - else - { - lift_bc(b, a[j]->get(), x_dofmap, x, constants[j], coeffs[j], - std::span(bc_values1), bc_markers1, - std::span(), alpha); - } + _x0 = x0[j]; + + lift_bc(b, a[j]->get(), constants[j], coeffs[j], + std::span(bc_values1), bc_markers1, _x0, alpha); } } } diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index 352196d502..77a8b6c7fe 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -337,6 +337,7 @@ void apply_lifting( if (std::ranges::all_of(a, [](auto ai) { return !ai; })) return; + common::Timer t("[Apply lifting]"); impl::apply_lifting(b, a, constants, coeffs, bcs1, x0, alpha); } @@ -415,6 +416,11 @@ void apply_lifting( /// @brief Assemble bilinear form into a matrix. Matrix must already be /// initialised. Does not zero or finalise the matrix. +/// @note This function can be used to insert kernels into different objects by +/// replacing the mat_add function appropriately. +/// @tparam T scalar type +/// @tparam U geometry scalar type +/// @tparam LiftingMode Set to true, if applying a lifting kernel in mat_add. /// @param[in] mat_add The function for adding values into the matrix. /// @param[in] a The bilinear form to assemble. /// @param[in] constants Constants that appear in `a`. @@ -425,7 +431,7 @@ void apply_lifting( /// @param[in] dof_marker1 Boundary condition markers for the columns. /// If bc[i] is true then rows i in A will be zeroed. The index i is a /// local index. -template +template void assemble_matrix( la::MatSet auto mat_add, const Form& a, std::span constants, @@ -435,6 +441,7 @@ void assemble_matrix( std::span dof_marker1) { + common::Timer t_assm("[Assemble Matrix]"); using mdspanx3_t = md::mdspan, md::extents>; @@ -444,14 +451,16 @@ void assemble_matrix( std::span x = mesh->geometry().x(); if constexpr (std::is_same_v>) { - impl::assemble_matrix(mat_add, a, mdspanx3_t(x.data(), x.size() / 3, 3), - constants, coefficients, dof_marker0, dof_marker1); + impl::assemble_matrix( + mat_add, a, mdspanx3_t(x.data(), x.size() / 3, 3), constants, + coefficients, dof_marker0, dof_marker1); } else { std::vector> _x(x.begin(), x.end()); - impl::assemble_matrix(mat_add, a, mdspanx3_t(_x.data(), _x.size() / 3, 3), - constants, coefficients, dof_marker0, dof_marker1); + impl::assemble_matrix( + mat_add, a, mdspanx3_t(_x.data(), _x.size() / 3, 3), constants, + coefficients, dof_marker0, dof_marker1); } }