From 27be17f8322a09579cfedf829faf4d897d01c3c8 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Tue, 17 Mar 2026 15:42:51 +0000 Subject: [PATCH 01/34] Generalise sub() --- cpp/dolfinx/fem/FunctionSpace.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cpp/dolfinx/fem/FunctionSpace.h b/cpp/dolfinx/fem/FunctionSpace.h index 201f846e0d9..65f6f682c7f 100644 --- a/cpp/dolfinx/fem/FunctionSpace.h +++ b/cpp/dolfinx/fem/FunctionSpace.h @@ -111,22 +111,27 @@ class FunctionSpace FunctionSpace sub(const std::vector& component) const { assert(_mesh); - assert(_elements.front()); - assert(_dofmaps.front()); + assert(_elements.size() > 0); + assert(_dofmaps.size() > 0); // Check that component is valid if (component.empty()) throw std::runtime_error("Component must be non-empty"); // Extract sub-element - auto element = this->_elements.front()->extract_sub_element(component); + std::vector>> + sub_elements; + for (auto e : _elements) + sub_elements.push_back(e->extract_sub_element(component)); // Extract sub dofmap - auto dofmap = std::make_shared( - _dofmaps.front()->extract_sub_dofmap(component)); + std::vector> sub_dofmaps; + for (auto d : _dofmaps) + sub_dofmaps.push_back( + std::make_shared(d->extract_sub_dofmap(component))); // Create new sub space - FunctionSpace sub_space(_mesh, element, dofmap); + FunctionSpace sub_space(_mesh, sub_elements, sub_dofmaps); // Set root space id and component w.r.t. root sub_space._root_space_id = _root_space_id; From 8c5d561a4d1f14a99d9125d30c2aad4f470ae9c9 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Wed, 18 Mar 2026 09:44:57 +0000 Subject: [PATCH 02/34] Work on collapse --- cpp/dolfinx/fem/Function.h | 11 +++++++---- cpp/dolfinx/fem/FunctionSpace.h | 19 +++++++++++++------ .../dolfinx/wrappers/dolfinx_wrappers/fem.h | 14 ++++++++++---- python/test/unit/fem/test_function_space.py | 4 ++-- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/cpp/dolfinx/fem/Function.h b/cpp/dolfinx/fem/Function.h index 1c998ac7f81..91bf8f5a716 100644 --- a/cpp/dolfinx/fem/Function.h +++ b/cpp/dolfinx/fem/Function.h @@ -128,11 +128,14 @@ class Function // Copy values into new vector std::span x_old = _x->array(); std::span x_new = x->array(); - for (std::size_t i = 0; i < map.size(); ++i) + for (auto mapj : map) { - assert(i < x_new.size()); - assert(static_cast(map[i]) < x_old.size()); - x_new[i] = x_old[map[i]]; + for (std::size_t i = 0; i < mapj.size(); ++i) + { + assert(i < x_new.size()); + assert(static_cast(mapj[i]) < x_old.size()); + x_new[i] = x_old[mapj[i]]; + } } return Function( diff --git a/cpp/dolfinx/fem/FunctionSpace.h b/cpp/dolfinx/fem/FunctionSpace.h index 65f6f682c7f..ec2b6ce46d2 100644 --- a/cpp/dolfinx/fem/FunctionSpace.h +++ b/cpp/dolfinx/fem/FunctionSpace.h @@ -170,18 +170,25 @@ class FunctionSpace /// Collapse a subspace and return a new function space and a map from /// new to old dofs /// @return The new function space and a map from new to old dofs - std::pair> collapse() const + std::pair>> + collapse() const { if (_component.empty()) throw std::runtime_error("Function space is not a subspace"); // Create collapsed DofMap - auto [_collapsed_dofmap, collapsed_dofs] - = _dofmaps.front()->collapse(_mesh->comm(), *_mesh->topology()); - auto collapsed_dofmap - = std::make_shared(std::move(_collapsed_dofmap)); + std::vector> collapsed_dofmaps; + std::vector> collapsed_dofs; + for (auto d : _dofmaps) + { + auto [_collapsed_dofmap, _collapsed_dofs] + = d->collapse(_mesh->comm(), *_mesh->topology()); + collapsed_dofmaps.push_back( + std::make_shared(std::move(_collapsed_dofmap))); + collapsed_dofs.push_back(_collapsed_dofs); + } - return {FunctionSpace(_mesh, _elements.front(), collapsed_dofmap), + return {FunctionSpace(_mesh, _elements, collapsed_dofmaps), std::move(collapsed_dofs)}; } diff --git a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h index 4b83cd26a1e..1aa00d55dd1 100644 --- a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h +++ b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h @@ -105,12 +105,18 @@ void declare_function_space(nb::module_& m, std::string type) .def("collapse", [](const dolfinx::fem::FunctionSpace& self) -> std::pair, - nanobind::ndarray> + std::vector>> { auto&& [collapsed_fs, dofs] = self.collapse(); - return {std::move(collapsed_fs), - dolfinx_wrappers::as_nbarray(std::move(dofs), - {dofs.size()})}; + std::vector> + collapsed_map; + for (auto d : dofs) + { + collapsed_map.push_back( + dolfinx_wrappers::as_nbarray(std::move(d), {d.size()})); + } + return {std::move(collapsed_fs), std::move(collapsed_map)}; }) .def("component", &dolfinx::fem::FunctionSpace::component) .def("contains", &dolfinx::fem::FunctionSpace::contains, diff --git a/python/test/unit/fem/test_function_space.py b/python/test/unit/fem/test_function_space.py index 8cc32cab789..bae0acd922f 100644 --- a/python/test/unit/fem/test_function_space.py +++ b/python/test/unit/fem/test_function_space.py @@ -177,7 +177,7 @@ def test_collapse(W, V): # Number of collapsed dofs in W numbering must agree with the number of dofs # of the collapsed space - assert Wi.dofmap.index_map.size_local + Wi.dofmap.index_map.num_ghosts == dofs.size + assert Wi.dofmap.index_map.size_local + Wi.dofmap.index_map.num_ghosts == dofs[0].size msh = W.mesh cell_imap = msh.topology.index_map(msh.topology.dim) @@ -188,7 +188,7 @@ def test_collapse(W, V): for i, dof in enumerate(cell_dofs): for k in range(bs): new_dof = Ws[k][0].dofmap.cell_dofs(c)[i] - new_to_old = Ws[k][1] + new_to_old = Ws[k][1][0] assert dof * bs + k == new_to_old[new_dof] f0 = Function(Ws[0][0]) From b6d58c4499a662b28d75f7dff254e8dde637adff Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Wed, 18 Mar 2026 09:45:49 +0000 Subject: [PATCH 03/34] Improve description --- cpp/dolfinx/fem/FunctionSpace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/FunctionSpace.h b/cpp/dolfinx/fem/FunctionSpace.h index ec2b6ce46d2..de506f38796 100644 --- a/cpp/dolfinx/fem/FunctionSpace.h +++ b/cpp/dolfinx/fem/FunctionSpace.h @@ -169,7 +169,8 @@ class FunctionSpace /// Collapse a subspace and return a new function space and a map from /// new to old dofs - /// @return The new function space and a map from new to old dofs + /// @return The new function space and a map from new to old dofs for each + /// cell type std::pair>> collapse() const { From df4b127050a54f3aa35ca29b9423bc7a0a5dc737 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Wed, 18 Mar 2026 15:17:55 +0000 Subject: [PATCH 04/34] Tabulate dof coordinates (mixed) --- cpp/dolfinx/fem/DirichletBC.h | 2 +- cpp/dolfinx/fem/FunctionSpace.h | 138 ++++++++++++++++---------------- 2 files changed, 72 insertions(+), 68 deletions(-) diff --git a/cpp/dolfinx/fem/DirichletBC.h b/cpp/dolfinx/fem/DirichletBC.h index 28b893c52af..03f8053f4e7 100644 --- a/cpp/dolfinx/fem/DirichletBC.h +++ b/cpp/dolfinx/fem/DirichletBC.h @@ -35,7 +35,7 @@ namespace dolfinx::fem /// facet/edge/vertex. /// /// @param[in] topology Mesh topology. -/// @param[in] dofmap Dofmap that associated DOFs with cells. +/// @param[in] dofmap Dofmap that associates DOFs with cells. /// @param[in] dim Topological dimension of mesh entities on which /// degrees-of-freedom will be located /// @param[in] entities Indices of mesh entities. All DOFs associated diff --git a/cpp/dolfinx/fem/FunctionSpace.h b/cpp/dolfinx/fem/FunctionSpace.h index de506f38796..e0b81a4db0c 100644 --- a/cpp/dolfinx/fem/FunctionSpace.h +++ b/cpp/dolfinx/fem/FunctionSpace.h @@ -254,91 +254,95 @@ class FunctionSpace = index_map_bs * (index_map->size_local() + index_map->num_ghosts()) / dofmap_bs; - // Get the dof coordinates on the reference element - if (!_elements.front()->interpolation_ident()) - { - throw std::runtime_error("Cannot evaluate dof coordinates - this element " - "does not have pointwise evaluation."); - } - const auto [X, Xshape] = _elements.front()->interpolation_points(); - - // Get coordinate map - const CoordinateElement& cmap = _mesh->geometry().cmap(); - - // Prepare cell geometry - auto x_dofmap = _mesh->geometry().dofmap(); - const std::size_t num_dofs_g = cmap.dim(); - std::span x_g = _mesh->geometry().x(); + std::size_t num_cell_types = _elements.size(); // Array to hold coordinates to return const std::size_t shape_c0 = transpose ? 3 : num_dofs; const std::size_t shape_c1 = transpose ? num_dofs : 3; std::vector coords(shape_c0 * shape_c1, 0); - using mdspan2_t = md::mdspan>; using cmdspan4_t = md::mdspan>; - - // Loop over cells and tabulate dofs + std::span x_g = _mesh->geometry().x(); std::vector x_b(scalar_dofs * gdim); mdspan2_t x(x_b.data(), scalar_dofs, gdim); - std::vector coordinate_dofs_b(num_dofs_g * gdim); - mdspan2_t coordinate_dofs(coordinate_dofs_b.data(), num_dofs_g, gdim); + for (std::size_t i = 0; i < num_cell_types; ++i) + { + // Get the dof coordinates on the reference element + if (!_elements[i]->interpolation_ident()) + { + throw std::runtime_error( + "Cannot evaluate dof coordinates - this element " + "does not have pointwise evaluation."); + } + const auto [X, Xshape] = _elements[i]->interpolation_points(); - auto map = _mesh->topology()->index_map(tdim); - assert(map); - const int num_cells = map->size_local() + map->num_ghosts(); + // Get coordinate map + const CoordinateElement& cmap + = _mesh->geometry().cmaps()[i]; - std::span cell_info; - if (_elements.front()->needs_dof_transformations()) - { - _mesh->topology_mutable()->create_entity_permutations(); - cell_info = std::span(_mesh->topology()->get_cell_permutation_info()); - } + // Prepare cell geometry + auto x_dofmap = _mesh->geometry().dofmap(i); + const std::size_t num_dofs_g = cmap.dim(); + std::vector coordinate_dofs_b(num_dofs_g * gdim); + mdspan2_t coordinate_dofs(coordinate_dofs_b.data(), num_dofs_g, gdim); - const std::array phi_shape - = cmap.tabulate_shape(0, Xshape[0]); - std::vector phi_b( - std::reduce(phi_shape.begin(), phi_shape.end(), 1, std::multiplies{})); - cmdspan4_t phi_full(phi_b.data(), phi_shape); - cmap.tabulate(0, X, Xshape, phi_b); - auto phi = md::submdspan(phi_full, 0, md::full_extent, md::full_extent, 0); - - // TODO: Check transform - // Basis function reference-to-conforming transformation function - auto apply_dof_transformation - = _elements.front()->template dof_transformation_fn( - doftransform::standard); - - for (int c = 0; c < num_cells; ++c) - { - // Extract cell geometry 'dofs' - auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - for (std::size_t j = 0; j < gdim; ++j) - coordinate_dofs(i, j) = x_g[3 * x_dofs[i] + j]; - - // Tabulate dof coordinates on cell - cmap.push_forward(x, coordinate_dofs, phi); - apply_dof_transformation( - x_b, std::span(cell_info.data(), cell_info.size()), c, x.extent(1)); - - // Get cell dofmap - auto dofs = _dofmaps.front()->cell_dofs(c); - - // Copy dof coordinates into vector - if (!transpose) + auto map = _mesh->topology()->index_maps(tdim)[i]; + assert(map); + const int num_cells = map->size_local() + map->num_ghosts(); + + std::span cell_info; + if (_elements[i]->needs_dof_transformations()) { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (std::size_t j = 0; j < gdim; ++j) - coords[dofs[i] * 3 + j] = x(i, j); + _mesh->topology_mutable()->create_entity_permutations(); + cell_info = std::span(_mesh->topology()->get_cell_permutation_info()); } - else + + const std::array phi_shape + = cmap.tabulate_shape(0, Xshape[0]); + std::vector phi_b(std::reduce( + phi_shape.begin(), phi_shape.end(), 1, std::multiplies{})); + cmdspan4_t phi_full(phi_b.data(), phi_shape); + cmap.tabulate(0, X, Xshape, phi_b); + auto phi + = md::submdspan(phi_full, 0, md::full_extent, md::full_extent, 0); + + // TODO: Check transform + // Basis function reference-to-conforming transformation function + auto apply_dof_transformation + = _elements[i]->template dof_transformation_fn( + doftransform::standard); + + for (int c = 0; c < num_cells; ++c) { - for (std::size_t i = 0; i < dofs.size(); ++i) + // Extract cell geometry 'dofs' + auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); + for (std::size_t i = 0; i < x_dofs.size(); ++i) for (std::size_t j = 0; j < gdim; ++j) - coords[j * num_dofs + dofs[i]] = x(i, j); + coordinate_dofs(i, j) = x_g[3 * x_dofs[i] + j]; + + // Tabulate dof coordinates on cell + cmap.push_forward(x, coordinate_dofs, phi); + apply_dof_transformation( + x_b, std::span(cell_info.data(), cell_info.size()), c, x.extent(1)); + + // Get cell dofmap + auto dofs = _dofmaps[i]->cell_dofs(c); + + // Copy dof coordinates into vector + if (!transpose) + { + for (std::size_t i = 0; i < dofs.size(); ++i) + for (std::size_t j = 0; j < gdim; ++j) + coords[dofs[i] * 3 + j] = x(i, j); + } + else + { + for (std::size_t i = 0; i < dofs.size(); ++i) + for (std::size_t j = 0; j < gdim; ++j) + coords[j * num_dofs + dofs[i]] = x(i, j); + } } } From 377407312797d969c60c6159772a423772d1da4d Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Wed, 18 Mar 2026 16:02:33 +0000 Subject: [PATCH 05/34] Work on DirichletBC --- cpp/dolfinx/fem/DirichletBC.h | 33 ++++++++++++++++++++++-------- python/demo/demo_mixed-topology.py | 19 +++++++++++++++++ 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/cpp/dolfinx/fem/DirichletBC.h b/cpp/dolfinx/fem/DirichletBC.h index 03f8053f4e7..62d1591c4c2 100644 --- a/cpp/dolfinx/fem/DirichletBC.h +++ b/cpp/dolfinx/fem/DirichletBC.h @@ -107,8 +107,8 @@ std::vector locate_dofs_geometrical(const FunctionSpace& V, // especially when we usually want the boundary dofs only. Add // interface that computes dofs coordinates only for specified cell. - assert(V.element()); - if (V.element()->is_mixed()) + assert(V.elements(0)); + if (V.elements(0)->is_mixed()) { throw std::runtime_error( "Cannot locate dofs geometrically for mixed space. Use subspaces."); @@ -329,13 +329,14 @@ class DirichletBC { assert(g); assert(V); - if (g->shape.size() != V->element()->value_shape().size()) + assert(V->elements(0)); + if (g->shape.size() != V->elements(0)->value_shape().size()) { throw std::runtime_error( "Rank mismatch between Constant and function space in DirichletBC"); } - if (g->value.size() != (std::size_t)_function_space->dofmap()->bs()) + if (g->value.size() != (std::size_t)_function_space->dofmaps(0)->bs()) { throw std::runtime_error( "Creating a DirichletBC using a Constant is not supported when the " @@ -343,17 +344,24 @@ class DirichletBC "(sub-)space. Use a fem::Function to create the fem::DirichletBC."); } - if (!V->element()->interpolation_ident()) + if (!V->elements(0)->interpolation_ident()) { throw std::runtime_error( "Constant can be used only with point-evaluation elements"); } // Unroll _dofs0 if dofmap block size > 1 - if (const int bs = V->dofmap()->bs(); bs > 1) + if (const int bs = V->dofmaps(0)->bs(); bs > 1) _dofs0 = unroll_dofs(_dofs0, bs); - _owned_indices0 = num_owned(*_function_space->dofmap(), _dofs0); + _owned_indices0 = 0; + for (std::size_t i = 0; + i < _function_space->mesh() + ->topology() + ->entity_types(_function_space->mesh()->topology()->dim()) + .size(); + ++i) + _owned_indices0 += num_owned(*_function_space->dofmaps(i), _dofs0); } /// @brief Create a representation of a Dirichlet boundary condition @@ -378,10 +386,17 @@ class DirichletBC assert(_function_space); // Unroll _dofs0 if dofmap block size > 1 - if (const int bs = _function_space->dofmap()->bs(); bs > 1) + if (const int bs = _function_space->dofmaps(0)->bs(); bs > 1) _dofs0 = unroll_dofs(_dofs0, bs); - _owned_indices0 = num_owned(*_function_space->dofmap(), _dofs0); + _owned_indices0 = 0; + for (std::size_t i = 0; + i < _function_space->mesh() + ->topology() + ->entity_types(_function_space->mesh()->topology()->dim()) + .size(); + ++i) + _owned_indices0 += num_owned(*_function_space->dofmaps(i), _dofs0); } /// @brief Create a representation of a Dirichlet boundary condition diff --git a/python/demo/demo_mixed-topology.py b/python/demo/demo_mixed-topology.py index 01fbf51a60b..d37301bc07b 100644 --- a/python/demo/demo_mixed-topology.py +++ b/python/demo/demo_mixed-topology.py @@ -34,6 +34,7 @@ import basix import dolfinx.cpp as _cpp import ufl +from dolfinx.cpp.fem import locate_dofs_geometrical from dolfinx.cpp.mesh import GhostMode, create_mesh from dolfinx.fem import ( FiniteElement, @@ -42,6 +43,7 @@ assemble_vector, coordinate_element, create_dofmaps, + dirichletbc, mixed_topology_form, ) from dolfinx.io.utils import cell_perm_vtk @@ -136,6 +138,17 @@ V_cpp = _cpp.fem.FunctionSpace_float64( mesh, [e._cpp_object for e in dolfinx_elements], [dofmap._cpp_object for dofmap in dofmaps] ) + + +# Select some BCs +def marker(x): + """BC Selector.""" + return np.logical_or(np.isclose(x[2], 0.0), np.isclose(x[2], 1.0)) + + +bcdofs = locate_dofs_geometrical(V_cpp, marker) +bc = dirichletbc(value=0.0, dofs=bcdofs, V=V_cpp) + # - # ## Creating and compiling a variational formulation @@ -178,6 +191,12 @@ A_scipy = A.to_scipy() b_scipy = b.array + +# Clear rows and set diagonal manually for BCs +A_scipy[bcdofs, :] = 0.0 +for i in bcdofs: + A_scipy[i, i] = 1.0 + b_scipy[i] = 0.0 x = spsolve(A_scipy, b_scipy) print(f"Solution vector norm {np.linalg.norm(x)}") From 2a1f56cb1e78eb08a4328f37e450426d8bee72e8 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 19 Mar 2026 11:17:27 +0000 Subject: [PATCH 06/34] Use BCs in demo --- cpp/dolfinx/fem/DirichletBC.h | 18 ++---------------- cpp/dolfinx/fem/assembler.h | 1 + python/demo/demo_mixed-topology.py | 5 ++--- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/cpp/dolfinx/fem/DirichletBC.h b/cpp/dolfinx/fem/DirichletBC.h index 62d1591c4c2..105af35d367 100644 --- a/cpp/dolfinx/fem/DirichletBC.h +++ b/cpp/dolfinx/fem/DirichletBC.h @@ -354,14 +354,7 @@ class DirichletBC if (const int bs = V->dofmaps(0)->bs(); bs > 1) _dofs0 = unroll_dofs(_dofs0, bs); - _owned_indices0 = 0; - for (std::size_t i = 0; - i < _function_space->mesh() - ->topology() - ->entity_types(_function_space->mesh()->topology()->dim()) - .size(); - ++i) - _owned_indices0 += num_owned(*_function_space->dofmaps(i), _dofs0); + _owned_indices0 = num_owned(*_function_space->dofmaps(0), _dofs0); } /// @brief Create a representation of a Dirichlet boundary condition @@ -389,14 +382,7 @@ class DirichletBC if (const int bs = _function_space->dofmaps(0)->bs(); bs > 1) _dofs0 = unroll_dofs(_dofs0, bs); - _owned_indices0 = 0; - for (std::size_t i = 0; - i < _function_space->mesh() - ->topology() - ->entity_types(_function_space->mesh()->topology()->dim()) - .size(); - ++i) - _owned_indices0 += num_owned(*_function_space->dofmaps(i), _dofs0); + _owned_indices0 = num_owned(*_function_space->dofmaps(0), _dofs0); } /// @brief Create a representation of a Dirichlet boundary condition diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index 898e14db845..352196d502f 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -597,6 +597,7 @@ void set_diagonal( const std::vector>>& bcs, T diagonal = 1.0) { + spdlog::debug("Set diagonal"); for (auto& bc : bcs) { if (V.contains(*bc.get().function_space())) diff --git a/python/demo/demo_mixed-topology.py b/python/demo/demo_mixed-topology.py index d37301bc07b..b0340ab73c0 100644 --- a/python/demo/demo_mixed-topology.py +++ b/python/demo/demo_mixed-topology.py @@ -183,7 +183,8 @@ def marker(x): # {py:class}`vector` format in DOLFINx to assemble # the left and right hand side of the linear system. -A = assemble_matrix(a_form) +print(V_cpp.dofmaps(0).index_map.size_local) +A = assemble_matrix(a_form, bcs=[bc]) b = assemble_vector(L_form) # We use {py:func}`scipy.sparse.linalg.spsolve` to solve the @@ -193,9 +194,7 @@ def marker(x): b_scipy = b.array # Clear rows and set diagonal manually for BCs -A_scipy[bcdofs, :] = 0.0 for i in bcdofs: - A_scipy[i, i] = 1.0 b_scipy[i] = 0.0 x = spsolve(A_scipy, b_scipy) From 547dc922a0d05f4a570521b9961c6ef00dbb4ba8 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 19 Mar 2026 11:19:54 +0000 Subject: [PATCH 07/34] Use bc.set --- python/demo/demo_mixed-topology.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/demo/demo_mixed-topology.py b/python/demo/demo_mixed-topology.py index b0340ab73c0..0c5a68f7523 100644 --- a/python/demo/demo_mixed-topology.py +++ b/python/demo/demo_mixed-topology.py @@ -186,6 +186,7 @@ def marker(x): print(V_cpp.dofmaps(0).index_map.size_local) A = assemble_matrix(a_form, bcs=[bc]) b = assemble_vector(L_form) +bc.set(b.array, alpha=0.0) # We use {py:func}`scipy.sparse.linalg.spsolve` to solve the # resulting linear system @@ -193,9 +194,6 @@ def marker(x): A_scipy = A.to_scipy() b_scipy = b.array -# Clear rows and set diagonal manually for BCs -for i in bcdofs: - b_scipy[i] = 0.0 x = spsolve(A_scipy, b_scipy) print(f"Solution vector norm {np.linalg.norm(x)}") From 39f8960a2b7bd6365c94499be2f3dca059fc44da Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 19 Mar 2026 12:03:02 +0000 Subject: [PATCH 08/34] Update to collapse API --- .github/workflows/windows.yml | 10 +++++----- python/demo/demo_half_loaded_waveguide.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index dea1cea6d12..1cb64593a72 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -129,11 +129,11 @@ jobs: cmake --install build-dir --config Release --prefix D:/a/dolfinx/dolfinx-install echo "D:/a/dolfinx/dolfinx-install/bin" | Out-File -Append -FilePath $env:GITHUB_PATH -Encoding utf8 - - name: Run tests, skip la_matrix (C++, serial) - working-directory: dolfinx-src/cpp/build-dir/test/Release - run: | - ls - mpiexec -n 1 ./unittests "~[la_matrix]" +# - name: Run tests, skip la_matrix (C++, serial) +# working-directory: dolfinx-src/cpp/build-dir/test/Release +# run: | +# ls +# mpiexec -n 1 ./unittests "~[la_matrix]" # - name: Run tests, skip la_matrix, geometry_compat, create_box (C++, MPI, np=3) # working-directory: dolfinx-src/cpp/build-dir/test/Release # run: | diff --git a/python/demo/demo_half_loaded_waveguide.py b/python/demo/demo_half_loaded_waveguide.py index 9fb95a0be5d..e6014219eae 100644 --- a/python/demo/demo_half_loaded_waveguide.py +++ b/python/demo/demo_half_loaded_waveguide.py @@ -499,7 +499,7 @@ def verify_mode( V_lagr, lagr_dofs = V.sub(1).collapse() V_cells, V_types, V_x = plot.vtk_mesh(V_lagr) V_grid = pyvista.UnstructuredGrid(V_cells, V_types, V_x) - V_grid.point_data["u"] = ezh.x.array.real[lagr_dofs] + V_grid.point_data["u"] = ezh.x.array.real[lagr_dofs[0]] plotter = pyvista.Plotter() plotter.add_mesh(V_grid.copy(), show_edges=False) plotter.view_xy() From 7b5843ca3a43aaeaa8ef6abb8499f621ab6531c8 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 19 Mar 2026 12:24:53 +0000 Subject: [PATCH 09/34] Remove shadow variable i --- cpp/dolfinx/fem/FunctionSpace.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/dolfinx/fem/FunctionSpace.h b/cpp/dolfinx/fem/FunctionSpace.h index e0b81a4db0c..93c6bec91b0 100644 --- a/cpp/dolfinx/fem/FunctionSpace.h +++ b/cpp/dolfinx/fem/FunctionSpace.h @@ -318,9 +318,9 @@ class FunctionSpace { // Extract cell geometry 'dofs' auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - for (std::size_t j = 0; j < gdim; ++j) - coordinate_dofs(i, j) = x_g[3 * x_dofs[i] + j]; + for (std::size_t j = 0; j < x_dofs.size(); ++j) + for (std::size_t k = 0; k < gdim; ++k) + coordinate_dofs(j, k) = x_g[3 * x_dofs[j] + k]; // Tabulate dof coordinates on cell cmap.push_forward(x, coordinate_dofs, phi); @@ -333,15 +333,15 @@ class FunctionSpace // Copy dof coordinates into vector if (!transpose) { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (std::size_t j = 0; j < gdim; ++j) - coords[dofs[i] * 3 + j] = x(i, j); + for (std::size_t j = 0; j < dofs.size(); ++j) + for (std::size_t k = 0; k < gdim; ++k) + coords[dofs[j] * 3 + k] = x(j, k); } else { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (std::size_t j = 0; j < gdim; ++j) - coords[j * num_dofs + dofs[i]] = x(i, j); + for (std::size_t j = 0; j < dofs.size(); ++j) + for (std::size_t k = 0; k < gdim; ++k) + coords[k * num_dofs + dofs[j]] = x(j, k); } } } From 52f334a22f40986f0d4c3df3a5a783d2447c4d71 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 19 Mar 2026 15:12:20 +0000 Subject: [PATCH 10/34] Simplify in collapse code, remove topology --- cpp/dolfinx/fem/DofMap.cpp | 43 +++++++++++++++++------------- cpp/dolfinx/fem/FunctionSpace.h | 6 +++-- python/demo/demo_mixed-topology.py | 2 +- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/cpp/dolfinx/fem/DofMap.cpp b/cpp/dolfinx/fem/DofMap.cpp index d3ad810eca9..8b790f15a9f 100644 --- a/cpp/dolfinx/fem/DofMap.cpp +++ b/cpp/dolfinx/fem/DofMap.cpp @@ -25,22 +25,19 @@ namespace //----------------------------------------------------------------------------- // Build a collapsed DofMap from a dofmap view. Extracts dofs and // doesn't build a new re-ordered dofmap. -fem::DofMap build_collapsed_dofmap(const DofMap& dofmap_view, - const mesh::Topology& topology) +fem::DofMap build_collapsed_dofmap(const DofMap& dofmap_view) { + spdlog::info("Build collapsed DofMap"); + if (dofmap_view.element_dof_layout().block_size() > 1) { - throw std::runtime_error( - "Cannot collapse a dofmap view with block size greater " - "than 1 when the parent has a block size of 1. Create new dofmap " - "first."); + throw std::runtime_error("Cannot collapse a dofmap view with " + "block size greater " + "than 1 when the parent has a block " + "size of 1. Create new dofmap " + "first."); } - // Get topological dimension - const int tdim = topology.dim(); - auto cells = topology.connectivity(tdim, 0); - assert(cells); - // Build set of dofs that are in the new dofmap (un-blocked) auto dofs_view_md = dofmap_view.map(); std::vector dofs_view(dofs_view_md.data_handle(), @@ -58,6 +55,7 @@ fem::DofMap build_collapsed_dofmap(const DofMap& dofmap_view, // Map from indices in the sub-index map to indices in the original // index map std::vector sub_imap_to_imap; + spdlog::debug("bs_view={}", bs_view); if (bs_view == 1) { auto [_index_map, _sub_imap_to_imap] = common::create_sub_index_map( @@ -77,6 +75,7 @@ fem::DofMap build_collapsed_dofmap(const DofMap& dofmap_view, sub_imap_to_imap = std::move(_sub_imap_to_imap); } + spdlog::debug("Map old to new dofs"); // Create a map from old dofs to new dofs std::size_t array_size = dofs_view.empty() ? 0 : dofs_view.back() + bs_view; std::vector old_to_new(array_size, -1); @@ -90,6 +89,7 @@ fem::DofMap build_collapsed_dofmap(const DofMap& dofmap_view, } } + spdlog::debug("Map to new collapsed indices"); // Map dofs to new collapsed indices for new dofmap auto dof_array_view = dofmap_view.map(); std::vector dofmap; @@ -99,12 +99,14 @@ fem::DofMap build_collapsed_dofmap(const DofMap& dofmap_view, std::back_inserter(dofmap), [&old_to_new](auto idx_old) { return old_to_new[idx_old]; }); - // Dimension sanity checks - assert((int)dofmap.size() - == (cells->num_nodes() * dofmap_view.element_dof_layout().num_dofs())); + // Sanity check + assert(dofmap.size() + == dofmap_view.map().extent(0) + * dofmap_view.element_dof_layout().num_dofs()); assert(dofmap.size() % dofmap_view.element_dof_layout().num_dofs() == 0); // Copy dof layout, discarding parent data + spdlog::debug("Copy element layout"); ElementDofLayout element_dof_layout = dofmap_view.element_dof_layout().copy(); // Create new dofmap and return @@ -206,6 +208,7 @@ std::pair> DofMap::collapse( std::function(const graph::AdjacencyList&)>&& reorder_fn) const { + spdlog::debug("DofMap::collapse"); if (!reorder_fn) { reorder_fn = [](const graph::AdjacencyList& g) @@ -231,7 +234,7 @@ std::pair> DofMap::collapse( else { // Collapse dof map, without building and re-ordering from scratch - return build_collapsed_dofmap(dmap, topology); + return build_collapsed_dofmap(dmap); } }; @@ -239,6 +242,7 @@ std::pair> DofMap::collapse( comm, index_map_bs(), _element_dof_layout, topology, reorder_fn, *this); // Build map from collapsed dof index to original dof index + spdlog::debug("New IndexMap"); auto index_map_new = dofmap_new.index_map; const std::int32_t size = (index_map_new->size_local() + index_map_new->num_ghosts()) @@ -246,10 +250,13 @@ std::pair> DofMap::collapse( std::vector collapsed_map(size); const int tdim = topology.dim(); - auto cells = topology.connectivity(tdim, 0); - assert(cells); + const int num_cell_types = topology.entity_types(tdim).size(); + spdlog::debug("Cell types = {}", num_cell_types); + const int bs = dofmap_new.bs(); - for (std::int32_t c = 0; c < cells->num_nodes(); ++c) + + std::size_t num_cells = _dofmap.size() / _shape1; + for (std::size_t c = 0; c < num_cells; ++c) { std::span cell_dofs_view = this->cell_dofs(c); std::span cell_dofs = dofmap_new.cell_dofs(c); diff --git a/cpp/dolfinx/fem/FunctionSpace.h b/cpp/dolfinx/fem/FunctionSpace.h index 93c6bec91b0..052a08a3c2a 100644 --- a/cpp/dolfinx/fem/FunctionSpace.h +++ b/cpp/dolfinx/fem/FunctionSpace.h @@ -169,11 +169,11 @@ class FunctionSpace /// Collapse a subspace and return a new function space and a map from /// new to old dofs - /// @return The new function space and a map from new to old dofs for each - /// cell type + /// @return The new function space and a map from new to old dofs std::pair>> collapse() const { + spdlog::debug("FunctionSpace::collapse"); if (_component.empty()) throw std::runtime_error("Function space is not a subspace"); @@ -182,8 +182,10 @@ class FunctionSpace std::vector> collapsed_dofs; for (auto d : _dofmaps) { + spdlog::debug("Call DofMap::collapse"); auto [_collapsed_dofmap, _collapsed_dofs] = d->collapse(_mesh->comm(), *_mesh->topology()); + spdlog::debug("Got collapsed dofmap"); collapsed_dofmaps.push_back( std::make_shared(std::move(_collapsed_dofmap))); collapsed_dofs.push_back(_collapsed_dofs); diff --git a/python/demo/demo_mixed-topology.py b/python/demo/demo_mixed-topology.py index 0c5a68f7523..e69439a63f8 100644 --- a/python/demo/demo_mixed-topology.py +++ b/python/demo/demo_mixed-topology.py @@ -125,7 +125,7 @@ basix.create_element(basix.ElementFamily.P, basix.CellType.prism, 1), ] dolfinx_elements = [ - FiniteElement(_cpp.fem.FiniteElement_float64(e._e, None, True)) for e in elements + FiniteElement(_cpp.fem.FiniteElement_float64(e._e, None, False)) for e in elements ] # NOTE: Both dofmaps have the same IndexMap, but different cell_dofs dofmaps = create_dofmaps( From e73ee11662f6eff65dde9cd40dd6587fc9ec2483 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Fri, 20 Mar 2026 13:22:19 +0000 Subject: [PATCH 11/34] Fixes from review --- cpp/dolfinx/fem/DirichletBC.h | 13 ++++++++----- cpp/dolfinx/fem/DofMap.cpp | 4 ---- cpp/dolfinx/fem/FunctionSpace.h | 8 +++++++- python/demo/demo_mixed-topology.py | 3 +-- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/cpp/dolfinx/fem/DirichletBC.h b/cpp/dolfinx/fem/DirichletBC.h index 105af35d367..5808bc3cbf4 100644 --- a/cpp/dolfinx/fem/DirichletBC.h +++ b/cpp/dolfinx/fem/DirichletBC.h @@ -107,11 +107,14 @@ std::vector locate_dofs_geometrical(const FunctionSpace& V, // especially when we usually want the boundary dofs only. Add // interface that computes dofs coordinates only for specified cell. - assert(V.elements(0)); - if (V.elements(0)->is_mixed()) + for (std::size_t i = 0; i < V.mesh()->topology()->cell_types().size(); ++i) { - throw std::runtime_error( - "Cannot locate dofs geometrically for mixed space. Use subspaces."); + assert(V.elements(i)); + if (V.elements(i)->is_mixed()) + { + throw std::runtime_error( + "Cannot locate dofs geometrically for mixed space. Use subspaces."); + } } // Compute dof coordinates @@ -546,7 +549,7 @@ class DirichletBC { auto g = std::get>>(_g); const std::vector& value = g->value; - std::int32_t bs = _function_space->dofmap()->bs(); + std::int32_t bs = _function_space->dofmaps(0)->bs(); if (x0) { assert(x.size() <= x0->size()); diff --git a/cpp/dolfinx/fem/DofMap.cpp b/cpp/dolfinx/fem/DofMap.cpp index 8b790f15a9f..e9896807047 100644 --- a/cpp/dolfinx/fem/DofMap.cpp +++ b/cpp/dolfinx/fem/DofMap.cpp @@ -249,10 +249,6 @@ std::pair> DofMap::collapse( * dofmap_new.index_map_bs(); std::vector collapsed_map(size); - const int tdim = topology.dim(); - const int num_cell_types = topology.entity_types(tdim).size(); - spdlog::debug("Cell types = {}", num_cell_types); - const int bs = dofmap_new.bs(); std::size_t num_cells = _dofmap.size() / _shape1; diff --git a/cpp/dolfinx/fem/FunctionSpace.h b/cpp/dolfinx/fem/FunctionSpace.h index 052a08a3c2a..67de1213cce 100644 --- a/cpp/dolfinx/fem/FunctionSpace.h +++ b/cpp/dolfinx/fem/FunctionSpace.h @@ -122,13 +122,18 @@ class FunctionSpace std::vector>> sub_elements; for (auto e : _elements) + { + assert(e); sub_elements.push_back(e->extract_sub_element(component)); - + } // Extract sub dofmap std::vector> sub_dofmaps; for (auto d : _dofmaps) + { + assert(d); sub_dofmaps.push_back( std::make_shared(d->extract_sub_dofmap(component))); + } // Create new sub space FunctionSpace sub_space(_mesh, sub_elements, sub_dofmaps); @@ -182,6 +187,7 @@ class FunctionSpace std::vector> collapsed_dofs; for (auto d : _dofmaps) { + assert(d); spdlog::debug("Call DofMap::collapse"); auto [_collapsed_dofmap, _collapsed_dofs] = d->collapse(_mesh->comm(), *_mesh->topology()); diff --git a/python/demo/demo_mixed-topology.py b/python/demo/demo_mixed-topology.py index e69439a63f8..cb18245b23c 100644 --- a/python/demo/demo_mixed-topology.py +++ b/python/demo/demo_mixed-topology.py @@ -183,10 +183,9 @@ def marker(x): # {py:class}`vector` format in DOLFINx to assemble # the left and right hand side of the linear system. -print(V_cpp.dofmaps(0).index_map.size_local) A = assemble_matrix(a_form, bcs=[bc]) b = assemble_vector(L_form) -bc.set(b.array, alpha=0.0) +bc.set(b.array) # We use {py:func}`scipy.sparse.linalg.spsolve` to solve the # resulting linear system From 434988a8bf09a92959a45aab196df4638c6eb9f9 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Fri, 20 Mar 2026 15:14:44 +0000 Subject: [PATCH 12/34] Try to cut out some lifting code --- cpp/dolfinx/fem/assemble_matrix_impl.h | 40 ++++++++++++++------------ cpp/dolfinx/fem/assemble_vector_impl.h | 29 +++++++++++++++++++ 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 3b6148f9e97..0b5d7cba325 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -30,6 +30,7 @@ using mdspan2_t = md::mdspan>; /// @brief Execute kernel over cells and accumulate result in a matrix. /// /// @tparam T Matrix/form scalar type. +/// @tparam AssMode Assembly mode (including BCs) /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. @@ -60,7 +61,7 @@ using mdspan2_t = md::mdspan>; /// function mesh. /// @param cell_info1 Cell permutation information for the trial /// function mesh. -template +template void assemble_cells_matrix( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -119,34 +120,37 @@ void assemble_cells_matrix( std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); - if (!bc0.empty()) + if constexpr (AssMode) { - for (int i = 0; i < num_dofs0; ++i) + if (!bc0.empty()) { - for (int k = 0; k < bs0; ++k) + for (int i = 0; i < num_dofs0; ++i) { - if (bc0[bs0 * dofs0[i] + k]) + for (int k = 0; k < bs0; ++k) { - // Zero row bs0 * i + k - const int row = bs0 * i + k; - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + if (bc0[bs0 * dofs0[i] + k]) + { + // Zero row bs0 * i + k + const int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + } } } } - } - if (!bc1.empty()) - { - for (int j = 0; j < num_dofs1; ++j) + if (!bc1.empty()) { - for (int k = 0; k < bs1; ++k) + for (int j = 0; j < num_dofs1; ++j) { - if (bc1[bs1 * dofs1[j] + k]) + for (int k = 0; k < bs1; ++k) { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; + if (bc1[bs1 * dofs1[j] + k]) + { + // Zero column bs1 * j + k + const int col = bs1 * j + k; + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; + } } } } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index db9f3a27b71..254e6081eb8 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -992,6 +992,35 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, auto mesh1 = a.function_spaces().at(1)->mesh(); assert(mesh1); + auto plugin_fn + = [&b, &x0, &bc_markers1, &bc_values1](std::span rows, + std::span cols, + std::span Ae) + { + spdlog::debug("{} {} {}", rows.size(), cols.size(), Ae.size()); + int nc = cols.size(); + for (std::size_t i = 0; i < rows.size(); ++i) + { + const std::int32_t ii = rows[i]; + if (bc_markers1[ii]) + { + const T x_bc = bc_values1[ii]; + for (std::size_t j = 0; j < cols.size(); ++j) + { + const std::int32_t jj = cols[j]; + spdlog::debug("ii={}, jj={}", ii, jj, b.size(), x0.size()); + if (x0.size() == 0) + b[jj] -= Ae[i * nc + j] * x_bc; + else + b[jj] -= Ae[i * nc + j] * (x_bc - x0[ii]); + } + } + } + }; + + assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); + return; + // Get dofmap for columns and rows of a assert(a.function_spaces().at(0)); assert(a.function_spaces().at(1)); From 67327444e966b6c3344cfd4e516872f757087a1f Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Fri, 20 Mar 2026 15:17:01 +0000 Subject: [PATCH 13/34] revert --- cpp/dolfinx/fem/assemble_matrix_impl.h | 40 ++++++++++++-------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 0b5d7cba325..3b6148f9e97 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -30,7 +30,6 @@ using mdspan2_t = md::mdspan>; /// @brief Execute kernel over cells and accumulate result in a matrix. /// /// @tparam T Matrix/form scalar type. -/// @tparam AssMode Assembly mode (including BCs) /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. @@ -61,7 +60,7 @@ using mdspan2_t = md::mdspan>; /// function mesh. /// @param cell_info1 Cell permutation information for the trial /// function mesh. -template +template void assemble_cells_matrix( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -120,37 +119,34 @@ void assemble_cells_matrix( std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); - if constexpr (AssMode) + if (!bc0.empty()) { - if (!bc0.empty()) + for (int i = 0; i < num_dofs0; ++i) { - for (int i = 0; i < num_dofs0; ++i) + for (int k = 0; k < bs0; ++k) { - for (int k = 0; k < bs0; ++k) + if (bc0[bs0 * dofs0[i] + k]) { - if (bc0[bs0 * dofs0[i] + k]) - { - // Zero row bs0 * i + k - const int row = bs0 * i + k; - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); - } + // Zero row bs0 * i + k + const int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); } } } + } - if (!bc1.empty()) + if (!bc1.empty()) + { + for (int j = 0; j < num_dofs1; ++j) { - for (int j = 0; j < num_dofs1; ++j) + for (int k = 0; k < bs1; ++k) { - for (int k = 0; k < bs1; ++k) + if (bc1[bs1 * dofs1[j] + k]) { - if (bc1[bs1 * dofs1[j] + k]) - { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; - } + // Zero column bs1 * j + k + const int col = bs1 * j + k; + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; } } } From 7f6ba25523c4ed24403834cdc115e8d957671740 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Fri, 20 Mar 2026 15:48:14 +0000 Subject: [PATCH 14/34] debug --- cpp/dolfinx/fem/assemble_vector_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 254e6081eb8..917317d5739 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1008,7 +1008,7 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, for (std::size_t j = 0; j < cols.size(); ++j) { const std::int32_t jj = cols[j]; - spdlog::debug("ii={}, jj={}", ii, jj, b.size(), x0.size()); + spdlog::debug("ii={}/{}, jj={}/{}", ii, x0.size(), jj, b.size()); if (x0.size() == 0) b[jj] -= Ae[i * nc + j] * x_bc; else From 3375eb5ce4165c9beddcd0c65cb5f433a2285b72 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Fri, 20 Mar 2026 16:50:05 +0000 Subject: [PATCH 15/34] Fix for rectangular matrix --- cpp/dolfinx/fem/assemble_vector_impl.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 917317d5739..29c8a05ec29 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -997,22 +997,24 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, std::span cols, std::span Ae) { - spdlog::debug("{} {} {}", rows.size(), cols.size(), Ae.size()); + spdlog::debug("{} {} {} {}", rows.size(), cols.size(), Ae.size(), + bc_values1.size()); int nc = cols.size(); - for (std::size_t i = 0; i < rows.size(); ++i) + int nr = rows.size(); + for (int i = 0; i < nc; ++i) { - const std::int32_t ii = rows[i]; + const std::int32_t ii = cols[i]; if (bc_markers1[ii]) { const T x_bc = bc_values1[ii]; - for (std::size_t j = 0; j < cols.size(); ++j) + for (int j = 0; j < nr; ++j) { - const std::int32_t jj = cols[j]; + const std::int32_t jj = rows[j]; spdlog::debug("ii={}/{}, jj={}/{}", ii, x0.size(), jj, b.size()); if (x0.size() == 0) - b[jj] -= Ae[i * nc + j] * x_bc; + b[jj] -= Ae[j * nc + i] * x_bc; else - b[jj] -= Ae[i * nc + j] * (x_bc - x0[ii]); + b[jj] -= Ae[j * nc + i] * (x_bc - x0[ii]); } } } From a30e56bfa784dc881aee9c41044cb281fadc66a2 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sat, 21 Mar 2026 08:03:52 +0000 Subject: [PATCH 16/34] Use alpha --- cpp/dolfinx/fem/assemble_vector_impl.h | 17 +++++++++-------- python/test/unit/fem/test_assembler.py | 4 ++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 29c8a05ec29..f1583746e6c 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -992,12 +992,15 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, auto mesh1 = a.function_spaces().at(1)->mesh(); assert(mesh1); + // const int bs0 = a.function_spaces()[0]->dofmap()->bs(); + // const int bs1 = a.function_spaces()[1]->dofmap()->bs(); + auto plugin_fn - = [&b, &x0, &bc_markers1, &bc_values1](std::span rows, - std::span cols, - std::span Ae) + = [&b, &x0, &bc_markers1, &bc_values1, + &alpha](std::span rows, + std::span cols, std::span Ae) { - spdlog::debug("{} {} {} {}", rows.size(), cols.size(), Ae.size(), + spdlog::debug("{}x{}={} {}", rows.size(), cols.size(), Ae.size(), bc_values1.size()); int nc = cols.size(); int nr = rows.size(); @@ -1011,10 +1014,8 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, { const std::int32_t jj = rows[j]; spdlog::debug("ii={}/{}, jj={}/{}", ii, x0.size(), jj, b.size()); - if (x0.size() == 0) - b[jj] -= Ae[j * nc + i] * x_bc; - else - b[jj] -= Ae[j * nc + i] * (x_bc - x0[ii]); + const T _x0 = x0.empty() ? 0 : x0[ii]; + b[jj] -= Ae[j * nc + i] * alpha * (x_bc - _x0); } } } diff --git a/python/test/unit/fem/test_assembler.py b/python/test/unit/fem/test_assembler.py index 55d77b88a51..5e2349a4765 100644 --- a/python/test/unit/fem/test_assembler.py +++ b/python/test/unit/fem/test_assembler.py @@ -494,6 +494,10 @@ def test_matrix_assembly_block_vector(self, mode): """ from petsc4py import PETSc + from dolfinx.log import set_log_level + + set_log_level(0) + from dolfinx.fem.petsc import apply_lifting as petsc_apply_lifting from dolfinx.fem.petsc import assemble_matrix as petsc_assemble_matrix from dolfinx.fem.petsc import assemble_vector as petsc_assemble_vector From be70ab75da4b54a7f943b8ab0831c77c66d1ebec Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sat, 21 Mar 2026 08:25:40 +0000 Subject: [PATCH 17/34] Work on BS --- cpp/dolfinx/fem/assemble_vector_impl.h | 57 ++++++++++++++------------ python/test/unit/fem/test_assembler.py | 4 -- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index f1583746e6c..95d2b2c5f27 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -992,30 +992,46 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, auto mesh1 = a.function_spaces().at(1)->mesh(); assert(mesh1); - // const int bs0 = a.function_spaces()[0]->dofmap()->bs(); - // const int bs1 = a.function_spaces()[1]->dofmap()->bs(); + // Get dofmap for columns and rows of a + assert(a.function_spaces().at(0)); + assert(a.function_spaces().at(1)); + auto dofmap0 = a.function_spaces()[0]->dofmap()->map(); + const int bs0 = a.function_spaces()[0]->dofmap()->bs(); + auto element0 = a.function_spaces()[0]->element(); + auto dofmap1 = a.function_spaces()[1]->dofmap()->map(); + const int bs1 = a.function_spaces()[1]->dofmap()->bs(); + auto element1 = a.function_spaces()[1]->element(); + assert(element0); + + spdlog::debug("bs0={}, bs1={}", bs0, bs1); auto plugin_fn - = [&b, &x0, &bc_markers1, &bc_values1, + = [&b, &x0, &bs0, &bs1, &bc_markers1, &bc_values1, &alpha](std::span rows, std::span cols, std::span Ae) { - spdlog::debug("{}x{}={} {}", rows.size(), cols.size(), Ae.size(), - bc_values1.size()); - int nc = cols.size(); - int nr = rows.size(); - for (int i = 0; i < nc; ++i) + int nr = rows.size() * bs0; + int nc = cols.size() * bs1; + spdlog::debug("{}x{}={} {}", nr, nc, Ae.size(), bc_values1.size()); + for (std::size_t i = 0; i < cols.size(); ++i) { - const std::int32_t ii = cols[i]; - if (bc_markers1[ii]) + for (int k = 0; k < bs1; ++k) { - const T x_bc = bc_values1[ii]; - for (int j = 0; j < nr; ++j) + const std::int32_t ii = cols[i] * bs1 + k; + if (bc_markers1[ii]) { - const std::int32_t jj = rows[j]; - spdlog::debug("ii={}/{}, jj={}/{}", ii, x0.size(), jj, b.size()); + const T x_bc = bc_values1[ii]; const T _x0 = x0.empty() ? 0 : x0[ii]; - b[jj] -= Ae[j * nc + i] * alpha * (x_bc - _x0); + for (std::size_t j = 0; j < rows.size(); ++j) + { + for (int m = 0; m < bs0; ++m) + { + const std::int32_t jj = rows[j] * bs0 + m; + spdlog::debug("ii={}/{}, jj={}/{}", ii, x0.size(), jj, b.size()); + b[jj] -= Ae[(j * bs0 + m) * nc + (i * bs1 + k)] * alpha + * (x_bc - _x0); + } + } } } } @@ -1024,17 +1040,6 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); return; - // Get dofmap for columns and rows of a - assert(a.function_spaces().at(0)); - assert(a.function_spaces().at(1)); - auto dofmap0 = a.function_spaces()[0]->dofmap()->map(); - const int bs0 = a.function_spaces()[0]->dofmap()->bs(); - auto element0 = a.function_spaces()[0]->element(); - auto dofmap1 = a.function_spaces()[1]->dofmap()->map(); - const int bs1 = a.function_spaces()[1]->dofmap()->bs(); - auto element1 = a.function_spaces()[1]->element(); - assert(element0); - std::span cell_info0; std::span cell_info1; // TODO: Check for each element instead diff --git a/python/test/unit/fem/test_assembler.py b/python/test/unit/fem/test_assembler.py index 5e2349a4765..55d77b88a51 100644 --- a/python/test/unit/fem/test_assembler.py +++ b/python/test/unit/fem/test_assembler.py @@ -494,10 +494,6 @@ def test_matrix_assembly_block_vector(self, mode): """ from petsc4py import PETSc - from dolfinx.log import set_log_level - - set_log_level(0) - from dolfinx.fem.petsc import apply_lifting as petsc_apply_lifting from dolfinx.fem.petsc import assemble_matrix as petsc_assemble_matrix from dolfinx.fem.petsc import assemble_vector as petsc_assemble_vector From fe626f9a4d455943bc5e7fccf700e74ad18f6cf7 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sat, 21 Mar 2026 10:59:11 +0000 Subject: [PATCH 18/34] simplify --- cpp/dolfinx/fem/assemble_vector_impl.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 95d2b2c5f27..ee5c980b418 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1238,17 +1238,12 @@ void apply_lifting( bc.get().set(bc_values1, std::nullopt, 1); } + std::span _x0; if (!x0.empty()) - { - lift_bc(b, a[j]->get(), x_dofmap, x, constants[j], coeffs[j], - std::span(bc_values1), bc_markers1, x0[j], alpha); - } - else - { - lift_bc(b, a[j]->get(), x_dofmap, x, constants[j], coeffs[j], - std::span(bc_values1), bc_markers1, - std::span(), alpha); - } + _x0 = x0[j]; + + lift_bc(b, a[j]->get(), x_dofmap, x, constants[j], coeffs[j], + std::span(bc_values1), bc_markers1, _x0, alpha); } } } From c029d7567b4368da0d3cfe445d0fefc383889f6d Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sat, 21 Mar 2026 11:47:07 +0000 Subject: [PATCH 19/34] delete --- cpp/dolfinx/fem/assemble_vector_impl.h | 822 ++----------------------- 1 file changed, 57 insertions(+), 765 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index ee5c980b418..e0fcff73dbe 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -37,604 +37,6 @@ namespace dolfinx::fem::impl using mdspan2_t = md::mdspan>; /// @endcond -/// @brief Apply boundary condition lifting for cell integrals. -/// -/// @tparam T The scalar type. -/// @tparam _bs0 The block size of the form test function dof map. If -/// less than zero the block size is determined at runtime. If `_bs0` is -/// positive the block size is used as a compile-time constant, which -/// has performance benefits. -/// @tparam _bs1 The block size of the trial function dof map. -/// @param[in,out] b Vector to modify. -/// @param x_dofmap Dofmap for the mesh geometry. -/// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each cell. -/// @param[in] cells Cell indices to execute the kernel over. These are -/// the indices into the geometry dofmap `x_dofmap`. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. -/// @param[in] P0 Function that applies transformation `P_0 A` in-place -/// to the computed tensor `A` to transform its test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. -/// @param[in] P1T Function that applies transformation `A P_1^T` -/// in-place to to the computed tensor `A` to transform trial -/// degrees-of-freedom. -/// @param[in] constants Constant data in the kernel. -/// @param[in] coeffs Coefficient data in the kernel. It has shape -/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th -/// coefficient for cell `i`. -/// @param[in] cell_info0 Cell permutation information for the test -/// function mesh. -/// @param[in] cell_info1 Cell permutation information for the trial -/// function mesh. -/// @param[in] bc_values1 Value for entries with an applied boundary -/// condition. -/// @param[in] bc_markers1 Marker to identify which DOFs have boundary -/// conditions applied. -/// @param[in] x0 Vector used in the lifting. -/// @param[in] alpha Scaling to apply. -template ::value_type> - requires std::is_same_v::value_type, T> -void _lift_bc_cells( - V&& b, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - FEkernel auto kernel, std::span cells, - std::tuple> dofmap0, - fem::DofTransformKernel auto P0, - std::tuple> dofmap1, - fem::DofTransformKernel auto P1T, std::span constants, - md::mdspan> coeffs, - std::span cell_info0, - std::span cell_info1, std::span bc_values1, - std::span bc_markers1, std::span x0, T alpha) -{ - if (cells.empty()) - return; - - const auto [dmap0, bs0, cells0] = dofmap0; - const auto [dmap1, bs1, cells1] = dofmap1; - assert(_bs0 < 0 or _bs0 == bs0); - assert(_bs1 < 0 or _bs1 == bs1); - - const int num_rows = bs0 * dmap0.extent(1); - const int num_cols = bs1 * dmap1.extent(1); - - // Data structures used in bc application - std::vector> cdofs(3 * x_dofmap.extent(1)); - std::vector Ae(num_rows * num_cols), be(num_rows); - assert(cells0.size() == cells.size()); - assert(cells1.size() == cells.size()); - for (std::size_t index = 0; index < cells.size(); ++index) - { - // Cell index in integration domain mesh, test function mesh, and trial - // function mesh - std::int32_t c = cells[index]; - std::int32_t c0 = cells0[index]; - std::int32_t c1 = cells1[index]; - - // Get dof maps for cell - auto dofs1 = md::submdspan(dmap1, c1, md::full_extent); - - // Check if bc is applied to cell - bool has_bc = false; - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - assert(_bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[_bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - else - { - for (int k = 0; k < bs1; ++k) - { - assert(bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - } - - if (!has_bc) - continue; - - // Get cell coordinates/geometry - auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); - - // Size data structure for assembly - auto dofs0 = md::submdspan(dmap0, c0, md::full_extent); - - std::ranges::fill(Ae, 0); - kernel(Ae.data(), &coeffs(index, 0), constants.data(), cdofs.data(), - nullptr, nullptr, nullptr); - P0(Ae, cell_info0, c0, num_cols); - P1T(Ae, cell_info1, c1, num_rows); - - // Size data structure for assembly - std::ranges::fill(be, 0); - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - const std::int32_t jj = _bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // const T _x0 = 0; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + _bs1 * j + k] * alpha * (bc - _x0); - } - } - } - else - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - } - - for (std::size_t i = 0; i < dofs0.size(); ++i) - { - if constexpr (_bs0 > 0) - { - for (int k = 0; k < _bs0; ++k) - b[_bs0 * dofs0[i] + k] += be[_bs0 * i + k]; - } - else - { - for (int k = 0; k < bs0; ++k) - b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; - } - } - } -} - -/// @brief Apply lifting for exterior facet integrals. -/// -/// @tparam T Scalar type. -/// @param[in,out] b Vector to modify. -/// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. -/// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each entity. -/// @param[in] entities Entities to execute the kernel over, where for the -/// `i`th entity `enities(i, 0)` is the attached cell and `entities(i, 1)` -/// is the local index of the facet relative to the cell. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap -/// indices. See `facets` documentation for the dofmap indices layout. -/// @param[in] P0 Function that applies the transformation `P_0 A` -/// in-place to `A` to transform the test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data. -/// See `dofmap0` for a description. -/// @param[in] P1T Function that applies the transformation `A P_1^T` -/// in-place to `A` to transform the trial degrees-of-freedom. -/// @param[in] constants Constant coefficient data in the kernel. -/// @param[in] coeffs Coefficient data in the kernel. It has shape -/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th -/// coefficient for cell `i`. -/// @param[in] cell_info0 Cell permutation information for the test -/// function mesh. -/// @param[in] cell_info1 Cell permutation information for the trial -/// function mesh. -/// @param[in] bc_values1 Values for entries with an applied boundary -/// condition. -/// @param[in] bc_markers1 Marker to identify which DOFs have boundary -/// conditions applied. -/// @param[in] x0 The vector used in the lifting. -/// @param[in] alpha Scaling to apply. -/// @param[in] perms Facet permutation data, where `(cell_idx, -/// local_facet_idx)` is the permutation value for the facet attached to -/// the cell `cell_idx` with local index `local_facet_idx` relative to -/// the cell. Empty if facet permutations are not required. -template ::value_type> - requires std::is_same_v::value_type, T> -void _lift_bc_entities( - V&& b, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - FEkernel auto kernel, - md::mdspan> - entities, - std::tuple>> - dofmap0, - fem::DofTransformKernel auto P0, - std::tuple>> - dofmap1, - fem::DofTransformKernel auto P1T, std::span constants, - md::mdspan> coeffs, - std::span cell_info0, - std::span cell_info1, std::span bc_values1, - std::span bc_markers1, std::span x0, T alpha, - md::mdspan> perms) -{ - if (entities.empty()) - return; - - const auto [dmap0, bs0, entities0] = dofmap0; - const auto [dmap1, bs1, entities1] = dofmap1; - - const int num_rows = bs0 * dmap0.extent(1); - const int num_cols = bs1 * dmap1.extent(1); - - // Data structures used in bc application - std::vector> cdofs(3 * x_dofmap.extent(1)); - std::vector Ae(num_rows * num_cols), be(num_rows); - assert(entities0.size() == entities.size()); - assert(entities1.size() == entities.size()); - for (std::size_t index = 0; index < entities.extent(0); ++index) - { - // Cell in integration domain, test function and trial function - // meshes - std::int32_t cell = entities(index, 0); - std::int32_t cell0 = entities0(index, 0); - std::int32_t cell1 = entities1(index, 0); - - // Local entity index - std::int32_t local_entity = entities(index, 1); - - // Get dof maps for cell - auto dofs1 = md::submdspan(dmap1, cell1, md::full_extent); - - // Check if bc is applied to cell - bool has_bc = false; - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - if (bc_markers1[bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - - if (!has_bc) - continue; - - // Get cell coordinates/geometry - auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); - - // Size data structure for assembly - auto dofs0 = md::submdspan(dmap0, cell0, md::full_extent); - - // Permutations - std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_entity); - std::ranges::fill(Ae, 0); - kernel(Ae.data(), &coeffs(index, 0), constants.data(), cdofs.data(), - &local_entity, &perm, nullptr); - P0(Ae, cell_info0, cell0, num_cols); - P1T(Ae, cell_info1, cell1, num_rows); - - // Size data structure for assembly - std::ranges::fill(be, 0); - for (std::size_t j = 0; j < dofs1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dofs1[j] + k; - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - - for (std::size_t i = 0; i < dofs0.size(); ++i) - for (int k = 0; k < bs0; ++k) - b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; - } -} - -/// @brief Apply lifting for interior facet integrals. -/// -/// @tparam T Scalar type. -/// @param[in,out] b Vector to modify -/// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. -/// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each facet. -/// @param[in] facets Facets to execute the kernel over, where for the -/// `i`th facet `facets(i, 0, 0)` is the first attached cell and -/// `facets(i, 0, 1)` is the local index of the facet relative to the -/// first cell, and `facets(i, 1, 0)` is the second first attached cell -/// and `facets(i, 1, 1)` is the local index of the facet relative to -/// the second cell. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. See `facets` documentation for the dofmap indices layout. -/// Cells that don't exist in the test function domain should be -/// marked with -1 in the cell indices list. -/// @param[in] P0 Function that applies the transformation `P_0 A` -/// in-place to `A` to transform the test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data. -/// See `dofmap0` for a description. -/// @param[in] P1T Function that applies the transformation `A P_1^T` -/// in-place to `A` to transform the trial degrees-of-freedom. -/// @param[in] constants Constant coefficient data in the kernel. -/// @param[in] coeffs Coefficient data in the kernel. It has shape -/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th -/// coefficient for cell `i`. -/// @param[in] cell_info0 Cell permutation information for the test -/// function mesh. -/// @param[in] cell_info1 Cell permutation information for the trial -/// function mesh. -/// @param[in] bc_values1 Values for entries with an applied boundary -/// condition. -/// @param[in] bc_markers1 Marker to identify which DOFs have boundary -/// conditions applied. -/// @param[in] x0 Vector used in the lifting. -/// @param[in] alpha Scaling to apply -/// @param[in] perms Facet permutation data, where `(cell_idx, -/// local_facet_idx)` is the permutation value for the facet attached to -/// the cell `cell_idx` with local index `local_facet_idx` relative to -/// the cell. Empty if facet permutations are not required. -template ::value_type> - requires std::is_same_v::value_type, T> -void _lift_bc_interior_facets( - V&& b, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - FEkernel auto kernel, - md::mdspan> - facets, - std::tuple>> - dofmap0, - fem::DofTransformKernel auto P0, - std::tuple>> - dofmap1, - fem::DofTransformKernel auto P1T, std::span constants, - md::mdspan> - coeffs, - std::span cell_info0, - std::span cell_info1, std::span bc_values1, - std::span bc_markers1, std::span x0, T alpha, - md::mdspan> perms) -{ - if (facets.empty()) - return; - - const auto [dmap0, bs0, facets0] = dofmap0; - const auto [dmap1, bs1, facets1] = dofmap1; - - const int num_dofs0 = dmap0.extent(1); - const int num_dofs1 = dmap1.extent(1); - const int num_rows = bs0 * 2 * num_dofs0; - const int num_cols = bs1 * 2 * num_dofs1; - - // Data structures used in assembly - using X = scalar_value_t; - std::vector cdofs(2 * x_dofmap.extent(1) * 3); - std::span cdofs0(cdofs.data(), x_dofmap.extent(1) * 3); - std::span cdofs1(cdofs.data() + x_dofmap.extent(1) * 3, - x_dofmap.extent(1) * 3); - std::vector Ae(num_rows * num_cols), be(num_rows); - - // Temporaries for joint dofmaps - std::vector dmapjoint0(2 * num_dofs0); - std::vector dmapjoint1(2 * num_dofs1); - - assert(facets0.size() == facets.size()); - assert(facets1.size() == facets.size()); - for (std::size_t f = 0; f < facets.extent(0); ++f) - { - // Cells in integration domain, test function domain and trial - // function domain meshes - std::array cells{facets(f, 0, 0), facets(f, 1, 0)}; - std::array cells0{facets0(f, 0, 0), facets0(f, 1, 0)}; - std::array cells1{facets1(f, 0, 0), facets1(f, 1, 0)}; - - // Local facet indices - std::array local_facet = {facets(f, 0, 1), facets(f, 1, 1)}; - - // Get cell geometry - auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); - for (std::size_t i = 0; i < x_dofs0.size(); ++i) - std::copy_n(&x(x_dofs0[i], 0), 3, std::next(cdofs0.begin(), 3 * i)); - auto x_dofs1 = md::submdspan(x_dofmap, cells[1], md::full_extent); - for (std::size_t i = 0; i < x_dofs1.size(); ++i) - std::copy_n(&x(x_dofs1[i], 0), 3, std::next(cdofs1.begin(), 3 * i)); - - // Get dof maps for cells and pack - // When integrating over interfaces between two domains, the test function - // might only be defined on one side, so we check which cells exist in the - // test function domain - std::span dmap0_cell0 - = cells0[0] >= 0 - ? std::span(dmap0.data_handle() + cells0[0] * num_dofs0, - num_dofs0) - : std::span(); - std::span dmap0_cell1 - = cells0[1] >= 0 - ? std::span(dmap0.data_handle() + cells0[1] * num_dofs0, - num_dofs0) - : std::span(); - - std::ranges::copy(dmap0_cell0, dmapjoint0.begin()); - std::ranges::copy(dmap0_cell1, std::next(dmapjoint0.begin(), num_dofs0)); - - // Check which cells exist in the trial function domain - std::span dmap1_cell0 - = cells1[0] >= 0 - ? std::span(dmap1.data_handle() + cells1[0] * num_dofs1, - num_dofs1) - : std::span(); - std::span dmap1_cell1 - = cells1[1] >= 0 - ? std::span(dmap1.data_handle() + cells1[1] * num_dofs1, - num_dofs1) - : std::span(); - - std::ranges::copy(dmap1_cell0, dmapjoint1.begin()); - std::ranges::copy(dmap1_cell1, std::next(dmapjoint1.begin(), num_dofs1)); - - // Check if bc is applied to cell0 - bool has_bc = false; - for (std::size_t j = 0; j < dmap1_cell0.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - if (bc_markers1[bs1 * dmap1_cell0[j] + k]) - { - has_bc = true; - break; - } - } - } - - // Check if bc is applied to cell1 - for (std::size_t j = 0; j < dmap1_cell1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - if (bc_markers1[bs1 * dmap1_cell1[j] + k]) - { - has_bc = true; - break; - } - } - } - - if (!has_bc) - continue; - - // Tabulate tensor - std::ranges::fill(Ae, 0); - std::array perm = perms.empty() - ? std::array{0, 0} - : std::array{perms(cells[0], local_facet[0]), - perms(cells[1], local_facet[1])}; - kernel(Ae.data(), &coeffs(f, 0, 0), constants.data(), cdofs.data(), - local_facet.data(), perm.data(), nullptr); - - if (cells0[0] >= 0) - P0(Ae, cell_info0, cells0[0], num_cols); - if (cells0[1] >= 0) - { - std::span sub_Ae0(Ae.data() + bs0 * num_dofs0 * num_cols, - bs0 * num_dofs0 * num_cols); - P0(sub_Ae0, cell_info0, cells0[1], num_cols); - } - if (cells1[0] >= 0) - P1T(Ae, cell_info1, cells1[0], num_rows); - - if (cells1[1] >= 0) - { - for (int row = 0; row < num_rows; ++row) - { - // DOFs for dmap1 and cell1 are not stored contiguously in - // the block matrix, so each row needs a separate span access - std::span sub_Ae1(Ae.data() + row * num_cols + bs1 * num_dofs1, - bs1 * num_dofs1); - P1T(sub_Ae1, cell_info1, cells1[1], 1); - } - } - - std::ranges::fill(be, 0); - - // Compute b = b - A*b for cell0 - for (std::size_t j = 0; j < dmap1_cell0.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dmap1_cell0[j] + k; - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - - // Compute b = b - A*b for cell1 - const int offset = bs1 * num_dofs1; - for (std::size_t j = 0; j < dmap1_cell1.size(); ++j) - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t jj = bs1 * dmap1_cell1[j] + k; - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(offset + bs1 * j + k) * alpha * (bc - x0[jj]); - for (int m = 0; m < num_rows; ++m) - { - be[m] - -= Ae[m * num_cols + offset + bs1 * j + k] * alpha * (bc - _x0); - } - } - } - } - - for (std::size_t i = 0; i < dmap0_cell0.size(); ++i) - for (int k = 0; k < bs0; ++k) - b[bs0 * dmap0_cell0[i] + k] += be[bs0 * i + k]; - - const int offset_be = bs0 * num_dofs0; - for (std::size_t i = 0; i < dmap0_cell1.size(); ++i) - for (int k = 0; k < bs0; ++k) - b[bs0 * dmap0_cell1[i] + k] += be[offset_be + bs0 * i + k]; - } -} - /// @brief Execute kernel over cells and accumulate result in vector. /// /// @tparam T Scalar type @@ -969,198 +371,88 @@ void assemble_interior_facets( template ::value_type> requires std::is_same_v::value_type, T> -void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, - md::mdspan, - md::extents> - x, - std::span constants, - const std::map, - std::pair, int>>& coefficients, - std::span bc_values1, - std::span bc_markers1, std::span x0, - T alpha) +void lift_bc( + V&& b, const Form& a, + // mdspan2_t x_dofmap, + // md::mdspan, + // md::extents> + // x, + std::span constants, + const std::map, + std::pair, int>>& coefficients, + std::span bc_values1, std::span bc_markers1, + std::span x0, T alpha) { - // Integration domain mesh - std::shared_ptr> mesh = a.mesh(); - assert(mesh); - - // Test function mesh - auto mesh0 = a.function_spaces().at(0)->mesh(); - assert(mesh0); - - // Trial function mesh - auto mesh1 = a.function_spaces().at(1)->mesh(); - assert(mesh1); // Get dofmap for columns and rows of a assert(a.function_spaces().at(0)); assert(a.function_spaces().at(1)); - auto dofmap0 = a.function_spaces()[0]->dofmap()->map(); const int bs0 = a.function_spaces()[0]->dofmap()->bs(); - auto element0 = a.function_spaces()[0]->element(); - auto dofmap1 = a.function_spaces()[1]->dofmap()->map(); const int bs1 = a.function_spaces()[1]->dofmap()->bs(); - auto element1 = a.function_spaces()[1]->element(); - assert(element0); spdlog::debug("bs0={}, bs1={}", bs0, bs1); - auto plugin_fn - = [&b, &x0, &bs0, &bs1, &bc_markers1, &bc_values1, - &alpha](std::span rows, - std::span cols, std::span Ae) + // Iterate over Form with plugin kernel + // TODO: only iterate over cells with BCs (efficiency) + // TODO: templating over block size? + + if (bs0 > 1 or bs1 > 1) { - int nr = rows.size() * bs0; - int nc = cols.size() * bs1; - spdlog::debug("{}x{}={} {}", nr, nc, Ae.size(), bc_values1.size()); - for (std::size_t i = 0; i < cols.size(); ++i) + auto plugin_fn + = [&b, &x0, &bs0, &bs1, &bc_markers1, &bc_values1, + &alpha](std::span rows, + std::span cols, std::span Ae) { - for (int k = 0; k < bs1; ++k) + std::size_t nc = cols.size() * bs1; + for (std::size_t i = 0; i < cols.size(); ++i) { - const std::int32_t ii = cols[i] * bs1 + k; - if (bc_markers1[ii]) + for (int k = 0; k < bs1; ++k) { - const T x_bc = bc_values1[ii]; - const T _x0 = x0.empty() ? 0 : x0[ii]; - for (std::size_t j = 0; j < rows.size(); ++j) + const std::int32_t ii = cols[i] * bs1 + k; + if (bc_markers1[ii]) { - for (int m = 0; m < bs0; ++m) + const T x_bc = bc_values1[ii]; + const T _x0 = x0.empty() ? 0 : x0[ii]; + for (std::size_t j = 0; j < rows.size(); ++j) { - const std::int32_t jj = rows[j] * bs0 + m; - spdlog::debug("ii={}/{}, jj={}/{}", ii, x0.size(), jj, b.size()); - b[jj] -= Ae[(j * bs0 + m) * nc + (i * bs1 + k)] * alpha - * (x_bc - _x0); + for (int m = 0; m < bs0; ++m) + { + const std::int32_t jj = rows[j] * bs0 + m; + b[jj] -= Ae[(j * bs0 + m) * nc + (i * bs1 + k)] * alpha + * (x_bc - _x0); + } } } } } - } - }; - - assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); - return; - - std::span cell_info0; - std::span cell_info1; - // TODO: Check for each element instead - if (element0->needs_dof_transformations() - or element1->needs_dof_transformations() or a.needs_facet_permutations()) - { - mesh0->topology_mutable()->create_entity_permutations(); - mesh1->topology_mutable()->create_entity_permutations(); - cell_info0 = std::span(mesh0->topology()->get_cell_permutation_info()); - cell_info1 = std::span(mesh1->topology()->get_cell_permutation_info()); - } - - fem::DofTransformKernel auto P0 - = element0->template dof_transformation_fn(doftransform::standard); - fem::DofTransformKernel auto P1T - = element1->template dof_transformation_right_fn( - doftransform::transpose); + }; - for (int i = 0; i < a.num_integrals(IntegralType::cell, 0); ++i) - { - auto kernel = a.kernel(IntegralType::cell, i, 0); - assert(kernel); - auto& [_coeffs, cstride] = coefficients.at({IntegralType::cell, i}); - std::span cells = a.domain(IntegralType::cell, i, 0); - std::span cells0 = a.domain_arg(IntegralType::cell, 0, i, 0); - std::span cells1 = a.domain_arg(IntegralType::cell, 1, i, 0); - assert(_coeffs.size() == cells.size() * cstride); - auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); - if (bs0 == 1 and bs1 == 1) - { - _lift_bc_cells<1, 1>(b, x_dofmap, x, kernel, cells, - {dofmap0, bs0, cells0}, P0, {dofmap1, bs1, cells1}, - P1T, constants, coeffs, cell_info0, cell_info1, - bc_values1, bc_markers1, x0, alpha); - } - else if (bs0 == 3 and bs1 == 3) - { - _lift_bc_cells<3, 3>(b, x_dofmap, x, kernel, cells, - {dofmap0, bs0, cells0}, P0, {dofmap1, bs1, cells1}, - P1T, constants, coeffs, cell_info0, cell_info1, - bc_values1, bc_markers1, x0, alpha); - } - else - { - _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); - } + assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); } - - md::mdspan> facet_perms; - if (a.needs_facet_permutations()) - { - mesh::CellType cell_type = mesh->topology()->cell_type(); - int num_facets_per_cell - = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); - mesh->topology_mutable()->create_entity_permutations(); - const std::vector& p - = mesh->topology()->get_facet_permutations(); - facet_perms = md::mdspan(p.data(), p.size() / num_facets_per_cell, - num_facets_per_cell); - } - - for (int i = 0; i < a.num_integrals(IntegralType::interior_facet, 0); ++i) - { - auto kernel = a.kernel(IntegralType::interior_facet, i, 0); - assert(kernel); - auto& [coeffs, cstride] - = coefficients.at({IntegralType::interior_facet, i}); - - using mdspanx22_t - = md::mdspan>; - using mdspanx2x_t - = md::mdspan>; - std::span f = a.domain(IntegralType::interior_facet, i, 0); - mdspanx22_t facets(f.data(), f.size() / 4, 2, 2); - std::span f0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); - mdspanx22_t facets0(f0.data(), f0.size() / 4, 2, 2); - std::span f1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); - mdspanx22_t facets1(f1.data(), f1.size() / 4, 2, 2); - _lift_bc_interior_facets( - b, x_dofmap, x, kernel, facets, {dofmap0, bs0, facets0}, P0, - {dofmap1, bs1, facets1}, P1T, constants, - mdspanx2x_t(coeffs.data(), facets.extent(0), 2, cstride), cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha, facet_perms); - } - - for (auto itg_type : {fem::IntegralType::exterior_facet, - fem::IntegralType::vertex, fem::IntegralType::ridge}) + else { - md::mdspan> perms - = (itg_type == fem::IntegralType::exterior_facet) - ? facet_perms - : md::mdspan>{}; - - for (int i = 0; i < a.num_integrals(itg_type, 0); ++i) + auto plugin_fn + = [&b, &x0, &bc_markers1, &bc_values1, + &alpha](std::span rows, + std::span cols, std::span Ae) { - auto kernel = a.kernel(itg_type, i, 0); - assert(kernel); - auto& [coeffs, cstride] = coefficients.at({itg_type, i}); + std::size_t nr = rows.size(); + std::size_t nc = cols.size(); + for (std::size_t i = 0; i < nc; ++i) + { + const std::int32_t ii = cols[i]; + if (bc_markers1[ii]) + { + const T x_bc = bc_values1[ii]; + const T _x0 = x0.empty() ? 0 : x0[ii]; + for (std::size_t j = 0; j < nr; ++j) + b[rows[j]] -= Ae[j * nc + i] * alpha * (x_bc - _x0); + } + } + }; - using mdspanx2_t - = md::mdspan>; - std::span e = a.domain(itg_type, i, 0); - mdspanx2_t entities(e.data(), e.size() / 2, 2); - std::span e0 = a.domain_arg(itg_type, 0, i, 0); - mdspanx2_t entities0(e0.data(), e0.size() / 2, 2); - std::span e1 = a.domain_arg(itg_type, 1, i, 0); - mdspanx2_t entities1(e1.data(), e1.size() / 2, 2); - assert(coeffs.size() == entities.extent(0) * cstride); - _lift_bc_entities( - b, x_dofmap, x, kernel, entities, {dofmap0, bs0, entities0}, P0, - {dofmap1, bs1, entities1}, P1T, constants, - md::mdspan(coeffs.data(), entities.extent(0), cstride), cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha, perms); - } + assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); } -} /// Modify b such that: /// @@ -1242,7 +534,7 @@ void apply_lifting( if (!x0.empty()) _x0 = x0[j]; - lift_bc(b, a[j]->get(), x_dofmap, x, constants[j], coeffs[j], + lift_bc(b, a[j]->get(), constants[j], coeffs[j], std::span(bc_values1), bc_markers1, _x0, alpha); } } From 52602aeabc670f9cc92463def5cbf2d3cfc8f45a Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sat, 21 Mar 2026 11:49:01 +0000 Subject: [PATCH 20/34] more --- cpp/dolfinx/fem/assemble_vector_impl.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index e0fcff73dbe..1363c0b8ed3 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -371,17 +371,12 @@ void assemble_interior_facets( template ::value_type> requires std::is_same_v::value_type, T> -void lift_bc( - V&& b, const Form& a, - // mdspan2_t x_dofmap, - // md::mdspan, - // md::extents> - // x, - std::span constants, - const std::map, - std::pair, int>>& coefficients, - std::span bc_values1, std::span bc_markers1, - std::span x0, T alpha) +void lift_bc(V&& b, const Form& a, std::span constants, + const std::map, + std::pair, int>>& coefficients, + std::span bc_values1, + std::span bc_markers1, std::span x0, + T alpha) { // Get dofmap for columns and rows of a From 0bdb5e8fb37869bd0439737ec593d06624c36c2f Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sat, 21 Mar 2026 12:20:12 +0000 Subject: [PATCH 21/34] fix --- cpp/dolfinx/fem/assemble_matrix_impl.h | 47 ++++++++++++++++---------- cpp/dolfinx/fem/assemble_vector_impl.h | 1 + 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 3b6148f9e97..c3b4c40ff59 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -89,6 +89,7 @@ void assemble_cells_matrix( const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); std::vector> cdofs(3 * x_dofmap.extent(1)); + std::vector bce0(ndim0), bce1(ndim1); // Iterate over active cells assert(cells0.size() == cells.size()); @@ -101,24 +102,11 @@ void assemble_cells_matrix( std::int32_t cell0 = cells0[c]; std::int32_t cell1 = cells1[c]; - // Get cell coordinates/geometry - auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); - - // Tabulate tensor - std::ranges::fill(Ae, 0); - kernel(Ae.data(), &coeffs(c, 0), constants.data(), cdofs.data(), nullptr, - nullptr, nullptr); - - // Compute A = P_0 \tilde{A} P_1^T (dof transformation) - P0(Ae, cell_info0, cell0, ndim1); // B = P0 \tilde{A} - P1T(Ae, cell_info1, cell1, ndim0); // A = B P1_T - - // Zero rows/columns for essential bcs + // Collect element rows/columns for essential bcs std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); + bce0.clear(); if (!bc0.empty()) { for (int i = 0; i < num_dofs0; ++i) @@ -129,12 +117,13 @@ void assemble_cells_matrix( { // Zero row bs0 * i + k const int row = bs0 * i + k; - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + bce0.push_back(row); } } } } + bce1.clear(); if (!bc1.empty()) { for (int j = 0; j < num_dofs1; ++j) @@ -145,13 +134,35 @@ void assemble_cells_matrix( { // Zero column bs1 * j + k const int col = bs1 * j + k; - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; + bce1.push_back(col); } } } } + // Get cell coordinates/geometry + auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); + for (std::size_t i = 0; i < x_dofs.size(); ++i) + std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); + + // Tabulate tensor + std::ranges::fill(Ae, 0); + kernel(Ae.data(), &coeffs(c, 0), constants.data(), cdofs.data(), nullptr, + nullptr, nullptr); + + // Compute A = P_0 \tilde{A} P_1^T (dof transformation) + P0(Ae, cell_info0, cell0, ndim1); // B = P0 \tilde{A} + P1T(Ae, cell_info1, cell1, ndim0); // A = B P1_T + + for (std::int32_t row : bce0) + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + + for (std::int32_t col : bce1) + { + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; + } + mat_set(dofs0, dofs1, Ae); } } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 1363c0b8ed3..2012c793c01 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -448,6 +448,7 @@ void lift_bc(V&& b, const Form& a, std::span constants, assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); } +} /// Modify b such that: /// From 0016010e087bb334a35e3b8aaa364c0827ab838b Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Mon, 23 Mar 2026 09:35:14 +0000 Subject: [PATCH 22/34] Doxygen fix: --- cpp/dolfinx/fem/assemble_matrix_impl.h | 2 +- cpp/dolfinx/fem/assemble_vector_impl.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index c3b4c40ff59..3d568922390 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -154,9 +154,9 @@ void assemble_cells_matrix( P0(Ae, cell_info0, cell0, ndim1); // B = P0 \tilde{A} P1T(Ae, cell_info1, cell1, ndim0); // A = B P1_T + // Clear rows and columns for BCs for (std::int32_t row : bce0) std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); - for (std::int32_t col : bce1) { for (int row = 0; row < ndim0; ++row) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 2012c793c01..bc19186d020 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -358,8 +358,6 @@ void assemble_interior_facets( /// /// @param[in,out] b The vector to be modified /// @param[in] a The bilinear form that generates A -/// @param[in] x_dofmap Mesh geometry dofmap -/// @param[in] x Mesh coordinates /// @param[in] constants Constants that appear in `a` /// @param[in] coefficients Coefficients that appear in `a` /// @param[in] bc_values1 The boundary condition 'values' From 584701fccdd66b6a2b847f943026d2138d3bb479 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Mon, 23 Mar 2026 11:04:28 +0000 Subject: [PATCH 23/34] Use templating in assembly --- cpp/dolfinx/fem/assemble_matrix_impl.h | 59 +++++++++++++++----------- cpp/dolfinx/fem/assemble_vector_impl.h | 6 ++- cpp/dolfinx/fem/assembler.h | 12 +++--- 3 files changed, 45 insertions(+), 32 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 3d568922390..4167f82c76d 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -60,7 +60,7 @@ using mdspan2_t = md::mdspan>; /// function mesh. /// @param cell_info1 Cell permutation information for the trial /// function mesh. -template +template void assemble_cells_matrix( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -106,35 +106,40 @@ void assemble_cells_matrix( std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); - bce0.clear(); - if (!bc0.empty()) + bce1.clear(); + if (!bc1.empty()) { - for (int i = 0; i < num_dofs0; ++i) + for (int j = 0; j < num_dofs1; ++j) { - for (int k = 0; k < bs0; ++k) + for (int k = 0; k < bs1; ++k) { - if (bc0[bs0 * dofs0[i] + k]) + if (bc1[bs1 * dofs1[j] + k]) { - // Zero row bs0 * i + k - const int row = bs0 * i + k; - bce0.push_back(row); + // Zero column bs1 * j + k + const int col = bs1 * j + k; + bce1.push_back(col); } } } } - bce1.clear(); - if (!bc1.empty()) + // In "BCMode" only execute kernel if there are BCs on column space + if constexpr (BCMode) + if (bce1.empty()) + continue; + + bce0.clear(); + if (!bc0.empty()) { - for (int j = 0; j < num_dofs1; ++j) + for (int i = 0; i < num_dofs0; ++i) { - for (int k = 0; k < bs1; ++k) + for (int k = 0; k < bs0; ++k) { - if (bc1[bs1 * dofs1[j] + k]) + if (bc0[bs0 * dofs0[i] + k]) { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - bce1.push_back(col); + // Zero row bs0 * i + k + const int row = bs0 * i + k; + bce0.push_back(row); } } } @@ -154,13 +159,17 @@ void assemble_cells_matrix( P0(Ae, cell_info0, cell0, ndim1); // B = P0 \tilde{A} P1T(Ae, cell_info1, cell1, ndim0); // A = B P1_T - // Clear rows and columns for BCs - for (std::int32_t row : bce0) - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); - for (std::int32_t col : bce1) + // Do not clear BC rows/cols in "BCMode" + if constexpr (!BCMode) { - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; + // Clear rows and columns for BCs + for (std::int32_t row : bce0) + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + for (std::int32_t col : bce1) + { + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; + } } mat_set(dofs0, dofs1, Ae); @@ -539,7 +548,7 @@ void assemble_interior_facets( /// applied. /// @param bc1 Marker for columns with Dirichlet boundary conditions /// applied. -template +template void assemble_matrix( la::MatSet auto mat_set, const Form& a, md::mdspan, @@ -617,7 +626,7 @@ void assemble_matrix( std::span cells1 = a.domain_arg(IntegralType::cell, 1, i, cell_type_idx); auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); assert(cells.size() * cstride == coeffs.size()); - impl::assemble_cells_matrix( + impl::assemble_cells_matrix( mat_set, x_dofmap, x, cells, {dofs0, bs0, cells0}, P0, {dofs1, bs1, cells1}, P1T, bc0, bc1, fn, md::mdspan(coeffs.data(), cells.size(), cstride), constants, diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index bc19186d020..f25d43e35f9 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -420,7 +420,8 @@ void lift_bc(V&& b, const Form& a, std::span constants, } }; - assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); + assemble_matrix(plugin_fn, a, constants, coefficients, {}, + bc_markers1); } else { @@ -444,7 +445,8 @@ void lift_bc(V&& b, const Form& a, std::span constants, } }; - assemble_matrix(plugin_fn, a, constants, coefficients, {}, {}); + assemble_matrix(plugin_fn, a, constants, coefficients, {}, + bc_markers1); } } diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index 352196d502f..d4489faf00d 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -425,7 +425,7 @@ void apply_lifting( /// @param[in] dof_marker1 Boundary condition markers for the columns. /// If bc[i] is true then rows i in A will be zeroed. The index i is a /// local index. -template +template void assemble_matrix( la::MatSet auto mat_add, const Form& a, std::span constants, @@ -444,14 +444,16 @@ void assemble_matrix( std::span x = mesh->geometry().x(); if constexpr (std::is_same_v>) { - impl::assemble_matrix(mat_add, a, mdspanx3_t(x.data(), x.size() / 3, 3), - constants, coefficients, dof_marker0, dof_marker1); + impl::assemble_matrix( + mat_add, a, mdspanx3_t(x.data(), x.size() / 3, 3), constants, + coefficients, dof_marker0, dof_marker1); } else { std::vector> _x(x.begin(), x.end()); - impl::assemble_matrix(mat_add, a, mdspanx3_t(_x.data(), _x.size() / 3, 3), - constants, coefficients, dof_marker0, dof_marker1); + impl::assemble_matrix( + mat_add, a, mdspanx3_t(_x.data(), _x.size() / 3, 3), constants, + coefficients, dof_marker0, dof_marker1); } } From 948f7862b043dff6c39113f5ef6f55149a8e89e2 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Tue, 24 Mar 2026 10:32:16 +0000 Subject: [PATCH 24/34] Full version using modified assemble_matrix --- cpp/dolfinx/fem/assemble_matrix_impl.h | 163 ++++++++++++++++--------- 1 file changed, 104 insertions(+), 59 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 4167f82c76d..d278efa9772 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -218,7 +218,7 @@ void assemble_cells_matrix( /// function mesh. /// @param[in] perms Entity permutation integer. Empty if entity /// permutations are not required. -template +template void assemble_entities( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -256,6 +256,9 @@ void assemble_entities( const int ndim0 = bs0 * num_dofs0; const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); + std::vector bce0; + std::vector bce1; + assert(entities0.size() == entities.size()); assert(entities1.size() == entities.size()); for (std::size_t f = 0; f < entities.extent(0); ++f) @@ -268,24 +271,34 @@ void assemble_entities( std::int32_t cell0 = entities0(f, 0); std::int32_t cell1 = entities1(f, 0); - // Get cell coordinates/geometry - auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); - for (std::size_t i = 0; i < x_dofs.size(); ++i) - std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); - - // Permutations - std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_entity); - - // Tabulate tensor - std::ranges::fill(Ae, 0); - kernel(Ae.data(), &coeffs(f, 0), constants.data(), cdofs.data(), - &local_entity, &perm, nullptr); - P0(Ae, cell_info0, cell0, ndim1); - P1T(Ae, cell_info1, cell1, ndim0); - // Zero rows/columns for essential bcs std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); + + bce1.clear(); + if (!bc1.empty()) + { + for (int j = 0; j < num_dofs1; ++j) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dofs1[j] + k]) + { + // Zero column bs1 * j + k + const int col = bs1 * j + k; + bce1.push_back(col); + } + } + } + } + + if constexpr (BCMode) + { + if (bce1.empty()) + continue; + } + + bce0.clear(); if (!bc0.empty()) { for (int i = 0; i < num_dofs0; ++i) @@ -296,25 +309,35 @@ void assemble_entities( { // Zero row bs0 * i + k const int row = bs0 * i + k; - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + bce0.push_back(row); } } } } - if (!bc1.empty()) + + // Get cell coordinates/geometry + auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); + for (std::size_t i = 0; i < x_dofs.size(); ++i) + std::copy_n(&x(x_dofs[i], 0), 3, std::next(cdofs.begin(), 3 * i)); + + // Permutations + std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_entity); + + // Tabulate tensor + std::ranges::fill(Ae, 0); + kernel(Ae.data(), &coeffs(f, 0), constants.data(), cdofs.data(), + &local_entity, &perm, nullptr); + P0(Ae, cell_info0, cell0, ndim1); + P1T(Ae, cell_info1, cell1, ndim0); + + if constexpr (!BCMode) { - for (int j = 0; j < num_dofs1; ++j) + for (std::int32_t row : bce0) + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + for (std::int32_t col : bce1) { - for (int k = 0; k < bs1; ++k) - { - if (bc1[bs1 * dofs1[j] + k]) - { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; - } - } + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; } } @@ -358,7 +381,7 @@ void assemble_entities( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_interior_facets( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -402,6 +425,8 @@ void assemble_interior_facets( const std::size_t dmap1_size = dmap1.map().extent(1); const int num_rows = bs0 * 2 * dmap0_size; const int num_cols = bs1 * 2 * dmap1_size; + std::vector bce0(num_rows); + std::vector bce1(num_cols); // Temporaries for joint dofmaps std::vector Ae(num_rows * num_cols), be(num_rows); @@ -453,6 +478,45 @@ void assemble_interior_facets( std::ranges::copy(dmap1_cell0, dmapjoint1.begin()); std::ranges::copy(dmap1_cell1, std::next(dmapjoint1.begin(), dmap1_size)); + // Zero rows/columns for essential bcs + bce1.clear(); + if (!bc1.empty()) + { + for (std::size_t j = 0; j < dmapjoint1.size(); ++j) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dmapjoint1[j] + k]) + { + // Zero column bs1 * j + k + bce1.push_back(bs1 * j + k); + } + } + } + } + + if constexpr (BCMode) + { + if (bce1.empty()) + continue; + } + + bce0.clear(); + if (!bc0.empty()) + { + for (std::size_t i = 0; i < dmapjoint0.size(); ++i) + { + for (int k = 0; k < bs0; ++k) + { + if (bc0[bs0 * dmapjoint0[i] + k]) + { + // Zero row bs0 * i + k + bce0.push_back(bs0 * i + k); + } + } + } + } + // Tabulate tensor std::ranges::fill(Ae, 0); std::array perm = perms.empty() @@ -494,35 +558,14 @@ void assemble_interior_facets( } } - // Zero rows/columns for essential bcs - if (!bc0.empty()) - { - for (std::size_t i = 0; i < dmapjoint0.size(); ++i) - { - for (int k = 0; k < bs0; ++k) - { - if (bc0[bs0 * dmapjoint0[i] + k]) - { - // Zero row bs0 * i + k - std::fill_n(std::next(Ae.begin(), num_cols * (bs0 * i + k)), - num_cols, 0); - } - } - } - } - if (!bc1.empty()) + if constexpr (!BCMode) { - for (std::size_t j = 0; j < dmapjoint1.size(); ++j) + for (std::int32_t row : bce0) + std::fill_n(std::next(Ae.begin(), num_cols * row), num_cols, 0); + for (std::int32_t col : bce1) { - for (int k = 0; k < bs1; ++k) - { - if (bc1[bs1 * dmapjoint1[j] + k]) - { - // Zero column bs1 * j + k - for (int m = 0; m < num_rows; ++m) - Ae[m * num_cols + bs1 * j + k] = 0; - } - } + for (int m = 0; m < num_rows; ++m) + Ae[m * num_cols + col] = 0; } } @@ -538,6 +581,8 @@ void assemble_interior_facets( /// /// @tparam T Scalar type. /// @tparam U Geometry type. +/// @tparam BCMode Boundary Condition mode. Set to true to call the kernel only +/// on cells with BCs in bc1. /// @param[in] mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] a Bilinear form to assemble. @@ -671,7 +716,7 @@ void assemble_matrix( std::span facets0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); std::span facets1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); assert((facets.size() / 4) * 2 * cstride == coeffs.size()); - impl::assemble_interior_facets( + impl::assemble_interior_facets( mat_set, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap0, bs0, @@ -716,7 +761,7 @@ void assemble_matrix( std::span e1 = a.domain_arg(itg_type, 1, i, 0); mdspanx2_t entities1(e1.data(), e1.size() / 2, 2); assert((entities.size() / 2) * cstride == coeffs.size()); - impl::assemble_entities( + impl::assemble_entities( mat_set, x_dofmap, x, entities, {dofs0, bs0, entities0}, P0, {dofs1, bs1, entities1}, P1T, bc0, bc1, fn, md::mdspan(coeffs.data(), entities.extent(0), cstride), constants, From da9718b48308fb6dfceefd4d79691cb10275fbbc Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Wed, 25 Mar 2026 16:24:11 +0000 Subject: [PATCH 25/34] Doc update --- cpp/dolfinx/fem/assemble_matrix_impl.h | 6 ++++++ cpp/dolfinx/fem/assemble_vector_impl.h | 15 +++++++++------ cpp/dolfinx/fem/assembler.h | 1 + 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index d278efa9772..8e46b6ac2ad 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -30,6 +30,8 @@ using mdspan2_t = md::mdspan>; /// @brief Execute kernel over cells and accumulate result in a matrix. /// /// @tparam T Matrix/form scalar type. +/// @tparam BCMode If set true, only execute mat_set on cells with BCs in column +/// space. /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. @@ -188,6 +190,8 @@ void assemble_cells_matrix( /// from cell used to define the entity. /// /// @tparam T Matrix/form scalar type. +/// @tparam BCMode If set true, only execute mat_set on cells with BCs in column +/// space. /// @param[in] mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. @@ -349,6 +353,8 @@ void assemble_entities( /// a matrix. /// /// @tparam T Matrix/form scalar type. +/// @tparam BCMode If set true, only execute mat_set on cells with BCs in column +/// space. /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index f25d43e35f9..f7528cdf5f4 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -386,12 +386,11 @@ void lift_bc(V&& b, const Form& a, std::span constants, spdlog::debug("bs0={}, bs1={}", bs0, bs1); // Iterate over Form with plugin kernel - // TODO: only iterate over cells with BCs (efficiency) - // TODO: templating over block size? if (bs0 > 1 or bs1 > 1) { - auto plugin_fn + // Lifting function for non-unit block size + auto lifting_fn = [&b, &x0, &bs0, &bs1, &bc_markers1, &bc_values1, &alpha](std::span rows, std::span cols, std::span Ae) @@ -420,12 +419,15 @@ void lift_bc(V&& b, const Form& a, std::span constants, } }; - assemble_matrix(plugin_fn, a, constants, coefficients, {}, + // Call matrix assembler in BCMode, only executing kernel on cells with BCs + // in bc_markers1. + assemble_matrix(lifting_fn, a, constants, coefficients, {}, bc_markers1); } else { - auto plugin_fn + // Specialised lifting function for scalar values (bs0=0, bs1=0) + auto lifting_fn = [&b, &x0, &bc_markers1, &bc_values1, &alpha](std::span rows, std::span cols, std::span Ae) @@ -445,7 +447,8 @@ void lift_bc(V&& b, const Form& a, std::span constants, } }; - assemble_matrix(plugin_fn, a, constants, coefficients, {}, + // Use matrix assembler in BCMode to lift RHS. + assemble_matrix(lifting_fn, a, constants, coefficients, {}, bc_markers1); } } diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index d4489faf00d..f7bf2dd8786 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -337,6 +337,7 @@ void apply_lifting( if (std::ranges::all_of(a, [](auto ai) { return !ai; })) return; + common::Timer t("Apply lifting"); impl::apply_lifting(b, a, constants, coeffs, bcs1, x0, alpha); } From b181a3ce54868c927afe91e760766add08e602e3 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Wed, 25 Mar 2026 17:02:49 +0000 Subject: [PATCH 26/34] typo --- cpp/dolfinx/fem/assemble_vector_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index f7528cdf5f4..c33d5bbb4ec 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -426,7 +426,7 @@ void lift_bc(V&& b, const Form& a, std::span constants, } else { - // Specialised lifting function for scalar values (bs0=0, bs1=0) + // Specialised lifting function for scalar values (bs0=1, bs1=1) auto lifting_fn = [&b, &x0, &bc_markers1, &bc_values1, &alpha](std::span rows, From a2c8451f482eea93c38ef954ff051d131b4f08bb Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 26 Mar 2026 10:24:03 +0000 Subject: [PATCH 27/34] fixes --- cpp/dolfinx/fem/assemble_matrix_impl.h | 17 +++++++++++++---- cpp/dolfinx/fem/assemble_vector_impl.h | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 8e46b6ac2ad..a2f426bfdfa 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -91,7 +91,10 @@ void assemble_cells_matrix( const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); std::vector> cdofs(3 * x_dofmap.extent(1)); - std::vector bce0(ndim0), bce1(ndim1); + std::vector bce0; + bce0.reserve(ndim0); + std::vector bce1; + bce1.reserve(ndim1); // Iterate over active cells assert(cells0.size() == cells.size()); @@ -127,8 +130,10 @@ void assemble_cells_matrix( // In "BCMode" only execute kernel if there are BCs on column space if constexpr (BCMode) + { if (bce1.empty()) continue; + } bce0.clear(); if (!bc0.empty()) @@ -261,7 +266,9 @@ void assemble_entities( const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); std::vector bce0; + bce0.reserve(ndim0); std::vector bce1; + bce1.reserve(ndim1); assert(entities0.size() == entities.size()); assert(entities1.size() == entities.size()); @@ -387,7 +394,7 @@ void assemble_entities( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_interior_facets( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -431,11 +438,13 @@ void assemble_interior_facets( const std::size_t dmap1_size = dmap1.map().extent(1); const int num_rows = bs0 * 2 * dmap0_size; const int num_cols = bs1 * 2 * dmap1_size; - std::vector bce0(num_rows); + std::vector bce0; + bce0.reserve(num_rows); std::vector bce1(num_cols); + bce1.reserve(num_cols); // Temporaries for joint dofmaps - std::vector Ae(num_rows * num_cols), be(num_rows); + std::vector Ae(num_rows * num_cols); std::vector dmapjoint0(2 * dmap0_size); std::vector dmapjoint1(2 * dmap1_size); assert(facets0.size() == facets.size()); diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index c33d5bbb4ec..a1f8725829b 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -383,7 +383,7 @@ void lift_bc(V&& b, const Form& a, std::span constants, const int bs0 = a.function_spaces()[0]->dofmap()->bs(); const int bs1 = a.function_spaces()[1]->dofmap()->bs(); - spdlog::debug("bs0={}, bs1={}", bs0, bs1); + spdlog::debug("lifting: bs0={}, bs1={}", bs0, bs1); // Iterate over Form with plugin kernel From 467ff4788dfd542fdb03e27bb932360c7cb74708 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 26 Mar 2026 10:34:17 +0000 Subject: [PATCH 28/34] Remove unused --- cpp/dolfinx/fem/assemble_vector_impl.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index a1f8725829b..d2089ce05db 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -504,16 +504,6 @@ void apply_lifting( std::vector bc_values1; if (a[j] and !bcs1[j].empty()) { - // Extract data from mesh - std::shared_ptr> mesh = a[j]->get().mesh(); - if (!mesh) - throw std::runtime_error("Unable to extract a mesh."); - mdspan2_t x_dofmap = mesh->geometry().dofmap(); - std::span _x = mesh->geometry().x(); - md::mdspan, - md::extents> - x(_x.data(), _x.size() / 3, 3); - assert(a[j]->get().function_spaces().at(0)); auto V1 = a[j]->get().function_spaces()[1]; assert(V1); From ef841d9a552955cb6023a65ef6876e969a307436 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 26 Mar 2026 10:59:46 +0000 Subject: [PATCH 29/34] Template for <3,3> --- cpp/dolfinx/fem/assemble_vector_impl.h | 137 ++++++++++++++----------- 1 file changed, 76 insertions(+), 61 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index d2089ce05db..329a5c671ea 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -352,6 +352,72 @@ void assemble_interior_facets( } } +/// @brief Apply boundary condition lifting using the matrix assembler. +/// +/// @tparam T Scalar type. +/// @tparam U Geometry scalar type. +/// @tparam BS0 Compile-time block size for test function dofmap. Use -1 +/// for runtime block size. +/// @tparam BS1 Compile-time block size for trial function dofmap. Use -1 +/// for runtime block size. +/// @param[in,out] b Vector to modify. +/// @param[in] a The bilinear form. +/// @param[in] constants Constants that appear in `a`. +/// @param[in] coefficients Coefficients that appear in `a`. +/// @param[in] bc_values1 The boundary condition values. +/// @param[in] bc_markers1 Marker to identify which DOFs have boundary +/// conditions applied. +/// @param[in] x0 Vector used in the lifting. +/// @param[in] alpha Scaling to apply. +template + requires std::is_same_v::value_type, T> +void lift_bc_impl( + V&& b, const Form& a, std::span constants, + const std::map, + std::pair, int>>& coefficients, + std::span bc_values1, std::span bc_markers1, + std::span x0, T alpha) +{ + // Deduce runtime block sizes as fallback when compile-time sizes not given + const int bs0 = BS0 > 0 ? BS0 : a.function_spaces()[0]->dofmap()->bs(); + const int bs1 = BS1 > 0 ? BS1 : a.function_spaces()[1]->dofmap()->bs(); + + auto lifting_fn + = [&b, &bc_values1, &bc_markers1, &x0, bs0, bs1, + alpha](std::span rows, + std::span cols, std::span Ae) + { + const std::size_t nc = cols.size() * bs1; + for (std::size_t i = 0; i < cols.size(); ++i) + { + for (int k = 0; k < bs1; ++k) + { + const std::int32_t ii = cols[i] * bs1 + k; + if (bc_markers1[ii]) + { + const T x_bc = bc_values1[ii]; + const T _x0 = x0.empty() ? 0 : x0[ii]; + for (std::size_t j = 0; j < rows.size(); ++j) + { + for (int m = 0; m < bs0; ++m) + { + const std::int32_t jj = rows[j] * bs0 + m; + b[jj] -= Ae[(j * bs0 + m) * nc + (i * bs1 + k)] * alpha + * (x_bc - _x0); + } + } + } + } + } + }; + + // Use BCMode=true so the kernel is only called on cells that have + // BC-constrained DOFs in the column space + assemble_matrix(lifting_fn, a, constants, coefficients, {}, + bc_markers1); +} + /// Modify RHS vector to account for boundary condition such that: /// /// b <- b - alpha * A.(x_bc - x0) @@ -385,71 +451,20 @@ void lift_bc(V&& b, const Form& a, std::span constants, spdlog::debug("lifting: bs0={}, bs1={}", bs0, bs1); - // Iterate over Form with plugin kernel - - if (bs0 > 1 or bs1 > 1) + if (bs0 == 1 && bs1 == 1) { - // Lifting function for non-unit block size - auto lifting_fn - = [&b, &x0, &bs0, &bs1, &bc_markers1, &bc_values1, - &alpha](std::span rows, - std::span cols, std::span Ae) - { - std::size_t nc = cols.size() * bs1; - for (std::size_t i = 0; i < cols.size(); ++i) - { - for (int k = 0; k < bs1; ++k) - { - const std::int32_t ii = cols[i] * bs1 + k; - if (bc_markers1[ii]) - { - const T x_bc = bc_values1[ii]; - const T _x0 = x0.empty() ? 0 : x0[ii]; - for (std::size_t j = 0; j < rows.size(); ++j) - { - for (int m = 0; m < bs0; ++m) - { - const std::int32_t jj = rows[j] * bs0 + m; - b[jj] -= Ae[(j * bs0 + m) * nc + (i * bs1 + k)] * alpha - * (x_bc - _x0); - } - } - } - } - } - }; - - // Call matrix assembler in BCMode, only executing kernel on cells with BCs - // in bc_markers1. - assemble_matrix(lifting_fn, a, constants, coefficients, {}, - bc_markers1); + lift_bc_impl(std::forward(b), a, constants, coefficients, + bc_values1, bc_markers1, x0, alpha); + } + else if (bs0 == 3 && bs1 == 3) + { + lift_bc_impl(std::forward(b), a, constants, coefficients, + bc_values1, bc_markers1, x0, alpha); } else { - // Specialised lifting function for scalar values (bs0=1, bs1=1) - auto lifting_fn - = [&b, &x0, &bc_markers1, &bc_values1, - &alpha](std::span rows, - std::span cols, std::span Ae) - { - std::size_t nr = rows.size(); - std::size_t nc = cols.size(); - for (std::size_t i = 0; i < nc; ++i) - { - const std::int32_t ii = cols[i]; - if (bc_markers1[ii]) - { - const T x_bc = bc_values1[ii]; - const T _x0 = x0.empty() ? 0 : x0[ii]; - for (std::size_t j = 0; j < nr; ++j) - b[rows[j]] -= Ae[j * nc + i] * alpha * (x_bc - _x0); - } - } - }; - - // Use matrix assembler in BCMode to lift RHS. - assemble_matrix(lifting_fn, a, constants, coefficients, {}, - bc_markers1); + lift_bc_impl(std::forward(b), a, constants, coefficients, + bc_values1, bc_markers1, x0, alpha); } } From bf03e90c40d3b5ad8ca081b4a353172b94624d26 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Thu, 26 Mar 2026 12:36:21 +0000 Subject: [PATCH 30/34] Fix capture of bs0, bs1 --- cpp/dolfinx/fem/assemble_vector_impl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 329a5c671ea..7fc6a01d588 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -383,10 +383,11 @@ void lift_bc_impl( const int bs0 = BS0 > 0 ? BS0 : a.function_spaces()[0]->dofmap()->bs(); const int bs1 = BS1 > 0 ? BS1 : a.function_spaces()[1]->dofmap()->bs(); + // Use default [=] capture for bs0, bs1 which may be compile-time auto lifting_fn - = [&b, &bc_values1, &bc_markers1, &x0, bs0, bs1, - alpha](std::span rows, - std::span cols, std::span Ae) + = [=, &b, &bc_values1, &bc_markers1, + &x0](std::span rows, + std::span cols, std::span Ae) { const std::size_t nc = cols.size() * bs1; for (std::size_t i = 0; i < cols.size(); ++i) From 049a7f0f8f0f81a9b17ee81ba8af664e33eb4397 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Fri, 27 Mar 2026 10:34:40 +0000 Subject: [PATCH 31/34] optimisation --- cpp/dolfinx/fem/assemble_matrix_impl.h | 116 +++++++++++-------------- 1 file changed, 51 insertions(+), 65 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index a2f426bfdfa..a6708de1806 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -91,8 +91,6 @@ void assemble_cells_matrix( const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); std::vector> cdofs(3 * x_dofmap.extent(1)); - std::vector bce0; - bce0.reserve(ndim0); std::vector bce1; bce1.reserve(ndim1); @@ -107,10 +105,10 @@ void assemble_cells_matrix( std::int32_t cell0 = cells0[c]; std::int32_t cell1 = cells1[c]; - // Collect element rows/columns for essential bcs std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); + // Collect columns for essential bcs bce1.clear(); if (!bc1.empty()) { @@ -135,23 +133,6 @@ void assemble_cells_matrix( continue; } - bce0.clear(); - if (!bc0.empty()) - { - for (int i = 0; i < num_dofs0; ++i) - { - for (int k = 0; k < bs0; ++k) - { - if (bc0[bs0 * dofs0[i] + k]) - { - // Zero row bs0 * i + k - const int row = bs0 * i + k; - bce0.push_back(row); - } - } - } - } - // Get cell coordinates/geometry auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) @@ -169,9 +150,23 @@ void assemble_cells_matrix( // Do not clear BC rows/cols in "BCMode" if constexpr (!BCMode) { + // Zero rows and columns for BCs + if (!bc0.empty()) + { + for (int i = 0; i < num_dofs0; ++i) + { + for (int k = 0; k < bs0; ++k) + { + if (bc0[bs0 * dofs0[i] + k]) + { + // Zero row bs0 * i + k + const int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + } + } + } + } // Clear rows and columns for BCs - for (std::int32_t row : bce0) - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); for (std::int32_t col : bce1) { for (int row = 0; row < ndim0; ++row) @@ -265,8 +260,6 @@ void assemble_entities( const int ndim0 = bs0 * num_dofs0; const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); - std::vector bce0; - bce0.reserve(ndim0); std::vector bce1; bce1.reserve(ndim1); @@ -282,10 +275,10 @@ void assemble_entities( std::int32_t cell0 = entities0(f, 0); std::int32_t cell1 = entities1(f, 0); - // Zero rows/columns for essential bcs std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); + // Collect columns for essential bcs bce1.clear(); if (!bc1.empty()) { @@ -309,23 +302,6 @@ void assemble_entities( continue; } - bce0.clear(); - if (!bc0.empty()) - { - for (int i = 0; i < num_dofs0; ++i) - { - for (int k = 0; k < bs0; ++k) - { - if (bc0[bs0 * dofs0[i] + k]) - { - // Zero row bs0 * i + k - const int row = bs0 * i + k; - bce0.push_back(row); - } - } - } - } - // Get cell coordinates/geometry auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) @@ -343,8 +319,22 @@ void assemble_entities( if constexpr (!BCMode) { - for (std::int32_t row : bce0) - std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + // Zero rows and columns for BCs + if (!bc0.empty()) + { + for (int i = 0; i < num_dofs0; ++i) + { + for (int k = 0; k < bs0; ++k) + { + if (bc0[bs0 * dofs0[i] + k]) + { + // Zero row bs0 * i + k + const int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), ndim1 * row), ndim1, 0); + } + } + } + } for (std::int32_t col : bce1) { for (int row = 0; row < ndim0; ++row) @@ -438,8 +428,6 @@ void assemble_interior_facets( const std::size_t dmap1_size = dmap1.map().extent(1); const int num_rows = bs0 * 2 * dmap0_size; const int num_cols = bs1 * 2 * dmap1_size; - std::vector bce0; - bce0.reserve(num_rows); std::vector bce1(num_cols); bce1.reserve(num_cols); @@ -493,7 +481,7 @@ void assemble_interior_facets( std::ranges::copy(dmap1_cell0, dmapjoint1.begin()); std::ranges::copy(dmap1_cell1, std::next(dmapjoint1.begin(), dmap1_size)); - // Zero rows/columns for essential bcs + // Collect columns for essential bcs bce1.clear(); if (!bc1.empty()) { @@ -516,22 +504,6 @@ void assemble_interior_facets( continue; } - bce0.clear(); - if (!bc0.empty()) - { - for (std::size_t i = 0; i < dmapjoint0.size(); ++i) - { - for (int k = 0; k < bs0; ++k) - { - if (bc0[bs0 * dmapjoint0[i] + k]) - { - // Zero row bs0 * i + k - bce0.push_back(bs0 * i + k); - } - } - } - } - // Tabulate tensor std::ranges::fill(Ae, 0); std::array perm = perms.empty() @@ -575,8 +547,22 @@ void assemble_interior_facets( if constexpr (!BCMode) { - for (std::int32_t row : bce0) - std::fill_n(std::next(Ae.begin(), num_cols * row), num_cols, 0); + // Zero rows and columns for BCs + if (!bc0.empty()) + { + for (std::size_t i = 0; i < dmapjoint0.size(); ++i) + { + for (int k = 0; k < bs0; ++k) + { + if (bc0[bs0 * dmapjoint0[i] + k]) + { + // Zero row bs0 * i + k + int row = bs0 * i + k; + std::fill_n(std::next(Ae.begin(), num_cols * row), num_cols, 0); + } + } + } + } for (std::int32_t col : bce1) { for (int m = 0; m < num_rows; ++m) From ee2a252a7cceb4ff37e67d2fca6cf7737cd28a30 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sun, 29 Mar 2026 13:06:45 +0100 Subject: [PATCH 32/34] add timers --- cpp/dolfinx/fem/assembler.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index f7bf2dd8786..160fc102a0d 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -337,7 +337,7 @@ void apply_lifting( if (std::ranges::all_of(a, [](auto ai) { return !ai; })) return; - common::Timer t("Apply lifting"); + common::Timer t("[Apply lifting]"); impl::apply_lifting(b, a, constants, coeffs, bcs1, x0, alpha); } @@ -436,6 +436,7 @@ void assemble_matrix( std::span dof_marker1) { + common::Timer t_assm("[Assemble Matrix]"); using mdspanx3_t = md::mdspan, md::extents>; From ed7e59e9abf4725a39e623353d13dd3d2567ea4f Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Sun, 29 Mar 2026 13:50:30 +0100 Subject: [PATCH 33/34] Some changes and documentation --- cpp/dolfinx/fem/assemble_matrix_impl.h | 170 +++++++++++++------------ cpp/dolfinx/fem/assemble_vector_impl.h | 2 +- cpp/dolfinx/fem/assembler.h | 11 +- 3 files changed, 101 insertions(+), 82 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index a6708de1806..5d42341765f 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -30,8 +30,8 @@ using mdspan2_t = md::mdspan>; /// @brief Execute kernel over cells and accumulate result in a matrix. /// /// @tparam T Matrix/form scalar type. -/// @tparam BCMode If set true, only execute mat_set on cells with BCs in column -/// space. +/// @tparam LiftingMode If set true, only execute mat_set on cells with BCs in +/// column space. /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. @@ -62,7 +62,7 @@ using mdspan2_t = md::mdspan>; /// function mesh. /// @param cell_info1 Cell permutation information for the trial /// function mesh. -template +template void assemble_cells_matrix( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -91,8 +91,6 @@ void assemble_cells_matrix( const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); std::vector> cdofs(3 * x_dofmap.extent(1)); - std::vector bce1; - bce1.reserve(ndim1); // Iterate over active cells assert(cells0.size() == cells.size()); @@ -108,28 +106,23 @@ void assemble_cells_matrix( std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); - // Collect columns for essential bcs - bce1.clear(); - if (!bc1.empty()) + // In "LiftingMode" only execute kernel if there are BCs on column space + if constexpr (LiftingMode) { - for (int j = 0; j < num_dofs1; ++j) + auto has_bc = [&]() -> bool { - for (int k = 0; k < bs1; ++k) + for (int j = 0; j < num_dofs1; ++j) { - if (bc1[bs1 * dofs1[j] + k]) + for (int k = 0; k < bs1; ++k) { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - bce1.push_back(col); + if (bc1[bs1 * dofs1[j] + k]) + return true; } } - } - } + return false; + }; - // In "BCMode" only execute kernel if there are BCs on column space - if constexpr (BCMode) - { - if (bce1.empty()) + if (!has_bc()) continue; } @@ -147,8 +140,8 @@ void assemble_cells_matrix( P0(Ae, cell_info0, cell0, ndim1); // B = P0 \tilde{A} P1T(Ae, cell_info1, cell1, ndim0); // A = B P1_T - // Do not clear BC rows/cols in "BCMode" - if constexpr (!BCMode) + // Do not clear BC rows/cols in "LiftingMode" + if constexpr (!LiftingMode) { // Zero rows and columns for BCs if (!bc0.empty()) @@ -166,11 +159,21 @@ void assemble_cells_matrix( } } } - // Clear rows and columns for BCs - for (std::int32_t col : bce1) + if (!bc1.empty()) { - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; + for (int j = 0; j < num_dofs1; ++j) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dofs1[j] + k]) + { + // Zero col bs1 * j + k + const int col = bs1 * j + k; + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; + } + } + } } } @@ -190,8 +193,8 @@ void assemble_cells_matrix( /// from cell used to define the entity. /// /// @tparam T Matrix/form scalar type. -/// @tparam BCMode If set true, only execute mat_set on cells with BCs in column -/// space. +/// @tparam LiftingMode If set true, only execute mat_set on cells with BCs in +/// column space. /// @param[in] mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. @@ -222,7 +225,7 @@ void assemble_cells_matrix( /// function mesh. /// @param[in] perms Entity permutation integer. Empty if entity /// permutations are not required. -template +template void assemble_entities( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -260,8 +263,6 @@ void assemble_entities( const int ndim0 = bs0 * num_dofs0; const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); - std::vector bce1; - bce1.reserve(ndim1); assert(entities0.size() == entities.size()); assert(entities1.size() == entities.size()); @@ -278,27 +279,22 @@ void assemble_entities( std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); - // Collect columns for essential bcs - bce1.clear(); - if (!bc1.empty()) + // Check for BCs on column space + if constexpr (LiftingMode) { - for (int j = 0; j < num_dofs1; ++j) + auto has_bc = [&]() -> bool { - for (int k = 0; k < bs1; ++k) + for (int j = 0; j < num_dofs1; ++j) { - if (bc1[bs1 * dofs1[j] + k]) + for (int k = 0; k < bs1; ++k) { - // Zero column bs1 * j + k - const int col = bs1 * j + k; - bce1.push_back(col); + if (bc1[bs1 * dofs1[j] + k]) + return true; } } - } - } - - if constexpr (BCMode) - { - if (bce1.empty()) + return false; + }; + if (!has_bc()) continue; } @@ -317,7 +313,8 @@ void assemble_entities( P0(Ae, cell_info0, cell0, ndim1); P1T(Ae, cell_info1, cell1, ndim0); - if constexpr (!BCMode) + // Don't clear rows/cols in LiftingMode + if constexpr (!LiftingMode) { // Zero rows and columns for BCs if (!bc0.empty()) @@ -335,10 +332,21 @@ void assemble_entities( } } } - for (std::int32_t col : bce1) + if (!bc1.empty()) { - for (int row = 0; row < ndim0; ++row) - Ae[row * ndim1 + col] = 0; + for (int j = 0; j < num_dofs1; ++j) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dofs1[j] + k]) + { + // Zero column bs1 * j + k + const int col = bs1 * j + k; + for (int row = 0; row < ndim0; ++row) + Ae[row * ndim1 + col] = 0; + } + } + } } } @@ -350,8 +358,8 @@ void assemble_entities( /// a matrix. /// /// @tparam T Matrix/form scalar type. -/// @tparam BCMode If set true, only execute mat_set on cells with BCs in column -/// space. +/// @tparam LiftingMode If set true, only execute mat_set on cells with BCs in +/// column space. /// @param mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. @@ -384,7 +392,7 @@ void assemble_entities( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_interior_facets( la::MatSet auto mat_set, mdspan2_t x_dofmap, md::mdspan, @@ -428,8 +436,6 @@ void assemble_interior_facets( const std::size_t dmap1_size = dmap1.map().extent(1); const int num_rows = bs0 * 2 * dmap0_size; const int num_cols = bs1 * 2 * dmap1_size; - std::vector bce1(num_cols); - bce1.reserve(num_cols); // Temporaries for joint dofmaps std::vector Ae(num_rows * num_cols); @@ -481,26 +487,23 @@ void assemble_interior_facets( std::ranges::copy(dmap1_cell0, dmapjoint1.begin()); std::ranges::copy(dmap1_cell1, std::next(dmapjoint1.begin(), dmap1_size)); - // Collect columns for essential bcs - bce1.clear(); - if (!bc1.empty()) + // Check for BCs on column space + if constexpr (LiftingMode) { - for (std::size_t j = 0; j < dmapjoint1.size(); ++j) + auto has_bc = [&]() -> bool { - for (int k = 0; k < bs1; ++k) + for (std::size_t j = 0; j < dmapjoint1.size(); ++j) { - if (bc1[bs1 * dmapjoint1[j] + k]) + for (int k = 0; k < bs1; ++k) { - // Zero column bs1 * j + k - bce1.push_back(bs1 * j + k); + if (bc1[bs1 * dmapjoint1[j] + k]) + return true; } } - } - } + return false; + }; - if constexpr (BCMode) - { - if (bce1.empty()) + if (!has_bc()) continue; } @@ -545,7 +548,8 @@ void assemble_interior_facets( } } - if constexpr (!BCMode) + // Don't clear rows/cols in LiftingMode + if constexpr (!LiftingMode) { // Zero rows and columns for BCs if (!bc0.empty()) @@ -563,10 +567,20 @@ void assemble_interior_facets( } } } - for (std::int32_t col : bce1) + if (!bc1.empty()) { - for (int m = 0; m < num_rows; ++m) - Ae[m * num_cols + col] = 0; + for (std::size_t j = 0; j < dmapjoint1.size(); ++j) + { + for (int k = 0; k < bs1; ++k) + { + if (bc1[bs1 * dmapjoint1[j] + k]) + { + // Zero column bs1 * j + k + for (int m = 0; m < num_rows; ++m) + Ae[m * num_cols + bs1 * j + k] = 0; + } + } + } } } @@ -582,8 +596,8 @@ void assemble_interior_facets( /// /// @tparam T Scalar type. /// @tparam U Geometry type. -/// @tparam BCMode Boundary Condition mode. Set to true to call the kernel only -/// on cells with BCs in bc1. +/// @tparam LiftingMode. Set to true to call the kernel only on cells with BCs +/// in bc1. /// @param[in] mat_set Function that accumulates computed entries into a /// matrix. /// @param[in] a Bilinear form to assemble. @@ -594,7 +608,7 @@ void assemble_interior_facets( /// applied. /// @param bc1 Marker for columns with Dirichlet boundary conditions /// applied. -template +template void assemble_matrix( la::MatSet auto mat_set, const Form& a, md::mdspan, @@ -672,7 +686,7 @@ void assemble_matrix( std::span cells1 = a.domain_arg(IntegralType::cell, 1, i, cell_type_idx); auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); assert(cells.size() * cstride == coeffs.size()); - impl::assemble_cells_matrix( + impl::assemble_cells_matrix( mat_set, x_dofmap, x, cells, {dofs0, bs0, cells0}, P0, {dofs1, bs1, cells1}, P1T, bc0, bc1, fn, md::mdspan(coeffs.data(), cells.size(), cstride), constants, @@ -717,7 +731,7 @@ void assemble_matrix( std::span facets0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); std::span facets1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); assert((facets.size() / 4) * 2 * cstride == coeffs.size()); - impl::assemble_interior_facets( + impl::assemble_interior_facets( mat_set, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap0, bs0, @@ -762,7 +776,7 @@ void assemble_matrix( std::span e1 = a.domain_arg(itg_type, 1, i, 0); mdspanx2_t entities1(e1.data(), e1.size() / 2, 2); assert((entities.size() / 2) * cstride == coeffs.size()); - impl::assemble_entities( + impl::assemble_entities( mat_set, x_dofmap, x, entities, {dofs0, bs0, entities0}, P0, {dofs1, bs1, entities1}, P1T, bc0, bc1, fn, md::mdspan(coeffs.data(), entities.extent(0), cstride), constants, diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 7fc6a01d588..4fd63a45c55 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -413,7 +413,7 @@ void lift_bc_impl( } }; - // Use BCMode=true so the kernel is only called on cells that have + // Use LiftingMode=true so the kernel is only called on cells that have // BC-constrained DOFs in the column space assemble_matrix(lifting_fn, a, constants, coefficients, {}, bc_markers1); diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index 160fc102a0d..77a8b6c7fef 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -416,6 +416,11 @@ void apply_lifting( /// @brief Assemble bilinear form into a matrix. Matrix must already be /// initialised. Does not zero or finalise the matrix. +/// @note This function can be used to insert kernels into different objects by +/// replacing the mat_add function appropriately. +/// @tparam T scalar type +/// @tparam U geometry scalar type +/// @tparam LiftingMode Set to true, if applying a lifting kernel in mat_add. /// @param[in] mat_add The function for adding values into the matrix. /// @param[in] a The bilinear form to assemble. /// @param[in] constants Constants that appear in `a`. @@ -426,7 +431,7 @@ void apply_lifting( /// @param[in] dof_marker1 Boundary condition markers for the columns. /// If bc[i] is true then rows i in A will be zeroed. The index i is a /// local index. -template +template void assemble_matrix( la::MatSet auto mat_add, const Form& a, std::span constants, @@ -446,14 +451,14 @@ void assemble_matrix( std::span x = mesh->geometry().x(); if constexpr (std::is_same_v>) { - impl::assemble_matrix( + impl::assemble_matrix( mat_add, a, mdspanx3_t(x.data(), x.size() / 3, 3), constants, coefficients, dof_marker0, dof_marker1); } else { std::vector> _x(x.begin(), x.end()); - impl::assemble_matrix( + impl::assemble_matrix( mat_add, a, mdspanx3_t(_x.data(), _x.size() / 3, 3), constants, coefficients, dof_marker0, dof_marker1); } From 4139667c43a275b8ba6b9ee8e37c648aa80d2d55 Mon Sep 17 00:00:00 2001 From: Chris Richardson Date: Mon, 30 Mar 2026 10:52:22 +0100 Subject: [PATCH 34/34] update suggestions from SonarCloud --- cpp/dolfinx/fem/assemble_matrix_impl.h | 18 +++++++++--------- cpp/dolfinx/fem/assemble_vector_impl.h | 5 +++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 5d42341765f..1d09bfc5878 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -109,13 +109,13 @@ void assemble_cells_matrix( // In "LiftingMode" only execute kernel if there are BCs on column space if constexpr (LiftingMode) { - auto has_bc = [&]() -> bool + auto has_bc = [&]() { - for (int j = 0; j < num_dofs1; ++j) + for (std::int32_t dof : dofs1) { for (int k = 0; k < bs1; ++k) { - if (bc1[bs1 * dofs1[j] + k]) + if (bc1[bs1 * dof + k]) return true; } } @@ -282,13 +282,13 @@ void assemble_entities( // Check for BCs on column space if constexpr (LiftingMode) { - auto has_bc = [&]() -> bool + auto has_bc = [&]() { - for (int j = 0; j < num_dofs1; ++j) + for (std::int32_t dof : dofs1) { for (int k = 0; k < bs1; ++k) { - if (bc1[bs1 * dofs1[j] + k]) + if (bc1[bs1 * dof + k]) return true; } } @@ -490,13 +490,13 @@ void assemble_interior_facets( // Check for BCs on column space if constexpr (LiftingMode) { - auto has_bc = [&]() -> bool + auto has_bc = [&]() { - for (std::size_t j = 0; j < dmapjoint1.size(); ++j) + for (std::int32_t dof : dmapjoint1) { for (int k = 0; k < bs1; ++k) { - if (bc1[bs1 * dmapjoint1[j] + k]) + if (bc1[bs1 * dof + k]) return true; } } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 4fd63a45c55..b045bd58fd3 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -383,7 +383,7 @@ void lift_bc_impl( const int bs0 = BS0 > 0 ? BS0 : a.function_spaces()[0]->dofmap()->bs(); const int bs1 = BS1 > 0 ? BS1 : a.function_spaces()[1]->dofmap()->bs(); - // Use default [=] capture for bs0, bs1 which may be compile-time + // Use default [=] capture for bs0, bs1 which may be compile-time constants auto lifting_fn = [=, &b, &bc_values1, &bc_markers1, &x0](std::span rows, @@ -413,8 +413,9 @@ void lift_bc_impl( } }; + // Repurpose the assemble_matrix assembler to work on the vector b instead. // Use LiftingMode=true so the kernel is only called on cells that have - // BC-constrained DOFs in the column space + // BC-constrained DOFs in the column space. assemble_matrix(lifting_fn, a, constants, coefficients, {}, bc_markers1); }