From ef59fbe757aea4eb4b9f510a4e4a6f04baf99353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20S=2E=20Dokken?= Date: Mon, 16 Mar 2026 16:05:00 +0000 Subject: [PATCH 1/5] Add pull back scratch memory --- cpp/dolfinx/fem/CoordinateElement.cpp | 44 ++++++++++--------- cpp/dolfinx/fem/CoordinateElement.h | 6 ++- cpp/dolfinx/fem/Function.h | 9 +++- .../dolfinx/wrappers/dolfinx_wrappers/fem.h | 11 ++++- 4 files changed, 47 insertions(+), 23 deletions(-) diff --git a/cpp/dolfinx/fem/CoordinateElement.cpp b/cpp/dolfinx/fem/CoordinateElement.cpp index 6452a484c87..f51ece33cf5 100644 --- a/cpp/dolfinx/fem/CoordinateElement.cpp +++ b/cpp/dolfinx/fem/CoordinateElement.cpp @@ -83,6 +83,7 @@ template void CoordinateElement::pull_back_nonaffine(mdspan2_t X, mdspan2_t x, mdspan2_t cell_geometry, + std::span working_array, double tol, int maxit) const { // Number of points @@ -97,33 +98,36 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, assert(X.extent(0) == num_points); assert(X.extent(1) == tdim); - std::vector dphi_b(tdim * num_xnodes); - mdspan2_t dphi(dphi_b.data(), tdim, num_xnodes); + // Allocate various components of working array + assert(working_array.size() >= tdim * num_xnodes + 2 * tdim + 2 * gdim * tdim + + gdim + (tdim + 1) * num_xnodes); - std::vector Xk_b(tdim); - mdspan2_t Xk(Xk_b.data(), 1, tdim); + mdspan2_t dphi(working_array.data(), tdim, num_xnodes); + mdspan2_t Xk(working_array.data() + tdim * num_xnodes, 1, tdim); + std::span Xk_span(working_array.data() + tdim * num_xnodes, tdim); - std::array xk{0, 0, 0}; - std::vector dX(tdim); - std::vector J_b(gdim * tdim); - mdspan2_t J(J_b.data(), gdim, tdim); - std::vector K_b(tdim * gdim); - mdspan2_t K(K_b.data(), tdim, gdim); + std::span dX(working_array.data() + (tdim * (num_xnodes + 1)), tdim); + std::span xk(working_array.data() + (tdim * (num_xnodes + 2)), gdim); + mdspan2_t J(working_array.data() + ((tdim * (num_xnodes + 2)) + gdim), + gdim, tdim); + mdspan2_t K(working_array.data() + + ((tdim * (num_xnodes + 2)) + gdim * (tdim + 1)), + tdim, gdim); using mdspan4_t = md::mdspan>; const std::array bsize = _element->tabulate_shape(1, 1); - std::vector basis_b(std::reduce(bsize.begin(), bsize.end(), std::size_t(1), - std::multiplies{})); - mdspan4_t basis(basis_b.data(), bsize); - std::vector phi(basis.extent(2)); + mdspan4_t basis(working_array.data() + + ((tdim * (num_xnodes + 2)) + gdim * (2 * tdim + 1)), + bsize); for (std::size_t p = 0; p < num_points; ++p) { - std::ranges::fill(Xk_b, 0); + // Reset Xk + std::ranges::fill(Xk_span, 0); int k; for (k = 0; k < maxit; ++k) { - _element->tabulate(1, Xk_b, {1, tdim}, basis_b); + _element->tabulate(1, Xk, basis); // x = cell_geometry * phi std::ranges::fill(xk, 0); @@ -132,7 +136,7 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, xk[j] += cell_geometry(i, j) * basis(0, 0, i, 0); // Compute Jacobian, its inverse and determinant - std::ranges::fill(J_b, 0); + std::ranges::fill(J.data_handle(), J.data_handle() + J.size(), 0); for (std::size_t i = 0; i < tdim; ++i) for (std::size_t j = 0; j < basis.extent(2); ++j) dphi(i, j) = basis(i + 1, 0, j, 0); @@ -147,12 +151,12 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, dX[i] += K(i, j) * (x(p, j) - xk[j]); // Compute Xk += dX - std::ranges::transform(dX, Xk_b, Xk_b.begin(), + std::ranges::transform(dX, Xk_span, Xk_span.begin(), [](auto a, auto b) { return a + b; }); // Compute norm(dX) if (auto dX_squared - = std::transform_reduce(dX.cbegin(), dX.cend(), 0.0, std::plus{}, + = std::transform_reduce(dX.begin(), dX.end(), 0.0, std::plus{}, [](auto v) { return v * v; }); std::sqrt(dX_squared) < tol) { @@ -160,7 +164,7 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, } } - std::copy(Xk_b.cbegin(), std::next(Xk_b.cbegin(), tdim), + std::copy(Xk_span.begin(), std::next(Xk_span.end(), tdim), X.data_handle() + p * tdim); if (k == maxit) { diff --git a/cpp/dolfinx/fem/CoordinateElement.h b/cpp/dolfinx/fem/CoordinateElement.h index 677d8e45670..6459725987b 100644 --- a/cpp/dolfinx/fem/CoordinateElement.h +++ b/cpp/dolfinx/fem/CoordinateElement.h @@ -240,13 +240,17 @@ class CoordinateElement /// @param [in] x Physical coordinates (`shape=(num_points, gdim)`). /// @param [in] cell_geometry Cell nodes coordinates (`shape=(num /// geometry nodes, gdim)`). + /// @param [in] working_array Working memory. Size must be at least + /// `tdim * (num_geometry_nodes+2+2*gdim) + gdim + + /// (tdim+1)*num_geometry_nodes`. /// @param [in] tol Tolerance for termination of Newton method. /// @param [in] maxit Maximum number of Newton iterations /// @note If convergence is not achieved within `maxit`, the function /// throws a runtime error. void pull_back_nonaffine(mdspan2_t X, mdspan2_t x, mdspan2_t cell_geometry, - double tol = 1.0e-6, int maxit = 15) const; + std::span working_array, double tol = 1.0e-6, + int maxit = 15) const; /// @brief Permute a list of DOF numbers on a cell. void permute(std::span dofs, std::uint32_t cell_perm) const; diff --git a/cpp/dolfinx/fem/Function.h b/cpp/dolfinx/fem/Function.h index 41ee5d758d7..921998a53d8 100644 --- a/cpp/dolfinx/fem/Function.h +++ b/cpp/dolfinx/fem/Function.h @@ -559,6 +559,13 @@ class Function std::vector detJ(xshape[0]); std::vector det_scratch(2 * gdim * tdim); + // Scrach space for pull-back of point coordinates for non-affine cells. + std::vector pull_back_scratch; + if (!cmap.is_affine()) + { + pull_back_scratch.resize(tdim * (num_dofs_g + 2 + 2 * gdim) + gdim + + (tdim + 1) * num_dofs_g); + } // Prepare geometry data in each cell for (auto cell_it = cells.begin(); cell_it != cells.end(); ++cell_it) { @@ -604,7 +611,7 @@ class Function else { // Pull-back physical point xp to reference coordinate Xp - cmap.pull_back_nonaffine(Xp, xp, coord_dofs); + cmap.pull_back_nonaffine(Xp, xp, coord_dofs, pull_back_scratch); cmap.tabulate(1, std::span(Xpb.data(), tdim), {1, tdim}, phi_b); CoordinateElement::compute_jacobian(dphi, coord_dofs, _J); diff --git a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h index 4b83cd26a1e..228a357de37 100644 --- a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h +++ b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h @@ -1060,8 +1060,17 @@ void declare_coordinate_element(nb::module_& m, const std::string& type) self.pull_back_affine(X, K, x0, _x); } else - self.pull_back_nonaffine(X, _x, g); + { + // Scratch space for pull-back of point coordinates for + // non-affine cells. + std::vector det_scratch(2 * gdim * tdim); + std::size_t num_dofs_g = cell_geometry.shape(0); + std::vector pull_back_scratch; + pull_back_scratch.resize(tdim * (num_dofs_g + 2 + 2 * gdim) + gdim + + (tdim + 1) * num_dofs_g); + self.pull_back_nonaffine(X, _x, g, pull_back_scratch); + } return dolfinx_wrappers::as_nbarray(std::move(Xb), {num_points, tdim}); }, From 3131648d4a485c725c704f538bdde86365e76293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20S=2E=20Dokken?= Date: Mon, 16 Mar 2026 17:36:25 +0000 Subject: [PATCH 2/5] Fix ranges and copy --- cpp/dolfinx/fem/CoordinateElement.cpp | 12 ++++++++---- cpp/dolfinx/fem/CoordinateElement.h | 7 +++---- cpp/dolfinx/fem/Function.h | 4 ++-- python/dolfinx/wrappers/dolfinx_wrappers/fem.h | 4 ++-- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/cpp/dolfinx/fem/CoordinateElement.cpp b/cpp/dolfinx/fem/CoordinateElement.cpp index f51ece33cf5..d0c0e124c6c 100644 --- a/cpp/dolfinx/fem/CoordinateElement.cpp +++ b/cpp/dolfinx/fem/CoordinateElement.cpp @@ -99,8 +99,8 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, assert(X.extent(1) == tdim); // Allocate various components of working array - assert(working_array.size() >= tdim * num_xnodes + 2 * tdim + 2 * gdim * tdim - + gdim + (tdim + 1) * num_xnodes); + assert(working_array.size() + >= tdim * (2 * gdim + 2 * num_xnodes + 2) + gdim + num_xnodes); mdspan2_t dphi(working_array.data(), tdim, num_xnodes); mdspan2_t Xk(working_array.data() + tdim * num_xnodes, 1, tdim); @@ -108,6 +108,7 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, std::span dX(working_array.data() + (tdim * (num_xnodes + 1)), tdim); std::span xk(working_array.data() + (tdim * (num_xnodes + 2)), gdim); + std::fill(xk.begin(), xk.end(), 0); mdspan2_t J(working_array.data() + ((tdim * (num_xnodes + 2)) + gdim), gdim, tdim); mdspan2_t K(working_array.data() @@ -117,6 +118,10 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, using mdspan4_t = md::mdspan>; const std::array bsize = _element->tabulate_shape(1, 1); + assert(bsize[0] == tdim + 1); // Tabulating basis and first derivatives + assert(bsize[1] == 1); // Tabulating at one point at a time + assert(bsize[2] == num_xnodes); + assert(bsize[3] == 1); // Scalar component coordinate element mdspan4_t basis(working_array.data() + ((tdim * (num_xnodes + 2)) + gdim * (2 * tdim + 1)), bsize); @@ -164,8 +169,7 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, } } - std::copy(Xk_span.begin(), std::next(Xk_span.end(), tdim), - X.data_handle() + p * tdim); + std::copy(Xk_span.begin(), Xk_span.end(), X.data_handle() + p * tdim); if (k == maxit) { throw std::runtime_error( diff --git a/cpp/dolfinx/fem/CoordinateElement.h b/cpp/dolfinx/fem/CoordinateElement.h index 6459725987b..0e1a8036c58 100644 --- a/cpp/dolfinx/fem/CoordinateElement.h +++ b/cpp/dolfinx/fem/CoordinateElement.h @@ -238,11 +238,10 @@ class CoordinateElement /// @param [in,out] X The reference coordinates to compute /// (shape=`(num_points, tdim)`). /// @param [in] x Physical coordinates (`shape=(num_points, gdim)`). - /// @param [in] cell_geometry Cell nodes coordinates (`shape=(num - /// geometry nodes, gdim)`). + /// @param [in] cell_geometry Cell nodes coordinates (`shape=(num_xnodes, + /// gdim)`). /// @param [in] working_array Working memory. Size must be at least - /// `tdim * (num_geometry_nodes+2+2*gdim) + gdim + - /// (tdim+1)*num_geometry_nodes`. + /// `tdim * (2 * gdim + 2 * num_xnodes + 2) + gdim + num_xnodes`. /// @param [in] tol Tolerance for termination of Newton method. /// @param [in] maxit Maximum number of Newton iterations /// @note If convergence is not achieved within `maxit`, the function diff --git a/cpp/dolfinx/fem/Function.h b/cpp/dolfinx/fem/Function.h index 921998a53d8..f685fd7dffb 100644 --- a/cpp/dolfinx/fem/Function.h +++ b/cpp/dolfinx/fem/Function.h @@ -563,8 +563,8 @@ class Function std::vector pull_back_scratch; if (!cmap.is_affine()) { - pull_back_scratch.resize(tdim * (num_dofs_g + 2 + 2 * gdim) + gdim - + (tdim + 1) * num_dofs_g); + pull_back_scratch.resize(tdim * (2 * gdim + 2 * num_dofs_g + 2) + gdim + + num_dofs_g); } // Prepare geometry data in each cell for (auto cell_it = cells.begin(); cell_it != cells.end(); ++cell_it) diff --git a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h index 228a357de37..a7536f6dcf2 100644 --- a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h +++ b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h @@ -1067,8 +1067,8 @@ void declare_coordinate_element(nb::module_& m, const std::string& type) std::vector det_scratch(2 * gdim * tdim); std::size_t num_dofs_g = cell_geometry.shape(0); std::vector pull_back_scratch; - pull_back_scratch.resize(tdim * (num_dofs_g + 2 + 2 * gdim) + gdim - + (tdim + 1) * num_dofs_g); + pull_back_scratch.resize(tdim * (2 * gdim + 2 * num_dofs_g + 2) + + gdim + num_dofs_g); self.pull_back_nonaffine(X, _x, g, pull_back_scratch); } return dolfinx_wrappers::as_nbarray(std::move(Xb), From ab888afa0fc9f33e2f8b82abc3f90ad5060b7c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20S=2E=20Dokken?= Date: Mon, 16 Mar 2026 17:55:01 +0000 Subject: [PATCH 3/5] Use ranges fill --- cpp/dolfinx/fem/CoordinateElement.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/CoordinateElement.cpp b/cpp/dolfinx/fem/CoordinateElement.cpp index d0c0e124c6c..85e1eb67392 100644 --- a/cpp/dolfinx/fem/CoordinateElement.cpp +++ b/cpp/dolfinx/fem/CoordinateElement.cpp @@ -108,7 +108,7 @@ void CoordinateElement::pull_back_nonaffine(mdspan2_t X, std::span dX(working_array.data() + (tdim * (num_xnodes + 1)), tdim); std::span xk(working_array.data() + (tdim * (num_xnodes + 2)), gdim); - std::fill(xk.begin(), xk.end(), 0); + std::ranges::fill(xk, 0); mdspan2_t J(working_array.data() + ((tdim * (num_xnodes + 2)) + gdim), gdim, tdim); mdspan2_t K(working_array.data() From ce32bc64ccd1ba10ad96bbb91773d3e3a5d34d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20S=2E=20Dokken?= Date: Mon, 16 Mar 2026 17:56:14 +0000 Subject: [PATCH 4/5] Remove extra scratchmemory --- python/dolfinx/wrappers/dolfinx_wrappers/fem.h | 1 - 1 file changed, 1 deletion(-) diff --git a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h index a7536f6dcf2..1699f2932b3 100644 --- a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h +++ b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h @@ -1064,7 +1064,6 @@ void declare_coordinate_element(nb::module_& m, const std::string& type) // Scratch space for pull-back of point coordinates for // non-affine cells. - std::vector det_scratch(2 * gdim * tdim); std::size_t num_dofs_g = cell_geometry.shape(0); std::vector pull_back_scratch; pull_back_scratch.resize(tdim * (2 * gdim + 2 * num_dofs_g + 2) From e11a06ac0c01e3affba647fc2fe7dae0defe5470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20S=2E=20Dokken?= Date: Mon, 16 Mar 2026 17:57:05 +0000 Subject: [PATCH 5/5] Simplify --- python/dolfinx/wrappers/dolfinx_wrappers/fem.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h index 1699f2932b3..f0bb478ef28 100644 --- a/python/dolfinx/wrappers/dolfinx_wrappers/fem.h +++ b/python/dolfinx/wrappers/dolfinx_wrappers/fem.h @@ -1065,9 +1065,8 @@ void declare_coordinate_element(nb::module_& m, const std::string& type) // Scratch space for pull-back of point coordinates for // non-affine cells. std::size_t num_dofs_g = cell_geometry.shape(0); - std::vector pull_back_scratch; - pull_back_scratch.resize(tdim * (2 * gdim + 2 * num_dofs_g + 2) - + gdim + num_dofs_g); + std::vector pull_back_scratch( + tdim * (2 * gdim + 2 * num_dofs_g + 2) + gdim + num_dofs_g); self.pull_back_nonaffine(X, _x, g, pull_back_scratch); } return dolfinx_wrappers::as_nbarray(std::move(Xb),