Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 29 additions & 21 deletions cpp/dolfinx/fem/CoordinateElement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
void CoordinateElement<T>::pull_back_nonaffine(mdspan2_t<T> X,
mdspan2_t<const T> x,
mdspan2_t<const T> cell_geometry,
std::span<T> working_array,
double tol, int maxit) const
{
// Number of points
Expand All @@ -97,33 +98,41 @@
assert(X.extent(0) == num_points);
assert(X.extent(1) == tdim);

std::vector<T> dphi_b(tdim * num_xnodes);
mdspan2_t<T> dphi(dphi_b.data(), tdim, num_xnodes);
// Allocate various components of working array
assert(working_array.size()
>= tdim * (2 * gdim + 2 * num_xnodes + 2) + gdim + num_xnodes);

std::vector<T> Xk_b(tdim);
mdspan2_t<T> Xk(Xk_b.data(), 1, tdim);
mdspan2_t<T> dphi(working_array.data(), tdim, num_xnodes);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could put all of these into a struct PullBackNonAffineScratch with a constructor taking the needed integer sizes, which then does the dynamic memory allocation with multiple std::vectors + holds the std::spans and mdspan2_t for use inside the routine.

Then the struct would handle the (in my view, a bit messy and error-prone) offsetting automatically.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless there is some genuine performance benefit from the current approach with just a single span as input - but on my reading, there probably isn't.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I vibed this out at jhale/pull_back_scratch_memory, if you like it feel free to merge here and tidy it up/fix up.

mdspan2_t<const T> Xk(working_array.data() + tdim * num_xnodes, 1, tdim);
std::span<T> Xk_span(working_array.data() + tdim * num_xnodes, tdim);

std::array<T, 3> xk{0, 0, 0};
std::vector<T> dX(tdim);
std::vector<T> J_b(gdim * tdim);
mdspan2_t<T> J(J_b.data(), gdim, tdim);
std::vector<T> K_b(tdim * gdim);
mdspan2_t<T> K(K_b.data(), tdim, gdim);
std::span<T> dX(working_array.data() + (tdim * (num_xnodes + 1)), tdim);
std::span<T> xk(working_array.data() + (tdim * (num_xnodes + 2)), gdim);
std::ranges::fill(xk, 0);
mdspan2_t<T> J(working_array.data() + ((tdim * (num_xnodes + 2)) + gdim),
gdim, tdim);
mdspan2_t<T> K(working_array.data()
+ ((tdim * (num_xnodes + 2)) + gdim * (tdim + 1)),
tdim, gdim);

using mdspan4_t = md::mdspan<T, md::dextents<std::size_t, 4>>;

const std::array<std::size_t, 4> bsize = _element->tabulate_shape(1, 1);
std::vector<T> basis_b(std::reduce(bsize.begin(), bsize.end(), std::size_t(1),
std::multiplies{}));
mdspan4_t basis(basis_b.data(), bsize);
std::vector<T> phi(basis.extent(2));
assert(bsize[0] == tdim + 1); // Tabulating basis and first derivatives
assert(bsize[1] == 1); // Tabulating at one point at a time
assert(bsize[2] == num_xnodes);
assert(bsize[3] == 1); // Scalar component coordinate element
mdspan4_t basis(working_array.data()
+ ((tdim * (num_xnodes + 2)) + gdim * (2 * tdim + 1)),
bsize);
for (std::size_t p = 0; p < num_points; ++p)
{
std::ranges::fill(Xk_b, 0);
// Reset Xk
std::ranges::fill(Xk_span, 0);
int k;
for (k = 0; k < maxit; ++k)
{
_element->tabulate(1, Xk_b, {1, tdim}, basis_b);
_element->tabulate(1, Xk, basis);

// x = cell_geometry * phi
std::ranges::fill(xk, 0);
Expand All @@ -132,7 +141,7 @@
xk[j] += cell_geometry(i, j) * basis(0, 0, i, 0);

// Compute Jacobian, its inverse and determinant
std::ranges::fill(J_b, 0);
std::ranges::fill(J.data_handle(), J.data_handle() + J.size(), 0);
for (std::size_t i = 0; i < tdim; ++i)
for (std::size_t j = 0; j < basis.extent(2); ++j)
dphi(i, j) = basis(i + 1, 0, j, 0);
Expand All @@ -147,21 +156,20 @@
dX[i] += K(i, j) * (x(p, j) - xk[j]);

// Compute Xk += dX
std::ranges::transform(dX, Xk_b, Xk_b.begin(),
std::ranges::transform(dX, Xk_span, Xk_span.begin(),
[](auto a, auto b) { return a + b; });

// Compute norm(dX)
if (auto dX_squared
= std::transform_reduce(dX.cbegin(), dX.cend(), 0.0, std::plus{},
= std::transform_reduce(dX.begin(), dX.end(), 0.0, std::plus{},
[](auto v) { return v * v; });
std::sqrt(dX_squared) < tol)
{
break;
}
}

std::copy(Xk_b.cbegin(), std::next(Xk_b.cbegin(), tdim),
X.data_handle() + p * tdim);
std::copy(Xk_span.begin(), Xk_span.end(), X.data_handle() + p * tdim);

Check warning on line 172 in cpp/dolfinx/fem/CoordinateElement.cpp

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Replace with the version of "std::ranges::copy" that takes a range.

See more on https://sonarcloud.io/project/issues?id=FEniCS_dolfinx&issues=AZz3xCh5GOviLjPOJiKB&open=AZz3xCh5GOviLjPOJiKB&pullRequest=4126
if (k == maxit)
{
throw std::runtime_error(
Expand Down
9 changes: 6 additions & 3 deletions cpp/dolfinx/fem/CoordinateElement.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,15 +238,18 @@ class CoordinateElement
/// @param [in,out] X The reference coordinates to compute
/// (shape=`(num_points, tdim)`).
/// @param [in] x Physical coordinates (`shape=(num_points, gdim)`).
/// @param [in] cell_geometry Cell nodes coordinates (`shape=(num
/// geometry nodes, gdim)`).
/// @param [in] cell_geometry Cell nodes coordinates (`shape=(num_xnodes,
/// gdim)`).
/// @param [in] working_array Working memory. Size must be at least
/// `tdim * (2 * gdim + 2 * num_xnodes + 2) + gdim + num_xnodes`.
/// @param [in] tol Tolerance for termination of Newton method.
/// @param [in] maxit Maximum number of Newton iterations
/// @note If convergence is not achieved within `maxit`, the function
/// throws a runtime error.
void pull_back_nonaffine(mdspan2_t<T> X, mdspan2_t<const T> x,
mdspan2_t<const T> cell_geometry,
double tol = 1.0e-6, int maxit = 15) const;
std::span<T> working_array, double tol = 1.0e-6,
int maxit = 15) const;

/// @brief Permute a list of DOF numbers on a cell.
void permute(std::span<std::int32_t> dofs, std::uint32_t cell_perm) const;
Expand Down
9 changes: 8 additions & 1 deletion cpp/dolfinx/fem/Function.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,13 @@ class Function
std::vector<geometry_type> detJ(xshape[0]);
std::vector<geometry_type> det_scratch(2 * gdim * tdim);

// Scrach space for pull-back of point coordinates for non-affine cells.
std::vector<geometry_type> pull_back_scratch;
if (!cmap.is_affine())
{
pull_back_scratch.resize(tdim * (2 * gdim + 2 * num_dofs_g + 2) + gdim
+ num_dofs_g);
}
// Prepare geometry data in each cell
for (auto cell_it = cells.begin(); cell_it != cells.end(); ++cell_it)
{
Expand Down Expand Up @@ -607,7 +614,7 @@ class Function
else
{
// Pull-back physical point xp to reference coordinate Xp
cmap.pull_back_nonaffine(Xp, xp, coord_dofs);
cmap.pull_back_nonaffine(Xp, xp, coord_dofs, pull_back_scratch);
cmap.tabulate(1, std::span(Xpb.data(), tdim), {1, tdim}, phi_b);
CoordinateElement<geometry_type>::compute_jacobian(dphi, coord_dofs,
_J);
Expand Down
9 changes: 8 additions & 1 deletion python/dolfinx/wrappers/dolfinx_wrappers/fem.h
Original file line number Diff line number Diff line change
Expand Up @@ -1066,8 +1066,15 @@ void declare_coordinate_element(nb::module_& m, const std::string& type)
self.pull_back_affine(X, K, x0, _x);
}
else
self.pull_back_nonaffine(X, _x, g);
{

// Scratch space for pull-back of point coordinates for
// non-affine cells.
std::size_t num_dofs_g = cell_geometry.shape(0);
std::vector<T> pull_back_scratch(
tdim * (2 * gdim + 2 * num_dofs_g + 2) + gdim + num_dofs_g);
self.pull_back_nonaffine(X, _x, g, pull_back_scratch);
}
return dolfinx_wrappers::as_nbarray(std::move(Xb),
{num_points, tdim});
},
Expand Down
Loading