Skip to content

Commit 249e657

Browse files
sifakisclaude
andcommitted
VoxelBlockManager: add resolveWenoLeafPtrs and computeWenoStencil
Factor the inlined WENO5 stencil computation from Benchmark.cu into two named static device functions on VoxelBlockManager<Log2BlockWidth>: - resolveWenoLeafPtrs: performs exactly 3 probeLeaf calls (one per axis) and returns a WenoLeafPtrs<BuildT> struct holding the resolved neighbor leaf pointers ({lo, center, hi} per axis). Intentionally scalar; probeLeaf is inherently pointer-chasing and not vectorizable. - computeWenoStencil: given pre-resolved leaf pointers, fills data[19] with global sequential indices for all 19 WENO5 stencil points. Uses nanovdb::math::WenoPt<i,j,k>::idx throughout (not hardcoded integers) to remain correct if the index convention is ever unified with OpenVDB's NineteenPt. Caller must zero-initialize data[]; missing neighbors remain 0. This is the auto-vectorization target for the planned CPU SIMD port. The split keeps all tree traversal in the scalar function and leaves only getValue calls and offset arithmetic in computeWenoStencil, which is the precondition for effective auto-vectorization on the CPU side. Signed-off-by: Efty Sifakis <esifakis@gmail.com> Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Signed-off-by: Efty Sifakis <esifakis@nvidia.com>
1 parent fb1df48 commit 249e657

1 file changed

Lines changed: 139 additions & 0 deletions

File tree

nanovdb/nanovdb/tools/cuda/VoxelBlockManager.cuh

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <nanovdb/util/cuda/Util.h>
3131
#include <nanovdb/util/cuda/DeviceGridTraits.cuh>
3232
#include <nanovdb/tools/VoxelBlockManager.h>
33+
#include <nanovdb/math/Stencils.h>
3334

3435
namespace nanovdb {
3536

@@ -155,6 +156,144 @@ struct VoxelBlockManager : nanovdb::tools::VoxelBlockManagerBase<Log2BlockWidth>
155156
}
156157
}
157158
}
159+
160+
/// @brief Auxiliary type holding the resolved neighbor leaf pointers for
161+
/// the WENO5 stencil. ptrs[axis][0] is the lo neighbor along that axis
162+
/// (nullptr if outside the narrow band), ptrs[axis][1] is always the
163+
/// center leaf, and ptrs[axis][2] is the hi neighbor (nullptr if outside).
164+
template<class BuildT>
165+
struct WenoLeafPtrs {
166+
const NanoLeaf<BuildT>* ptrs[3][3];
167+
};
168+
169+
/// @brief Resolve the neighbor leaf pointers needed by computeWenoStencil.
170+
/// Performs exactly one probeLeaf call per axis (three total). Safe to call
171+
/// per-thread; does not synchronize.
172+
/// @tparam BuildT Build type of the grid (must be an index type)
173+
/// @param grid Device-resident grid
174+
/// @param leaf Center leaf node for the current voxel
175+
/// @param voxelOffset Intra-leaf voxel offset for the current voxel
176+
/// @return WenoLeafPtrs with center entries set to &leaf and lo/hi entries
177+
/// set to the probeLeaf result (nullptr if outside the narrow band)
178+
template<class BuildT>
179+
__device__
180+
static typename util::enable_if<BuildTraits<BuildT>::is_index, WenoLeafPtrs<BuildT>>::type
181+
resolveWenoLeafPtrs(
182+
const NanoGrid<BuildT>* grid,
183+
const NanoLeaf<BuildT>& leaf,
184+
uint16_t voxelOffset)
185+
{
186+
WenoLeafPtrs<BuildT> result;
187+
const auto coord = leaf.offsetToGlobalCoord(voxelOffset);
188+
const auto localCoord = leaf.OffsetToLocalCoord(voxelOffset);
189+
const auto& tree = grid->tree();
190+
191+
for (int axis = 0; axis < 3; ++axis) {
192+
result.ptrs[axis][0] = nullptr;
193+
result.ptrs[axis][1] = &leaf;
194+
result.ptrs[axis][2] = nullptr;
195+
196+
auto neighborCoord = coord;
197+
neighborCoord[axis] += (localCoord[axis] & 0x4) ? 4 : -4;
198+
result.ptrs[axis][(localCoord[axis] & 0x4) >> 1] =
199+
tree.root().probeLeaf(neighborCoord);
200+
}
201+
return result;
202+
}
203+
204+
/// @brief Compute global sequential indices for the 19 WENO5 stencil
205+
/// points of the given voxel, using pre-resolved leaf pointers.
206+
///
207+
/// Output layout follows nanovdb::math::WenoPt<i,j,k>::idx. Note that
208+
/// this convention differs from OpenVDB's NineteenPt<i,j,k>::idx.
209+
///
210+
/// Entries for neighbors outside the narrow band are left unchanged;
211+
/// the caller must zero-initialize data[] before calling this function.
212+
/// Does not synchronize; safe to call from divergent threads.
213+
///
214+
/// The voxelOffset arithmetic uses octal notation to exploit the fact that
215+
/// the NanoVDB leaf layout encodes (x,y,z) as x*64 + y*8 + z, making x, y,
216+
/// and z strides exactly 0100, 010, and 1 in octal respectively.
217+
///
218+
/// @tparam BuildT Build type of the grid (must be an index type)
219+
/// @param leaf Center leaf node for the current voxel
220+
/// @param voxelOffset Intra-leaf voxel offset for the current voxel
221+
/// @param leafPtrs Resolved neighbor leaf pointers from resolveWenoLeafPtrs
222+
/// @param data Output array of length >= 19, caller-zero-initialized
223+
template<class BuildT>
224+
__device__
225+
static typename util::enable_if<BuildTraits<BuildT>::is_index, void>::type
226+
computeWenoStencil(
227+
const NanoLeaf<BuildT>& leaf,
228+
uint16_t voxelOffset,
229+
const WenoLeafPtrs<BuildT>& leafPtrs,
230+
uint64_t* data)
231+
{
232+
using math::WenoPt;
233+
const auto lc = leaf.OffsetToLocalCoord(voxelOffset);
234+
235+
data[WenoPt< 0, 0, 0>::idx] = leaf.getValue(voxelOffset);
236+
237+
// x-axis: stride per step = 64 = 0100 octal; cross-leaf wrap = ±8*64 = ±0700
238+
if (leafPtrs.ptrs[0][(lc.x()+5)>>3])
239+
data[WenoPt<-3, 0, 0>::idx] = leafPtrs.ptrs[0][(lc.x()+5)>>3]->getValue(
240+
voxelOffset + ((lc[0]<3) ? 0500 : -0300));
241+
if (leafPtrs.ptrs[0][(lc.x()+6)>>3])
242+
data[WenoPt<-2, 0, 0>::idx] = leafPtrs.ptrs[0][(lc.x()+6)>>3]->getValue(
243+
voxelOffset + ((lc[0]<2) ? 0600 : -0200));
244+
if (leafPtrs.ptrs[0][(lc.x()+7)>>3])
245+
data[WenoPt<-1, 0, 0>::idx] = leafPtrs.ptrs[0][(lc.x()+7)>>3]->getValue(
246+
voxelOffset + ((lc[0]<1) ? 0700 : -0100));
247+
if (leafPtrs.ptrs[0][(lc.x()+9)>>3])
248+
data[WenoPt< 1, 0, 0>::idx] = leafPtrs.ptrs[0][(lc.x()+9)>>3]->getValue(
249+
voxelOffset + ((lc[0]<7) ? 0100 : -0700));
250+
if (leafPtrs.ptrs[0][(lc.x()+10)>>3])
251+
data[WenoPt< 2, 0, 0>::idx] = leafPtrs.ptrs[0][(lc.x()+10)>>3]->getValue(
252+
voxelOffset + ((lc[0]<6) ? 0200 : -0600));
253+
if (leafPtrs.ptrs[0][(lc.x()+11)>>3])
254+
data[WenoPt< 3, 0, 0>::idx] = leafPtrs.ptrs[0][(lc.x()+11)>>3]->getValue(
255+
voxelOffset + ((lc[0]<5) ? 0300 : -0500));
256+
257+
// y-axis: stride per step = 8 = 010 octal; cross-leaf wrap = ±8*8 = ±070
258+
if (leafPtrs.ptrs[1][(lc.y()+5)>>3])
259+
data[WenoPt< 0,-3, 0>::idx] = leafPtrs.ptrs[1][(lc.y()+5)>>3]->getValue(
260+
voxelOffset + ((lc[1]<3) ? 0050 : -0030));
261+
if (leafPtrs.ptrs[1][(lc.y()+6)>>3])
262+
data[WenoPt< 0,-2, 0>::idx] = leafPtrs.ptrs[1][(lc.y()+6)>>3]->getValue(
263+
voxelOffset + ((lc[1]<2) ? 0060 : -0020));
264+
if (leafPtrs.ptrs[1][(lc.y()+7)>>3])
265+
data[WenoPt< 0,-1, 0>::idx] = leafPtrs.ptrs[1][(lc.y()+7)>>3]->getValue(
266+
voxelOffset + ((lc[1]<1) ? 0070 : -0010));
267+
if (leafPtrs.ptrs[1][(lc.y()+9)>>3])
268+
data[WenoPt< 0, 1, 0>::idx] = leafPtrs.ptrs[1][(lc.y()+9)>>3]->getValue(
269+
voxelOffset + ((lc[1]<7) ? 0010 : -0070));
270+
if (leafPtrs.ptrs[1][(lc.y()+10)>>3])
271+
data[WenoPt< 0, 2, 0>::idx] = leafPtrs.ptrs[1][(lc.y()+10)>>3]->getValue(
272+
voxelOffset + ((lc[1]<6) ? 0020 : -0060));
273+
if (leafPtrs.ptrs[1][(lc.y()+11)>>3])
274+
data[WenoPt< 0, 3, 0>::idx] = leafPtrs.ptrs[1][(lc.y()+11)>>3]->getValue(
275+
voxelOffset + ((lc[1]<5) ? 0030 : -0050));
276+
277+
// z-axis: stride per step = 1; cross-leaf wrap = ±8
278+
if (leafPtrs.ptrs[2][(lc.z()+5)>>3])
279+
data[WenoPt< 0, 0,-3>::idx] = leafPtrs.ptrs[2][(lc.z()+5)>>3]->getValue(
280+
voxelOffset + ((lc[2]<3) ? 0005 : -0003));
281+
if (leafPtrs.ptrs[2][(lc.z()+6)>>3])
282+
data[WenoPt< 0, 0,-2>::idx] = leafPtrs.ptrs[2][(lc.z()+6)>>3]->getValue(
283+
voxelOffset + ((lc[2]<2) ? 0006 : -0002));
284+
if (leafPtrs.ptrs[2][(lc.z()+7)>>3])
285+
data[WenoPt< 0, 0,-1>::idx] = leafPtrs.ptrs[2][(lc.z()+7)>>3]->getValue(
286+
voxelOffset + ((lc[2]<1) ? 0007 : -0001));
287+
if (leafPtrs.ptrs[2][(lc.z()+9)>>3])
288+
data[WenoPt< 0, 0, 1>::idx] = leafPtrs.ptrs[2][(lc.z()+9)>>3]->getValue(
289+
voxelOffset + ((lc[2]<7) ? 0001 : -0007));
290+
if (leafPtrs.ptrs[2][(lc.z()+10)>>3])
291+
data[WenoPt< 0, 0, 2>::idx] = leafPtrs.ptrs[2][(lc.z()+10)>>3]->getValue(
292+
voxelOffset + ((lc[2]<6) ? 0002 : -0006));
293+
if (leafPtrs.ptrs[2][(lc.z()+11)>>3])
294+
data[WenoPt< 0, 0, 3>::idx] = leafPtrs.ptrs[2][(lc.z()+11)>>3]->getValue(
295+
voxelOffset + ((lc[2]<5) ? 0003 : -0005));
296+
}
158297
};
159298

160299
/// @brief This functor calculates the firstLeafID and jumpMap for the

0 commit comments

Comments
 (0)