Skip to content

Commit 79e9bab

Browse files
sifakisclaude
andcommitted
Formatting changes in Rasterization.cuh
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Signed-off-by: Efty Sifakis <esifakis@nvidia.com>
1 parent f3b6c03 commit 79e9bab

1 file changed

Lines changed: 26 additions & 39 deletions

File tree

nanovdb/nanovdb/util/cuda/Rasterization.cuh

Lines changed: 26 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,16 @@ struct RasterizeInternalNodesFunctor
4444

4545
const PairT *dPairs;
4646
const RootT *dRoot;
47-
Mask<5> *dUpperMasks;
48-
Mask<4> (*dLowerMasks)[Mask<5>::SIZE];
47+
Mask<5> *dUpperMasks;
48+
Mask<4> (*dLowerMasks)[Mask<5>::SIZE];
4949

5050
__device__ void operator()(size_t pairID) const
5151
{
5252
const auto &pair = dPairs[pairID];
5353

5454
// Locate the root tile containing this leaf origin
5555
const auto *tile = dRoot->probeTile(pair.origin);
56-
uint64_t tileIdx = util::PtrDiff(tile, dRoot->tile(0))
57-
/ sizeof(typename RootT::Tile);
56+
uint64_t tileIdx = util::PtrDiff(tile, dRoot->tile(0)) / sizeof(typename RootT::Tile);
5857

5958
// Offsets of the enclosing upper and lower nodes
6059
const uint32_t upperBit = UpperT::CoordToOffset(pair.origin);
@@ -65,20 +64,18 @@ struct RasterizeInternalNodesFunctor
6564
}
6665
};
6766

68-
/// @brief Fills leaf voxel value masks via exact point-to-triangle UDF.
67+
/// @brief Fills leaf voxel value masks via exact point-to-triangle distance calculation.
6968
///
7069
/// Intended to be called via nanovdb::util::cuda::operatorKernelInstance.
7170
/// Launched as <<<pairCount, MaxThreadsPerBlock>>> - 1 CTA per leaf/triangle
72-
/// pair, 512 threads (one per voxel in the 8^3 leaf). Each CTA:
73-
/// 1. Each thread decodes its voxel local coords (lx, ly, lz) and
74-
/// computes closestPointOnTriangleToPoint from the voxel center
75-
/// to the pair's triangle.
76-
/// 2. Warp ballot builds a local 16-word mask without atomics (same
77-
/// pattern as evaluateAndCountSubBoxesKernel).
71+
/// pair, 512 threads (one per voxel in the 8^3 leaf). For each CTA:
72+
/// 1. Each thread computes closestPointOnTriangleToPoint from the voxel
73+
/// center (lx,ly,lz) to the triangle.
74+
/// 2. Warp ballot builds a local 16-word mask without atomics
7875
/// 3. Thread 0 locates the destination leaf via probeLeaf().
7976
/// 4. Threads 0..7 each pack two 32-bit ballots into one uint64_t and
8077
/// atomicOr into the corresponding mask word, allowing multiple CTAs
81-
/// writing to the same leaf to coexist.
78+
/// to work concurrently on the same leaf.
8279
///
8380
/// @note Degenerate triangles (zero area) are handled implicitly: the face
8481
/// interior test naturally fails and the code falls through to the
@@ -91,32 +88,27 @@ struct RasterizeLeafNodesFunctor
9188
static constexpr int MaxThreadsPerBlock = 512;
9289
static constexpr int MinBlocksPerMultiprocessor = 1;
9390

94-
const PairT *dPairs;
95-
const TriangleT *dTriangles;
91+
const PairT *dPairs;
92+
const TriangleT *dTriangles;
9693
NanoGrid<BuildT> *dGrid;
97-
float bandWidthSqr;
94+
float bandWidthSqr;
9895

9996
__device__ void operator()() const
10097
{
10198
const uint64_t pairID = blockIdx.x;
102-
const int threadID = threadIdx.x; // 0..511 = voxel index within leaf
99+
const int threadID = threadIdx.x; // 0..511 = voxel index within leaf
103100

104101
const auto &pair = dPairs[pairID];
105-
const auto &tri = dTriangles[pair.triangleID];
102+
const auto &tri = dTriangles[pair.triangleID];
106103

107104
// Decode voxel local coords within the 8^3 leaf
108105
// Bit ordering: threadID = lx + ly*8 + lz*64 matches NanoVDB Mask<3> layout
109106
const int lx = threadID & 0x7;
110107
const int ly = (threadID >> 3) & 0x7;
111108
const int lz = (threadID >> 6) & 0x7;
112109

113-
const nanovdb::Vec3f voxelCenter(
114-
float(pair.origin[0] + lx),
115-
float(pair.origin[1] + ly),
116-
float(pair.origin[2] + lz));
117-
118-
const bool hit = nanovdb::math::pointToTriangleDistSqr(
119-
tri[0], tri[1], tri[2], voxelCenter) <= bandWidthSqr;
110+
const nanovdb::Vec3f voxelCenter(float(pair.origin[0] + lx), float(pair.origin[1] + ly), float(pair.origin[2] + lz));
111+
const bool hit = nanovdb::math::pointToTriangleDistSqr(tri[0], tri[1], tri[2], voxelCenter) <= bandWidthSqr;
120112

121113
// Build a per-block local mask via warp ballot (avoids per-voxel atomics).
122114
// 512 threads -> 16 warps -> 16 x 32-bit ballot words.
@@ -128,8 +120,7 @@ struct RasterizeLeafNodesFunctor
128120

129121
// Threads 0..7 each pack two ballots into one uint64_t and atomicOr into the
130122
// corresponding mask word.
131-
auto *leaf = const_cast<nanovdb::NanoLeaf<BuildT>*>(
132-
dGrid->tree().root().probeLeaf(pair.origin));
123+
auto *leaf = const_cast<nanovdb::NanoLeaf<BuildT>*>(dGrid->tree().root().probeLeaf(pair.origin));
133124

134125
if (threadID < int(nanovdb::Mask<3>::WORD_COUNT)) {
135126
const uint64_t word = uint64_t(s_ballots[2*threadID])
@@ -147,7 +138,7 @@ struct RasterizeLeafNodesFunctor
147138
///
148139
/// Intended to be called via nanovdb::util::cuda::operatorKernelInstance.
149140
/// Launched as <<<pairCount, MaxThreadsPerBlock>>> - 1 CTA per leaf/triangle
150-
/// pair, 512 threads (one per voxel in the 8^3 leaf). Each CTA:
141+
/// pair, 512 threads (one per voxel in the 8^3 leaf). For each CTA:
151142
/// 1. Probes the leaf pointer from the grid using the pair's origin.
152143
/// 2. Each thread skips its voxel if inactive in the leaf mask.
153144
/// 3. Active threads compute pointToTriangleDistSqr from the voxel center
@@ -163,19 +154,19 @@ struct RasterizeLeafNodesFunctor
163154
template<typename BuildT, typename PairT, typename TriangleT>
164155
struct ComputeUDFFunctor
165156
{
166-
static constexpr int MaxThreadsPerBlock = 512;
157+
static constexpr int MaxThreadsPerBlock = 512;
167158
static constexpr int MinBlocksPerMultiprocessor = 1;
168159

169-
const PairT *dPairs;
170-
const TriangleT *dTriangles;
160+
const PairT *dPairs;
161+
const TriangleT *dTriangles;
171162
const NanoGrid<BuildT> *dGrid;
172-
float *dSidecar;
173-
float bandWidthSqr;
163+
float *dSidecar;
164+
float bandWidthSqr;
174165

175166
__device__ void operator()() const
176167
{
177168
const uint64_t pairID = blockIdx.x;
178-
const int threadID = threadIdx.x;
169+
const int threadID = threadIdx.x;
179170

180171
const auto &pair = dPairs[pairID];
181172
const auto *leaf = dGrid->tree().root().probeLeaf(pair.origin);
@@ -186,14 +177,10 @@ struct ComputeUDFFunctor
186177
const int ly = (threadID >> 3) & 0x7;
187178
const int lz = (threadID >> 6) & 0x7;
188179

189-
const nanovdb::Vec3f voxelCenter(
190-
float(pair.origin[0] + lx),
191-
float(pair.origin[1] + ly),
192-
float(pair.origin[2] + lz));
180+
const nanovdb::Vec3f voxelCenter(float(pair.origin[0] + lx), float(pair.origin[1] + ly), float(pair.origin[2] + lz));
193181

194182
const auto &tri = dTriangles[pair.triangleID];
195-
const float distSqr = nanovdb::math::pointToTriangleDistSqr(
196-
tri[0], tri[1], tri[2], voxelCenter);
183+
const float distSqr = nanovdb::math::pointToTriangleDistSqr(tri[0], tri[1], tri[2], voxelCenter);
197184

198185
if (distSqr >= bandWidthSqr) return;
199186

0 commit comments

Comments
 (0)