33
44#pragma once
55
6+ #include " fastlanes_common.cuh"
67#include " patches.h"
78
89// / Load a chunk offset value, dispatching on the runtime type.
@@ -21,8 +22,8 @@ __device__ inline uint32_t load_chunk_offset(const GPUPatches &patches, uint32_t
2122}
2223
2324// / A single patch: a within-chunk index and its replacement value.
24- // / A sentinel patch has index == 1024 , which can never match a valid
25- // / within-chunk position (0–1023 ).
25+ // / A sentinel patch has index == FL_CHUNK , which can never match a valid
26+ // / within-chunk position (0–FL_CHUNK-1 ).
2627template <typename T>
2728struct Patch {
2829 uint16_t index;
@@ -38,7 +39,7 @@ struct Patch {
3839// /
3940// / PatchesCursor<uint32_t> cursor(patches, blockIdx.x, thread_idx, 32);
4041// / auto patch = cursor.next();
41- // / while (patch.index != 1024 ) {
42+ // / while (patch.index != FL_CHUNK ) {
4243// / shared_out[patch.index] = patch.value;
4344// / patch = cursor.next();
4445// / }
@@ -89,15 +90,15 @@ public:
8990 // The iterator returns indices relative to the start of the chunk.
9091 // `chunk_base` is the index of the first element within a chunk, accounting
9192 // for the slice offset.
92- chunk_base = chunk * 1024 + patches.offset ;
93- chunk_base -= min (chunk_base, patches.offset % 1024 );
93+ chunk_base = chunk * FL_CHUNK + patches.offset ;
94+ chunk_base -= min (chunk_base, patches.offset % FL_CHUNK );
9495 }
9596
9697 // / Return the current patch (with within-chunk index) and advance,
9798 // / or a sentinel {FL_CHUNK, T{}} if exhausted.
9899 __device__ Patch<T> next () {
99100 if (remaining == 0 ) {
100- return {1024 , T {}};
101+ return {FL_CHUNK , T {}};
101102 }
102103 uint16_t within_chunk = static_cast <uint16_t >(*indices - chunk_base);
103104 Patch<T> patch = {within_chunk, *values};
0 commit comments