We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 76445a0 commit ecbec13 (Copy full SHA for ecbec13)
1 file changed
vortex-cuda/src/dynamic_dispatch/plan_builder.rs
@@ -224,11 +224,12 @@ impl DispatchPlan {
224
}
225
226
impl FusedPlan {
227
- /// Maximum shared memory per block in bytes (96 KB).
+ /// Maximum shared memory per block in bytes (48 KB).
228
///
229
- /// NVIDIA GPUs from Fermi (CC 2.x) through Blackwell (CC 10.0)
230
- /// use 96 KB as their default limit for shared memory per block.
231
- const MAX_SHARED_MEM_BYTES: u32 = 96 * 1024;
+ /// 48 KB is the default per-block dynamic shared memory limit across
+ /// all CUDA architectures. Higher limits (up to 227 KB on Hopper)
+ /// require an explicit opt-in via `cuFuncSetAttribute`.
232
+ const MAX_SHARED_MEM_BYTES: u32 = 48 * 1024;
233
234
/// Build a plan by walking the encoding tree from root to leaf.
235
0 commit comments