From 61ffb1cde4d2e4c0362e055b57abb3365d0de24d Mon Sep 17 00:00:00 2001 From: Jiawei Shao Date: Mon, 18 May 2026 15:29:06 +0800 Subject: [PATCH 1/3] Add shader execution tests on the feature `subgroup-size-control` This patch adds the shader execution tests on the feature `subgroup-size-control`: - The value of the built-in variable `subgroup_size` must equal the value of the `@subgroup_size` attribute. - At least one value in the range of [GPUAdapterInfo.subgroupMinSize, GPUAdapterInfo.subgroupMaxSize] can be used as `@subgroup_size` attribute in a simple compute pipeline. --- .../shader_io/compute_builtins.spec.ts | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts index be42484ff7e7..e28940ce65f6 100644 --- a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts @@ -1168,3 +1168,203 @@ fn main(@builtin(local_invocation_id) local_id : vec3u, t.expectOK(checkNumSubgroupsConsistency(countData, outputData, wgThreads, t.params.numWGs)); }); + +g.test('subgroup_size_attribute') + .desc( + 'Tests that the value of the subgroup_size builtin must equal the value of the @subgroup_size attribute.' + ) + .params(u => + u.combine('numWorkGroups', [1, 2] as const).combine('numSubgroups', [1, 2, 4] as const) + ) + .fn(async t => { + t.skipIfDeviceDoesNotHaveFeature('subgroup-size-control' as GPUFeatureName); + + /** + * Returns a subgroup size value that is valid for use in the @subgroup_size + * attribute on the current adapter. + * + * On Intel gen-12lp, subgroupMinSize may be 8 in fragment stages, which is below the allowed range + * for `[WaveSize]` on D3D12 (can only be 16). subgroupMaxSize (16) is always within the explicit + * range, so it is returned for that architecture. + * On all other adapters, subgroupMinSize is returned as the conservative choice as on many D3D12 + * drivers only `waveLaneCountMin` is reliable, while `waveLaneCountMax` is not. + * + * @param adapterInfo The GPUAdapterInfo of the current device's adapter. + * @returns A power-of-two subgroup size valid for @subgroup_size on this adapter. + */ + const getValidSubgroupSizeForSubgroupSizeAttribute = (adapterInfo: GPUAdapterInfo): number => { + interface SubgroupAdapterInfo extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; + } + const { vendor, architecture, subgroupMinSize, subgroupMaxSize } = + adapterInfo as SubgroupAdapterInfo; + return vendor === 'intel' && architecture === 'gen-12lp' ? subgroupMaxSize : subgroupMinSize; + }; + + const subgroupSize = getValidSubgroupSizeForSubgroupSizeAttribute(t.device.adapterInfo); + + const { numWorkGroups, numSubgroups } = t.params; + const wgx = subgroupSize * numSubgroups; + + const wgsl = ` +enable subgroups; +enable subgroup_size_control; + +@group(0) @binding(0) +var output : array; + +@compute @workgroup_size(${wgx}, 1, 1) @subgroup_size(${subgroupSize}) +fn main(@builtin(subgroup_size) builtin_size : u32, + @builtin(local_invocation_index) lid : u32, + @builtin(workgroup_id) wgid : vec3u) { + let gid = lid + wgid.x * ${wgx}u; + // Store 1 if builtin subgroup_size matches the @subgroup_size attribute, 0 otherwise. + output[gid] = select(0u, 1u, builtin_size == ${subgroupSize}u); +}`; + + const numInvocations = wgx * numWorkGroups; + const outputBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(numInvocations, x => 0)]), + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + ); + t.trackForCleanup(outputBuffer); + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + entryPoint: 'main', + }, + }); + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer: outputBuffer, + }, + }, + ], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(numWorkGroups, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: numInvocations, + method: 'copy', + }); + const outputData: Uint32Array = outputReadback.data; + + for (let i = 0; i < numInvocations; i++) { + if (outputData[i] !== 1) { + t.fail( + `@subgroup_size(${subgroupSize}): invocation ${i} has builtin subgroup_size != ${subgroupSize}` + ); + break; + } + } + }); + +g.test('subgroup_size_attribute_valid_size_exists') + .desc( + `Tests that at least one power-of-two value in [subgroupMinSize, subgroupMaxSize] can be +used as the @subgroup_size attribute in a simple compute pipeline.` + ) + .fn(async t => { + t.skipIfDeviceDoesNotHaveFeature('subgroup-size-control' as GPUFeatureName); + + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; + } + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + + let succeeded = false; + + for (let subgroupSize = subgroupMinSize; subgroupSize <= subgroupMaxSize; subgroupSize *= 2) { + const wgsl = ` +enable subgroups; +enable subgroup_size_control; + +@group(0) @binding(0) +var output : array; + +@compute @workgroup_size(${subgroupSize}, 1, 1) @subgroup_size(${subgroupSize}) +fn main(@builtin(local_invocation_index) lid : u32) { + output[lid] = lid; +}`; + + // Use an error scope to catch validation errors from pipeline creation + // without crashing the test. + t.device.pushErrorScope('validation'); + const module = t.device.createShaderModule({ code: wgsl }); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const error = await t.device.popErrorScope(); + if (error) { + continue; + } + + const outputBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(subgroupSize, x => 0)]), + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + ); + t.trackForCleanup(outputBuffer); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer: outputBuffer } }], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(1, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: subgroupSize, + method: 'copy', + }); + const outputData: Uint32Array = outputReadback.data; + + // Validate that each invocation wrote its local_invocation_index. + let valid = true; + for (let i = 0; i < subgroupSize; i++) { + if (outputData[i] !== i) { + t.fail( + `@subgroup_size(${subgroupSize}): output[${i}] expected ${i}, got ${outputData[i]}` + ); + valid = false; + break; + } + } + + if (valid) { + succeeded = true; + break; + } + } + + if (!succeeded) { + t.fail(`No valid @subgroup_size value found in [${subgroupMinSize}, ${subgroupMaxSize}]`); + } + }); From dadba47c88442e1a44ce48651a21fd4179ecb8f8 Mon Sep 17 00:00:00 2001 From: Jiawei Shao Date: Fri, 22 May 2026 13:45:43 +0800 Subject: [PATCH 2/3] Address reviewer's comments --- .../shader_io/compute_builtins.spec.ts | 213 ++++++------------ 1 file changed, 71 insertions(+), 142 deletions(-) diff --git a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts index e28940ce65f6..89cd237dffc6 100644 --- a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts @@ -1169,9 +1169,46 @@ fn main(@builtin(local_invocation_id) local_id : vec3u, t.expectOK(checkNumSubgroupsConsistency(countData, outputData, wgThreads, t.params.numWGs)); }); +/** + * Returns all valid subgroup sizes for the given adapter info, i.e. all power-of-two values + * between subgroupMinSize and subgroupMaxSize inclusive. + */ +async function getValidSubgroupSizes(device: GPUDevice): Promise { + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; + } + const { subgroupMinSize, subgroupMaxSize } = device.adapterInfo as SubgroupProperties; + + const sizes: number[] = []; + for (let subgroupSize = subgroupMinSize; subgroupSize <= subgroupMaxSize; subgroupSize *= 2) { + const wgsl = ` +enable subgroups; +enable subgroup_size_control; + +@compute @workgroup_size(${subgroupSize}, 1, 1) @subgroup_size(${subgroupSize}) +fn main(@builtin(local_invocation_index) lid : u32) { +}`; + device.pushErrorScope('validation'); + const module = device.createShaderModule({ code: wgsl }); + device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const error = await device.popErrorScope(); + if (error) { + continue; + } + sizes.push(subgroupSize); + } + return sizes; +} + g.test('subgroup_size_attribute') .desc( - 'Tests that the value of the subgroup_size builtin must equal the value of the @subgroup_size attribute.' + `Tests that at least one power-of-two value in [subgroupMinSize, subgroupMaxSize] can be used as + the @subgroup_size attribute in a simple compute pipeline.the value of the subgroup_size builtin + must equal the value of the @subgroup_size attribute.` ) .params(u => u.combine('numWorkGroups', [1, 2] as const).combine('numSubgroups', [1, 2, 4] as const) @@ -1179,35 +1216,18 @@ g.test('subgroup_size_attribute') .fn(async t => { t.skipIfDeviceDoesNotHaveFeature('subgroup-size-control' as GPUFeatureName); - /** - * Returns a subgroup size value that is valid for use in the @subgroup_size - * attribute on the current adapter. - * - * On Intel gen-12lp, subgroupMinSize may be 8 in fragment stages, which is below the allowed range - * for `[WaveSize]` on D3D12 (can only be 16). subgroupMaxSize (16) is always within the explicit - * range, so it is returned for that architecture. - * On all other adapters, subgroupMinSize is returned as the conservative choice as on many D3D12 - * drivers only `waveLaneCountMin` is reliable, while `waveLaneCountMax` is not. - * - * @param adapterInfo The GPUAdapterInfo of the current device's adapter. - * @returns A power-of-two subgroup size valid for @subgroup_size on this adapter. - */ - const getValidSubgroupSizeForSubgroupSizeAttribute = (adapterInfo: GPUAdapterInfo): number => { - interface SubgroupAdapterInfo extends GPUAdapterInfo { - subgroupMinSize: number; - subgroupMaxSize: number; - } - const { vendor, architecture, subgroupMinSize, subgroupMaxSize } = - adapterInfo as SubgroupAdapterInfo; - return vendor === 'intel' && architecture === 'gen-12lp' ? subgroupMaxSize : subgroupMinSize; - }; + const { numWorkGroups, numSubgroups } = t.params; - const subgroupSize = getValidSubgroupSizeForSubgroupSizeAttribute(t.device.adapterInfo); + const subgroupSizes = await getValidSubgroupSizes(t.device); + t.expect( + subgroupSizes.length > 0, + `No valid @subgroup_size value found in [subgroupMinSize, subgroupMaxSize]` + ); - const { numWorkGroups, numSubgroups } = t.params; - const wgx = subgroupSize * numSubgroups; + for (const subgroupSize of subgroupSizes) { + const wgx = subgroupSize * numSubgroups; - const wgsl = ` + const wgsl = ` enable subgroups; enable subgroup_size_control; @@ -1223,148 +1243,57 @@ fn main(@builtin(subgroup_size) builtin_size : u32, output[gid] = select(0u, 1u, builtin_size == ${subgroupSize}u); }`; - const numInvocations = wgx * numWorkGroups; - const outputBuffer = t.makeBufferWithContents( - new Uint32Array([...iterRange(numInvocations, x => 0)]), - GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST - ); - t.trackForCleanup(outputBuffer); - - const pipeline = t.device.createComputePipeline({ - layout: 'auto', - compute: { - module: t.device.createShaderModule({ - code: wgsl, - }), - entryPoint: 'main', - }, - }); - const bg = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { - binding: 0, - resource: { - buffer: outputBuffer, - }, - }, - ], - }); - - const encoder = t.device.createCommandEncoder(); - const pass = encoder.beginComputePass(); - pass.setPipeline(pipeline); - pass.setBindGroup(0, bg); - pass.dispatchWorkgroups(numWorkGroups, 1, 1); - pass.end(); - t.queue.submit([encoder.finish()]); - - const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { - srcByteOffset: 0, - type: Uint32Array, - typedLength: numInvocations, - method: 'copy', - }); - const outputData: Uint32Array = outputReadback.data; - - for (let i = 0; i < numInvocations; i++) { - if (outputData[i] !== 1) { - t.fail( - `@subgroup_size(${subgroupSize}): invocation ${i} has builtin subgroup_size != ${subgroupSize}` - ); - break; - } - } - }); - -g.test('subgroup_size_attribute_valid_size_exists') - .desc( - `Tests that at least one power-of-two value in [subgroupMinSize, subgroupMaxSize] can be -used as the @subgroup_size attribute in a simple compute pipeline.` - ) - .fn(async t => { - t.skipIfDeviceDoesNotHaveFeature('subgroup-size-control' as GPUFeatureName); - - interface SubgroupProperties extends GPUAdapterInfo { - subgroupMinSize: number; - subgroupMaxSize: number; - } - const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; - - let succeeded = false; - - for (let subgroupSize = subgroupMinSize; subgroupSize <= subgroupMaxSize; subgroupSize *= 2) { - const wgsl = ` -enable subgroups; -enable subgroup_size_control; - -@group(0) @binding(0) -var output : array; - -@compute @workgroup_size(${subgroupSize}, 1, 1) @subgroup_size(${subgroupSize}) -fn main(@builtin(local_invocation_index) lid : u32) { - output[lid] = lid; -}`; - - // Use an error scope to catch validation errors from pipeline creation - // without crashing the test. - t.device.pushErrorScope('validation'); - const module = t.device.createShaderModule({ code: wgsl }); - const pipeline = t.device.createComputePipeline({ - layout: 'auto', - compute: { module, entryPoint: 'main' }, - }); - const error = await t.device.popErrorScope(); - if (error) { - continue; - } - + const numInvocations = wgx * numWorkGroups; const outputBuffer = t.makeBufferWithContents( - new Uint32Array([...iterRange(subgroupSize, x => 0)]), + new Uint32Array([...iterRange(numInvocations, x => 0)]), GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST ); t.trackForCleanup(outputBuffer); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + entryPoint: 'main', + }, + }); const bg = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), - entries: [{ binding: 0, resource: { buffer: outputBuffer } }], + entries: [ + { + binding: 0, + resource: { + buffer: outputBuffer, + }, + }, + ], }); const encoder = t.device.createCommandEncoder(); const pass = encoder.beginComputePass(); pass.setPipeline(pipeline); pass.setBindGroup(0, bg); - pass.dispatchWorkgroups(1, 1, 1); + pass.dispatchWorkgroups(numWorkGroups, 1, 1); pass.end(); t.queue.submit([encoder.finish()]); const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { srcByteOffset: 0, type: Uint32Array, - typedLength: subgroupSize, + typedLength: numInvocations, method: 'copy', }); const outputData: Uint32Array = outputReadback.data; - // Validate that each invocation wrote its local_invocation_index. - let valid = true; - for (let i = 0; i < subgroupSize; i++) { - if (outputData[i] !== i) { + for (let i = 0; i < numInvocations; i++) { + if (outputData[i] !== 1) { t.fail( - `@subgroup_size(${subgroupSize}): output[${i}] expected ${i}, got ${outputData[i]}` + `@subgroup_size(${subgroupSize}): invocation ${i} has builtin subgroup_size != ${subgroupSize}` ); - valid = false; break; } } - - if (valid) { - succeeded = true; - break; - } - } - - if (!succeeded) { - t.fail(`No valid @subgroup_size value found in [${subgroupMinSize}, ${subgroupMaxSize}]`); } }); From 4c1123872fb4f40228c3c32a41c8f19a3b74f9cd Mon Sep 17 00:00:00 2001 From: Jiawei Shao Date: Mon, 25 May 2026 13:42:15 +0800 Subject: [PATCH 3/3] Address reviewer's comments --- .../shader_io/compute_builtins.spec.ts | 81 +++++++------------ 1 file changed, 29 insertions(+), 52 deletions(-) diff --git a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts index 89cd237dffc6..e76f8f41681d 100644 --- a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts @@ -1169,46 +1169,11 @@ fn main(@builtin(local_invocation_id) local_id : vec3u, t.expectOK(checkNumSubgroupsConsistency(countData, outputData, wgThreads, t.params.numWGs)); }); -/** - * Returns all valid subgroup sizes for the given adapter info, i.e. all power-of-two values - * between subgroupMinSize and subgroupMaxSize inclusive. - */ -async function getValidSubgroupSizes(device: GPUDevice): Promise { - interface SubgroupProperties extends GPUAdapterInfo { - subgroupMinSize: number; - subgroupMaxSize: number; - } - const { subgroupMinSize, subgroupMaxSize } = device.adapterInfo as SubgroupProperties; - - const sizes: number[] = []; - for (let subgroupSize = subgroupMinSize; subgroupSize <= subgroupMaxSize; subgroupSize *= 2) { - const wgsl = ` -enable subgroups; -enable subgroup_size_control; - -@compute @workgroup_size(${subgroupSize}, 1, 1) @subgroup_size(${subgroupSize}) -fn main(@builtin(local_invocation_index) lid : u32) { -}`; - device.pushErrorScope('validation'); - const module = device.createShaderModule({ code: wgsl }); - device.createComputePipeline({ - layout: 'auto', - compute: { module, entryPoint: 'main' }, - }); - const error = await device.popErrorScope(); - if (error) { - continue; - } - sizes.push(subgroupSize); - } - return sizes; -} - g.test('subgroup_size_attribute') .desc( `Tests that at least one power-of-two value in [subgroupMinSize, subgroupMaxSize] can be used as - the @subgroup_size attribute in a simple compute pipeline.the value of the subgroup_size builtin - must equal the value of the @subgroup_size attribute.` + the @subgroup_size attribute in a simple compute pipeline. The value of the subgroup_size + builtin must equal the value of the @subgroup_size attribute.` ) .params(u => u.combine('numWorkGroups', [1, 2] as const).combine('numSubgroups', [1, 2, 4] as const) @@ -1218,13 +1183,15 @@ g.test('subgroup_size_attribute') const { numWorkGroups, numSubgroups } = t.params; - const subgroupSizes = await getValidSubgroupSizes(t.device); - t.expect( - subgroupSizes.length > 0, - `No valid @subgroup_size value found in [subgroupMinSize, subgroupMaxSize]` - ); + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; + } + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + + let atLeastOneSucceeded = false; - for (const subgroupSize of subgroupSizes) { + for (let subgroupSize = subgroupMinSize; subgroupSize <= subgroupMaxSize; subgroupSize *= 2) { const wgx = subgroupSize * numSubgroups; const wgsl = ` @@ -1243,6 +1210,20 @@ fn main(@builtin(subgroup_size) builtin_size : u32, output[gid] = select(0u, 1u, builtin_size == ${subgroupSize}u); }`; + // Try to create the pipeline; skip this subgroup size if it fails validation. + t.device.pushErrorScope('validation'); + const module = t.device.createShaderModule({ code: wgsl }); + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { module, entryPoint: 'main' }, + }); + const error = await t.device.popErrorScope(); + if (error) { + continue; + } + + atLeastOneSucceeded = true; + const numInvocations = wgx * numWorkGroups; const outputBuffer = t.makeBufferWithContents( new Uint32Array([...iterRange(numInvocations, x => 0)]), @@ -1250,15 +1231,6 @@ fn main(@builtin(subgroup_size) builtin_size : u32, ); t.trackForCleanup(outputBuffer); - const pipeline = t.device.createComputePipeline({ - layout: 'auto', - compute: { - module: t.device.createShaderModule({ - code: wgsl, - }), - entryPoint: 'main', - }, - }); const bg = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), entries: [ @@ -1296,4 +1268,9 @@ fn main(@builtin(subgroup_size) builtin_size : u32, } } } + + t.expect( + atLeastOneSucceeded, + `No valid @subgroup_size value found in [subgroupMinSize, subgroupMaxSize]` + ); });