From 3de18a68514c2b6a781d4b1c3ca63dffb23f8802 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 15 Apr 2026 17:02:30 -0600 Subject: [PATCH 1/2] [SM6.10] Update LinAlg header align parameter --- tools/clang/lib/Headers/hlsl/dx/linalg.h | 10 +++++----- .../hlsl/linalg/api/matrix-class.hlsl | 16 ++++++++-------- .../CodeGenDXIL/hlsl/linalg/api/vectors.hlsl | 14 +++++++------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 08bc79a755..209b0b3d5c 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -272,7 +272,7 @@ class Matrix { static Matrix Load(ByteAddressBuffer Res, uint StartOffset, uint Stride, MatrixLayoutEnum Layout, - uint Align = sizeof(ElementType)) { + uint Align = 128) { Matrix Result; __builtin_LinAlg_MatrixLoadFromDescriptor(Result.__handle, Res, StartOffset, Stride, Layout, Align); @@ -281,7 +281,7 @@ class Matrix { static Matrix Load(RWByteAddressBuffer Res, uint StartOffset, uint Stride, MatrixLayoutEnum Layout, - uint Align = sizeof(ElementType)) { + uint Align = 128) { Matrix Result; __builtin_LinAlg_MatrixLoadFromDescriptor(Result.__handle, Res, StartOffset, Stride, Layout, Align); @@ -331,7 +331,7 @@ class Matrix { } void Store(RWByteAddressBuffer Res, uint StartOffset, uint Stride, - MatrixLayoutEnum Layout, uint Align = sizeof(ElementType)) { + MatrixLayoutEnum Layout, uint Align = 128) { __builtin_LinAlg_MatrixStoreToDescriptor(__handle, Res, StartOffset, Stride, Layout, Align); } @@ -352,7 +352,7 @@ class Matrix { void>::type InterlockedAccumulate(RWByteAddressBuffer Res, uint StartOffset, uint Stride, MatrixLayoutEnum Layout, - uint Align = sizeof(ElementType)) { + uint Align = 128) { __builtin_LinAlg_MatrixAccumulateToDescriptor(__handle, Res, StartOffset, Stride, Layout, Align); } @@ -410,7 +410,7 @@ class Matrix { static typename hlsl::enable_if::type Load(ByteAddressBuffer Res, uint StartOffset, uint Stride, - uint Align = sizeof(ElementType)) { + uint Align = 128) { Matrix Result; __builtin_LinAlg_MatrixLoadFromDescriptor(Result.__handle, Res, StartOffset, Stride, Layout, Align); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl index 190f108fa7..cda3d0818e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/matrix-class.hlsl @@ -56,7 +56,7 @@ void main(uint ID : SV_GroupID) // // CHECK: %[[MATA2:.*]] = call %dx.types.LinAlgMatrixC9M4N4U0S1 // CHECK-SAME: @dx.op.linAlgMatrixLoadFromDescriptor.mC9M4N4U0S1(i32 -2147483634, -// CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 4) +// CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 128) // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) MatrixATy MatA2 = MatrixATy::Load(BAB, 0, 16, MatrixLayoutEnum::ColMajor); @@ -64,8 +64,8 @@ void main(uint ID : SV_GroupID) // // CHECK: %[[MATB2:.*]] = call %dx.types.LinAlgMatrixC9M4N4U1S1 // CHECK-SAME: @dx.op.linAlgMatrixLoadFromDescriptor.mC9M4N4U1S1(i32 -2147483634, -// CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 256, i32 16, i32 1, i32 4) -// CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) +// CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 256, i32 16, i32 1, i32 128) +// CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) MatrixBTy MatB2; MatB2 = MatrixBTy::Load(RWBAB, 256, 16, MatrixLayoutEnum::ColMajor); @@ -87,7 +87,7 @@ void main(uint ID : SV_GroupID) // Matrix::GetCoordinate // // CHECK: call <2 x i32> @dx.op.linAlgMatrixGetCoordinate.mC9M4N4U1S1(i32 -2147483631, -// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U1S1 %[[MATB1]], i32 %[[GROUP_ID]]) +// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U1S1 %[[MATB1]], i32 %[[GROUP_ID]]) // CHECK-SAME:; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex) uint2 coord = MatB1.GetCoordinate(ID); @@ -110,7 +110,7 @@ void main(uint ID : SV_GroupID) // // CHECK: call void @dx.op.linAlgMatrixStoreToDescriptor.mC9M4N4U1S1(i32 -2147483628, // CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U1S1 %[[MATB1_2]], %dx.types.Handle %{{[0-9]+}}, -// CHECK-SAME: i32 256, i32 16, i32 1, i32 4) ; +// CHECK-SAME: i32 256, i32 16, i32 1, i32 128) ; // CHECK-SAME: LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align) MatB1.Store(RWBAB, 256, 16, MatrixLayoutEnum::ColMajor); @@ -129,7 +129,7 @@ void main(uint ID : SV_GroupID) // Matrix::InterlockedAccumulate to resource descriptor // // CHECK: call void @dx.op.linAlgMatrixAccumulateToDescriptor.mC9M4N4U2S1(i32 -2147483621, -// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U2S1 %[[ACCUM0]], %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 4) +// CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U2S1 %[[ACCUM0]], %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 128) // CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align) AccMat1.InterlockedAccumulate(RWBAB, 0, 16, MatrixLayoutEnum::ColMajor); @@ -160,7 +160,7 @@ void main(uint ID : SV_GroupID) // CHECK-SAME: %dx.types.LinAlgMatrixC9M4N4U1S1 %[[MATB2]]) // CHECK-SAME: ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS) AccMat2.Accumulate(MatB2); - + // Matrix::MultiplyAccumulate // // CHECK: %[[ACCUM4:.*]] = call %dx.types.LinAlgMatrixC9M4N4U2S1 @@ -174,7 +174,7 @@ void main(uint ID : SV_GroupID) // Matrix::Load for thread-scope matrix // // CHECK: %[[TSMATA:.*]] = call %dx.types.LinAlgMatrixC9M4N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC9M4N4U0S0( -// CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 4) +// CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 128) // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) TSMatrixATy TSMatA = TSMatrixATy::Load(BAB, 0, 16); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl index 14e2f04e85..ea9e536182 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl @@ -1,5 +1,5 @@ // REQUIRES: dxil-1-10 -// RUN: %dxc -I %hlsl_headers -enable-16bit-types -T cs_6_10 %s | FileCheck %s +// RUN: %dxc -I %hlsl_headers -enable-16bit-types -T cs_6_10 %s | FileCheck %s #include using namespace dx::linalg; @@ -12,16 +12,16 @@ ByteAddressBuffer BAB : register(t0); [numthreads(4, 4, 4)] void main(uint ID : SV_GroupID) { - + // CHECK: %[[MAT1:.*]] = call %dx.types.LinAlgMatrixC8M8N4U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC8M8N4U0S0( -// CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 8, i32 1, i32 2) +// CHECK-SAME: i32 -2147483634, %dx.types.Handle %{{[0-9]+}}, i32 0, i32 8, i32 1, i32 128) // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) MatrixATy Mat1 = MatrixATy::Load(BAB, 0, 8); vector vec1 = 10.3f; // CHECK: %[[VEC2:.*]] = call <8 x half> @dx.op.linAlgMatVecMul.v8f16.mC8M8N4U0S0.v4f16(i32 -2147483623, -// CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> , i32 8) ; LinAlgMatVecMul(matrix,isOutputSigned,inputVector,interpretation) vector vec2 = Multiply(Mat1, vec1); @@ -42,9 +42,9 @@ void main(uint ID : SV_GroupID) { // CHECK: %[[RAWLOAD:.*]] = call %dx.types.ResRet.v8i16 @dx.op.rawBufferVectorLoad.v8i16(i32 303, // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 4096, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) - + // CHECK: %[[VEC_BIAS:.*]] = extractvalue %dx.types.ResRet.v8i16 %[[RAWLOAD]], 0 - + // CHECK: %[[VEC5:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8i16(i32 -2147483622, // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x i16> %[[VEC_BIAS]], i32 2) // CHECK-SAME:; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) @@ -56,7 +56,7 @@ void main(uint ID : SV_GroupID) { // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment) // CHECK: %[[VEC_BIAS:.*]] = extractvalue %dx.types.ResRet.v8i16 %[[RAWLOAD]], 0 - + // CHECK: %[[VEC6:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8i16(i32 -2147483622, // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x i16> %[[VEC_BIAS]], i32 2) // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) From 44ed4f45e6edeac093da41a5b81aeac3ef0e49f7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 16 Apr 2026 00:34:48 +0000 Subject: [PATCH 2/2] chore: autopublish 2026-04-16T00:34:48Z --- tools/clang/lib/Headers/hlsl/dx/linalg.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 209b0b3d5c..3a939db2ec 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -271,8 +271,7 @@ class Matrix { } static Matrix Load(ByteAddressBuffer Res, uint StartOffset, uint Stride, - MatrixLayoutEnum Layout, - uint Align = 128) { + MatrixLayoutEnum Layout, uint Align = 128) { Matrix Result; __builtin_LinAlg_MatrixLoadFromDescriptor(Result.__handle, Res, StartOffset, Stride, Layout, Align); @@ -280,8 +279,7 @@ class Matrix { } static Matrix Load(RWByteAddressBuffer Res, uint StartOffset, uint Stride, - MatrixLayoutEnum Layout, - uint Align = 128) { + MatrixLayoutEnum Layout, uint Align = 128) { Matrix Result; __builtin_LinAlg_MatrixLoadFromDescriptor(Result.__handle, Res, StartOffset, Stride, Layout, Align); @@ -351,8 +349,7 @@ class Matrix { typename hlsl::enable_if::type InterlockedAccumulate(RWByteAddressBuffer Res, uint StartOffset, uint Stride, - MatrixLayoutEnum Layout, - uint Align = 128) { + MatrixLayoutEnum Layout, uint Align = 128) { __builtin_LinAlg_MatrixAccumulateToDescriptor(__handle, Res, StartOffset, Stride, Layout, Align); } @@ -409,8 +406,7 @@ class Matrix { template static typename hlsl::enable_if::type - Load(ByteAddressBuffer Res, uint StartOffset, uint Stride, - uint Align = 128) { + Load(ByteAddressBuffer Res, uint StartOffset, uint Stride, uint Align = 128) { Matrix Result; __builtin_LinAlg_MatrixLoadFromDescriptor(Result.__handle, Res, StartOffset, Stride, Layout, Align);