From 4e8bfc9d55d2815d229b17a480015d73d7f3edb6 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Fri, 17 Apr 2026 17:38:51 -0700 Subject: [PATCH 1/2] [SM6.10][Bugfix] Fix Size check for input interpreted vector in MultiplyAdd Fixes vector size check for input interpreted vector. Adds tests for MultiplyAdd with odd sizes and with packed input vector. Fixes #8385 --- tools/clang/lib/Headers/hlsl/dx/linalg.h | 4 +- .../CodeGenDXIL/hlsl/linalg/api/vectors.hlsl | 57 +++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 3a939db2ec..d53df28e6b 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -506,7 +506,7 @@ template // clang-format off typename hlsl::enable_if< - InterpretedVector::Size == K, + InterpretedVector::Size >= K, vector >::type // clang-format on MultiplyAdd(Matrix MatrixA, @@ -542,7 +542,7 @@ template // clang-format off typename hlsl::enable_if< - InterpretedVector::Size == K, + InterpretedVector::Size >= K, vector >::type // clang-format on MultiplyAdd(Matrix MatrixA, diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl index ea9e536182..58f19b887c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl @@ -7,6 +7,7 @@ using namespace dx::linalg; using MatrixATy = Matrix; using MatrixAccum_8_8_Ty = Matrix; using MatrixAccum_8_4_Ty = Matrix; +using Matrix_7_15_ATy = Matrix; ByteAddressBuffer BAB : register(t0); @@ -87,4 +88,60 @@ void main(uint ID : SV_GroupID) { half3 ThreeF16 = BAB.Load(256); InterpretedVector convertedPacked2 = Convert(ThreeF16); + + // Test MultiplyAdd with odd sizes + // + vector vecH15 = BAB.Load< vector >(168); + vector vecH7 = BAB.Load< vector >(64); + + InterpretedVector interpVecH15 = 
MakeInterpretedVector(vecH15); + + // CHECK: %[[MAT_7_15:.*]] = call %dx.types.LinAlgMatrixC8M7N15U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC8M7N15U0S0(i32 -2147483634, + // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 128) ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align) + Matrix_7_15_ATy Mat_7_15 = Matrix_7_15_ATy::Load(BAB, 0, 16); + + // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + vector vec7 = MultiplyAdd(Mat_7_15, vecH15, vecH7); + + // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622, %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], + // CHECK-SAME: i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + vector vec8 = MultiplyAdd(Mat_7_15, interpVecH15, vecH7); + + // CHECK: %[[LOAD1:.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 2) + // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + // CHECK: %[[MEM_BIAS1:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD1]], 0 + // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %[[MEM_BIAS1]], i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + VectorRef memBias7 = {BAB, 512}; + vector vec9 = MultiplyAdd(Mat_7_15, vecH15, memBias7); + + // CHECK: 
%[[LOAD2:.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 2) + // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + // CHECK: %[[MEM_BIAS2:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD2]], 0 + // CHECK-NEXT: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %[[MEM_BIAS2]], i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + vector vec10 = MultiplyAdd(Mat_7_15, interpVecH15, memBias7); + + // Test MultiplyAdd with packed input vector + // + // CHECK: %[[INTERP_VEC_H15_PACKED:.*]] = call <4 x i32> @dx.op.linAlgConvert.v4i32.v15f16(i32 -2147483618, + // CHECK-SAME: <15 x half> %{{[0-9]+}}, i32 8, i32 21) ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation) + InterpretedVector interpVecH15Packed = Convert(vecH15); + + // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v4i32.v7f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %{{[0-9]+}}, i32 8) + // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + vector vec11 = MultiplyAdd(Mat_7_15, interpVecH15Packed, vecH7); + + // CHECK: %[[LOAD3:.+]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 2) + // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + // CHECK-NEXT: %[[MEM_BIAS3:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD3]], 0 + // CHECK-NEXT: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v4i32.v7f16(i32 -2147483622, + // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %[[MEM_BIAS3]], i32 8) + // CHECK-SAME: ; 
LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation) + vector vec12 = MultiplyAdd(Mat_7_15, interpVecH15Packed, memBias7); } From 4331e288148bfc68b9ead59878d8013d2dcce799 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Mon, 20 Apr 2026 13:56:22 -0700 Subject: [PATCH 2/2] Update the check to compare VecK against the scalar count calculated from packed elements --- tools/clang/lib/Headers/hlsl/dx/linalg.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index d53df28e6b..9b43dcc6cb 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -204,6 +204,14 @@ template struct DimMN { static const SIZE_TYPE N = MVal; }; +template +struct ScalarCountFromPackedComponents { + static const SIZE_TYPE ElementsPerScalar = + ComponentTypeTraits::ElementsPerScalar; + static const SIZE_TYPE Value = + (PackedComponentCount + ElementsPerScalar - 1) / ElementsPerScalar; +}; + } // namespace __detail template struct VectorRef { @@ -506,7 +514,7 @@ template // clang-format off typename hlsl::enable_if< - InterpretedVector::Size >= K, + VecK == __detail::ScalarCountFromPackedComponents::Value, vector >::type // clang-format on MultiplyAdd(Matrix MatrixA, @@ -542,7 +550,7 @@ template // clang-format off typename hlsl::enable_if< - InterpretedVector::Size >= K, + VecK == __detail::ScalarCountFromPackedComponents::Value, vector >::type // clang-format on MultiplyAdd(Matrix MatrixA,