Skip to content

Commit b6636d3

Browse files
committed
Fix OOB in subscript indexing of col-major matrix in cbuffer
A bug in HLOperationLower code that lowers the matrix subscript operation when the matrix is in a cbuffer could cause out-of-bounds alloca indexing. The indexing creates an alloca to store a column-vector loaded from one cbuffer vector. This should have one element per row of the matrix, so it can then index along that dimension using the local array. The bug was using the number of columns when sizing the alloca instead of the number of rows. This caused it to write to out-of-bounds index when the number of rows exceeded the number of columns (like for float3x2).
1 parent c522461 commit b6636d3

2 files changed

Lines changed: 159 additions & 1 deletion

File tree

lib/HLSL/HLOperationLower.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8189,7 +8189,7 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
81898189
// row.z = c[2].[idx]
81908190
// row.w = c[3].[idx]
81918191
Value *Elts[4];
8192-
ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns());
8192+
ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumRows());
81938193

81948194
IRBuilder<> AllocaBuilder(user->getParent()
81958195
->getParent()
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s
2+
3+
; Source HLSL:
4+
; // dxc -T cs_6_0
5+
; float3x2 M;
6+
; RWStructuredBuffer<float2> SB;
7+
; [numthreads(3,1,1)]
8+
; void main(uint row : SV_GroupIndex) {
9+
; SB[row] = M[row];
10+
; }
11+
12+
; Check cbuffer column-major matrix subscript codegen uses correct alloca size.
13+
14+
; Column-major matrix subscript is lowered in HLOperationLower by:
15+
; Loading one column into an alloca, dynamically indexing that by row,
16+
; then doing the same for the next column.
17+
; Each column is 3 floats for 3 rows. Thus alloca size should be 3 floats.
18+
19+
; alloca size should be number of rows (3), not number of columns (2)
20+
; CHECK: %[[alloca:[^ ]+]] = alloca [3 x float]
21+
; CHECK: %[[row:[^ ]+]] = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
22+
; CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[cbh:[^ ]+]], i32 0)
23+
; CHECK: %[[_cb_ex0:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
24+
; CHECK: %[[_v3x:[^ ]+]] = insertelement <3 x float> undef, float %[[_cb_ex0]], i64 0
25+
; CHECK: %[[_cb_ex1:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1
26+
; CHECK: %[[_v3xy:[^ ]+]] = insertelement <3 x float> %[[_v3x]], float %[[_cb_ex1]], i64 1
27+
; CHECK: %[[_cb_ex2:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 2
28+
; CHECK: %[[_v3xyz:[^ ]+]] = insertelement <3 x float> %[[_v3xy]], float %[[_cb_ex2]], i64 2
29+
; CHECK: %[[cb0_x:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 0
30+
; CHECK: %[[_alloca_0:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 0
31+
; CHECK: store float %[[cb0_x]], float* %[[_alloca_0]]
32+
; CHECK: %[[cb0_y:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 1
33+
; CHECK: %[[_alloca_1:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 1
34+
; CHECK: store float %[[cb0_y]], float* %[[_alloca_1]]
35+
; CHECK: %[[cb0_z:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 2
36+
; CHECK: %[[_alloca_2:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 2
37+
; CHECK: store float %[[cb0_z]], float* %[[_alloca_2]]
38+
; CHECK: %[[_alloca_row:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 %[[row]]
39+
; CHECK: %[[M_row_0:[^ ]+]] = load float, float* %[[_alloca_row]]
40+
; CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[cbh]], i32 1)
41+
; CHECK: %[[_cb_ex0:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0
42+
; CHECK: %[[_v3x:[^ ]+]] = insertelement <3 x float> undef, float %[[_cb_ex0]], i64 0
43+
; CHECK: %[[_cb_ex1:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 1
44+
; CHECK: %[[_v3xy:[^ ]+]] = insertelement <3 x float> %[[_v3x]], float %[[_cb_ex1]], i64 1
45+
; CHECK: %[[_cb_ex2:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 2
46+
; CHECK: %[[_v3xyz:[^ ]+]] = insertelement <3 x float> %[[_v3xy]], float %[[_cb_ex2]], i64 2
47+
; CHECK: %[[cb1_x:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 0
48+
; CHECK: %[[_alloca_0:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 0
49+
; CHECK: store float %[[cb1_x]], float* %[[_alloca_0]]
50+
; CHECK: %[[cb1_y:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 1
51+
; CHECK: %[[_alloca_1:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 1
52+
; CHECK: store float %[[cb1_y]], float* %[[_alloca_1]]
53+
; CHECK: %[[cb1_z:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 2
54+
; CHECK: %[[_alloca_2:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 2
55+
; CHECK: store float %[[cb1_z]], float* %[[_alloca_2]]
56+
; CHECK: %[[_alloca_row:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 %[[row]]
57+
; CHECK: %[[M_row_1:[^ ]+]] = load float, float* %[[_alloca_row]]
58+
; CHECK: %[[M_row_v2x:[^ ]+]] = insertelement <2 x float> undef, float %[[M_row_0]], i64 0
59+
; CHECK: %[[M_row_v2xy:[^ ]+]] = insertelement <2 x float> %[[M_row_v2x]], float %[[M_row_1]], i64 1
60+
; CHECK: %[[ex_M_row_0:[^ ]+]] = extractelement <2 x float> %[[M_row_v2xy]], i64 0
61+
; CHECK: %[[ex_M_row_1:[^ ]+]] = extractelement <2 x float> %[[M_row_v2xy]], i64 1
62+
; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %{{[^,]+}}, i32 %[[row]], i32 0, float %[[ex_M_row_0]], float %[[ex_M_row_1]], float undef, float undef, i8 3, i32 4)
63+
64+
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
65+
target triple = "dxil-ms-dx"
66+
67+
%"class.RWStructuredBuffer<vector<float, 2> >" = type { <2 x float> }
68+
%"$Globals" = type { %class.matrix.float.3.2 }
69+
%class.matrix.float.3.2 = type { [3 x <2 x float>] }
70+
%dx.types.Handle = type { i8* }
71+
%dx.types.ResourceProperties = type { i32, i32 }
72+
73+
@"\01?SB@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer<vector<float, 2> >", align 4
74+
@"$Globals" = external constant %"$Globals"
75+
76+
; Function Attrs: nounwind
77+
define void @main(i32 %row) #0 {
78+
entry:
79+
%0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0)
80+
%1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 28 }, %"$Globals" undef)
81+
%2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0)
82+
%3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0
83+
%4 = add i32 3, %row
84+
%5 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.3.2*, i32, i32)"(i32 1, %class.matrix.float.3.2* %3, i32 %row, i32 %4)
85+
%6 = load <2 x float>, <2 x float>* %5
86+
%7 = load %"class.RWStructuredBuffer<vector<float, 2> >", %"class.RWStructuredBuffer<vector<float, 2> >"* @"\01?SB@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A"
87+
%8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<vector<float, 2> >\22)"(i32 0, %"class.RWStructuredBuffer<vector<float, 2> >" %7)
88+
%9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<vector<float, 2> >\22)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer<vector<float, 2> >" zeroinitializer)
89+
%10 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %9, i32 %row)
90+
store <2 x float> %6, <2 x float>* %10
91+
ret void
92+
}
93+
94+
; Function Attrs: nounwind readnone
95+
declare <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.3.2*, i32, i32)"(i32, %class.matrix.float.3.2*, i32, i32) #1
96+
97+
; Function Attrs: nounwind readnone
98+
declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1
99+
100+
; Function Attrs: nounwind readnone
101+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<vector<float, 2> >\22)"(i32, %"class.RWStructuredBuffer<vector<float, 2> >") #1
102+
103+
; Function Attrs: nounwind readnone
104+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<vector<float, 2> >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer<vector<float, 2> >") #1
105+
106+
; Function Attrs: nounwind readnone
107+
declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1
108+
109+
; Function Attrs: nounwind readnone
110+
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1
111+
112+
; Function Attrs: nounwind readnone
113+
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1
114+
115+
attributes #0 = { nounwind }
116+
attributes #1 = { nounwind readnone }
117+
118+
!pauseresume = !{!0}
119+
!llvm.ident = !{!1}
120+
!dx.fnprops = !{!2}
121+
!dx.options = !{!3, !4}
122+
!dx.version = !{!5}
123+
!dx.valver = !{!6}
124+
!dx.shaderModel = !{!7}
125+
!dx.resources = !{!8}
126+
!dx.typeAnnotations = !{!14, !20}
127+
!dx.entryPoints = !{!27}
128+
129+
!0 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
130+
!1 = !{!"dxc(private) 1.8.0.15080 (main, c52246122)"}
131+
!2 = !{void (i32)* @main, i32 5, i32 3, i32 1, i32 1}
132+
!3 = !{i32 -2147483584}
133+
!4 = !{i32 -1}
134+
!5 = !{i32 1, i32 0}
135+
!6 = !{i32 1, i32 10}
136+
!7 = !{!"cs", i32 6, i32 0}
137+
!8 = !{null, !9, !12, null}
138+
!9 = !{!10}
139+
!10 = !{i32 0, %"class.RWStructuredBuffer<vector<float, 2> >"* @"\01?SB@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"SB", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !11}
140+
!11 = !{i32 1, i32 8}
141+
!12 = !{!13}
142+
!13 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 28, null}
143+
!14 = !{i32 0, %"class.RWStructuredBuffer<vector<float, 2> >" undef, !15, %"$Globals" undef, !17}
144+
!15 = !{i32 8, !16}
145+
!16 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9}
146+
!17 = !{i32 28, !18}
147+
!18 = !{i32 6, !"M", i32 2, !19, i32 3, i32 0, i32 7, i32 9}
148+
!19 = !{i32 3, i32 2, i32 2}
149+
!20 = !{i32 1, void (i32)* @main, !21}
150+
!21 = !{!22, !24}
151+
!22 = !{i32 1, !23, !23}
152+
!23 = !{}
153+
!24 = !{i32 0, !25, !26}
154+
!25 = !{i32 4, !"SV_GroupIndex", i32 7, i32 5}
155+
!26 = !{i32 0}
156+
!27 = !{void (i32)* @main, !"main", null, !8, !28}
157+
!28 = !{i32 4, !29}
158+
!29 = !{i32 -858993460, i32 -858993460, i32 -858993460}

0 commit comments

Comments
 (0)