Skip to content

Commit 3eba197

Browse files
ssjia (SS-JIA)
authored and committed
[ET-VK] Replace dynamic UBO indexing with safe_idx across shaders
The Adreno 740 GPU driver crashes (SIGSEGV in vkCreateComputePipelines) when GLSL shaders dynamically index a UBO-backed ivec4/ivec3 with a specialization-constant-derived value. This was causing the skin segmentation model to crash during pipeline creation on Samsung S23.

Fix all instances across 18 shader files by replacing patterns like `meta.sizes[packed_dim]` with `safe_idx(meta.sizes, packed_dim)`, which uses an if/else chain that the driver resolves at pipeline creation time.

Changes:
- Add safe_idx(ivec3) overload to indexing.glslh
- Fix transfer_texture.glsl, slice.glslh, select.glslh (transfer ops)
- Fix nchw_to_int8x4_buffer.glsl, full_texture.glsl (staging/utility)
- Fix gather, split, index_tensor, where, expand, pad, repeat, arange texture shaders (1-line fixes each)
- Fix softmax.glsl, reduce.glsl, reduce2d.glsl, var_texture3d.glsl (reduction shaders with multiple fixes + added indexing.glslh include)
- Remove unused ShaderNameUtils.h include from Slice.cpp

Differential Revision: [D98220450](https://our.internmc.facebook.com/intern/diff/D98220450/)
ghstack-source-id: 357844383
Pull Request resolved: #18512
1 parent d7e20b5 commit 3eba197

19 files changed

Lines changed: 52 additions & 42 deletions

backends/vulkan/runtime/graph/ops/glsl/arange_texture.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ void main() {
4545
// Compute the value for each element in the texel along the packed dim.
4646
VEC4_T outtex = VEC4_T(0);
4747
int limit = min(
48-
4, outp.sizes[packed_dim] - out_tidx.data[packed_dim]);
48+
4, safe_idx(outp.sizes, packed_dim) - out_tidx.data[packed_dim]);
4949
for (int comp = 0; comp < limit; comp++) {
5050
int elem_idx = out_tidx.data[0]; // W index is the linear element index
5151
outtex[comp] = VEC4_T(start + elem_idx * step).x;

backends/vulkan/runtime/graph/ops/glsl/expand_texture.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ void main() {
4545
VEC4_T out_texel = VEC4_T(0);
4646

4747
int limit = min(
48-
4, outp.sizes[packed_dim] - out_tidx.data[packed_dim]);
48+
4, safe_idx(outp.sizes, packed_dim) - out_tidx.data[packed_dim]);
4949
for (int comp = 0; comp < 4; comp++) {
5050
if (comp >= limit) {
5151
break;

backends/vulkan/runtime/graph/ops/glsl/full_texture.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ void main() {
4040

4141
TensorIndex4D tidx =
4242
texture_pos_to_tensor4d_idx_simple(outp, pos, out_layout);
43-
const int packed_dim_size = outp.sizes[packed_dim];
43+
const int packed_dim_size = safe_idx(outp.sizes, packed_dim);
4444
int packed_idx = tidx.data[packed_dim];
4545

4646
if (packed_idx + 3 >= packed_dim_size) {

backends/vulkan/runtime/graph/ops/glsl/gather_texture.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ void main() {
5353
VEC4_T out_texel = VEC4_T(0);
5454

5555
int limit = min(
56-
4, outp.sizes[out_packed_dim] - out_tidx.data[out_packed_dim]);
56+
4, safe_idx(outp.sizes, out_packed_dim) - out_tidx.data[out_packed_dim]);
5757
for (int comp = 0; comp < 4; comp++) {
5858
TensorIndex4D input_tidx = out_tidx;
5959
int gather_idx = idx_texel[comp];

backends/vulkan/runtime/graph/ops/glsl/index_tensor_texture.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ void main() {
5555
VEC4_T out_texel = VEC4_T(0);
5656

5757
int limit = min(
58-
4, outp.sizes[out_packed_dim] - out_tidx.data[out_packed_dim]);
58+
4, safe_idx(outp.sizes, out_packed_dim) - out_tidx.data[out_packed_dim]);
5959
for (int comp = 0; comp < limit; comp++) {
6060
int idx = idx_texel[comp];
6161

backends/vulkan/runtime/graph/ops/glsl/indexing.glslh

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,14 @@ uint safe_idx(const uvec4 v, const int idx) {
9999
return v.w;
100100
}
101101

102+
// Safe ivec3 component access via a chain of comparisons instead of v[idx]; same
103+
// rationale as safe_idx for ivec4. Any idx other than 0 or 1 returns v.z.
104+
int safe_idx(const ivec3 v, const int idx) {
105+
if (idx == 0) return v.x;
106+
if (idx == 1) return v.y;
107+
return v.z;
108+
}
109+
102110
// Safe ivec4 component write via if/else chain. Companion to safe_idx for
103111
// cases where we need to set a component by a spec-const-derived index.
104112
void safe_set(inout ivec4 v, const int idx, const int val) {

backends/vulkan/runtime/graph/ops/glsl/nchw_to_int8x4_buffer.glsl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ void main() {
4242
texel_idx_to_tensor4d_idx(outp, texel_idx, outp_layout);
4343

4444
// Bounds check on outer dimension
45-
if (tidx.data[outer_dim] >= int(outp.sizes[0][outer_dim])) {
45+
if (tidx.data[outer_dim] >= int(safe_idx(outp.sizes[0], outer_dim))) {
4646
return;
4747
}
4848

@@ -55,7 +55,7 @@ void main() {
5555
int packed = 0;
5656
[[unroll]] for (int i = 0; i < 4; ++i) {
5757
const int elem_inner = tidx.data[inner_dim] + i;
58-
if (elem_inner < int(outp.sizes[0][inner_dim])) {
58+
if (elem_inner < int(safe_idx(outp.sizes[0], inner_dim))) {
5959
// Build element coordinates
6060
ivec4 elem = tidx.data;
6161
elem[inner_dim] = elem_inner;

backends/vulkan/runtime/graph/ops/glsl/pad_texture.glsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ void main() {
5252

5353
// Tail texels may have fewer than 4 valid elements; leave extras as 0.
5454
const int limit =
55-
min(4, outp.sizes[packed_dim] - out_tidx.data[packed_dim]);
55+
min(4, safe_idx(outp.sizes, packed_dim) - out_tidx.data[packed_dim]);
5656

5757
VEC4_T out_texel = VEC4_T(0);
5858

backends/vulkan/runtime/graph/ops/glsl/reduce.glsl

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ layout(constant_id = 5) const int group_dim = 1;
4343
shared vec4 shared_vecs[MAX_NTHREADS];
4444

4545
#include "indexing_utils.h"
46+
#include "indexing.glslh"
4647

4748
int tid_to_smi(const ivec2 tid) {
4849
return tid.x + tid.y * NWORKERS;
@@ -95,7 +96,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
9596
scan_pos[reduce_dim] = tid.x;
9697
// Partially accumulate over elements i, i + NWORKERS, i + 2*NWORKERS, ... of
9798
// the reduction row
98-
for (int i = tid.x; i < tin_sizes[reduce_dim];
99+
for (int i = tid.x; i < safe_idx(tin_sizes, reduce_dim);
99100
i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
100101
accum = UPDATE_ACCUM(accum, load_texel(tin, scan_pos));
101102
}
@@ -115,11 +116,11 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
115116

116117
// Determine if there are any padding elements in the final texel of the
117118
// packed dimension
118-
const int nspill = mod4(tin_sizes[packed_dim]);
119+
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
119120
// Detect if this thread is working on the final texels of the packed
120121
// dimension, which may have padding elements
121122
const bool is_last_texel =
122-
scan_pos[packed_dim] == (tin_limits[packed_dim] - 1);
123+
scan_pos[packed_dim] == (safe_idx(tin_limits, packed_dim) - 1);
123124

124125
// Explicitly set padding elements to 0
125126
if (is_last_texel && nspill > 0) {
@@ -145,10 +146,10 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
145146
const int smi = tid_to_smi(tid);
146147

147148
// Number of non-padding elements in the last texel in the reduction row
148-
const int nspill = mod4(tin_sizes[packed_dim]);
149+
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
149150
// Only reduce up to the last "complete" texel. The last texel will need to be
150151
// handled specially if it has padding elements.
151-
const int reduce_len = tin_sizes[packed_dim] - nspill;
152+
const int reduce_len = safe_idx(tin_sizes, packed_dim) - nspill;
152153

153154
scan_pos[reduce_dim] = 0;
154155
vec4 accum = INIT_ACCUM(vec4(load_texel(tin, scan_pos).x));
@@ -163,7 +164,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
163164
// For the last texel in the dim, if there are padding elements then each
164165
// element of the texel needs to be processed individually such that the
165166
// padding elements are ignored
166-
if (scan_pos[reduce_dim] == tin_limits[reduce_dim] - 1 && nspill > 0) {
167+
if (scan_pos[reduce_dim] == safe_idx(tin_limits, reduce_dim) - 1 && nspill > 0) {
167168
const vec4 intex = load_texel(tin, scan_pos);
168169
for (int i = 0; i < nspill; i++) {
169170
accum.x = UPDATE_ACCUM(accum.x, intex[i]);

backends/vulkan/runtime/graph/ops/glsl/reduce2d.glsl

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ layout(constant_id = 6) const int group_dim = 2;
4444
shared vec4 shared_vecs[MAX_NTHREADS];
4545

4646
#include "indexing_utils.h"
47+
#include "indexing.glslh"
4748

4849
int tid_to_smi(const ivec2 tid) {
4950
return tid.x + tid.y * NWORKERS;
@@ -68,12 +69,12 @@ void reduce_2d_non_packed_dim(const ivec2 tid, ivec3 scan_pos) {
6869

6970
// First dimension reduction
7071
scan_pos[reduce_dim1] = tid.x;
71-
for (int i = tid.x; i < tin_sizes[reduce_dim1];
72+
for (int i = tid.x; i < safe_idx(tin_sizes, reduce_dim1);
7273
i += NWORKERS, scan_pos[reduce_dim1] += NWORKERS) {
7374

7475
// Second dimension reduction
7576
scan_pos[reduce_dim2] = 0;
76-
for (int j = 0; j < tin_sizes[reduce_dim2]; j++, scan_pos[reduce_dim2]++) {
77+
for (int j = 0; j < safe_idx(tin_sizes, reduce_dim2); j++, scan_pos[reduce_dim2]++) {
7778
accum = UPDATE_ACCUM(accum, load_texel(tin, scan_pos));
7879
}
7980
}
@@ -93,11 +94,11 @@ void reduce_2d_non_packed_dim(const ivec2 tid, ivec3 scan_pos) {
9394

9495
// Determine if there are any padding elements in the final texel of the
9596
// packed dimension
96-
const int nspill = mod4(tin_sizes[packed_dim]);
97+
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
9798
// Detect if this thread is working on the final texels of the packed
9899
// dimension, which may have padding elements
99-
const bool is_last_texel =
100-
scan_pos[packed_dim] == (tin_limits[packed_dim] - 1);
100+
const bool is_last_texel =
101+
scan_pos[packed_dim] == (safe_idx(tin_limits, packed_dim) - 1);
101102

102103
// Explicitly set padding elements to 0
103104
if (is_last_texel && nspill > 0) {

0 commit comments

Comments
 (0)