Skip to content

Commit ff806a1

Browse files
authored
opencl: refactor Adreno q4_0 (ggml-org#22335)
* opencl: refactor adreno q4_0 gemm/gemv dispatch
* opencl: refactor q4_0 gemm/gemv loading, use consistent names
* opencl: use consistent name for adreno q8_0 gemm/gemv
* opencl: use consistent names for adreno q4_0 gemm/gemv
* opencl: simplify adreno q4_0 set_tensor
* opencl: refactor q4_0 get_tensor
1 parent d5003b6 commit ff806a1

7 files changed

Lines changed: 355 additions & 623 deletions

ggml/src/ggml-opencl/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -66,16 +66,13 @@ set(GGML_OPENCL_KERNELS
6666
diag
6767
div
6868
gelu
69-
gemv_noshuffle_general
70-
gemv_noshuffle
7169
get_rows
7270
glu
7371
group_norm
7472
solve_tri
7573
im2col_f32
7674
im2col_f16
7775
mean
78-
mul_mat_Ab_Bi_8x4
7976
mul_mv_f16_f16
8077
mul_mv_f16_f32_1row
8178
mul_mv_f16_f32_l4
@@ -120,12 +117,15 @@ set(GGML_OPENCL_KERNELS
120117
mul_mm_q4_k_f32_l4_lm
121118
mul_mm_q5_k_f32_l4_lm
122119
mul_mm_q6_k_f32_l4_lm
123-
mul_mm_q8_0_f32_8x4
120+
gemv_noshuffle_q4_0_f32
121+
gemv_noshuffle_q4_0_f32_spec
122+
gemm_noshuffle_q4_0_f32
124123
gemv_noshuffle_q4_1_f32
125124
gemm_noshuffle_q4_1_f32
126125
gemv_noshuffle_iq4_nl_f32
127126
gemm_noshuffle_iq4_nl_f32
128-
gemv_noshuffle_general_q8_0_f32
127+
gemv_noshuffle_q8_0_f32
128+
gemm_noshuffle_q8_0_f32
129129
gemv_noshuffle_q4_k_f32
130130
gemm_noshuffle_q4_k_f32
131131
gemv_noshuffle_q6_k_f32

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 338 additions & 606 deletions
Large diffs are not rendered by default.

ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl renamed to ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_0_f32.cl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
REQD_SUBGROUP_SIZE_128
1818
#endif
1919

20-
kernel void kernel_mul_mat_Ab_Bi_8x4(
20+
kernel void kernel_gemm_noshuffle_q4_0_f32(
2121
global const ushort * src0_q, // quantized A
2222
global const half * src0_d, // A scales
2323
__read_only image1d_buffer_t src1, // B (1d image)

ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl renamed to ggml/src/ggml-opencl/kernels/gemm_noshuffle_q8_0_f32.cl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
REQD_SUBGROUP_SIZE_128
1212
#endif
1313

14-
kernel void kernel_mul_mm_q8_0_f32_8x4(
14+
kernel void kernel_gemm_noshuffle_q8_0_f32(
1515
global const uint * src0_q,
1616
global const half * src0_d,
1717
__read_only image1d_buffer_t src1,

ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl renamed to ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32.cl

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,7 @@
191191
#ifdef ADRENO_GPU
192192
REQD_SUBGROUP_SIZE_64
193193
#endif
194-
__kernel void kernel_gemv_noshuffle(
194+
__kernel void kernel_gemv_noshuffle_q4_0_f32(
195195
__read_only image1d_buffer_t src0_q, // quantized A
196196
global half2 * src0_d, // A scales
197197
__read_only image1d_buffer_t src1, // B
@@ -238,21 +238,21 @@ __kernel void kernel_gemv_noshuffle(
238238
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 1)).x;
239239
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 2)).x;
240240
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 3)).x;
241-
#ifdef VECTOR_SUB_GROUP_BROADCAT
241+
#ifdef VECTOR_SUB_GROUP_BROADCAST
242242
dequantizeBlockAccum_ns_sgbroadcast_8_hi(totalSum, as_ushort8(regA), regS, regB);
243243
#else
244244
dequantizeBlockAccum_ns_sgbroadcast_1_hi(totalSum, as_ushort8(regA), regS, regB);
245-
#endif // VECTOR_SUB_GROUP_BROADCAT
245+
#endif // VECTOR_SUB_GROUP_BROADCAST
246246

247247
regA.s0 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 4)).x;
248248
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 5)).x;
249249
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 6)).x;
250250
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 7)).x;
251-
#ifdef VECTOR_SUB_GROUP_BROADCAT
251+
#ifdef VECTOR_SUB_GROUP_BROADCAST
252252
dequantizeBlockAccum_ns_sgbroadcast_8_lo(totalSum, as_ushort8(regA), regS, regB);
253253
#else
254254
dequantizeBlockAccum_ns_sgbroadcast_1_lo(totalSum, as_ushort8(regA), regS, regB);
255-
#endif // VECTOR_SUB_GROUP_BROADCAT
255+
#endif // VECTOR_SUB_GROUP_BROADCAST
256256
}
257257

258258
// reduction in local memory, assumes #wave=4

ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl renamed to ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_0_f32_spec.cl

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,7 @@
191191
#ifdef ADRENO_GPU
192192
REQD_SUBGROUP_SIZE_64
193193
#endif
194-
__kernel void kernel_gemv_noshuffle(
194+
__kernel void kernel_gemv_noshuffle_q4_0_f32(
195195
__read_only image1d_buffer_t src0_q, // quantized A
196196
global half2 * src0_d, // A scales
197197
__read_only image1d_buffer_t src1, // B
@@ -232,21 +232,21 @@ __kernel void kernel_gemv_noshuffle(
232232
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 1)).x;
233233
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 2)).x;
234234
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 3)).x;
235-
#ifdef VECTOR_SUB_GROUP_BROADCAT
235+
#ifdef VECTOR_SUB_GROUP_BROADCAST
236236
dequantizeBlockAccum_ns_sgbroadcast_8_hi(totalSum, as_ushort8(regA), regS, regB);
237237
#else
238238
dequantizeBlockAccum_ns_sgbroadcast_1_hi(totalSum, as_ushort8(regA), regS, regB);
239-
#endif // VECTOR_SUB_GROUP_BROADCAT
239+
#endif // VECTOR_SUB_GROUP_BROADCAST
240240

241241
regA.s0 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 4)).x;
242242
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 5)).x;
243243
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 6)).x;
244244
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 7)).x;
245-
#ifdef VECTOR_SUB_GROUP_BROADCAT
245+
#ifdef VECTOR_SUB_GROUP_BROADCAST
246246
dequantizeBlockAccum_ns_sgbroadcast_8_lo(totalSum, as_ushort8(regA), regS, regB);
247247
#else
248248
dequantizeBlockAccum_ns_sgbroadcast_1_lo(totalSum, as_ushort8(regA), regS, regB);
249-
#endif // VECTOR_SUB_GROUP_BROADCAT
249+
#endif // VECTOR_SUB_GROUP_BROADCAST
250250
}
251251

252252
// reduction in local memory, assumes #wave=4

ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl renamed to ggml/src/ggml-opencl/kernels/gemv_noshuffle_q8_0_f32.cl

File renamed without changes.

0 commit comments

Comments
 (0)