Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,31 @@ void quantize(uint dst_idx, uint src_idx)
}
#endif

#if defined(DATA_A_Q1_0)
// Quantize QUANT_K_Q1_0 consecutive floats starting at data_s[src_idx] into the
// single Q1_0 block data_q[dst_idx].
//   - d  : mean absolute value of the inputs, stored as fp16.
//   - qs : one bit per input, set when the input is >= 0; element j lands in
//          bit (j % 8) of byte (j / 8).
void quantize(uint dst_idx, uint src_idx)
{
    // Accumulate |x| over the block in element order.
    float abs_total = 0.0;
    [[unroll]] for (int k = 0; k < QUANT_K_Q1_0; k++) {
        abs_total += abs(data_s[src_idx + k]);
    }

    const float scale = abs_total / QUANT_K_Q1_0;
    data_q[dst_idx].d = float16_t(scale);

    // Build each output byte in a local and store it once.
    [[unroll]] for (int byte_i = 0; byte_i < QUANT_K_Q1_0 / 8; ++byte_i) {
        uint packed = 0u;
        [[unroll]] for (int bit_i = 0; bit_i < 8; ++bit_i) {
            if (data_s[src_idx + byte_i * 8 + bit_i] >= 0.0) {
                packed |= 1u << bit_i;
            }
        }
        data_q[dst_idx].qs[byte_i] = uint8_t(packed);
    }
}
#endif

#if defined(DATA_A_IQ4_NL)
uint best_index(float x) {
if (x <= kvalues_iq4nl[0]) return 0;
Expand Down
24 changes: 24 additions & 0 deletions ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,23 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
}
#endif

#if defined(DATA_A_Q1_0)
// Return the unscaled sign values (+1.0 for a set bit, -1.0 for a clear bit) of
// two consecutive Q1_0 elements in block data_a[a_offset + ib].
// iqs is the element index inside the block: byte iqs / 8, starting at bit iqs % 8.
// NOTE(review): both bits are taken from the same byte, so this presumably
// expects iqs % 8 != 7 — confirm against callers.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint byte_idx = iqs >> 3u;
    const uint shift    = iqs & 7u;
    const uint window   = uint(data_a[a_offset + ib].qs[byte_idx]) >> shift;

    const float first  = ((window & 1u) == 0u) ? -1.0f : 1.0f;
    const float second = ((window & 2u) == 0u) ? -1.0f : 1.0f;
    return vec2(first, second);
}
// Four-element variant of dequantize: returns the unscaled sign values
// (+1.0 for a set bit, -1.0 for a clear bit) of four consecutive Q1_0 elements
// in block data_a[a_offset + ib], starting at bit iqs % 8 of byte iqs / 8.
// NOTE(review): all four bits come from one byte, so iqs % 8 is presumably
// 0 or 4 — confirm against callers.
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
    const uint window = uint(data_a[a_offset + ib].qs[iqs >> 3u]) >> (iqs & 7u);

    // Isolate the four low bits as 0/1 lanes, then map 0 -> -1 and 1 -> +1 (exact in float).
    const uvec4 lanes = uvec4(window, window >> 1u, window >> 2u, window >> 3u) & uvec4(1u);
    return vec4(lanes) * 2.0f - 1.0f;
}
#endif

#if defined(DATA_A_IQ1_S)
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const uint ib32 = iqs / 32;
Expand Down Expand Up @@ -454,6 +471,13 @@ vec2 get_dm(uint ib, uint a_offset) {
}
#endif

#if defined(DATA_A_Q1_0)
// Return the scale pair for Q1_0 block data_a[a_offset + ib]: x is the block's
// fp16 scale widened to float, y is always 0.
vec2 get_dm(uint ib, uint a_offset) {
    return vec2(float(data_a[a_offset + ib].d), 0.0);
}
#endif

#if defined(DATA_A_MXFP4)
vec2 get_dm(uint ib, uint a_offset) {
return vec2(e8m0_to_fp32(data_a[a_offset + ib].e), 0);
Expand Down
16 changes: 15 additions & 1 deletion ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ float16_t dequantFuncF32(const in decodeBufF32 bl, const in uint blockCoords[2],
return vf16[idx];
}

// Buffer-reference view of a single block_q1_0 (2-byte reference alignment).
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ1_0 {
block_q1_0 block;
};

// Decode one element of the Q1_0 block referenced by bl.
// coordInBlock[1] is the element index; its bit in qs selects +d (set) or -d (clear).
// blockCoords is not used.
float16_t dequantFuncQ1_0(const in decodeBufQ1_0 bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    const uint elem     = coordInBlock[1];
    const uint byte_idx = (elem >> 3u) & 0xFu; // same as (elem & 0x78) >> 3
    const uint bit_idx  = elem & 7u;

    const float16_t scale = bl.block.d;
    const uint sign_bit = (uint(bl.block.qs[byte_idx]) >> bit_idx) & 1u;
    return (sign_bit == 0u) ? -scale : scale;
}

layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ4_0 {
block_q4_0_packed16 block;
};
Expand Down Expand Up @@ -685,7 +697,9 @@ float16_t dequantFuncMXFP4(const in decodeBufMXFP4 bl, const in uint blockCoords
}
#endif

#if defined(DATA_A_Q4_0)
#if defined(DATA_A_Q1_0)
#define dequantFuncA dequantFuncQ1_0
#elif defined(DATA_A_Q4_0)
#define dequantFuncA dequantFuncQ4_0
#elif defined(DATA_A_Q4_1)
#define dequantFuncA dequantFuncQ4_1
Expand Down
29 changes: 29 additions & 0 deletions ggml/src/ggml-vulkan/vulkan-shaders/dequant_q1_0.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#version 450

#include "dequant_head.glsl"

// 256 invocations per workgroup along x.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Input: packed Q1_0 blocks (read-only).
layout (binding = 0) readonly buffer A {block_q1_0 data_a[];};
// Output: dequantized values stored as D_TYPE (write-only).
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};

// Dequantize Q1_0 blocks from data_a into data_b.
// Each invocation expands one qs byte (8 elements). The 256 invocations of a
// workgroup are split into 4 groups of 64, and each group of 64 covers
// 4 blocks x 16 bytes, so a workgroup handles 16 blocks.
void main() {
    const uint lane  = gl_LocalInvocationID.x;
    const uint group = gl_WorkGroupID.x * 4 + lane / 64;

    const uint sub      = lane % 64;
    const uint byte_idx = sub / 4;         // which of the 16 qs bytes
    const uint blk      = 4*group + sub % 4; // which block

    // Skip invocations past the last block (128 elements per block).
    if (blk >= p.nel / 128) {
        return;
    }

    // First output element written by this invocation.
    const uint out_base = 128*blk + 8*byte_idx;

    const float scale = float(data_a[blk].d);
    const uint packed = uint(data_a[blk].qs[byte_idx]);

    // Bit set -> +scale, bit clear -> -scale; bit 0 is the first element.
    [[unroll]] for (uint bit = 0; bit < 8; ++bit) {
        const bool positive = ((packed >> bit) & 1u) != 0u;
        data_b[out_base + bit] = D_TYPE(positive ? scale : -scale);
    }
}
14 changes: 14 additions & 0 deletions ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,20 @@ void load_a_to_shmem(const uint pos_a, const uint row, const uint col, const uin

buf_a[buf_idx ] = FLOAT_TYPEV2(v.xy);
buf_a[buf_idx + 1] = FLOAT_TYPEV2(v.zw);
#elif defined(DATA_A_Q1_0)
// Q1_0: expand one qs byte (8 sign bits) into four FLOAT_TYPEV2 pairs in buf_a.
const uint idx = pos_a + col * p.stride_a / LOAD_VEC_A + row;
const uint buf_idx = col * SHMEM_STRIDE + row * LOAD_VEC_A / 2;

// idx / 16 selects the block and the low 4 bits select one of its 16 qs bytes.
// NOTE(review): this presumes LOAD_VEC_A == 8 for Q1_0, so that idx counts bytes — confirm in the shader generator.
const uint ib = idx / 16;
const uint iqs = idx & 0xfu;

const float d = float(data_a[ib].d);
const uint bits = uint(data_a[ib].qs[iqs]);

// Bit set -> +d, bit clear -> -d; the lowest bit goes to the first component of buf_a[buf_idx].
buf_a[buf_idx ] = FLOAT_TYPEV2((bits & 0x01u) != 0u ? d : -d, (bits & 0x02u) != 0u ? d : -d);
buf_a[buf_idx + 1] = FLOAT_TYPEV2((bits & 0x04u) != 0u ? d : -d, (bits & 0x08u) != 0u ? d : -d);
buf_a[buf_idx + 2] = FLOAT_TYPEV2((bits & 0x10u) != 0u ? d : -d, (bits & 0x20u) != 0u ? d : -d);
buf_a[buf_idx + 3] = FLOAT_TYPEV2((bits & 0x40u) != 0u ? d : -d, (bits & 0x80u) != 0u ? d : -d);
#elif defined(DATA_A_Q2_K)
const uint idx = pos_a + col * p.stride_a / LOAD_VEC_A + row;
const uint buf_idx = col * SHMEM_STRIDE + row * LOAD_VEC_A / 2;
Expand Down
16 changes: 16 additions & 0 deletions ggml/src/ggml-vulkan/vulkan-shaders/types.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,22 @@ struct block_q8_0_packed16
#define DATA_A_QUANT_LEGACY
#endif

// Number of elements in one Q1_0 block.
#define QUANT_K_Q1_0 128
// NOTE(review): the meaning of QUANT_R is not visible in this excerpt — confirm against the code that consumes it.
#define QUANT_R_Q1_0 1
Comment thread
0cc4m marked this conversation as resolved.

// One Q1_0 block: an fp16 scale plus one bit per element for QUANT_K_Q1_0 elements.
struct block_q1_0
{
// Per-block scale.
float16_t d;
// Packed bits, 8 elements per byte.
uint8_t qs[QUANT_K_Q1_0 / 8];
};

// When DATA_A_Q1_0 is defined, map the generic quant macros and A_TYPE onto the Q1_0 definitions.
#if defined(DATA_A_Q1_0)
#define QUANT_K QUANT_K_Q1_0
#define QUANT_R QUANT_R_Q1_0
// NOTE(review): the meaning of QUANT_AUXF is not visible in this excerpt — confirm against its users.
#define QUANT_AUXF 1
#define A_TYPE block_q1_0
#endif

#define QUANT_K_Q8_1 32
#define QUANT_R_Q8_1 1

Expand Down
7 changes: 4 additions & 3 deletions ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ std::string target_cpp = "";
const std::vector<std::string> type_names = {
"f32",
"f16",
"q1_0",
"q4_0",
"q4_1",
"q5_0",
Expand Down Expand Up @@ -553,7 +554,7 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c

for (const auto& tname : type_names) {
std::string load_vec_quant = "2";
if ((tname == "q4_0") || (tname == "q4_1") || (tname == "q5_1") || (tname == "iq1_s") || (tname == "iq1_m") || (tname == "iq2_xxs") || (tname == "iq2_xs") || (tname == "iq2_s"))
if ((tname == "q1_0") || (tname == "q4_0") || (tname == "q4_1") || (tname == "q5_1") || (tname == "iq1_s") || (tname == "iq1_m") || (tname == "iq2_xxs") || (tname == "iq2_xs") || (tname == "iq2_s"))
load_vec_quant = "8";
else if ((tname == "q5_0") || (tname == "q8_0") || (tname == "q2_k") || (tname == "q4_k") || (tname == "q5_k") || (tname == "iq3_xxs") || (tname == "iq3_s") || (tname == "iq4_xs") || (tname == "iq4_nl") || (tname == "mxfp4"))
load_vec_quant = "4";
Expand Down Expand Up @@ -758,13 +759,13 @@ void process_shaders() {
string_to_spv("cpy_transpose_16", "copy_transpose.comp", {{"A_TYPE", "uint16_t"}, {"D_TYPE", "uint16_t"}});
string_to_spv("cpy_transpose_32", "copy_transpose.comp", {{"A_TYPE", "uint"}, {"D_TYPE", "uint"}});

for (std::string t : {"q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "iq4_nl"}) {
for (std::string t : {"q1_0", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "iq4_nl"}) {
string_to_spv("cpy_f32_" + t, "copy_to_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
string_to_spv("cpy_f32_" + t + "_rte", "copy_to_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}, {"RTE16", "1"}});
string_to_spv("cpy_" + t + "_f32", "copy_from_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
}

for (std::string t : {"f32", "f16", "bf16", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "iq4_nl"}) {
for (std::string t : {"f32", "f16", "bf16", "q1_0", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "iq4_nl"}) {
string_to_spv("set_rows_" + t + "_i32", "copy_to_quant.comp", {{"SET_ROWS", "1"}, {"DATA_A_" + to_uppercase(t), "1"}, {"B_TYPE", "uint"}, {"B_SIZE", "32"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
string_to_spv("set_rows_" + t + "_i32_rte", "copy_to_quant.comp", {{"SET_ROWS", "1"}, {"DATA_A_" + to_uppercase(t), "1"}, {"B_TYPE", "uint"}, {"B_SIZE", "32"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}, {"RTE16", "1"}});
string_to_spv("set_rows_" + t + "_i64", "copy_to_quant.comp", {{"SET_ROWS", "1"}, {"DATA_A_" + to_uppercase(t), "1"}, {"B_TYPE", "uvec2"}, {"B_SIZE", "64"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
Expand Down
1 change: 1 addition & 0 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7265,6 +7265,7 @@ static const ggml_type all_types[] = {
static const ggml_type base_types[] = {
GGML_TYPE_F32, GGML_TYPE_F16,
GGML_TYPE_Q8_0, // for I8MM tests
GGML_TYPE_Q1_0,
GGML_TYPE_Q4_0,
GGML_TYPE_Q4_1, // for I8MM tests
GGML_TYPE_Q4_K,
Expand Down
Loading