Skip to content

Commit 062c7c6

Browse files
khosravipasha and CISC authored
Apply suggestions from code review
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
1 parent 9b51456 commit 062c7c6

4 files changed

Lines changed: 8 additions & 8 deletions

File tree

ggml/src/ggml-common.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -176,8 +176,8 @@ typedef sycl::half2 ggml_half2;
176176

177177
#define QK1_0 128
178178
typedef struct {
179-
ggml_half d; // delta
180-
uint8_t qs[QK1_0 / 8]; // bits / quants
179+
ggml_half d; // delta
180+
uint8_t qs[QK1_0 / 8]; // bits / quants
181181
} block_q1_0;
182182
static_assert(sizeof(block_q1_0) == sizeof(ggml_half) + QK1_0 / 8, "wrong q1_0 block size/padding");
183183

ggml/src/ggml.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7661,8 +7661,8 @@ size_t ggml_quantize_chunk(
76617661
size_t result = 0;
76627662

76637663
switch (type) {
7664-
case GGML_TYPE_Q1_0: result = quantize_q1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7665-
case GGML_TYPE_Q4_0: result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7664+
case GGML_TYPE_Q1_0: result = quantize_q1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7665+
case GGML_TYPE_Q4_0: result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
76667666
case GGML_TYPE_Q4_1: result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
76677667
case GGML_TYPE_Q5_0: result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
76687668
case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;

gguf-py/gguf/constants.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3966,7 +3966,7 @@ class LlamaFileType(IntEnum):
39663966
MOSTLY_TQ2_0 = 37 # except 1d tensors
39673967
MOSTLY_MXFP4_MOE = 38 # except 1d tensors
39683968
MOSTLY_NVFP4 = 39 # except 1d tensors
3969-
MOSTLY_Q1_0 = 41 # except 1d tensors
3969+
MOSTLY_Q1_0 = 40 # except 1d tensors
39703970

39713971
GUESSED = 1024 # not specified in the model file
39723972

@@ -4075,8 +4075,8 @@ class VisionProjectorType:
40754075
GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
40764076
GGMLQuantizationType.TQ2_0: (256, 2 + 64),
40774077
GGMLQuantizationType.MXFP4: (32, 1 + 16),
4078-
GGMLQuantizationType.NVFP4: (64, 4 + 32),
4079-
GGMLQuantizationType.Q1_0: (128, 2 + 16), # 2 bytes fp16 scale + 16 bytes (128 bits)
4078+
GGMLQuantizationType.NVFP4: (64, 4 + 32),
4079+
GGMLQuantizationType.Q1_0: (128, 2 + 16),
40804080
}
40814081

40824082

include/llama.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,7 @@ extern "C" {
154154
LLAMA_FTYPE_MOSTLY_TQ2_0 = 37, // except 1d tensors
155155
LLAMA_FTYPE_MOSTLY_MXFP4_MOE = 38, // except 1d tensors
156156
LLAMA_FTYPE_MOSTLY_NVFP4 = 39, // except 1d tensors
157-
LLAMA_FTYPE_MOSTLY_Q1_0 = 41, // except 1d tensors
157+
LLAMA_FTYPE_MOSTLY_Q1_0 = 40, // except 1d tensors
158158

159159
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
160160
};

0 commit comments

Comments
 (0)