Apply suggestions from code review

khosravipasha · CISC · web-flow · commit 062c7c63a291 · 2026-04-05T01:20:46.000-07:00
Co-authored-by: Sigbjørn Skjæret &lt;sigbjorn.skjaeret@scala.com&gt;
diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
@@ -176,8 +176,8 @@ typedef sycl::half2 ggml_half2;
 
 #define QK1_0 128
 typedef struct {
-    ggml_half d;               // delta
-    uint8_t qs[QK1_0 / 8]; // bits / quants
+    ggml_half d;           // delta
+    uint8_t qs[QK1_0 / 8]; // bits / quants
 } block_q1_0;
 static_assert(sizeof(block_q1_0) == sizeof(ggml_half) + QK1_0 / 8, "wrong q1_0 block size/padding");
 
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -7661,8 +7661,8 @@ size_t ggml_quantize_chunk(
     size_t result = 0;
 
     switch (type) {
-        case GGML_TYPE_Q1_0: result = quantize_q1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
-        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
+        case GGML_TYPE_Q1_0:    result = quantize_q1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
+        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
         case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
@@ -3966,7 +3966,7 @@ class LlamaFileType(IntEnum):
     MOSTLY_TQ2_0         = 37  # except 1d tensors
     MOSTLY_MXFP4_MOE     = 38  # except 1d tensors
     MOSTLY_NVFP4         = 39  # except 1d tensors
-    MOSTLY_Q1_0          = 41  # except 1d tensors
+    MOSTLY_Q1_0          = 40  # except 1d tensors
 
     GUESSED              = 1024  # not specified in the model file
 
@@ -4075,8 +4075,8 @@ class VisionProjectorType:
     GGMLQuantizationType.TQ1_0:   (256, 2 + 4 * 13),
     GGMLQuantizationType.TQ2_0:   (256, 2 + 64),
     GGMLQuantizationType.MXFP4:   (32, 1 + 16),
-    GGMLQuantizationType.NVFP4:   (64, 4 + 32),
-    GGMLQuantizationType.Q1_0: (128, 2 + 16),  # 2 bytes fp16 scale + 16 bytes (128 bits)
+    GGMLQuantizationType.NVFP4:   (64, 4 + 32),
+    GGMLQuantizationType.Q1_0:    (128, 2 + 16),
 }
 
 
diff --git a/include/llama.h b/include/llama.h
@@ -154,7 +154,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_TQ2_0         = 37, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_MXFP4_MOE     = 38, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_NVFP4         = 39, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q1_0          = 41, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q1_0          = 40, // except 1d tensors
 
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };