We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ce5c29a, commit f3feefa. Copy full SHA for f3feefa
1 file changed
ggml/src/ggml-cuda/quantize.cu
@@ -139,9 +139,8 @@ static __global__ void quantize_mmq_nvfp4(
139
uint8_t fp8_code = first_fp8_code;
140
float subblock_scale = test_scale;
141
142
- const int n_test_offsets = scales ? 1 : 5;
143
-
144
if (scales == nullptr) {
+ const int n_test_offsets = 5;
145
# pragma unroll // Check +/- 2 to find best code to reduce NVFP4 activation loss unless input scales define the scale.
146
for (int i = 1; i < n_test_offsets; i++) {
147
const int test_code = first_fp8_code + test_offsets[i];
0 commit comments