We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e529ad8, commit ce5c29a (Copy full SHA for ce5c29a)
1 file changed
ggml/src/ggml-cuda/mmq.cu
@@ -126,6 +126,10 @@ void ggml_cuda_mul_mat_q(
126
const ggml_tensor * scale_activations = src0->type == GGML_TYPE_NVFP4 ? (ids ? dst->src[4] : dst->src[3]) : nullptr;
127
const float * scale_activations_d = scale_activations ? (const float *) scale_activations->data : nullptr;
128
const int64_t n_scale_activations = scale_activations ? ggml_nelements(scale_activations) : 0;
129
+ if (use_native_fp4)
130
+ {
131
+ GGML_ASSERT(scale_activations != nullptr);
132
+ }
133
134
if (!ids) {
135
const size_t nbytes_src1_q8_1 = ne13*ne12 * ne11*ne10_padded * sizeof(block_q8_1)/QK8_1 +
0 commit comments