@@ -176,10 +176,10 @@ static void turbo_innerq_init(void) {
176176 // Zero accumulators and set calibrating flag on device
177177 float zeros[INNERQ_MAX_CHANNELS] = {0 };
178178 int zero = 0 , one = 1 ;
179- cudaMemcpyToSymbol (d_innerq_sq_accum, zeros, sizeof (zeros));
180- cudaMemcpyToSymbol (d_innerq_count, &zero, sizeof (int ));
181- cudaMemcpyToSymbol (d_innerq_active, &zero, sizeof (int ));
182- cudaMemcpyToSymbol (d_innerq_calibrating, &one, sizeof (int ));
179+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_sq_accum, zeros, sizeof (zeros) ));
180+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_count, &zero, sizeof (int ) ));
181+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_active, &zero, sizeof (int ) ));
182+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_calibrating, &one, sizeof (int ) ));
183183
184184 GGML_LOG_INFO (" %s: InnerQ calibration started (target=%d tokens, strength=%.2f)\n " ,
185185 __func__, innerq_target_tokens, innerq_strength);
@@ -190,14 +190,14 @@ static void turbo_innerq_finalize(int group_size) {
190190 // Read accumulators from device
191191 float sq_accum[INNERQ_MAX_CHANNELS];
192192 int count = 0 ;
193- cudaMemcpyFromSymbol (sq_accum, d_innerq_sq_accum, group_size * sizeof (float ));
194- cudaMemcpyFromSymbol (&count, d_innerq_count, sizeof (int ));
193+ CUDA_CHECK ( cudaMemcpyFromSymbol (sq_accum, d_innerq_sq_accum, group_size * sizeof (float ) ));
194+ CUDA_CHECK ( cudaMemcpyFromSymbol (&count, d_innerq_count, sizeof (int ) ));
195195
196196 if (count <= 0 ) {
197197 GGML_LOG_WARN (" %s: InnerQ calibration got 0 tokens, disabling\n " , __func__);
198198 innerq_enabled = 0 ;
199199 int zero = 0 ;
200- cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ));
200+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ) ));
201201 return ;
202202 }
203203
@@ -231,17 +231,17 @@ static void turbo_innerq_finalize(int group_size) {
231231 __func__, max_ratio);
232232 innerq_enabled = 0 ;
233233 int zero = 0 ;
234- cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ));
234+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ) ));
235235 return ;
236236 }
237237
238238 // Stop calibrating, upload scales, activate
239239 int zero = 0 , one = 1 ;
240- cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ));
241- cudaMemcpyToSymbol (d_innerq_scale, scale, group_size * sizeof (float ));
242- cudaMemcpyToSymbol (d_innerq_scale_inv, scale_inv, group_size * sizeof (float ));
243- cudaDeviceSynchronize (); // ensure scales are visible before activating
244- cudaMemcpyToSymbol (d_innerq_active, &one, sizeof (int ));
240+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ) ));
241+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_scale, scale, group_size * sizeof (float ) ));
242+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_scale_inv, scale_inv, group_size * sizeof (float ) ));
243+ CUDA_CHECK ( cudaDeviceSynchronize () ); // ensure scales are visible before activating
244+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_active, &one, sizeof (int ) ));
245245
246246 innerq_enabled = 2 ; // active
247247
@@ -272,15 +272,15 @@ static void turbo_innerq_check_finalize(int group_size, int64_t ne00) {
272272 __func__, (long long )ne00, group_size);
273273 innerq_enabled = 0 ;
274274 int zero = 0 ;
275- cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ));
275+ CUDA_CHECK ( cudaMemcpyToSymbol (d_innerq_calibrating, &zero, sizeof (int ) ));
276276 }
277277 return ;
278278 }
279279
280280 // Check if calibration is complete
281281 if (innerq_enabled == 1 ) {
282282 int count = 0 ;
283- cudaMemcpyFromSymbol (&count, d_innerq_count, sizeof (int ));
283+ CUDA_CHECK ( cudaMemcpyFromSymbol (&count, d_innerq_count, sizeof (int ) ));
284284 if (count >= innerq_target_tokens) {
285285 turbo_innerq_finalize (group_size);
286286 }
0 commit comments