@@ -9,6 +9,7 @@ import 'package:web/web.dart';
99
1010import '../../core/models/chat/content_part.dart' ;
1111import '../../core/models/config/gpu_backend.dart' ;
12+ import '../../core/models/config/llama_cpp_param_values.dart' ;
1213import '../../core/models/config/log_level.dart' ;
1314import '../../core/models/inference/generation_params.dart' ;
1415import '../../core/models/inference/model_params.dart' ;
@@ -615,6 +616,20 @@ class WebGpuLlamaBackend
615616 return (nBatch: tunedBatch, nUbatch: tunedUbatch);
616617 }
617618
/// Computes the integer flash-attention value passed to the WebGPU backend.
///
/// Hands the requested flash-attention setting and both KV cache types from
/// [params] to `resolveFlashAttention`, then maps that result to an `int`
/// via `llamaFlashAttentionTypeValueFor`.
int _webGpuFlashAttentionValue(ModelParams params) {
  // Resolve first so the conversion below only sees the effective setting.
  // NOTE(review): presumably the cache types can override the requested
  // mode inside resolveFlashAttention — confirm against that helper.
  final effectiveFlashAttention = resolveFlashAttention(
    requested: params.flashAttention,
    cacheTypeK: params.cacheTypeK,
    cacheTypeV: params.cacheTypeV,
  );
  return llamaFlashAttentionTypeValueFor(effectiveFlashAttention);
}
628+
/// Picks the `kvUnified` flag passed to the WebGPU backend.
///
/// An explicit [ModelParams.kvUnified] value is returned unchanged. When it
/// is unset, the result is `true` if [ModelParams.maxParallelSequences] is
/// greater than one, and `null` otherwise.
bool? _webGpuKvUnifiedValue(ModelParams params) {
  final explicitChoice = params.kvUnified;
  if (explicitChoice != null) {
    return explicitChoice;
  }

  // No explicit choice: only force the flag on for multi-sequence contexts.
  if (params.maxParallelSequences > 1) {
    return true;
  }

  // NOTE(review): null presumably lets the backend keep its own default —
  // confirm against the receiving API.
  return null;
}
632+
618633 int _resolveSafeRequestedGpuLayers ({
619634 required String url,
620635 required ModelParams params,
@@ -815,6 +830,7 @@ class WebGpuLlamaBackend
815830 ModelParams params, {
816831 Function (double progress)? onProgress,
817832 }) async {
833+ params.validate ();
818834 _preferMemory64Override = null ;
819835 _forceRemoteFetchBackendOverride = null ;
820836
@@ -931,6 +947,15 @@ class WebGpuLlamaBackend
931947 ? params.microBatchSize
932948 : batchTuning.nUbatch,
933949 nGpuLayers: attempt.gpuLayers,
950+ nSeqMax: math.max (1 , params.maxParallelSequences),
951+ flashAttention: _webGpuFlashAttentionValue (params),
952+ cacheTypeK: ggmlTypeValueFor (params.cacheTypeK),
953+ cacheTypeV: ggmlTypeValueFor (params.cacheTypeV),
954+ kvUnified: _webGpuKvUnifiedValue (params),
955+ ropeFrequencyBase: params.ropeFrequencyBase,
956+ ropeFrequencyScale: params.ropeFrequencyScale,
957+ splitMode: params.splitMode.llamaCppValue,
958+ mainGpu: params.mainGpu,
934959 useCache: true ,
935960 forceRemoteFetchBackend: forceRemoteFetchBackend,
936961 remoteFetchChunkBytes: remoteFetchChunkBytesOverride,
0 commit comments