@@ -4859,6 +4859,9 @@ export class LlamaWebGpuBridge {
48594859 }
48604860
48614861 async createCompletion ( prompt , options = { } ) {
4862+ const isWarmup = options ?. warmup === true ;
4863+ const hasRetriedEmptyMultimodal =
4864+ options ?. __llamadartEmptyRetryAttempted === true ;
48624865 const workerAllowed = this . _config ?. disableWorker !== true ;
48634866 if ( this . _hasMediaParts ( options ) && workerAllowed ) {
48644867 const hasWorkerFallback =
@@ -4876,6 +4879,13 @@ export class LlamaWebGpuBridge {
48764879 await this . _ensureWorkerMultimodalCpuMode ( ) ;
48774880 } catch ( error ) {
48784881 const reason = serializeWorkerError ( error ) ;
4882+ if ( isWarmup ) {
4883+ this . _emitBridgeWarn (
4884+ `llamadart: multimodal warmup skipped after worker setup issue (${ reason } ).` ,
4885+ ) ;
4886+ return '' ;
4887+ }
4888+
48794889 this . _emitBridgeWarn (
48804890 `llamadart: unable to prepare multimodal worker CPU mode (${ reason } ).` ,
48814891 ) ;
@@ -4913,6 +4923,7 @@ export class LlamaWebGpuBridge {
49134923 const workerOptions = { ...options } ;
49144924 delete workerOptions . onToken ;
49154925 delete workerOptions . signal ;
4926+ delete workerOptions . __llamadartEmptyRetryAttempted ;
49164927
49174928 const stallTimeoutMs = this . _workerCompletionStallTimeoutMs ( options ) ;
49184929 let timeoutHandle = null ;
@@ -4978,14 +4989,32 @@ export class LlamaWebGpuBridge {
49784989
49794990 if (
49804991 this . _hasMediaParts ( options )
4992+ && ! isWarmup
49814993 && ! sawWorkerTokenEvent
49824994 && String ( workerResult || '' ) . trim ( ) . length == 0
49834995 ) {
4984- const emptyResponseError = new Error (
4985- 'Multimodal worker produced empty response without token events.' ,
4996+ this . _emitBridgeWarn (
4997+ 'llamadart: multimodal worker produced empty response without token events.' ,
49864998 ) ;
4987- emptyResponseError . llamadartEmptyMultimodalResponse = true ;
4988- throw emptyResponseError ;
4999+
5000+ if ( ! hasRetriedEmptyMultimodal ) {
5001+ this . _emitBridgeWarn (
5002+ 'llamadart: retrying multimodal worker once after empty response.' ,
5003+ ) ;
5004+ try {
5005+ await this . _replaceWorkerProxyForMultimodalCpuMode ( ) ;
5006+ await this . _ensureWorkerMultimodalCpuMode ( ) ;
5007+ } catch ( retrySetupError ) {
5008+ this . _emitBridgeWarn (
5009+ `llamadart: multimodal empty-response retry setup failed (${ serializeWorkerError ( retrySetupError ) } ).` ,
5010+ ) ;
5011+ }
5012+
5013+ return this . createCompletion ( prompt , {
5014+ ...options ,
5015+ __llamadartEmptyRetryAttempted : true ,
5016+ } ) ;
5017+ }
49895018 }
49905019
49915020 return workerResult ;
@@ -4996,6 +5025,13 @@ export class LlamaWebGpuBridge {
49965025 if ( this . _hasMediaParts ( options ) ) {
49975026 const reason = serializeWorkerError ( error ) ;
49985027
5028+ if ( isWarmup ) {
5029+ this . _emitBridgeWarn (
5030+ `llamadart: multimodal warmup skipped after worker request issue (${ reason } ).` ,
5031+ ) ;
5032+ return '' ;
5033+ }
5034+
49995035 if ( this . _isCpuModelMode ( ) ) {
50005036 this . _emitBridgeWarn (
50015037 `llamadart: CPU multimodal worker request failed (${ reason } ); skipping main-thread fallback.` ,
0 commit comments