Skip to content

Commit cee42cb

Browse files
committed
fix(webgpu): avoid disruptive fallback on warmup and empty multimodal responses
1 parent 7eee056 commit cee42cb

1 file changed

Lines changed: 40 additions & 4 deletions

File tree

js/llama_webgpu_bridge.js

Lines changed: 40 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4859,6 +4859,9 @@ export class LlamaWebGpuBridge {
48594859
}
48604860

48614861
async createCompletion(prompt, options = {}) {
4862+
const isWarmup = options?.warmup === true;
4863+
const hasRetriedEmptyMultimodal =
4864+
options?.__llamadartEmptyRetryAttempted === true;
48624865
const workerAllowed = this._config?.disableWorker !== true;
48634866
if (this._hasMediaParts(options) && workerAllowed) {
48644867
const hasWorkerFallback =
@@ -4876,6 +4879,13 @@ export class LlamaWebGpuBridge {
48764879
await this._ensureWorkerMultimodalCpuMode();
48774880
} catch (error) {
48784881
const reason = serializeWorkerError(error);
4882+
if (isWarmup) {
4883+
this._emitBridgeWarn(
4884+
`llamadart: multimodal warmup skipped after worker setup issue (${reason}).`,
4885+
);
4886+
return '';
4887+
}
4888+
48794889
this._emitBridgeWarn(
48804890
`llamadart: unable to prepare multimodal worker CPU mode (${reason}).`,
48814891
);
@@ -4913,6 +4923,7 @@ export class LlamaWebGpuBridge {
49134923
const workerOptions = { ...options };
49144924
delete workerOptions.onToken;
49154925
delete workerOptions.signal;
4926+
delete workerOptions.__llamadartEmptyRetryAttempted;
49164927

49174928
const stallTimeoutMs = this._workerCompletionStallTimeoutMs(options);
49184929
let timeoutHandle = null;
@@ -4978,14 +4989,32 @@ export class LlamaWebGpuBridge {
49784989

49794990
if (
49804991
this._hasMediaParts(options)
4992+
&& !isWarmup
49814993
&& !sawWorkerTokenEvent
49824994
&& String(workerResult || '').trim().length == 0
49834995
) {
4984-
const emptyResponseError = new Error(
4985-
'Multimodal worker produced empty response without token events.',
4996+
this._emitBridgeWarn(
4997+
'llamadart: multimodal worker produced empty response without token events.',
49864998
);
4987-
emptyResponseError.llamadartEmptyMultimodalResponse = true;
4988-
throw emptyResponseError;
4999+
5000+
if (!hasRetriedEmptyMultimodal) {
5001+
this._emitBridgeWarn(
5002+
'llamadart: retrying multimodal worker once after empty response.',
5003+
);
5004+
try {
5005+
await this._replaceWorkerProxyForMultimodalCpuMode();
5006+
await this._ensureWorkerMultimodalCpuMode();
5007+
} catch (retrySetupError) {
5008+
this._emitBridgeWarn(
5009+
`llamadart: multimodal empty-response retry setup failed (${serializeWorkerError(retrySetupError)}).`,
5010+
);
5011+
}
5012+
5013+
return this.createCompletion(prompt, {
5014+
...options,
5015+
__llamadartEmptyRetryAttempted: true,
5016+
});
5017+
}
49895018
}
49905019

49915020
return workerResult;
@@ -4996,6 +5025,13 @@ export class LlamaWebGpuBridge {
49965025
if (this._hasMediaParts(options)) {
49975026
const reason = serializeWorkerError(error);
49985027

5028+
if (isWarmup) {
5029+
this._emitBridgeWarn(
5030+
`llamadart: multimodal warmup skipped after worker request issue (${reason}).`,
5031+
);
5032+
return '';
5033+
}
5034+
49995035
if (this._isCpuModelMode()) {
50005036
this._emitBridgeWarn(
50015037
`llamadart: CPU multimodal worker request failed (${reason}); skipping main-thread fallback.`,

0 commit comments

Comments
 (0)