|
1 | | -import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock} from "lifecycle-utils"; |
| 1 | +import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, Lock, withLock} from "lifecycle-utils"; |
2 | 2 | import {removeNullFields} from "../../utils/removeNullFields.js"; |
3 | 3 | import {Token} from "../../types.js"; |
4 | 4 | import {AddonContext, AddonModelLora, BatchLogitIndex} from "../../bindings/AddonTypes.js"; |
@@ -32,6 +32,10 @@ const defaultFailedCreationRemedy = { |
32 | 32 | } as const satisfies Required<LlamaContextOptions["failedCreationRemedy"]>; |
33 | 33 | const defaultEvaluationPriority: EvaluationPriority = 5; |
34 | 34 |
|
| 35 | +const decodeSyncWorkaround = { | // declared at module level, outside LlamaContext, so all instances in this module reference the same object
| 36 | + vulkanLock: {} | // passed as the first argument to acquireLock(..., "decode") when this._llama.gpu === "vulkan"
| 37 | +}; |
| 38 | + |
35 | 39 | export class LlamaContext { |
36 | 40 | /** @internal */ public readonly _llama: Llama; |
37 | 41 | /** @internal */ public readonly _ctx: AddonContext; |
@@ -573,11 +577,17 @@ export class LlamaContext { |
573 | 577 | return; |
574 | 578 | } |
575 | 579 |
|
| 580 | + let decodeLock: Lock | undefined; |
| 581 | + // this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel |
| 582 | + if (this._llama.gpu === "vulkan") |
| 583 | + decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode"); |
| 584 | + |
576 | 585 | try { |
577 | 586 | await decodeTokenBatchItems(currentBatchItems, currentBatchSize); |
578 | 587 |
|
579 | 588 | shouldHaveAnotherLoop = this._queuedDecodes.length > 0; |
580 | 589 | } finally { |
| 590 | + decodeLock?.dispose(); |
581 | 591 | preventDisposalHandle.dispose(); |
582 | 592 | } |
583 | 593 | } |
|
0 commit comments