Skip to content

Commit 4baa480

Browse files
authored
fix: adapt to llama.cpp changes (#547)
* fix: adapt to `llama.cpp` changes
* fix: change the level of common logs
1 parent 1997b4e commit 4baa480

File tree

6 files changed

+14
-38
lines changed

6 files changed

+14
-38
lines changed

llama/addon/AddonModel.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,13 @@ void AddonModel::dispose() {
362362
}
363363

364364
disposed = true;
365+
366+
if (data != nullptr) {
367+
auto currentData = data;
368+
data = nullptr;
369+
delete currentData;
370+
}
371+
365372
if (modelLoaded) {
366373
modelLoaded = false;
367374
llama_model_free(model);
@@ -370,12 +377,6 @@ void AddonModel::dispose() {
370377
loadedModelSize = 0;
371378
}
372379

373-
if (data != nullptr) {
374-
auto currentData = data;
375-
data = nullptr;
376-
delete currentData;
377-
}
378-
379380
if (hasAddonExportsRef) {
380381
addonExportsRef.Unref();
381382
hasAddonExportsRef = false;

llama/addon/AddonModelLora.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,8 @@ AddonModelLora::~AddonModelLora() {
5353

5454
void AddonModelLora::dispose(bool skipErase) {
5555
if (lora_adapter != nullptr) {
56-
auto loraAdapterToDispose = lora_adapter;
5756
lora_adapter = nullptr;
58-
llama_adapter_lora_free(loraAdapterToDispose);
59-
57+
6058
if (!skipErase && model->data != nullptr) {
6159
model->data->removeLora(this);
6260
}

src/bindings/Llama.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,10 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil
690690
return LlamaLogLevel.info;
691691
else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified"))
692692
return LlamaLogLevel.info;
693+
else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is enabled, disabling mmap"))
694+
return LlamaLogLevel.info;
695+
else if (level === LlamaLogLevel.warn && message.startsWith("llama_model_loader: direct I/O is not available, using mmap"))
696+
return LlamaLogLevel.info;
693697
else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
694698
return LlamaLogLevel.info;
695699
else if (gpu === "metal" && level === LlamaLogLevel.warn && message.startsWith("ggml_metal_device_init: tensor API disabled for"))

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ export class LlamaContext {
6060
/** @internal */ private readonly _disposeAggregator = new AsyncDisposeAggregator();
6161
/** @internal */ private readonly _modelPreventDisposalHandle: DisposalPreventionHandle;
6262
/** @internal */ private readonly _loraAdapters = new Set<AddonModelLora>();
63-
/** @internal */ private readonly _gcRegistry: FinalizationRegistry<Set<AddonModelLora>>;
6463
/** @internal */ private _nextGeneratedSequenceId = 0;
6564
/** @internal */ private _dispatchDecodeScheduled = false;
6665
/** @internal */ private _batchDispatchPending = false;
@@ -146,30 +145,20 @@ export class LlamaContext {
146145
dispatchSchedule: batchingDispatchSchedule,
147146
itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
148147
};
149-
this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
150-
this._gcRegistry.register(this, this._loraAdapters);
151148

152149
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
153150
this._freeReservedThreads = this._freeReservedThreads.bind(this);
154151

155152
this._disposeAggregator.add(() => {
156153
this._disposed = true;
157154
});
158-
this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
159155
this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
160156
this._disposeAggregator.add(this.onDispose.dispatchEvent);
161157
this._disposeAggregator.add(
162158
this.model.onDispose.createListener(
163159
disposeContextIfReferenced.bind(null, new WeakRef(this))
164160
)
165161
);
166-
this._disposeAggregator.add((): Promise<void> | void => {
167-
if (this._loraAdapters.size > 0) {
168-
const loraAdapters = new Set(this._loraAdapters);
169-
this._loraAdapters.clear();
170-
return this._model._removeLoraUsage(loraAdapters);
171-
}
172-
});
173162

174163
this._disposeAggregator.add(async () => {
175164
await this._backendContextDisposeGuard.acquireDisposeLock();

src/evaluator/LlamaContext/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ export type LlamaContextOptions = {
126126
* without the need for extensive retraining from scratch.
127127
*
128128
* If a string is provided, it will be treated as a path to a single LoRA adapter file.
129+
*
130+
* The adapters will be released from memory once the model (not just the context) is disposed.
129131
*/
130132
lora?: string | {
131133
adapters: Array<{

src/evaluator/LlamaModel/LlamaModel.ts

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -275,8 +275,6 @@ export class LlamaModel {
275275
this._llamaPreventDisposalHandle.dispose();
276276
});
277277

278-
this._removeLoraUsage = this._removeLoraUsage.bind(this);
279-
280278
this.tokenize = this.tokenize.bind(this);
281279
this.detokenize = this.detokenize.bind(this);
282280
this.isSpecialToken = this.isSpecialToken.bind(this);
@@ -703,22 +701,6 @@ export class LlamaModel {
703701
});
704702
}
705703

706-
/** @internal */
707-
public async _removeLoraUsage(loraAdapters: Set<AddonModelLora>) {
708-
return await withLock([this._loraAdapters, "modify"], async () => {
709-
await Promise.all(
710-
[...loraAdapters].map(async (lora) => {
711-
lora.usages--;
712-
713-
if (lora.usages <= 0 && this._loraAdapters.get(lora.filePath) === lora) {
714-
this._loraAdapters.delete(lora.filePath);
715-
await lora.dispose();
716-
}
717-
})
718-
);
719-
});
720-
}
721-
722704
/** @internal */
723705
public static async _create(modelOptions: LlamaModelOptions, {
724706
_llama

0 commit comments

Comments (0)