Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6191ee2
fix: Qwen 3.5 memory estimation
giladgd Mar 6, 2026
140d888
fix: bugs
giladgd Mar 6, 2026
25effb9
fix: grammar use with HarmonyChatWrapper
giladgd Mar 8, 2026
48e5ed4
fix: Qwen 3.5 function calling syntax detection
giladgd Mar 10, 2026
8b284f4
feat(`QwenChatWrapper`): support Qwen 3.5
giladgd Mar 10, 2026
ab65037
feat: automatic checkpoints for models that need it
giladgd Mar 12, 2026
07fa753
fix: add mistral think segment detection
giladgd Mar 12, 2026
8f7209e
feat: initial disk cache dir option for future optimizations (disable…
giladgd Mar 12, 2026
1bb4a78
test: Qwen 3.5 checkpoint use
giladgd Mar 12, 2026
e121326
fix: bugs
giladgd Mar 12, 2026
d9bc17b
test: fix test
giladgd Mar 12, 2026
2a93abe
feat(`inspect gpu` command): detect and report missing prebuilt binar…
giladgd Mar 13, 2026
d2d38d9
test: fix test
giladgd Mar 13, 2026
f2908ca
fix: checkpoints memory restoration
giladgd Mar 14, 2026
4664976
fix: bug
giladgd Mar 14, 2026
12ce825
chore: update modules
giladgd Mar 14, 2026
82fa13c
test: better crash logs on test process crash
giladgd Mar 14, 2026
89b293f
fix: compress excessively long segments from the current response on …
giladgd Mar 14, 2026
28a236b
fix: default thinking budget to 75% of the context size to avoid low …
giladgd Mar 14, 2026
8c9c764
chore: update `package-lock.json`
giladgd Mar 14, 2026
b4b978d
fix(`chat` command): default reasoning budget option value description
giladgd Mar 14, 2026
841b122
docs: update awesome list
giladgd Mar 14, 2026
1868c02
test: fix the CI test runner
giladgd Mar 14, 2026
1b33d2c
fix: bugs
giladgd Mar 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/guide/awesome.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue";
* [CatAI](https://github.com/withcatai/catai) - a simplified AI assistant API for Node.js, with REST API support
<br /><DataBadge title="License" content="MIT"/>

* [Manzoni](https://manzoni.app/) ([GitHub](https://github.com/gems-platforms/manzoni-app)) - a text editor running local LLMs
<br /><DataBadge title="License" content="AGPL-3.0"/>
* [QMD](https://github.com/tobi/qmd) (Query Markup Documents) - an on-device search engine for your markdown notes, meeting transcripts, documentation, and knowledge bases. Search with keywords or natural language
<br /><DataBadge title="License" content="MIT"/>

* [Clippy](https://felixrieseberg.github.io/clippy/) ([GitHub](https://github.com/felixrieseberg/clippy)) - Clippy, resurrected from the 1990s, now with some AI
<br /><DataBadge title="License" content="MIT"/>
Expand All @@ -25,6 +25,8 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue";

* [nutshell](https://withnutshell.com) - Private AI meeting notes processed completely on your device

* [Manzoni](https://manzoni.app/) ([GitHub](https://github.com/gems-platforms/manzoni-app)) - a text editor running local LLMs



<br />
Expand Down
168 changes: 168 additions & 0 deletions llama/addon/AddonContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,69 @@ Napi::Value AddonContext::SetLoras(const Napi::CallbackInfo& info) {
return info.Env().Undefined();
}

class RestoreCheckpointWorker : public Napi::AsyncWorker {
public:
AddonContext* context;
AddonContextSequenceCheckpoint* checkpoint;
std::size_t maxPosIndex;
bool restoreSuccess = false;

RestoreCheckpointWorker(const Napi::CallbackInfo& info, AddonContext* context, AddonContextSequenceCheckpoint* checkpoint, std::size_t maxPosIndex)
: Napi::AsyncWorker(info.Env(), "RestoreCheckpointWorker"),
context(context),
checkpoint(checkpoint),
maxPosIndex(maxPosIndex),
deferred(Napi::Promise::Deferred::New(info.Env())) {
context->Ref();
checkpoint->Ref();
}
~RestoreCheckpointWorker() {
context->Unref();
checkpoint->Unref();
}

Napi::Promise GetPromise() {
return deferred.Promise();
}

protected:
Napi::Promise::Deferred deferred;

void Execute() {
try {
std::lock_guard<std::mutex> lock(checkpoint->dataMutex);

std::size_t dataSize = checkpoint->data.size();
std::size_t restoreSize = llama_state_seq_set_data_ext(context->ctx, checkpoint->data.data(), dataSize, checkpoint->sequenceId, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
if (restoreSize == dataSize) {
restoreSuccess = (
llama_memory_seq_rm(llama_get_memory(context->ctx), checkpoint->sequenceId, maxPosIndex + 1, -1) &&
llama_memory_seq_pos_max(llama_get_memory(context->ctx), checkpoint->sequenceId) == maxPosIndex
);
}
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_set_data_ext\"");
}
}
void OnOK() {
deferred.Resolve(Napi::Boolean::New(Env(), restoreSuccess));
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};

// Queues an async restoration of the given checkpoint (info[0]) into this
// context, truncating the sequence after position info[1].
// Returns a promise that resolves to the restoration success status.
Napi::Value AddonContext::RestoreCheckpoint(const Napi::CallbackInfo& info) {
    auto* checkpoint = Napi::ObjectWrap<AddonContextSequenceCheckpoint>::Unwrap(info[0].As<Napi::Object>());
    const std::size_t maxPosIndex = info[1].As<Napi::Number>().Int32Value();

    auto* worker = new RestoreCheckpointWorker(info, this, checkpoint, maxPosIndex);
    worker->Queue();
    return worker->GetPromise();
}

void AddonContext::init(Napi::Object exports) {
exports.Set(
"AddonContext",
Expand Down Expand Up @@ -992,8 +1055,113 @@ void AddonContext::init(Napi::Object exports) {
InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
InstanceMethod("setLoras", &AddonContext::SetLoras),
InstanceMethod("restoreCheckpoint", &AddonContext::RestoreCheckpoint),
InstanceMethod("dispose", &AddonContext::Dispose),
}
)
);
}

// The wrapper starts empty; the state buffer is populated later by Init().
AddonContextSequenceCheckpoint::AddonContextSequenceCheckpoint(const Napi::CallbackInfo& info)
    : Napi::ObjectWrap<AddonContextSequenceCheckpoint>(info) {}

// Releases the stored state data when the JS object is collected.
AddonContextSequenceCheckpoint::~AddonContextSequenceCheckpoint() {
    dispose();
}

class AddonContextSequenceCheckpointInitWorker : public Napi::AsyncWorker {
public:
AddonContextSequenceCheckpoint* checkpoint;
AddonContext* context;

AddonContextSequenceCheckpointInitWorker(const Napi::CallbackInfo& info, AddonContextSequenceCheckpoint* checkpoint, AddonContext* context)
: Napi::AsyncWorker(info.Env(), "AddonContextSequenceCheckpointInitWorker"),
checkpoint(checkpoint),
context(context),
deferred(Napi::Promise::Deferred::New(info.Env())) {
checkpoint->Ref();
context->Ref();
}
~AddonContextSequenceCheckpointInitWorker() {
checkpoint->Unref();
context->Unref();
}

Napi::Promise GetPromise() {
return deferred.Promise();
}

protected:
Napi::Promise::Deferred deferred;

void Execute() {
try {
checkpoint->minPos = llama_memory_seq_pos_min(llama_get_memory(context->ctx), checkpoint->sequenceId);
checkpoint->maxPos = llama_memory_seq_pos_max(llama_get_memory(context->ctx), checkpoint->sequenceId);
const size_t checkpointSize = llama_state_seq_get_size_ext(context->ctx, checkpoint->sequenceId, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);

checkpoint->data.resize(checkpointSize, 0);
llama_state_seq_get_data_ext(context->ctx, checkpoint->data.data(), checkpointSize, checkpoint->sequenceId, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
} catch (const std::exception& e) {
SetError(e.what());
} catch(...) {
SetError("Unknown error when calling \"llama_state_seq_get_data_ext\"");
}
}
void OnOK() {
deferred.Resolve(Env().Undefined());
}
void OnError(const Napi::Error& err) {
deferred.Reject(err.Value());
}
};

// Captures the state of a context sequence into this checkpoint.
// info[0]: the AddonContext object, info[1]: the sequence id.
// Returns a promise that resolves once the capture finishes.
Napi::Value AddonContextSequenceCheckpoint::Init(const Napi::CallbackInfo& info) {
    auto* context = Napi::ObjectWrap<AddonContext>::Unwrap(info[0].As<Napi::Object>());
    sequenceId = info[1].As<Napi::Number>().Int32Value();

    auto* worker = new AddonContextSequenceCheckpointInitWorker(info, this, context);
    worker->Queue();
    return worker->GetPromise();
}

// JS-facing dispose: releases the stored state data and returns undefined.
Napi::Value AddonContextSequenceCheckpoint::Dispose(const Napi::CallbackInfo& info) {
    dispose();
    return info.Env().Undefined();
}

// Releases the checkpoint's stored state data, including its allocated
// capacity. Safe to call multiple times; the lock prevents an in-flight
// async worker from observing the buffer mid-teardown.
void AddonContextSequenceCheckpoint::dispose() {
    std::lock_guard<std::mutex> lock(dataMutex);
    // clear() + resize(0) only reset the size and keep the (potentially large)
    // allocation alive; swapping with an empty vector actually frees the memory
    std::vector<uint8_t>().swap(data);
}

// Returns the size (in bytes) of the stored state data.
Napi::Value AddonContextSequenceCheckpoint::GetSize(const Napi::CallbackInfo& info) {
    // `data` may be resized by the init worker on another thread;
    // lock for a consistent read, matching the other accesses of `data`
    std::lock_guard<std::mutex> lock(dataMutex);
    return Napi::Number::New(info.Env(), data.size());
}

// Returns the lowest sequence position recorded when the checkpoint was captured.
Napi::Value AddonContextSequenceCheckpoint::GetMinPos(const Napi::CallbackInfo& info) {
    return Napi::Number::New(info.Env(), minPos);
}

// Returns the highest sequence position recorded when the checkpoint was captured.
Napi::Value AddonContextSequenceCheckpoint::GetMaxPos(const Napi::CallbackInfo& info) {
    return Napi::Number::New(info.Env(), maxPos);
}

// Registers the AddonContextSequenceCheckpoint class on the addon exports.
void AddonContextSequenceCheckpoint::init(Napi::Object exports) {
    auto checkpointClass = DefineClass(
        exports.Env(),
        "AddonContextSequenceCheckpoint",
        {
            InstanceMethod("init", &AddonContextSequenceCheckpoint::Init),
            InstanceMethod("dispose", &AddonContextSequenceCheckpoint::Dispose),

            InstanceAccessor("size", &AddonContextSequenceCheckpoint::GetSize, nullptr),
            InstanceAccessor("minPos", &AddonContextSequenceCheckpoint::GetMinPos, nullptr),
            InstanceAccessor("maxPos", &AddonContextSequenceCheckpoint::GetMaxPos, nullptr),
        }
    );
    exports.Set("AddonContextSequenceCheckpoint", checkpointClass);
}
27 changes: 27 additions & 0 deletions llama/addon/AddonContext.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#pragma once

#include <mutex>

#include "llama.h"
#include "napi.h"
#include "addonGlobals.h"
Expand Down Expand Up @@ -53,6 +56,30 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);

Napi::Value SetLoras(const Napi::CallbackInfo& info);
Napi::Value RestoreCheckpoint(const Napi::CallbackInfo& info);

static void init(Napi::Object exports);
};

// JS-exposed wrapper around a captured copy of a context sequence's state
// (a "checkpoint") that can later be restored into the context.
class AddonContextSequenceCheckpoint : public Napi::ObjectWrap<AddonContextSequenceCheckpoint> {
    public:
        // guards `data` against concurrent access from async workers and dispose()
        std::mutex dataMutex;
        // serialized sequence state, filled in by the init worker
        std::vector<uint8_t> data;
        // the context sequence this checkpoint was captured from
        llama_seq_id sequenceId = 0;
        // min/max sequence positions at the time the checkpoint was captured
        std::size_t minPos = 0;
        std::size_t maxPos = 0;

        AddonContextSequenceCheckpoint(const Napi::CallbackInfo& info);
        ~AddonContextSequenceCheckpoint();

        // captures a sequence's state (info[0]: context, info[1]: sequence id);
        // returns a promise
        Napi::Value Init(const Napi::CallbackInfo& info);
        // JS-facing dispose; releases the stored state data
        Napi::Value Dispose(const Napi::CallbackInfo& info);

        // releases the stored state data (also called from the destructor)
        void dispose();

        Napi::Value GetSize(const Napi::CallbackInfo& info);
        Napi::Value GetMinPos(const Napi::CallbackInfo& info);
        Napi::Value GetMaxPos(const Napi::CallbackInfo& info);

        static void init(Napi::Object exports);
};
1 change: 1 addition & 0 deletions llama/addon/addon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
AddonGrammar::init(exports);
AddonGrammarEvaluationState::init(exports);
AddonContext::init(exports);
AddonContextSequenceCheckpoint::init(exports);
AddonSampler::init(exports);

llama_log_set(addonLlamaCppLogCallback, nullptr);
Expand Down
Loading
Loading