Skip to content

Commit 0246815

Browse files
authored
Merge pull request #29 from vmlinuzx/feat/local-llamacpp-support
Add llama.cpp support for local OpenAI-compatible LLM backends
2 parents add231b + 847b54b commit 0246815

File tree

4 files changed

+171
-23
lines changed

4 files changed

+171
-23
lines changed

apps/webuiapps/src/components/ChatPanel/index.tsx

Lines changed: 11 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -95,6 +95,10 @@ interface CharacterDisplayMessage extends DisplayMessage {
9595
toolCalls?: string[]; // collapsed tool call summaries
9696
}
9797

98+
function hasUsableLLMConfig(config: LLMConfig | null | undefined): config is LLMConfig {
99+
return !!config?.baseUrl.trim() && !!config.model.trim();
100+
}
101+
98102
// ---------------------------------------------------------------------------
99103
// Tool definitions for character system
100104
// ---------------------------------------------------------------------------
@@ -649,7 +653,7 @@ const ChatPanel: React.FC<{
649653
while (actionQueueRef.current.length > 0) {
650654
const actionMsg = actionQueueRef.current.shift()!;
651655
const cfg = configRef.current;
652-
if (!cfg?.apiKey) break;
656+
if (!hasUsableLLMConfig(cfg)) break;
653657

654658
const newHistory: ChatMessage[] = [
655659
...chatHistoryRef.current,
@@ -672,7 +676,7 @@ const ChatPanel: React.FC<{
672676
useEffect(() => {
673677
const unsubscribe = onUserAction((event: unknown) => {
674678
const cfg = configRef.current;
675-
if (!cfg?.apiKey) return;
679+
if (!hasUsableLLMConfig(cfg)) return;
676680

677681
const evt = event as {
678682
app_action?: {
@@ -704,7 +708,7 @@ const ChatPanel: React.FC<{
704708
async (overrideText?: string) => {
705709
const text = overrideText ?? input.trim();
706710
if (!text || loading) return;
707-
if (!config?.apiKey) {
711+
if (!hasUsableLLMConfig(config)) {
708712
setShowSettings(true);
709713
return;
710714
}
@@ -1102,9 +1106,9 @@ const ChatPanel: React.FC<{
11021106
<div className={styles.messages} data-testid="chat-messages">
11031107
{messages.length === 0 && (
11041108
<div className={styles.emptyState}>
1105-
{config?.apiKey
1109+
{hasUsableLLMConfig(config)
11061110
? `${character.character_name} is ready to chat...`
1107-
: 'Click the gear icon to configure your LLM API key'}
1111+
: 'Click the gear icon to configure your LLM connection'}
11081112
</div>
11091113
)}
11101114
{messages.map((msg) => (
@@ -1287,6 +1291,7 @@ const SettingsModal: React.FC<{
12871291
<option value="openai">OpenAI</option>
12881292
<option value="anthropic">Anthropic</option>
12891293
<option value="deepseek">DeepSeek</option>
1294+
<option value="llama.cpp">llama.cpp</option>
12901295
<option value="minimax">MiniMax</option>
12911296
<option value="z.ai">Z.ai</option>
12921297
<option value="kimi">Kimi</option>
@@ -1301,7 +1306,7 @@ const SettingsModal: React.FC<{
13011306
type="password"
13021307
value={apiKey}
13031308
onChange={(e) => setApiKey(e.target.value)}
1304-
placeholder="sk-..."
1309+
placeholder="Optional for local servers"
13051310
/>
13061311
</div>
13071312

apps/webuiapps/src/lib/__tests__/llmClient.test.ts

Lines changed: 59 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -36,6 +36,13 @@ const MOCK_ANTHROPIC_CONFIG: LLMConfig = {
3636
model: 'claude-opus-4-6',
3737
};
3838

39+
// Local llama.cpp server config: OpenAI-compatible endpoint with an
// intentionally empty apiKey (local servers don't require one).
const MOCK_LLAMACPP_CONFIG: LLMConfig = {
  provider: 'llama.cpp',
  apiKey: '',
  baseUrl: 'http://athena:8081',
  model: 'Qwen_Qwen3.5-35B-A3B',
};
45+
3946
const MOCK_MESSAGES: ChatMessage[] = [{ role: 'user', content: 'Hello' }];
4047

4148
const MOCK_TOOLS: ToolDef[] = [
@@ -114,6 +121,13 @@ describe('getDefaultProviderConfig()', () => {
114121
expect(cfg.model).toBe('deepseek-chat');
115122
});
116123

124+
// Verifies the llama.cpp entry added to LLM_PROVIDER_CONFIGS surfaces the
// expected local-server defaults through getDefaultProviderConfig().
it('returns correct defaults for llama.cpp', () => {
  const cfg = getDefaultProviderConfig('llama.cpp');
  expect(cfg.provider).toBe('llama.cpp');
  expect(cfg.baseUrl).toBe('http://localhost:8080');
  expect(cfg.model).toBe('local-model');
});
130+
117131
it('returns correct defaults for minimax', () => {
118132
const cfg = getDefaultProviderConfig('minimax');
119133
expect(cfg.provider).toBe('minimax');
@@ -422,6 +436,51 @@ describe('chat()', () => {
422436
});
423437
});
424438

439+
// llama.cpp is routed through the OpenAI-compatible code path; these tests
// pin the three behaviors the local backend depends on: keyless auth,
// <think> tag stripping, and inline XML tool-call recovery.
describe('llama.cpp provider (OpenAI-compatible)', () => {
  it('routes to OpenAI path without requiring an API key', async () => {
    const mockFetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse('Local response'));
    globalThis.fetch = mockFetch;

    const result = await chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG);

    expect(result.content).toBe('Local response');
    // With an empty apiKey, no Authorization header may be sent; the proxy
    // target header must still point at the configured local base URL.
    const headers = mockFetch.mock.calls[0][1].headers as Record<string, string>;
    expect(headers['Authorization']).toBeUndefined();
    expect(headers['X-LLM-Target-URL']).toBe('http://athena:8081/v1/chat/completions');
  });

  it('strips Qwen-style think tags from assistant content', async () => {
    const mockFetch = vi
      .fn()
      .mockResolvedValueOnce(makeOpenAIResponse('<think>hidden reasoning</think>Hello there'));
    globalThis.fetch = mockFetch;

    const result = await chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG);

    // Only the user-visible text survives; the reasoning block is removed.
    expect(result.content).toBe('Hello there');
  });

  it('converts inline XML-style tool call content into structured tool calls', async () => {
    // Models without native tool-calling emit the call as markup in content.
    const inlineToolContent = `<tool_call>
respond_to_user
<arg_key>character_expression</arg_key>
<arg_value>{"content":"What? Did I catch you off guard?","emotion":"happy"}</arg_value>
<arg_key>user_interaction</arg_key>
<arg_value>{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}</arg_value>
</tool_call>`;
    globalThis.fetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse(inlineToolContent));

    const result = await chat(MOCK_MESSAGES, MOCK_TOOLS, MOCK_LLAMACPP_CONFIG);

    // The markup is consumed entirely and re-emitted as one structured call
    // with the arg pairs merged into a single JSON arguments string.
    expect(result.content).toBe('');
    expect(result.toolCalls).toHaveLength(1);
    expect(result.toolCalls[0].function.name).toBe('respond_to_user');
    expect(result.toolCalls[0].function.arguments).toBe(
      '{"character_expression":{"content":"What? Did I catch you off guard?","emotion":"happy"},"user_interaction":{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}}',
    );
  });
});
483+
425484
describe('Anthropic provider', () => {
426485
it('uses x-api-key and anthropic-version headers', async () => {
427486
const mockFetch = vi.fn().mockResolvedValueOnce(makeAnthropicResponse('Anthropic response'));

apps/webuiapps/src/lib/llmClient.ts

Lines changed: 93 additions & 17 deletions
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
/**
22
* Minimal LLM API Client
3-
* Supports OpenAI / DeepSeek / Anthropic formats
3+
* Supports OpenAI-compatible / Anthropic-compatible formats
44
*/
55

66
import type { LLMConfig } from './llmModels';
@@ -88,6 +88,73 @@ interface LLMResponse {
8888
toolCalls: ToolCall[];
8989
}
9090

91+
/** Result of scanning assistant text for inline XML-style tool calls. */
interface InlineToolParseResult {
  content: string; // assistant text with recognized tool-call blocks removed
  toolCalls: ToolCall[]; // structured calls recovered from the markup
}
95+
96+
function stripThinkTags(content: string): string {
97+
const withoutBlocks = content
98+
.replace(/<think\b[^>]*>[\s\S]*?<\/think>/gi, '')
99+
.replace(/<\/?think\b[^>]*>/gi, '');
100+
return withoutBlocks === content ? content : withoutBlocks.trim();
101+
}
102+
103+
function parseInlineArgValue(rawValue: string): unknown {
104+
const trimmed = rawValue.trim();
105+
if (!trimmed) return '';
106+
try {
107+
return JSON.parse(trimmed);
108+
} catch {
109+
return trimmed;
110+
}
111+
}
112+
113+
/**
 * Scans assistant text for inline XML-style tool calls (output where a model
 * emits `<tool_call>…</tool_call>` markup inside `content` instead of
 * structured `tool_calls`) and converts each block into a ToolCall.
 *
 * Returns the text with recognized blocks removed plus the parsed calls.
 * Think tags are always stripped; text lacking both `<arg_key>` and
 * `<arg_value>` markers is otherwise returned unchanged with no calls.
 */
function extractInlineToolCalls(rawContent: string): InlineToolParseResult {
  // Strip reasoning tags first so they cannot hide or split the markup.
  const content = stripThinkTags(rawContent);
  // Cheap pre-check: avoid running the regexes over ordinary chat text.
  if (!content.includes('<arg_key>') || !content.includes('<arg_value>')) {
    return { content, toolCalls: [] };
  }

  // Matches "<tool_call> name …body… </tool_call>". The "(" alternative
  // presumably tolerates a "(name …" prefix variant some models emit —
  // TODO confirm which backend produces that form.
  const blockRegex = /(?:<tool_call>\s*|\()([a-zA-Z0-9_.-]+)\s*([\s\S]*?)<\/tool_call>/g;
  const toolCalls: ToolCall[] = [];
  let cleanedContent = content;
  let matchIndex = 0; // mints unique synthetic ids for recovered calls

  for (const match of content.matchAll(blockRegex)) {
    const toolName = match[1]?.trim();
    const body = match[2] ?? '';
    if (!toolName) continue;

    // Collect <arg_key>/<arg_value> pairs; a repeated key overwrites the
    // earlier value.
    const args: Record<string, unknown> = {};
    const pairRegex =
      /<arg_key>\s*([\s\S]*?)\s*<\/arg_key>\s*<arg_value>\s*([\s\S]*?)\s*<\/arg_value>/g;

    for (const pair of body.matchAll(pairRegex)) {
      const key = pair[1]?.trim();
      if (!key) continue;
      args[key] = parseInlineArgValue(pair[2] ?? '');
    }

    // A block that yielded no parsable arguments is skipped and therefore
    // left visible in the returned content.
    if (Object.keys(args).length === 0) continue;

    toolCalls.push({
      id: `inline_tool_${matchIndex++}`, // synthetic: the backend supplied none
      type: 'function',
      function: {
        name: toolName,
        // OpenAI-style tool calls carry arguments as a JSON string.
        arguments: JSON.stringify(args),
      },
    });
    // Erase the first occurrence of the matched block from the visible text.
    cleanedContent = cleanedContent.replace(match[0], '');
  }

  return {
    content: cleanedContent.trim(),
    toolCalls,
  };
}
157+
91158
function hasVersionSuffix(url: string): boolean {
92159
return /\/v\d+\/?$/.test(url);
93160
}
@@ -162,14 +229,17 @@ async function chatOpenAI(
162229
messageCount: messages.length,
163230
toolCount: tools.length,
164231
});
232+
const headers: Record<string, string> = {
233+
'Content-Type': 'application/json',
234+
'X-LLM-Target-URL': targetUrl,
235+
...parseCustomHeaders(config.customHeaders),
236+
};
237+
if (config.apiKey.trim()) {
238+
headers.Authorization = `Bearer ${config.apiKey}`;
239+
}
165240
const res = await fetch('/api/llm-proxy', {
166241
method: 'POST',
167-
headers: {
168-
'Content-Type': 'application/json',
169-
Authorization: `Bearer ${config.apiKey}`,
170-
'X-LLM-Target-URL': targetUrl,
171-
...parseCustomHeaders(config.customHeaders),
172-
},
242+
headers,
173243
body: JSON.stringify(body),
174244
});
175245

@@ -183,7 +253,8 @@ async function chatOpenAI(
183253

184254
const data = JSON.parse(text);
185255
const choice = data.choices?.[0]?.message;
186-
const toolCalls = choice?.tool_calls || [];
256+
const parsedInline = extractInlineToolCalls(choice?.content || '');
257+
const toolCalls = choice?.tool_calls?.length ? choice.tool_calls : parsedInline.toolCalls;
187258
const calledNames = toolCalls
188259
.map((tc: { function?: { name?: string } }) => tc.function?.name)
189260
.filter(Boolean);
@@ -195,7 +266,9 @@ async function chatOpenAI(
195266
calledNames,
196267
);
197268
return {
198-
content: choice?.content || '',
269+
content: choice?.tool_calls?.length
270+
? stripThinkTags(choice?.content || '')
271+
: parsedInline.content,
199272
toolCalls,
200273
};
201274
}
@@ -267,15 +340,18 @@ async function chatAnthropic(
267340
messageCount: anthropicMessages.length,
268341
toolCount: anthropicTools.length,
269342
});
343+
const headers: Record<string, string> = {
344+
'Content-Type': 'application/json',
345+
'anthropic-version': '2023-06-01',
346+
'X-LLM-Target-URL': targetUrl,
347+
...parseCustomHeaders(config.customHeaders),
348+
};
349+
if (config.apiKey.trim()) {
350+
headers['x-api-key'] = config.apiKey;
351+
}
270352
const res = await fetch('/api/llm-proxy', {
271353
method: 'POST',
272-
headers: {
273-
'Content-Type': 'application/json',
274-
'x-api-key': config.apiKey,
275-
'anthropic-version': '2023-06-01',
276-
'X-LLM-Target-URL': targetUrl,
277-
...parseCustomHeaders(config.customHeaders),
278-
},
354+
headers,
279355
body: JSON.stringify(body),
280356
});
281357

@@ -314,5 +390,5 @@ async function chatAnthropic(
314390
'calledNames=',
315391
calledNames,
316392
);
317-
return { content, toolCalls };
393+
return { content: stripThinkTags(content), toolCalls };
318394
}

apps/webuiapps/src/lib/llmModels.ts

Lines changed: 8 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -2,6 +2,7 @@ export type LLMProvider =
22
| 'openai'
33
| 'anthropic'
44
| 'deepseek'
5+
| 'llama.cpp'
56
| 'minimax'
67
| 'z.ai'
78
| 'kimi'
@@ -77,6 +78,13 @@ export const LLM_PROVIDER_CONFIGS: Record<LLMProvider, ProviderModelConfig> = {
7778
],
7879
},
7980

81+
'llama.cpp': {
82+
displayName: 'llama.cpp',
83+
baseUrl: 'http://localhost:8080',
84+
defaultModel: 'local-model',
85+
models: [],
86+
},
87+
8088
minimax: {
8189
displayName: 'MiniMax',
8290
baseUrl: 'https://api.minimax.io/anthropic/v1',

0 commit comments

Comments (0)