Skip to content

Commit 0246815

Browse files
authored
Merge pull request #29 from vmlinuzx/feat/local-llamacpp-support
Add llama.cpp support for local OpenAI-compatible LLM backends
2 parents add231b + 847b54b commit 0246815

File tree

4 files changed

+171
-23
lines changed

4 files changed

+171
-23
lines changed

apps/webuiapps/src/components/ChatPanel/index.tsx

Lines changed: 11 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -95,6 +95,10 @@ interface CharacterDisplayMessage extends DisplayMessage {
9595
toolCalls?: string[]; // collapsed tool call summaries
9696
}
9797

98+
function hasUsableLLMConfig(config: LLMConfig | null | undefined): config is LLMConfig {
99+
return !!config?.baseUrl.trim() && !!config.model.trim();
100+
}
101+
98102
// ---------------------------------------------------------------------------
99103
// Tool definitions for character system
100104
// ---------------------------------------------------------------------------
@@ -649,7 +653,7 @@ const ChatPanel: React.FC<{
649653
while (actionQueueRef.current.length > 0) {
650654
const actionMsg = actionQueueRef.current.shift()!;
651655
const cfg = configRef.current;
652-
if (!cfg?.apiKey) break;
656+
if (!hasUsableLLMConfig(cfg)) break;
653657

654658
const newHistory: ChatMessage[] = [
655659
...chatHistoryRef.current,
@@ -672,7 +676,7 @@ const ChatPanel: React.FC<{
672676
useEffect(() => {
673677
const unsubscribe = onUserAction((event: unknown) => {
674678
const cfg = configRef.current;
675-
if (!cfg?.apiKey) return;
679+
if (!hasUsableLLMConfig(cfg)) return;
676680

677681
const evt = event as {
678682
app_action?: {
@@ -704,7 +708,7 @@ const ChatPanel: React.FC<{
704708
async (overrideText?: string) => {
705709
const text = overrideText ?? input.trim();
706710
if (!text || loading) return;
707-
if (!config?.apiKey) {
711+
if (!hasUsableLLMConfig(config)) {
708712
setShowSettings(true);
709713
return;
710714
}
@@ -1102,9 +1106,9 @@ const ChatPanel: React.FC<{
11021106
<div className={styles.messages} data-testid="chat-messages">
11031107
{messages.length === 0 && (
11041108
<div className={styles.emptyState}>
1105-
{config?.apiKey
1109+
{hasUsableLLMConfig(config)
11061110
? `${character.character_name} is ready to chat...`
1107-
: 'Click the gear icon to configure your LLM API key'}
1111+
: 'Click the gear icon to configure your LLM connection'}
11081112
</div>
11091113
)}
11101114
{messages.map((msg) => (
@@ -1287,6 +1291,7 @@ const SettingsModal: React.FC<{
12871291
<option value="openai">OpenAI</option>
12881292
<option value="anthropic">Anthropic</option>
12891293
<option value="deepseek">DeepSeek</option>
1294+
<option value="llama.cpp">llama.cpp</option>
12901295
<option value="minimax">MiniMax</option>
12911296
<option value="z.ai">Z.ai</option>
12921297
<option value="kimi">Kimi</option>
@@ -1301,7 +1306,7 @@ const SettingsModal: React.FC<{
13011306
type="password"
13021307
value={apiKey}
13031308
onChange={(e) => setApiKey(e.target.value)}
1304-
placeholder="sk-..."
1309+
placeholder="Optional for local servers"
13051310
/>
13061311
</div>
13071312

apps/webuiapps/src/lib/__tests__/llmClient.test.ts

Lines changed: 59 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -36,6 +36,13 @@ const MOCK_ANTHROPIC_CONFIG: LLMConfig = {
3636
model: 'claude-opus-4-6',
3737
};
3838

39+
// Local llama.cpp server config: OpenAI-compatible endpoint with an
// intentionally empty apiKey (local servers don't require one).
const MOCK_LLAMACPP_CONFIG: LLMConfig = {
  provider: 'llama.cpp',
  apiKey: '',
  baseUrl: 'http://athena:8081',
  model: 'Qwen_Qwen3.5-35B-A3B',
};
45+
3946
const MOCK_MESSAGES: ChatMessage[] = [{ role: 'user', content: 'Hello' }];
4047

4148
const MOCK_TOOLS: ToolDef[] = [
@@ -114,6 +121,13 @@ describe('getDefaultProviderConfig()', () => {
114121
expect(cfg.model).toBe('deepseek-chat');
115122
});
116123

124+
// Verifies the llama.cpp entry added to LLM_PROVIDER_CONFIGS surfaces the
// expected local-server defaults through getDefaultProviderConfig().
it('returns correct defaults for llama.cpp', () => {
  const cfg = getDefaultProviderConfig('llama.cpp');
  expect(cfg.provider).toBe('llama.cpp');
  expect(cfg.baseUrl).toBe('http://localhost:8080');
  expect(cfg.model).toBe('local-model');
});
130+
117131
it('returns correct defaults for minimax', () => {
118132
const cfg = getDefaultProviderConfig('minimax');
119133
expect(cfg.provider).toBe('minimax');
@@ -422,6 +436,51 @@ describe('chat()', () => {
422436
});
423437
});
424438

439+
// llama.cpp is routed through the OpenAI-compatible code path; these tests
// pin the three behaviors the local backend depends on: keyless auth,
// <think> tag stripping, and inline XML tool-call recovery.
describe('llama.cpp provider (OpenAI-compatible)', () => {
  it('routes to OpenAI path without requiring an API key', async () => {
    const mockFetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse('Local response'));
    globalThis.fetch = mockFetch;

    const result = await chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG);

    expect(result.content).toBe('Local response');
    // With an empty apiKey, no Authorization header may be sent; the proxy
    // target header must still point at the configured local base URL.
    const headers = mockFetch.mock.calls[0][1].headers as Record<string, string>;
    expect(headers['Authorization']).toBeUndefined();
    expect(headers['X-LLM-Target-URL']).toBe('http://athena:8081/v1/chat/completions');
  });

  it('strips Qwen-style think tags from assistant content', async () => {
    const mockFetch = vi
      .fn()
      .mockResolvedValueOnce(makeOpenAIResponse('<think>hidden reasoning</think>Hello there'));
    globalThis.fetch = mockFetch;

    const result = await chat(MOCK_MESSAGES, [], MOCK_LLAMACPP_CONFIG);

    // Only the user-visible text survives; the reasoning block is removed.
    expect(result.content).toBe('Hello there');
  });

  it('converts inline XML-style tool call content into structured tool calls', async () => {
    // Models without native tool-calling emit the call as markup in content.
    const inlineToolContent = `<tool_call>
respond_to_user
<arg_key>character_expression</arg_key>
<arg_value>{"content":"What? Did I catch you off guard?","emotion":"happy"}</arg_value>
<arg_key>user_interaction</arg_key>
<arg_value>{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}</arg_value>
</tool_call>`;
    globalThis.fetch = vi.fn().mockResolvedValueOnce(makeOpenAIResponse(inlineToolContent));

    const result = await chat(MOCK_MESSAGES, MOCK_TOOLS, MOCK_LLAMACPP_CONFIG);

    // The markup is consumed entirely and re-emitted as one structured call
    // with the arg pairs merged into a single JSON arguments string.
    expect(result.content).toBe('');
    expect(result.toolCalls).toHaveLength(1);
    expect(result.toolCalls[0].function.name).toBe('respond_to_user');
    expect(result.toolCalls[0].function.arguments).toBe(
      '{"character_expression":{"content":"What? Did I catch you off guard?","emotion":"happy"},"user_interaction":{"suggested_replies":["Just hanging around","What reunion?","Tell me more"]}}',
    );
  });
});
483+
425484
describe('Anthropic provider', () => {
426485
it('uses x-api-key and anthropic-version headers', async () => {
427486
const mockFetch = vi.fn().mockResolvedValueOnce(makeAnthropicResponse('Anthropic response'));

apps/webuiapps/src/lib/llmClient.ts

Lines changed: 93 additions & 17 deletions
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
/**
22
* Minimal LLM API Client
3-
* Supports OpenAI / DeepSeek / Anthropic formats
3+
* Supports OpenAI-compatible / Anthropic-compatible formats
44
*/
55

66
import type { LLMConfig } from './llmModels';
@@ -88,6 +88,73 @@ interface LLMResponse {
8888
toolCalls: ToolCall[];
8989
}
9090

91+
/** Result of scanning assistant text for inline XML-style tool calls. */
interface InlineToolParseResult {
  content: string; // assistant text with recognized tool-call blocks removed
  toolCalls: ToolCall[]; // structured calls recovered from the markup
}
95+
96+
function stripThinkTags(content: string): string {
97+
const withoutBlocks = content
98+
.replace(/<think\b[^>]*>[\s\S]*?<\/think>/gi, '')
99+
.replace(/<\/?think\b[^>]*>/gi, '');
100+
return withoutBlocks === content ? content : withoutBlocks.trim();
101+
}
102+
103+
function parseInlineArgValue(rawValue: string): unknown {
104+
const trimmed = rawValue.trim();
105+
if (!trimmed) return '';
106+
try {
107+
return JSON.parse(trimmed);
108+
} catch {
109+
return trimmed;
110+
}
111+
}
112+
113+
/**
 * Scans assistant text for inline XML-style tool calls (output where a model
 * emits `<tool_call>…</tool_call>` markup inside `content` instead of
 * structured `tool_calls`) and converts each block into a ToolCall.
 *
 * Returns the text with recognized blocks removed plus the parsed calls.
 * Think tags are always stripped; text lacking both `<arg_key>` and
 * `<arg_value>` markers is otherwise returned unchanged with no calls.
 */
function extractInlineToolCalls(rawContent: string): InlineToolParseResult {
  // Strip reasoning tags first so they cannot hide or split the markup.
  const content = stripThinkTags(rawContent);
  // Cheap pre-check: avoid running the regexes over ordinary chat text.
  if (!content.includes('<arg_key>') || !content.includes('<arg_value>')) {
    return { content, toolCalls: [] };
  }

  // Matches "<tool_call> name …body… </tool_call>". The "(" alternative
  // presumably tolerates a "(name …" prefix variant some models emit —
  // TODO confirm which backend produces that form.
  const blockRegex = /(?:<tool_call>\s*|\()([a-zA-Z0-9_.-]+)\s*([\s\S]*?)<\/tool_call>/g;
  const toolCalls: ToolCall[] = [];
  let cleanedContent = content;
  let matchIndex = 0; // mints unique synthetic ids for recovered calls

  for (const match of content.matchAll(blockRegex)) {
    const toolName = match[1]?.trim();
    const body = match[2] ?? '';
    if (!toolName) continue;

    // Collect <arg_key>/<arg_value> pairs; a repeated key overwrites the
    // earlier value.
    const args: Record<string, unknown> = {};
    const pairRegex =
      /<arg_key>\s*([\s\S]*?)\s*<\/arg_key>\s*<arg_value>\s*([\s\S]*?)\s*<\/arg_value>/g;

    for (const pair of body.matchAll(pairRegex)) {
      const key = pair[1]?.trim();
      if (!key) continue;
      args[key] = parseInlineArgValue(pair[2] ?? '');
    }

    // A block that yielded no parsable arguments is skipped and therefore
    // left visible in the returned content.
    if (Object.keys(args).length === 0) continue;

    toolCalls.push({
      id: `inline_tool_${matchIndex++}`, // synthetic: the backend supplied none
      type: 'function',
      function: {
        name: toolName,
        // OpenAI-style tool calls carry arguments as a JSON string.
        arguments: JSON.stringify(args),
      },
    });
    // Erase the first occurrence of the matched block from the visible text.
    cleanedContent = cleanedContent.replace(match[0], '');
  }

  return {
    content: cleanedContent.trim(),
    toolCalls,
  };
}
157+
91158
function hasVersionSuffix(url: string): boolean {
92159
return /\/v\d+\/?$/.test(url);
93160
}
@@ -162,14 +229,17 @@ async function chatOpenAI(
162229
messageCount: messages.length,
163230
toolCount: tools.length,
164231
});
232+
const headers: Record<string, string> = {
233+
'Content-Type': 'application/json',
234+
'X-LLM-Target-URL': targetUrl,
235+
...parseCustomHeaders(config.customHeaders),
236+
};
237+
if (config.apiKey.trim()) {
238+
headers.Authorization = `Bearer ${config.apiKey}`;
239+
}
165240
const res = await fetch('/api/llm-proxy', {
166241
method: 'POST',
167-
headers: {
168-
'Content-Type': 'application/json',
169-
Authorization: `Bearer ${config.apiKey}`,
170-
'X-LLM-Target-URL': targetUrl,
171-
...parseCustomHeaders(config.customHeaders),
172-
},
242+
headers,
173243
body: JSON.stringify(body),
174244
});
175245

@@ -183,7 +253,8 @@ async function chatOpenAI(
183253

184254
const data = JSON.parse(text);
185255
const choice = data.choices?.[0]?.message;
186-
const toolCalls = choice?.tool_calls || [];
256+
const parsedInline = extractInlineToolCalls(choice?.content || '');
257+
const toolCalls = choice?.tool_calls?.length ? choice.tool_calls : parsedInline.toolCalls;
187258
const calledNames = toolCalls
188259
.map((tc: { function?: { name?: string } }) => tc.function?.name)
189260
.filter(Boolean);
@@ -195,7 +266,9 @@ async function chatOpenAI(
195266
calledNames,
196267
);
197268
return {
198-
content: choice?.content || '',
269+
content: choice?.tool_calls?.length
270+
? stripThinkTags(choice?.content || '')
271+
: parsedInline.content,
199272
toolCalls,
200273
};
201274
}
@@ -267,15 +340,18 @@ async function chatAnthropic(
267340
messageCount: anthropicMessages.length,
268341
toolCount: anthropicTools.length,
269342
});
343+
const headers: Record<string, string> = {
344+
'Content-Type': 'application/json',
345+
'anthropic-version': '2023-06-01',
346+
'X-LLM-Target-URL': targetUrl,
347+
...parseCustomHeaders(config.customHeaders),
348+
};
349+
if (config.apiKey.trim()) {
350+
headers['x-api-key'] = config.apiKey;
351+
}
270352
const res = await fetch('/api/llm-proxy', {
271353
method: 'POST',
272-
headers: {
273-
'Content-Type': 'application/json',
274-
'x-api-key': config.apiKey,
275-
'anthropic-version': '2023-06-01',
276-
'X-LLM-Target-URL': targetUrl,
277-
...parseCustomHeaders(config.customHeaders),
278-
},
354+
headers,
279355
body: JSON.stringify(body),
280356
});
281357

@@ -314,5 +390,5 @@ async function chatAnthropic(
314390
'calledNames=',
315391
calledNames,
316392
);
317-
return { content, toolCalls };
393+
return { content: stripThinkTags(content), toolCalls };
318394
}

apps/webuiapps/src/lib/llmModels.ts

Lines changed: 8 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -2,6 +2,7 @@ export type LLMProvider =
22
| 'openai'
33
| 'anthropic'
44
| 'deepseek'
5+
| 'llama.cpp'
56
| 'minimax'
67
| 'z.ai'
78
| 'kimi'
@@ -77,6 +78,13 @@ export const LLM_PROVIDER_CONFIGS: Record<LLMProvider, ProviderModelConfig> = {
7778
],
7879
},
7980

81+
'llama.cpp': {
82+
displayName: 'llama.cpp',
83+
baseUrl: 'http://localhost:8080',
84+
defaultModel: 'local-model',
85+
models: [],
86+
},
87+
8088
minimax: {
8189
displayName: 'MiniMax',
8290
baseUrl: 'https://api.minimax.io/anthropic/v1',

0 commit comments

Comments (0)