fix(deepseek): separate thinking process from response in --think mode (#1142)

kagura-agent · web-flow · commit 733ac0747de5 · 2026-04-22T18:03:37.000+08:00
* fix(deepseek): separate thinking process from response in --think mode (#1124)

  When --think is enabled, the response now includes separate fields:
  - response: clean final answer only
  - thinking: chain-of-thought reasoning content
  - thinking_time: time spent thinking (e.g. '1')

  Supports both English ('Thought for X seconds') and Chinese ('已思考（用时 X 秒）') thinking header patterns.

  Fixes #1124

* chore: regenerate cli-manifest.json

* fix(deepseek): DOM-level think/response separation, dynamic columns

  Blocker 1: Replace the fragile split(/\n\n+/) heuristic in parseThinkingResponse() with DOM-level extraction in waitForResponse(). The page evaluate now queries distinct DOM nodes (.ds-markdown--think vs .ds-markdown) for thinking and response content. The text-level parser falls back to treating everything after the header as thinking (no split), avoiding silent corruption of multi-paragraph content.

  Blocker 2: Remove the static columns declaration from askCommand. The renderer infers columns from row keys, so non-think output only shows 'response' while think output shows all three columns.

  Tests added for multi-paragraph thinking, multi-paragraph answer, and a non-think column regression guard.

* chore: regenerate cli-manifest.json
diff --git a/cli-manifest.json b/cli-manifest.json
@@ -4408,9 +4408,6 @@
         "help": "Attach a file (PDF, image, text) with the prompt"
       }
     ],
-    "columns": [
-      "response"
-    ],
     "timeout": 180,
     "type": "js",
     "modulePath": "deepseek/ask.js",
diff --git a/clis/deepseek/ask.js b/clis/deepseek/ask.js
@@ -23,7 +23,7 @@ export const askCommand = cli({
         { name: 'search', type: 'boolean', default: false, help: 'Enable web search' },
         { name: 'file', help: 'Attach a file (PDF, image, text) with the prompt' },
     ],
-    columns: ['response'],
+    // columns omitted: derived from row keys so non-think output shows only 'response'
 
     func: async (page, kwargs) => {
         const prompt = kwargs.prompt;
@@ -71,11 +71,14 @@ export const askCommand = cli({
                 if (!String(err?.message || err).includes('Promise was collected')) throw err;
             }
             await page.wait(3);
-            const response = await waitForResponse(page, baseline, prompt, timeoutMs);
-            if (!response) {
+            const result = await waitForResponse(page, baseline, prompt, timeoutMs, wantThink);
+            if (!result) {
                 return [{ response: `[NO RESPONSE] No reply within ${kwargs.timeout}s.` }];
             }
-            return [{ response }];
+            if (wantThink && typeof result === 'object' && result.response !== undefined) {
+                return [result];
+            }
+            return [{ response: result }];
         }
 
         const baseline = await withRetry(() => getBubbleCount(page));
@@ -84,11 +87,14 @@ export const askCommand = cli({
             throw new CommandExecutionError(sendResult?.reason || 'Failed to send message');
         }
 
-        const response = await waitForResponse(page, baseline, prompt, timeoutMs);
-        if (!response) {
+        const result = await waitForResponse(page, baseline, prompt, timeoutMs, wantThink);
+        if (!result) {
             return [{ response: `[NO RESPONSE] No reply within ${kwargs.timeout}s.` }];
         }
 
-        return [{ response }];
+        if (wantThink && typeof result === 'object' && result.response !== undefined) {
+            return [result];
+        }
+        return [{ response: result }];
     },
 });
diff --git a/clis/deepseek/ask.test.js b/clis/deepseek/ask.test.js
@@ -69,7 +69,7 @@ describe('deepseek ask --file', () => {
     expect(rows).toEqual([{ response: 'new reply' }]);
     expect(mockGetBubbleCount).toHaveBeenCalledTimes(1);
     expect(mockSendWithFile).toHaveBeenCalledWith(page, './report.pdf', 'summarize this');
-    expect(mockWaitForResponse).toHaveBeenCalledWith(page, 7, 'summarize this', 120000);
+    expect(mockWaitForResponse).toHaveBeenCalledWith(page, 7, 'summarize this', 120000, false);
   });
 
   it('still fails when explicit instant model selection cannot be verified', async () => {
@@ -85,3 +85,81 @@ describe('deepseek ask --file', () => {
     })).rejects.toThrow(new CommandExecutionError('Could not switch to instant model'));
   });
 });
+
+describe('deepseek ask --think', () => {
+  const page = {
+    wait: vi.fn().mockResolvedValue(undefined),
+    goto: vi.fn().mockResolvedValue(undefined),
+  };
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    mockEnsureOnDeepSeek.mockResolvedValue(undefined);
+    mockSelectModel.mockResolvedValue({ ok: true, toggled: false });
+    mockSetFeature.mockResolvedValue({ ok: true, toggled: false });
+    mockSendMessage.mockResolvedValue({ ok: true });
+    mockGetBubbleCount.mockResolvedValue(5);
+  });
+
+  it('returns separate thinking and response fields when --think is enabled', async () => {
+    mockWaitForResponse.mockResolvedValue({
+      response: 'The answer is 42.',
+      thinking: 'Let me analyze this...',
+      thinking_time: '2.5',
+    });
+
+    const rows = await askCommand.func(page, {
+      prompt: 'what is the answer?',
+      timeout: 120,
+      new: false,
+      model: 'instant',
+      think: true,
+      search: false,
+    });
+
+    expect(rows).toEqual([{
+      response: 'The answer is 42.',
+      thinking: 'Let me analyze this...',
+      thinking_time: '2.5',
+    }]);
+    expect(mockWaitForResponse).toHaveBeenCalledWith(page, 5, 'what is the answer?', 120000, true);
+  });
+
+  it('returns plain response when --think is disabled', async () => {
+    mockWaitForResponse.mockResolvedValue('The answer is 42.');
+
+    const rows = await askCommand.func(page, {
+      prompt: 'what is the answer?',
+      timeout: 120,
+      new: false,
+      model: 'instant',
+      think: false,
+      search: false,
+    });
+
+    expect(rows).toEqual([{ response: 'The answer is 42.' }]);
+    expect(mockWaitForResponse).toHaveBeenCalledWith(page, 5, 'what is the answer?', 120000, false);
+  });
+
+  it('does not declare static columns (derived from row keys)', () => {
+    // columns should be undefined so the renderer infers from row keys,
+    // avoiding empty trailing columns on non-think output.
+    expect(askCommand.columns).toBeUndefined();
+  });
+
+  it('non-think rows only contain response key', async () => {
+    mockWaitForResponse.mockResolvedValue('Plain answer.');
+
+    const rows = await askCommand.func(page, {
+      prompt: 'hello',
+      timeout: 120,
+      new: false,
+      model: 'instant',
+      think: false,
+      search: false,
+    });
+
+    // Row keys drive rendered columns; no thinking/thinking_time present.
+    expect(Object.keys(rows[0])).toEqual(['response']);
+  });
+});
diff --git a/clis/deepseek/utils.js b/clis/deepseek/utils.js
@@ -97,7 +97,35 @@ export async function getBubbleCount(page) {
     return count || 0;
 }
 
-export async function waitForResponse(page, baselineCount, prompt, timeoutMs) {
+// Text-only fallback parser, used by waitForResponse() when no DOM-separated
+// fields are available or on timeout.  Does NOT split on \n\n — that heuristic
+// silently corrupts multi-paragraph thinking or multi-paragraph answers.
+// Instead, everything after the header is returned as `thinking` and
+// `response` is ''; without a header the whole text is returned as `response`.
+export function parseThinkingResponse(rawText) {
+    if (!rawText) return null;
+
+    // Match thinking header patterns: "Thought for X seconds" or "已思考（用时 X 秒）"
+    const thinkHeaderMatch = rawText.match(/^(Thought for ([\d.]+) seconds?|已思考（用时 ([\d.]+) 秒）)\s*/);
+
+    if (!thinkHeaderMatch) {
+        // No thinking section found, return plain response
+        return { response: rawText, thinking: null, thinking_time: null };
+    }
+
+    const thinkingTime = thinkHeaderMatch[2] || thinkHeaderMatch[3];
+    const afterHeader = rawText.slice(thinkHeaderMatch[0].length);
+
+    // Treat everything after the header as thinking.  `response` is left
+    // empty here; waitForResponse() returns this fallback result unchanged.
+    return {
+        response: '',
+        thinking: afterHeader.trim(),
+        thinking_time: thinkingTime,
+    };
+}
+
+export async function waitForResponse(page, baselineCount, prompt, timeoutMs, parseThinking = false) {
     const startTime = Date.now();
     let lastText = '';
     let stableCount = 0;
@@ -110,7 +138,51 @@ export async function waitForResponse(page, baselineCount, prompt, timeoutMs) {
             result = await page.evaluate(`(() => {
                 const bubbles = document.querySelectorAll('${MESSAGE_SELECTOR}');
                 const texts = Array.from(bubbles).map(b => (b.innerText || '').trim()).filter(Boolean);
-                return { count: texts.length, last: texts[texts.length - 1] || '' };
+                var last = texts[texts.length - 1] || '';
+
+                // DOM-level thinking/response separation.
+                // DeepSeek renders thinking in a collapsible container with a
+                // distinct class (e.g. .ds-markdown--think or similar) and the
+                // final answer in the main .ds-markdown region.  By querying
+                // these separately we avoid any text-heuristic split.
+                var thinkEl = null, answerEl = null, thinkTime = null;
+                if (${parseThinking} && bubbles.length > 0) {
+                    var lastBubble = bubbles[bubbles.length - 1];
+                    // Thinking container — DeepSeek uses various class names;
+                    // try common selectors.
+                    thinkEl = lastBubble.querySelector('.ds-markdown--think')
+                           || lastBubble.querySelector('[class*="think"]');
+                    // Final answer container — the main markdown block that is
+                    // NOT the thinking section.
+                    var markdownEls = lastBubble.querySelectorAll('.ds-markdown');
+                    for (var i = 0; i < markdownEls.length; i++) {
+                        if (markdownEls[i] !== thinkEl
+                            && !(thinkEl && thinkEl.contains(markdownEls[i]))
+                            && !markdownEls[i].classList.contains('ds-markdown--think')) {
+                            answerEl = markdownEls[i];
+                        }
+                    }
+                    // Thinking time from the toggle/header element
+                    var timeEl = lastBubble.querySelector('[class*="think"] ~ *')
+                              || lastBubble.querySelector('.ds-thinking-header');
+                    if (!timeEl) {
+                        // Fallback: parse from raw text header
+                        var m = last.match(/^(?:Thought for ([\\d.]+) seconds?|已思考（用时 ([\\d.]+) 秒）)/);
+                        if (m) thinkTime = m[1] || m[2];
+                    } else {
+                        var tm = (timeEl.textContent || '').match(/([\\d.]+)/);
+                        if (tm) thinkTime = tm[1];
+                    }
+                }
+
+                return {
+                    count: texts.length,
+                    last: last,
+                    // DOM-separated fields (null when not available)
+                    thinkText: thinkEl ? (thinkEl.innerText || '').trim() : null,
+                    answerText: answerEl ? (answerEl.innerText || '').trim() : null,
+                    thinkTime: thinkTime,
+                };
             })()`);
         } catch {
             continue;
@@ -122,14 +194,31 @@ export async function waitForResponse(page, baselineCount, prompt, timeoutMs) {
         if (candidate && result.count > baselineCount && candidate !== prompt.trim()) {
             if (candidate === lastText) {
                 stableCount++;
-                if (stableCount >= 3) return candidate;
+                if (stableCount >= 3) {
+                    if (parseThinking) {
+                        // Prefer DOM-level separation
+                        if (result.thinkText != null || result.answerText != null) {
+                            return {
+                                thinking: result.thinkText || '',
+                                response: result.answerText || '',
+                                thinking_time: result.thinkTime || null,
+                            };
+                        }
+                        // Fallback to text-header parsing (no \n\n split)
+                        return parseThinkingResponse(candidate);
+                    }
+                    return candidate;
+                }
             } else {
                 stableCount = 0;
             }
             lastText = candidate;
         }
     }
 
+    if (parseThinking && lastText) {
+        return parseThinkingResponse(lastText);
+    }
     return lastText || null;
 }
 
diff --git a/clis/deepseek/utils.test.js b/clis/deepseek/utils.test.js
@@ -2,7 +2,90 @@ import fs from 'node:fs';
 import os from 'node:os';
 import path from 'node:path';
 import { afterEach, describe, expect, it, vi } from 'vitest';
-import { selectModel, sendWithFile } from './utils.js';
+import { selectModel, sendWithFile, parseThinkingResponse } from './utils.js';
+
+describe('deepseek parseThinkingResponse', () => {
+  it('returns plain response when no thinking header is present', () => {
+    const rawText = 'This is a regular response without thinking.';
+    const result = parseThinkingResponse(rawText);
+
+    expect(result).toEqual({
+      response: rawText,
+      thinking: null,
+      thinking_time: null,
+    });
+  });
+
+  it('parses English thinking header — all content after header is thinking', () => {
+    const rawText = 'Thought for 3.5 seconds\n\nLet me analyze this problem...\nFirst, I need to consider X.\nThen, Y.\n\nThe answer is 42.';
+    const result = parseThinkingResponse(rawText);
+
+    // Text-level parser no longer splits on \n\n; everything after header is thinking.
+    // DOM-level extraction in waitForResponse() handles the actual separation.
+    expect(result).toEqual({
+      response: '',
+      thinking: 'Let me analyze this problem...\nFirst, I need to consider X.\nThen, Y.\n\nThe answer is 42.',
+      thinking_time: '3.5',
+    });
+  });
+
+  it('parses Chinese thinking header — all content after header is thinking', () => {
+    const rawText = '已思考（用时 2.3 秒）\n\n让我分析这个问题...\n首先需要考虑X。\n然后是Y。\n\n答案是42。';
+    const result = parseThinkingResponse(rawText);
+
+    expect(result).toEqual({
+      response: '',
+      thinking: '让我分析这个问题...\n首先需要考虑X。\n然后是Y。\n\n答案是42。',
+      thinking_time: '2.3',
+    });
+  });
+
+  it('multi-paragraph thinking without final answer is not corrupted', () => {
+    const rawText = 'Thought for 1.2 seconds\n\nFirst paragraph.\n\nSecond paragraph.';
+    const result = parseThinkingResponse(rawText);
+
+    // Both paragraphs must stay in thinking; response is empty.
+    expect(result).toEqual({
+      response: '',
+      thinking: 'First paragraph.\n\nSecond paragraph.',
+      thinking_time: '1.2',
+    });
+  });
+
+  it('multi-paragraph final answer is not split by text parser', () => {
+    const rawText = 'Thought for 3 seconds\n\nreasoning\n\nAnswer para 1.\n\nAnswer para 2.';
+    const result = parseThinkingResponse(rawText);
+
+    // Text parser treats everything as thinking; DOM handles separation.
+    expect(result).toEqual({
+      response: '',
+      thinking: 'reasoning\n\nAnswer para 1.\n\nAnswer para 2.',
+      thinking_time: '3',
+    });
+  });
+
+  it('handles thinking without final response', () => {
+    const rawText = 'Thought for 1.2 seconds\n\nThinking process here...';
+    const result = parseThinkingResponse(rawText);
+
+    expect(result).toEqual({
+      response: '',
+      thinking: 'Thinking process here...',
+      thinking_time: '1.2',
+    });
+  });
+
+  it('returns null for empty input', () => {
+    const result = parseThinkingResponse('');
+    expect(result).toBeNull();
+  });
+
+  it('returns null for null input', () => {
+    const result = parseThinkingResponse(null);
+    expect(result).toBeNull();
+  });
+});
+
 
 describe('deepseek sendWithFile', () => {
   const tempDirs = [];