Skip to content

Commit 733ac07

Browse files
authored
fix(deepseek): separate thinking process from response in --think mode (#1142)
* fix(deepseek): separate thinking process from response in --think mode (#1124) When --think is enabled, the response now includes separate fields: - response: clean final answer only - thinking: chain-of-thought reasoning content - thinking_time: time spent thinking (e.g. '1') Supports both English ('Thought for X seconds') and Chinese ('已思考(用时 X 秒)') thinking header patterns. Fixes #1124 * chore: regenerate cli-manifest.json * fix(deepseek): DOM-level think/response separation, dynamic columns Blocker 1: Replace fragile split(/\n\n+/) heuristic in parseThinkingResponse() with DOM-level extraction in waitForResponse(). The page evaluate now queries distinct DOM nodes (.ds-markdown--think vs .ds-markdown) for thinking and response content. The text-level parser falls back to treating everything after the header as thinking (no split), avoiding silent corruption of multi-paragraph content. Blocker 2: Remove static columns declaration from askCommand. The renderer infers columns from row keys, so non-think output only shows 'response' while think output shows all three columns. Tests added for multi-paragraph thinking, multi-paragraph answer, and non-think column regression guard. * chore: regenerate cli-manifest.json
1 parent e83148a commit 733ac07

5 files changed

Lines changed: 268 additions & 15 deletions

File tree

cli-manifest.json

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4408,9 +4408,6 @@
44084408
"help": "Attach a file (PDF, image, text) with the prompt"
44094409
}
44104410
],
4411-
"columns": [
4412-
"response"
4413-
],
44144411
"timeout": 180,
44154412
"type": "js",
44164413
"modulePath": "deepseek/ask.js",

clis/deepseek/ask.js

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ export const askCommand = cli({
2323
{ name: 'search', type: 'boolean', default: false, help: 'Enable web search' },
2424
{ name: 'file', help: 'Attach a file (PDF, image, text) with the prompt' },
2525
],
26-
columns: ['response'],
26+
// columns omitted: derived from row keys so non-think output shows only 'response'
2727

2828
func: async (page, kwargs) => {
2929
const prompt = kwargs.prompt;
@@ -71,11 +71,14 @@ export const askCommand = cli({
7171
if (!String(err?.message || err).includes('Promise was collected')) throw err;
7272
}
7373
await page.wait(3);
74-
const response = await waitForResponse(page, baseline, prompt, timeoutMs);
75-
if (!response) {
74+
const result = await waitForResponse(page, baseline, prompt, timeoutMs, wantThink);
75+
if (!result) {
7676
return [{ response: `[NO RESPONSE] No reply within ${kwargs.timeout}s.` }];
7777
}
78-
return [{ response }];
78+
if (wantThink && typeof result === 'object' && result.response !== undefined) {
79+
return [result];
80+
}
81+
return [{ response: result }];
7982
}
8083

8184
const baseline = await withRetry(() => getBubbleCount(page));
@@ -84,11 +87,14 @@ export const askCommand = cli({
8487
throw new CommandExecutionError(sendResult?.reason || 'Failed to send message');
8588
}
8689

87-
const response = await waitForResponse(page, baseline, prompt, timeoutMs);
88-
if (!response) {
90+
const result = await waitForResponse(page, baseline, prompt, timeoutMs, wantThink);
91+
if (!result) {
8992
return [{ response: `[NO RESPONSE] No reply within ${kwargs.timeout}s.` }];
9093
}
9194

92-
return [{ response }];
95+
if (wantThink && typeof result === 'object' && result.response !== undefined) {
96+
return [result];
97+
}
98+
return [{ response: result }];
9399
},
94100
});

clis/deepseek/ask.test.js

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ describe('deepseek ask --file', () => {
6969
expect(rows).toEqual([{ response: 'new reply' }]);
7070
expect(mockGetBubbleCount).toHaveBeenCalledTimes(1);
7171
expect(mockSendWithFile).toHaveBeenCalledWith(page, './report.pdf', 'summarize this');
72-
expect(mockWaitForResponse).toHaveBeenCalledWith(page, 7, 'summarize this', 120000);
72+
expect(mockWaitForResponse).toHaveBeenCalledWith(page, 7, 'summarize this', 120000, false);
7373
});
7474

7575
it('still fails when explicit instant model selection cannot be verified', async () => {
@@ -85,3 +85,81 @@ describe('deepseek ask --file', () => {
8585
})).rejects.toThrow(new CommandExecutionError('Could not switch to instant model'));
8686
});
8787
});
88+
89+
describe('deepseek ask --think', () => {
    // Stub page exposing only `wait` and `goto`, both resolving to undefined.
    const page = {
        wait: vi.fn().mockResolvedValue(undefined),
        goto: vi.fn().mockResolvedValue(undefined),
    };

    // Invokes the command with the shared default kwargs; individual tests
    // override only the fields they care about (key order is preserved).
    const runAsk = (overrides = {}) => askCommand.func(page, {
        prompt: 'what is the answer?',
        timeout: 120,
        new: false,
        model: 'instant',
        think: false,
        search: false,
        ...overrides,
    });

    beforeEach(() => {
        vi.clearAllMocks();
        mockEnsureOnDeepSeek.mockResolvedValue(undefined);
        mockSelectModel.mockResolvedValue({ ok: true, toggled: false });
        mockSetFeature.mockResolvedValue({ ok: true, toggled: false });
        mockSendMessage.mockResolvedValue({ ok: true });
        // The bubble count (5) is the baseline later asserted on waitForResponse.
        mockGetBubbleCount.mockResolvedValue(5);
    });

    it('returns separate thinking and response fields when --think is enabled', async () => {
        const separated = {
            response: 'The answer is 42.',
            thinking: 'Let me analyze this...',
            thinking_time: '2.5',
        };
        mockWaitForResponse.mockResolvedValue({ ...separated });

        const rows = await runAsk({ think: true });

        expect(rows).toEqual([separated]);
        expect(mockWaitForResponse).toHaveBeenCalledWith(page, 5, 'what is the answer?', 120000, true);
    });

    it('returns plain response when --think is disabled', async () => {
        mockWaitForResponse.mockResolvedValue('The answer is 42.');

        const rows = await runAsk();

        expect(rows).toEqual([{ response: 'The answer is 42.' }]);
        expect(mockWaitForResponse).toHaveBeenCalledWith(page, 5, 'what is the answer?', 120000, false);
    });

    it('does not declare static columns (derived from row keys)', () => {
        // With no static `columns`, the renderer infers them from row keys,
        // so non-think output does not get empty trailing columns.
        expect(askCommand.columns).toBeUndefined();
    });

    it('non-think rows only contain response key', async () => {
        mockWaitForResponse.mockResolvedValue('Plain answer.');

        const rows = await runAsk({ prompt: 'hello' });

        // Row keys drive the rendered columns: thinking/thinking_time must be absent.
        const [firstRow] = rows;
        expect(Object.keys(firstRow)).toEqual(['response']);
    });
});

clis/deepseek/utils.js

Lines changed: 92 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,35 @@ export async function getBubbleCount(page) {
9797
return count || 0;
9898
}
9999

100-
export async function waitForResponse(page, baselineCount, prompt, timeoutMs) {
100+
// Parse thinking response using text as a fallback when DOM-level extraction
101+
// is not available. Does NOT split on \n\n — that heuristic silently corrupts
102+
// multi-paragraph thinking or multi-paragraph answers. Instead, everything
103+
// after the header is treated as thinking content, and `response` stays empty
104+
// until the caller provides a DOM-separated answer.
105+
/**
 * Text-level fallback parser for DeepSeek "--think" output.
 *
 * Recognises a leading thinking header in either English
 * ("Thought for X second(s)") or Chinese ("已思考(用时 X 秒)", with ASCII or
 * full-width parentheses). Everything after the header is returned as
 * `thinking`; `response` is left empty because plain text gives no reliable
 * boundary between reasoning and the final answer.
 *
 * @param {string|null|undefined} rawText - Raw text of the last message bubble.
 * @returns {{response: string, thinking: string|null, thinking_time: string|null}|null}
 *   `null` for empty/missing input; `{ response: rawText, thinking: null,
 *   thinking_time: null }` when no header is present; otherwise the header's
 *   duration (as a string) plus the trimmed remainder as `thinking`.
 */
export function parseThinkingResponse(rawText) {
    if (!rawText) return null;

    // Full-width parentheses are written as escapes (\uFF08 / \uFF09) so the
    // pattern survives editors or pipelines that normalise them to ASCII.
    const headerPattern =
        /^(?:Thought for ([\d.]+) seconds?|已思考[(\uFF08]用时 ([\d.]+) 秒[)\uFF09])\s*/;
    const headerMatch = rawText.match(headerPattern);

    if (!headerMatch) {
        // No thinking section found: the whole text is the plain response.
        return { response: rawText, thinking: null, thinking_time: null };
    }

    // Exactly one alternative matched, so exactly one capture group is defined.
    const thinkingTime = headerMatch[1] ?? headerMatch[2];
    const afterHeader = rawText.slice(headerMatch[0].length);

    // Deliberately no "\n\n" split: that heuristic corrupts multi-paragraph
    // thinking or multi-paragraph answers.
    return {
        response: '',
        thinking: afterHeader.trim(),
        thinking_time: thinkingTime,
    };
}
127+
128+
export async function waitForResponse(page, baselineCount, prompt, timeoutMs, parseThinking = false) {
101129
const startTime = Date.now();
102130
let lastText = '';
103131
let stableCount = 0;
@@ -110,7 +138,51 @@ export async function waitForResponse(page, baselineCount, prompt, timeoutMs) {
110138
result = await page.evaluate(`(() => {
111139
const bubbles = document.querySelectorAll('${MESSAGE_SELECTOR}');
112140
const texts = Array.from(bubbles).map(b => (b.innerText || '').trim()).filter(Boolean);
113-
return { count: texts.length, last: texts[texts.length - 1] || '' };
141+
var last = texts[texts.length - 1] || '';
142+
143+
// DOM-level thinking/response separation.
144+
// DeepSeek renders thinking in a collapsible container with a
145+
// distinct class (e.g. .ds-markdown--think or similar) and the
146+
// final answer in the main .ds-markdown region. By querying
147+
// these separately we avoid any text-heuristic split.
148+
var thinkEl = null, answerEl = null, thinkTime = null;
149+
if (${parseThinking} && bubbles.length > 0) {
150+
var lastBubble = bubbles[bubbles.length - 1];
151+
// Thinking container — DeepSeek uses various class names;
152+
// try common selectors.
153+
thinkEl = lastBubble.querySelector('.ds-markdown--think')
154+
|| lastBubble.querySelector('[class*="think"]');
155+
// Final answer container — the main markdown block that is
156+
// NOT the thinking section.
157+
var markdownEls = lastBubble.querySelectorAll('.ds-markdown');
158+
for (var i = 0; i < markdownEls.length; i++) {
159+
if (markdownEls[i] !== thinkEl
160+
&& !(thinkEl && thinkEl.contains(markdownEls[i]))
161+
&& !markdownEls[i].classList.contains('ds-markdown--think')) {
162+
answerEl = markdownEls[i];
163+
}
164+
}
165+
// Thinking time from the toggle/header element
166+
var timeEl = lastBubble.querySelector('[class*="think"] ~ *')
167+
|| lastBubble.querySelector('.ds-thinking-header');
168+
if (!timeEl) {
169+
// Fallback: parse from raw text header
170+
var m = last.match(/^(?:Thought for ([\\d.]+) seconds?|已思考(用时 ([\\d.]+) 秒))/);
171+
if (m) thinkTime = m[1] || m[2];
172+
} else {
173+
var tm = (timeEl.textContent || '').match(/([\\d.]+)/);
174+
if (tm) thinkTime = tm[1];
175+
}
176+
}
177+
178+
return {
179+
count: texts.length,
180+
last: last,
181+
// DOM-separated fields (null when not available)
182+
thinkText: thinkEl ? (thinkEl.innerText || '').trim() : null,
183+
answerText: answerEl ? (answerEl.innerText || '').trim() : null,
184+
thinkTime: thinkTime,
185+
};
114186
})()`);
115187
} catch {
116188
continue;
@@ -122,14 +194,31 @@ export async function waitForResponse(page, baselineCount, prompt, timeoutMs) {
122194
if (candidate && result.count > baselineCount && candidate !== prompt.trim()) {
123195
if (candidate === lastText) {
124196
stableCount++;
125-
if (stableCount >= 3) return candidate;
197+
if (stableCount >= 3) {
198+
if (parseThinking) {
199+
// Prefer DOM-level separation
200+
if (result.thinkText != null || result.answerText != null) {
201+
return {
202+
thinking: result.thinkText || '',
203+
response: result.answerText || '',
204+
thinking_time: result.thinkTime || null,
205+
};
206+
}
207+
// Fallback to text-header parsing (no \n\n split)
208+
return parseThinkingResponse(candidate);
209+
}
210+
return candidate;
211+
}
126212
} else {
127213
stableCount = 0;
128214
}
129215
lastText = candidate;
130216
}
131217
}
132218

219+
if (parseThinking && lastText) {
220+
return parseThinkingResponse(lastText);
221+
}
133222
return lastText || null;
134223
}
135224

clis/deepseek/utils.test.js

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,90 @@ import fs from 'node:fs';
22
import os from 'node:os';
33
import path from 'node:path';
44
import { afterEach, describe, expect, it, vi } from 'vitest';
5-
import { selectModel, sendWithFile } from './utils.js';
5+
import { selectModel, sendWithFile, parseThinkingResponse } from './utils.js';
6+
7+
describe('deepseek parseThinkingResponse', () => {
    it('returns plain response when no thinking header is present', () => {
        const plain = 'This is a regular response without thinking.';

        expect(parseThinkingResponse(plain)).toEqual({
            response: plain,
            thinking: null,
            thinking_time: null,
        });
    });

    // Header-present cases. The text-level parser never splits on \n\n: everything
    // after the header is reported as thinking and `response` stays empty, since
    // the DOM-level extraction in waitForResponse() handles the real separation.
    const headerCases = [
        {
            title: 'parses English thinking header — all content after header is thinking',
            rawText: 'Thought for 3.5 seconds\n\nLet me analyze this problem...\nFirst, I need to consider X.\nThen, Y.\n\nThe answer is 42.',
            thinking: 'Let me analyze this problem...\nFirst, I need to consider X.\nThen, Y.\n\nThe answer is 42.',
            thinking_time: '3.5',
        },
        {
            title: 'parses Chinese thinking header — all content after header is thinking',
            rawText: '已思考(用时 2.3 秒)\n\n让我分析这个问题...\n首先需要考虑X。\n然后是Y。\n\n答案是42。',
            thinking: '让我分析这个问题...\n首先需要考虑X。\n然后是Y。\n\n答案是42。',
            thinking_time: '2.3',
        },
        {
            // Both paragraphs must stay in thinking.
            title: 'multi-paragraph thinking without final answer is not corrupted',
            rawText: 'Thought for 1.2 seconds\n\nFirst paragraph.\n\nSecond paragraph.',
            thinking: 'First paragraph.\n\nSecond paragraph.',
            thinking_time: '1.2',
        },
        {
            title: 'multi-paragraph final answer is not split by text parser',
            rawText: 'Thought for 3 seconds\n\nreasoning\n\nAnswer para 1.\n\nAnswer para 2.',
            thinking: 'reasoning\n\nAnswer para 1.\n\nAnswer para 2.',
            thinking_time: '3',
        },
        {
            title: 'handles thinking without final response',
            rawText: 'Thought for 1.2 seconds\n\nThinking process here...',
            thinking: 'Thinking process here...',
            thinking_time: '1.2',
        },
    ];

    for (const { title, rawText, thinking, thinking_time } of headerCases) {
        it(title, () => {
            expect(parseThinkingResponse(rawText)).toEqual({
                response: '',
                thinking,
                thinking_time,
            });
        });
    }

    it('returns null for empty input', () => {
        expect(parseThinkingResponse('')).toBeNull();
    });

    it('returns null for null input', () => {
        expect(parseThinkingResponse(null)).toBeNull();
    });
});
88+
689

790
describe('deepseek sendWithFile', () => {
891
const tempDirs = [];

0 commit comments

Comments
 (0)