Skip to content

Commit b9bb302

Browse files
Benjamin-eecsBenjamin Liujackwener
authored
feat(deepseek): add file upload support via --file flag (#1093)
* WIP: deepseek file upload (blocked by 30s idle timeout) * feat(deepseek): add file upload support via --file flag Closes #1092 * fix(deepseek): use native file input path for --file --------- Co-authored-by: Benjamin Liu <beneecs@Benjamins-Mac-mini.local> Co-authored-by: jackwener <jakevingoo@gmail.com>
1 parent b3db955 commit b9bb302

6 files changed

Lines changed: 226 additions & 3 deletions

File tree

cli-manifest.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4118,6 +4118,12 @@
41184118
"default": false,
41194119
"required": false,
41204120
"help": "Enable web search"
4121+
},
4122+
{
4123+
"name": "file",
4124+
"type": "str",
4125+
"required": false,
4126+
"help": "Attach a file (PDF, image, text) with the prompt"
41214127
}
41224128
],
41234129
"columns": [

clis/deepseek/ask.js

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { cli, Strategy } from '@jackwener/opencli/registry';
22
import { CommandExecutionError } from '@jackwener/opencli/errors';
33
import {
44
DEEPSEEK_DOMAIN, DEEPSEEK_URL, ensureOnDeepSeek, selectModel, setFeature,
5-
sendMessage, getBubbleCount, waitForResponse, parseBoolFlag, withRetry,
5+
sendMessage, sendWithFile, getBubbleCount, waitForResponse, parseBoolFlag, withRetry,
66
} from './utils.js';
77

88
export const askCommand = cli({
@@ -21,6 +21,7 @@ export const askCommand = cli({
2121
{ name: 'model', default: 'instant', choices: ['instant', 'expert'], help: 'Model to use: instant or expert' },
2222
{ name: 'think', type: 'boolean', default: false, help: 'Enable DeepThink mode' },
2323
{ name: 'search', type: 'boolean', default: false, help: 'Enable web search' },
24+
{ name: 'file', help: 'Attach a file (PDF, image, text) with the prompt' },
2425
],
2526
columns: ['response'],
2627

@@ -58,6 +59,25 @@ export const askCommand = cli({
5859

5960
if (thinkResult.toggled || searchResult.toggled) await page.wait(0.5);
6061

62+
if (kwargs.file) {
63+
const baseline = await withRetry(() => getBubbleCount(page));
64+
try {
65+
const fileResult = await sendWithFile(page, kwargs.file, prompt);
66+
if (fileResult && !fileResult.ok) {
67+
throw new CommandExecutionError(fileResult.reason || 'Failed to attach file');
68+
}
69+
} catch (err) {
70+
// SPA navigates after send; "Promise was collected" means send succeeded
71+
if (!String(err?.message || err).includes('Promise was collected')) throw err;
72+
}
73+
await page.wait(3);
74+
const response = await waitForResponse(page, baseline, prompt, timeoutMs);
75+
if (!response) {
76+
return [{ response: `[NO RESPONSE] No reply within ${kwargs.timeout}s.` }];
77+
}
78+
return [{ response }];
79+
}
80+
6181
const baseline = await withRetry(() => getBubbleCount(page));
6282
const sendResult = await withRetry(() => sendMessage(page, prompt));
6383
if (!sendResult?.ok) {

clis/deepseek/ask.test.js

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import { beforeEach, describe, expect, it, vi } from 'vitest';
2+
3+
const {
4+
mockEnsureOnDeepSeek,
5+
mockSelectModel,
6+
mockSetFeature,
7+
mockSendMessage,
8+
mockSendWithFile,
9+
mockGetBubbleCount,
10+
mockWaitForResponse,
11+
mockParseBoolFlag,
12+
mockWithRetry,
13+
} = vi.hoisted(() => ({
14+
mockEnsureOnDeepSeek: vi.fn(),
15+
mockSelectModel: vi.fn(),
16+
mockSetFeature: vi.fn(),
17+
mockSendMessage: vi.fn(),
18+
mockSendWithFile: vi.fn(),
19+
mockGetBubbleCount: vi.fn(),
20+
mockWaitForResponse: vi.fn(),
21+
mockParseBoolFlag: vi.fn((v) => v === true || v === 'true'),
22+
mockWithRetry: vi.fn(async (fn) => fn()),
23+
}));
24+
25+
vi.mock('./utils.js', () => ({
26+
DEEPSEEK_DOMAIN: 'chat.deepseek.com',
27+
DEEPSEEK_URL: 'https://chat.deepseek.com/',
28+
ensureOnDeepSeek: mockEnsureOnDeepSeek,
29+
selectModel: mockSelectModel,
30+
setFeature: mockSetFeature,
31+
sendMessage: mockSendMessage,
32+
sendWithFile: mockSendWithFile,
33+
getBubbleCount: mockGetBubbleCount,
34+
waitForResponse: mockWaitForResponse,
35+
parseBoolFlag: mockParseBoolFlag,
36+
withRetry: mockWithRetry,
37+
}));
38+
39+
import { askCommand } from './ask.js';
40+
41+
describe('deepseek ask --file', () => {
42+
const page = {
43+
wait: vi.fn().mockResolvedValue(undefined),
44+
goto: vi.fn().mockResolvedValue(undefined),
45+
};
46+
47+
beforeEach(() => {
48+
vi.clearAllMocks();
49+
mockEnsureOnDeepSeek.mockResolvedValue(undefined);
50+
mockSelectModel.mockResolvedValue({ ok: true, toggled: false });
51+
mockSetFeature.mockResolvedValue({ ok: true, toggled: false });
52+
mockSendWithFile.mockResolvedValue({ ok: true });
53+
mockGetBubbleCount.mockResolvedValue(7);
54+
mockWaitForResponse.mockResolvedValue('new reply');
55+
});
56+
57+
it('captures the existing baseline before sending a file prompt', async () => {
58+
const rows = await askCommand.func(page, {
59+
prompt: 'summarize this',
60+
timeout: 120,
61+
file: './report.pdf',
62+
new: false,
63+
model: 'instant',
64+
think: false,
65+
search: false,
66+
});
67+
68+
expect(rows).toEqual([{ response: 'new reply' }]);
69+
expect(mockGetBubbleCount).toHaveBeenCalledTimes(1);
70+
expect(mockSendWithFile).toHaveBeenCalledWith(page, './report.pdf', 'summarize this');
71+
expect(mockWaitForResponse).toHaveBeenCalledWith(page, 7, 'summarize this', 120000);
72+
});
73+
});

clis/deepseek/utils.js

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,6 @@ export async function getConversationList(page) {
161161
if (btn) btn.click();
162162
}
163163
})()`);
164-
// Poll for sidebar history links to render
165164
for (let attempt = 0; attempt < 5; attempt++) {
166165
await page.wait(2);
167166
const items = await page.evaluate(`(() => {
@@ -186,6 +185,90 @@ export async function getConversationList(page) {
186185
return [];
187186
}
188187

188+
/**
 * Polls the page until an element showing the uploaded file's name appears.
 *
 * Each attempt first waits, then looks for a leaf `<div>` (one with no child
 * elements) whose trimmed text content is exactly `fileName`.
 *
 * @param {object} page - Page handle exposing `wait(n)` and `evaluate(script)`.
 * @param {string} fileName - Base name of the uploaded file, matched exactly.
 * @param {object} [options] - Optional polling budget; defaults keep the original 8 x 2 behavior.
 * @param {number} [options.attempts=8] - Maximum number of checks before giving up.
 * @param {number} [options.interval=2] - Value passed to `page.wait` before each check
 *   (presumably seconds, matching the other `page.wait` calls in this adapter - confirm).
 * @returns {Promise<boolean>} `true` as soon as a check finds the name, `false` once the budget is spent.
 */
async function waitForFilePreview(page, fileName, { attempts = 8, interval = 2 } = {}) {
  // The probe does not change between attempts, so build it once.
  // JSON.stringify makes the file name a safely quoted JS string literal.
  const probe = `(() => {
    const name = ${JSON.stringify(fileName)};
    return Array.from(document.querySelectorAll('div'))
      .some((el) => el.children.length === 0 && (el.textContent || '').trim() === name);
  })()`;

  for (let attempt = 0; attempt < attempts; attempt++) {
    // Wait first: the preview is rendered asynchronously after the upload is triggered.
    await page.wait(interval);
    if (await page.evaluate(probe)) return true;
  }
  return false;
}
200+
201+
/**
 * Attaches a local file to the DeepSeek composer and then sends the prompt.
 *
 * Steps:
 *  1. Validate the path: it must exist, be a regular file, and be at most 100 MB.
 *  2. Collapse the sidebar if conversation links are present.
 *  3. Upload through `page.setFileInput` when the page handle provides it.
 *  4. Fall back to building a `File` in the page from base64 and invoking the
 *     file input's React `onChange` handler. This happens only when
 *     `setFileInput` is missing or fails with "Unknown action" / "not supported".
 *  5. Wait for the file name to show up in the page, then send the prompt.
 *
 * @param {object} page - Page handle exposing `evaluate`, `wait` and optionally `setFileInput`.
 * @param {string} filePath - Path to the file; resolved against the current working directory.
 * @param {string} prompt - Message text handed to `sendMessage` after the upload.
 * @returns {Promise<object>} `{ ok: false, reason }` when validation or the upload
 *   fails, otherwise whatever `sendMessage(page, prompt)` returns.
 * @throws Rethrows any `setFileInput` error that is not an "unsupported" signal, plus
 *   errors raised by `page.evaluate` / `page.wait`.
 */
export async function sendWithFile(page, filePath, prompt) {
  const { default: fs } = await import('node:fs');
  const { default: path } = await import('node:path');
  const MAX_UPLOAD_BYTES = 100 * 1024 * 1024;

  // path.resolve throws on non-strings and maps '' to the cwd; report both as a normal failure.
  if (typeof filePath !== 'string' || filePath === '') {
    return { ok: false, reason: 'No file path provided' };
  }

  const absPath = path.resolve(filePath);
  if (!fs.existsSync(absPath)) {
    return { ok: false, reason: `File not found: ${absPath}` };
  }

  const stats = fs.statSync(absPath);
  // Directories and other special entries pass existsSync but cannot be uploaded;
  // without this guard they fail later with EISDIR or an opaque driver error.
  if (!stats.isFile()) {
    return { ok: false, reason: `Not a regular file: ${absPath}` };
  }
  if (stats.size > MAX_UPLOAD_BYTES) {
    return { ok: false, reason: `File too large (${(stats.size / 1024 / 1024).toFixed(1)} MB). Max: 100 MB` };
  }

  const fileName = path.basename(absPath);

  // Collapse sidebar to keep DOM simple for send button matching
  await page.evaluate(`(() => {
    if (document.querySelectorAll('a[href*="/a/chat/s/"]').length > 0) {
      const btn = document.querySelector('div[tabindex="0"][role="button"]');
      if (btn) btn.click();
    }
  })()`);
  await page.wait(0.5);

  // Preferred path: let the driver set the <input type="file"> natively, so the
  // file content never has to be embedded in an evaluate() script.
  let uploaded = false;
  if (page.setFileInput) {
    try {
      await page.setFileInput([absPath], 'input[type="file"]');
      uploaded = true;
    } catch (err) {
      const msg = String(err?.message || err);
      // Only "the driver cannot do this" falls through to the fallback; real failures propagate.
      if (!msg.includes('Unknown action') && !msg.includes('not supported')) {
        throw err;
      }
    }
  }

  if (!uploaded) {
    // Fallback: ship the bytes as base64, rebuild a File in the page and call the
    // input's React onChange directly.
    const base64 = fs.readFileSync(absPath).toString('base64');
    const fallbackResult = await page.evaluate(`(async () => {
      var binary = atob('${base64}');
      var bytes = new Uint8Array(binary.length);
      for (var i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);

      var file = new File([bytes], ${JSON.stringify(fileName)});
      var dt = new DataTransfer();
      dt.items.add(file);

      var inp = document.querySelector('input[type="file"]');
      if (!inp) return { ok: false, reason: 'file input not found' };

      var propsKey = Object.keys(inp).find(function(k) { return k.startsWith('__reactProps$'); });
      if (!propsKey || typeof inp[propsKey].onChange !== 'function') {
        return { ok: false, reason: 'React onChange not found' };
      }

      inp.files = dt.files;
      inp[propsKey].onChange({ target: { files: dt.files } });
      return { ok: true };
    })()`);
    if (fallbackResult && !fallbackResult.ok) return fallbackResult;
  }

  const ready = await waitForFilePreview(page, fileName);
  if (!ready) return { ok: false, reason: 'file preview did not appear' };

  return sendMessage(page, prompt);
}
271+
189272
// Retries on CDP "Promise was collected" errors caused by DeepSeek's SPA router transitions.
190273
export async function withRetry(fn, retries = 2) {
191274
for (let i = 0; i <= retries; i++) {

clis/deepseek/utils.test.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import fs from 'node:fs';
2+
import os from 'node:os';
3+
import path from 'node:path';
4+
import { afterEach, describe, expect, it, vi } from 'vitest';
5+
import { sendWithFile } from './utils.js';
6+
7+
describe('deepseek sendWithFile', () => {
8+
const tempDirs = [];
9+
10+
afterEach(() => {
11+
vi.restoreAllMocks();
12+
while (tempDirs.length) {
13+
fs.rmSync(tempDirs.pop(), { recursive: true, force: true });
14+
}
15+
});
16+
17+
it('prefers page.setFileInput over base64-in-evaluate when supported', async () => {
18+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencli-deepseek-'));
19+
tempDirs.push(dir);
20+
const filePath = path.join(dir, 'report.txt');
21+
fs.writeFileSync(filePath, 'hello');
22+
23+
const page = {
24+
setFileInput: vi.fn().mockResolvedValue(undefined),
25+
wait: vi.fn().mockResolvedValue(undefined),
26+
evaluate: vi.fn()
27+
.mockResolvedValueOnce(undefined)
28+
.mockResolvedValueOnce(true)
29+
.mockResolvedValueOnce({ ok: true }),
30+
};
31+
32+
const result = await sendWithFile(page, filePath, 'summarize this');
33+
34+
expect(result).toEqual({ ok: true });
35+
expect(page.setFileInput).toHaveBeenCalledWith([filePath], 'input[type="file"]');
36+
});
37+
});

docs/adapters/browser/deepseek.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ opencli deepseek ask "prove that sqrt(2) is irrational" --think
3030
# Enable web search
3131
opencli deepseek ask "latest news about AI" --search
3232

33+
# Attach a file
34+
opencli deepseek ask "summarize this document" --file ./report.pdf
35+
3336
# Combine modes
3437
opencli deepseek ask "what happened today?" --model expert --think --search --new
3538

@@ -62,6 +65,7 @@ opencli deepseek history --limit 10
6265
| `--model` | Model to use: `instant` or `expert` (default: instant) |
6366
| `--think` | Enable DeepThink mode (default: false) |
6467
| `--search` | Enable web search (default: false) |
68+
| `--file` | Attach a file (PDF, image, text) with the prompt (max 100 MB) |
6569

6670
## Prerequisites
6771

@@ -73,4 +77,4 @@ opencli deepseek history --limit 10
7377
- This adapter drives the DeepSeek web UI in the browser, not an API
7478
- Default mode is Instant with DeepThink and Search disabled; each flag (`--model`, `--think`, `--search`) is synced on every invocation so omitting a flag resets it
7579
- Long responses (code, essays) may need a higher `--timeout`
76-
- File upload is not yet supported
80+
- File upload uses the browser's native file input when the driver supports it; otherwise the file is read into memory and passed to the browser as base64. Files over 100 MB are rejected

0 commit comments

Comments
 (0)