Skip to content

Commit cd48917

Browse files
authored
fix(xiaohongshu): require signed note URLs (#996)
* fix(xiaohongshu): require signed note urls
* chore: drop generated manifest from pr
1 parent 45d6f5b commit cd48917

12 files changed

Lines changed: 185 additions & 88 deletions

File tree

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,8 @@ OpenCLI supports downloading images, videos, and articles from supported platfor
263263
For video downloads, install `yt-dlp` first: `brew install yt-dlp`
264264

265265
```bash
266-
opencli xiaohongshu download abc123 --output ./xhs
266+
opencli xiaohongshu download "https://www.xiaohongshu.com/search_result/<id>?xsec_token=..." --output ./xhs
267+
opencli xiaohongshu download "https://xhslink.com/..." --output ./xhs
267268
opencli bilibili download BV1xxx --output ./bilibili
268269
opencli twitter download elonmusk --limit 20 --output ./twitter
269270
opencli 1688 download 841141931191 --output ./1688-downloads

README.zh-CN.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,8 @@ brew install yt-dlp
339339

340340
```bash
341341
# 下载小红书笔记中的图片/视频
342-
opencli xiaohongshu download abc123 --output ./xhs
342+
opencli xiaohongshu download "https://www.xiaohongshu.com/search_result/<id>?xsec_token=..." --output ./xhs
343+
opencli xiaohongshu download "https://xhslink.com/..." --output ./xhs
343344

344345
# 下载B站视频(需要 yt-dlp)
345346
opencli bilibili download BV1xxx --output ./bilibili

clis/xiaohongshu/comments.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ cli({
2222
strategy: Strategy.COOKIE,
2323
navigateBefore: false,
2424
args: [
25-
{ name: 'note-id', required: true, positional: true, help: 'Note ID or full URL (preserves xsec_token for access)' },
25+
{ name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
2626
{ name: 'limit', type: 'int', default: 20, help: 'Number of top-level comments (max 50)' },
2727
{ name: 'with-replies', type: 'boolean', default: false, help: 'Include nested replies (楼中楼)' },
2828
],
@@ -32,7 +32,7 @@ cli({
3232
const withReplies = Boolean(kwargs['with-replies']);
3333
const raw = String(kwargs['note-id']);
3434
const noteId = parseNoteId(raw);
35-
await page.goto(buildNoteUrl(raw));
35+
await page.goto(buildNoteUrl(raw, { commandName: 'xiaohongshu comments' }));
3636
await page.wait({ time: 2 + Math.random() * 3 });
3737
const data = await page.evaluate(`
3838
(async () => {

clis/xiaohongshu/comments.test.js

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,30 +27,40 @@ function createPageMock(evaluateResult) {
2727
}
2828
describe('xiaohongshu comments', () => {
2929
const command = getRegistry().get('xiaohongshu/comments');
30-
it('returns ranked comment rows', async () => {
30+
it('returns ranked comment rows for signed full URLs', async () => {
3131
const page = createPageMock({
3232
loginWall: false,
3333
results: [
3434
{ author: 'Alice', text: 'Great note!', likes: 10, time: '2024-01-01', is_reply: false, reply_to: '' },
3535
{ author: 'Bob', text: 'Very helpful', likes: 0, time: '2024-01-02', is_reply: false, reply_to: '' },
3636
],
3737
});
38-
const result = (await command.func(page, { 'note-id': '69aadbcb000000002202f131', limit: 5 }));
39-
expect(page.goto.mock.calls[0][0]).toContain('/search_result/69aadbcb000000002202f131');
38+
const signedUrl = 'https://www.xiaohongshu.com/search_result/69aadbcb000000002202f131?xsec_token=abc&xsec_source=pc_search';
39+
const result = (await command.func(page, { 'note-id': signedUrl, limit: 5 }));
40+
expect(page.goto.mock.calls[0][0]).toBe(signedUrl);
4041
expect(result).toHaveLength(2);
4142
expect(result[0]).toMatchObject({ rank: 1, author: 'Alice', text: 'Great note!', likes: 10 });
4243
expect(result[1]).toMatchObject({ rank: 2, author: 'Bob', text: 'Very helpful', likes: 0 });
4344
});
44-
it('preserves full /explore/ URL as-is for navigation', async () => {
45+
it('rejects bare note IDs before browser navigation', async () => {
46+
const page = createPageMock({ loginWall: false, results: [] });
47+
await expect(command.func(page, { 'note-id': '69aadbcb000000002202f131', limit: 5 })).rejects.toMatchObject({
48+
code: 'ARGUMENT',
49+
message: expect.stringContaining('signed URL'),
50+
hint: expect.stringContaining('xsec_token'),
51+
});
52+
expect(page.goto).not.toHaveBeenCalled();
53+
});
54+
it('preserves signed /explore/ URL as-is for navigation', async () => {
4555
const page = createPageMock({
4656
loginWall: false,
4757
results: [{ author: 'Alice', text: 'Nice', likes: 1, time: '2024-01-01', is_reply: false, reply_to: '' }],
4858
});
4959
await command.func(page, {
50-
'note-id': 'https://www.xiaohongshu.com/explore/69aadbcb000000002202f131',
60+
'note-id': 'https://www.xiaohongshu.com/explore/69aadbcb000000002202f131?xsec_token=abc&xsec_source=pc_search',
5161
limit: 5,
5262
});
53-
expect(page.goto.mock.calls[0][0]).toContain('/explore/69aadbcb000000002202f131');
63+
expect(page.goto.mock.calls[0][0]).toContain('/explore/69aadbcb000000002202f131?xsec_token=abc');
5464
});
5565
it('preserves full search_result URL with xsec_token for navigation', async () => {
5666
const page = createPageMock({
@@ -61,22 +71,21 @@ describe('xiaohongshu comments', () => {
6171
await command.func(page, { 'note-id': fullUrl, limit: 5 });
6272
expect(page.goto.mock.calls[0][0]).toBe(fullUrl);
6373
});
64-
it('throws AuthRequiredError when login wall is detected', async () => {
65-
const page = createPageMock({ loginWall: true, results: [] });
66-
await expect(command.func(page, { 'note-id': 'abc123', limit: 5 })).rejects.toThrow('Note comments require login');
67-
});
68-
it('throws SECURITY_BLOCK with bare-id guidance when risk control blocks the comments page', async () => {
74+
it('preserves signed /user/profile/<user>/<note> URLs for navigation', async () => {
6975
const page = createPageMock({
70-
pageUrl: 'https://www.xiaohongshu.com/website-login/error?error_code=300017',
71-
securityBlock: true,
7276
loginWall: false,
73-
results: [],
74-
});
75-
await expect(command.func(page, { 'note-id': 'abc123', limit: 5 })).rejects.toMatchObject({
76-
code: 'SECURITY_BLOCK',
77-
hint: expect.stringContaining('xsec_token'),
77+
results: [{ author: 'Alice', text: 'Nice', likes: 1, time: '2024-01-01', is_reply: false, reply_to: '' }],
7878
});
79-
expect(page.wait).toHaveBeenCalledWith(expect.objectContaining({ time: expect.any(Number) }));
79+
const fullUrl = 'https://www.xiaohongshu.com/user/profile/user123/69aadbcb000000002202f131?xsec_token=abc&xsec_source=pc_user';
80+
await command.func(page, { 'note-id': fullUrl, limit: 5 });
81+
expect(page.goto.mock.calls[0][0]).toBe(fullUrl);
82+
});
83+
it('throws AuthRequiredError when login wall is detected', async () => {
84+
const page = createPageMock({ loginWall: true, results: [] });
85+
await expect(command.func(page, {
86+
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
87+
limit: 5,
88+
})).rejects.toThrow('Note comments require login');
8089
});
8190
it('throws SECURITY_BLOCK with retry guidance when a full URL comments page is blocked', async () => {
8291
const page = createPageMock({
@@ -95,11 +104,17 @@ describe('xiaohongshu comments', () => {
95104
});
96105
it('returns empty array when no comments are found', async () => {
97106
const page = createPageMock({ loginWall: false, results: [] });
98-
await expect(command.func(page, { 'note-id': 'abc123', limit: 5 })).resolves.toEqual([]);
107+
await expect(command.func(page, {
108+
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
109+
limit: 5,
110+
})).resolves.toEqual([]);
99111
});
100112
it('uses condition-based comment scrolling instead of a fixed blind loop', async () => {
101113
const page = createPageMock({ loginWall: false, results: [] });
102-
await command.func(page, { 'note-id': 'abc123', limit: 5 });
114+
await command.func(page, {
115+
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
116+
limit: 5,
117+
});
103118
const script = page.evaluate.mock.calls[0][0];
104119
expect(script).toContain("const beforeCount = scroller.querySelectorAll('.parent-comment').length");
105120
expect(script).toContain("const afterCount = scroller.querySelectorAll('.parent-comment').length");
@@ -115,7 +130,10 @@ describe('xiaohongshu comments', () => {
115130
reply_to: '',
116131
}));
117132
const page = createPageMock({ loginWall: false, results: manyComments });
118-
const result = (await command.func(page, { 'note-id': 'abc123', limit: 3 }));
133+
const result = (await command.func(page, {
134+
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
135+
limit: 3,
136+
}));
119137
expect(result).toHaveLength(3);
120138
expect(result[0].rank).toBe(1);
121139
expect(result[2].rank).toBe(3);
@@ -128,7 +146,10 @@ describe('xiaohongshu comments', () => {
128146
{ author: 'Bob', text: 'Very helpful', likes: 0, time: '2024-01-02', is_reply: false, reply_to: '' },
129147
],
130148
});
131-
const result = (await command.func(page, { 'note-id': 'abc123', limit: -3 }));
149+
const result = (await command.func(page, {
150+
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
151+
limit: -3,
152+
}));
132153
expect(result).toHaveLength(1);
133154
expect(result[0]).toMatchObject({ rank: 1, author: 'Alice' });
134155
});
@@ -143,7 +164,7 @@ describe('xiaohongshu comments', () => {
143164
],
144165
});
145166
const result = (await command.func(page, {
146-
'note-id': 'abc123', limit: 50, 'with-replies': true,
167+
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok', limit: 50, 'with-replies': true,
147168
}));
148169
expect(result).toHaveLength(3);
149170
expect(result[0]).toMatchObject({ author: 'Alice', is_reply: false, reply_to: '' });
@@ -166,7 +187,7 @@ describe('xiaohongshu comments', () => {
166187
});
167188
// Limit to 2 top-level comments — should include A + 2 replies + B = 4 rows
168189
const result = (await command.func(page, {
169-
'note-id': 'abc123', limit: 2, 'with-replies': true,
190+
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok', limit: 2, 'with-replies': true,
170191
}));
171192
expect(result).toHaveLength(4);
172193
expect(result.map((r) => r.author)).toEqual(['A', 'A1', 'A2', 'B']);

clis/xiaohongshu/download.js

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
* Xiaohongshu download — download images and videos from a note.
33
*
44
* Usage:
5-
* opencli xiaohongshu download <note-id-or-url> --output ./xhs
5+
* opencli xiaohongshu download <signed-note-url-or-shortlink> --output ./xhs
66
*
7-
* Accepts a bare note ID, a full xiaohongshu.com URL (with xsec_token),
8-
* or a short link (http://xhslink.com/...).
7+
* Accepts a full xiaohongshu.com URL with xsec_token or an xhslink short link.
98
*/
109
import { cli, Strategy } from '@jackwener/opencli/registry';
1110
import { formatCookieHeader } from '@jackwener/opencli/download';
@@ -20,15 +19,15 @@ cli({
2019
strategy: Strategy.COOKIE,
2120
navigateBefore: false,
2221
args: [
23-
{ name: 'note-id', positional: true, required: true, help: 'Note ID, full URL, or short link' },
22+
{ name: 'note-id', positional: true, required: true, help: 'Full Xiaohongshu note URL with xsec_token, or xhslink short link' },
2423
{ name: 'output', default: './xiaohongshu-downloads', help: 'Output directory' },
2524
],
2625
columns: ['index', 'type', 'status', 'size'],
2726
func: async (page, kwargs) => {
2827
const rawInput = String(kwargs['note-id']);
2928
const output = kwargs.output;
3029
const noteId = parseNoteId(rawInput);
31-
await page.goto(buildNoteUrl(rawInput));
30+
await page.goto(buildNoteUrl(rawInput, { allowShortLink: true, commandName: 'xiaohongshu download' }));
3231
await page.wait({ time: 1 + Math.random() * 2 });
3332
// Extract note info and media URLs
3433
const data = await page.evaluate(`
@@ -51,9 +50,9 @@ cli({
5150
seenMedia.add(key);
5251
result.media.push({ type, url });
5352
};
54-
const locationMatch = (location.pathname || '').match(/\\/(?:explore|note|search_result|discovery\\/item)\\/([a-f0-9]+)/i);
53+
const locationMatch = (location.pathname || '').match(/\\/(?:explore|note|search_result|discovery\\/item)\\/([a-f0-9]+)|\\/user\\/profile\\/[^/?#]+\\/([a-f0-9]+)/i);
5554
if (locationMatch) {
56-
result.noteId = locationMatch[1];
55+
result.noteId = locationMatch[1] || locationMatch[2];
5756
}
5857
5958
// Get title

clis/xiaohongshu/download.test.js

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,19 +70,31 @@ describe('xiaohongshu download', () => {
7070
filenamePrefix: '69bc166f000000001a02069a',
7171
}));
7272
});
73-
it('throws SECURITY_BLOCK with bare-id guidance before starting downloads', async () => {
73+
it('uses canonical note id for signed user profile note URLs', async () => {
74+
const page = createPageMock({
75+
noteId: '',
76+
media: [{ type: 'image', url: 'https://ci.xiaohongshu.com/example.jpg' }],
77+
});
78+
const fullUrl = 'https://www.xiaohongshu.com/user/profile/user123/69bc166f000000001a02069a?xsec_token=abc&xsec_source=pc_user';
79+
await command.func(page, { 'note-id': fullUrl, output: './out' });
80+
expect(page.goto.mock.calls[0][0]).toBe(fullUrl);
81+
expect(mockDownloadMedia).toHaveBeenCalledWith([{ type: 'image', url: 'https://ci.xiaohongshu.com/example.jpg' }], expect.objectContaining({
82+
subdir: '69bc166f000000001a02069a',
83+
filenamePrefix: '69bc166f000000001a02069a',
84+
}));
85+
});
86+
it('rejects bare note IDs before browser navigation', async () => {
7487
const page = createPageMock({
75-
pageUrl: 'https://www.xiaohongshu.com/website-login/error?error_code=300017',
76-
securityBlock: true,
7788
noteId: '69bc166f000000001a02069a',
7889
media: [],
7990
});
8091
await expect(command.func(page, { 'note-id': '69bc166f000000001a02069a', output: './out' })).rejects.toMatchObject({
81-
code: 'SECURITY_BLOCK',
92+
code: 'ARGUMENT',
93+
message: expect.stringContaining('signed URL'),
8294
hint: expect.stringContaining('xsec_token'),
8395
});
96+
expect(page.goto).not.toHaveBeenCalled();
8497
expect(mockDownloadMedia).not.toHaveBeenCalled();
85-
expect(page.wait).toHaveBeenCalledWith(expect.objectContaining({ time: expect.any(Number) }));
8698
});
8799
it('throws SECURITY_BLOCK with retry guidance for blocked full URLs', async () => {
88100
const page = createPageMock({

clis/xiaohongshu/note-helpers.js

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,59 @@
1+
import { ArgumentError } from '@jackwener/opencli/errors';
2+
13
/** Side-effect-free helpers shared by the xiaohongshu note, comments, and download commands. */
24
/** Extract a bare note ID from a full URL or raw ID string. */
35
export function parseNoteId(input) {
46
const trimmed = input.trim();
5-
const match = trimmed.match(/\/(?:explore|note|search_result)\/([a-f0-9]+)/);
6-
return match ? match[1] : trimmed;
7+
const match = trimmed.match(/\/(?:explore|note|search_result|discovery\/item)\/([a-f0-9]+)|\/user\/profile\/[^/?#]+\/([a-f0-9]+)/i);
8+
return match ? (match[1] || match[2]) : trimmed;
79
}
10+
11+
export const XHS_SIGNED_URL_HINT = 'Pass a full Xiaohongshu note URL with xsec_token from search results or user/profile context.';
12+
13+
function isShortLink(input) {
14+
return /^https?:\/\/xhslink\.com\//i.test(input);
15+
}
16+
17+
function isXiaohongshuHost(hostname) {
18+
const normalized = hostname.toLowerCase();
19+
return normalized === 'xiaohongshu.com' || normalized.endsWith('.xiaohongshu.com');
20+
}
21+
22+
function isSupportedNotePath(pathname) {
23+
return /^\/(?:explore|note|search_result|discovery\/item)\/[a-f0-9]+(?:[/?#]|$)/i.test(pathname)
24+
|| /^\/user\/profile\/[^/?#]+\/[a-f0-9]+(?:[/?#]|$)/i.test(pathname);
25+
}
26+
827
/**
928
* Build the best navigation URL for a note.
1029
*
11-
* XHS blocks direct `/explore/<id>` access without a valid `xsec_token`.
12-
* When the user passes a full URL (from search results), we preserve it
13-
* so the browser navigates with the token intact. For bare IDs we now use
14-
* `/search_result/<id>` which works without xsec_token when cookies are present.
30+
* XHS note detail pages now require a valid signed URL for reliable access.
31+
* Bare note IDs no longer resolve deterministically, so callers must provide
32+
* a full note URL with xsec_token or, for downloads only, an xhslink short link.
1533
*/
16-
export function buildNoteUrl(input) {
34+
export function buildNoteUrl(input, options = {}) {
35+
const { allowShortLink = false, commandName = 'xiaohongshu note' } = options;
1736
const trimmed = input.trim();
37+
const message = `${commandName} now requires a full signed URL`;
38+
const hint = allowShortLink
39+
? `${XHS_SIGNED_URL_HINT} For downloads, xhslink short links are also supported.`
40+
: XHS_SIGNED_URL_HINT;
41+
1842
if (/^https?:\/\//.test(trimmed)) {
19-
// Full URL — navigate as-is; the browser will follow any redirects
20-
return trimmed;
43+
if (isShortLink(trimmed)) {
44+
if (allowShortLink)
45+
return trimmed;
46+
throw new ArgumentError(message, hint);
47+
}
48+
try {
49+
const url = new URL(trimmed);
50+
const xsecToken = url.searchParams.get('xsec_token')?.trim();
51+
if (isXiaohongshuHost(url.hostname) && isSupportedNotePath(url.pathname) && xsecToken) {
52+
return trimmed;
53+
}
54+
}
55+
catch { }
56+
throw new ArgumentError(message, hint);
2157
}
22-
// Use /search_result/<id> instead of /explore/<id> — works without xsec_token
23-
// when the user is logged in via cookies (which is always the case with opencli).
24-
return `https://www.xiaohongshu.com/search_result/${trimmed}`;
58+
throw new ArgumentError(message, hint);
2559
}

clis/xiaohongshu/note.js

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
* Extracts title, author, description text, and engagement metrics
55
* (likes, collects, comment count) via DOM extraction.
66
*
7-
* Supports both bare note IDs and full URLs (with xsec_token).
8-
* Bare IDs now use /search_result/<id> which works without xsec_token
9-
* when the user is logged in via cookies.
7+
* Requires a full Xiaohongshu note URL with xsec_token.
108
*/
119
import { cli, Strategy } from '@jackwener/opencli/registry';
1210
import { AuthRequiredError, CliError, EmptyResultError } from '@jackwener/opencli/errors';
@@ -19,13 +17,13 @@ cli({
1917
strategy: Strategy.COOKIE,
2018
navigateBefore: false,
2119
args: [
22-
{ name: 'note-id', required: true, positional: true, help: 'Note ID or full URL (preserves xsec_token for access)' },
20+
{ name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
2321
],
2422
columns: ['field', 'value'],
2523
func: async (page, kwargs) => {
2624
const raw = String(kwargs['note-id']);
2725
const noteId = parseNoteId(raw);
28-
const url = buildNoteUrl(raw);
26+
const url = buildNoteUrl(raw, { commandName: 'xiaohongshu note' });
2927
await page.goto(url);
3028
await page.wait({ time: 2 + Math.random() * 3 });
3129
const data = await page.evaluate(`

0 commit comments

Comments
 (0)