Skip to content

Commit b02e7e3

Browse files
committed
fix(xhs): tighten note detail capture matching
1 parent 85603ab commit b02e7e3

2 files changed

Lines changed: 230 additions & 19 deletions

File tree

clis/xiaohongshu/creator-note-detail.js

Lines changed: 94 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* Requires: logged into creator.xiaohongshu.com in Chrome.
1010
*/
1111
import { cli, Strategy } from '@jackwener/opencli/registry';
12-
import { EmptyResultError } from '@jackwener/opencli/errors';
12+
import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
1313
const NOTE_DETAIL_DATETIME_RE = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}$/;
1414
const NOTE_DETAIL_METRICS = [
1515
{ label: '曝光数', section: '基础数据' },
@@ -251,10 +251,84 @@ const DETAIL_API_ENDPOINTS = [
251251
];
252252
const CAPTURE_POLL_ATTEMPTS = 20;
253253
const CAPTURE_POLL_INTERVAL_S = 0.5;
254-
// Capture the dashboard's signed /api/galaxy/* responses on window.__xhsCapture
254+
/**
 * Resolve a captured request URL to its entry in DETAIL_API_ENDPOINTS.
 *
 * The URL is parsed (relative URLs are resolved against the creator origin)
 * and its pathname must equal an endpoint's `suffix` exactly, so a longer
 * path that merely contains a shorter suffix does not match it.
 *
 * @param {*} url - Captured request URL; coerced with String() before parsing.
 * @returns {object|null} The matching endpoint descriptor, or null when the
 *   URL is empty, cannot be parsed, or matches no known endpoint.
 */
function detailApiEndpointForUrl(url) {
    if (!url) {
        return null;
    }
    try {
        const { pathname } = new URL(String(url), 'https://creator.xiaohongshu.com');
        for (const endpoint of DETAIL_API_ENDPOINTS) {
            if (endpoint.suffix === pathname) {
                return endpoint;
            }
        }
    }
    catch {
        // Any failure while parsing or matching is treated as "no endpoint".
        return null;
    }
    return null;
}
265+
/**
 * Find the first captured URL whose resolved endpoint has the given suffix.
 *
 * @param {object} captureMap - Capture buffer keyed by request URL.
 * @param {string} suffix - Endpoint pathname to look for.
 * @returns {string|undefined} The first matching key of `captureMap`, in
 *   Object.keys() order, or undefined when none matches.
 */
function findCapturedUrl(captureMap, suffix) {
    for (const capturedUrl of Object.keys(captureMap)) {
        const endpoint = detailApiEndpointForUrl(capturedUrl);
        if (endpoint?.suffix === suffix) {
            return capturedUrl;
        }
    }
    return undefined;
}
268+
/**
 * Report whether `value` is a plain object (a dictionary-like record).
 *
 * A value qualifies when it is a non-null, non-array object whose prototype
 * is either null or a root prototype (one whose own prototype is null, such
 * as Object.prototype). This rejects Date, Map, RegExp and class instances,
 * which a bare `typeof value === 'object'` check would let through, while
 * still accepting everything JSON.parse can produce for a JSON object.
 *
 * @param {*} value - Value to classify.
 * @returns {boolean} True only for plain objects.
 */
function isPlainObject(value) {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
        return false;
    }
    const proto = Object.getPrototypeOf(value);
    // Compare against "a root prototype" rather than Object.prototype itself
    // so plain objects created in another realm are still recognised.
    return proto === null || Object.getPrototypeOf(proto) === null;
}
271+
/**
 * Ensure that `payload[key]`, when the key is present, is an array.
 *
 * A missing key is accepted; only a present key with a non-array value fails.
 *
 * @param {object} payload - Payload object to inspect.
 * @param {string} key - Property name to check.
 * @param {string} suffix - Endpoint pathname, used in the error message.
 * @throws {CommandExecutionError} When the key is present but not an array.
 */
function assertOptionalArray(payload, key, suffix) {
    const isPresent = key in payload;
    if (!isPresent || Array.isArray(payload[key])) {
        return;
    }
    throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned malformed ${key}`);
}
276+
/**
 * Ensure that `payload[key]`, when the key is present, passes isPlainObject().
 *
 * A missing key is accepted; only a present key with a value that
 * isPlainObject() rejects fails.
 *
 * @param {object} payload - Payload object to inspect.
 * @param {string} key - Property name to check.
 * @param {string} suffix - Endpoint pathname, used in the error message.
 * @throws {CommandExecutionError} When the key is present but its value is
 *   rejected by isPlainObject().
 */
function assertOptionalPlainObject(payload, key, suffix) {
    const isPresent = key in payload;
    if (!isPresent || isPlainObject(payload[key])) {
        return;
    }
    throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned malformed ${key}`);
}
281+
/**
 * Check the shape of a captured API payload for a given endpoint.
 *
 * The payload itself must pass isPlainObject(). Depending on `endpoint.key`,
 * selected fields are then checked when present:
 *   - 'noteBase': `hour` and `day` must be plain objects;
 *   - 'audienceSource': `source` must be an array;
 *   - 'audienceSourceDetail': `gender`, `age`, `city` and `interest` must be arrays.
 * Other endpoint keys get no field-level checks.
 *
 * @param {*} payload - Decoded payload to validate.
 * @param {{suffix: string, key: string}} endpoint - Endpoint descriptor.
 * @returns {object} The same `payload`, once validated.
 * @throws {CommandExecutionError} When the payload or a checked field is malformed.
 */
function validateCapturedPayload(payload, endpoint) {
    const suffix = endpoint.suffix;
    if (!isPlainObject(payload)) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned a malformed payload`);
    }
    switch (endpoint.key) {
        case 'noteBase':
            ['hour', 'day'].forEach((field) => assertOptionalPlainObject(payload, field, suffix));
            break;
        case 'audienceSource':
            assertOptionalArray(payload, 'source', suffix);
            break;
        case 'audienceSourceDetail':
            ['gender', 'age', 'city', 'interest'].forEach((field) => assertOptionalArray(payload, field, suffix));
            break;
        default:
            break;
    }
    return payload;
}
300+
/**
 * Decode one capture-buffer entry into a validated payload.
 *
 * The entry must be an object with `ok === true` and a string `body`. The
 * body is parsed as JSON; when the result is a plain object with its own
 * `data` property, that property is used as the payload, otherwise the parsed
 * value itself is. The payload is then passed to validateCapturedPayload().
 *
 * @param {{status?: number, ok?: boolean, body?: string}} capture - Capture
 *   entry recorded for the endpoint.
 * @param {{suffix: string, key: string}} endpoint - Endpoint descriptor.
 * @returns {object} The validated payload.
 * @throws {CommandExecutionError} When the entry is malformed, reports a
 *   non-OK response, has a non-string body, holds invalid JSON, or fails
 *   payload validation.
 */
function parseCapturedJson(capture, endpoint) {
    const suffix = endpoint.suffix;
    if (!capture || typeof capture !== 'object') {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: malformed capture for ${suffix}`);
    }
    if (capture.ok !== true) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned HTTP ${capture.status ?? 'non-2xx'}`);
    }
    if (typeof capture.body !== 'string') {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned a non-text body`);
    }
    let envelope;
    try {
        envelope = JSON.parse(capture.body);
    }
    catch {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned invalid JSON or payload shape`);
    }
    // Validation runs outside the try block so its specific error messages
    // (e.g. which field is malformed) are not replaced by the generic one.
    const payload = isPlainObject(envelope) && Object.hasOwn(envelope, 'data') ? envelope.data : envelope;
    return validateCapturedPayload(payload, endpoint);
}
320+
// Capture the dashboard's signed datacenter/note responses on window.__xhsCapture
255321
// since a direct fetch() from page.evaluate bypasses the x-s signing and gets 406.
256322
async function installXhsFetchCaptureHook(page) {
257323
await page.evaluate(`(() => {
324+
const targetPaths = ${JSON.stringify(DETAIL_API_ENDPOINTS.map((endpoint) => endpoint.suffix))};
325+
const shouldCapture = (url) => {
326+
try {
327+
return targetPaths.includes(new URL(String(url), window.location.origin).pathname);
328+
} catch (_) {
329+
return false;
330+
}
331+
};
258332
// Reset the buffer every call so stale captures from a previous run on
259333
// the same tab cannot leak into the current navigation's harvest.
260334
window.__xhsCapture = {};
@@ -265,7 +339,7 @@ async function installXhsFetchCaptureHook(page) {
265339
const resp = await origFetch.apply(this, args);
266340
try {
267341
const url = typeof args[0] === 'string' ? args[0] : (args[0] && args[0].url) || '';
268-
if (url.includes('/api/galaxy/')) {
342+
if (shouldCapture(url)) {
269343
resp.clone().text().then((body) => {
270344
try { window.__xhsCapture[url] = { status: resp.status, ok: resp.ok, body }; } catch (_) {}
271345
}).catch(() => {});
@@ -284,7 +358,7 @@ async function installXhsFetchCaptureHook(page) {
284358
};
285359
xhr.addEventListener('load', () => {
286360
try {
287-
if (capturedUrl.includes('/api/galaxy/')) {
361+
if (shouldCapture(capturedUrl)) {
288362
window.__xhsCapture[capturedUrl] = { status: xhr.status, ok: xhr.status >= 200 && xhr.status < 300, body: xhr.responseText };
289363
}
290364
} catch (_) {}
@@ -314,25 +388,27 @@ async function captureNoteDetailPayload(page, noteId) {
314388
let captureMap = {};
315389
for (let i = 0; i < CAPTURE_POLL_ATTEMPTS; i++) {
316390
await page.wait(CAPTURE_POLL_INTERVAL_S);
317-
const raw = await page.evaluate('JSON.stringify(window.__xhsCapture || {})');
318-
captureMap = typeof raw === 'string' ? JSON.parse(raw) : {};
319-
const captured = wantedSuffixes.filter((suffix) => Object.keys(captureMap).some((url) => url.includes(suffix)));
391+
let raw;
392+
try {
393+
raw = await page.evaluate('JSON.stringify(window.__xhsCapture || {})');
394+
captureMap = typeof raw === 'string' ? JSON.parse(raw) : {};
395+
}
396+
catch {
397+
throw new CommandExecutionError('xiaohongshu creator-note-detail: failed to read signed datacenter/note capture buffer');
398+
}
399+
if (!captureMap || typeof captureMap !== 'object' || Array.isArray(captureMap)) {
400+
throw new CommandExecutionError('xiaohongshu creator-note-detail: malformed signed datacenter/note capture buffer');
401+
}
402+
const captured = wantedSuffixes.filter((suffix) => findCapturedUrl(captureMap, suffix));
320403
if (captured.length === wantedSuffixes.length)
321404
break;
322405
}
323406
const payload = {};
324-
for (const { suffix, key } of DETAIL_API_ENDPOINTS) {
325-
const matchUrl = Object.keys(captureMap).find((url) => url.includes(suffix));
407+
for (const endpoint of DETAIL_API_ENDPOINTS) {
408+
const matchUrl = findCapturedUrl(captureMap, endpoint.suffix);
326409
if (!matchUrl)
327410
continue;
328-
const capture = captureMap[matchUrl];
329-
if (!capture || !capture.ok)
330-
continue;
331-
try {
332-
const json = JSON.parse(capture.body);
333-
payload[key] = json.data ?? json;
334-
}
335-
catch { }
411+
payload[endpoint.key] = parseCapturedJson(captureMap[matchUrl], endpoint);
336412
}
337413
return Object.keys(payload).length > 0 ? payload : null;
338414
}
@@ -370,7 +446,7 @@ export async function fetchCreatorNoteDetailRows(page, noteId) {
370446
// the fetch+XHR hook and SPA-navigates to /statistics/note-detail under
371447
// it, which is what surfaces the audience / trend rows.
372448
await page.goto('https://creator.xiaohongshu.com/statistics');
373-
const apiPayload = await captureNoteDetailPayload(page, noteId).catch(() => null);
449+
const apiPayload = await captureNoteDetailPayload(page, noteId);
374450
const domData = await captureNoteDetailDomData(page).catch(() => null);
375451
let rows = parseCreatorNoteDetailDomData(domData, noteId);
376452
if (rows.length === 0) {

clis/xiaohongshu/creator-note-detail.test.js

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, expect, it, vi } from 'vitest';
2-
import { EmptyResultError } from '@jackwener/opencli/errors';
2+
import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
33
import { getRegistry } from '@jackwener/opencli/registry';
44
import { appendAudienceRows, appendTrendRows, parseCreatorNoteDetailDomData, parseCreatorNoteDetailText } from './creator-note-detail.js';
55
import './creator-note-detail.js';
@@ -303,6 +303,141 @@ describe('xiaohongshu creator-note-detail', () => {
303303
const captureProbeCalls = page.evaluate.mock.calls.filter(([script]) => String(script).includes('JSON.stringify(window.__xhsCapture'));
304304
expect(captureProbeCalls.length).toBeGreaterThanOrEqual(1);
305305
});
306+
it('matches signed API captures by exact pathname so source/detail cannot shadow source', async () => {
307+
const cmd = getRegistry().get('xiaohongshu/creator-note-detail');
308+
const domData = {
309+
title: '示例笔记',
310+
infoText: '示例笔记\n2026-03-19 12:00\n切换笔记',
311+
sections: [
312+
{
313+
title: '基础数据',
314+
metrics: [
315+
{ label: '曝光数', value: '100', extra: '粉丝占比 10%' },
316+
],
317+
},
318+
],
319+
};
320+
const detailCapture = [
321+
'https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/audience/source/detail?note_id=demo-note-id',
322+
{
323+
status: 200,
324+
ok: true,
325+
body: JSON.stringify({ data: { gender: [{ title: '女性', value: 64 }] } }),
326+
},
327+
];
328+
const sourceCapture = [
329+
'https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/audience/source?note_id=demo-note-id',
330+
{
331+
status: 200,
332+
ok: true,
333+
body: JSON.stringify({
334+
data: {
335+
source: [
336+
{
337+
title: '首页推荐',
338+
value_with_double: 88.8,
339+
info: { imp_count: 1000, view_count: 400, interaction_count: 20 },
340+
},
341+
],
342+
},
343+
}),
344+
},
345+
];
346+
const baseCapture = [
347+
'https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/base?note_id=demo-note-id',
348+
{
349+
status: 200,
350+
ok: true,
351+
body: JSON.stringify({ data: { hour: { view_list: [{ date: new Date('2026-03-19T12:00:00+08:00').getTime(), count: 7 }] } } }),
352+
},
353+
];
354+
const trendCapture = [
355+
'https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/analyze/audience/trend?note_id=demo-note-id',
356+
{
357+
status: 200,
358+
ok: true,
359+
body: JSON.stringify({ data: { no_data: false, no_data_tip_msg: '趋势可用' } }),
360+
},
361+
];
362+
for (const orderedCaptures of [
363+
[detailCapture, sourceCapture, baseCapture, trendCapture],
364+
[sourceCapture, detailCapture, baseCapture, trendCapture],
365+
]) {
366+
const captureMap = Object.fromEntries(orderedCaptures);
367+
const page = createPageMock(undefined);
368+
page.evaluate = vi.fn(async (script) => {
369+
const s = String(script);
370+
if (s.includes('window.__xhsCapture =')) return undefined;
371+
if (s.includes('history.pushState')) return undefined;
372+
if (s.includes('JSON.stringify(window.__xhsCapture')) return JSON.stringify(captureMap);
373+
if (s.includes("document.querySelector('.note-title')")) return domData;
374+
return undefined;
375+
});
376+
const result = await cmd.func(page, { 'note-id': 'demo-note-id' });
377+
expect(result).toEqual(expect.arrayContaining([
378+
{ section: '观看来源', metric: '首页推荐', value: '88.8%', extra: '曝光 1000 · 观看 400 · 互动 20' },
379+
{ section: '观众画像', metric: '性别/女性', value: '64%', extra: '' },
380+
{ section: '趋势数据', metric: '按小时/观看数', value: '1 points', extra: '03-19 12:00=7' },
381+
]));
382+
}
383+
});
384+
it('throws a typed error when a captured signed API returns non-2xx', async () => {
385+
const cmd = getRegistry().get('xiaohongshu/creator-note-detail');
386+
const captureMap = {
387+
'https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/base?note_id=demo-note-id': {
388+
status: 406,
389+
ok: false,
390+
body: '{"msg":"not acceptable"}',
391+
},
392+
};
393+
const page = createPageMock(undefined);
394+
page.evaluate = vi.fn(async (script) => {
395+
const s = String(script);
396+
if (s.includes('window.__xhsCapture =')) return undefined;
397+
if (s.includes('history.pushState')) return undefined;
398+
if (s.includes('JSON.stringify(window.__xhsCapture')) return JSON.stringify(captureMap);
399+
return null;
400+
});
401+
await expect(cmd.func(page, { 'note-id': 'demo-note-id' })).rejects.toBeInstanceOf(CommandExecutionError);
402+
});
403+
it('throws a typed error for wrong-shaped signed API payloads instead of falling back to DOM rows', async () => {
404+
const cmd = getRegistry().get('xiaohongshu/creator-note-detail');
405+
const domData = {
406+
title: '示例笔记',
407+
infoText: '示例笔记\n2026-03-19 12:00\n切换笔记',
408+
sections: [
409+
{
410+
title: '基础数据',
411+
metrics: [
412+
{ label: '曝光数', value: '100', extra: '粉丝占比 10%' },
413+
],
414+
},
415+
],
416+
};
417+
for (const body of [
418+
JSON.stringify({ data: null }),
419+
JSON.stringify({ data: [] }),
420+
JSON.stringify({ data: { source: {} } }),
421+
]) {
422+
const captureMap = {
423+
'https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/audience/source?note_id=demo-note-id': {
424+
status: 200,
425+
ok: true,
426+
body,
427+
},
428+
};
429+
const page = createPageMock(undefined);
430+
page.evaluate = vi.fn(async (script) => {
431+
const s = String(script);
432+
if (s.includes('window.__xhsCapture =')) return undefined;
433+
if (s.includes('history.pushState')) return undefined;
434+
if (s.includes('JSON.stringify(window.__xhsCapture')) return JSON.stringify(captureMap);
435+
if (s.includes("document.querySelector('.note-title')")) return domData;
436+
return null;
437+
});
438+
await expect(cmd.func(page, { 'note-id': 'demo-note-id' })).rejects.toBeInstanceOf(CommandExecutionError);
439+
}
440+
});
306441
it('throws EmptyResultError when the detail page exposes no metrics', async () => {
307442
const cmd = getRegistry().get('xiaohongshu/creator-note-detail');
308443
const page = createPageMock(undefined);

0 commit comments

Comments
 (0)