Skip to content

Commit 6b8d30b

Browse files
fix(xiaohongshu): hook dashboard fetch to capture signed datacenter/note/* responses (jackwener#1732)
* fix(xiaohongshu): hook dashboard fetch to capture signed datacenter/note/* responses The four /api/galaxy/creator/datacenter/note/* endpoints behind the creator-note-detail view require an x-s / x-t / x-s-common signing interceptor that the dashboard's own JS installs at page load. The previous in-page roundtrip called fetch() directly from page.evaluate, which bypasses the interceptor and gets HTTP 406, so 观看来源 / 观众画像 / 趋势数据 rows silently never landed even though the help string promised them. Instead of forging signatures, install a fetch + XHR capture hook on window.__xhsCapture, SPA-navigate to /statistics/note-detail via history.pushState + popstate (a hard page.goto would wipe the hook before the first auto-fetch fires), and harvest the dashboard's own signed responses out of the capture buffer. Also fix a truncated endpoint path (the trailing /source segment was missing): /note/audience -> /note/audience/source. The old path returned 404 even when signed; the page actually fetches /note/audience/source for the 观看来源 panel. Confirmed against the live dashboard XHR list while logged in. Tests updated to mock the new install-hook + SPA-nav + poll-capture sequence at page.evaluate (the previous burst-wait-between-fetches assertion no longer applies). Closes jackwener#1728. Reporter diagnosis: @ppop123 traced the signing bypass + endpoint typo and verified the hook + SPA-nav workaround on 86 notes. * test(xiaohongshu): trim installXhsFetchCaptureHook comment to match sibling tone Sibling helper functions in creator-note-detail.js have no doc-comment block above the declaration; the 5-line WHY block on the new hook was out of style. Compress to two lines covering the same WHY (signed API bypass + 406) and let the rest of the context live in the commit body of the parent fix. 
* test(xiaohongshu): name the creator-note-detail poll bounds Inline literals (20 iteration cap, 0.5s wait) drift from sibling convention in clis/xiaohongshu/delete-note.js where the same kind of post-write polling is named VERIFY_TIMEOUT_MS / VERIFY_POLL_MS. Promote the two values to CAPTURE_POLL_ATTEMPTS / CAPTURE_POLL_INTERVAL_S so the loop reads against an explicit budget and future tuning lands in one place. * fix(xiaohongshu): address copilot review on creator-note-detail hook Two polish items from the Copilot review on jackwener#1732: - Buffer reset: window.__xhsCapture is now cleared on every install call so stale captures from a previous run on the same tab cannot leak into the current navigation's harvest. The wrapper-install guard moves to a separate __xhsCaptureInstalled flag so the fetch/XHR monkey-patches themselves are still installed exactly once per page lifetime. - XHR static constants: HookedXHR now copies the readyState constants (UNSENT / OPENED / HEADERS_RECEIVED / LOADING / DONE) from the original constructor so dashboard code that reads XMLHttpRequest.DONE etc against the constructor keeps working. * fix(xhs): tighten note detail capture matching --------- Co-authored-by: jackwener <jakevingoo@gmail.com>
1 parent e318522 commit 6b8d30b

2 files changed

Lines changed: 351 additions & 65 deletions

File tree

clis/xiaohongshu/creator-note-detail.js

Lines changed: 165 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* Requires: logged into creator.xiaohongshu.com in Chrome.
1010
*/
1111
import { cli, Strategy } from '@jackwener/opencli/registry';
12-
import { EmptyResultError } from '@jackwener/opencli/errors';
12+
import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
1313
const NOTE_DETAIL_DATETIME_RE = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}$/;
1414
const NOTE_DETAIL_METRICS = [
1515
{ label: '曝光数', section: '基础数据' },
@@ -247,37 +247,170 @@ const DETAIL_API_ENDPOINTS = [
247247
{ suffix: '/api/galaxy/creator/datacenter/note/base', key: 'noteBase' },
248248
{ suffix: '/api/galaxy/creator/datacenter/note/analyze/audience/trend', key: 'audienceTrend' },
249249
{ suffix: '/api/galaxy/creator/datacenter/note/audience/source/detail', key: 'audienceSourceDetail' },
250-
{ suffix: '/api/galaxy/creator/datacenter/note/audience', key: 'audienceSource' },
250+
{ suffix: '/api/galaxy/creator/datacenter/note/audience/source', key: 'audienceSource' },
251251
];
252+
// Maximum number of times the capture buffer is polled before harvesting
// whatever has been captured so far.
const CAPTURE_POLL_ATTEMPTS = 20;
// Value handed to page.wait() before each poll (seconds, per the _S suffix).
const CAPTURE_POLL_INTERVAL_S = 0.5;
254+
/**
 * Resolve a request URL to its entry in DETAIL_API_ENDPOINTS.
 *
 * Relative URLs are resolved against the creator dashboard origin and only
 * the pathname is compared, so query strings never influence the match.
 *
 * @param {unknown} url - Absolute or relative request URL; falsy values yield null.
 * @returns {{ suffix: string, key: string } | null} The endpoint whose suffix
 *   equals the pathname exactly, or null when nothing matches.
 */
function detailApiEndpointForUrl(url) {
    if (!url) {
        return null;
    }
    let pathname;
    try {
        ({ pathname } = new URL(String(url), 'https://creator.xiaohongshu.com'));
    } catch {
        // An unparseable URL cannot belong to any known endpoint.
        return null;
    }
    return DETAIL_API_ENDPOINTS.find((endpoint) => endpoint.suffix === pathname) ?? null;
}
265+
/**
 * Find the first key of the capture buffer whose pathname maps to the
 * endpoint identified by `suffix`.
 *
 * @param {Record<string, unknown>} captureMap - Capture buffer keyed by request URL.
 * @param {string} suffix - Endpoint pathname to look for.
 * @returns {string | undefined} The matching captured URL, or undefined if none matched.
 */
function findCapturedUrl(captureMap, suffix) {
    const capturedUrls = Object.keys(captureMap);
    return capturedUrls.find((url) => detailApiEndpointForUrl(url)?.suffix === suffix);
}
268+
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * "Plain" is deliberately loose here: no prototype check is performed, so any
 * object that is not an array passes.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isPlainObject(value) {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
}
271+
/**
 * Ensure that `payload[key]`, when present, is an array.
 *
 * @param {object} payload - Already-validated object payload.
 * @param {string} key - Property that may be absent but must be an array if set.
 * @param {string} suffix - Endpoint path, used only in the error message.
 * @throws {CommandExecutionError} When the property exists and is not an array.
 */
function assertOptionalArray(payload, key, suffix) {
    // Own-property test so inherited members can never trigger validation.
    if (!Object.hasOwn(payload, key)) {
        return;
    }
    if (!Array.isArray(payload[key])) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned malformed ${key}`);
    }
}
276+
/**
 * Ensure that `payload[key]`, when present, is a non-array object.
 *
 * @param {object} payload - Already-validated object payload.
 * @param {string} key - Property that may be absent but must be an object if set.
 * @param {string} suffix - Endpoint path, used only in the error message.
 * @throws {CommandExecutionError} When the property exists and is not an object.
 */
function assertOptionalPlainObject(payload, key, suffix) {
    // Own-property test so inherited members can never trigger validation.
    if (!Object.hasOwn(payload, key)) {
        return;
    }
    if (!isPlainObject(payload[key])) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned malformed ${key}`);
    }
}
281+
/**
 * Check the shape of a decoded endpoint payload and return it unchanged.
 *
 * The payload itself must be a non-array object. Per endpoint key, these
 * optional members are type-checked when present:
 *   - noteBase: `hour` and `day` must be objects
 *   - audienceSource: `source` must be an array
 *   - audienceSourceDetail: `gender`, `age`, `city`, `interest` must be arrays
 * Other endpoint keys get no member-level checks.
 *
 * @param {unknown} payload - Decoded response data.
 * @param {{ suffix: string, key: string }} endpoint - Entry from DETAIL_API_ENDPOINTS.
 * @returns {object} The same payload reference.
 * @throws {CommandExecutionError} On any shape violation.
 */
function validateCapturedPayload(payload, endpoint) {
    const { suffix, key: endpointKey } = endpoint;
    if (!isPlainObject(payload)) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned a malformed payload`);
    }
    if (endpointKey === 'noteBase') {
        assertOptionalPlainObject(payload, 'hour', suffix);
        assertOptionalPlainObject(payload, 'day', suffix);
    }
    if (endpointKey === 'audienceSource') {
        assertOptionalArray(payload, 'source', suffix);
    }
    if (endpointKey === 'audienceSourceDetail') {
        for (const member of ['gender', 'age', 'city', 'interest']) {
            assertOptionalArray(payload, member, suffix);
        }
    }
    return payload;
}
300+
/**
 * Turn one capture-buffer entry into a validated endpoint payload.
 *
 * The body is parsed as JSON. When the result is an object with its own `data`
 * property, that property is treated as the payload; otherwise the parsed
 * value itself is used.
 *
 * @param {{ status?: number, ok?: boolean, body?: unknown } | null | undefined} capture
 *   Entry recorded by the in-page fetch/XHR hook.
 * @param {{ suffix: string, key: string }} endpoint - Entry from DETAIL_API_ENDPOINTS.
 * @returns {object} The validated payload.
 * @throws {CommandExecutionError} When the capture is missing, reports a
 *   non-OK status, has a non-string body, or fails JSON parsing / validation.
 */
function parseCapturedJson(capture, endpoint) {
    const { suffix } = endpoint;
    if (!capture || typeof capture !== 'object') {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: malformed capture for ${suffix}`);
    }
    if (capture.ok !== true) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned HTTP ${capture.status ?? 'non-2xx'}`);
    }
    if (typeof capture.body !== 'string') {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned a non-text body`);
    }
    try {
        const envelope = JSON.parse(capture.body);
        const hasDataField = isPlainObject(envelope) && Object.hasOwn(envelope, 'data');
        return validateCapturedPayload(hasDataField ? envelope.data : envelope, endpoint);
    } catch {
        // JSON syntax errors and shape-validation errors are intentionally
        // reported through the same message.
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned invalid JSON or payload shape`);
    }
}
320+
/**
 * Install (once per page lifetime) fetch and XMLHttpRequest wrappers that
 * record responses for the DETAIL_API_ENDPOINTS paths on window.__xhsCapture.
 *
 * The dashboard's own signed datacenter/note responses are captured because a
 * direct fetch() from page.evaluate bypasses the x-s signing and gets 406.
 *
 * Every call clears window.__xhsCapture; the wrappers themselves are guarded
 * by window.__xhsCaptureInstalled so they are never stacked.
 *
 * @param {{ evaluate(script: string): Promise<unknown> }} page - Browser page handle.
 * @returns {Promise<void>}
 */
async function installXhsFetchCaptureHook(page) {
    const targetPaths = DETAIL_API_ENDPOINTS.map((endpoint) => endpoint.suffix);
    await page.evaluate(`(() => {
        const targetPaths = ${JSON.stringify(targetPaths)};
        const shouldCapture = (url) => {
            try {
                return targetPaths.includes(new URL(String(url), window.location.origin).pathname);
            } catch (_) {
                return false;
            }
        };
        // Reset the buffer every call so stale captures from a previous run on
        // the same tab cannot leak into the current navigation's harvest.
        window.__xhsCapture = {};
        if (window.__xhsCaptureInstalled) return;
        window.__xhsCaptureInstalled = true;
        const origFetch = window.fetch;
        window.fetch = async function(...args) {
            const resp = await origFetch.apply(this, args);
            try {
                // fetch() accepts a string, a Request (exposes .url) or a URL (exposes .href).
                const input = args[0];
                const url = typeof input === 'string' ? input : (input && (input.url || input.href)) || '';
                if (shouldCapture(url)) {
                    resp.clone().text().then((body) => {
                        try { window.__xhsCapture[url] = { status: resp.status, ok: resp.ok, body }; } catch (_) {}
                    }).catch(() => {});
                }
            } catch (_) {}
            return resp;
        };
        const OrigXHR = window.XMLHttpRequest;
        function HookedXHR() {
            const xhr = new OrigXHR();
            const origOpen = xhr.open;
            let capturedUrl = '';
            xhr.open = function(method, url, ...rest) {
                capturedUrl = url;
                return origOpen.call(this, method, url, ...rest);
            };
            xhr.addEventListener('load', () => {
                try {
                    if (shouldCapture(capturedUrl)) {
                        window.__xhsCapture[capturedUrl] = { status: xhr.status, ok: xhr.status >= 200 && xhr.status < 300, body: xhr.responseText };
                    }
                } catch (_) {}
            });
            return xhr;
        }
        HookedXHR.prototype = OrigXHR.prototype;
        // Preserve readyState constants (UNSENT / OPENED / HEADERS_RECEIVED / LOADING / DONE)
        // since dashboard code may read XMLHttpRequest.DONE etc against the constructor.
        for (const key of ['UNSENT', 'OPENED', 'HEADERS_RECEIVED', 'LOADING', 'DONE']) {
            if (key in OrigXHR) HookedXHR[key] = OrigXHR[key];
        }
        window.XMLHttpRequest = HookedXHR;
    })()`);
}
252377
/**
 * Collect the dashboard's own signed datacenter/note/* responses for a note.
 *
 * Steps: install the capture hook, SPA-navigate to the note-detail route,
 * then poll window.__xhsCapture up to CAPTURE_POLL_ATTEMPTS times (waiting
 * CAPTURE_POLL_INTERVAL_S before each poll) until every endpoint in
 * DETAIL_API_ENDPOINTS has a capture. Whatever was captured by then is parsed.
 *
 * @param {{ evaluate(script: string): Promise<unknown>, wait(arg: unknown): Promise<unknown> }} page
 *   Browser page handle, already on the creator dashboard.
 * @param {string} noteId - Note identifier; URL-encoded before it is placed in the route.
 * @returns {Promise<object | null>} Payloads keyed by endpoint `key`, or null
 *   when no endpoint was captured.
 * @throws {CommandExecutionError} When the capture buffer cannot be read or is
 *   malformed, or when a captured response fails parsing.
 */
async function captureNoteDetailPayload(page, noteId) {
    await installXhsFetchCaptureHook(page);
    // SPA-navigate inside the dashboard so the React router re-fires the
    // signed datacenter/note/* requests under our hook. A second page.goto
    // would wipe the hook before the first auto-fetch can land.
    const target = `/statistics/note-detail?noteId=${encodeURIComponent(noteId)}`;
    await page.evaluate(`(() => {
        history.pushState({}, '', ${JSON.stringify(target)});
        window.dispatchEvent(new PopStateEvent('popstate'));
    })()`);
    let captureMap = {};
    for (let attempt = 0; attempt < CAPTURE_POLL_ATTEMPTS; attempt++) {
        await page.wait(CAPTURE_POLL_INTERVAL_S);
        try {
            const raw = await page.evaluate('JSON.stringify(window.__xhsCapture || {})');
            captureMap = typeof raw === 'string' ? JSON.parse(raw) : {};
        } catch {
            throw new CommandExecutionError('xiaohongshu creator-note-detail: failed to read signed datacenter/note capture buffer');
        }
        if (!captureMap || typeof captureMap !== 'object' || Array.isArray(captureMap)) {
            throw new CommandExecutionError('xiaohongshu creator-note-detail: malformed signed datacenter/note capture buffer');
        }
        const allCaptured = DETAIL_API_ENDPOINTS.every(({ suffix }) => findCapturedUrl(captureMap, suffix));
        if (allCaptured) {
            break;
        }
    }
    const payload = {};
    for (const endpoint of DETAIL_API_ENDPOINTS) {
        const matchUrl = findCapturedUrl(captureMap, endpoint.suffix);
        if (!matchUrl) {
            continue;
        }
        payload[endpoint.key] = parseCapturedJson(captureMap[matchUrl], endpoint);
    }
    return Object.keys(payload).length > 0 ? payload : null;
}
282415
async function captureNoteDetailDomData(page) {
283416
const result = await page.evaluate(`() => {
@@ -308,14 +441,18 @@ async function captureNoteDetailDomData(page) {
308441
return result;
309442
}
310443
export async function fetchCreatorNoteDetailRows(page, noteId) {
311-
await page.goto(`https://creator.xiaohongshu.com/statistics/note-detail?noteId=${encodeURIComponent(noteId)}`);
444+
// Land on the dashboard root first so the React app boots before the
445+
// note-specific signed APIs fire. captureNoteDetailPayload then installs
446+
// the fetch+XHR hook and SPA-navigates to /statistics/note-detail under
447+
// it, which is what surfaces the audience / trend rows.
448+
await page.goto('https://creator.xiaohongshu.com/statistics');
449+
const apiPayload = await captureNoteDetailPayload(page, noteId);
312450
const domData = await captureNoteDetailDomData(page).catch(() => null);
313451
let rows = parseCreatorNoteDetailDomData(domData, noteId);
314452
if (rows.length === 0) {
315453
const bodyText = await page.evaluate('() => document.body.innerText');
316454
rows = parseCreatorNoteDetailText(typeof bodyText === 'string' ? bodyText : '', noteId);
317455
}
318-
const apiPayload = await captureNoteDetailPayload(page, noteId).catch(() => null);
319456
appendTrendRows(rows, apiPayload ?? undefined);
320457
appendAudienceRows(rows, apiPayload ?? undefined);
321458
return rows;

0 commit comments

Comments
 (0)