|
9 | 9 | * Requires: logged into creator.xiaohongshu.com in Chrome. |
10 | 10 | */ |
11 | 11 | import { cli, Strategy } from '@jackwener/opencli/registry'; |
12 | | -import { EmptyResultError } from '@jackwener/opencli/errors'; |
| 12 | +import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; |
13 | 13 | const NOTE_DETAIL_DATETIME_RE = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}$/; |
14 | 14 | const NOTE_DETAIL_METRICS = [ |
15 | 15 | { label: '曝光数', section: '基础数据' }, |
@@ -247,37 +247,170 @@ const DETAIL_API_ENDPOINTS = [ |
247 | 247 | { suffix: '/api/galaxy/creator/datacenter/note/base', key: 'noteBase' }, |
248 | 248 | { suffix: '/api/galaxy/creator/datacenter/note/analyze/audience/trend', key: 'audienceTrend' }, |
249 | 249 | { suffix: '/api/galaxy/creator/datacenter/note/audience/source/detail', key: 'audienceSourceDetail' }, |
250 | | - { suffix: '/api/galaxy/creator/datacenter/note/audience', key: 'audienceSource' }, |
| 250 | + { suffix: '/api/galaxy/creator/datacenter/note/audience/source', key: 'audienceSource' }, |
251 | 251 | ]; |
// Upper bound on how many times the in-page capture buffer is polled before
// the harvest proceeds with whatever endpoints have been captured so far.
const CAPTURE_POLL_ATTEMPTS = 20;
// Delay handed to page.wait() between polls. The _S suffix suggests seconds;
// TODO confirm against page.wait()'s contract.
const CAPTURE_POLL_INTERVAL_S = 0.5;
/**
 * Looks up the DETAIL_API_ENDPOINTS entry that a request URL belongs to.
 *
 * Relative URLs are resolved against the creator dashboard origin and the
 * comparison is made on the exact pathname, so the query string is ignored.
 *
 * @param {*} url - absolute or relative request URL; falsy values yield null.
 * @returns {object|null} the matching endpoint descriptor, or null when the
 *   URL is empty, cannot be parsed, or matches no known endpoint.
 */
function detailApiEndpointForUrl(url) {
    if (!url) {
        return null;
    }
    try {
        const { pathname } = new URL(String(url), 'https://creator.xiaohongshu.com');
        for (const candidate of DETAIL_API_ENDPOINTS) {
            if (candidate.suffix === pathname) {
                return candidate;
            }
        }
        return null;
    }
    catch {
        // Unparseable URLs are treated the same as "no endpoint".
        return null;
    }
}
/**
 * Finds the first captured URL whose endpoint (as resolved by
 * detailApiEndpointForUrl) has the given suffix.
 *
 * @param {object} captureMap - capture buffer keyed by request URL.
 * @param {string} suffix - endpoint pathname to look for.
 * @returns {string|undefined} the matching key of captureMap, if any.
 */
function findCapturedUrl(captureMap, suffix) {
    for (const capturedUrl of Object.keys(captureMap)) {
        const endpoint = detailApiEndpointForUrl(capturedUrl);
        if (endpoint?.suffix === suffix) {
            return capturedUrl;
        }
    }
    return undefined;
}
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {*} value - anything.
 * @returns {boolean} true for objects other than null and arrays.
 */
function isPlainObject(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
/**
 * Ensures that, when `key` is present on the payload, its value is an array.
 * An absent key is accepted.
 *
 * @param {object} payload - decoded API payload.
 * @param {string} key - property to check.
 * @param {string} suffix - endpoint pathname, used in the error message.
 * @throws {CommandExecutionError} when the key is present but not an array.
 */
function assertOptionalArray(payload, key, suffix) {
    if (!(key in payload)) {
        return;
    }
    if (Array.isArray(payload[key])) {
        return;
    }
    throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned malformed ${key}`);
}
/**
 * Ensures that, when `key` is present on the payload, its value passes
 * isPlainObject. An absent key is accepted.
 *
 * @param {object} payload - decoded API payload.
 * @param {string} key - property to check.
 * @param {string} suffix - endpoint pathname, used in the error message.
 * @throws {CommandExecutionError} when the key is present but not a plain object.
 */
function assertOptionalPlainObject(payload, key, suffix) {
    if (!(key in payload)) {
        return;
    }
    if (isPlainObject(payload[key])) {
        return;
    }
    throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned malformed ${key}`);
}
/**
 * Checks the shape of a decoded payload for one endpoint and returns it
 * unchanged when it passes.
 *
 * The payload must be a plain object. Depending on `endpoint.key`, selected
 * optional fields must additionally be plain objects (noteBase: hour, day)
 * or arrays (audienceSource: source; audienceSourceDetail: gender, age,
 * city, interest). Other endpoint keys get only the top-level check.
 *
 * @param {*} payload - decoded response payload.
 * @param {{suffix: string, key: string}} endpoint - endpoint descriptor.
 * @returns {object} the same payload.
 * @throws {CommandExecutionError} on any shape violation.
 */
function validateCapturedPayload(payload, endpoint) {
    const { suffix } = endpoint;
    if (!isPlainObject(payload)) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned a malformed payload`);
    }
    switch (endpoint.key) {
        case 'noteBase':
            assertOptionalPlainObject(payload, 'hour', suffix);
            assertOptionalPlainObject(payload, 'day', suffix);
            break;
        case 'audienceSource':
            assertOptionalArray(payload, 'source', suffix);
            break;
        case 'audienceSourceDetail':
            ['gender', 'age', 'city', 'interest'].forEach((field) => {
                assertOptionalArray(payload, field, suffix);
            });
            break;
        default:
            break;
    }
    return payload;
}
/**
 * Decodes one captured response record into its validated payload.
 *
 * The record must be an object with `ok === true` and a string `body`. The
 * body is parsed as JSON; when the parsed value is a plain object with an
 * own `data` property, `data` is taken as the payload, otherwise the parsed
 * value itself is. The payload is then checked by validateCapturedPayload.
 *
 * @param {{status?: number, ok?: boolean, body?: *}} capture - record stored
 *   in the capture buffer for one request.
 * @param {{suffix: string, key: string}} endpoint - endpoint descriptor.
 * @returns {object} the validated payload.
 * @throws {CommandExecutionError} when the record is malformed, the response
 *   was not OK, the body is not text or not valid JSON, or the payload shape
 *   is rejected by validateCapturedPayload.
 */
function parseCapturedJson(capture, endpoint) {
    const suffix = endpoint.suffix;
    if (!capture || typeof capture !== 'object') {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: malformed capture for ${suffix}`);
    }
    if (capture.ok !== true) {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned HTTP ${capture.status ?? 'non-2xx'}`);
    }
    if (typeof capture.body !== 'string') {
        throw new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned a non-text body`);
    }
    let envelope;
    try {
        envelope = JSON.parse(capture.body);
    }
    catch (err) {
        const parseError = new CommandExecutionError(`xiaohongshu creator-note-detail: signed API ${suffix} returned invalid JSON or payload shape`);
        // Keep the underlying SyntaxError reachable for debugging.
        parseError.cause = err;
        throw parseError;
    }
    const payload = isPlainObject(envelope) && Object.hasOwn(envelope, 'data') ? envelope.data : envelope;
    // Validation runs outside the try so its field-specific error messages
    // reach the caller instead of being replaced by the generic parse error.
    return validateCapturedPayload(payload, endpoint);
}
/**
 * Installs in-page hooks that record the dashboard's own signed
 * datacenter/note responses on window.__xhsCapture.
 *
 * Capturing is used because a direct fetch() from page.evaluate bypasses the
 * x-s signing and gets 406.
 *
 * Every call resets window.__xhsCapture to an empty object. The fetch and
 * XMLHttpRequest wrappers themselves are installed only once per document,
 * guarded by window.__xhsCaptureInstalled. A response is recorded as
 * `{ status, ok, body }` under its request URL when the URL's pathname
 * (resolved against window.location.origin) equals one of the
 * DETAIL_API_ENDPOINTS suffixes. Failures inside the hooks are swallowed so
 * the page's own requests are not disturbed.
 *
 * @param page - browser page handle; only its evaluate() method is used here.
 * @returns {Promise<void>}
 */
async function installXhsFetchCaptureHook(page) {
    await page.evaluate(`(() => {
        const targetPaths = ${JSON.stringify(DETAIL_API_ENDPOINTS.map((endpoint) => endpoint.suffix))};
        const shouldCapture = (url) => {
            try {
                return targetPaths.includes(new URL(String(url), window.location.origin).pathname);
            } catch (_) {
                return false;
            }
        };
        // Reset the buffer every call so stale captures from a previous run on
        // the same tab cannot leak into the current navigation's harvest.
        window.__xhsCapture = {};
        if (window.__xhsCaptureInstalled) return;
        window.__xhsCaptureInstalled = true;
        const origFetch = window.fetch;
        window.fetch = async function(...args) {
            const resp = await origFetch.apply(this, args);
            try {
                const url = typeof args[0] === 'string' ? args[0] : (args[0] && args[0].url) || '';
                if (shouldCapture(url)) {
                    resp.clone().text().then((body) => {
                        try { window.__xhsCapture[url] = { status: resp.status, ok: resp.ok, body }; } catch (_) {}
                    }).catch(() => {});
                }
            } catch (_) {}
            return resp;
        };
        const OrigXHR = window.XMLHttpRequest;
        function HookedXHR() {
            const xhr = new OrigXHR();
            const origOpen = xhr.open;
            let capturedUrl = '';
            xhr.open = function(method, url, ...rest) {
                capturedUrl = url;
                return origOpen.call(this, method, url, ...rest);
            };
            xhr.addEventListener('load', () => {
                try {
                    if (shouldCapture(capturedUrl)) {
                        window.__xhsCapture[capturedUrl] = { status: xhr.status, ok: xhr.status >= 200 && xhr.status < 300, body: xhr.responseText };
                    }
                } catch (_) {}
            });
            return xhr;
        }
        HookedXHR.prototype = OrigXHR.prototype;
        // Preserve readyState constants (UNSENT / OPENED / HEADERS_RECEIVED / LOADING / DONE)
        // since dashboard code may read XMLHttpRequest.DONE etc against the constructor.
        for (const key of ['UNSENT', 'OPENED', 'HEADERS_RECEIVED', 'LOADING', 'DONE']) {
            if (key in OrigXHR) HookedXHR[key] = OrigXHR[key];
        }
        window.XMLHttpRequest = HookedXHR;
    })()`);
}
/**
 * Harvests the signed datacenter/note/* responses for one note.
 *
 * Installs the capture hook, SPA-navigates to the note-detail route so the
 * dashboard issues its own signed requests, then polls the in-page capture
 * buffer up to CAPTURE_POLL_ATTEMPTS times, stopping early once every
 * endpoint in DETAIL_API_ENDPOINTS has a capture. Whatever was captured by
 * then is decoded with parseCapturedJson.
 *
 * @param page - browser page handle exposing evaluate() and wait().
 * @param {string} noteId - id of the note; URL-encoded before use.
 * @returns {Promise<object|null>} payloads keyed by endpoint key, or null
 *   when no endpoint was captured.
 * @throws {CommandExecutionError} when the capture buffer cannot be read or
 *   is malformed, or when a captured response fails parseCapturedJson.
 */
async function captureNoteDetailPayload(page, noteId) {
    await installXhsFetchCaptureHook(page);
    // SPA-navigate inside the dashboard so the React router re-fires the
    // signed datacenter/note/* requests under our hook. A second page.goto
    // would wipe the hook before the first auto-fetch can land.
    // The id is URL-encoded so characters such as '&' or '#' cannot alter
    // the query string.
    const target = `/statistics/note-detail?noteId=${encodeURIComponent(noteId)}`;
    await page.evaluate(`(() => {
        history.pushState({}, '', ${JSON.stringify(target)});
        window.dispatchEvent(new PopStateEvent('popstate'));
    })()`);
    let captureMap = {};
    for (let attempt = 0; attempt < CAPTURE_POLL_ATTEMPTS; attempt++) {
        await page.wait(CAPTURE_POLL_INTERVAL_S);
        try {
            const raw = await page.evaluate('JSON.stringify(window.__xhsCapture || {})');
            captureMap = typeof raw === 'string' ? JSON.parse(raw) : {};
        }
        catch (err) {
            const readError = new CommandExecutionError('xiaohongshu creator-note-detail: failed to read signed datacenter/note capture buffer');
            // Keep the underlying failure reachable for debugging.
            readError.cause = err;
            throw readError;
        }
        if (!captureMap || typeof captureMap !== 'object' || Array.isArray(captureMap)) {
            throw new CommandExecutionError('xiaohongshu creator-note-detail: malformed signed datacenter/note capture buffer');
        }
        const allCaptured = DETAIL_API_ENDPOINTS.every((endpoint) => findCapturedUrl(captureMap, endpoint.suffix));
        if (allCaptured) {
            break;
        }
    }
    const payload = {};
    for (const endpoint of DETAIL_API_ENDPOINTS) {
        const matchUrl = findCapturedUrl(captureMap, endpoint.suffix);
        if (!matchUrl) {
            // Endpoint never fired within the polling window; skip it.
            continue;
        }
        payload[endpoint.key] = parseCapturedJson(captureMap[matchUrl], endpoint);
    }
    return Object.keys(payload).length > 0 ? payload : null;
}
282 | 415 | async function captureNoteDetailDomData(page) { |
283 | 416 | const result = await page.evaluate(`() => { |
@@ -308,14 +441,18 @@ async function captureNoteDetailDomData(page) { |
308 | 441 | return result; |
309 | 442 | } |
310 | 443 | export async function fetchCreatorNoteDetailRows(page, noteId) { |
311 | | - await page.goto(`https://creator.xiaohongshu.com/statistics/note-detail?noteId=${encodeURIComponent(noteId)}`); |
| 444 | + // Land on the dashboard root first so the React app boots before the |
| 445 | + // note-specific signed APIs fire. captureNoteDetailPayload then installs |
| 446 | + // the fetch+XHR hook and SPA-navigates to /statistics/note-detail under |
| 447 | + // it, which is what surfaces the audience / trend rows. |
| 448 | + await page.goto('https://creator.xiaohongshu.com/statistics'); |
| 449 | + const apiPayload = await captureNoteDetailPayload(page, noteId); |
312 | 450 | const domData = await captureNoteDetailDomData(page).catch(() => null); |
313 | 451 | let rows = parseCreatorNoteDetailDomData(domData, noteId); |
314 | 452 | if (rows.length === 0) { |
315 | 453 | const bodyText = await page.evaluate('() => document.body.innerText'); |
316 | 454 | rows = parseCreatorNoteDetailText(typeof bodyText === 'string' ? bodyText : '', noteId); |
317 | 455 | } |
318 | | - const apiPayload = await captureNoteDetailPayload(page, noteId).catch(() => null); |
319 | 456 | appendTrendRows(rows, apiPayload ?? undefined); |
320 | 457 | appendAudienceRows(rows, apiPayload ?? undefined); |
321 | 458 | return rows; |
|
0 commit comments