Skip to content

Commit e7c58ca

Browse files
committed
feat(xiaohongshu): paginate creator-notes past the 10-row /analyze/list cap
The /api/galaxy/creator/datacenter/note/analyze/list endpoint serves 10 note rows per page, and the previous fetchCreatorNotesByApi only ever requested page 1 because the in-page direct fetch() bypassed xhs's signing interceptor and returned HTTP 406 for subsequent pages. As a result `opencli xiaohongshu creator-notes --limit 25` silently capped at 10 even for accounts with hundreds of notes. Install the same window.__xhsCapture fetch + XHR hook used by creator-note-detail (jackwener#1732), SPA-navigate to /statistics/data-analysis so the dashboard fires its own signed page_num=1 request under the hook, then click .d-pagination-page buttons for pages 2..N to make the dashboard's React router fire successive signed requests. Dedupe by note.id and return up to --limit. Pagination buttons render the page number duplicated in textContent ("22" for page 2 because of an inner accessibility span + visible span), so the click selector tolerates both the raw digit and the doubled form. CAPTURE_POLL_ATTEMPTS / CAPTURE_POLL_INTERVAL_S match the constant naming used by sibling delete-note.js. Fresh notes whose title field is still empty in the API response get enriched from the note-manager card DOM (which derives a title from the content's first line), so the pre-existing title coverage is preserved for the rows the API surfaces empty. Live-verified on benjamin-eecs's 圣诞薯 account (11 published notes, data-analysis permission active): creator-notes --limit 15 now returns all 11 rows, with 10 titles enriched via note-manager and 1 left empty because that note is older than note-manager's first 10 visible cards. For real-world use (e.g. @ppop123's reported 148-note account), all titles populate directly from the API. Closes jackwener#1729.
1 parent 3329a23 commit e7c58ca

1 file changed

Lines changed: 172 additions & 1 deletion

File tree

clis/xiaohongshu/creator-notes.js

Lines changed: 172 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,9 @@ const DATE_LINE_RE = /^发布于 (\d{4}年\d{2}月\d{2}日 \d{2}:\d{2})$/;
1313
// Matches a line that consists only of ASCII digits.
const METRIC_LINE_RE = /^\d+$/;
// NOTE(review): as written this pattern only anchors end-of-input and therefore
// matches every string; confirm it was not truncated before relying on it.
const VISIBILITY_LINE_RE = /$/;
// Path of the creator datacenter endpoint that lists per-note analytics rows.
const NOTE_ANALYZE_API_PATH = '/api/galaxy/creator/datacenter/note/analyze/list';
// Rows per page used when converting a row count into a page count.
const NOTE_ANALYZE_PAGE_SIZE = 10;
// Polling budget for captured responses: at most CAPTURE_POLL_ATTEMPTS polls,
// with a page.wait(CAPTURE_POLL_INTERVAL_S) before each one.
const CAPTURE_POLL_ATTEMPTS = 20;
const CAPTURE_POLL_INTERVAL_S = 0.5;
// Base URL of the per-note statistics detail page.
const NOTE_DETAIL_PAGE_URL = 'https://creator.xiaohongshu.com/statistics/note-detail';
// Extracts 24-hex-digit note ids from serialized `"noteId":"…"` pairs. The
// regex is global, so it carries lastIndex state between exec()/test() calls.
const NOTE_ID_HTML_RE = /"noteId":"([0-9a-f]{24})"/g;
1821
function buildNoteDetailUrl(noteId) {
@@ -105,6 +108,171 @@ function mapAnalyzeItems(items) {
105108
url: buildNoteDetailUrl(item.id),
106109
}));
107110
}
111+
// Capture the dashboard's signed /api/galaxy/* responses on window.__xhsCapture
// since a direct fetch() from page.evaluate bypasses the x-s signing and gets 406.
/**
 * Installs in-page fetch and XMLHttpRequest wrappers that record every response
 * whose URL contains "/api/galaxy/" into window.__xhsCapture, keyed by URL, as
 * { status, ok, body }.
 *
 * Each call resets window.__xhsCapture to an empty object. The wrappers are
 * installed only once per document, guarded by window.__xhsCaptureInstalled.
 *
 * @param {{ evaluate: (script: string) => Promise<unknown> }} page - Browser page
 *   handle; only evaluate() is used.
 * @returns {Promise<void>}
 */
async function installXhsFetchCaptureHook(page) {
  await page.evaluate(`(() => {
    window.__xhsCapture = {};
    if (window.__xhsCaptureInstalled) return;
    window.__xhsCaptureInstalled = true;

    // fetch() and XMLHttpRequest.open() accept strings, URL objects and (for
    // fetch) Request objects; normalise all of them to a plain string so the
    // substring filter below never misses or throws on a non-string input.
    const toUrlString = (input) => {
      if (typeof input === 'string') return input;
      if (input && typeof input.url === 'string') return input.url;
      return input == null ? '' : String(input);
    };

    const origFetch = window.fetch;
    window.fetch = async function(...args) {
      const resp = await origFetch.apply(this, args);
      try {
        const url = toUrlString(args[0]);
        if (url.includes('/api/galaxy/')) {
          // Read a clone so the page's own consumer still gets an unread body.
          resp.clone().text().then((body) => {
            try { window.__xhsCapture[url] = { status: resp.status, ok: resp.ok, body }; } catch (_) {}
          }).catch(() => {});
        }
      } catch (_) {}
      return resp;
    };

    const OrigXHR = window.XMLHttpRequest;
    function HookedXHR() {
      const xhr = new OrigXHR();
      const origOpen = xhr.open;
      let capturedUrl = '';
      xhr.open = function(method, url, ...rest) {
        capturedUrl = toUrlString(url);
        return origOpen.call(this, method, url, ...rest);
      };
      xhr.addEventListener('load', () => {
        try {
          if (capturedUrl.includes('/api/galaxy/')) {
            window.__xhsCapture[capturedUrl] = { status: xhr.status, ok: xhr.status >= 200 && xhr.status < 300, body: xhr.responseText };
          }
        } catch (_) {}
      });
      return xhr;
    }
    // Share the prototype and the readyState constants with the original
    // constructor so code reading them through window.XMLHttpRequest still works.
    HookedXHR.prototype = OrigXHR.prototype;
    for (const key of ['UNSENT', 'OPENED', 'HEADERS_RECEIVED', 'LOADING', 'DONE']) {
      if (key in OrigXHR) HookedXHR[key] = OrigXHR[key];
    }
    window.XMLHttpRequest = HookedXHR;
  })()`);
}
156+
/**
 * Collects note rows from captured /note/analyze/list responses.
 *
 * Only captures whose URL contains "/note/analyze/list" and whose `ok` flag is
 * truthy are read. Each body is parsed as JSON; bodies that fail to parse are
 * skipped. Rows come from `data.note_infos`, are de-duplicated by `id` (rows
 * without an id are dropped), and keep first-seen order.
 *
 * @param {Record<string, { ok?: boolean, body?: string }> | null | undefined} captureMap
 *   URL-keyed capture snapshot; null/undefined is treated as empty.
 * @returns {{ items: object[], total: number }} Unique rows plus the largest
 *   numeric `data.total` seen (0 when none was reported).
 */
function harvestAnalyzeListCaptures(captureMap) {
  const items = [];
  const seen = new Set();
  let total = 0;
  for (const [url, capture] of Object.entries(captureMap ?? {})) {
    if (!url.includes('/note/analyze/list')) continue;
    if (!capture?.ok) continue;
    let data;
    try {
      data = JSON.parse(capture.body)?.data ?? {};
    } catch {
      // Best-effort: a malformed body must not discard the other pages.
      continue;
    }
    if (typeof data.total === 'number' && data.total > total) total = data.total;
    const rows = Array.isArray(data.note_infos) ? data.note_infos : [];
    for (const note of rows) {
      if (!note?.id || seen.has(note.id)) continue;
      seen.add(note.id);
      items.push(note);
    }
  }
  return { items, total };
}
177+
/**
 * Polls window.__xhsCapture until it contains at least one entry whose URL
 * includes "/note/analyze/list", or until CAPTURE_POLL_ATTEMPTS polls have run.
 *
 * Each poll waits CAPTURE_POLL_INTERVAL_S via page.wait() first, then reads a
 * JSON snapshot of the capture map. A snapshot that is not a string, does not
 * parse, or is not a plain object is treated as an empty map so one bad read
 * does not abort the remaining attempts.
 *
 * @param {{ wait: (seconds: number) => Promise<unknown>,
 *           evaluate: (script: string) => Promise<unknown> }} page
 * @returns {Promise<Record<string, unknown>>} The last snapshot read (possibly
 *   empty when nothing was captured in time).
 */
async function pollCaptureMap(page) {
  let captureMap = {};
  for (let attempt = 0; attempt < CAPTURE_POLL_ATTEMPTS; attempt++) {
    await page.wait(CAPTURE_POLL_INTERVAL_S);
    const raw = await page.evaluate('JSON.stringify(window.__xhsCapture || {})');
    let snapshot = null;
    if (typeof raw === 'string') {
      try {
        snapshot = JSON.parse(raw);
      } catch {
        snapshot = null;
      }
    }
    const isPlainObject = snapshot !== null && typeof snapshot === 'object' && !Array.isArray(snapshot);
    captureMap = isPlainObject ? snapshot : {};
    if (Object.keys(captureMap).some((url) => url.includes('/note/analyze/list'))) break;
  }
  return captureMap;
}
187+
// Fresh-published notes return title: "" from /note/analyze/list (the field
// only populates once xhs's backend has indexed the content). Scrape the
// /new/note-manager card DOM as a secondary source so freshly-published
// notes still get a title even before backend indexing catches up.
/**
 * Navigates to the note-manager page and builds a note-id -> title map from the
 * rendered note cards.
 *
 * Best-effort: any error (navigation, evaluation, key press) ends the scrape
 * and returns whatever has been collected so far, possibly an empty map.
 * Side effect: the page is left on /new/note-manager.
 *
 * @param {{ goto: Function, wait: Function, evaluate: Function, pressKey: Function }} page
 * @param {number} neededCount - Number of titles the caller would like; scraping
 *   stops early once the map holds at least this many entries.
 * @returns {Promise<Map<string, string>>} First-seen title per note id.
 */
async function fetchNoteManagerTitleMap(page, neededCount) {
  const map = new Map();
  // Self-invoking, like the other in-page scripts in this file, so evaluate()
  // yields the card array itself rather than a function value.
  const scrapeCardsScript = `(() => {
    const noteIdRe = /"noteId":"([0-9a-f]{24})"/;
    return Array.from(document.querySelectorAll('div.note[data-impression], div.note')).map((card) => {
      const impression = card.getAttribute('data-impression') || '';
      const id = impression.match(noteIdRe)?.[1] || '';
      const title = (card.querySelector('.title, .raw')?.innerText || '').trim();
      return { id, title };
    }).filter((entry) => entry.id && entry.title);
  })()`;
  try {
    await page.goto('https://creator.xiaohongshu.com/new/note-manager');
    await page.wait(2);
    // The note-manager renders 10 cards per scroll batch and lazy-loads more
    // on PageDown. Scroll enough batches to cover all caller-requested ids.
    const scrollBatches = Math.max(1, Math.ceil(neededCount / NOTE_ANALYZE_PAGE_SIZE) + 1);
    for (let batch = 0; batch < scrollBatches; batch++) {
      const cards = await page.evaluate(scrapeCardsScript);
      for (const card of Array.isArray(cards) ? cards : []) {
        // Keep the first title seen for an id; later batches re-report old cards.
        if (card?.id && card.title && !map.has(card.id)) map.set(card.id, card.title);
      }
      if (map.size >= neededCount) break;
      await page.pressKey('PageDown');
      await page.wait(1);
    }
    return map;
  } catch {
    // Title enrichment is optional; hand back the partial result.
    return map;
  }
}
222+
/**
 * Lists creator notes by letting the dashboard issue its own signed
 * /note/analyze/list requests and reading the responses recorded on
 * window.__xhsCapture.
 *
 * Flow: open the statistics root, install the capture hook, SPA-navigate to the
 * data-analysis view, harvest page 1, then click the pagination buttons for
 * pages 2..N until `limit` rows are collected. Rows whose title is empty are
 * enriched from the note-manager page when possible.
 *
 * @param {object} page - Browser page handle (goto, evaluate, wait are used here).
 * @param {number} limit - Maximum number of notes to return.
 * @returns {Promise<object[]>} Up to `limit` mapped notes; an empty array when
 *   page 1 was never captured.
 */
async function fetchCreatorNotesByCapture(page, limit) {
  // Land on dashboard root before installing the hook so the data-analysis
  // SPA navigation fires page_num=1's signed request UNDER the hook.
  await page.goto('https://creator.xiaohongshu.com/statistics');
  await installXhsFetchCaptureHook(page);
  await page.evaluate(`(() => {
    history.pushState({}, '', '/statistics/data-analysis?source=official');
    window.dispatchEvent(new PopStateEvent('popstate'));
  })()`);

  let captureMap = await pollCaptureMap(page);
  const firstHarvest = harvestAnalyzeListCaptures(captureMap);
  let items = firstHarvest.items;
  if (items.length === 0) return [];

  const totalPages = firstHarvest.total > 0 ? Math.ceil(firstHarvest.total / NOTE_ANALYZE_PAGE_SIZE) : 1;
  const neededPages = Math.min(totalPages, Math.ceil(limit / NOTE_ANALYZE_PAGE_SIZE));

  for (let pageNum = 2; pageNum <= neededPages && items.length < limit; pageNum++) {
    const clicked = await page.evaluate(`(() => {
      const target = String(${pageNum});
      const btns = Array.from(document.querySelectorAll('.d-pagination-page'));
      const texts = btns.map((btn) => (btn.textContent || '').trim());
      // .d-pagination-page can render the page number doubled (a visible span +
      // an accessibility span), so textContent for page 2 reads "22". Accepting
      // both forms for every button is ambiguous: page 1 doubled is "11", the
      // same text as a raw page 11. Decide the render mode once from all numeric
      // buttons and then match exactly one form.
      const isDoubled = (text) => {
        const mid = text.length / 2;
        return text.length > 0 && text.length % 2 === 0 && text.slice(0, mid) === text.slice(mid);
      };
      const numeric = texts.filter((text) => /^\\d+$/.test(text));
      const doubledRender = numeric.length > 0 && numeric.every(isDoubled);
      const wanted = doubledRender ? target + target : target;
      const index = texts.indexOf(wanted);
      if (index === -1) return false;
      btns[index].click();
      return true;
    })()`);
    // No button for this page (for example it is not rendered): stop paginating.
    if (!clicked) break;

    const before = items.length;
    for (let attempt = 0; attempt < CAPTURE_POLL_ATTEMPTS; attempt++) {
      await page.wait(CAPTURE_POLL_INTERVAL_S);
      const raw = await page.evaluate('JSON.stringify(window.__xhsCapture || {})');
      captureMap = typeof raw === 'string' ? JSON.parse(raw) : {};
      const harvested = harvestAnalyzeListCaptures(captureMap);
      // The capture map accumulates every page, so growth means the newly
      // requested page has landed.
      if (harvested.items.length > before) {
        items = harvested.items;
        break;
      }
    }
  }

  const notes = mapAnalyzeItems(items).slice(0, limit);
  const missingTitles = notes.filter((note) => !note.title).length;
  if (missingTitles > 0) {
    const titleMap = await fetchNoteManagerTitleMap(page, notes.length);
    for (const note of notes) {
      if (!note.title && note.id && titleMap.has(note.id)) {
        note.title = titleMap.get(note.id);
      }
    }
  }
  return notes;
}
108276
async function fetchCreatorNotesByApi(page, limit) {
109277
const pageSize = Math.min(Math.max(limit, 10), 20);
110278
const maxPages = Math.max(1, Math.ceil(limit / pageSize));
@@ -148,7 +316,10 @@ async function fetchCreatorNotesByApi(page, limit) {
148316
return notes.slice(0, limit);
149317
}
150318
export async function fetchCreatorNotes(page, limit) {
151-
let notes = await fetchCreatorNotesByApi(page, limit);
319+
let notes = await fetchCreatorNotesByCapture(page, limit).catch(() => []);
320+
if (notes.length === 0) {
321+
notes = await fetchCreatorNotesByApi(page, limit);
322+
}
152323
if (notes.length === 0) {
153324
await page.goto('https://creator.xiaohongshu.com/new/note-manager');
154325
const maxPageDowns = Math.max(0, Math.ceil(limit / 10) + 1);

0 commit comments

Comments
 (0)