Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions cli-manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -34317,6 +34317,17 @@
"default": 20,
"required": false,
"help": "Number of results"
},
{
"name": "sort",
"type": "string",
"default": "general",
"required": false,
"help": "Sort order: general | latest",
"choices": [
"general",
"latest"
]
}
],
"columns": [
Expand Down
2 changes: 1 addition & 1 deletion clis/rednote/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ cli({
.map((item, i) => ({
rank: i + 1,
...item,
published_at: noteIdToDate(item.url),
published_at: item.published_at || noteIdToDate(item.url),
}));
},
});
116 changes: 114 additions & 2 deletions clis/xiaohongshu/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ export function stripXhsAuthorDateSuffix(value) {
const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim();
return stripped || text;
}
/**
 * Pull the trailing publish-time label out of a piece of card text.
 *
 * Whitespace runs are collapsed to single spaces and the text is trimmed
 * before matching. Only a label sitting at the very end of the text counts
 * (relative labels such as "30分钟前", "昨天 10:16", or `MM-DD` / `YYYY-MM-DD`
 * dates); date-like text in the middle of a string is ignored.
 *
 * This function is also serialized with `.toString()` and embedded into the
 * in-page extraction script, so it must not reference anything outside its
 * own body.
 *
 * @param {string | null | undefined} value Raw text; falsy values are treated as ''.
 * @returns {string} The matched label, or '' when the text does not end with one.
 */
export function extractXhsPublishText(value) {
    const normalized = (value || '').replace(/\s+/g, ' ').trim();
    const found = /(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天(?:\s+\d{1,2}:\d{2})?|前天(?:\s+\d{1,2}:\d{2})?|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u.exec(normalized);
    if (found === null) {
        return '';
    }
    return found[0];
}
/**
* `page.evaluate` may return either the raw IIFE value or a
* `{ session, data }` envelope depending on the browser-bridge version.
Expand All @@ -80,6 +85,43 @@ function requireSearchRows(payload, phase) {
}
return rows;
}
/**
 * Validate the result of the sort-option lookup script and return the index
 * of the option to click.
 *
 * The payload is first passed through `unwrapEvaluateResult`, so both a raw
 * result object and an already-unwrapped one are accepted.
 *
 * @param {unknown} payload Value returned by `page.evaluate` for the lookup script.
 * @returns {number} Non-negative safe-integer index of the sort option.
 * @throws {CommandExecutionError} When the result is not `{ ok: true }` (the
 *   message carries the script's `reason`, or `unknown`), or when `index` is
 *   not a non-negative safe integer.
 */
function requireSortOptionIndex(payload) {
    const outcome = unwrapEvaluateResult(payload);
    const isObject = Boolean(outcome) && typeof outcome === 'object';
    if (!isObject || outcome.ok !== true) {
        const reason = isObject && 'reason' in outcome ? outcome.reason : 'unknown';
        throw new CommandExecutionError(`Xiaohongshu search could not apply --sort latest (${reason}).`);
    }
    const { index } = outcome;
    const isUsableIndex = Number.isSafeInteger(index) && index >= 0;
    if (!isUsableIndex) {
        throw new CommandExecutionError('Xiaohongshu search could not apply --sort latest (invalid_option_index).');
    }
    return index;
}
/**
 * Build the source of an IIFE (as a string) that dismisses known blocking
 * notice overlays when evaluated in the page.
 *
 * The generated script walks every visible `button` / `[role="button"]`
 * whose whitespace-normalized text is exactly '我知道了' or '知道了', looks up
 * its closest dialog/modal container, and clicks the button only when that
 * container's text matches one of the blocking-notice keywords. It evaluates
 * to `{ ok: true, clicked }`, where `clicked` is the number of buttons clicked.
 *
 * Regex backslashes are doubled (`\\s`) because the script lives inside a
 * template literal.
 *
 * NOTE(review): `body` is part of the `closest()` selector list, so a button
 * outside any dialog is checked against the whole page's text — confirm this
 * fallback is intended.
 *
 * @returns {string} JavaScript source to pass to `page.evaluate`.
 */
export function buildDismissKnownXhsOverlaysJs() {
return `
(() => {
const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
const isVisible = (el) => {
const rect = el.getBoundingClientRect();
if (rect.width <= 0 || rect.height <= 0) return false;
const style = getComputedStyle(el);
return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0';
};
const isBlockingNotice = (text) => /温馨提示|广告屏蔽|插件|申诉|浏览器|正常使用|风险/.test(text);
let clicked = 0;
for (const button of Array.from(document.querySelectorAll('button, [role="button"]'))) {
if (!isVisible(button)) continue;
const text = cleanText(button.innerText || button.textContent || '');
if (text !== '我知道了' && text !== '知道了') continue;
const container = button.closest('[role="dialog"], .d-modal, .reds-modal, .el-dialog, body');
const noticeText = cleanText(container?.innerText || '');
if (!isBlockingNotice(noticeText)) continue;
button.click();
clicked++;
}
return { ok: true, clicked };
})()
`;
}
export function parseLimit(raw) {
const parsed = Number(raw ?? 20);
if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) {
Expand All @@ -90,6 +132,14 @@ export function parseLimit(raw) {
}
return parsed;
}
/**
 * Normalize the `--sort` argument to one of the canonical sort keys.
 *
 * The raw value is stringified, trimmed and lower-cased before comparison.
 * `null` / `undefined` fall back to 'general'. The Chinese UI labels
 * '综合' and '最新' are accepted as aliases.
 *
 * @param {unknown} raw Value supplied for `--sort`.
 * @returns {'general' | 'latest'} Canonical sort key.
 * @throws {ArgumentError} When the value is not a supported sort label.
 */
export function parseSort(raw) {
    const normalized = String(raw ?? 'general').trim().toLowerCase();
    switch (normalized) {
        case 'general':
        case '综合':
            return 'general';
        case 'latest':
        case '最新':
            return 'latest';
        default:
            throw new ArgumentError(`--sort must be one of: general, latest, got ${JSON.stringify(raw)}`);
    }
}
/**
* Build a "scroll until enough or plateaued" IIFE used in place of a fixed
* `autoScroll({ times: N })`. Xiaohongshu's search results page lazy-loads
Expand Down Expand Up @@ -174,6 +224,49 @@ export function buildScrollUntilJs(targetCount, maxScrolls = 15) {
})()
`;
}
/**
 * Build the source of an IIFE (as a string) that locates the sort option to
 * click inside the search filter panel.
 *
 * `sort === 'latest'` targets the '最新' label; any other value targets '综合'.
 * The label is inlined with `JSON.stringify`, so it appears in the script as
 * a quoted string literal.
 *
 * When evaluated in the page, the script evaluates to one of:
 * - `{ ok: false, reason: 'filter_panel_not_found' }` when no
 *   `.filter-panel .tags` element exists;
 * - `{ ok: false, reason: 'sort_option_not_found', label }` when no visible
 *   element has exactly the target label;
 * - `{ ok: true, label, index }` otherwise. A visible match without the
 *   `active` class is preferred, falling back to any visible match.
 *
 * `index` is the position within the full `.filter-panel .tags` list, which
 * is the same selector the caller clicks with `{ nth: index }`.
 *
 * @param {string} sort Canonical sort key.
 * @returns {string} JavaScript source to pass to `page.evaluate`.
 */
export function buildSearchSortOptionIndexJs(sort) {
const label = sort === 'latest' ? '最新' : '综合';
return `
(() => {
const targetLabel = ${JSON.stringify(label)};
const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
const isVisible = (el) => {
const rect = el.getBoundingClientRect();
if (rect.width <= 0 || rect.height <= 0) return false;
const style = getComputedStyle(el);
return style.display !== 'none' && style.visibility !== 'hidden';
};
const visibleTextIs = (el, text) => cleanText(el.innerText || el.textContent || '') === text;
const allTags = Array.from(document.querySelectorAll('.filter-panel .tags'));
if (allTags.length === 0) return { ok: false, reason: 'filter_panel_not_found' };
let index = allTags.findIndex((el) => isVisible(el) && visibleTextIs(el, targetLabel) && !el.classList.contains('active'));
if (index < 0) {
index = allTags.findIndex((el) => isVisible(el) && visibleTextIs(el, targetLabel));
}
if (index < 0) return { ok: false, reason: 'sort_option_not_found', label: targetLabel };
return { ok: true, label: targetLabel, index };
})()
`;
}
/**
 * Apply a sort order by driving the search page's filter UI.
 *
 * Sequence: dismiss known blocking overlays, pause briefly, then make up to
 * three attempts. Each attempt clicks the filter toggle and polls the
 * sort-option lookup script up to five times, 0.2s apart. On the first
 * successful lookup the option is clicked by index, the page is given 1.5s
 * to react, and the function returns.
 *
 * @param {object} page Browser page handle exposing `evaluate`, `wait` and `click`.
 * @param {string} sort Canonical sort key forwarded to the lookup script builder.
 * @returns {Promise<void>}
 * @throws {CommandExecutionError} Via `requireSortOptionIndex`, when every
 *   poll fails or a successful lookup carries an invalid index.
 */
async function applySearchSort(page, sort) {
    const maxAttempts = 3;
    const pollsPerAttempt = 5;
    const isSuccess = (outcome) => Boolean(outcome) && typeof outcome === 'object' && outcome.ok === true;

    await page.evaluate(buildDismissKnownXhsOverlaysJs());
    await page.wait({ time: 0.2 });

    let latestOutcome = null;
    for (let attemptsLeft = maxAttempts; attemptsLeft > 0; attemptsLeft -= 1) {
        // Each attempt clicks the filter toggle again before polling for the option.
        await page.click('.search-layout__top .filter span');
        for (let pollsLeft = pollsPerAttempt; pollsLeft > 0; pollsLeft -= 1) {
            await page.wait({ time: 0.2 });
            const rawOutcome = await page.evaluate(buildSearchSortOptionIndexJs(sort));
            latestOutcome = unwrapEvaluateResult(rawOutcome);
            if (!isSuccess(latestOutcome)) {
                continue;
            }
            const nth = requireSortOptionIndex(latestOutcome);
            await page.click('.filter-panel .tags', { nth });
            await page.wait({ time: 1.5 });
            return;
        }
    }
    // Every poll failed: turn the last observed outcome into a typed error.
    requireSortOptionIndex(latestOutcome);
}
/**
* Build the search-result extraction IIFE. The web host is baked into the
* `normalizeUrl` fallback so relative `/explore/...` hrefs resolve to a full
Expand All @@ -192,6 +285,7 @@ export function buildSearchExtractJs(webHost) {

const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
const stripXhsAuthorDateSuffix = ${stripXhsAuthorDateSuffix.toString()};
const extractXhsPublishText = ${extractXhsPublishText.toString()};
const isVisibleNote = (el) => {
const rect = el.getBoundingClientRect();
if (rect.width <= 0 || rect.height <= 0) return false;
Expand Down Expand Up @@ -225,9 +319,20 @@ export function buildSearchExtractJs(webHost) {
const nameEl = el.querySelector('a.author .name, .author-name, .nick-name, .name');
const authorWrapEl = el.querySelector('a.author');
let author = cleanText(nameEl?.textContent || '');
let publishedAt = '';
if (!author && authorWrapEl) {
const nameChild = authorWrapEl.querySelector('.name');
author = nameChild ? cleanText(nameChild.textContent || '') : stripXhsAuthorDateSuffix(authorWrapEl.textContent || '');
const authorCandidates = Array.from(authorWrapEl.querySelectorAll('*'))
.map((node) => cleanText(node.textContent || ''))
.filter((text) => text && !extractXhsPublishText(text));
author = nameChild ? cleanText(nameChild.textContent || '') : (authorCandidates[0] || stripXhsAuthorDateSuffix(authorWrapEl.textContent || ''));
}
if (authorWrapEl) {
const publishCandidates = Array.from(authorWrapEl.querySelectorAll('*'))
.map((node) => extractXhsPublishText(node.textContent || ''))
.filter(Boolean)
.sort((a, b) => a.length - b.length);
publishedAt = publishCandidates[0] || extractXhsPublishText(authorWrapEl.textContent || '');
}
const likesEl = el.querySelector('.count, .like-count, .like-wrapper .count');
// Prefer search_result link (preserves xsec_token) over generic /explore/ link
Expand Down Expand Up @@ -258,6 +363,7 @@ export function buildSearchExtractJs(webHost) {
title,
author,
likes: cleanText(likesEl?.textContent || '0'),
published_at: publishedAt,
url,
author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''),
});
Expand All @@ -278,10 +384,12 @@ export const command = cli({
args: [
{ name: 'query', required: true, positional: true, help: 'Search keyword' },
{ name: 'limit', type: 'int', default: 20, help: 'Number of results' },
{ name: 'sort', type: 'string', default: 'general', choices: ['general', 'latest'], help: 'Sort order: general | latest' },
],
columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'],
func: async (page, kwargs) => {
const limit = parseLimit(kwargs.limit);
const sort = parseSort(kwargs.sort);
const keyword = encodeURIComponent(kwargs.query);
await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
// Wait for search results to render (or login wall to appear).
Expand All @@ -291,6 +399,9 @@ export const command = cli({
if (waitResult === 'login_wall') {
throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
}
if (sort === 'latest') {
await applySearchSort(page, sort);
}
// Extract before scrolling. Xiaohongshu uses a virtualized masonry
// layout, so scrolling to the bottom can evict the initially visible
// note cards from the DOM and make extraction return [] even though the
Expand Down Expand Up @@ -321,10 +432,11 @@ export const command = cli({
.map((item, i) => ({
rank: i + 1,
...item,
published_at: noteIdToDate(item.url),
published_at: item.published_at || noteIdToDate(item.url),
}));
},
});
export const __test__ = {
stripXhsAuthorDateSuffix,
extractXhsPublishText,
};
119 changes: 118 additions & 1 deletion clis/xiaohongshu/search.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { describe, expect, it, vi } from 'vitest';
import { getRegistry } from '@jackwener/opencli/registry';
import { JSDOM } from 'jsdom';
import { __test__, buildScrollUntilJs, noteIdToDate, unwrapEvaluateResult } from './search.js';
import { __test__, buildDismissKnownXhsOverlaysJs, buildScrollUntilJs, buildSearchSortOptionIndexJs, noteIdToDate, parseSort, unwrapEvaluateResult } from './search.js';

function markVisible(el) {
el.getBoundingClientRect = () => ({ width: 100, height: 100 });
Expand Down Expand Up @@ -45,6 +45,16 @@ describe('xiaohongshu search', () => {
});
expect(page.goto).not.toHaveBeenCalled();
});
// An unsupported --sort value must be rejected with an ARGUMENT error, and
// the command must fail before it navigates anywhere (page.goto untouched).
it('rejects invalid sort before browser navigation', async () => {
const cmd = getRegistry().get('xiaohongshu/search');
const page = createPageMock([]);

await expect(cmd.func(page, { query: '特斯拉', limit: 5, sort: 'date' })).rejects.toMatchObject({
code: 'ARGUMENT',
message: expect.stringContaining('--sort'),
});
expect(page.goto).not.toHaveBeenCalled();
});
it('throws a clear error when the search page is blocked by a login wall', async () => {
const cmd = getRegistry().get('xiaohongshu/search');
expect(cmd?.func).toBeTypeOf('function');
Expand Down Expand Up @@ -104,6 +114,55 @@ describe('xiaohongshu search', () => {
},
]);
});
// Happy path for --sort latest: the command dismisses overlays, opens the
// filter panel, clicks the option at the index reported by the lookup script,
// and only then extracts rows.
it('applies the latest search filter through the page UI before extracting', async () => {
const cmd = getRegistry().get('xiaohongshu/search');
const detailUrl = 'https://www.xiaohongshu.com/search_result/6a1ded130000000013020400?xsec_token=test-token&xsec_source=';
// The row has no published_at of its own, so the expected date below
// presumably comes from the note-id fallback (noteIdToDate) — confirm.
const rows = [
{
title: '转租Santa Clara Orchard Glen公寓',
author: 'Jack Wang',
likes: '3',
url: detailUrl,
author_url: '',
},
];
// Queued page.evaluate results, in call order: first probe result, overlay
// dismissal, sort-option lookup (wrapped in a { session, data } envelope),
// extracted rows.
const page = createPageMock([
'content',
{ ok: true, clicked: 1 },
{ session: 'site:xiaohongshu', data: { ok: true, label: '最新', index: 2 } },
rows,
]);

const result = await cmd.func(page, { query: '湾区租房', limit: 1, sort: 'latest' });

expect(page.evaluate).toHaveBeenCalledTimes(4);
expect(String(page.evaluate.mock.calls[1][0])).toContain('广告屏蔽');
expect(String(page.evaluate.mock.calls[2][0])).toContain('targetLabel = "最新"');
// First click opens the filter panel; second clicks the option by index.
expect(page.click).toHaveBeenNthCalledWith(1, '.search-layout__top .filter span');
expect(page.click).toHaveBeenNthCalledWith(2, '.filter-panel .tags', { nth: 2 });
// Waits: after overlay dismissal, before the first poll, after the option click.
expect(page.wait).toHaveBeenNthCalledWith(1, { time: 0.2 });
expect(page.wait).toHaveBeenNthCalledWith(2, { time: 0.2 });
expect(page.wait).toHaveBeenNthCalledWith(3, { time: 1.5 });
expect(result[0]).toMatchObject({
rank: 1,
title: '转租Santa Clara Orchard Glen公寓',
published_at: '2026-06-02',
});
});
// When the sort option never shows up, the command must fail with a
// COMMAND_EXEC error that carries the lookup script's reason.
it('fails typed when the latest search filter cannot be applied', async () => {
const cmd = getRegistry().get('xiaohongshu/search');
// 15 failing lookups = 3 filter-open attempts x 5 polls each.
const page = createPageMock([
'content',
{ ok: true, clicked: 0 },
...Array.from({ length: 15 }, () => ({ ok: false, reason: 'sort_option_not_found' })),
]);

await expect(cmd.func(page, { query: '湾区租房', limit: 1, sort: 'latest' })).rejects.toMatchObject({
code: 'COMMAND_EXEC',
message: expect.stringContaining('sort_option_not_found'),
});
// 17 = first probe + overlay dismissal + 15 lookup polls; no extraction call.
expect(page.evaluate).toHaveBeenCalledTimes(17);
});
it('fails typed instead of silently returning [] for malformed extraction payloads', async () => {
const cmd = getRegistry().get('xiaohongshu/search');
const page = createPageMock([
Expand Down Expand Up @@ -216,10 +275,35 @@ describe('xiaohongshu search', () => {
expect(result[0]).toMatchObject({
title: '数字作者测试',
author: '数字3天前端',
published_at: '3天前',
likes: '8',
author_url: 'https://www.xiaohongshu.com/user/profile/author123',
});
});
// Regression guard: the author name ends in a digit and sits in a sibling
// <div> next to the publish time, so the wrapper's textContent reads
// "Wonyii_71小时前". Extraction must still split it into author "Wonyii_7"
// and publish time "1小时前".
it('does not merge an author-name trailing digit into the publish-time text', async () => {
const cmd = getRegistry().get('xiaohongshu/search');
// The author anchor deliberately has no `.name` element.
const dom = new JSDOM(`
<section class="note-item">
<a class="cover mask" href="/search_result/6a1f47270000000006036186?xsec_token=test-token"></a>
<div class="title">湾区找室友|7月Palo Alto附近上班女生</div>
<a class="author" href="/user/profile/author456">
<div><div>Wonyii_7</div><div>1小时前</div></div>
</a>
<span class="count">4</span>
</section>
`, { url: 'https://www.xiaohongshu.com/search_result?keyword=test' });
markVisible(dom.window.document.querySelector('section.note-item'));
const page = createPageMock([]);
// First evaluate call returns the canned 'content' value; the second runs
// the real extraction script against the JSDOM document.
page.evaluate.mockImplementationOnce(async () => 'content');
page.evaluate.mockImplementationOnce(async (script) => Function('document', 'getComputedStyle', `return (${script})`)(dom.window.document, dom.window.getComputedStyle.bind(dom.window)));

const result = await cmd.func(page, { query: '测试', limit: 1 });

expect(result[0]).toMatchObject({
author: 'Wonyii_7',
published_at: '1小时前',
});
});
});
describe('buildScrollUntilJs', () => {
it('inlines the target count and default maxScrolls into the generated IIFE', () => {
Expand Down Expand Up @@ -258,6 +342,32 @@ describe('buildScrollUntilJs', () => {
expect(() => buildScrollUntilJs(10, 0)).toThrow(/maxScrolls/);
});
});
// parseSort: accepts the English keys and their Chinese UI labels, defaults
// to 'general' when the value is undefined, and rejects anything else.
describe('parseSort', () => {
it('normalizes supported English and Chinese sort labels', () => {
expect(parseSort(undefined)).toBe('general');
expect(parseSort('general')).toBe('general');
expect(parseSort('综合')).toBe('general');
expect(parseSort('latest')).toBe('latest');
expect(parseSort('最新')).toBe('latest');
});
it('rejects unknown sort labels', () => {
expect(() => parseSort('date')).toThrow(/--sort/);
});
});
// buildSearchSortOptionIndexJs: the generated script must inline the Chinese
// UI label that corresponds to the requested sort key.
describe('buildSearchSortOptionIndexJs', () => {
it('targets the Xiaohongshu latest label for latest sort', () => {
expect(buildSearchSortOptionIndexJs('latest')).toContain('targetLabel = "最新"');
});
it('targets the Xiaohongshu general label for default sort', () => {
expect(buildSearchSortOptionIndexJs('general')).toContain('targetLabel = "综合"');
});
});
// buildDismissKnownXhsOverlaysJs: smoke check that the generated script
// contains a blocking-notice keyword and the dismiss-button label.
describe('buildDismissKnownXhsOverlaysJs', () => {
it('targets common Xiaohongshu blocking notice text', () => {
expect(buildDismissKnownXhsOverlaysJs()).toContain('广告屏蔽');
expect(buildDismissKnownXhsOverlaysJs()).toContain('我知道了');
});
});
describe('stripXhsAuthorDateSuffix', () => {
it('only strips trailing date suffixes and preserves date-like author text', () => {
expect(__test__.stripXhsAuthorDateSuffix('作者名 3天前')).toBe('作者名');
Expand All @@ -267,6 +377,13 @@ describe('stripXhsAuthorDateSuffix', () => {
expect(__test__.stripXhsAuthorDateSuffix('刚刚')).toBe('刚刚');
});
});
// extractXhsPublishText: a publish-time label is returned only when it is the
// trailing part of the text; date-like text inside a name yields ''.
describe('extractXhsPublishText', () => {
it('extracts visible Xiaohongshu publish-time labels only at the end', () => {
expect(__test__.extractXhsPublishText('作者名 30分钟前')).toBe('30分钟前');
expect(__test__.extractXhsPublishText('作者名 昨天 10:16')).toBe('昨天 10:16');
expect(__test__.extractXhsPublishText('数字3天前端')).toBe('');
});
});
describe('noteIdToDate (ObjectID timestamp parsing)', () => {
it('parses a known note ID to the correct China-timezone date', () => {
// 0x697f6c74 = 1769958516 → 2026-02-01 in UTC+8
Expand Down