From e9453f849d86114ac6128eeca858167a4f53a7df Mon Sep 17 00:00:00 2001 From: FlanChanOwO Date: Wed, 8 Apr 2026 17:36:53 +0800 Subject: [PATCH 01/13] feat(route/baidu): add support for BAIDU_COOKIE in various baidu tieba routes and implement user post retrieval --- lib/config.ts | 9 +- lib/routes/baidu/tieba/forum.tsx | 177 ++++++++++++++++++-------- lib/routes/baidu/tieba/post.tsx | 202 ++++++++++++++++++++++-------- lib/routes/baidu/tieba/search.tsx | 150 +++++++++++++++++----- lib/routes/baidu/tieba/user.ts | 54 -------- lib/routes/baidu/tieba/user.tsx | 152 ++++++++++++++++++++++ lib/routes/baidu/tieba/utils.ts | 128 +++++++++++++++++++ 7 files changed, 675 insertions(+), 197 deletions(-) delete mode 100644 lib/routes/baidu/tieba/user.ts create mode 100644 lib/routes/baidu/tieba/user.tsx create mode 100644 lib/routes/baidu/tieba/utils.ts diff --git a/lib/config.ts b/lib/config.ts index 9ba567508c9e..2892b843be5d 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -85,6 +85,7 @@ type ConfigEnvKeys = | 'BITBUCKET_USERNAME' | 'BITBUCKET_PASSWORD' | 'BTBYR_HOST' + | 'BAIDU_COOKIE' | 'BTBYR_COOKIE' | 'BUPT_PORTAL_COOKIE' | 'CAIXIN_COOKIE' @@ -348,6 +349,9 @@ export type Config = { }; // Route-specific Configurations + baidu: { + cookie?: string; + }; bilibili: { cookies: Record; dmImgList?: string; @@ -763,7 +767,7 @@ const calculateValue = () => { type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // 缓存类型,支持 'memory' 和 'redis',设为空可以禁止缓存 requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60), routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒 - contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒 + contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒 }, memory: { max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite intger. @@ -843,6 +847,9 @@ const calculateValue = () => { }, // Route-specific Configurations + baidu: { + cookie: envs.BAIDU_COOKIE, + }, bilibili: { cookies: bilibili_cookies, dmImgList: envs.BILIBILI_DM_IMG_LIST, diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index 8ba4ab80957b..64e277554f0a 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -1,85 +1,154 @@ import { load } from 'cheerio'; -import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; +import { config } from '@/config'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; +import cache from '@/utils/cache'; import timezone from '@/utils/timezone'; +import { parseRelativeTime, parseThreads } from './utils'; + export const route: Route = { path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'], categories: ['bbs'], example: '/baidu/tieba/forum/good/女图', parameters: { kw: '吧名', cid: '精品分类,默认为 `0`(全部分类),如果不传 `cid` 则获取全部分类', sortBy: '排序方式:`created`, `replied`。默认为 `created`' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, }, name: '精品帖子', - maintainers: ['u3u'], + maintainers: ['u3u', 'FlanChanXwO'], handler, }; async function handler(ctx) { // sortBy: created, replied const { kw, cid = '0', sortBy = 'created' } = ctx.req.param(); + const cookie = config.baidu.cookie; - // PC端:https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)} - // 移动端接口:https://tieba.baidu.com/mo/q/m?kw=${encodeURIComponent(kw)}&lp=5024&forum_recommend=1&lm=0&cid=0&has_url_param=1&pn=0&is_ajax=1 - const params = { kw: encodeURIComponent(kw) }; - ctx.req.path.includes('good') && (params.tab = 'good'); - cid && (params.cid = cid); - const { data } = await got(`https://tieba.baidu.com/f`, { - headers: { - Referer: 'https://tieba.baidu.com/', - }, - searchParams: params, - }); + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + // 检查Cookie是否包含必要的BDUSS + if (!cookie.includes('BDUSS')) { + throw new Error('BAIDU_COOKIE must contain BDUSS. Please check your cookie format.'); + } + + // 固定抓取3页,约30条帖子 + const maxPages = 3; + let allThreads: any[] = []; + + // 先获取第一页 + const { getPuppeteerPage } = await import('@/utils/puppeteer'); + const sortParam = sortBy === 'replied' ? '&sc=67108864' : ''; + + // 并发获取所有页面 + const pagePromises = []; + for (let pageNum = 0; pageNum < maxPages; pageNum++) { + const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=${pageNum * 50}${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${pageNum === 0 ? '' : '&ie=utf-8'}${sortParam}`; + + const promise = cache.tryGet( + `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, + async () => { + const { page, destroy } = await getPuppeteerPage(pageUrl, { + noGoto: true, + }); + + try { + await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); + + const cookies = cookie.split(';').map((c) => { + const [name, value] = c.trim().split('='); + return { + name: name.trim(), + value: value || '', + domain: '.tieba.baidu.com', + }; + }); + await page.setCookie(...cookies); - const threadListHTML = load(data)('code[id="pagelet_html_frs-list/pagelet/thread_list"]') - .contents() - .filter((e) => e.nodeType === '8'); - - const $ = load(threadListHTML.prevObject[0].data); - const list = $('#thread_list > .j_thread_list[data-field]') - .toArray() - .map((element) => { - const item = $(element); - const { id, author_name } = item.data('field'); - const time = sortBy === 'created' ? item.find('.is_show_create_time').text().trim() : item.find('.threadlist_reply_date').text().trim(); - const title = item.find('a.j_th_tit').text().trim(); - const details = item.find('.threadlist_abs').text().trim(); - const medias = item - .find('.threadlist_media img') - .toArray() - .map((element) => { - const item = $(element); - return ``; - }) - .join(''); - - return { - title, - description: renderToString( - <> -

{details}

-

{raw(medias)}

-

作者:{author_name}

- - ), - pubDate: timezone(parseDate(time, ['HH:mm', 'M-D', 'YYYY-MM'], true), +8), - link: `https://tieba.baidu.com/p/${id}`, - }; - }); + await page.goto(pageUrl, { waitUntil: 'networkidle2', timeout: 60000 }); + + // 动态等待帖子卡片加载,最多3秒 + try { + await page.waitForSelector('.thread-card-wrapper', { timeout: 3000 }); + } catch { + // 如果3秒内没加载出来,继续执行 + } + + const html = await page.content(); + return html; + } finally { + await destroy(); + } + }, + config.cache.routeExpire, + false + ); + pagePromises.push(promise); + } + + // 等待所有页面获取完成 + const pageResults = await Promise.all(pagePromises); + + // 解析所有页面数据并去重 + const threadMap = new Map(); + for (const pageData of pageResults) { + if (pageData && typeof pageData === 'string') { + const $ = load(pageData); + const threads = parseThreads($); + for (const thread of threads) { + // 使用帖子ID去重,只保留第一次出现的 + if (!threadMap.has(thread.id)) { + threadMap.set(thread.id, thread); + } + } + } + } + + allThreads = [...threadMap.values()]; + + if (allThreads.length === 0) { + throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.'); + } + + const list = allThreads.map((thread) => { + const parsedDate = parseRelativeTime(thread.time); + return { + title: thread.title, + link: thread.link || `https://tieba.baidu.com/p/${thread.id}`, + pubDate: parsedDate ? timezone(parsedDate, +8) : undefined, + author: thread.author, + description: renderToString( + <> + {thread.content ?

{thread.content}

: null} + {thread.images && thread.images.length > 0 ? ( +
+ {thread.images.map((img) => ( + + ))} +
+ ) : null} + + ), + }; + }); return { title: `${kw}吧`, - description: load(data)('meta[name="description"]').attr('content'), link: `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}`, item: list, }; diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index cfc0a02ddb85..db6dd33c2822 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -1,12 +1,14 @@ import { load } from 'cheerio'; -import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; +import { config } from '@/config'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; +import cache from '@/utils/cache'; import timezone from '@/utils/timezone'; +import { parseRelativeTime } from './utils'; + /** * 获取最新的帖子回复(倒序查看) * @@ -16,18 +18,62 @@ import timezone from '@/utils/timezone'; * 这个默认值我测试下来 7e6 是比较接近最大值了,因为当我输入 8e6 就会返回第一页的数据而不是最后一页了 * @returns */ -async function getPost(id, lz = 0, pn = 7e6) { - const { data } = await got(`https://tieba.baidu.com/p/${id}?see_lz=${lz}&pn=${pn}&ajax=1`, { - headers: { - Referer: 'https://tieba.baidu.com/', +async function getPost(id: string, lz = 0, pn = 7e6) { + const cookie = config.baidu.cookie; + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + const { getPuppeteerPage } = await import('@/utils/puppeteer'); + const url = `https://tieba.baidu.com/p/${id}?see_lz=${lz}&pn=${pn}`; + + const data = await cache.tryGet( + `tieba:post:${id}:${lz}:${pn}`, + async () => { + const { page, destroy } = await getPuppeteerPage(url, { + noGoto: true, + }); + + try { + // 先访问以设置域名 + await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); + + // 设置 Cookie + const cookies = cookie.split(';').map((c) => { + const [name, value] = c.trim().split('='); + return { + name: name.trim(), + value: value || '', + domain: '.tieba.baidu.com', + }; + }); + await page.setCookie(...cookies); + + // 访问目标页面 + await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); + + // 动态等待回复内容加载,最多3秒 + try { + await page.waitForSelector('.virtual-list-item', { timeout: 3000 }); + } catch { + // 如果3秒内没加载出来,继续执行 + } + + return await page.content(); + } finally { + await destroy(); + } }, - }); - const $ = load(data); - const max = Number.parseInt($('[max-page]').attr('max-page')); + config.cache.routeExpire, + false + ); + + const $ = load(data as string); + const max = Number.parseInt($('[max-page]').attr('max-page') || '0'); if (max > pn) { - return getPost(id, max); + return getPost(id, lz, max); } - return data; + return data as string; } export const route: Route = { @@ -36,9 +82,15 @@ export const route: Route = { example: '/baidu/tieba/post/686961453', parameters: { id: '帖子 ID' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, @@ -49,7 +101,7 @@ export const route: Route = { }, ], name: '帖子动态', - maintainers: ['u3u'], + maintainers: ['u3u', 'FlanChanXwO'], handler, }; @@ -58,49 +110,89 @@ async function handler(ctx) { const lz = ctx.req.path.includes('lz') ? 1 : 0; const html = await getPost(id, lz); const $ = load(html); - const title = $('.core_title_txt').attr('title'); - // .substr(3); - const list = $('.p_postlist > [data-field]:not(:has(.ad_bottom_view))'); + + // 检查是否遇到安全验证 + if ($('title').text().includes('安全验证') || html.includes('百度安全验证')) { + throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); + } + + const title = $('.pb-title-wrap .pb-title').text().trim() || ''; + + // 使用新的 Vue 渲染页面选择器 - 只选择 virtual-list-item 避免重复 + const list = $('.virtual-list-item'); + + if (list.length === 0) { + throw new Error('No post replies found. The post may not exist or the cookie is invalid.'); + } return { title: lz ? `【只看楼主】${title}` : title, link: `https://tieba.baidu.com/p/${id}?see_lz=${lz}`, description: `${title}的最新回复`, - item: list.toArray().map((element) => { - const item = $(element); - const { author, content } = item.data('field'); - const tempList = item - .find('.post-tail-wrap > .tail-info') - .toArray() - .map((element) => $(element).text()); - let [pubContent, from, num, time] = ['', '', '', '']; - if (0 === tempList.length && 'date' in content) { - num = `${content.post_no}楼`; - time = content.date; - pubContent = item.find('.j_d_post_content').html(); - } else if (2 === tempList.length) { - [num, time] = tempList; - pubContent = content.content; - } else if (3 === tempList.length) { - [from, num, time] = tempList; - pubContent = content.content; - } - return { - title: `${author.user_name}回复了帖子《${title}》`, - description: renderToString( - <> -

{raw(pubContent)}

-
- 作者:{author.user_name} -
- 楼层:{num} -
- {from} - - ), - pubDate: timezone(parseDate(time, 'YYYY-MM-DD hh:mm'), +8), - link: `https://tieba.baidu.com/p/${id}?pid=${content.post_id}#${content.post_id}`, - }; - }), + item: list + .toArray() + .map((element) => { + const item = $(element); + + // 作者名 + const authorName = item.find('.head-name').text().trim(); + + // 跳过无效用户(无作者名的条目) + if (!authorName) { + return null; + } + + // 内容 - 从 pb-rich-text 获取 + const contentItems = item.find('.pb-rich-text .pb-content-item'); + let postContent = ''; + contentItems.each((_, el) => { + const text = $(el).text().trim(); + if (text) { + postContent += `

${text}

`; + } + }); + + // 图片 + const images = item + .find('.image-list-wrapper img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('data-src') || '') + .filter(Boolean) + .map((src) => `${title}`) + .join(''); + + // 楼层和时间 + const descText = item.find('.pc-pb-comments-desc, .comment-desc-left').text().trim(); + const floorMatch = descText.match(/第(\d+)楼/); + const floor = floorMatch ? `${floorMatch[1]}楼` : ''; + + // 时间 - 可能是 "2分钟前" 这样的相对时间 + const timeMatch = descText.match(/(\d+分钟前|\d+小时前|今天\s*\d{2}:\d{2}|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2})/); + const timeText = timeMatch ? timeMatch[1] : ''; + + // 解析时间并验证有效性 + const parsedDate = timeText ? parseRelativeTime(timeText) : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + return { + title: `${authorName} 回复了帖子《${title}》`, + description: renderToString( + <> +
+
+

+ 楼层:{floor} +
+ 时间:{timeText} +

+ + ), + + pubDate: validPubDate, + author: authorName, + link: `https://tieba.baidu.com/p/${id}`, + }; + }) + .filter((item): item is NonNullable => item !== null), }; } diff --git a/lib/routes/baidu/tieba/search.tsx b/lib/routes/baidu/tieba/search.tsx index 3af8d03288c7..c8c2577fda1e 100644 --- a/lib/routes/baidu/tieba/search.tsx +++ b/lib/routes/baidu/tieba/search.tsx @@ -1,10 +1,11 @@ import { load } from 'cheerio'; import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; -import iconv from 'iconv-lite'; +import { config } from '@/config'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; import type { Route } from '@/types'; -import got from '@/utils/got'; +import cache from '@/utils/cache'; import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; @@ -14,15 +15,21 @@ export const route: Route = { example: '/baidu/tieba/search/neuro', parameters: { qw: '搜索关键词', routeParams: '额外参数;请参阅以下说明和表格' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, }, name: '贴吧搜索', - maintainers: ['JimenezLi'], + maintainers: ['JimenezLi', 'FlanChanXwO'], handler, description: `| 键 | 含义 | 接受的值 | 默认值 | | ------------ | ---------------------------------------------------------- | ------------- | ------ | @@ -36,43 +43,124 @@ export const route: Route = { async function handler(ctx) { const qw = ctx.req.param('qw'); + const cookie = config.baidu.cookie; + + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + const query = new URLSearchParams(ctx.req.param('routeParams')); query.set('ie', 'utf-8'); query.set('qw', qw); - query.set('rn', query.get('rn') || '20'); // Number of returned items + query.set('rn', query.get('rn') || '20'); const link = `https://tieba.baidu.com/f/search/res?${query.toString()}`; - const response = await got.get(link, { - headers: { - Referer: 'https://tieba.baidu.com', + const { getPuppeteerPage } = await import('@/utils/puppeteer'); + + const data = await cache.tryGet( + `tieba:search:${qw}:${query.toString()}`, + async () => { + const { page, destroy } = await getPuppeteerPage(link, { + noGoto: true, + }); + + try { + // 先访问以设置域名 + await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); + + // 设置 Cookie + const cookies = cookie.split(';').map((c) => { + const [name, value] = c.trim().split('='); + return { + name: name.trim(), + value: value || '', + domain: '.tieba.baidu.com', + }; + }); + await page.setCookie(...cookies); + + // 访问目标页面 + await page.goto(link, { waitUntil: 'networkidle2', timeout: 60000 }); + + // 动态等待搜索结果加载,最多3秒 + try { + await page.waitForSelector('.thread-content-box', { timeout: 3000 }); + } catch { + // 如果3秒内没加载出来,尝试滚动触发 + } + + // 滚动触发内容加载 + await page.evaluate(() => { + window.scrollTo(0, 500); + }); + + // 再次等待内容加载,最多2秒 + try { + await page.waitForFunction(() => document.querySelectorAll('.thread-content-box').length > 0, { timeout: 2000 }); + } catch { + // 继续执行 + } + + return await page.content(); + } finally { + await destroy(); + } }, - responseType: 'buffer', - }); - const data = iconv.decode(response.data, 'gbk'); + config.cache.routeExpire, + false + ); - const $ = load(data); - const resultList = $('div.s_post'); + const $ = load(data as string); + + // 检查是否遇到安全验证 + if ($('title').text().includes('安全验证') || (data as string).includes('百度安全验证')) { + throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); + } + + const resultList = $('.thread-content-box'); + + if (resultList.length === 0) { + throw new Error('No search results found. The page structure may have changed.'); + } return { title: `${qw} - ${query.get('kw') || '百度贴'}吧搜索`, link, item: resultList.toArray().map((element) => { const item = $(element); - const titleItem = item.find('.p_title a'); - const title = titleItem.text().trim(); - const link = titleItem.attr('href'); - const time = item.find('.p_date').text().trim(); - const details = item.find('.p_content').text().trim(); + + // 标题 + const title = item.find('.title-content-wrap .title-wrap span').text().trim(); + + // 内容摘要 + const details = item.find('.abstract-wrap span').text().trim(); + + // 从链接中提取帖子URL + const linkHref = item.find('.action-bar-warp a.action-link-bg').attr('href') || ''; + + // 作者 + const author = item.find('.forum-attention.user').text().trim(); + + // 时间 - 从 top-title 中提取 "发布于 YYYY-M-D" + const timeText = item.find('.top-title').text().trim(); + const timeMatch = timeText.match(/发布于\s+(\d{4}-\d{1,2}-\d{1,2})/); + const time = timeMatch ? timeMatch[1] : ''; + const parsedDate = time ? parseDate(time, 'YYYY-M-D') : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 图片 const medias = item - .find('.p_mediaCont img') + .find('.thread-media-new img') .toArray() - .map((element) => { - const item = $(element); - return ``; + .map((el) => { + const img = $(el); + const src = img.attr('src') || img.attr('data-src') || ''; + return `${title}`; }) .join(''); - const tieba = item.find('a.p_forum').text().trim(); - const author = item.find('a').last().text().trim(); + + // 贴吧名 + const tieba = item.find('.forum-name-text').text().trim(); return { title, @@ -80,16 +168,12 @@ async function handler(ctx) { <>

{details}

{raw(medias)}

-

- 贴吧:{tieba} -
- 作者:{author} -

+

贴吧:{tieba}

), author, - pubDate: timezone(parseDate(time, 'YYYY-MM-DD HH:mm'), +8), - link, + pubDate: validPubDate, + link: linkHref, }; }), }; diff --git a/lib/routes/baidu/tieba/user.ts b/lib/routes/baidu/tieba/user.ts deleted file mode 100644 index a5a9288e45b8..000000000000 --- a/lib/routes/baidu/tieba/user.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { load } from 'cheerio'; - -import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; -import timezone from '@/utils/timezone'; - -export const route: Route = { - path: '/tieba/user/:uid', - categories: ['bbs'], - example: '/baidu/tieba/user/斗鱼游戏君', - parameters: { uid: '用户 ID' }, - features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, - supportBT: false, - supportPodcast: false, - supportScihub: false, - }, - name: '用户帖子', - maintainers: ['igxlin', 'nczitzk'], - handler, - description: `用户 ID 可以通过打开用户的主页后查看地址栏的 \`un\` 字段来获取。`, -}; - -async function handler(ctx) { - const uid = ctx.req.param('uid'); - const response = await got(`https://tieba.baidu.com/home/main?un=${uid}`); - - const data = response.data; - - const $ = load(data); - const name = $('span.userinfo_username').text(); - const list = $('div.n_right.clearfix'); - let imgurl; - - return { - title: `${name} 的贴吧`, - link: `https://tieba.baidu.com/home/main?un=${uid}`, - item: - list && - list.toArray().map((item) => { - item = $(item).find('.n_contain'); - imgurl = item.find('ul.n_media.clearfix img').attr('original'); - return { - title: item.find('div.thread_name a').attr('title'), - pubDate: timezone(parseDate(item.parent().find('div .n_post_time').text(), ['YYYY-MM-DD', 'HH:mm']), +8), - description: `${item.find('div.n_txt').text()}
`, - link: item.find('div.thread_name a').attr('href'), - }; - }), - }; -} diff --git a/lib/routes/baidu/tieba/user.tsx b/lib/routes/baidu/tieba/user.tsx new file mode 100644 index 000000000000..be0511577406 --- /dev/null +++ b/lib/routes/baidu/tieba/user.tsx @@ -0,0 +1,152 @@ +import { load } from 'cheerio'; +import { renderToString } from 'hono/jsx/dom/server'; + +import { config } from '@/config'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; +import type { Route } from '@/types'; +import cache from '@/utils/cache'; +import { parseDate } from '@/utils/parse-date'; +import timezone from '@/utils/timezone'; + +export const route: Route = { + path: '/tieba/user/:uid', + categories: ['bbs'], + example: '/baidu/tieba/user/斗鱼游戏君', + parameters: { uid: '用户 ID' }, + features: { + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + name: '用户帖子', + maintainers: ['igxlin', 'nczitzk', 'FlanChanXwO'], + handler, + description: `用户 ID 可以通过打开用户的主页后查看地址栏的 \`un\` 字段来获取。`, +}; + +async function handler(ctx) { + const uid = ctx.req.param('uid'); + const cookie = config.baidu.cookie; + + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + const { getPuppeteerPage } = await import('@/utils/puppeteer'); + const url = `https://tieba.baidu.com/home/main?un=${uid}`; + + const data = await cache.tryGet( + `tieba:user:${uid}`, + async () => { + const { page, destroy } = await getPuppeteerPage(url, { + noGoto: true, + }); + + try { + // 先访问以设置域名 + await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); + + // 设置 Cookie + const cookies = cookie.split(';').map((c) => { + const [name, value] = c.trim().split('='); + return { + name: name.trim(), + value: value || '', + domain: '.tieba.baidu.com', + }; + }); + await page.setCookie(...cookies); + + // 访问目标页面 + await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); + + // 动态等待帖子卡片加载,最多3秒 + try { + await page.waitForSelector('.thread-card', { timeout: 3000 }); + } catch { + // 如果3秒内没加载出来,继续执行 + } + + return await page.content(); + } finally { + await destroy(); + } + }, + config.cache.routeExpire, + false + ); + + const $ = load(data as string); + + // 检查是否遇到安全验证 + if ($('title').text().includes('安全验证') || (data as string).includes('百度安全验证')) { + throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); + } + + const name = $('span.userinfo_username').text() || uid; + const list = $('.thread-card'); + + if (list.length === 0) { + throw new Error('No user posts found. The page structure may have changed or the user does not exist.'); + } + + return { + title: `${name} 的贴吧`, + link: `https://tieba.baidu.com/home/main?un=${uid}`, + item: list.toArray().map((element) => { + const item = $(element); + + // 作者 + const authorName = item.find('.head-name').text().trim() || name; + + // 标题 + const title = item.find('.title-text').text().trim(); + + // 内容 + const content = item.find('.tb-richtext .text').text().trim(); + + // 图片 + const images = item + .find('.image-list-item img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('data-src') || '') + .filter(Boolean); + + // 时间 + const timeText = item.find('.post-num').text().trim(); + const parsedDate = timeText ? parseDate(timeText, ['YYYY-MM-DD']) : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 链接 + const link = item.find('a.thread-card-content').attr('href') || ''; + + return { + title, + pubDate: validPubDate, + author: authorName, + description: renderToString( + <> + {content ?

{content}

: null} + {images.length > 0 ? ( +
+ {images.map((img) => ( + + ))} +
+ ) : null} + + ), + link, + }; + }), + }; +} diff --git a/lib/routes/baidu/tieba/utils.ts b/lib/routes/baidu/tieba/utils.ts new file mode 100644 index 000000000000..88ed77a6e7f4 --- /dev/null +++ b/lib/routes/baidu/tieba/utils.ts @@ -0,0 +1,128 @@ +import type { load } from 'cheerio'; + +import { parseDate } from '@/utils/parse-date'; + +/** + * 解析相对时间(如"回复于4小时前")为实际日期 + */ +export function parseRelativeTime(timeStr: string): Date { + const now = new Date(); + + // 如果时间为空,返回当前时间 + if (!timeStr || timeStr.trim() === '') { + return now; + } + + // 移除"回复于"前缀 + const cleanStr = timeStr.replace(/^回复于/, '').trim(); + + // 匹配 "刚刚" + if (cleanStr === '刚刚' || cleanStr.includes('刚刚')) { + return now; + } + + // 匹配 "X小时前" + const hourMatch = cleanStr.match(/(\d+)\s*小时前/); + if (hourMatch) { + const hours = Number.parseInt(hourMatch[1], 10); + return new Date(now.getTime() - hours * 60 * 60 * 1000); + } + + // 匹配 "X分钟前" + const minMatch = cleanStr.match(/(\d+)\s*分钟前/); + if (minMatch) { + const mins = Number.parseInt(minMatch[1], 10); + return new Date(now.getTime() - mins * 60 * 1000); + } + + // 匹配 "X天前" + const dayMatch = cleanStr.match(/(\d+)\s*天前/); + if (dayMatch) { + const days = Number.parseInt(dayMatch[1], 10); + return new Date(now.getTime() - days * 24 * 60 * 60 * 1000); + } + + // 匹配 "昨天 HH:mm" + const yesterdayMatch = cleanStr.match(/昨天\s*(\d{1,2}):(\d{2})/); + if (yesterdayMatch) { + const date = new Date(now); + date.setDate(date.getDate() - 1); + date.setHours(Number.parseInt(yesterdayMatch[1], 10), Number.parseInt(yesterdayMatch[2], 10), 0, 0); + return date; + } + + // 匹配 "今天 HH:mm" + const todayMatch = cleanStr.match(/今天\s*(\d{1,2}):(\d{2})/); + if (todayMatch) { + const date = new Date(now); + date.setHours(Number.parseInt(todayMatch[1], 10), Number.parseInt(todayMatch[2], 10), 0, 0); + return date; + } + + // 尝试标准日期格式 + try { + // @ts-ignore + const parsed = parseDate(cleanStr, ['M-D', 'YYYY-MM-DD', 'HH:mm', 'YYYY-MM-DD HH:mm', 'YYYY-M-D HH:mm'], true); + // 检查是否是有效日期 + if (parsed && !Number.isNaN(parsed.getTime())) { + return parsed; + } + } catch { + // 解析失败,返回当前时间 + } + + // 默认返回当前时间 + return now; +} + +/** + * 帖子数据接口 + */ +export interface Thread { + id: string; + title: string; + content: string; + author: string; + time: string; + images: string[]; + link: string; +} + +/** + * 解析帖子列表 + */ +export function parseThreads($: ReturnType): Thread[] { + return $('.thread-card-wrapper') + .toArray() + .map((element) => { + const item = $(element); + + const linkHref = item.find('a.thread-content-link').first().attr('href') || ''; + const idMatch = linkHref.match(/\/p\/(\d+)/); + const id = idMatch ? idMatch[1] : ''; + + const title = item.find('.thread-title .text').text().trim(); + const content = item.find('.thread-content .text').text().trim(); + const author = item.find('.head-name').text().trim(); + + const descInfo = item.find('.desc-info').first(); + const timeText = descInfo.length > 0 ? descInfo.text().trim() : item.find('[class*="time"], [class*="date"]').first().text().trim(); + + const images = item + .find('.image-list-item img') + .toArray() + .map((img) => $(img).attr('data-src')) + .filter((src): src is string => !!src); + + return { + id, + title, + content, + author, + time: timeText, + images, + link: linkHref, + }; + }) + .filter((t) => t.id && t.title); +} From 7b880dd0185fda9035d75e6d8b0573cc33707603 Mon Sep 17 00:00:00 2001 From: FlanChanOwO Date: Wed, 8 Apr 2026 17:45:27 +0800 Subject: [PATCH 02/13] refactor(forum): improve code readability and structure in forum.tsx --- lib/routes/baidu/tieba/forum.tsx | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index 64e277554f0a..edbceb77eba4 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -49,7 +49,6 @@ async function handler(ctx) { // 固定抓取3页,约30条帖子 const maxPages = 3; - let allThreads: any[] = []; // 先获取第一页 const { getPuppeteerPage } = await import('@/utils/puppeteer'); @@ -89,8 +88,7 @@ async function handler(ctx) { // 如果3秒内没加载出来,继续执行 } - const html = await page.content(); - return html; + return await page.content(); } finally { await destroy(); } @@ -107,8 +105,9 @@ async function handler(ctx) { // 解析所有页面数据并去重 const threadMap = new Map(); for (const pageData of pageResults) { - if (pageData && typeof pageData === 'string') { - const $ = load(pageData); + const html = pageData as string; + if (html && html.length > 0) { + const $ = load(html); const threads = parseThreads($); for (const thread of threads) { // 使用帖子ID去重,只保留第一次出现的 @@ -119,7 +118,7 @@ async function handler(ctx) { } } - allThreads = [...threadMap.values()]; + const allThreads = [...threadMap.values()]; if (allThreads.length === 0) { throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.'); From e0836618c55ad30fe81dbf5232e9233d7fe38963 Mon Sep 17 00:00:00 2001 From: FlanChanOwO Date: Wed, 8 Apr 2026 17:59:26 +0800 Subject: [PATCH 03/13] feat(post): enhance time parsing and reply link generation in post.tsx --- lib/routes/baidu/tieba/post.tsx | 18 +++++++++++------- lib/routes/baidu/tieba/utils.ts | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index db6dd33c2822..1efc457b3efe 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -166,14 +166,18 @@ async function handler(ctx) { const floorMatch = descText.match(/第(\d+)楼/); const floor = floorMatch ? `${floorMatch[1]}楼` : ''; - // 时间 - 可能是 "2分钟前" 这样的相对时间 - const timeMatch = descText.match(/(\d+分钟前|\d+小时前|今天\s*\d{2}:\d{2}|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2})/); - const timeText = timeMatch ? timeMatch[1] : ''; - - // 解析时间并验证有效性 - const parsedDate = timeText ? parseRelativeTime(timeText) : null; + // 解析时间并验证有效性 - 使用完整的 descText 以支持 parseRelativeTime 能处理的所有格式 + const parsedDate = descText ? parseRelativeTime(descText) : null; const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + // 提取时间文本用于显示 + const timeMatch = descText.match(/(\d+分钟前|\d+小时前|今天\s*\d{2}:\d{2}|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}|昨天\s*\d{2}:\d{2}|刚刚)/); + const timeText = timeMatch ? timeMatch[1] : descText; + + // 尝试获取回复的唯一ID用于生成直接链接 + const postId = item.attr('data-post-id') || item.attr('id') || ''; + const replyLink = postId ? `https://tieba.baidu.com/p/${id}?pid=${postId}#${postId}` : `https://tieba.baidu.com/p/${id}`; + return { title: `${authorName} 回复了帖子《${title}》`, description: renderToString( @@ -190,7 +194,7 @@ async function handler(ctx) { pubDate: validPubDate, author: authorName, - link: `https://tieba.baidu.com/p/${id}`, + link: replyLink, }; }) .filter((item): item is NonNullable => item !== null), diff --git a/lib/routes/baidu/tieba/utils.ts b/lib/routes/baidu/tieba/utils.ts index 88ed77a6e7f4..45400df3796c 100644 --- a/lib/routes/baidu/tieba/utils.ts +++ b/lib/routes/baidu/tieba/utils.ts @@ -1,4 +1,4 @@ -import type { load } from 'cheerio'; +import type { CheerioAPI } from 'cheerio'; import { parseDate } from '@/utils/parse-date'; @@ -91,7 +91,7 @@ export interface Thread { /** * 解析帖子列表 */ -export function parseThreads($: ReturnType): Thread[] { +export function parseThreads($: CheerioAPI): Thread[] { return $('.thread-card-wrapper') .toArray() .map((element) => { From e2d9a67b4a0e669332d7d432374c6d85fa2d9d0b Mon Sep 17 00:00:00 2001 From: FlanChanOwO Date: Wed, 8 Apr 2026 18:19:29 +0800 Subject: [PATCH 04/13] feat(route/baidu): refactor page content retrieval and enhance cookie handling in forum, post, search, and user routes --- lib/routes/baidu/tieba/common.ts | 101 ++++++++++++++++++++++++++++++ lib/routes/baidu/tieba/forum.tsx | 62 ++---------------- lib/routes/baidu/tieba/post.tsx | 64 +++---------------- lib/routes/baidu/tieba/search.tsx | 82 ++++-------------------- lib/routes/baidu/tieba/user.tsx | 67 +++----------------- 5 files changed, 133 insertions(+), 243 deletions(-) create mode 100644 lib/routes/baidu/tieba/common.ts diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts new file mode 100644 index 000000000000..3e696fbb0af5 --- /dev/null +++ b/lib/routes/baidu/tieba/common.ts @@ -0,0 +1,101 @@ +import { config } from '@/config'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; +import cache from '@/utils/cache'; + +/** + * 解析百度 cookie 字符串为 Puppeteer 可用的 cookie 对象数组 + * 正确处理包含 '=' 的 cookie 值 + */ +export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> { + return cookieStr.split(';').map((c) => { + const trimmed = c.trim(); + const firstEqualIndex = trimmed.indexOf('='); + if (firstEqualIndex === -1) { + return { name: trimmed, value: '', domain: '.tieba.baidu.com' }; + } + const name = trimmed.slice(0, firstEqualIndex).trim(); + const value = trimmed.slice(firstEqualIndex + 1).trim(); + return { name, value, domain: '.tieba.baidu.com' }; + }); +} + +/** + * 检查 HTML 内容是否包含百度安全验证页面 + */ +export function checkSecurityVerification(html: string): void { + if (html.includes('安全验证') || html.includes('百度安全验证')) { + throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); + } +} + +/** + * 使用 Puppeteer 获取贴吧页面内容 + * 包含统一的 cookie 设置、安全验证检查和缓存逻辑 + */ +export async function getTiebaPageContent( + url: string, + cacheKey: string, + options: { + waitForSelector?: string; + timeout?: number; + } = {} +): Promise { + const cookie = config.baidu.cookie; + + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + const { getPuppeteerPage } = await import('@/utils/puppeteer'); + const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000 } = options; + + const data = await cache.tryGet( + cacheKey, + async () => { + const { page, destroy } = await getPuppeteerPage(url, { noGoto: true }); + + try { + // 先访问以设置域名 + await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); + + // 设置 Cookie + const cookies = parseBaiduCookies(cookie); + await page.setCookie(...cookies); + + // 访问目标页面 + await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); + + // 动态等待内容加载 + try { + await page.waitForSelector(waitForSelector, { timeout }); + } catch { + // 如果超时,继续执行 + } + + return await page.content(); + } finally { + await destroy(); + } + }, + config.cache.routeExpire, + false + ); + + const html = data as string; + checkSecurityVerification(html); + return html; +} + +/** + * 规范化 URL 为绝对地址 + */ +export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.com'): string { + if (!href) { + return ''; + } + if (href.startsWith('http')) { + return href; + } + const path = href.startsWith('/') ? href : `/${href}`; + return `${base}${path}`; +} diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index edbceb77eba4..33c8ad68db45 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -1,12 +1,10 @@ import { load } from 'cheerio'; import { renderToString } from 'hono/jsx/dom/server'; -import { config } from '@/config'; -import ConfigNotFoundError from '@/errors/types/config-not-found'; import type { Route } from '@/types'; -import cache from '@/utils/cache'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent, normalizeUrl } from './common'; import { parseRelativeTime, parseThreads } from './utils'; export const route: Route = { @@ -36,66 +34,17 @@ export const route: Route = { async function handler(ctx) { // sortBy: created, replied const { kw, cid = '0', sortBy = 'created' } = ctx.req.param(); - const cookie = config.baidu.cookie; - - if (!cookie) { - throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); - } - - // 检查Cookie是否包含必要的BDUSS - if (!cookie.includes('BDUSS')) { - throw new Error('BAIDU_COOKIE must contain BDUSS. Please check your cookie format.'); - } + const sortParam = sortBy === 'replied' ? '&sc=67108864' : ''; // 固定抓取3页,约30条帖子 const maxPages = 3; - // 先获取第一页 - const { getPuppeteerPage } = await import('@/utils/puppeteer'); - const sortParam = sortBy === 'replied' ? '&sc=67108864' : ''; - // 并发获取所有页面 const pagePromises = []; for (let pageNum = 0; pageNum < maxPages; pageNum++) { const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=${pageNum * 50}${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${pageNum === 0 ? '' : '&ie=utf-8'}${sortParam}`; - const promise = cache.tryGet( - `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, - async () => { - const { page, destroy } = await getPuppeteerPage(pageUrl, { - noGoto: true, - }); - - try { - await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); - - const cookies = cookie.split(';').map((c) => { - const [name, value] = c.trim().split('='); - return { - name: name.trim(), - value: value || '', - domain: '.tieba.baidu.com', - }; - }); - await page.setCookie(...cookies); - - await page.goto(pageUrl, { waitUntil: 'networkidle2', timeout: 60000 }); - - // 动态等待帖子卡片加载,最多3秒 - try { - await page.waitForSelector('.thread-card-wrapper', { timeout: 3000 }); - } catch { - // 如果3秒内没加载出来,继续执行 - } - - return await page.content(); - } finally { - await destroy(); - } - }, - config.cache.routeExpire, - false - ); + const promise = getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 }); pagePromises.push(promise); } @@ -104,8 +53,7 @@ async function handler(ctx) { // 解析所有页面数据并去重 const threadMap = new Map(); - for (const pageData of pageResults) { - const html = pageData as string; + for (const html of pageResults) { if (html && html.length > 0) { const $ = load(html); const threads = parseThreads($); @@ -128,7 +76,7 @@ async function handler(ctx) { const parsedDate = parseRelativeTime(thread.time); return { title: thread.title, - link: thread.link || `https://tieba.baidu.com/p/${thread.id}`, + link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`, pubDate: parsedDate ? timezone(parsedDate, +8) : undefined, author: thread.author, description: renderToString( diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index 1efc457b3efe..9b1acab263e2 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -1,12 +1,10 @@ import { load } from 'cheerio'; import { renderToString } from 'hono/jsx/dom/server'; -import { config } from '@/config'; -import ConfigNotFoundError from '@/errors/types/config-not-found'; import type { Route } from '@/types'; -import cache from '@/utils/cache'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent } from './common'; import { parseRelativeTime } from './utils'; /** @@ -19,61 +17,18 @@ import { parseRelativeTime } from './utils'; * @returns */ async function getPost(id: string, lz = 0, pn = 7e6) { - const cookie = config.baidu.cookie; - if (!cookie) { - throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); - } - - const { getPuppeteerPage } = await import('@/utils/puppeteer'); const url = `https://tieba.baidu.com/p/${id}?see_lz=${lz}&pn=${pn}`; + const html = await getTiebaPageContent(url, `tieba:post:${id}:${lz}:${pn}`, { + waitForSelector: '.virtual-list-item', + timeout: 3000, + }); - const data = await cache.tryGet( - `tieba:post:${id}:${lz}:${pn}`, - async () => { - const { page, destroy } = await getPuppeteerPage(url, { - noGoto: true, - }); - - try { - // 先访问以设置域名 - await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); - - // 设置 Cookie - const cookies = cookie.split(';').map((c) => { - const [name, value] = c.trim().split('='); - return { - name: name.trim(), - value: value || '', - domain: '.tieba.baidu.com', - }; - }); - await page.setCookie(...cookies); - - // 访问目标页面 - await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); - - // 动态等待回复内容加载,最多3秒 - try { - await page.waitForSelector('.virtual-list-item', { timeout: 3000 }); - } catch { - // 如果3秒内没加载出来,继续执行 - } - - return await page.content(); - } finally { - await destroy(); - } - }, - config.cache.routeExpire, - false - ); - - const $ = load(data as string); + const $ = load(html); const max = Number.parseInt($('[max-page]').attr('max-page') || '0'); if (max > pn) { return getPost(id, lz, max); } - return data as string; + return html; } export const route: Route = { @@ -111,11 +66,6 @@ async function handler(ctx) { const html = await getPost(id, lz); const $ = load(html); - // 检查是否遇到安全验证 - if ($('title').text().includes('安全验证') || html.includes('百度安全验证')) { - throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); - } - const title = $('.pb-title-wrap .pb-title').text().trim() || ''; // 使用新的 Vue 渲染页面选择器 - 只选择 virtual-list-item 避免重复 diff --git a/lib/routes/baidu/tieba/search.tsx b/lib/routes/baidu/tieba/search.tsx index c8c2577fda1e..a308160e3f98 100644 --- a/lib/routes/baidu/tieba/search.tsx +++ b/lib/routes/baidu/tieba/search.tsx @@ -2,13 +2,12 @@ import { load } from 'cheerio'; import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; -import { config } from '@/config'; -import ConfigNotFoundError from '@/errors/types/config-not-found'; import type { Route } from '@/types'; -import cache from '@/utils/cache'; import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent, normalizeUrl } from './common'; + export const route: Route = { path: '/tieba/search/:qw/:routeParams?', categories: ['bbs'], @@ -43,11 +42,6 @@ export const route: Route = { async function handler(ctx) { const qw = ctx.req.param('qw'); - const cookie = config.baidu.cookie; - - if (!cookie) { - throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); - } const query = new URLSearchParams(ctx.req.param('routeParams')); query.set('ie', 'utf-8'); @@ -55,67 +49,12 @@ async function handler(ctx) { query.set('rn', query.get('rn') || '20'); const link = `https://tieba.baidu.com/f/search/res?${query.toString()}`; - const { getPuppeteerPage } = await import('@/utils/puppeteer'); - - const data = await cache.tryGet( - `tieba:search:${qw}:${query.toString()}`, - async () => { - const { page, destroy } = await getPuppeteerPage(link, { - noGoto: true, - }); - - try { - // 先访问以设置域名 - await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); - - // 设置 Cookie - const cookies = cookie.split(';').map((c) => { - const [name, value] = c.trim().split('='); - return { - name: name.trim(), - value: value || '', - domain: '.tieba.baidu.com', - }; - }); - await page.setCookie(...cookies); - - // 访问目标页面 - await page.goto(link, { waitUntil: 'networkidle2', timeout: 60000 }); - - // 动态等待搜索结果加载,最多3秒 - try { - await page.waitForSelector('.thread-content-box', { timeout: 3000 }); - } catch { - // 如果3秒内没加载出来,尝试滚动触发 - } - - // 滚动触发内容加载 - await page.evaluate(() => { - window.scrollTo(0, 500); - }); - - // 再次等待内容加载,最多2秒 - try { - await page.waitForFunction(() => document.querySelectorAll('.thread-content-box').length > 0, { timeout: 2000 }); - } catch { - // 继续执行 - } - - return await page.content(); - } finally { - await destroy(); - } - }, - config.cache.routeExpire, - false - ); - - const $ = load(data as string); - - // 检查是否遇到安全验证 - if ($('title').text().includes('安全验证') || (data as string).includes('百度安全验证')) { - throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); - } + const html = await getTiebaPageContent(link, `tieba:search:${qw}:${query.toString()}`, { + waitForSelector: '.thread-content-box', + timeout: 3000, + }); + + const $ = load(html); const resultList = $('.thread-content-box'); @@ -135,8 +74,9 @@ async function handler(ctx) { // 内容摘要 const details = item.find('.abstract-wrap span').text().trim(); - // 从链接中提取帖子URL - const linkHref = item.find('.action-bar-warp a.action-link-bg').attr('href') || ''; + // 从链接中提取帖子URL,并规范化为绝对地址 + const linkPath = item.find('.action-bar-warp a.action-link-bg').attr('href') || ''; + const linkHref = normalizeUrl(linkPath); // 作者 const author = item.find('.forum-attention.user').text().trim(); diff --git a/lib/routes/baidu/tieba/user.tsx b/lib/routes/baidu/tieba/user.tsx index be0511577406..02ae6393cf6e 100644 --- a/lib/routes/baidu/tieba/user.tsx +++ b/lib/routes/baidu/tieba/user.tsx @@ -1,13 +1,12 @@ import { load } from 'cheerio'; import { renderToString } from 'hono/jsx/dom/server'; -import { config } from '@/config'; -import ConfigNotFoundError from '@/errors/types/config-not-found'; import type { Route } from '@/types'; -import cache from '@/utils/cache'; import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent, normalizeUrl } from './common'; + export const route: Route = { path: '/tieba/user/:uid', categories: ['bbs'], @@ -35,62 +34,14 @@ export const route: Route = { async function handler(ctx) { const uid = ctx.req.param('uid'); - const cookie = config.baidu.cookie; - - if (!cookie) { - throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); - } - - const { getPuppeteerPage } = await import('@/utils/puppeteer'); const url = `https://tieba.baidu.com/home/main?un=${uid}`; - const data = await cache.tryGet( - `tieba:user:${uid}`, - async () => { - const { page, destroy } = await getPuppeteerPage(url, { - noGoto: true, - }); - - try { - // 先访问以设置域名 - await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); - - // 设置 Cookie - const cookies = cookie.split(';').map((c) => { - const [name, value] = c.trim().split('='); - return { - name: name.trim(), - value: value || '', - domain: '.tieba.baidu.com', - }; - }); - await page.setCookie(...cookies); - - // 访问目标页面 - await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); - - // 动态等待帖子卡片加载,最多3秒 - try { - await page.waitForSelector('.thread-card', { timeout: 3000 }); - } catch { - // 如果3秒内没加载出来,继续执行 - } - - return await page.content(); - } finally { - await destroy(); - } - }, - config.cache.routeExpire, - false - ); - - const $ = load(data as string); - - // 检查是否遇到安全验证 - if ($('title').text().includes('安全验证') || (data as string).includes('百度安全验证')) { - throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); - } + const html = await getTiebaPageContent(url, `tieba:user:${uid}`, { + waitForSelector: '.thread-card', + timeout: 3000, + }); + + const $ = load(html); const name = $('span.userinfo_username').text() || uid; const list = $('.thread-card'); @@ -127,7 +78,7 @@ async function handler(ctx) { const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; // 链接 - const link = item.find('a.thread-card-content').attr('href') || ''; + const link = normalizeUrl(item.find('a.thread-card-content').attr('href') || ''); return { title, From 4b80d98cebd15debbf46f6b1a3acd38d5d17b3b9 Mon Sep 17 00:00:00 2001 From: FlanChanOwO Date: Wed, 8 Apr 2026 18:44:19 +0800 Subject: [PATCH 05/13] refactor(route/baidu): refactor cookie parsing logic and improve code formatting - Refactor parseBaiduCookies function to use chained array methods for better readability - Simplify cookie parsing by trimming and filtering empty strings before mapping - Remove redundant comments in common.ts and post.tsx files - Improve code formatting by removing excessive blank lines - Maintain same functionality while enhancing code maintainability --- lib/routes/baidu/tieba/common.ts | 28 ++++++++++++++-------------- lib/routes/baidu/tieba/post.tsx | 8 ++++---- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts index 3e696fbb0af5..64b885203007 100644 --- a/lib/routes/baidu/tieba/common.ts +++ b/lib/routes/baidu/tieba/common.ts @@ -7,16 +7,19 @@ import cache from '@/utils/cache'; * 正确处理包含 '=' 的 cookie 值 */ export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> { - return cookieStr.split(';').map((c) => { - const trimmed = c.trim(); - const firstEqualIndex = trimmed.indexOf('='); - if (firstEqualIndex === -1) { - return { name: trimmed, value: '', domain: '.tieba.baidu.com' }; - } - const name = trimmed.slice(0, firstEqualIndex).trim(); - const value = trimmed.slice(firstEqualIndex + 1).trim(); - return { name, value, domain: '.tieba.baidu.com' }; - }); + return cookieStr + .split(';') + .map((c) => c.trim()) + .filter((c) => c.length > 0) + .map((c) => { + const firstEqualIndex = c.indexOf('='); + if (firstEqualIndex === -1) { + return { name: c, value: '', domain: '.tieba.baidu.com' }; + } + const name = c.slice(0, firstEqualIndex).trim(); + const value = c.slice(firstEqualIndex + 1).trim(); + return { name, value, domain: '.tieba.baidu.com' }; + }); } /** @@ -55,10 +58,7 @@ export async function getTiebaPageContent( const { page, destroy } = await getPuppeteerPage(url, { noGoto: true }); try { - // 先访问以设置域名 - await page.goto('https://tieba.baidu.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); - - // 设置 Cookie + // 设置 Cookie(在访问页面前设置,减少一次导航) const cookies = parseBaiduCookies(cookie); await page.setCookie(...cookies); diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index 9b1acab263e2..52e8fac92eca 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -92,13 +92,13 @@ async function handler(ctx) { return null; } - // 内容 - 从 pb-rich-text 获取 + // 内容 - 从 pb-rich-text 获取(保留行内富文本,如链接、图片、表情等) const contentItems = item.find('.pb-rich-text .pb-content-item'); let postContent = ''; contentItems.each((_, el) => { - const text = $(el).text().trim(); - if (text) { - postContent += `

${text}

`; + const html = $(el).html()?.trim(); + if (html) { + postContent += `

${html}

`; } }); From 7d7efc76e51179774af294dfc45f73dd10cd39e4 Mon Sep 17 00:00:00 2001 From: FlanChan <104259619+FlanChanXwO@users.noreply.github.com> Date: Wed, 8 Apr 2026 18:51:34 +0800 Subject: [PATCH 06/13] feat(route/baidu/tieba): add BAIDU_COOKIE support and extract shared utilities Add common.ts for cookie parsing, page retrieval, security check and URL normalization. Refactor forum, post, search and user routes to use shared utilities. Preserve rich text content in post replies. Support direct reply links. Fix cookie value parsing for cookies containing '=' character. Route/baidu --- lib/config.ts | 9 +- lib/routes/baidu/tieba/common.ts | 101 +++++++++++++++++++ lib/routes/baidu/tieba/forum.tsx | 120 +++++++++++++---------- lib/routes/baidu/tieba/post.tsx | 156 +++++++++++++++++++----------- lib/routes/baidu/tieba/search.tsx | 90 ++++++++++------- lib/routes/baidu/tieba/user.ts | 54 ----------- lib/routes/baidu/tieba/user.tsx | 103 ++++++++++++++++++++ lib/routes/baidu/tieba/utils.ts | 128 ++++++++++++++++++++++++ 8 files changed, 566 insertions(+), 195 deletions(-) create mode 100644 lib/routes/baidu/tieba/common.ts delete mode 100644 lib/routes/baidu/tieba/user.ts create mode 100644 lib/routes/baidu/tieba/user.tsx create mode 100644 lib/routes/baidu/tieba/utils.ts diff --git a/lib/config.ts b/lib/config.ts index 9ba567508c9e..2892b843be5d 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -85,6 +85,7 @@ type ConfigEnvKeys = | 'BITBUCKET_USERNAME' | 'BITBUCKET_PASSWORD' | 'BTBYR_HOST' + | 'BAIDU_COOKIE' | 'BTBYR_COOKIE' | 'BUPT_PORTAL_COOKIE' | 'CAIXIN_COOKIE' @@ -348,6 +349,9 @@ export type Config = { }; // Route-specific Configurations + baidu: { + cookie?: string; + }; bilibili: { cookies: Record; dmImgList?: string; @@ -763,7 +767,7 @@ const calculateValue = () => { type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // 缓存类型,支持 'memory' 和 'redis',设为空可以禁止缓存 requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60), routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒 - contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒 + contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒 }, memory: { max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite intger. @@ -843,6 +847,9 @@ const calculateValue = () => { }, // Route-specific Configurations + baidu: { + cookie: envs.BAIDU_COOKIE, + }, bilibili: { cookies: bilibili_cookies, dmImgList: envs.BILIBILI_DM_IMG_LIST, diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts new file mode 100644 index 000000000000..64b885203007 --- /dev/null +++ b/lib/routes/baidu/tieba/common.ts @@ -0,0 +1,101 @@ +import { config } from '@/config'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; +import cache from '@/utils/cache'; + +/** + * 解析百度 cookie 字符串为 Puppeteer 可用的 cookie 对象数组 + * 正确处理包含 '=' 的 cookie 值 + */ +export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> { + return cookieStr + .split(';') + .map((c) => c.trim()) + .filter((c) => c.length > 0) + .map((c) => { + const firstEqualIndex = c.indexOf('='); + if (firstEqualIndex === -1) { + return { name: c, value: '', domain: '.tieba.baidu.com' }; + } + const name = c.slice(0, firstEqualIndex).trim(); + const value = c.slice(firstEqualIndex + 1).trim(); + return { name, value, domain: '.tieba.baidu.com' }; + }); +} + +/** + * 检查 HTML 内容是否包含百度安全验证页面 + */ +export function checkSecurityVerification(html: string): void { + if (html.includes('安全验证') || html.includes('百度安全验证')) { + throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); + } +} + +/** + * 使用 Puppeteer 获取贴吧页面内容 + * 包含统一的 cookie 设置、安全验证检查和缓存逻辑 + */ +export async function getTiebaPageContent( + url: string, + cacheKey: string, + options: { + waitForSelector?: string; + timeout?: number; + } = {} +): Promise { + const cookie = config.baidu.cookie; + + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + const { getPuppeteerPage } = await import('@/utils/puppeteer'); + const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000 } = options; + + const data = await cache.tryGet( + cacheKey, + async () => { + const { page, destroy } = await getPuppeteerPage(url, { noGoto: true }); + + try { + // 设置 Cookie(在访问页面前设置,减少一次导航) + const cookies = parseBaiduCookies(cookie); + await page.setCookie(...cookies); + + // 访问目标页面 + await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); + + // 动态等待内容加载 + try { + await page.waitForSelector(waitForSelector, { timeout }); + } catch { + // 如果超时,继续执行 + } + + return await page.content(); + } finally { + await destroy(); + } + }, + config.cache.routeExpire, + false + ); + + const html = data as string; + checkSecurityVerification(html); + return html; +} + +/** + * 规范化 URL 为绝对地址 + */ +export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.com'): string { + if (!href) { + return ''; + } + if (href.startsWith('http')) { + return href; + } + const path = href.startsWith('/') ? href : `/${href}`; + return `${base}${path}`; +} diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index 8ba4ab80957b..33c8ad68db45 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -1,85 +1,101 @@ import { load } from 'cheerio'; -import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent, normalizeUrl } from './common'; +import { parseRelativeTime, parseThreads } from './utils'; + export const route: Route = { path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'], categories: ['bbs'], example: '/baidu/tieba/forum/good/女图', parameters: { kw: '吧名', cid: '精品分类,默认为 `0`(全部分类),如果不传 `cid` 则获取全部分类', sortBy: '排序方式:`created`, `replied`。默认为 `created`' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, }, name: '精品帖子', - maintainers: ['u3u'], + maintainers: ['u3u', 'FlanChanXwO'], handler, }; async function handler(ctx) { // sortBy: created, replied const { kw, cid = '0', sortBy = 'created' } = ctx.req.param(); + const sortParam = sortBy === 'replied' ? '&sc=67108864' : ''; - // PC端:https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)} - // 移动端接口:https://tieba.baidu.com/mo/q/m?kw=${encodeURIComponent(kw)}&lp=5024&forum_recommend=1&lm=0&cid=0&has_url_param=1&pn=0&is_ajax=1 - const params = { kw: encodeURIComponent(kw) }; - ctx.req.path.includes('good') && (params.tab = 'good'); - cid && (params.cid = cid); - const { data } = await got(`https://tieba.baidu.com/f`, { - headers: { - Referer: 'https://tieba.baidu.com/', - }, - searchParams: params, - }); + // 固定抓取3页,约30条帖子 + const maxPages = 3; + + // 并发获取所有页面 + const pagePromises = []; + for (let pageNum = 0; pageNum < maxPages; pageNum++) { + const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=${pageNum * 50}${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${pageNum === 0 ? '' : '&ie=utf-8'}${sortParam}`; + + const promise = getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 }); + pagePromises.push(promise); + } - const threadListHTML = load(data)('code[id="pagelet_html_frs-list/pagelet/thread_list"]') - .contents() - .filter((e) => e.nodeType === '8'); + // 等待所有页面获取完成 + const pageResults = await Promise.all(pagePromises); - const $ = load(threadListHTML.prevObject[0].data); - const list = $('#thread_list > .j_thread_list[data-field]') - .toArray() - .map((element) => { - const item = $(element); - const { id, author_name } = item.data('field'); - const time = sortBy === 'created' ? item.find('.is_show_create_time').text().trim() : item.find('.threadlist_reply_date').text().trim(); - const title = item.find('a.j_th_tit').text().trim(); - const details = item.find('.threadlist_abs').text().trim(); - const medias = item - .find('.threadlist_media img') - .toArray() - .map((element) => { - const item = $(element); - return ``; - }) - .join(''); + // 解析所有页面数据并去重 + const threadMap = new Map(); + for (const html of pageResults) { + if (html && html.length > 0) { + const $ = load(html); + const threads = parseThreads($); + for (const thread of threads) { + // 使用帖子ID去重,只保留第一次出现的 + if (!threadMap.has(thread.id)) { + threadMap.set(thread.id, thread); + } + } + } + } - return { - title, - description: renderToString( - <> -

{details}

-

{raw(medias)}

-

作者:{author_name}

- - ), - pubDate: timezone(parseDate(time, ['HH:mm', 'M-D', 'YYYY-MM'], true), +8), - link: `https://tieba.baidu.com/p/${id}`, - }; - }); + const allThreads = [...threadMap.values()]; + + if (allThreads.length === 0) { + throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.'); + } + + const list = allThreads.map((thread) => { + const parsedDate = parseRelativeTime(thread.time); + return { + title: thread.title, + link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`, + pubDate: parsedDate ? timezone(parsedDate, +8) : undefined, + author: thread.author, + description: renderToString( + <> + {thread.content ?

{thread.content}

: null} + {thread.images && thread.images.length > 0 ? ( +
+ {thread.images.map((img) => ( + + ))} +
+ ) : null} + + ), + }; + }); return { title: `${kw}吧`, - description: load(data)('meta[name="description"]').attr('content'), link: `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}`, item: list, }; diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index cfc0a02ddb85..52e8fac92eca 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -1,12 +1,12 @@ import { load } from 'cheerio'; -import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent } from './common'; +import { parseRelativeTime } from './utils'; + /** * 获取最新的帖子回复(倒序查看) * @@ -16,18 +16,19 @@ import timezone from '@/utils/timezone'; * 这个默认值我测试下来 7e6 是比较接近最大值了,因为当我输入 8e6 就会返回第一页的数据而不是最后一页了 * @returns */ -async function getPost(id, lz = 0, pn = 7e6) { - const { data } = await got(`https://tieba.baidu.com/p/${id}?see_lz=${lz}&pn=${pn}&ajax=1`, { - headers: { - Referer: 'https://tieba.baidu.com/', - }, +async function getPost(id: string, lz = 0, pn = 7e6) { + const url = `https://tieba.baidu.com/p/${id}?see_lz=${lz}&pn=${pn}`; + const html = await getTiebaPageContent(url, `tieba:post:${id}:${lz}:${pn}`, { + waitForSelector: '.virtual-list-item', + timeout: 3000, }); - const $ = load(data); - const max = Number.parseInt($('[max-page]').attr('max-page')); + + const $ = load(html); + const max = Number.parseInt($('[max-page]').attr('max-page') || '0'); if (max > pn) { - return getPost(id, max); + return getPost(id, lz, max); } - return data; + return html; } export const route: Route = { @@ -36,9 +37,15 @@ export const route: Route = { example: '/baidu/tieba/post/686961453', parameters: { id: '帖子 ID' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, @@ -49,7 +56,7 @@ export const route: Route = { }, ], name: '帖子动态', - maintainers: ['u3u'], + maintainers: ['u3u', 'FlanChanXwO'], handler, }; @@ -58,49 +65,88 @@ async function handler(ctx) { const lz = ctx.req.path.includes('lz') ? 1 : 0; const html = await getPost(id, lz); const $ = load(html); - const title = $('.core_title_txt').attr('title'); - // .substr(3); - const list = $('.p_postlist > [data-field]:not(:has(.ad_bottom_view))'); + + const title = $('.pb-title-wrap .pb-title').text().trim() || ''; + + // 使用新的 Vue 渲染页面选择器 - 只选择 virtual-list-item 避免重复 + const list = $('.virtual-list-item'); + + if (list.length === 0) { + throw new Error('No post replies found. The post may not exist or the cookie is invalid.'); + } return { title: lz ? `【只看楼主】${title}` : title, link: `https://tieba.baidu.com/p/${id}?see_lz=${lz}`, description: `${title}的最新回复`, - item: list.toArray().map((element) => { - const item = $(element); - const { author, content } = item.data('field'); - const tempList = item - .find('.post-tail-wrap > .tail-info') - .toArray() - .map((element) => $(element).text()); - let [pubContent, from, num, time] = ['', '', '', '']; - if (0 === tempList.length && 'date' in content) { - num = `${content.post_no}楼`; - time = content.date; - pubContent = item.find('.j_d_post_content').html(); - } else if (2 === tempList.length) { - [num, time] = tempList; - pubContent = content.content; - } else if (3 === tempList.length) { - [from, num, time] = tempList; - pubContent = content.content; - } - return { - title: `${author.user_name}回复了帖子《${title}》`, - description: renderToString( - <> -

{raw(pubContent)}

-
- 作者:{author.user_name} -
- 楼层:{num} -
- {from} - - ), - pubDate: timezone(parseDate(time, 'YYYY-MM-DD hh:mm'), +8), - link: `https://tieba.baidu.com/p/${id}?pid=${content.post_id}#${content.post_id}`, - }; - }), + item: list + .toArray() + .map((element) => { + const item = $(element); + + // 作者名 + const authorName = item.find('.head-name').text().trim(); + + // 跳过无效用户(无作者名的条目) + if (!authorName) { + return null; + } + + // 内容 - 从 pb-rich-text 获取(保留行内富文本,如链接、图片、表情等) + const contentItems = item.find('.pb-rich-text .pb-content-item'); + let postContent = ''; + contentItems.each((_, el) => { + const html = $(el).html()?.trim(); + if (html) { + postContent += `

${html}

`; + } + }); + + // 图片 + const images = item + .find('.image-list-wrapper img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('data-src') || '') + .filter(Boolean) + .map((src) => `${title}`) + .join(''); + + // 楼层和时间 + const descText = item.find('.pc-pb-comments-desc, .comment-desc-left').text().trim(); + const floorMatch = descText.match(/第(\d+)楼/); + const floor = floorMatch ? `${floorMatch[1]}楼` : ''; + + // 解析时间并验证有效性 - 使用完整的 descText 以支持 parseRelativeTime 能处理的所有格式 + const parsedDate = descText ? parseRelativeTime(descText) : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 提取时间文本用于显示 + const timeMatch = descText.match(/(\d+分钟前|\d+小时前|今天\s*\d{2}:\d{2}|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}|昨天\s*\d{2}:\d{2}|刚刚)/); + const timeText = timeMatch ? timeMatch[1] : descText; + + // 尝试获取回复的唯一ID用于生成直接链接 + const postId = item.attr('data-post-id') || item.attr('id') || ''; + const replyLink = postId ? `https://tieba.baidu.com/p/${id}?pid=${postId}#${postId}` : `https://tieba.baidu.com/p/${id}`; + + return { + title: `${authorName} 回复了帖子《${title}》`, + description: renderToString( + <> +
+
+

+ 楼层:{floor} +
+ 时间:{timeText} +

+ + ), + + pubDate: validPubDate, + author: authorName, + link: replyLink, + }; + }) + .filter((item): item is NonNullable => item !== null), }; } diff --git a/lib/routes/baidu/tieba/search.tsx b/lib/routes/baidu/tieba/search.tsx index 3af8d03288c7..a308160e3f98 100644 --- a/lib/routes/baidu/tieba/search.tsx +++ b/lib/routes/baidu/tieba/search.tsx @@ -1,28 +1,34 @@ import { load } from 'cheerio'; import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; -import iconv from 'iconv-lite'; import type { Route } from '@/types'; -import got from '@/utils/got'; import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent, normalizeUrl } from './common'; + export const route: Route = { path: '/tieba/search/:qw/:routeParams?', categories: ['bbs'], example: '/baidu/tieba/search/neuro', parameters: { qw: '搜索关键词', routeParams: '额外参数;请参阅以下说明和表格' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, }, name: '贴吧搜索', - maintainers: ['JimenezLi'], + maintainers: ['JimenezLi', 'FlanChanXwO'], handler, description: `| 键 | 含义 | 接受的值 | 默认值 | | ------------ | ---------------------------------------------------------- | ------------- | ------ | @@ -36,43 +42,65 @@ export const route: Route = { async function handler(ctx) { const qw = ctx.req.param('qw'); + const query = new URLSearchParams(ctx.req.param('routeParams')); query.set('ie', 'utf-8'); query.set('qw', qw); - query.set('rn', query.get('rn') || '20'); // Number of returned items + query.set('rn', query.get('rn') || '20'); const link = `https://tieba.baidu.com/f/search/res?${query.toString()}`; - const response = await got.get(link, { - headers: { - Referer: 'https://tieba.baidu.com', - }, - responseType: 'buffer', + const html = await getTiebaPageContent(link, `tieba:search:${qw}:${query.toString()}`, { + waitForSelector: '.thread-content-box', + timeout: 3000, }); - const data = iconv.decode(response.data, 'gbk'); - const $ = load(data); - const resultList = $('div.s_post'); + const $ = load(html); + + const resultList = $('.thread-content-box'); + + if (resultList.length === 0) { + throw new Error('No search results found. The page structure may have changed.'); + } return { title: `${qw} - ${query.get('kw') || '百度贴'}吧搜索`, link, item: resultList.toArray().map((element) => { const item = $(element); - const titleItem = item.find('.p_title a'); - const title = titleItem.text().trim(); - const link = titleItem.attr('href'); - const time = item.find('.p_date').text().trim(); - const details = item.find('.p_content').text().trim(); + + // 标题 + const title = item.find('.title-content-wrap .title-wrap span').text().trim(); + + // 内容摘要 + const details = item.find('.abstract-wrap span').text().trim(); + + // 从链接中提取帖子URL,并规范化为绝对地址 + const linkPath = item.find('.action-bar-warp a.action-link-bg').attr('href') || ''; + const linkHref = normalizeUrl(linkPath); + + // 作者 + const author = item.find('.forum-attention.user').text().trim(); + + // 时间 - 从 top-title 中提取 "发布于 YYYY-M-D" + const timeText = item.find('.top-title').text().trim(); + const timeMatch = timeText.match(/发布于\s+(\d{4}-\d{1,2}-\d{1,2})/); + const time = timeMatch ? timeMatch[1] : ''; + const parsedDate = time ? parseDate(time, 'YYYY-M-D') : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 图片 const medias = item - .find('.p_mediaCont img') + .find('.thread-media-new img') .toArray() - .map((element) => { - const item = $(element); - return ``; + .map((el) => { + const img = $(el); + const src = img.attr('src') || img.attr('data-src') || ''; + return `${title}`; }) .join(''); - const tieba = item.find('a.p_forum').text().trim(); - const author = item.find('a').last().text().trim(); + + // 贴吧名 + const tieba = item.find('.forum-name-text').text().trim(); return { title, @@ -80,16 +108,12 @@ async function handler(ctx) { <>

{details}

{raw(medias)}

-

- 贴吧:{tieba} -
- 作者:{author} -

+

贴吧:{tieba}

), author, - pubDate: timezone(parseDate(time, 'YYYY-MM-DD HH:mm'), +8), - link, + pubDate: validPubDate, + link: linkHref, }; }), }; diff --git a/lib/routes/baidu/tieba/user.ts b/lib/routes/baidu/tieba/user.ts deleted file mode 100644 index a5a9288e45b8..000000000000 --- a/lib/routes/baidu/tieba/user.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { load } from 'cheerio'; - -import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; -import timezone from '@/utils/timezone'; - -export const route: Route = { - path: '/tieba/user/:uid', - categories: ['bbs'], - example: '/baidu/tieba/user/斗鱼游戏君', - parameters: { uid: '用户 ID' }, - features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, - supportBT: false, - supportPodcast: false, - supportScihub: false, - }, - name: '用户帖子', - maintainers: ['igxlin', 'nczitzk'], - handler, - description: `用户 ID 可以通过打开用户的主页后查看地址栏的 \`un\` 字段来获取。`, -}; - -async function handler(ctx) { - const uid = ctx.req.param('uid'); - const response = await got(`https://tieba.baidu.com/home/main?un=${uid}`); - - const data = response.data; - - const $ = load(data); - const name = $('span.userinfo_username').text(); - const list = $('div.n_right.clearfix'); - let imgurl; - - return { - title: `${name} 的贴吧`, - link: `https://tieba.baidu.com/home/main?un=${uid}`, - item: - list && - list.toArray().map((item) => { - item = $(item).find('.n_contain'); - imgurl = item.find('ul.n_media.clearfix img').attr('original'); - return { - title: item.find('div.thread_name a').attr('title'), - pubDate: timezone(parseDate(item.parent().find('div .n_post_time').text(), ['YYYY-MM-DD', 'HH:mm']), +8), - description: `${item.find('div.n_txt').text()}
`, - link: item.find('div.thread_name a').attr('href'), - }; - }), - }; -} diff --git a/lib/routes/baidu/tieba/user.tsx b/lib/routes/baidu/tieba/user.tsx new file mode 100644 index 000000000000..02ae6393cf6e --- /dev/null +++ b/lib/routes/baidu/tieba/user.tsx @@ -0,0 +1,103 @@ +import { load } from 'cheerio'; +import { renderToString } from 'hono/jsx/dom/server'; + +import type { Route } from '@/types'; +import { parseDate } from '@/utils/parse-date'; +import timezone from '@/utils/timezone'; + +import { getTiebaPageContent, normalizeUrl } from './common'; + +export const route: Route = { + path: '/tieba/user/:uid', + categories: ['bbs'], + example: '/baidu/tieba/user/斗鱼游戏君', + parameters: { uid: '用户 ID' }, + features: { + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + name: '用户帖子', + maintainers: ['igxlin', 'nczitzk', 'FlanChanXwO'], + handler, + description: `用户 ID 可以通过打开用户的主页后查看地址栏的 \`un\` 字段来获取。`, +}; + +async function handler(ctx) { + const uid = ctx.req.param('uid'); + const url = `https://tieba.baidu.com/home/main?un=${uid}`; + + const html = await getTiebaPageContent(url, `tieba:user:${uid}`, { + waitForSelector: '.thread-card', + timeout: 3000, + }); + + const $ = load(html); + + const name = $('span.userinfo_username').text() || uid; + const list = $('.thread-card'); + + if (list.length === 0) { + throw new Error('No user posts found. The page structure may have changed or the user does not exist.'); + } + + return { + title: `${name} 的贴吧`, + link: `https://tieba.baidu.com/home/main?un=${uid}`, + item: list.toArray().map((element) => { + const item = $(element); + + // 作者 + const authorName = item.find('.head-name').text().trim() || name; + + // 标题 + const title = item.find('.title-text').text().trim(); + + // 内容 + const content = item.find('.tb-richtext .text').text().trim(); + + // 图片 + const images = item + .find('.image-list-item img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('data-src') || '') + .filter(Boolean); + + // 时间 + const timeText = item.find('.post-num').text().trim(); + const parsedDate = timeText ? parseDate(timeText, ['YYYY-MM-DD']) : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 链接 + const link = normalizeUrl(item.find('a.thread-card-content').attr('href') || ''); + + return { + title, + pubDate: validPubDate, + author: authorName, + description: renderToString( + <> + {content ?

{content}

: null} + {images.length > 0 ? ( +
+ {images.map((img) => ( + + ))} +
+ ) : null} + + ), + link, + }; + }), + }; +} diff --git a/lib/routes/baidu/tieba/utils.ts b/lib/routes/baidu/tieba/utils.ts new file mode 100644 index 000000000000..45400df3796c --- /dev/null +++ b/lib/routes/baidu/tieba/utils.ts @@ -0,0 +1,128 @@ +import type { CheerioAPI } from 'cheerio'; + +import { parseDate } from '@/utils/parse-date'; + +/** + * 解析相对时间(如"回复于4小时前")为实际日期 + */ +export function parseRelativeTime(timeStr: string): Date { + const now = new Date(); + + // 如果时间为空,返回当前时间 + if (!timeStr || timeStr.trim() === '') { + return now; + } + + // 移除"回复于"前缀 + const cleanStr = timeStr.replace(/^回复于/, '').trim(); + + // 匹配 "刚刚" + if (cleanStr === '刚刚' || cleanStr.includes('刚刚')) { + return now; + } + + // 匹配 "X小时前" + const hourMatch = cleanStr.match(/(\d+)\s*小时前/); + if (hourMatch) { + const hours = Number.parseInt(hourMatch[1], 10); + return new Date(now.getTime() - hours * 60 * 60 * 1000); + } + + // 匹配 "X分钟前" + const minMatch = cleanStr.match(/(\d+)\s*分钟前/); + if (minMatch) { + const mins = Number.parseInt(minMatch[1], 10); + return new Date(now.getTime() - mins * 60 * 1000); + } + + // 匹配 "X天前" + const dayMatch = cleanStr.match(/(\d+)\s*天前/); + if (dayMatch) { + const days = Number.parseInt(dayMatch[1], 10); + return new Date(now.getTime() - days * 24 * 60 * 60 * 1000); + } + + // 匹配 "昨天 HH:mm" + const yesterdayMatch = cleanStr.match(/昨天\s*(\d{1,2}):(\d{2})/); + if (yesterdayMatch) { + const date = new Date(now); + date.setDate(date.getDate() - 1); + date.setHours(Number.parseInt(yesterdayMatch[1], 10), Number.parseInt(yesterdayMatch[2], 10), 0, 0); + return date; + } + + // 匹配 "今天 HH:mm" + const todayMatch = cleanStr.match(/今天\s*(\d{1,2}):(\d{2})/); + if (todayMatch) { + const date = new Date(now); + date.setHours(Number.parseInt(todayMatch[1], 10), Number.parseInt(todayMatch[2], 10), 0, 0); + return date; + } + + // 尝试标准日期格式 + try { + // @ts-ignore + const parsed = parseDate(cleanStr, ['M-D', 'YYYY-MM-DD', 'HH:mm', 'YYYY-MM-DD HH:mm', 'YYYY-M-D HH:mm'], true); + // 检查是否是有效日期 + if (parsed && !Number.isNaN(parsed.getTime())) { + return parsed; + } + } catch { + // 解析失败,返回当前时间 + } + + // 默认返回当前时间 + return now; +} + +/** + * 帖子数据接口 + */ +export interface Thread { + id: string; + title: string; + content: string; + author: string; + time: string; + images: string[]; + link: string; +} + +/** + * 解析帖子列表 + */ +export function parseThreads($: CheerioAPI): Thread[] { + return $('.thread-card-wrapper') + .toArray() + .map((element) => { + const item = $(element); + + const linkHref = item.find('a.thread-content-link').first().attr('href') || ''; + const idMatch = linkHref.match(/\/p\/(\d+)/); + const id = idMatch ? idMatch[1] : ''; + + const title = item.find('.thread-title .text').text().trim(); + const content = item.find('.thread-content .text').text().trim(); + const author = item.find('.head-name').text().trim(); + + const descInfo = item.find('.desc-info').first(); + const timeText = descInfo.length > 0 ? descInfo.text().trim() : item.find('[class*="time"], [class*="date"]').first().text().trim(); + + const images = item + .find('.image-list-item img') + .toArray() + .map((img) => $(img).attr('data-src')) + .filter((src): src is string => !!src); + + return { + id, + title, + content, + author, + time: timeText, + images, + link: linkHref, + }; + }) + .filter((t) => t.id && t.title); +} From 47415dbdac6edfb80a1c57ec06d6fd0e56cfab2e Mon Sep 17 00:00:00 2001 From: FlanChanOwO Date: Wed, 8 Apr 2026 19:01:15 +0800 Subject: [PATCH 07/13] style(baidu/tieba): fix code formatting and whitespace issues - Remove trailing whitespace from empty lines - Ensure consistent line endings in utility functions --- lib/routes/baidu/tieba/utils.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/routes/baidu/tieba/utils.ts b/lib/routes/baidu/tieba/utils.ts index 45400df3796c..d5e53314c785 100644 --- a/lib/routes/baidu/tieba/utils.ts +++ b/lib/routes/baidu/tieba/utils.ts @@ -97,7 +97,7 @@ export function parseThreads($: CheerioAPI): Thread[] { .map((element) => { const item = $(element); - const linkHref = item.find('a.thread-content-link').first().attr('href') || ''; + const linkHref = item.find('a.thread-content-link').attr('href') || ''; const idMatch = linkHref.match(/\/p\/(\d+)/); const id = idMatch ? idMatch[1] : ''; @@ -105,8 +105,8 @@ export function parseThreads($: CheerioAPI): Thread[] { const content = item.find('.thread-content .text').text().trim(); const author = item.find('.head-name').text().trim(); - const descInfo = item.find('.desc-info').first(); - const timeText = descInfo.length > 0 ? descInfo.text().trim() : item.find('[class*="time"], [class*="date"]').first().text().trim(); + const descInfo = item.find('.desc-info'); + const timeText = descInfo.length > 0 ? descInfo.text().trim() : item.find('.time, .date').text().trim(); const images = item .find('.image-list-item img') From fc7ae10350a099e862fd3f01e7d897c9681feae2 Mon Sep 17 00:00:00 2001 From: FlanChan <104259619+FlanChanXwO@users.noreply.github.com> Date: Wed, 8 Apr 2026 21:29:22 +0800 Subject: [PATCH 08/13] feat(route/baidu): add retry mechanism for transient errors in page content retrieval (#2) --- lib/routes/baidu/tieba/common.ts | 55 ++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts index 64b885203007..7b8e3729b786 100644 --- a/lib/routes/baidu/tieba/common.ts +++ b/lib/routes/baidu/tieba/common.ts @@ -34,6 +34,7 @@ export function checkSecurityVerification(html: string): void { /** * 使用 Puppeteer 获取贴吧页面内容 * 包含统一的 cookie 设置、安全验证检查和缓存逻辑 + * 带有重试机制处理瞬态错误 */ export async function getTiebaPageContent( url: string, @@ -41,6 +42,7 @@ export async function getTiebaPageContent( options: { waitForSelector?: string; timeout?: number; + retries?: number; } = {} ): Promise { const cookie = config.baidu.cookie; @@ -50,32 +52,51 @@ export async function getTiebaPageContent( } const { getPuppeteerPage } = await import('@/utils/puppeteer'); - const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000 } = options; + const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options; const data = await cache.tryGet( cacheKey, async () => { - const { page, destroy } = await getPuppeteerPage(url, { noGoto: true }); + let lastError: Error | undefined; - try { - // 设置 Cookie(在访问页面前设置,减少一次导航) - const cookies = parseBaiduCookies(cookie); - await page.setCookie(...cookies); + /* eslint-disable no-await-in-loop -- Intentional sequential retry logic */ + for (let attempt = 0; attempt < retries; attempt++) { + const { page, destroy } = await getPuppeteerPage(url, { noGoto: true }); - // 访问目标页面 - await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 }); - - // 动态等待内容加载 try { - await page.waitForSelector(waitForSelector, { timeout }); - } catch { - // 如果超时,继续执行 - } + // 设置 Cookie(在访问页面前设置,减少一次导航) + const cookies = parseBaiduCookies(cookie); + await page.setCookie(...cookies); + + // 访问目标页面 - 使用更宽松的等待条件 + await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 }); + + // 等待页面稳定 + await new Promise((resolve) => setTimeout(resolve, 2000)); - return await page.content(); - } finally { - await destroy(); + // 动态等待内容加载 + try { + await page.waitForSelector(waitForSelector, { timeout }); + } catch { + // 如果超时,继续执行 + } + + return await page.content(); + } catch (error) { + lastError = error as Error; + // 如果是最后一次尝试,抛出错误 + if (attempt === retries - 1) { + throw lastError; + } + // 等待后重试 + await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt + 1))); + } finally { + await destroy(); + } } + /* eslint-enable no-await-in-loop */ + + throw lastError || new Error('Failed to fetch page content'); }, config.cache.routeExpire, false From bf19f348b7d50e1a43e6e0443ee6d19074ccb9fe Mon Sep 17 00:00:00 2001 From: FlanChan <104259619+FlanChanXwO@users.noreply.github.com> Date: Wed, 22 Apr 2026 18:51:26 +0800 Subject: [PATCH 09/13] fix: improve page content retrieval --- lib/config.ts | 1 + lib/routes/baidu/tieba/common.ts | 49 ++++++++------- lib/routes/baidu/tieba/forum.tsx | 40 ++++-------- lib/routes/baidu/tieba/post.tsx | 14 +---- lib/routes/baidu/tieba/utils.ts | 105 ++++++++++--------------------- 5 files changed, 74 insertions(+), 135 deletions(-) diff --git a/lib/config.ts b/lib/config.ts index 2892b843be5d..b417c4a58fc4 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -78,6 +78,7 @@ type ConfigEnvKeys = | 'FOLLOW_PRICE' | 'FOLLOW_USER_LIMIT' // Route-specific (dynamic cookies with prefixes) + | 'BAIDU_COOKIE' | `BILIBILI_COOKIE_${string}` | 'BILIBILI_DM_IMG_LIST' | 'BILIBILI_DM_IMG_INTER' diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts index 7b8e3729b786..7a0644c820da 100644 --- a/lib/routes/baidu/tieba/common.ts +++ b/lib/routes/baidu/tieba/common.ts @@ -1,6 +1,8 @@ -import { config } from '@/config'; +import {config} from '@/config'; import ConfigNotFoundError from '@/errors/types/config-not-found'; import cache from '@/utils/cache'; +import {getPuppeteerPage} from '@/utils/puppeteer'; +import {Cookie} from 'tough-cookie'; /** * 解析百度 cookie 字符串为 Puppeteer 可用的 cookie 对象数组 @@ -9,17 +11,13 @@ import cache from '@/utils/cache'; export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> { return cookieStr .split(';') - .map((c) => c.trim()) - .filter((c) => c.length > 0) - .map((c) => { - const firstEqualIndex = c.indexOf('='); - if (firstEqualIndex === -1) { - return { name: c, value: '', domain: '.tieba.baidu.com' }; - } - const name = c.slice(0, firstEqualIndex).trim(); - const value = c.slice(firstEqualIndex + 1).trim(); - return { name, value, domain: '.tieba.baidu.com' }; - }); + .map((c) => Cookie.parse(c.trim())) + .filter((c): c is Cookie => Boolean(c?.key)) + .map((c) => ({ + name: c.key, + value: c.value, + domain: '.tieba.baidu.com', + })); } /** @@ -51,8 +49,12 @@ export async function getTiebaPageContent( throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); } - const { getPuppeteerPage } = await import('@/utils/puppeteer'); - const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options; + const cookies = parseBaiduCookies(cookie); + const { + waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', + timeout = 3000, + retries = 3 + } = options; const data = await cache.tryGet( cacheKey, @@ -61,22 +63,22 @@ export async function getTiebaPageContent( /* eslint-disable no-await-in-loop -- Intentional sequential retry logic */ for (let attempt = 0; attempt < retries; attempt++) { - const { page, destroy } = await getPuppeteerPage(url, { noGoto: true }); + const {page, destroy} = await getPuppeteerPage(url, { + onBeforeLoad: async (page) => { + if (cookies.length > 0) { + await page.setCookie(...cookies); + } + }, + gotoConfig: {waitUntil: 'domcontentloaded'}, + }); try { - // 设置 Cookie(在访问页面前设置,减少一次导航) - const cookies = parseBaiduCookies(cookie); - await page.setCookie(...cookies); - - // 访问目标页面 - 使用更宽松的等待条件 - await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 }); - // 等待页面稳定 await new Promise((resolve) => setTimeout(resolve, 2000)); // 动态等待内容加载 try { - await page.waitForSelector(waitForSelector, { timeout }); + await page.waitForSelector(waitForSelector, {timeout}); } catch { // 如果超时,继续执行 } @@ -95,7 +97,6 @@ export async function getTiebaPageContent( } } /* eslint-enable no-await-in-loop */ - throw lastError || new Error('Failed to fetch page content'); }, config.cache.routeExpire, diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index 33c8ad68db45..abac2ee7e3d9 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -36,37 +36,19 @@ async function handler(ctx) { const { kw, cid = '0', sortBy = 'created' } = ctx.req.param(); const sortParam = sortBy === 'replied' ? '&sc=67108864' : ''; - // 固定抓取3页,约30条帖子 - const maxPages = 3; + const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=0${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${sortParam}`; + const data = await getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 }); - // 并发获取所有页面 - const pagePromises = []; - for (let pageNum = 0; pageNum < maxPages; pageNum++) { - const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=${pageNum * 50}${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${pageNum === 0 ? '' : '&ie=utf-8'}${sortParam}`; + const $ = load(data); + const threadListHTML = $('code[id="pagelet_html_frs-list/pagelet/thread_list"]') + .contents() + .filter((_, e) => e.type === 'comment' || (e as { nodeType?: number }).nodeType === 8) + .first() + .text() + .trim(); - const promise = getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 }); - pagePromises.push(promise); - } - - // 等待所有页面获取完成 - const pageResults = await Promise.all(pagePromises); - - // 解析所有页面数据并去重 - const threadMap = new Map(); - for (const html of pageResults) { - if (html && html.length > 0) { - const $ = load(html); - const threads = parseThreads($); - for (const thread of threads) { - // 使用帖子ID去重,只保留第一次出现的 - if (!threadMap.has(thread.id)) { - threadMap.set(thread.id, thread); - } - } - } - } - - const allThreads = [...threadMap.values()]; + const threadRoot = threadListHTML ? load(threadListHTML) : $; + const allThreads = parseThreads(threadRoot); if (allThreads.length === 0) { throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.'); diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index 52e8fac92eca..b11129364e25 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -120,10 +120,6 @@ async function handler(ctx) { const parsedDate = descText ? parseRelativeTime(descText) : null; const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; - // 提取时间文本用于显示 - const timeMatch = descText.match(/(\d+分钟前|\d+小时前|今天\s*\d{2}:\d{2}|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}|昨天\s*\d{2}:\d{2}|刚刚)/); - const timeText = timeMatch ? timeMatch[1] : descText; - // 尝试获取回复的唯一ID用于生成直接链接 const postId = item.attr('data-post-id') || item.attr('id') || ''; const replyLink = postId ? `https://tieba.baidu.com/p/${id}?pid=${postId}#${postId}` : `https://tieba.baidu.com/p/${id}`; @@ -132,13 +128,9 @@ async function handler(ctx) { title: `${authorName} 回复了帖子《${title}》`, description: renderToString( <> -
-
-

- 楼层:{floor} -
- 时间:{timeText} -

+ {postContent ?
{raw(postContent)}
: null} + {images ?
{raw(images)}
: null} + {floor ?

楼层:{floor}

: null} ), diff --git a/lib/routes/baidu/tieba/utils.ts b/lib/routes/baidu/tieba/utils.ts index d5e53314c785..4d800cd02550 100644 --- a/lib/routes/baidu/tieba/utils.ts +++ b/lib/routes/baidu/tieba/utils.ts @@ -1,78 +1,13 @@ -import type { CheerioAPI } from 'cheerio'; +import type {CheerioAPI} from 'cheerio'; -import { parseDate } from '@/utils/parse-date'; +import {parseRelativeDate} from '@/utils/parse-date'; /** * 解析相对时间(如"回复于4小时前")为实际日期 */ export function parseRelativeTime(timeStr: string): Date { - const now = new Date(); - - // 如果时间为空,返回当前时间 - if (!timeStr || timeStr.trim() === '') { - return now; - } - - // 移除"回复于"前缀 - const cleanStr = timeStr.replace(/^回复于/, '').trim(); - - // 匹配 "刚刚" - if (cleanStr === '刚刚' || cleanStr.includes('刚刚')) { - return now; - } - - // 匹配 "X小时前" - const hourMatch = cleanStr.match(/(\d+)\s*小时前/); - if (hourMatch) { - const hours = Number.parseInt(hourMatch[1], 10); - return new Date(now.getTime() - hours * 60 * 60 * 1000); - } - - // 匹配 "X分钟前" - const minMatch = cleanStr.match(/(\d+)\s*分钟前/); - if (minMatch) { - const mins = Number.parseInt(minMatch[1], 10); - return new Date(now.getTime() - mins * 60 * 1000); - } - - // 匹配 "X天前" - const dayMatch = cleanStr.match(/(\d+)\s*天前/); - if (dayMatch) { - const days = Number.parseInt(dayMatch[1], 10); - return new Date(now.getTime() - days * 24 * 60 * 60 * 1000); - } - - // 匹配 "昨天 HH:mm" - const yesterdayMatch = cleanStr.match(/昨天\s*(\d{1,2}):(\d{2})/); - if (yesterdayMatch) { - const date = new Date(now); - date.setDate(date.getDate() - 1); - date.setHours(Number.parseInt(yesterdayMatch[1], 10), Number.parseInt(yesterdayMatch[2], 10), 0, 0); - return date; - } - - // 匹配 "今天 HH:mm" - const todayMatch = cleanStr.match(/今天\s*(\d{1,2}):(\d{2})/); - if (todayMatch) { - const date = new Date(now); - date.setHours(Number.parseInt(todayMatch[1], 10), Number.parseInt(todayMatch[2], 10), 0, 0); - return date; - } - - // 尝试标准日期格式 - try { - // @ts-ignore - const parsed = parseDate(cleanStr, ['M-D', 'YYYY-MM-DD', 'HH:mm', 'YYYY-MM-DD HH:mm', 'YYYY-M-D HH:mm'], true); - // 检查是否是有效日期 - if (parsed && !Number.isNaN(parsed.getTime())) { - return parsed; - } - } catch { - // 解析失败,返回当前时间 - } - - // 默认返回当前时间 - return now; + const normalized = (timeStr || '').replace(/^回复于/, '').trim(); + return parseRelativeDate(normalized, ['M-D', 'YYYY-MM-DD', 'HH:mm', 'YYYY-MM-DD HH:mm', 'YYYY-M-D HH:mm']); } /** @@ -92,7 +27,7 @@ export interface Thread { * 解析帖子列表 */ export function parseThreads($: CheerioAPI): Thread[] { - return $('.thread-card-wrapper') + const cardThreads = $('.thread-card-wrapper') .toArray() .map((element) => { const item = $(element); @@ -112,7 +47,7 @@ export function parseThreads($: CheerioAPI): Thread[] { .find('.image-list-item img') .toArray() .map((img) => $(img).attr('data-src')) - .filter((src): src is string => !!src); + .filter((src): src is string => src !== undefined && src !== ''); return { id, @@ -125,4 +60,32 @@ export function parseThreads($: CheerioAPI): Thread[] { }; }) .filter((t) => t.id && t.title); + + if (cardThreads.length > 0) { + return cardThreads; + } + + return $('li.j_thread_list') + .toArray() + .map((element) => { + const item = $(element); + const linkHref = item.find('a.j_th_tit').attr('href') || ''; + const idMatch = linkHref.match(/\/p\/(\d+)/); + const id = idMatch ? idMatch[1] : ''; + + return { + id, + title: item.find('a.j_th_tit').text().trim(), + content: item.find('.threadlist_abs').text().trim(), + author: item.find('.frs-author-name').first().text().trim(), + time: item.find('.threadlist_reply_date').first().text().trim(), + images: item + .find('.threadlist_pic img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('bpic') || '') + .filter((src) => src !== ''), + link: linkHref, + }; + }) + .filter((t) => t.id && t.title); } From cf78d1e2e06e0db56b290a216f77709459f2f4f7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 22 Apr 2026 10:52:58 +0000 Subject: [PATCH 10/13] style: auto format --- lib/routes/baidu/tieba/common.ts | 19 ++++++++----------- lib/routes/baidu/tieba/utils.ts | 4 ++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts index 7a0644c820da..de0b4d7301ad 100644 --- a/lib/routes/baidu/tieba/common.ts +++ b/lib/routes/baidu/tieba/common.ts @@ -1,8 +1,9 @@ -import {config} from '@/config'; +import { Cookie } from 'tough-cookie'; + +import { config } from '@/config'; import ConfigNotFoundError from '@/errors/types/config-not-found'; import cache from '@/utils/cache'; -import {getPuppeteerPage} from '@/utils/puppeteer'; -import {Cookie} from 'tough-cookie'; +import { getPuppeteerPage } from '@/utils/puppeteer'; /** * 解析百度 cookie 字符串为 Puppeteer 可用的 cookie 对象数组 @@ -50,11 +51,7 @@ export async function getTiebaPageContent( } const cookies = parseBaiduCookies(cookie); - const { - waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', - timeout = 3000, - retries = 3 - } = options; + const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options; const data = await cache.tryGet( cacheKey, @@ -63,13 +60,13 @@ export async function getTiebaPageContent( /* eslint-disable no-await-in-loop -- Intentional sequential retry logic */ for (let attempt = 0; attempt < retries; attempt++) { - const {page, destroy} = await getPuppeteerPage(url, { + const { page, destroy } = await getPuppeteerPage(url, { onBeforeLoad: async (page) => { if (cookies.length > 0) { await page.setCookie(...cookies); } }, - gotoConfig: {waitUntil: 'domcontentloaded'}, + gotoConfig: { waitUntil: 'domcontentloaded' }, }); try { @@ -78,7 +75,7 @@ export async function getTiebaPageContent( // 动态等待内容加载 try { - await page.waitForSelector(waitForSelector, {timeout}); + await page.waitForSelector(waitForSelector, { timeout }); } catch { // 如果超时,继续执行 } diff --git a/lib/routes/baidu/tieba/utils.ts b/lib/routes/baidu/tieba/utils.ts index 4d800cd02550..3a6a5c91d082 100644 --- a/lib/routes/baidu/tieba/utils.ts +++ b/lib/routes/baidu/tieba/utils.ts @@ -1,6 +1,6 @@ -import type {CheerioAPI} from 'cheerio'; +import type { CheerioAPI } from 'cheerio'; -import {parseRelativeDate} from '@/utils/parse-date'; +import { parseRelativeDate } from '@/utils/parse-date'; /** * 解析相对时间(如"回复于4小时前")为实际日期 From 43322b766bce5de4465f683b93cef7ff7c589fc8 Mon Sep 17 00:00:00 2001 From: FlanChanOwO Date: Wed, 22 Apr 2026 20:52:23 +0800 Subject: [PATCH 11/13] fix: improve URL encoding and enhance content retrieval logic --- lib/config.ts | 1 - lib/routes/baidu/tieba/common.ts | 8 ++++---- lib/routes/baidu/tieba/forum.tsx | 5 +++-- lib/routes/baidu/tieba/post.tsx | 1 + lib/routes/baidu/tieba/search.tsx | 10 +++++----- lib/routes/baidu/tieba/user.tsx | 5 +++-- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/config.ts b/lib/config.ts index b417c4a58fc4..a75b93c5202c 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -86,7 +86,6 @@ type ConfigEnvKeys = | 'BITBUCKET_USERNAME' | 'BITBUCKET_PASSWORD' | 'BTBYR_HOST' - | 'BAIDU_COOKIE' | 'BTBYR_COOKIE' | 'BUPT_PORTAL_COOKIE' | 'CAIXIN_COOKIE' diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts index de0b4d7301ad..222a4c82dbd1 100644 --- a/lib/routes/baidu/tieba/common.ts +++ b/lib/routes/baidu/tieba/common.ts @@ -80,7 +80,9 @@ export async function getTiebaPageContent( // 如果超时,继续执行 } - return await page.content(); + const html = await page.content(); + checkSecurityVerification(html); + return html; } catch (error) { lastError = error as Error; // 如果是最后一次尝试,抛出错误 @@ -100,9 +102,7 @@ export async function getTiebaPageContent( false ); - const html = data as string; - checkSecurityVerification(html); - return html; + return data as string; } /** diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index abac2ee7e3d9..0fe989a94e9e 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -55,11 +55,12 @@ async function handler(ctx) { } const list = allThreads.map((thread) => { - const parsedDate = parseRelativeTime(thread.time); + const parsedDate = thread.time ? parseRelativeTime(thread.time) : undefined; + const pubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; return { title: thread.title, link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`, - pubDate: parsedDate ? timezone(parsedDate, +8) : undefined, + ...(pubDate ? { pubDate } : {}), author: thread.author, description: renderToString( <> diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index b11129364e25..200ef52ba354 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -1,4 +1,5 @@ import { load } from 'cheerio'; +import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; import type { Route } from '@/types'; diff --git a/lib/routes/baidu/tieba/search.tsx b/lib/routes/baidu/tieba/search.tsx index a308160e3f98..654969d76e0e 100644 --- a/lib/routes/baidu/tieba/search.tsx +++ b/lib/routes/baidu/tieba/search.tsx @@ -90,12 +90,12 @@ async function handler(ctx) { // 图片 const medias = item - .find('.thread-media-new img') + .find('.p_mediaCont img') .toArray() - .map((el) => { - const img = $(el); - const src = img.attr('src') || img.attr('data-src') || ''; - return `${title}`; + .flatMap((element) => { + const item = $(element); + const src = (item.attr('original') || '').trim(); + return src ? [``] : []; }) .join(''); diff --git a/lib/routes/baidu/tieba/user.tsx b/lib/routes/baidu/tieba/user.tsx index 02ae6393cf6e..de87296c7c81 100644 --- a/lib/routes/baidu/tieba/user.tsx +++ b/lib/routes/baidu/tieba/user.tsx @@ -34,7 +34,8 @@ export const route: Route = { async function handler(ctx) { const uid = ctx.req.param('uid'); - const url = `https://tieba.baidu.com/home/main?un=${uid}`; + const encodedUid = encodeURIComponent(uid); + const url = `https://tieba.baidu.com/home/main?un=${encodedUid}`; const html = await getTiebaPageContent(url, `tieba:user:${uid}`, { waitForSelector: '.thread-card', @@ -52,7 +53,7 @@ async function handler(ctx) { return { title: `${name} 的贴吧`, - link: `https://tieba.baidu.com/home/main?un=${uid}`, + link: `https://tieba.baidu.com/home/main?un=${encodedUid}`, item: list.toArray().map((element) => { const item = $(element); From 4bef130b74d6ccc22e1d1b01436f73699e83bda9 Mon Sep 17 00:00:00 2001 From: FlanChanXwO Date: Tue, 12 May 2026 19:00:52 +0800 Subject: [PATCH 12/13] feat(route/baidu/tieba): migrate forum route to use client API Replace Puppeteer-based HTML scraping with direct HTTP API call to /c/f/frs/page. The API requires BDUSS + MD5 client signature, eliminating the need for browser rendering and reducing resource usage. Also updated thread parsing to match the JSON response structure (thread_list at top level, author via user_list map, images from first_post_content type 3 items). Co-Authored-By: Claude Opus 4.7 --- lib/routes/baidu/tieba/common.ts | 68 ++++++++++++++++++++++++++++ lib/routes/baidu/tieba/forum.tsx | 77 +++++++++++++++++++++----------- 2 files changed, 119 insertions(+), 26 deletions(-) diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts index 222a4c82dbd1..a002c160c955 100644 --- a/lib/routes/baidu/tieba/common.ts +++ b/lib/routes/baidu/tieba/common.ts @@ -1,8 +1,11 @@ +import { createHash } from 'node:crypto'; + import { Cookie } from 'tough-cookie'; import { config } from '@/config'; import ConfigNotFoundError from '@/errors/types/config-not-found'; import cache from '@/utils/cache'; +import got from '@/utils/got'; import { getPuppeteerPage } from '@/utils/puppeteer'; /** @@ -118,3 +121,68 @@ export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.c const path = href.startsWith('/') ? href : `/${href}`; return `${base}${path}`; } + +/** + * 通过 /c/f/frs/page API 获取贴吧帖子列表 + * 使用贴吧客户端签名认证,无需 Puppeteer + */ +const TIEBA_CLIENT_SECRET = 'tiebaclient!!!'; + +function computeSign(params: Record): string { + const sortedKeys = Object.keys(params).toSorted(); + const raw = sortedKeys.map((key) => `${key}=${params[key]}`).join('') + TIEBA_CLIENT_SECRET; + return createHash('md5').update(raw).digest('hex'); +} + +export async function getTiebaForumData(params: { kw: string; cid?: string; isGood?: boolean; sortBy?: string }): Promise { + const cookie = config.baidu.cookie; + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + const bduss = cookie.match(/BDUSS=([^;]+)/)?.[1] || ''; + if (!bduss) { + throw new ConfigNotFoundError('BAIDU_COOKIE must contain BDUSS. Please check your cookie configuration.'); + } + + const apiParams: Record = { + _client_id: 'wappc_1234567890123_456', + _client_type: '2', + _client_version: '12.20.1.0', + _phone_imei: '000000000000000', + from: 'tieba', + kw: params.kw, + rn: '30', + pn: '1', + BDUSS: bduss, + }; + + if (params.isGood) { + apiParams.is_good = '1'; + } + if (params.cid && params.cid !== '0') { + apiParams.cid = params.cid; + } + if (params.sortBy === 'replied') { + apiParams.sort_type = '1'; + } + + apiParams.sign = computeSign(apiParams); + + const url = 'https://tieba.baidu.com/c/f/frs/page'; + const cacheKey = `tieba:api:forum:${params.kw}:${params.cid || '0'}:${params.sortBy || 'created'}`; + + const data = await cache.tryGet( + cacheKey, + async () => { + const { data: response } = await got.post(url, { + form: apiParams, + }); + return response; + }, + config.cache.routeExpire, + false + ); + + return data; +} diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index 0fe989a94e9e..0b18dd396469 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -1,11 +1,9 @@ -import { load } from 'cheerio'; import { renderToString } from 'hono/jsx/dom/server'; import type { Route } from '@/types'; import timezone from '@/utils/timezone'; -import { getTiebaPageContent, normalizeUrl } from './common'; -import { parseRelativeTime, parseThreads } from './utils'; +import { getTiebaForumData } from './common'; export const route: Route = { path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'], @@ -20,7 +18,7 @@ export const route: Route = { description: '百度 cookie 值,用于需要登录的贴吧页面', }, ], - requirePuppeteer: true, + requirePuppeteer: false, antiCrawler: true, supportBT: false, supportPodcast: false, @@ -31,43 +29,70 @@ export const route: Route = { handler, }; +function extractContent(items: any[]): { text: string; images: string[] } { + let text = ''; + const images: string[] = []; + if (!Array.isArray(items)) { + return { text, images }; + } + for (const item of items) { + if (Number(item.type) === 0 && item.text) { + text += item.text; + } else if (Number(item.type) === 3) { + const src = item.origin_src || item.original_src || item.big_cdn_src || item.cdn_src || item.src; + if (src) { + images.push(src); + } + } + } + return { text, images }; +} + async function handler(ctx) { - // sortBy: created, replied const { kw, cid = '0', sortBy = 'created' } = ctx.req.param(); - const sortParam = sortBy === 'replied' ? '&sc=67108864' : ''; + const isGood = ctx.req.path.includes('good'); - const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=0${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${sortParam}`; - const data = await getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 }); + const data = await getTiebaForumData({ kw, cid, isGood, sortBy }); - const $ = load(data); - const threadListHTML = $('code[id="pagelet_html_frs-list/pagelet/thread_list"]') - .contents() - .filter((_, e) => e.type === 'comment' || (e as { nodeType?: number }).nodeType === 8) - .first() - .text() - .trim(); + if (data?.error_code && data.error_code !== '0' && data.error_code !== 0) { + throw new Error(`Tieba API error: ${data.error_msg || data.error_code}`); + } - const threadRoot = threadListHTML ? load(threadListHTML) : $; - const allThreads = parseThreads(threadRoot); + const threadList = data?.thread_list || []; - if (allThreads.length === 0) { + if (threadList.length === 0) { throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.'); } - const list = allThreads.map((thread) => { - const parsedDate = thread.time ? parseRelativeTime(thread.time) : undefined; - const pubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + // Build author map from user_list + const userList: any[] = data?.user_list || []; + const authorMap = new Map(); + for (const user of userList) { + if (user.id) { + authorMap.set(Number(user.id), user.name_show || user.name || ''); + } + } + + const list = threadList.map((thread) => { + // Prefer first_post_content (richer), fall back to abstract + const { text: content, images } = extractContent(thread.first_post_content || thread.abstract || []); + + const timestamp = Number(thread.create_time || 0); + const pubDate = timestamp > 0 ? timezone(new Date(timestamp * 1000), +8) : undefined; + + const authorName = authorMap.get(Number(thread.author_id)) || ''; + return { title: thread.title, - link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`, + link: `https://tieba.baidu.com/p/${thread.id || thread.tid}`, ...(pubDate ? { pubDate } : {}), - author: thread.author, + author: authorName, description: renderToString( <> - {thread.content ?

{thread.content}

: null} - {thread.images && thread.images.length > 0 ? ( + {content ?

{content}

: null} + {images.length > 0 ? (
- {thread.images.map((img) => ( + {images.map((img) => ( ))}
From c40018af733f3fe2b5ab8d0a302c78e91d904492 Mon Sep 17 00:00:00 2001 From: Tony Date: Tue, 9 Jun 2026 01:57:29 +0800 Subject: [PATCH 13/13] revert: unnecessary changes mentioned in https://github.com/DIYgod/RSSHub/pull/21663#discussion_r3051928062 --- lib/config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/config.ts b/lib/config.ts index a539e84963d1..c399d52707fe 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -781,7 +781,7 @@ const calculateValue = () => { type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // Cache type; supports 'memory', 'redis', and 'http'. Set to empty string to disable cache. requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60), routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒 - contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒 + contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒 }, memory: { max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite intger.