diff --git a/lib/config.ts b/lib/config.ts index 9ba567508c9e..a75b93c5202c 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -78,6 +78,7 @@ type ConfigEnvKeys = | 'FOLLOW_PRICE' | 'FOLLOW_USER_LIMIT' // Route-specific (dynamic cookies with prefixes) + | 'BAIDU_COOKIE' | `BILIBILI_COOKIE_${string}` | 'BILIBILI_DM_IMG_LIST' | 'BILIBILI_DM_IMG_INTER' @@ -348,6 +349,9 @@ export type Config = { }; // Route-specific Configurations + baidu: { + cookie?: string; + }; bilibili: { cookies: Record; dmImgList?: string; @@ -763,7 +767,7 @@ const calculateValue = () => { type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // 缓存类型,支持 'memory' 和 'redis',设为空可以禁止缓存 requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60), routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒 - contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒 + contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒 }, memory: { max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite intger. @@ -843,6 +847,9 @@ const calculateValue = () => { }, // Route-specific Configurations + baidu: { + cookie: envs.BAIDU_COOKIE, + }, bilibili: { cookies: bilibili_cookies, dmImgList: envs.BILIBILI_DM_IMG_LIST, diff --git a/lib/routes/baidu/tieba/common.ts b/lib/routes/baidu/tieba/common.ts new file mode 100644 index 000000000000..a002c160c955 --- /dev/null +++ b/lib/routes/baidu/tieba/common.ts @@ -0,0 +1,188 @@ +import { createHash } from 'node:crypto'; + +import { Cookie } from 'tough-cookie'; + +import { config } from '@/config'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; +import cache from '@/utils/cache'; +import got from '@/utils/got'; +import { getPuppeteerPage } from '@/utils/puppeteer'; + +/** + * 解析百度 cookie 字符串为 Puppeteer 可用的 cookie 对象数组 + * 正确处理包含 '=' 的 cookie 值 + */ +export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> { + return cookieStr + .split(';') + .map((c) => Cookie.parse(c.trim())) + .filter((c): c is Cookie => Boolean(c?.key)) + .map((c) => ({ + name: c.key, + value: c.value, + domain: '.tieba.baidu.com', + })); +} + +/** + * 检查 HTML 内容是否包含百度安全验证页面 + */ +export function checkSecurityVerification(html: string): void { + if (html.includes('安全验证') || html.includes('百度安全验证')) { + throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.'); + } +} + +/** + * 使用 Puppeteer 获取贴吧页面内容 + * 包含统一的 cookie 设置、安全验证检查和缓存逻辑 + * 带有重试机制处理瞬态错误 + */ +export async function getTiebaPageContent( + url: string, + cacheKey: string, + options: { + waitForSelector?: string; + timeout?: number; + retries?: number; + } = {} +): Promise { + const cookie = config.baidu.cookie; + + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + const cookies = parseBaiduCookies(cookie); + const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options; + + const data = await cache.tryGet( + cacheKey, + async () => { + let lastError: Error | undefined; + + /* eslint-disable no-await-in-loop -- Intentional sequential retry logic */ + for (let attempt = 0; attempt < retries; attempt++) { + const { page, destroy } = await getPuppeteerPage(url, { + onBeforeLoad: async (page) => { + if (cookies.length > 0) { + await page.setCookie(...cookies); + } + }, + gotoConfig: { waitUntil: 'domcontentloaded' }, + }); + + try { + // 等待页面稳定 + await new Promise((resolve) => setTimeout(resolve, 2000)); + + // 动态等待内容加载 + try { + await page.waitForSelector(waitForSelector, { timeout }); + } catch { + // 如果超时,继续执行 + } + + const html = await page.content(); + checkSecurityVerification(html); + return html; + } catch (error) { + lastError = error as Error; + // 如果是最后一次尝试,抛出错误 + if (attempt === retries - 1) { + throw lastError; + } + // 等待后重试 + await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt + 1))); + } finally { + await destroy(); + } + } + /* eslint-enable no-await-in-loop */ + throw lastError || new Error('Failed to fetch page content'); + }, + config.cache.routeExpire, + false + ); + + return data as string; +} + +/** + * 规范化 URL 为绝对地址 + */ +export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.com'): string { + if (!href) { + return ''; + } + if (href.startsWith('http')) { + return href; + } + const path = href.startsWith('/') ? href : `/${href}`; + return `${base}${path}`; +} + +/** + * 通过 /c/f/frs/page API 获取贴吧帖子列表 + * 使用贴吧客户端签名认证,无需 Puppeteer + */ +const TIEBA_CLIENT_SECRET = 'tiebaclient!!!'; + +function computeSign(params: Record): string { + const sortedKeys = Object.keys(params).toSorted(); + const raw = sortedKeys.map((key) => `${key}=${params[key]}`).join('') + TIEBA_CLIENT_SECRET; + return createHash('md5').update(raw).digest('hex'); +} + +export async function getTiebaForumData(params: { kw: string; cid?: string; isGood?: boolean; sortBy?: string }): Promise { + const cookie = config.baidu.cookie; + if (!cookie) { + throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of BAIDU_COOKIE'); + } + + const bduss = cookie.match(/BDUSS=([^;]+)/)?.[1] || ''; + if (!bduss) { + throw new ConfigNotFoundError('BAIDU_COOKIE must contain BDUSS. Please check your cookie configuration.'); + } + + const apiParams: Record = { + _client_id: 'wappc_1234567890123_456', + _client_type: '2', + _client_version: '12.20.1.0', + _phone_imei: '000000000000000', + from: 'tieba', + kw: params.kw, + rn: '30', + pn: '1', + BDUSS: bduss, + }; + + if (params.isGood) { + apiParams.is_good = '1'; + } + if (params.cid && params.cid !== '0') { + apiParams.cid = params.cid; + } + if (params.sortBy === 'replied') { + apiParams.sort_type = '1'; + } + + apiParams.sign = computeSign(apiParams); + + const url = 'https://tieba.baidu.com/c/f/frs/page'; + const cacheKey = `tieba:api:forum:${params.kw}:${params.cid || '0'}:${params.sortBy || 'created'}`; + + const data = await cache.tryGet( + cacheKey, + async () => { + const { data: response } = await got.post(url, { + form: apiParams, + }); + return response; + }, + config.cache.routeExpire, + false + ); + + return data; +} diff --git a/lib/routes/baidu/tieba/forum.tsx b/lib/routes/baidu/tieba/forum.tsx index 8ba4ab80957b..0b18dd396469 100644 --- a/lib/routes/baidu/tieba/forum.tsx +++ b/lib/routes/baidu/tieba/forum.tsx @@ -1,85 +1,109 @@ -import { load } from 'cheerio'; -import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaForumData } from './common'; + export const route: Route = { path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'], categories: ['bbs'], example: '/baidu/tieba/forum/good/女图', parameters: { kw: '吧名', cid: '精品分类,默认为 `0`(全部分类),如果不传 `cid` 则获取全部分类', sortBy: '排序方式:`created`, `replied`。默认为 `created`' }, features: { - requireConfig: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], requirePuppeteer: false, - antiCrawler: false, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, }, name: '精品帖子', - maintainers: ['u3u'], + maintainers: ['u3u', 'FlanChanXwO'], handler, }; +function extractContent(items: any[]): { text: string; images: string[] } { + let text = ''; + const images: string[] = []; + if (!Array.isArray(items)) { + return { text, images }; + } + for (const item of items) { + if (Number(item.type) === 0 && item.text) { + text += item.text; + } else if (Number(item.type) === 3) { + const src = item.origin_src || item.original_src || item.big_cdn_src || item.cdn_src || item.src; + if (src) { + images.push(src); + } + } + } + return { text, images }; +} + async function handler(ctx) { - // sortBy: created, replied const { kw, cid = '0', sortBy = 'created' } = ctx.req.param(); + const isGood = ctx.req.path.includes('good'); - // PC端:https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)} - // 移动端接口:https://tieba.baidu.com/mo/q/m?kw=${encodeURIComponent(kw)}&lp=5024&forum_recommend=1&lm=0&cid=0&has_url_param=1&pn=0&is_ajax=1 - const params = { kw: encodeURIComponent(kw) }; - ctx.req.path.includes('good') && (params.tab = 'good'); - cid && (params.cid = cid); - const { data } = await got(`https://tieba.baidu.com/f`, { - headers: { - Referer: 'https://tieba.baidu.com/', - }, - searchParams: params, - }); + const data = await getTiebaForumData({ kw, cid, isGood, sortBy }); + + if (data?.error_code && data.error_code !== '0' && data.error_code !== 0) { + throw new Error(`Tieba API error: ${data.error_msg || data.error_code}`); + } - const threadListHTML = load(data)('code[id="pagelet_html_frs-list/pagelet/thread_list"]') - .contents() - .filter((e) => e.nodeType === '8'); + const threadList = data?.thread_list || []; - const $ = load(threadListHTML.prevObject[0].data); - const list = $('#thread_list > .j_thread_list[data-field]') - .toArray() - .map((element) => { - const item = $(element); - const { id, author_name } = item.data('field'); - const time = sortBy === 'created' ? item.find('.is_show_create_time').text().trim() : item.find('.threadlist_reply_date').text().trim(); - const title = item.find('a.j_th_tit').text().trim(); - const details = item.find('.threadlist_abs').text().trim(); - const medias = item - .find('.threadlist_media img') - .toArray() - .map((element) => { - const item = $(element); - return ``; - }) - .join(''); + if (threadList.length === 0) { + throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.'); + } - return { - title, - description: renderToString( - <> -

{details}

-

{raw(medias)}

-

作者:{author_name}

- - ), - pubDate: timezone(parseDate(time, ['HH:mm', 'M-D', 'YYYY-MM'], true), +8), - link: `https://tieba.baidu.com/p/${id}`, - }; - }); + // Build author map from user_list + const userList: any[] = data?.user_list || []; + const authorMap = new Map(); + for (const user of userList) { + if (user.id) { + authorMap.set(Number(user.id), user.name_show || user.name || ''); + } + } + + const list = threadList.map((thread) => { + // Prefer first_post_content (richer), fall back to abstract + const { text: content, images } = extractContent(thread.first_post_content || thread.abstract || []); + + const timestamp = Number(thread.create_time || 0); + const pubDate = timestamp > 0 ? timezone(new Date(timestamp * 1000), +8) : undefined; + + const authorName = authorMap.get(Number(thread.author_id)) || ''; + + return { + title: thread.title, + link: `https://tieba.baidu.com/p/${thread.id || thread.tid}`, + ...(pubDate ? { pubDate } : {}), + author: authorName, + description: renderToString( + <> + {content ?

{content}

: null} + {images.length > 0 ? ( +
+ {images.map((img) => ( + + ))} +
+ ) : null} + + ), + }; + }); return { title: `${kw}吧`, - description: load(data)('meta[name="description"]').attr('content'), link: `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}`, item: list, }; diff --git a/lib/routes/baidu/tieba/post.tsx b/lib/routes/baidu/tieba/post.tsx index cfc0a02ddb85..200ef52ba354 100644 --- a/lib/routes/baidu/tieba/post.tsx +++ b/lib/routes/baidu/tieba/post.tsx @@ -3,10 +3,11 @@ import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent } from './common'; +import { parseRelativeTime } from './utils'; + /** * 获取最新的帖子回复(倒序查看) * @@ -16,18 +17,19 @@ import timezone from '@/utils/timezone'; * 这个默认值我测试下来 7e6 是比较接近最大值了,因为当我输入 8e6 就会返回第一页的数据而不是最后一页了 * @returns */ -async function getPost(id, lz = 0, pn = 7e6) { - const { data } = await got(`https://tieba.baidu.com/p/${id}?see_lz=${lz}&pn=${pn}&ajax=1`, { - headers: { - Referer: 'https://tieba.baidu.com/', - }, +async function getPost(id: string, lz = 0, pn = 7e6) { + const url = `https://tieba.baidu.com/p/${id}?see_lz=${lz}&pn=${pn}`; + const html = await getTiebaPageContent(url, `tieba:post:${id}:${lz}:${pn}`, { + waitForSelector: '.virtual-list-item', + timeout: 3000, }); - const $ = load(data); - const max = Number.parseInt($('[max-page]').attr('max-page')); + + const $ = load(html); + const max = Number.parseInt($('[max-page]').attr('max-page') || '0'); if (max > pn) { - return getPost(id, max); + return getPost(id, lz, max); } - return data; + return html; } export const route: Route = { @@ -36,9 +38,15 @@ export const route: Route = { example: '/baidu/tieba/post/686961453', parameters: { id: '帖子 ID' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, @@ -49,7 +57,7 @@ export const route: Route = { }, ], name: '帖子动态', - maintainers: ['u3u'], + maintainers: ['u3u', 'FlanChanXwO'], handler, }; @@ -58,49 +66,80 @@ async function handler(ctx) { const lz = ctx.req.path.includes('lz') ? 1 : 0; const html = await getPost(id, lz); const $ = load(html); - const title = $('.core_title_txt').attr('title'); - // .substr(3); - const list = $('.p_postlist > [data-field]:not(:has(.ad_bottom_view))'); + + const title = $('.pb-title-wrap .pb-title').text().trim() || ''; + + // 使用新的 Vue 渲染页面选择器 - 只选择 virtual-list-item 避免重复 + const list = $('.virtual-list-item'); + + if (list.length === 0) { + throw new Error('No post replies found. The post may not exist or the cookie is invalid.'); + } return { title: lz ? `【只看楼主】${title}` : title, link: `https://tieba.baidu.com/p/${id}?see_lz=${lz}`, description: `${title}的最新回复`, - item: list.toArray().map((element) => { - const item = $(element); - const { author, content } = item.data('field'); - const tempList = item - .find('.post-tail-wrap > .tail-info') - .toArray() - .map((element) => $(element).text()); - let [pubContent, from, num, time] = ['', '', '', '']; - if (0 === tempList.length && 'date' in content) { - num = `${content.post_no}楼`; - time = content.date; - pubContent = item.find('.j_d_post_content').html(); - } else if (2 === tempList.length) { - [num, time] = tempList; - pubContent = content.content; - } else if (3 === tempList.length) { - [from, num, time] = tempList; - pubContent = content.content; - } - return { - title: `${author.user_name}回复了帖子《${title}》`, - description: renderToString( - <> -

{raw(pubContent)}

-
- 作者:{author.user_name} -
- 楼层:{num} -
- {from} - - ), - pubDate: timezone(parseDate(time, 'YYYY-MM-DD hh:mm'), +8), - link: `https://tieba.baidu.com/p/${id}?pid=${content.post_id}#${content.post_id}`, - }; - }), + item: list + .toArray() + .map((element) => { + const item = $(element); + + // 作者名 + const authorName = item.find('.head-name').text().trim(); + + // 跳过无效用户(无作者名的条目) + if (!authorName) { + return null; + } + + // 内容 - 从 pb-rich-text 获取(保留行内富文本,如链接、图片、表情等) + const contentItems = item.find('.pb-rich-text .pb-content-item'); + let postContent = ''; + contentItems.each((_, el) => { + const html = $(el).html()?.trim(); + if (html) { + postContent += `

${html}

`; + } + }); + + // 图片 + const images = item + .find('.image-list-wrapper img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('data-src') || '') + .filter(Boolean) + .map((src) => `${title}`) + .join(''); + + // 楼层和时间 + const descText = item.find('.pc-pb-comments-desc, .comment-desc-left').text().trim(); + const floorMatch = descText.match(/第(\d+)楼/); + const floor = floorMatch ? `${floorMatch[1]}楼` : ''; + + // 解析时间并验证有效性 - 使用完整的 descText 以支持 parseRelativeTime 能处理的所有格式 + const parsedDate = descText ? parseRelativeTime(descText) : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 尝试获取回复的唯一ID用于生成直接链接 + const postId = item.attr('data-post-id') || item.attr('id') || ''; + const replyLink = postId ? `https://tieba.baidu.com/p/${id}?pid=${postId}#${postId}` : `https://tieba.baidu.com/p/${id}`; + + return { + title: `${authorName} 回复了帖子《${title}》`, + description: renderToString( + <> + {postContent ?
{raw(postContent)}
: null} + {images ?
{raw(images)}
: null} + {floor ?

楼层:{floor}

: null} + + ), + + pubDate: validPubDate, + author: authorName, + link: replyLink, + }; + }) + .filter((item): item is NonNullable => item !== null), }; } diff --git a/lib/routes/baidu/tieba/search.tsx b/lib/routes/baidu/tieba/search.tsx index 3af8d03288c7..654969d76e0e 100644 --- a/lib/routes/baidu/tieba/search.tsx +++ b/lib/routes/baidu/tieba/search.tsx @@ -1,28 +1,34 @@ import { load } from 'cheerio'; import { raw } from 'hono/html'; import { renderToString } from 'hono/jsx/dom/server'; -import iconv from 'iconv-lite'; import type { Route } from '@/types'; -import got from '@/utils/got'; import { parseDate } from '@/utils/parse-date'; import timezone from '@/utils/timezone'; +import { getTiebaPageContent, normalizeUrl } from './common'; + export const route: Route = { path: '/tieba/search/:qw/:routeParams?', categories: ['bbs'], example: '/baidu/tieba/search/neuro', parameters: { qw: '搜索关键词', routeParams: '额外参数;请参阅以下说明和表格' }, features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, supportBT: false, supportPodcast: false, supportScihub: false, }, name: '贴吧搜索', - maintainers: ['JimenezLi'], + maintainers: ['JimenezLi', 'FlanChanXwO'], handler, description: `| 键 | 含义 | 接受的值 | 默认值 | | ------------ | ---------------------------------------------------------- | ------------- | ------ | @@ -36,43 +42,65 @@ export const route: Route = { async function handler(ctx) { const qw = ctx.req.param('qw'); + const query = new URLSearchParams(ctx.req.param('routeParams')); query.set('ie', 'utf-8'); query.set('qw', qw); - query.set('rn', query.get('rn') || '20'); // Number of returned items + query.set('rn', query.get('rn') || '20'); const link = `https://tieba.baidu.com/f/search/res?${query.toString()}`; - const response = await got.get(link, { - headers: { - Referer: 'https://tieba.baidu.com', - }, - responseType: 'buffer', + const html = await getTiebaPageContent(link, `tieba:search:${qw}:${query.toString()}`, { + waitForSelector: '.thread-content-box', + timeout: 3000, }); - const data = iconv.decode(response.data, 'gbk'); - const $ = load(data); - const resultList = $('div.s_post'); + const $ = load(html); + + const resultList = $('.thread-content-box'); + + if (resultList.length === 0) { + throw new Error('No search results found. The page structure may have changed.'); + } return { title: `${qw} - ${query.get('kw') || '百度贴'}吧搜索`, link, item: resultList.toArray().map((element) => { const item = $(element); - const titleItem = item.find('.p_title a'); - const title = titleItem.text().trim(); - const link = titleItem.attr('href'); - const time = item.find('.p_date').text().trim(); - const details = item.find('.p_content').text().trim(); + + // 标题 + const title = item.find('.title-content-wrap .title-wrap span').text().trim(); + + // 内容摘要 + const details = item.find('.abstract-wrap span').text().trim(); + + // 从链接中提取帖子URL,并规范化为绝对地址 + const linkPath = item.find('.action-bar-warp a.action-link-bg').attr('href') || ''; + const linkHref = normalizeUrl(linkPath); + + // 作者 + const author = item.find('.forum-attention.user').text().trim(); + + // 时间 - 从 top-title 中提取 "发布于 YYYY-M-D" + const timeText = item.find('.top-title').text().trim(); + const timeMatch = timeText.match(/发布于\s+(\d{4}-\d{1,2}-\d{1,2})/); + const time = timeMatch ? timeMatch[1] : ''; + const parsedDate = time ? parseDate(time, 'YYYY-M-D') : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 图片 const medias = item .find('.p_mediaCont img') .toArray() - .map((element) => { + .flatMap((element) => { const item = $(element); - return ``; + const src = (item.attr('original') || '').trim(); + return src ? [``] : []; }) .join(''); - const tieba = item.find('a.p_forum').text().trim(); - const author = item.find('a').last().text().trim(); + + // 贴吧名 + const tieba = item.find('.forum-name-text').text().trim(); return { title, @@ -80,16 +108,12 @@ async function handler(ctx) { <>

{details}

{raw(medias)}

-

- 贴吧:{tieba} -
- 作者:{author} -

+

贴吧:{tieba}

), author, - pubDate: timezone(parseDate(time, 'YYYY-MM-DD HH:mm'), +8), - link, + pubDate: validPubDate, + link: linkHref, }; }), }; diff --git a/lib/routes/baidu/tieba/user.ts b/lib/routes/baidu/tieba/user.ts deleted file mode 100644 index a5a9288e45b8..000000000000 --- a/lib/routes/baidu/tieba/user.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { load } from 'cheerio'; - -import type { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; -import timezone from '@/utils/timezone'; - -export const route: Route = { - path: '/tieba/user/:uid', - categories: ['bbs'], - example: '/baidu/tieba/user/斗鱼游戏君', - parameters: { uid: '用户 ID' }, - features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, - supportBT: false, - supportPodcast: false, - supportScihub: false, - }, - name: '用户帖子', - maintainers: ['igxlin', 'nczitzk'], - handler, - description: `用户 ID 可以通过打开用户的主页后查看地址栏的 \`un\` 字段来获取。`, -}; - -async function handler(ctx) { - const uid = ctx.req.param('uid'); - const response = await got(`https://tieba.baidu.com/home/main?un=${uid}`); - - const data = response.data; - - const $ = load(data); - const name = $('span.userinfo_username').text(); - const list = $('div.n_right.clearfix'); - let imgurl; - - return { - title: `${name} 的贴吧`, - link: `https://tieba.baidu.com/home/main?un=${uid}`, - item: - list && - list.toArray().map((item) => { - item = $(item).find('.n_contain'); - imgurl = item.find('ul.n_media.clearfix img').attr('original'); - return { - title: item.find('div.thread_name a').attr('title'), - pubDate: timezone(parseDate(item.parent().find('div .n_post_time').text(), ['YYYY-MM-DD', 'HH:mm']), +8), - description: `${item.find('div.n_txt').text()}
`, - link: item.find('div.thread_name a').attr('href'), - }; - }), - }; -} diff --git a/lib/routes/baidu/tieba/user.tsx b/lib/routes/baidu/tieba/user.tsx new file mode 100644 index 000000000000..de87296c7c81 --- /dev/null +++ b/lib/routes/baidu/tieba/user.tsx @@ -0,0 +1,104 @@ +import { load } from 'cheerio'; +import { renderToString } from 'hono/jsx/dom/server'; + +import type { Route } from '@/types'; +import { parseDate } from '@/utils/parse-date'; +import timezone from '@/utils/timezone'; + +import { getTiebaPageContent, normalizeUrl } from './common'; + +export const route: Route = { + path: '/tieba/user/:uid', + categories: ['bbs'], + example: '/baidu/tieba/user/斗鱼游戏君', + parameters: { uid: '用户 ID' }, + features: { + requireConfig: [ + { + name: 'BAIDU_COOKIE', + optional: false, + description: '百度 cookie 值,用于需要登录的贴吧页面', + }, + ], + requirePuppeteer: true, + antiCrawler: true, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + name: '用户帖子', + maintainers: ['igxlin', 'nczitzk', 'FlanChanXwO'], + handler, + description: `用户 ID 可以通过打开用户的主页后查看地址栏的 \`un\` 字段来获取。`, +}; + +async function handler(ctx) { + const uid = ctx.req.param('uid'); + const encodedUid = encodeURIComponent(uid); + const url = `https://tieba.baidu.com/home/main?un=${encodedUid}`; + + const html = await getTiebaPageContent(url, `tieba:user:${uid}`, { + waitForSelector: '.thread-card', + timeout: 3000, + }); + + const $ = load(html); + + const name = $('span.userinfo_username').text() || uid; + const list = $('.thread-card'); + + if (list.length === 0) { + throw new Error('No user posts found. The page structure may have changed or the user does not exist.'); + } + + return { + title: `${name} 的贴吧`, + link: `https://tieba.baidu.com/home/main?un=${encodedUid}`, + item: list.toArray().map((element) => { + const item = $(element); + + // 作者 + const authorName = item.find('.head-name').text().trim() || name; + + // 标题 + const title = item.find('.title-text').text().trim(); + + // 内容 + const content = item.find('.tb-richtext .text').text().trim(); + + // 图片 + const images = item + .find('.image-list-item img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('data-src') || '') + .filter(Boolean); + + // 时间 + const timeText = item.find('.post-num').text().trim(); + const parsedDate = timeText ? parseDate(timeText, ['YYYY-MM-DD']) : null; + const validPubDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? timezone(parsedDate, +8) : undefined; + + // 链接 + const link = normalizeUrl(item.find('a.thread-card-content').attr('href') || ''); + + return { + title, + pubDate: validPubDate, + author: authorName, + description: renderToString( + <> + {content ?

{content}

: null} + {images.length > 0 ? ( +
+ {images.map((img) => ( + + ))} +
+ ) : null} + + ), + link, + }; + }), + }; +} diff --git a/lib/routes/baidu/tieba/utils.ts b/lib/routes/baidu/tieba/utils.ts new file mode 100644 index 000000000000..3a6a5c91d082 --- /dev/null +++ b/lib/routes/baidu/tieba/utils.ts @@ -0,0 +1,91 @@ +import type { CheerioAPI } from 'cheerio'; + +import { parseRelativeDate } from '@/utils/parse-date'; + +/** + * 解析相对时间(如"回复于4小时前")为实际日期 + */ +export function parseRelativeTime(timeStr: string): Date { + const normalized = (timeStr || '').replace(/^回复于/, '').trim(); + return parseRelativeDate(normalized, ['M-D', 'YYYY-MM-DD', 'HH:mm', 'YYYY-MM-DD HH:mm', 'YYYY-M-D HH:mm']); +} + +/** + * 帖子数据接口 + */ +export interface Thread { + id: string; + title: string; + content: string; + author: string; + time: string; + images: string[]; + link: string; +} + +/** + * 解析帖子列表 + */ +export function parseThreads($: CheerioAPI): Thread[] { + const cardThreads = $('.thread-card-wrapper') + .toArray() + .map((element) => { + const item = $(element); + + const linkHref = item.find('a.thread-content-link').attr('href') || ''; + const idMatch = linkHref.match(/\/p\/(\d+)/); + const id = idMatch ? idMatch[1] : ''; + + const title = item.find('.thread-title .text').text().trim(); + const content = item.find('.thread-content .text').text().trim(); + const author = item.find('.head-name').text().trim(); + + const descInfo = item.find('.desc-info'); + const timeText = descInfo.length > 0 ? descInfo.text().trim() : item.find('.time, .date').text().trim(); + + const images = item + .find('.image-list-item img') + .toArray() + .map((img) => $(img).attr('data-src')) + .filter((src): src is string => src !== undefined && src !== ''); + + return { + id, + title, + content, + author, + time: timeText, + images, + link: linkHref, + }; + }) + .filter((t) => t.id && t.title); + + if (cardThreads.length > 0) { + return cardThreads; + } + + return $('li.j_thread_list') + .toArray() + .map((element) => { + const item = $(element); + const linkHref = item.find('a.j_th_tit').attr('href') || ''; + const idMatch = linkHref.match(/\/p\/(\d+)/); + const id = idMatch ? idMatch[1] : ''; + + return { + id, + title: item.find('a.j_th_tit').text().trim(), + content: item.find('.threadlist_abs').text().trim(), + author: item.find('.frs-author-name').first().text().trim(), + time: item.find('.threadlist_reply_date').first().text().trim(), + images: item + .find('.threadlist_pic img') + .toArray() + .map((img) => $(img).attr('src') || $(img).attr('bpic') || '') + .filter((src) => src !== ''), + link: linkHref, + }; + }) + .filter((t) => t.id && t.title); +}