diff --git a/Dockerfile b/Dockerfile index 5abf8ace..01fcb54a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,6 +18,7 @@ COPY . . # 构建 Nuxt 应用(生成 .output 目录) ENV NODE_ENV=production \ + NODE_OPTIONS=--max-old-space-size=4096 \ NITRO_KV_DRIVER=fs \ NITRO_KV_BASE=.data/kv @@ -50,6 +51,7 @@ WORKDIR /app # 复制构建输出 COPY --from=build-env /app/.output ./ +COPY --from=build-env /app/cli ./cli # puppeteer 被 Rollup external 排除,运行时需要从 node_modules 加载(Chromium 已通过 apt 安装,跳过下载) RUN npm install --no-save --ignore-scripts puppeteer@24 diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 00000000..c74e3a41 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,135 @@ +# wechat-exporter CLI + +`wechat-exporter` is a command-line client for the local Nuxt backend. It is meant to cover backend capabilities without operating the web UI. + +## Run + +From the repository: + +```bash +node cli/wechat-exporter.mjs --help +``` + +From the Docker container: + +```bash +docker exec wechat-article-exporter node cli/wechat-exporter.mjs --help +``` + +The CLI defaults to `http://127.0.0.1:3000` and auto-detects the first auth key under `.data/kv/cookie`. + +## Login + +```bash +docker exec wechat-article-exporter node cli/wechat-exporter.mjs login +``` + +The command saves a QR code to `.data/login-qrcode.jpg`, polls scan status, and prints the new `auth_key` after phone confirmation. + +Use a specific auth key: + +```bash +node cli/wechat-exporter.mjs me --auth-key <auth-key> +``` + +## Common Commands + +```bash +node cli/wechat-exporter.mjs me +node cli/wechat-exporter.mjs search-account --keyword 华普亿方数智就创业 +node cli/wechat-exporter.mjs account-by-url --url 'https://mp.weixin.qq.com/s/...' 
+node cli/wechat-exporter.mjs articles --account 华普亿方数智就创业 --year 2026 --all +node cli/wechat-exporter.mjs articles --fakeid MzIyODc5NTA1NQ== --year 2026 --all +``` + +## Export + +Export one year to HTML, Markdown, text, or JSON files in a zip: + +```bash +node cli/wechat-exporter.mjs export --account 华普亿方数智就创业 --year 2026 --format markdown +``` + +Export one year of an official account to Word files in a zip: + +```bash +node cli/wechat-exporter.mjs export-word --account 华普亿方数智就创业 --year 2026 +``` + +Export one year to PDF files in a zip: + +```bash +node cli/wechat-exporter.mjs export-pdf --account 华普亿方数智就创业 --year 2026 +``` + +Download a single article: + +```bash +node cli/wechat-exporter.mjs download --url 'https://mp.weixin.qq.com/s/...' --format markdown --output article.md +``` + +Supported single-article formats are `html`, `markdown`, `text`, and `json`. + +## Album, Comments, Proxy + +```bash +node cli/wechat-exporter.mjs album --fakeid <fakeid> --album-id <album-id> +node cli/wechat-exporter.mjs comments --url 'https://mp.weixin.qq.com/s/...' +node cli/wechat-exporter.mjs current-ip +node cli/wechat-exporter.mjs proxy-metrics +node cli/wechat-exporter.mjs blocked-ips +``` + +`comments` requires a rich WeChat article URL that includes comment credentials, or explicit `--fakeid`, `--comment-id`, `--key`, `--uin`, and `--pass-ticket` arguments. 
+ +## Public API Shortcuts + +These map to `/api/public/v1/*` endpoints: + +```bash +node cli/wechat-exporter.mjs public-account --keyword 华普亿方数智就创业 +node cli/wechat-exporter.mjs public-articles --fakeid MzIyODc5NTA1NQ== +``` + +## Raw API Passthrough + +Every backend endpoint can be called through `api`: + +```bash +node cli/wechat-exporter.mjs api GET /api/web/mp/info +node cli/wechat-exporter.mjs api GET /api/web/worker/overview-metrics +node cli/wechat-exporter.mjs api POST /api/web/cli/export-word --body '{"account":"华普亿方数智就创业","year":2026}' +``` + +Add query parameters with repeated `--query k=v`: + +```bash +node cli/wechat-exporter.mjs api GET /api/web/mp/searchbiz --query keyword=华普亿方数智就创业 --query size=20 +``` + +## Coverage Notes + +Native CLI commands cover: + +- QR-code login and auth-key validation +- Logged-in account info and logout +- Official account search and account resolution by article URL +- Official account article listing +- Single article download +- Year-based HTML, Markdown, text, and JSON zip export +- Year-based Word zip export +- Year-based PDF zip export +- Album listing +- Comment fetching when credentials are available +- Proxy status endpoints +- Public API account/article endpoints + +The following UI behavior is intentionally not mirrored as first-class CLI commands: + +- Browser table layout, filter state, and column settings +- Browser IndexedDB cache inspection +- Account favorites stored only in browser state +- Export options stored in `localStorage` +- Excel export, until the browser-side table/export options are backed by a stable server contract + +For those, use command options, raw `api`, or add a backend-backed feature first. 
diff --git a/cli/wechat-exporter.mjs b/cli/wechat-exporter.mjs new file mode 100644 index 00000000..e8542afb --- /dev/null +++ b/cli/wechat-exporter.mjs @@ -0,0 +1,594 @@ +#!/usr/bin/env node +import { promises as fs } from 'node:fs'; +import path from 'node:path'; + +const DEFAULT_SERVER = 'http://127.0.0.1:3000'; + +function printHelp() { + console.log(`wechat-exporter + +Usage: + wechat-exporter [options] + +Common commands: + login Login by QR code scan + auth-key Print detected auth key + me Show current logged-in MP account + search-account --keyword Search official accounts + account-by-url --url Resolve an official account from an article URL + articles --account [options] List articles from an official account + articles --fakeid [options] List articles from an official account fakeid + export --format --year 2026 Export a year's articles to a zip + export-word --account --year 2026 Export a year's articles to a Word zip + export-pdf --account --year 2026 Export a year's articles to a PDF zip + download --url [options] Download one article as html/markdown/text/json + public-account --keyword Call public account search API + public-articles --fakeid Call public article list API + album --fakeid --album-id Fetch album articles + comments --url Fetch comments when URL contains required keys + current-ip Show outbound IP + proxy-metrics Show worker proxy metrics + blocked-ips Show blocked IP list + logout Logout current auth key + +Power-user command: + api GET /api/web/mp/info + api POST /api/web/cli/export-word --body '{"account":"公众号","year":2026}' + +Global options: + --server Local server URL. Default: ${DEFAULT_SERVER} + --auth-key Auth key from login cookie storage + --data-dir Data directory used to auto-detect auth-key. Default: .data + --output Write response body to a file + --qrcode QR code image path for login. Default: .data/login-qrcode.jpg + --pretty Pretty-print JSON. 
Default for terminal JSON output + --raw Print raw response body + -h, --help Show help + +Command options: + --keyword Search keyword + --account Official account nickname + --fakeid Official account fakeid / __biz + --year Publish year filter + --begin Begin offset. Default: 0 + --size Page size. Default: 5 + --all Fetch article pages until exhausted + --format html, markdown, text, or json. Default: html + --filename Output zip filename for export-word + --url WeChat article URL + --album-id Album ID + --query k=v Add query parameter. Can be repeated + --body JSON body for POST/PUT/PATCH api calls + +Examples: + wechat-exporter login + wechat-exporter me + wechat-exporter search-account --keyword 华普亿方数智就创业 + wechat-exporter articles --account 华普亿方数智就创业 --year 2026 --all + wechat-exporter export --account 华普亿方数智就创业 --year 2026 --format markdown + wechat-exporter export-word --account 华普亿方数智就创业 --year 2026 + wechat-exporter export-pdf --account 华普亿方数智就创业 --year 2026 + wechat-exporter download --url 'https://mp.weixin.qq.com/s/...' 
--format markdown --output article.md
+ node cli/wechat-exporter.mjs api GET /api/web/worker/overview-metrics
+`);
+}
+
+/**
+ * Parse raw CLI arguments.
+ *
+ * Positionals are collected in `_`, every `--query` value is appended to the
+ * `query` array, `-h`/`--help` sets `help`, and any other `--name` becomes
+ * `args[name]`. A flag followed by another flag (or by nothing) is stored as
+ * `true`. Both `--name value` and `--name=value` are accepted; the `=` form is
+ * the only way to pass a value that itself starts with `-`.
+ *
+ * @param {string[]} argv Arguments after the node binary and script path.
+ * @returns {Record<string, any>} Parsed options.
+ * @throws {Error} When `--query` is given without a value.
+ */
+function parseArgs(argv) {
+  const args = { _: [], query: [] };
+  const assign = (name, value) => {
+    if (name !== 'query') {
+      args[name] = value;
+      return;
+    }
+    // Never replace the array with `true`: parseQueryPairs() iterates it.
+    if (value === true) throw new Error('Missing value for --query (expected k=v)');
+    args.query.push(value);
+  };
+
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i];
+    if (!arg.startsWith('-')) {
+      args._.push(arg);
+      continue;
+    }
+    const flag = arg.replace(/^-+/, '');
+    if (flag === 'h' || flag === 'help') {
+      args.help = true;
+      continue;
+    }
+    const eq = flag.indexOf('=');
+    if (eq > 0) {
+      assign(flag.slice(0, eq), flag.slice(eq + 1));
+      continue;
+    }
+    const next = argv[i + 1];
+    // Compare with undefined so an explicit empty string still counts as a value.
+    if (next === undefined || next.startsWith('-')) {
+      assign(flag, true);
+      continue;
+    }
+    assign(flag, next);
+    i++;
+  }
+  return args;
+}
+
+/** Base server URL (`--server` or the default) with one trailing slash removed. */
+function serverUrl(args) {
+  return String(args.server || DEFAULT_SERVER).replace(/\/$/, '');
+}
+
+/**
+ * Return the first (sorted) non-hidden entry name under `<dataDir>/kv/cookie`,
+ * or null when the directory cannot be read or has no such entry.
+ */
+async function detectAuthKey(dataDir) {
+  const cookieDir = path.resolve(dataDir, 'kv/cookie');
+  const names = await fs.readdir(cookieDir).catch(() => []);
+  const candidates = names.filter(name => !name.startsWith('.')).sort();
+  return candidates[0] || null;
+}
+
+/**
+ * Resolve the auth key: an explicit `--auth-key` wins, otherwise auto-detect
+ * under `--data-dir` (default `.data`). Throws only when nothing is found and
+ * `required` is true; otherwise the result may be null.
+ */
+async function authKey(args, required = true) {
+  if (args['auth-key']) return String(args['auth-key']);
+  const detected = await detectAuthKey(String(args['data-dir'] || '.data'));
+  if (!detected && required) {
+    throw new Error(`Missing --auth-key and no auth key found under ${path.resolve(String(args['data-dir'] || '.data'), 'kv/cookie')}`);
+  }
+  return detected;
+}
+
+/** Set every defined, non-empty value on `url.searchParams`; mutates and returns `url`. */
+function appendQuery(url, values = {}) {
+  for (const [key, value] of Object.entries(values)) {
+    if (value !== undefined && value !== null && value !== '') {
+      url.searchParams.set(key, String(value));
+    }
+  }
+  return url;
+}
+
+/**
+ * Convert `['k=v', ...]` into an object. An entry without `=` maps to `''`;
+ * a repeated key keeps the last value.
+ */
+function parseQueryPairs(pairs = []) {
+  const values = {};
+  for (const pair of pairs) {
+    const index = pair.indexOf('=');
+    if (index === -1) {
+      values[pair] = '';
+    } else {
+      values[pair.slice(0, index)] = pair.slice(index + 1);
+    }
+  }
+  return values;
+}
+
+async function request(args, method, route, { query, body, authRequired = true, raw = false } = {}) {
+
const url = new URL(route.startsWith('http') ? route : `${serverUrl(args)}${route}`); + appendQuery(url, query); + const key = await authKey(args, authRequired); + const response = await fetch(url, { + method, + headers: { + ...(body ? { 'Content-Type': 'application/json' } : {}), + ...(key ? { 'X-Auth-Key': key } : {}), + }, + body: body ? JSON.stringify(body) : undefined, + }); + const text = await response.text(); + if (!response.ok) { + throw new Error(text || `${response.status} ${response.statusText}`); + } + if (raw) return text; + + const contentType = response.headers.get('content-type') || ''; + if (contentType.includes('application/json') || looksJson(text)) { + try { + return JSON.parse(text); + } catch { + return text; + } + } + return text; +} + +async function requestRaw(args, method, route, { query, body, cookie } = {}) { + const url = new URL(route.startsWith('http') ? route : `${serverUrl(args)}${route}`); + appendQuery(url, query); + return fetch(url, { + method, + headers: { + ...(body ? { 'Content-Type': 'application/json' } : {}), + ...(cookie ? { Cookie: cookie } : {}), + }, + body: body ? JSON.stringify(body) : undefined, + }); +} + +function getSetCookies(headers) { + if (typeof headers.getSetCookie === 'function') { + return headers.getSetCookie(); + } + const value = headers.get('set-cookie'); + return value ? 
splitSetCookie(value) : [];
+}
+
+/**
+ * Split a comma-joined `Set-Cookie` header value into individual cookie strings.
+ *
+ * A comma separates two cookies only when it is followed by a `name=` token.
+ * The comma inside an `Expires=Wed, 21 Oct 2015 ...` date is followed by a day
+ * number and a space, so it is left alone even when `Expires` is the last
+ * attribute of a cookie.
+ *
+ * @param {string} value Combined header value.
+ * @returns {string[]} Trimmed, non-empty cookie strings in their original order.
+ */
+function splitSetCookie(value) {
+  return value
+    .split(/,(?=\s*[^\s;,=]+=)/)
+    .map(part => part.trim())
+    .filter(Boolean);
+}
+
+/**
+ * Apply `Set-Cookie` strings to `cookieJar` in place. Only the leading
+ * `name=value` pair is used; a value of `EXPIRED` removes the cookie.
+ */
+function mergeCookies(cookieJar, setCookies) {
+  for (const cookie of setCookies) {
+    const [pair] = cookie.split(';');
+    const index = pair.indexOf('=');
+    if (index === -1) continue;
+    const name = pair.slice(0, index).trim();
+    const value = pair.slice(index + 1).trim();
+    if (!name) continue;
+    if (value === 'EXPIRED') {
+      delete cookieJar[name];
+    } else {
+      cookieJar[name] = value;
+    }
+  }
+}
+
+/** Serialize the jar as a `Cookie` request-header value (`a=1; b=2`). */
+function cookieHeader(cookieJar) {
+  return Object.entries(cookieJar)
+    .map(([name, value]) => `${name}=${value}`)
+    .join('; ');
+}
+
+/** Return the value of the `auth-key` cookie among the `Set-Cookie` strings, or null. */
+function authKeyFromSetCookies(setCookies) {
+  for (const cookie of setCookies) {
+    const [pair] = cookie.split(';');
+    const index = pair.indexOf('=');
+    if (index === -1) continue;
+    if (pair.slice(0, index).trim() === 'auth-key') {
+      return pair.slice(index + 1).trim();
+    }
+  }
+  return null;
+}
+
+/** Cheap check: does the trimmed text start like a JSON object or array? */
+function looksJson(text) {
+  const trimmed = text.trim();
+  return trimmed.startsWith('{') || trimmed.startsWith('[');
+}
+
+async function output(args, value) {
+  const text = typeof value === 'string'
+    ? value
+    : (args.raw ?
JSON.stringify(value) : JSON.stringify(value, null, 2));
+  if (args.output) {
+    await fs.writeFile(path.resolve(String(args.output)), text);
+    console.log(path.resolve(String(args.output)));
+  } else {
+    console.log(text);
+  }
+}
+
+/**
+ * Return option `name` as a string.
+ *
+ * @throws {Error} When the option is absent, or was given as a bare flag.
+ */
+function requireArg(args, name) {
+  const value = args[name];
+  // parseArgs stores `true` for a flag without a value; String(true) would
+  // otherwise be sent to the backend as the literal text "true".
+  if (value === true) throw new Error(`Missing value for --${name}`);
+  if (!value) throw new Error(`Missing --${name}`);
+  return String(value);
+}
+
+/**
+ * Choose the search result for `keyword`: an exact nickname match first, then
+ * the first nickname containing the keyword, then the first result.
+ */
+function pickAccount(list, keyword) {
+  const accounts = list || [];
+  return (
+    accounts.find(item => item.nickname === keyword) ||
+    accounts.find(item => item.nickname?.includes(keyword)) ||
+    accounts[0]
+  );
+}
+
+/**
+ * Resolve the account fakeid: `--fakeid` is used as-is, otherwise `--account`
+ * is looked up through `/api/web/mp/searchbiz`.
+ *
+ * @throws {Error} When neither option is usable or the search returns no fakeid.
+ */
+async function resolveFakeid(args) {
+  if (args.fakeid) return String(args.fakeid);
+  const keyword = requireArg(args, 'account');
+  const result = await request(args, 'GET', '/api/web/mp/searchbiz', {
+    query: { keyword, size: args.size || 20 },
+  });
+  const matched = pickAccount(result.list, keyword);
+  if (!matched?.fakeid) throw new Error(`Account not found: ${keyword}`);
+  return matched.fakeid;
+}
+
+/**
+ * Flatten an appmsgpublish response into its `appmsgex` article objects.
+ * `publish_page` and each `publish_info` are JSON strings and are parsed here.
+ *
+ * @throws {Error} When `base_resp.ret` is not 0.
+ */
+function parsePublishArticles(resp) {
+  if (resp?.base_resp?.ret !== 0) {
+    throw new Error(resp?.base_resp?.err_msg || 'request failed');
+  }
+  if (!resp.publish_page) return [];
+  const publishPage = JSON.parse(resp.publish_page);
+  return (publishPage.publish_list || [])
+    .filter(item => item.publish_info)
+    .flatMap(item => JSON.parse(item.publish_info).appmsgex || []);
+}
+
+/**
+ * Keep articles whose `update_time` (seconds) falls inside calendar `year`,
+ * with the year boundaries taken at UTC+08:00. A falsy year returns the input.
+ */
+function filterByYear(articles, year) {
+  if (!year) return articles;
+  const start = Math.floor(new Date(`${year}-01-01T00:00:00+08:00`).getTime() / 1000);
+  const end = Math.floor(new Date(`${Number(year) + 1}-01-01T00:00:00+08:00`).getTime() / 1000);
+  return articles.filter(article => article.update_time >= start && article.update_time < end);
+}
+
+/** Reduce an article to the fields printed by the `articles` command. */
+function simplifyArticle(article) {
+  return {
+    title: article.title,
+    update_time: article.update_time,
+    url: article.link,
+    digest: article.digest,
+    cover: article.cover,
+  };
+}
+
+/** Print the explicit or auto-detected auth key. */
+async function commandAuthKey(args) {
+  console.log(await authKey(args));
+}
+
+async function readJsonResponse(response) {
+  const text = await response.text();
+  try
{ + return JSON.parse(text); + } catch { + throw new Error(text || `${response.status} ${response.statusText}`); + } +} + +async function commandLogin(args) { + const cookieJar = {}; + const sid = `${Date.now()}${Math.floor(Math.random() * 100)}`; + const qrcodePath = path.resolve(String(args.qrcode || '.data/login-qrcode.jpg')); + + const startResp = await requestRaw(args, 'POST', `/api/web/login/session/${sid}`); + mergeCookies(cookieJar, getSetCookies(startResp.headers)); + const startJson = await readJsonResponse(startResp); + if (!startResp.ok || startJson?.base_resp?.ret !== 0) { + throw new Error(startJson?.base_resp?.err_msg || 'start login failed'); + } + + const qrResp = await requestRaw(args, 'GET', '/api/web/login/getqrcode', { + query: { rnd: Math.random() }, + cookie: cookieHeader(cookieJar), + }); + mergeCookies(cookieJar, getSetCookies(qrResp.headers)); + if (!qrResp.ok) { + throw new Error(`get qrcode failed: ${qrResp.status} ${qrResp.statusText}`); + } + await fs.mkdir(path.dirname(qrcodePath), { recursive: true }); + await fs.writeFile(qrcodePath, Buffer.from(await qrResp.arrayBuffer())); + + console.error(`QR code saved: ${qrcodePath}`); + console.error('Scan it in WeChat, then confirm login on your phone...'); + + const deadline = Date.now() + Number(args.timeout || 120) * 1000; + while (Date.now() < deadline) { + await new Promise(resolve => setTimeout(resolve, Number(args.interval || 2) * 1000)); + const scanResp = await requestRaw(args, 'GET', '/api/web/login/scan', { + cookie: cookieHeader(cookieJar), + }); + mergeCookies(cookieJar, getSetCookies(scanResp.headers)); + const scanJson = await readJsonResponse(scanResp); + if (scanJson?.base_resp?.ret !== 0) continue; + + if (scanJson.status === 1) { + const loginResp = await requestRaw(args, 'POST', '/api/web/login/bizlogin', { + cookie: cookieHeader(cookieJar), + }); + const setCookies = getSetCookies(loginResp.headers); + mergeCookies(cookieJar, setCookies); + const loginJson = await 
readJsonResponse(loginResp);
+      if (!loginResp.ok || loginJson.err) {
+        throw new Error(loginJson.err || 'biz login failed');
+      }
+      const key = authKeyFromSetCookies(setCookies);
+      await output(args, { ...loginJson, auth_key: key });
+      return;
+    }
+
+    if (scanJson.status === 4 || scanJson.status === 6) {
+      console.error('Scanned. Waiting for confirmation...');
+    } else if (scanJson.status === 2 || scanJson.status === 3) {
+      throw new Error('QR code expired, run login again');
+    } else if (scanJson.status === 5) {
+      throw new Error('This account has no bound email and cannot scan-login');
+    }
+  }
+
+  throw new Error('Login timed out');
+}
+
+/** GET `route` with `query` and print the result (or write it to `--output`). */
+async function commandGet(args, route, query = {}, authRequired = true) {
+  await output(args, await request(args, 'GET', route, { query, authRequired, raw: args.raw }));
+}
+
+/** `search-account`: search official accounts by `--keyword`. */
+async function commandSearchAccount(args) {
+  await commandGet(args, '/api/web/mp/searchbiz', {
+    keyword: requireArg(args, 'keyword'),
+    begin: args.begin || 0,
+    size: args.size || 5,
+  });
+}
+
+/** `account-by-url`: resolve an official account from an article `--url`. */
+async function commandAccountByUrl(args) {
+  await commandGet(args, '/api/web/mp/searchbyurl', { url: requireArg(args, 'url') });
+}
+
+/**
+ * Read option `name` as an integer within `[min, max]`.
+ * An absent or empty option yields `fallback`.
+ *
+ * @throws {Error} For a bare flag, a non-integer, or an out-of-range value.
+ */
+function integerOption(args, name, fallback, min, max = Number.MAX_SAFE_INTEGER) {
+  const raw = args[name];
+  if (raw === undefined || raw === '') return fallback;
+  const value = raw === true ? NaN : Number(raw);
+  if (!Number.isInteger(value) || value < min || value > max) {
+    throw new Error(`Invalid --${name}: ${raw}`);
+  }
+  return value;
+}
+
+/**
+ * `articles`: list an account's articles, optionally restricted to `--year`.
+ * With `--all`, pages are fetched until a short page is returned, the oldest
+ * article on a page predates the requested year, or the offset reaches 10000.
+ */
+async function commandArticles(args) {
+  // Validate before any network call: a page size of 0 would never advance
+  // the offset, so `--all` would loop forever.
+  const size = integerOption(args, 'size', 5, 1);
+  const begin = integerOption(args, 'begin', 0, 0);
+  const year = integerOption(args, 'year', undefined, 1000, 9999);
+  const all = Boolean(args.all);
+  const fakeid = await resolveFakeid(args);
+  // Loop-invariant: first second of the requested year at UTC+08:00.
+  const yearStart = year
+    ? Math.floor(new Date(`${year}-01-01T00:00:00+08:00`).getTime() / 1000)
+    : undefined;
+  const rows = [];
+
+  for (let offset = begin; offset < 10000; offset += size) {
+    const resp = await request(args, 'GET', '/api/web/mp/appmsgpublish', {
+      query: {
+        id: fakeid,
+        keyword: args.keyword || '',
+        begin: offset,
+        size,
+      },
+    });
+    const pageRows = parsePublishArticles(resp);
+    rows.push(...filterByYear(pageRows, year));
+    if (!all || pageRows.length < size) break;
+    if (year) {
+      // Math.min() of an empty list is Infinity, which the isFinite check rejects.
+      const oldest = Math.min(...pageRows.map(article => article.update_time).filter(Boolean));
+      if (Number.isFinite(oldest) && oldest < yearStart) break;
+    }
+  }
+
+  await output(args, rows.map(simplifyArticle));
+}
+
+/** `export-word`: ask the backend to export one year of articles as a Word zip. */
+async function commandExportWord(args) {
+  const account = args.account ? String(args.account) : undefined;
+  const fakeid = args.fakeid ? String(args.fakeid) : undefined;
+  if (!account && !fakeid) throw new Error('Missing --account or --fakeid');
+  const year = Number(args.year || new Date().getFullYear());
+  console.error(`Exporting ${year} articles...`);
+  await output(args, await request(args, 'POST', '/api/web/cli/export-word', {
+    body: { account, fakeid, year, filename: args.filename ? String(args.filename) : undefined },
+  }));
+}
+
+/** `export`: ask the backend to export one year of articles in `--format` as a zip. */
+async function commandExportContent(args) {
+  const account = args.account ? String(args.account) : undefined;
+  const fakeid = args.fakeid ? String(args.fakeid) : undefined;
+  if (!account && !fakeid) throw new Error('Missing --account or --fakeid');
+  const year = Number(args.year || new Date().getFullYear());
+  const format = String(args.format || 'html').toLowerCase();
+  console.error(`Exporting ${year} articles to ${format}...`);
+  await output(args, await request(args, 'POST', '/api/web/cli/export-content', {
+    body: { account, fakeid, year, format, filename: args.filename ? String(args.filename) : undefined },
+  }));
+}
+
+async function commandExportPdf(args) {
+  const account = args.account ?
String(args.account) : undefined; + const fakeid = args.fakeid ? String(args.fakeid) : undefined; + if (!account && !fakeid) throw new Error('Missing --account or --fakeid'); + const year = Number(args.year || new Date().getFullYear()); + console.error(`Exporting ${year} articles to PDF...`); + await output(args, await request(args, 'POST', '/api/web/cli/export-pdf', { + body: { account, fakeid, year, filename: args.filename ? String(args.filename) : undefined }, + })); +} + +async function commandDownload(args) { + await commandGet(args, '/api/public/v1/download', { + url: requireArg(args, 'url'), + format: args.format || 'html', + }, false); +} + +async function commandPublicAccount(args) { + await commandGet(args, '/api/public/v1/account', { + keyword: requireArg(args, 'keyword'), + begin: args.begin || 0, + size: args.size || 5, + }); +} + +async function commandPublicArticles(args) { + await commandGet(args, '/api/public/v1/article', { + fakeid: requireArg(args, 'fakeid'), + keyword: args.keyword || '', + begin: args.begin || 0, + size: args.size || 5, + }); +} + +async function commandAlbum(args) { + await commandGet(args, '/api/web/misc/appmsgalbum', { + fakeid: requireArg(args, 'fakeid'), + album_id: requireArg(args, 'album-id'), + is_reverse: args.reverse ? '1' : '0', + count: args.size || args.count || 20, + begin_msgid: args['begin-msgid'], + begin_itemidx: args['begin-itemidx'], + }, false); +} + +function queryFromArticleUrl(articleUrl) { + const url = new URL(articleUrl); + return { + __biz: url.searchParams.get('__biz'), + comment_id: url.searchParams.get('comment_id'), + key: url.searchParams.get('key'), + uin: url.searchParams.get('uin'), + pass_ticket: url.searchParams.get('pass_ticket'), + }; +} + +async function commandComments(args) { + const params = args.url ? 
queryFromArticleUrl(String(args.url)) : { + __biz: args.fakeid || args.__biz, + comment_id: args['comment-id'], + key: args.key, + uin: args.uin, + pass_ticket: args['pass-ticket'], + }; + for (const name of ['__biz', 'comment_id', 'key', 'uin', 'pass_ticket']) { + if (!params[name]) throw new Error(`Missing ${name}; provide a rich article URL or explicit comment parameters`); + } + await commandGet(args, '/api/web/misc/comment', params, false); +} + +async function commandApi(args) { + const method = String(args._[1] || 'GET').toUpperCase(); + const route = args._[2]; + if (!route) throw new Error('Usage: api METHOD /path [--query k=v] [--body json]'); + const body = args.body ? JSON.parse(String(args.body)) : undefined; + await output(args, await request(args, method, route, { + query: parseQueryPairs(args.query), + body, + authRequired: !args['no-auth'], + raw: args.raw, + })); +} + +async function main() { + const args = parseArgs(process.argv.slice(2)); + const command = args._[0]; + if (args.help || !command) { + printHelp(); + return; + } + + switch (command) { + case 'login': + return commandLogin(args); + case 'auth-key': + return commandAuthKey(args); + case 'me': + return commandGet(args, '/api/web/mp/info'); + case 'search-account': + return commandSearchAccount(args); + case 'account-by-url': + return commandAccountByUrl(args); + case 'articles': + return commandArticles(args); + case 'export': + return commandExportContent(args); + case 'export-word': + return commandExportWord(args); + case 'export-pdf': + return commandExportPdf(args); + case 'download': + return commandDownload(args); + case 'public-account': + return commandPublicAccount(args); + case 'public-articles': + return commandPublicArticles(args); + case 'album': + return commandAlbum(args); + case 'comments': + return commandComments(args); + case 'current-ip': + return commandGet(args, '/api/web/misc/current-ip', {}, false); + case 'proxy-metrics': + return commandGet(args, 
'/api/web/worker/overview-metrics', {}, false); + case 'blocked-ips': + return commandGet(args, '/api/web/worker/blocked-ip-list', {}, false); + case 'logout': + return commandGet(args, '/api/web/mp/logout'); + case 'api': + return commandApi(args); + default: + throw new Error(`Unknown command: ${command}`); + } +} + +main().catch(error => { + console.error(error instanceof Error ? error.message : String(error)); + process.exitCode = 1; +}); diff --git a/package.json b/package.json index ab303042..c73ebfb3 100644 --- a/package.json +++ b/package.json @@ -2,6 +2,9 @@ "name": "wechat-article-exporter", "version": "2.3.16", "type": "module", + "bin": { + "wechat-exporter": "./cli/wechat-exporter.mjs" + }, "scripts": { "debug": "nuxt dev --inspect", "dev": "nuxt dev", diff --git a/server/api/web/cli/export-content.post.ts b/server/api/web/cli/export-content.post.ts new file mode 100644 index 00000000..faa9a0c7 --- /dev/null +++ b/server/api/web/cli/export-content.post.ts @@ -0,0 +1,211 @@ +import { promises as fs } from 'node:fs'; +import path from 'node:path'; +import JSZip from 'jszip'; +import TurndownService from 'turndown'; +import { ARTICLE_LIST_PAGE_SIZE, USER_AGENT } from '~/config'; +import { normalizeHtml, parseCgiDataNew } from '#shared/utils/html'; +import { filterInvalidFilenameChars } from '#shared/utils/helpers'; +import { getTokenFromStore } from '~/server/utils/CookieStore'; +import { proxyMpRequest } from '~/server/utils/proxy-request'; + +type ExportFormat = 'html' | 'markdown' | 'text' | 'json'; + +interface ExportContentBody { + account?: string; + fakeid?: string; + year?: number; + format?: ExportFormat; + filename?: string; +} + +const extensions: Record = { + html: 'html', + markdown: 'md', + text: 'txt', + json: 'json', +}; + +function normalizeFilename(filename: string) { + return filterInvalidFilenameChars(filename || 'untitled') || 'untitled'; +} + +function uniqueName(filename: string, extension: string, usedNames: Set) { + const base = 
normalizeFilename(filename); + let name = `${base}.${extension}`; + let index = 2; + while (usedNames.has(name)) { + name = `${base}-${index}.${extension}`; + index++; + } + usedNames.add(name); + return name; +} + +async function resolveFakeid(event: any, account?: string, fakeid?: string) { + if (fakeid) return fakeid; + if (!account) { + throw createError({ statusCode: 400, statusMessage: 'account or fakeid is required' }); + } + + const token = await getTokenFromStore(event); + if (!token) { + throw createError({ statusCode: 401, statusMessage: 'not logged in or auth-key expired' }); + } + + const resp = await proxyMpRequest({ + event, + method: 'GET', + endpoint: 'https://mp.weixin.qq.com/cgi-bin/searchbiz', + query: { + action: 'search_biz', + begin: 0, + count: 20, + query: account, + token, + lang: 'zh_CN', + f: 'json', + ajax: '1', + }, + parseJson: true, + }); + + if (resp?.base_resp?.ret !== 0) { + throw createError({ statusCode: 502, statusMessage: resp?.base_resp?.err_msg || 'search account failed' }); + } + + const matches = (resp.list || []).filter((item: any) => item.nickname === account || item.nickname?.includes(account)); + const matched = matches[0] || resp.list?.[0]; + if (!matched?.fakeid) { + throw createError({ statusCode: 404, statusMessage: `account not found: ${account}` }); + } + return matched.fakeid; +} + +async function fetchArticles(event: any, fakeid: string, year: number) { + const token = await getTokenFromStore(event); + if (!token) { + throw createError({ statusCode: 401, statusMessage: 'not logged in or auth-key expired' }); + } + + const start = Math.floor(new Date(`${year}-01-01T00:00:00+08:00`).getTime() / 1000); + const end = Math.floor(new Date(`${year + 1}-01-01T00:00:00+08:00`).getTime() / 1000); + const articles: any[] = []; + + for (let begin = 0; begin < 10000; begin += ARTICLE_LIST_PAGE_SIZE) { + const resp = await proxyMpRequest({ + event, + method: 'GET', + endpoint: 
'https://mp.weixin.qq.com/cgi-bin/appmsgpublish',
+      query: {
+        sub: 'list',
+        search_field: 'null',
+        begin,
+        count: ARTICLE_LIST_PAGE_SIZE,
+        query: '',
+        fakeid,
+        type: '101_1',
+        free_publish_type: 1,
+        sub_action: 'list_ex',
+        token,
+        lang: 'zh_CN',
+        f: 'json',
+        ajax: 1,
+      },
+      parseJson: true,
+    });
+
+    if (resp?.base_resp?.ret !== 0) {
+      throw createError({ statusCode: 502, statusMessage: resp?.base_resp?.err_msg || 'fetch article list failed' });
+    }
+
+    const publishPage = JSON.parse(resp.publish_page);
+    const publishList = (publishPage.publish_list || []).filter((item: any) => !!item.publish_info);
+    if (publishList.length === 0) break;
+
+    const pageArticles = publishList.flatMap((item: any) => JSON.parse(item.publish_info).appmsgex || []);
+    for (const article of pageArticles) {
+      if (article.update_time >= start && article.update_time < end) {
+        articles.push(article);
+      }
+    }
+
+    const oldest = Math.min(...pageArticles.map((article: any) => article.update_time).filter(Boolean));
+    if (Number.isFinite(oldest) && oldest < start) break;
+  }
+
+  return articles;
+}
+
+/** Download the raw HTML of one article; throws on a non-2xx response. */
+async function fetchArticleHtml(url: string) {
+  const response = await fetch(url, {
+    headers: {
+      Referer: 'https://mp.weixin.qq.com/',
+      Origin: 'https://mp.weixin.qq.com',
+      'User-Agent': USER_AGENT,
+    },
+  });
+  if (!response.ok) {
+    throw new Error(`fetch ${url} failed: ${response.status} ${response.statusText}`);
+  }
+  return response.text();
+}
+
+/**
+ * Convert raw article HTML to the requested format: `html` and `text` go
+ * through `normalizeHtml`, `markdown` runs Turndown over the normalized HTML,
+ * and anything else (`json`) is the pretty-printed `parseCgiDataNew` result.
+ */
+async function convertContent(rawHtml: string, format: ExportFormat) {
+  if (format === 'html') return normalizeHtml(rawHtml, 'html');
+  if (format === 'text') return normalizeHtml(rawHtml, 'text');
+  if (format === 'markdown') return new TurndownService().turndown(normalizeHtml(rawHtml, 'html'));
+  return JSON.stringify(await parseCgiDataNew(rawHtml), null, 2);
+}
+
+/**
+ * Reduce a client-supplied zip name to a bare file name so it cannot escape
+ * the export directory. Both `/` and `\` count as separators. Returns
+ * `fallback` when nothing usable remains (non-string, empty, `.` or `..`).
+ */
+function resolveExportFilename(requested: unknown, fallback: string): string {
+  if (typeof requested !== 'string') return fallback;
+  const base = path.basename(requested.replace(/\\/g, '/')).trim();
+  return base === '' || base === '.' || base === '..' ? fallback : base;
+}
+
+/**
+ * POST /api/web/cli/export-content
+ *
+ * Exports one year of an account's articles in the requested format and writes
+ * them as a zip under `.data/exports`. Articles that fail to download or
+ * convert are reported in `failures` instead of aborting the export.
+ *
+ * Responds 400 for an unsupported `format` or a non four-digit `year`, and 404
+ * when the year has no articles.
+ */
+export default defineEventHandler(async event => {
+  // A request without a JSON body yields undefined; treat it as "all defaults".
+  const body: ExportContentBody = (await readBody<ExportContentBody>(event)) || {};
+  const format = String(body.format || 'html').toLowerCase() as ExportFormat;
+  if (!['html', 'markdown', 'text', 'json'].includes(format)) {
+    throw createError({ statusCode: 400, statusMessage: 'format must be html, markdown, text, or json' });
+  }
+
+  const year = Number(body.year || new Date().getFullYear());
+  // fetchArticles builds `${year}-01-01T...` timestamps, which need a four-digit year.
+  if (!Number.isInteger(year) || year < 1000 || year > 9999) {
+    throw createError({ statusCode: 400, statusMessage: 'year must be a four-digit integer' });
+  }
+  const fakeid = await resolveFakeid(event, body.account, body.fakeid);
+  const articles = await fetchArticles(event, fakeid, year);
+
+  if (articles.length === 0) {
+    throw createError({ statusCode: 404, statusMessage: `no articles found for ${year}` });
+  }
+
+  const zip = new JSZip();
+  const usedNames = new Set<string>();
+  const failures: Array<{ title: string; url: string; error: string }> = [];
+  // Counted separately: uniqueName() reserves a name before conversion can fail.
+  let exported = 0;
+
+  for (const article of articles) {
+    try {
+      const rawHtml = await fetchArticleHtml(article.link);
+      zip.file(uniqueName(article.title, extensions[format], usedNames), await convertContent(rawHtml, format));
+      exported++;
+    } catch (error) {
+      failures.push({
+        title: article.title,
+        url: article.link,
+        error: error instanceof Error ? error.message : String(error),
+      });
+    }
+  }
+
+  const zipBuffer = await zip.generateAsync({ type: 'nodebuffer' });
+  const dir = path.resolve(process.cwd(), '.data/exports');
+  await fs.mkdir(dir, { recursive: true });
+  const defaultName = `${normalizeFilename(body.account || fakeid)}-${year}-${format}.zip`;
+  // body.filename is untrusted input: keep only its final path segment.
+  const filename = resolveExportFilename(body.filename, defaultName);
+  const filepath = path.join(dir, filename);
+  await fs.writeFile(filepath, zipBuffer);
+
+  return {
+    fakeid,
+    year,
+    format,
+    total: articles.length,
+    exported,
+    failed: failures.length,
+    failures,
+    filename,
+    filepath,
+    size: zipBuffer.length,
+  };
+});
diff --git a/server/api/web/cli/export-pdf.post.ts b/server/api/web/cli/export-pdf.post.ts
new file mode 100644
index 00000000..9f99e20c
--- /dev/null
+++ b/server/api/web/cli/export-pdf.post.ts
@@ -0,0 +1,223 @@
+import { promises as fs } from 'node:fs';
+import path from 'node:path';
+import JSZip from 'jszip';
+import { ARTICLE_LIST_PAGE_SIZE, USER_AGENT } from '~/config';
+import { normalizeHtml } from '#shared/utils/html';
+import { filterInvalidFilenameChars } from '#shared/utils/helpers'; +import { getTokenFromStore } from '~/server/utils/CookieStore'; +import { proxyMpRequest } from '~/server/utils/proxy-request'; + +interface ExportPdfBody { + account?: string; + fakeid?: string; + year?: number; + filename?: string; +} + +function normalizeFilename(filename: string) { + return filterInvalidFilenameChars(filename || 'untitled') || 'untitled'; +} + +function uniqueName(filename: string, usedNames: Set) { + const base = normalizeFilename(filename); + let name = `${base}.pdf`; + let index = 2; + while (usedNames.has(name)) { + name = `${base}-${index}.pdf`; + index++; + } + usedNames.add(name); + return name; +} + +async function resolveFakeid(event: any, account?: string, fakeid?: string) { + if (fakeid) return fakeid; + if (!account) { + throw createError({ statusCode: 400, statusMessage: 'account or fakeid is required' }); + } + + const token = await getTokenFromStore(event); + if (!token) { + throw createError({ statusCode: 401, statusMessage: 'not logged in or auth-key expired' }); + } + + const resp = await proxyMpRequest({ + event, + method: 'GET', + endpoint: 'https://mp.weixin.qq.com/cgi-bin/searchbiz', + query: { + action: 'search_biz', + begin: 0, + count: 20, + query: account, + token, + lang: 'zh_CN', + f: 'json', + ajax: '1', + }, + parseJson: true, + }); + + if (resp?.base_resp?.ret !== 0) { + throw createError({ statusCode: 502, statusMessage: resp?.base_resp?.err_msg || 'search account failed' }); + } + + const matches = (resp.list || []).filter((item: any) => item.nickname === account || item.nickname?.includes(account)); + const matched = matches[0] || resp.list?.[0]; + if (!matched?.fakeid) { + throw createError({ statusCode: 404, statusMessage: `account not found: ${account}` }); + } + return matched.fakeid; +} + +async function fetchArticles(event: any, fakeid: string, year: number) { + const token = await getTokenFromStore(event); + if (!token) { + throw 
createError({ statusCode: 401, statusMessage: 'not logged in or auth-key expired' }); + } + + const start = Math.floor(new Date(`${year}-01-01T00:00:00+08:00`).getTime() / 1000); + const end = Math.floor(new Date(`${year + 1}-01-01T00:00:00+08:00`).getTime() / 1000); + const articles: any[] = []; + + for (let begin = 0; begin < 10000; begin += ARTICLE_LIST_PAGE_SIZE) { + const resp = await proxyMpRequest({ + event, + method: 'GET', + endpoint: 'https://mp.weixin.qq.com/cgi-bin/appmsgpublish', + query: { + sub: 'list', + search_field: 'null', + begin, + count: ARTICLE_LIST_PAGE_SIZE, + query: '', + fakeid, + type: '101_1', + free_publish_type: 1, + sub_action: 'list_ex', + token, + lang: 'zh_CN', + f: 'json', + ajax: 1, + }, + parseJson: true, + }); + + if (resp?.base_resp?.ret !== 0) { + throw createError({ statusCode: 502, statusMessage: resp?.base_resp?.err_msg || 'fetch article list failed' }); + } + + const publishPage = JSON.parse(resp.publish_page); + const publishList = (publishPage.publish_list || []).filter((item: any) => !!item.publish_info); + if (publishList.length === 0) break; + + const pageArticles = publishList.flatMap((item: any) => JSON.parse(item.publish_info).appmsgex || []); + for (const article of pageArticles) { + if (article.update_time >= start && article.update_time < end) { + articles.push(article); + } + } + + const oldest = Math.min(...pageArticles.map((article: any) => article.update_time).filter(Boolean)); + if (Number.isFinite(oldest) && oldest < start) break; + } + + return articles; +} + +async function fetchArticleHtml(url: string) { + const response = await fetch(url, { + headers: { + Referer: 'https://mp.weixin.qq.com/', + Origin: 'https://mp.weixin.qq.com', + 'User-Agent': USER_AGENT, + }, + }); + if (!response.ok) { + throw new Error(`fetch ${url} failed: ${response.status} ${response.statusText}`); + } + return response.text(); +} + +async function renderPdf(html: string) { + let getBrowser: Awaited['getBrowser']; + try { + 
getBrowser = (await import('~/server/utils/puppeteer')).getBrowser; + } catch { + throw createError({ statusCode: 501, statusMessage: 'current runtime does not support PDF export; use Docker deployment' }); + } + + const browser = await getBrowser(); + const page = await browser.newPage(); + + try { + await page.setViewport({ width: 794, height: 1123 }); + await page.setContent(html, { waitUntil: 'load', timeout: 60_000 }); + const contentHeight = await page.evaluate( + () => Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight), + ); + return await page.pdf({ + width: '210mm', + height: `${contentHeight}px`, + printBackground: true, + margin: { top: '0', bottom: '0', left: '0', right: '0' }, + }); + } finally { + await page.close(); + } +} + +function preparePdfHtml(rawHtml: string) { + const pdfStyleTag = ``; + const html = normalizeHtml(rawHtml, 'html'); + return html.includes('') ? html.replace('', `${pdfStyleTag}\n`) : `${pdfStyleTag}\n${html}`; +} + +export default defineEventHandler(async event => { + const body = await readBody(event); + const year = Number(body.year || new Date().getFullYear()); + const fakeid = await resolveFakeid(event, body.account, body.fakeid); + const articles = await fetchArticles(event, fakeid, year); + + if (articles.length === 0) { + throw createError({ statusCode: 404, statusMessage: `no articles found for ${year}` }); + } + + const zip = new JSZip(); + const usedNames = new Set(); + const failures: Array<{ title: string; url: string; error: string }> = []; + + for (const article of articles) { + try { + const rawHtml = await fetchArticleHtml(article.link); + zip.file(uniqueName(article.title, usedNames), await renderPdf(preparePdfHtml(rawHtml))); + } catch (error) { + failures.push({ + title: article.title, + url: article.link, + error: error instanceof Error ? 
error.message : String(error), + }); + } + } + + const zipBuffer = await zip.generateAsync({ type: 'nodebuffer' }); + const dir = path.resolve(process.cwd(), '.data/exports'); + await fs.mkdir(dir, { recursive: true }); + const filename = body.filename || `${normalizeFilename(body.account || fakeid)}-${year}-PDF.zip`; + const filepath = path.join(dir, filename); + await fs.writeFile(filepath, zipBuffer); + + return { + fakeid, + year, + total: articles.length, + exported: usedNames.size, + failed: failures.length, + failures, + filename, + filepath, + size: zipBuffer.length, + }; +}); diff --git a/server/api/web/cli/export-word.post.ts b/server/api/web/cli/export-word.post.ts new file mode 100644 index 00000000..98b60e62 --- /dev/null +++ b/server/api/web/cli/export-word.post.ts @@ -0,0 +1,244 @@ +import { promises as fs } from 'node:fs'; +import path from 'node:path'; +import vm from 'node:vm'; +import JSZip from 'jszip'; +import { ARTICLE_LIST_PAGE_SIZE, USER_AGENT } from '~/config'; +import { normalizeHtml } from '#shared/utils/html'; +import { filterInvalidFilenameChars } from '#shared/utils/helpers'; +import { getTokenFromStore } from '~/server/utils/CookieStore'; +import { proxyMpRequest } from '~/server/utils/proxy-request'; + +interface ExportWordBody { + account?: string; + fakeid?: string; + year?: number; + filename?: string; +} + +let htmlDocx: any; + +async function getHtmlDocx() { + if (htmlDocx) return htmlDocx; + + const candidates = [ + path.resolve(process.cwd(), 'public/vendors/html-docx-js@0.3.1/html-docx.js'), + path.resolve(process.cwd(), '.output/public/vendors/html-docx-js@0.3.1/html-docx.js'), + ]; + const vendorPath = await firstExistingPath(candidates); + if (!vendorPath) { + throw new Error(`html-docx vendor not found: ${candidates.join(', ')}`); + } + + const code = await fs.readFile(vendorPath, 'utf8'); + const context: any = { + module: { exports: {} }, + exports: {}, + Blob, + Buffer, + ArrayBuffer, + Uint8Array, + console, + }; + 
context.global = context; + context.window = context; + context.self = context; + vm.runInNewContext(code, context, { filename: vendorPath }); + htmlDocx = context.module.exports || context.htmlDocx; + return htmlDocx; +} + +async function firstExistingPath(paths: string[]) { + for (const filepath of paths) { + try { + await fs.access(filepath); + return filepath; + } catch { + // try next + } + } + return null; +} + +function normalizeFilename(filename: string) { + return filterInvalidFilenameChars(filename || 'untitled') || 'untitled'; +} + +function uniqueName(filename: string, usedNames: Set) { + const base = normalizeFilename(filename); + let name = `${base}.docx`; + let index = 2; + while (usedNames.has(name)) { + name = `${base}-${index}.docx`; + index++; + } + usedNames.add(name); + return name; +} + +async function resolveFakeid(event: any, account?: string, fakeid?: string) { + if (fakeid) return fakeid; + if (!account) { + throw createError({ statusCode: 400, statusMessage: 'account or fakeid is required' }); + } + + const token = await getTokenFromStore(event); + if (!token) { + throw createError({ statusCode: 401, statusMessage: 'not logged in or auth-key expired' }); + } + + const resp = await proxyMpRequest({ + event, + method: 'GET', + endpoint: 'https://mp.weixin.qq.com/cgi-bin/searchbiz', + query: { + action: 'search_biz', + begin: 0, + count: 20, + query: account, + token, + lang: 'zh_CN', + f: 'json', + ajax: '1', + }, + parseJson: true, + }); + + if (resp?.base_resp?.ret !== 0) { + throw createError({ statusCode: 502, statusMessage: resp?.base_resp?.err_msg || 'search account failed' }); + } + + const matches = (resp.list || []).filter((item: any) => item.nickname === account || item.nickname?.includes(account)); + const matched = matches[0] || resp.list?.[0]; + if (!matched?.fakeid) { + throw createError({ statusCode: 404, statusMessage: `account not found: ${account}` }); + } + return matched.fakeid; +} + +async function fetchArticles(event: 
any, fakeid: string, year: number) { + const token = await getTokenFromStore(event); + if (!token) { + throw createError({ statusCode: 401, statusMessage: 'not logged in or auth-key expired' }); + } + + const start = Math.floor(new Date(`${year}-01-01T00:00:00+08:00`).getTime() / 1000); + const end = Math.floor(new Date(`${year + 1}-01-01T00:00:00+08:00`).getTime() / 1000); + const articles: any[] = []; + + for (let begin = 0; begin < 10000; begin += ARTICLE_LIST_PAGE_SIZE) { + const resp = await proxyMpRequest({ + event, + method: 'GET', + endpoint: 'https://mp.weixin.qq.com/cgi-bin/appmsgpublish', + query: { + sub: 'list', + search_field: 'null', + begin, + count: ARTICLE_LIST_PAGE_SIZE, + query: '', + fakeid, + type: '101_1', + free_publish_type: 1, + sub_action: 'list_ex', + token, + lang: 'zh_CN', + f: 'json', + ajax: 1, + }, + parseJson: true, + }); + + if (resp?.base_resp?.ret !== 0) { + throw createError({ statusCode: 502, statusMessage: resp?.base_resp?.err_msg || 'fetch article list failed' }); + } + + const publishPage = JSON.parse(resp.publish_page); + const publishList = (publishPage.publish_list || []).filter((item: any) => !!item.publish_info); + if (publishList.length === 0) break; + + const pageArticles = publishList.flatMap((item: any) => JSON.parse(item.publish_info).appmsgex || []); + for (const article of pageArticles) { + if (article.update_time >= start && article.update_time < end) { + articles.push(article); + } + } + + const oldest = Math.min(...pageArticles.map((article: any) => article.update_time).filter(Boolean)); + if (Number.isFinite(oldest) && oldest < start) break; + } + + return articles; +} + +async function fetchArticleHtml(url: string) { + const response = await fetch(url, { + headers: { + Referer: 'https://mp.weixin.qq.com/', + Origin: 'https://mp.weixin.qq.com', + 'User-Agent': USER_AGENT, + }, + }); + if (!response.ok) { + throw new Error(`fetch ${url} failed: ${response.status} ${response.statusText}`); + } + return 
response.text(); +} + +async function blobToBuffer(blob: any) { + if (Buffer.isBuffer(blob)) return blob; + if (blob instanceof Uint8Array) return Buffer.from(blob); + if (typeof blob.arrayBuffer === 'function') { + return Buffer.from(await blob.arrayBuffer()); + } + throw new Error('unsupported docx blob'); +} + +export default defineEventHandler(async event => { + const body = await readBody(event); + const year = Number(body.year || new Date().getFullYear()); + const fakeid = await resolveFakeid(event, body.account, body.fakeid); + const articles = await fetchArticles(event, fakeid, year); + + if (articles.length === 0) { + throw createError({ statusCode: 404, statusMessage: `no articles found for ${year}` }); + } + + const docx = await getHtmlDocx(); + const zip = new JSZip(); + const usedNames = new Set(); + const failures: Array<{ title: string; url: string; error: string }> = []; + + for (const article of articles) { + try { + const rawHtml = await fetchArticleHtml(article.link); + const html = normalizeHtml(rawHtml, 'html'); + const blob = docx.asBlob(html); + zip.file(uniqueName(article.title, usedNames), await blobToBuffer(blob)); + } catch (error) { + failures.push({ + title: article.title, + url: article.link, + error: error instanceof Error ? error.message : String(error), + }); + } + } + + const zipBuffer = await zip.generateAsync({ type: 'nodebuffer' }); + const dir = path.resolve(process.cwd(), '.data/exports'); + await fs.mkdir(dir, { recursive: true }); + const filename = body.filename || `${normalizeFilename(body.account || fakeid)}-${year}-Word.zip`; + const filepath = path.join(dir, filename); + await fs.writeFile(filepath, zipBuffer); + + return { + fakeid, + year, + total: articles.length, + exported: usedNames.size, + failed: failures.length, + failures, + filename, + filepath, + size: zipBuffer.length, + }; +});