|
1 | | -import { cli } from '@jackwener/opencli/registry'; |
| 1 | +import { cli, Strategy } from '@jackwener/opencli/registry'; |
| 2 | +import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; |
| 3 | + |
/** Named entities that stripHtml decodes, keyed by entity name. */
const HTML_ENTITIES = { nbsp: ' ', lt: '<', gt: '>', amp: '&' };

/**
 * Convert an HTML fragment into trimmed plain text.
 *
 * Real tags are removed first, then the four supported named entities are
 * decoded, and finally any `<em>` / `</em>` markers that only became visible
 * through decoding are dropped.
 *
 * @param {unknown} html - Fragment to clean; falsy values are treated as ''.
 * @returns {string} Plain text with surrounding whitespace trimmed.
 */
function stripHtml(html) {
    return String(html || '')
        .replace(/<[^>]+>/g, '')
        // Single pass: an escaped ampersand followed by "lt;" stays literal text
        // instead of being decoded a second time into "<".
        .replace(/&(nbsp|lt|gt|amp);/g, (_match, name) => HTML_ENTITIES[name])
        .replace(/<\/?em>/g, '')
        .trim();
}
| 15 | + |
/**
 * Build the de-duplication key for a raw search result item.
 *
 * @param {{ object?: { id?: unknown, type?: string } } | null | undefined} item
 *   Raw item; a missing item or missing `object` yields null instead of throwing.
 * @returns {string | null} `"<type>:<id>"` (type may be empty), or null when
 *   the object has no id.
 */
function itemKey(item) {
    const obj = item?.object;
    if (obj?.id == null) return null;
    return `${obj.type || ''}:${obj.id}`;
}
| 21 | + |
/**
 * Build the public Zhihu URL for a search result object.
 *
 * @param {{ id?: unknown, type?: string, question?: { id?: unknown } } | null | undefined} obj
 *   The `object` payload of a search result.
 * @returns {string} The canonical URL for an answer, article or question, or
 *   '' when the type is not one of those or a required id is missing.
 */
function itemUrl(obj) {
    if (!obj || typeof obj !== 'object') return '';
    const id = obj.id == null ? '' : String(obj.id);
    switch (obj.type) {
        case 'answer': {
            // Answer URLs are nested under their question, so both ids are required.
            const questionId = obj.question?.id == null ? '' : String(obj.question.id);
            return questionId && id ? `https://www.zhihu.com/question/${questionId}/answer/${id}` : '';
        }
        case 'article':
            return id ? `https://zhuanlan.zhihu.com/p/${id}` : '';
        case 'question':
            return id ? `https://www.zhihu.com/question/${id}` : '';
        default:
            return '';
    }
}
| 36 | + |
/**
 * Validate a pagination URL and rewrite it onto the www.zhihu.com search endpoint.
 *
 * Only `api.zhihu.com/search_v3` and `www.zhihu.com/api/v4/search_v3` over
 * http(s) are accepted. The result is always rebuilt from the fixed https
 * endpoint plus the original query string, so a non-https scheme, port,
 * credentials or fragment in the input never reaches the next request.
 *
 * @param {unknown} url - Candidate next-page URL.
 * @returns {string} Normalized https URL, or '' when the input is not an
 *   accepted search URL.
 */
function normalizeSearchUrl(url) {
    if (typeof url !== 'string' || !url) return '';
    let parsed;
    try {
        parsed = new URL(url);
    } catch {
        // Not an absolute URL.
        return '';
    }
    if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return '';
    const isApiEndpoint = parsed.hostname === 'api.zhihu.com' && parsed.pathname === '/search_v3';
    const isWebEndpoint = parsed.hostname === 'www.zhihu.com' && parsed.pathname === '/api/v4/search_v3';
    if (!isApiEndpoint && !isWebEndpoint) return '';
    return `https://www.zhihu.com/api/v4/search_v3${parsed.search}`;
}
| 52 | + |
/** Largest value accepted for `--limit`. */
const MAX_LIMIT = 1000;
/** Value sent as the `limit` query parameter of the first search request. */
const PAGE_SIZE = 20;
/** Values accepted for `--type`; 'all' disables type filtering. */
const TYPES = ['all', 'answer', 'article', 'question'];
| 56 | + |
/**
 * Validate the `--limit` argument.
 *
 * @param {unknown} value - Raw argument; null/undefined fall back to 10.
 * @returns {number} An integer in the range 1..MAX_LIMIT.
 * @throws {ArgumentError} When the value is not an integer in that range.
 */
function parseLimit(value) {
    const limit = Number(value ?? 10);
    const isValid = Number.isInteger(limit) && limit > 0 && limit <= MAX_LIMIT;
    if (!isValid) {
        throw new ArgumentError(`zhihu search --limit must be a positive integer no greater than ${MAX_LIMIT}`, 'Use a normal-sized limit to avoid slow requests or Zhihu risk controls');
    }
    return limit;
}
| 64 | + |
/**
 * Validate the search query argument.
 *
 * @param {unknown} value - Raw argument; it is stringified and trimmed.
 *   Only null/undefined are treated as missing, so a numeric `0` is searched
 *   as "0" rather than rejected.
 * @returns {string} The trimmed, non-empty query.
 * @throws {ArgumentError} When the query is empty after trimming.
 */
function requireQuery(value) {
    const query = String(value ?? '').trim();
    if (!query) {
        throw new ArgumentError('zhihu search query must not be empty', 'Example: opencli zhihu search codex');
    }
    return query;
}
| 72 | + |
/**
 * Validate the `--type` argument.
 *
 * @param {unknown} value - Raw argument; falsy values fall back to 'all'.
 * @returns {string} One of the entries of TYPES.
 * @throws {ArgumentError} When the value is not listed in TYPES.
 */
function requireType(value) {
    const type = String(value || 'all');
    if (TYPES.includes(type)) return type;
    throw new ArgumentError(`zhihu search --type must be one of: ${TYPES.join(', ')}`, 'Example: opencli zhihu search codex --type answer');
}
| 80 | + |
/**
 * Unwrap an evaluate result that arrives inside a `{ data, session }` envelope.
 *
 * @param {unknown} payload - Value returned by the page evaluation.
 * @returns {unknown} `payload.data` when the payload is an object carrying
 *   both a `data` and a `session` key; otherwise the payload unchanged.
 */
function unwrapEvaluateResult(payload) {
    const isEnvelope = Boolean(payload)
        && typeof payload === 'object'
        && 'data' in payload
        && 'session' in payload;
    return isEnvelope ? payload.data : payload;
}
| 85 | + |
/**
 * Validate the value returned by the in-page search fetch and return the payload.
 *
 * @param {unknown} data - Raw evaluate result, optionally wrapped in a
 *   `{ data, session }` envelope.
 * @param {string} url - Request URL, used only in error hints.
 * @returns {{ data: unknown[], paging: object }} The validated payload.
 * @throws {AuthRequiredError} When the request failed with HTTP 401 or 403.
 * @throws {CommandExecutionError} On any other HTTP or fetch failure, or when
 *   the payload, its `data` list or its `paging` object is malformed.
 */
function requireSearchPayload(data, url) {
    const payload = unwrapEvaluateResult(data);
    if (!payload || typeof payload !== 'object' || Array.isArray(payload)) {
        throw new CommandExecutionError('Zhihu search returned malformed payload', `URL: ${url}`);
    }
    // Compare against null rather than truthiness so that a reported status of 0
    // is still handled as a request failure.
    if (payload.__httpError != null) {
        const status = payload.__httpError;
        if (status === 401 || status === 403) {
            throw new AuthRequiredError('www.zhihu.com', 'Failed to fetch search results from Zhihu');
        }
        throw new CommandExecutionError(`Zhihu search request failed${status ? ` (HTTP ${status})` : ''}`, 'Try again later or rerun with -v for more detail');
    }
    if (payload.__fetchError != null) {
        throw new CommandExecutionError('Zhihu search request failed', String(payload.__fetchError));
    }
    if (!Array.isArray(payload.data)) {
        throw new CommandExecutionError('Zhihu search returned malformed data list', `URL: ${url}`);
    }
    if (!payload.paging || typeof payload.paging !== 'object') {
        throw new CommandExecutionError('Zhihu search returned malformed paging data', `URL: ${url}`);
    }
    return payload;
}
| 109 | + |
/**
 * Convert one raw search item into an output row.
 *
 * @param {unknown} item - Raw entry from the response `data` list.
 * @returns {{ item: object, key: string, row: { title: string, type: string, author: string, votes: number, url: string } } | null}
 *   The original item, its de-duplication key and the output row; null when
 *   the item is not a `search_result` wrapping an answer, article or question.
 * @throws {CommandExecutionError} When a supported item has no key, URL or title.
 */
function normalizeResultItem(item) {
    const isSearchResult = Boolean(item)
        && typeof item === 'object'
        && item.type === 'search_result'
        && Boolean(item.object)
        && typeof item.object === 'object';
    if (!isSearchResult) return null;

    const obj = item.object;
    const isSupportedType = obj.type === 'answer' || obj.type === 'article' || obj.type === 'question';
    if (!isSupportedType) return null;

    const key = itemKey(item);
    const url = itemUrl(obj);
    const question = obj.question || {};
    // When the object has no title of its own, fall back to the question's name/title.
    const title = stripHtml(obj.title || question.name || question.title || '');
    if (!key || !url || !title) {
        throw new CommandExecutionError('Zhihu search returned malformed result row identity');
    }

    const row = {
        title,
        type: obj.type,
        author: obj.author?.name || '',
        votes: obj.voteup_count || 0,
        url,
    };
    return { item, key, row };
}
| 135 | + |
// Registers the `zhihu search` command. Results are collected page by page
// from the search_v3 endpoint until `--limit` rows are gathered or the API
// reports the end of the listing.
cli({
    site: 'zhihu',
    name: 'search',
    access: 'read',
    description: '知乎搜索',
    domain: 'www.zhihu.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'query', required: true, positional: true, help: 'Search query' },
        { name: 'limit', type: 'int', default: 10, help: 'Number of results (max 1000; use normal-sized requests)' },
        { name: 'type', default: 'all', choices: TYPES, help: 'Result type: all, answer, article, or question' },
    ],
    columns: ['rank', 'title', 'type', 'author', 'votes', 'url'],
    func: async (page, kwargs) => {
        const query = requireQuery(kwargs.query);
        const resultLimit = parseLimit(kwargs.limit);
        const type = requireType(kwargs.type);
        // NOTE(review): presumably navigating first makes the in-page fetch below
        // run on the zhihu.com origin with the session cookies — confirm.
        await page.goto('https://www.zhihu.com');
        let url = 'https://www.zhihu.com/api/v4/search_v3'
            + `?q=${encodeURIComponent(query)}&t=general&offset=0&limit=${PAGE_SIZE}`;
        const results = [];
        const seen = new Set(); // keys of rows already emitted
        const visited = new Set(); // page URLs already requested
        while (url && results.length < resultLimit && !visited.has(url)) {
            visited.add(url);
            // The fetch runs inside the page; failures come back as marker
            // objects that requireSearchPayload turns into typed errors.
            const data = requireSearchPayload(await page.evaluate(`
                (async () => {
                    try {
                        const r = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
                        if (!r.ok) return { __httpError: r.status };
                        return await r.json();
                    } catch (err) {
                        return { __fetchError: err?.message || String(err) };
                    }
                })()
            `), url);
            for (const item of data.data) {
                const rawType = item?.object?.type;
                // Skip other types before normalizing so a malformed row of a
                // type the caller did not ask for cannot abort the command.
                if (type !== 'all' && rawType && rawType !== type) continue;
                const normalized = normalizeResultItem(item);
                if (!normalized) continue;
                if (type !== 'all' && normalized.row.type !== type) continue;
                if (seen.has(normalized.key)) continue;
                seen.add(normalized.key);
                results.push(normalized.row);
                if (results.length >= resultLimit) break;
            }
            if (results.length >= resultLimit) break;
            if (data.paging?.is_end) break;
            const next = normalizeSearchUrl(data.paging?.next);
            if (!next) {
                throw new CommandExecutionError('Zhihu search pagination returned malformed next URL');
            }
            if (visited.has(next)) {
                throw new CommandExecutionError('Zhihu search pagination returned a repeated next URL');
            }
            url = next;
        }
        if (results.length === 0) {
            throw new EmptyResultError('zhihu search', `No ${type === 'all' ? '' : `${type} `}results found for "${query}"`);
        }
        return results.map((row, i) => ({ rank: i + 1, ...row }));
    },
});
| 205 | + |
/** Internal helpers exported under one name so they can be exercised directly. */
export const __test__ = {
    stripHtml,
    itemKey,
    itemUrl,
    normalizeSearchUrl,
    parseLimit,
    requireQuery,
    requireType,
    unwrapEvaluateResult,
    requireSearchPayload,
    normalizeResultItem,
};
0 commit comments