|
| 1 | +import { cli, Strategy } from '@jackwener/opencli/registry'; |
| 2 | +import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; |
| 3 | + |
/**
 * Hostnames a share URL may point at. Membership is checked against
 * `URL#hostname`, so entries must be exact, lower-case host names.
 */
const ALLOWED_HOSTS = new Set(
  ['share.note.youdao.com', 'note.youdao.com', 'share.note.youdao.cn', 'note.youdao.cn'],
);
| 10 | + |
/**
 * Strip the `{ session, data }` envelope from an evaluate result, if present.
 *
 * @param {*} payload - Value returned by the page evaluation.
 * @returns {*} `payload.data` when `payload` is a non-array object that has
 *   both a `session` and a `data` property; otherwise `payload` unchanged.
 */
function unwrapEvaluateResult(payload) {
  const isNonArrayObject = Boolean(payload) && typeof payload === 'object' && !Array.isArray(payload);
  if (!isNonArrayObject) {
    return payload;
  }
  const looksLikeEnvelope = 'session' in payload && 'data' in payload;
  return looksLikeEnvelope ? payload.data : payload;
}
| 17 | + |
/**
 * Validate a user-supplied share URL and return its normalized string form.
 *
 * Checks are applied in order: non-empty, parseable, http/https scheme,
 * allow-listed host, presence of an `id` query parameter, and (when a `type`
 * parameter is present) that it equals `note`.
 *
 * @param {*} raw - URL as passed on the command line; coerced to a string.
 * @returns {string} The serialized form of the parsed URL.
 * @throws {ArgumentError} When any of the checks above fails.
 */
function normalizeShareUrl(raw) {
  const trimmed = String(raw ?? '').trim();
  if (trimmed === '') {
    throw new ArgumentError('youdao note url cannot be empty', 'Pass a full public share URL from Youdao Notes.');
  }

  let shareUrl;
  try {
    shareUrl = new URL(trimmed);
  } catch {
    throw new ArgumentError('Invalid Youdao Note URL', 'Example: https://share.note.youdao.com/ynoteshare/index.html?id=...&type=note');
  }

  const { protocol, hostname, searchParams } = shareUrl;
  if (!['https:', 'http:'].includes(protocol)) {
    throw new ArgumentError('Youdao Note URL must use http or https');
  }
  if (!ALLOWED_HOSTS.has(hostname)) {
    throw new ArgumentError('Youdao Note URL must be under note.youdao.com or note.youdao.cn');
  }
  if (!searchParams.get('id')) {
    throw new ArgumentError('Youdao Note URL must include an id query parameter');
  }

  // A missing or empty `type` is tolerated; only an explicit non-note type is rejected.
  const shareType = searchParams.get('type');
  if (shareType && shareType !== 'note') {
    throw new ArgumentError('youdao note only accepts shared note URLs', 'Shared notebooks are not implemented yet.');
  }

  return shareUrl.toString();
}
| 44 | + |
/**
 * Render a numeric timestamp as an ISO-8601 string.
 *
 * Numbers below 10,000,000,000 are treated as seconds and multiplied by 1000;
 * larger numbers are used as milliseconds directly.
 *
 * @param {*} value - Timestamp as a number or numeric string.
 * @returns {string} `''` for null/undefined/empty input; the ISO string for a
 *   positive finite number that yields a valid date; otherwise `String(value)`.
 */
function formatYoudaoTimestamp(value) {
  const isBlank = value === null || value === undefined || value === '';
  if (isBlank) return '';

  const asNumber = Number(value);
  const isUsable = Number.isFinite(asNumber) && asNumber > 0;
  if (!isUsable) return String(value);

  const secondsCutoff = 10_000_000_000;
  const epochMs = asNumber >= secondsCutoff ? asNumber : asNumber * 1000;
  const stamp = new Date(epochMs);

  // Out-of-range values produce an invalid Date, for which toISOString() would throw.
  return Number.isNaN(stamp.getTime()) ? String(value) : stamp.toISOString();
}
| 54 | + |
/**
 * Build the JavaScript source that is evaluated inside the loaded share page.
 *
 * The script is a self-invoking function expression. It locates the React
 * root under `#root`, walks the fiber tree (child first, siblings via an
 * explicit stack) and searches each fiber's `memoizedState` and
 * `memoizedProps` for an object that either has a `storeState` object or
 * itself has `content.data`. Note text is collected from every string stored
 * under key `8` in the JSON found in `content.data.content`; if that JSON
 * cannot be parsed, the raw string is used instead.
 *
 * The script evaluates to one of two array shapes:
 * - failure: `[false, reason, bodyText]`, where `reason` is `'auth'` or
 *   `'not_found'` when the first 1000 characters of body text match the
 *   corresponding patterns, otherwise one of `'root_missing'`,
 *   `'react_root_missing'`, `'store_missing'`, `'content_data_missing'`;
 * - success: `[true, title, content, summary, keywords, ct, sz,
 *   hasContentField, rawContentLength, locationHref]`.
 *
 * Visited objects in the store search are tracked in a `Set`, so cycle
 * detection is a constant-time lookup instead of a linear array scan. A fresh
 * set is used for every search so the depth limit behaves the same per fiber.
 * AI-summary JSON that fails to parse is ignored on purpose: summary and
 * keywords are optional extras and simply stay empty.
 *
 * @returns {string} JavaScript source text to pass to the page evaluator.
 */
function buildExtractorJs() {
  return `
    (function() {
      function walkText(node, out) {
        if (!node || typeof node !== 'object') return;
        if (Array.isArray(node)) {
          for (var i = 0; i < node.length; i += 1) walkText(node[i], out);
          return;
        }
        if (typeof node[8] === 'string') {
          var text = node[8].replace(/\\s+/g, ' ').trim();
          if (text) out.push(text);
        }
        var keys = Object.keys(node);
        for (var j = 0; j < keys.length; j += 1) {
          var value = node[keys[j]];
          if (value && typeof value === 'object') walkText(value, out);
        }
      }
      function cleanText(value) {
        return String(value || '').replace(/\\u00a0/g, ' ').replace(/[ \\t]+\\n/g, '\\n').replace(/\\n{3,}/g, '\\n\\n').trim();
      }
      function pageText() {
        return cleanText((document.body && (document.body.innerText || document.body.textContent)) || '').slice(0, 1000);
      }
      function classifyBodyText(text) {
        if (/登录|登陆|请先登录|无权|权限|访问受限|验证码|安全验证|login|forbidden|permission/i.test(text)) return 'auth';
        if (/分享已取消|分享不存在|文件不存在|笔记不存在|页面不存在|已过期|不存在|not found|404/i.test(text)) return 'not_found';
        return '';
      }
      function findStoreState(value, depth, seen) {
        if (!value || typeof value !== 'object' || depth > 10) return null;
        if (seen.has(value)) return null;
        seen.add(value);
        if (value.storeState && typeof value.storeState === 'object') return value.storeState;
        if (value.content && value.content.data && typeof value.content.data === 'object') return value;
        var keys = Object.keys(value);
        for (var i = 0; i < keys.length; i += 1) {
          var found = findStoreState(value[keys[i]], depth + 1, seen);
          if (found) return found;
        }
        return null;
      }
      function findStoreFromFiber(fiber) {
        var cursor = fiber;
        var stack = [];
        while (cursor || stack.length) {
          if (!cursor) {
            cursor = stack.pop();
            continue;
          }
          var fromState = findStoreState(cursor.memoizedState, 0, new Set());
          if (fromState) return fromState;
          var fromProps = findStoreState(cursor.memoizedProps, 0, new Set());
          if (fromProps) return fromProps;
          if (cursor.sibling) stack.push(cursor.sibling);
          cursor = cursor.child;
        }
        return null;
      }
      var root = document.querySelector('#root');
      var body = pageText();
      var bodyKind = classifyBodyText(body);
      if (!root) {
        return [false, bodyKind || 'root_missing', body];
      }
      var reactKey = Object.keys(root).find(function(key) { return key.indexOf('__reactContainer$') === 0; });
      var fiber = (root._reactRootContainer && root._reactRootContainer._internalRoot && root._reactRootContainer._internalRoot.current)
        || (reactKey ? root[reactKey] : null);
      if (!fiber) {
        return [false, bodyKind || 'react_root_missing', body];
      }
      var store = findStoreFromFiber(fiber);
      if (!store) {
        return [false, bodyKind || 'store_missing', body];
      }
      var contentData = store.content && store.content.data;
      if (!contentData || typeof contentData !== 'object') {
        return [false, bodyKind || 'content_data_missing', body];
      }
      var title = cleanText(contentData.tl || document.querySelector('.file-name')?.textContent || document.title || '');
      var hasContentField = Object.prototype.hasOwnProperty.call(contentData, 'content');
      var rawContent = hasContentField ? String(contentData.content || '') : '';
      var content = '';
      if (rawContent) {
        try {
          var parsed = JSON.parse(rawContent);
          var parts = [];
          walkText(parsed, parts);
          content = cleanText(parts.join('\\n'));
        } catch (error) {
          content = cleanText(rawContent);
        }
      }
      var summary = '';
      var keywords = [];
      var ai = store.aiSummary;
      if (ai && ai.aiSummary) {
        try {
          var aiPayload = JSON.parse(ai.aiSummary);
          summary = cleanText(aiPayload.description || '');
          if (Array.isArray(aiPayload.keywords)) {
            for (var i = 0; i < aiPayload.keywords.length; i += 1) {
              var keyword = aiPayload.keywords[i];
              if (keyword && keyword.title) keywords.push(cleanText(((keyword.emoji || '') + ' ' + keyword.title).trim()));
            }
          }
        } catch {}
      }
      return [true, title, content, summary, keywords.join(' | '), contentData.ct || null, contentData.sz || null, hasContentField, rawContent.length, window.location.href];
    })()
  `;
}
| 171 | + |
/**
 * Convert the array produced by the in-page extractor into an output row.
 *
 * The payload is unwrapped with `unwrapEvaluateResult` and must be an array.
 * When its first element is not `true`, the second element is read as a
 * failure reason and mapped to an error. Otherwise elements 1-9 are read as
 * title, content, summary, keywords, create time, file size, has-content
 * flag, raw content length and final URL.
 *
 * @param {*} payload - Raw value returned by the page evaluation.
 * @param {string} sourceUrl - URL used when the payload carries no final URL.
 * @returns {{title: string, content: string, summary: string, keywords: string,
 *   created_at: string, file_size: string, url: string}} The normalized row.
 * @throws {AuthRequiredError} When the failure reason is `auth`.
 * @throws {EmptyResultError} When the failure reason is `not_found`.
 * @throws {CommandExecutionError} For a non-array payload, any other failure
 *   reason, a missing title, a missing content field, or raw content that
 *   produced no text.
 */
function normalizeExtractionResult(payload, sourceUrl) {
  const tuple = unwrapEvaluateResult(payload);
  if (!Array.isArray(tuple)) {
    throw new CommandExecutionError('Youdao note extractor returned a malformed payload');
  }

  if (tuple[0] !== true) {
    const rawReason = tuple[1];
    const reason = typeof rawReason === 'string' && rawReason.trim() ? rawReason.trim() : 'unknown_parser_failure';
    switch (reason) {
      case 'auth':
        throw new AuthRequiredError('note.youdao.com', 'Youdao shared note requires login or additional permission');
      case 'not_found':
        throw new EmptyResultError('youdao note', 'The shared note is missing, expired, cancelled, or inaccessible.');
      default:
        throw new CommandExecutionError(`Youdao note parser failed: ${reason}`);
    }
  }

  const [, rawTitle, rawBody, rawSummary, rawKeywords, createTime, fileSize, contentFlag, rawLength, pageUrl] = tuple;
  const title = String(rawTitle ?? '');
  const content = String(rawBody ?? '');
  const summary = String(rawSummary ?? '');
  const keywords = String(rawKeywords ?? '');
  const rawContentLength = Number(rawLength ?? 0);
  const url = String(pageUrl || sourceUrl);

  if (!title) {
    throw new CommandExecutionError('Youdao note parser did not extract a title');
  }
  if (contentFlag !== true) {
    throw new CommandExecutionError('Youdao note parser did not find full note content in the page store');
  }
  // Non-empty raw content with no extracted text means the text walk found nothing usable.
  if (rawContentLength > 0 && !content) {
    throw new CommandExecutionError('Youdao note parser found note content but extracted no readable text');
  }

  return {
    title,
    content,
    summary,
    keywords,
    created_at: formatYoudaoTimestamp(createTime),
    file_size: fileSize == null ? '' : String(fileSize),
    url,
  };
}
| 216 | + |
/**
 * `youdao note <url>` command: opens a shared note in the browser page,
 * runs the in-page extractor and returns a single normalized row.
 *
 * Navigation and evaluation failures are rethrown as CommandExecutionError
 * with the underlying message appended. Failures of the waits are ignored so
 * that extraction is still attempted.
 */
const command = cli({
  site: 'youdao',
  name: 'note',
  access: 'read',
  description: 'Read a public shared Youdao Note',
  domain: 'share.note.youdao.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'url', positional: true, required: true, help: 'Full share URL of the Youdao Note' },
  ],
  columns: ['title', 'content', 'summary', 'keywords', 'created_at', 'file_size', 'url'],
  async func(page, kwargs) {
    const messageOf = (error) => (error instanceof Error ? error.message : String(error));
    const targetUrl = normalizeShareUrl(kwargs.url);

    try {
      await page.goto(targetUrl);
    } catch (error) {
      throw new CommandExecutionError(`Failed to open Youdao Note URL: ${messageOf(error)}`);
    }

    // NOTE(review): the selector list includes `body`, which presumably matches
    // as soon as the document exists — confirm the wait semantics and units.
    try {
      await page.wait({ selector: '#root, .file-name, body', timeout: 10 });
    } catch {
      await page.wait(3).catch(() => {});
    }
    // Extra fixed pause before extraction; a failure here is deliberately ignored.
    await page.wait(2).catch(() => {});

    let extracted;
    try {
      extracted = await page.evaluate(buildExtractorJs());
    } catch (error) {
      throw new CommandExecutionError(`Youdao note extractor failed: ${messageOf(error)}`);
    }

    return [normalizeExtractionResult(extracted, targetUrl)];
  },
});
| 251 | + |
/** Internal helpers and the command object, exposed for unit tests. */
export const __test__ = {
  buildExtractorJs,
  command,
  formatYoudaoTimestamp,
  normalizeExtractionResult,
  normalizeShareUrl,
};
0 commit comments