Skip to content

Commit baf1522

Browse files
Pandas886, huzekang, jackwener
authored
feat: add Youdao Notes shared note reader adapter (#1547)
* feat: add Youdao Notes shared note reader adapter

  Add a new adapter for reading publicly shared Youdao Notes (有道云笔记).

  - youdao note <url>: Fetches a public shared note by its share URL using browser-based DOM extraction. Extracts title, content, and keyword tags from the React-rendered page.
  - Supports note.youdao.com and note.youdao.cn share URLs.
  - Includes test coverage (3 tests) and documentation.

  Closes #1418

* fix: extract full note content from React Redux store

  Previously the adapter only extracted the AI summary section from the DOM. Now it accesses the React fiber tree to read the full note content from the Redux store (store.content.data.content), which contains the complete note body in Youdao's structured format. The extractor recursively walks Youdao's proprietary node format (key '8' for text content) to reconstruct the full note as plain text.

* fix(youdao): harden shared note reader contract

---------

Co-authored-by: huzekang <huzekang@opencode.ai>
Co-authored-by: jackwener <jakevingoo@gmail.com>
1 parent e3995df commit baf1522

7 files changed

Lines changed: 431 additions & 0 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ To load the source Browser Bridge extension:
277277
| **wanfang** | `search` |
278278
| **hackernews** | `top` `new` `best` `ask` `show` `jobs` `search` `user` |
279279
| **xiaoyuzhou** | `auth*` `podcast*` `podcast-episodes*` `episode*` `download*` `transcript*` |
280+
| **youdao** | `note` |
280281

281282
100+ site surfaces in total — **[→ see all supported sites & commands](./docs/adapters/index.md)**
282283

README.zh-CN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ npm link
263263
| **zhihu** | `hot` `search` `question` `download` `follow` `like` `favorite` `comment` `answer` | 浏览器 |
264264
| **weixin** | `download` | 浏览器 |
265265
| **youtube** | `search` `video` `transcript` `comments` `channel` `playlist` `feed` `history` `watch-later` `subscriptions` `like` `unlike` `subscribe` `unsubscribe` | 浏览器 |
266+
| **youdao** | `note` | 公开 |
266267
| **boss** | `search` `detail` `recommend` `joblist` `greet` `batchgreet` `send` `chatlist` `chatmsg` `invite` `mark` `exchange` `resume` `stats` | 浏览器 |
267268
| **coupang** | `search` `add-to-cart` | 浏览器 |
268269
| **bbc** | `news` | 公共 API |

cli-manifest.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27604,6 +27604,36 @@
2760427604
"sourceFile": "yollomi/video.js",
2760527605
"navigateBefore": "https://yollomi.com"
2760627606
},
27607+
{
27608+
"site": "youdao",
27609+
"name": "note",
27610+
"description": "Read a public shared Youdao Note",
27611+
"access": "read",
27612+
"domain": "share.note.youdao.com",
27613+
"strategy": "public",
27614+
"browser": true,
27615+
"args": [
27616+
{
27617+
"name": "url",
27618+
"type": "str",
27619+
"required": true,
27620+
"positional": true,
27621+
"help": "Full share URL of the Youdao Note"
27622+
}
27623+
],
27624+
"columns": [
27625+
"title",
27626+
"content",
27627+
"summary",
27628+
"keywords",
27629+
"created_at",
27630+
"file_size",
27631+
"url"
27632+
],
27633+
"type": "js",
27634+
"modulePath": "youdao/note.js",
27635+
"sourceFile": "youdao/note.js"
27636+
},
2760727637
{
2760827638
"site": "youtube",
2760927639
"name": "channel",

clis/youdao/note.js

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
import { cli, Strategy } from '@jackwener/opencli/registry';
2+
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
3+
4+
/**
 * Exact hostnames accepted for Youdao Notes share links.
 * Matching is by full hostname, so other subdomains of these sites are
 * rejected unless they are listed here explicitly.
 */
const ALLOWED_HOSTS = new Set([
  'share.note.youdao.com',
  'note.youdao.com',
  'share.note.youdao.cn',
  'note.youdao.cn',
]);
10+
11+
/**
 * Strip the optional `{ session, data }` envelope from an evaluate result.
 *
 * @param {*} payload - Value returned by the page evaluation.
 * @returns {*} `payload.data` when `payload` is a non-array object that has
 *   both a `session` and a `data` key; otherwise `payload` unchanged.
 */
function unwrapEvaluateResult(payload) {
  const isEnvelope =
    payload !== null &&
    typeof payload === 'object' &&
    !Array.isArray(payload) &&
    'session' in payload &&
    'data' in payload;
  return isEnvelope ? payload.data : payload;
}
17+
18+
/**
 * Validate a user-supplied Youdao Notes share link and return it in the
 * serialized form produced by the `URL` parser.
 *
 * Checks run in this order: non-empty, parseable, http(s) scheme, hostname in
 * `ALLOWED_HOSTS`, non-empty `id` query parameter, and `type` either absent
 * or equal to `note`.
 *
 * @param {*} raw - Share URL as typed by the user; coerced with `String` and trimmed.
 * @returns {string} The parsed URL serialized back to a string.
 * @throws {ArgumentError} When any of the checks above fails.
 */
function normalizeShareUrl(raw) {
  const value = String(raw ?? '').trim();
  if (!value) {
    throw new ArgumentError('youdao note url cannot be empty', 'Pass a full public share URL from Youdao Notes.');
  }

  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    throw new ArgumentError('Invalid Youdao Note URL', 'Example: https://share.note.youdao.com/ynoteshare/index.html?id=...&type=note');
  }

  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
    throw new ArgumentError('Youdao Note URL must use http or https');
  }
  if (!ALLOWED_HOSTS.has(parsed.hostname)) {
    throw new ArgumentError('Youdao Note URL must be under note.youdao.com or note.youdao.cn');
  }
  if (!parsed.searchParams.get('id')) {
    throw new ArgumentError('Youdao Note URL must include an id query parameter');
  }

  // A missing `type` is accepted; any explicit value other than `note` is rejected.
  const type = parsed.searchParams.get('type');
  if (type && type !== 'note') {
    throw new ArgumentError('youdao note only accepts shared note URLs', 'Shared notebooks are not implemented yet.');
  }

  return parsed.toString();
}
44+
45+
/**
 * Convert a numeric epoch timestamp into an ISO-8601 string.
 *
 * Values below 10,000,000,000 are treated as seconds and multiplied by 1000;
 * larger values are used as milliseconds.
 *
 * @param {*} value - Timestamp as a number or numeric string.
 * @returns {string} `''` for `null`, `undefined` or `''`; the ISO string for a
 *   positive finite number that yields a valid date; otherwise `String(value)`.
 */
function formatYoudaoTimestamp(value) {
  if (value == null || value === '') return '';

  const numeric = Number(value);
  if (!Number.isFinite(numeric) || numeric <= 0) return String(value);

  // Heuristic cutoff between second-based and millisecond-based epochs.
  const secondsCutoff = 10_000_000_000;
  const millis = numeric < secondsCutoff ? numeric * 1000 : numeric;

  const date = new Date(millis);
  // Out-of-range inputs produce an invalid Date; fall back to the raw text
  // instead of letting toISOString() throw.
  if (Number.isNaN(date.getTime())) return String(value);
  return date.toISOString();
}
54+
55+
/**
 * Build the JavaScript source that is evaluated inside the shared-note page.
 *
 * The returned string is a self-invoking function expression. When evaluated
 * it locates the React fiber attached to `#root`, walks the fiber tree looking
 * for an object that carries the page store (either a `storeState` property or
 * a `content.data` object), and reads the note from it.
 *
 * The evaluated script returns a positional array:
 * - failure: `[false, reason, bodyText]`, where `reason` is `'auth'` or
 *   `'not_found'` when the first 1000 characters of the page text match the
 *   corresponding pattern, otherwise one of `'root_missing'`,
 *   `'react_root_missing'`, `'store_missing'`, `'content_data_missing'`.
 * - success: `[true, title, content, summary, keywords, ct, sz,
 *   hasContentField, rawContentLength, locationHref]`.
 *
 * Note content is stored as a JSON string; every string found under key `8`
 * anywhere in the parsed tree is collected and joined with newlines. If the
 * content is not valid JSON, the raw string is used as text instead.
 *
 * Visited objects in the store search are tracked in a `Set`, so each cycle
 * check is constant-time however large the inspected state graph is.
 *
 * @returns {string} Script source suitable for `page.evaluate`.
 */
function buildExtractorJs() {
  const walkTextFn = `
    function walkText(node, out) {
      if (!node || typeof node !== 'object') return;
      if (Array.isArray(node)) {
        for (var i = 0; i < node.length; i += 1) walkText(node[i], out);
        return;
      }
      if (typeof node[8] === 'string') {
        var text = node[8].replace(/\\s+/g, ' ').trim();
        if (text) out.push(text);
      }
      var keys = Object.keys(node);
      for (var j = 0; j < keys.length; j += 1) {
        var value = node[keys[j]];
        if (value && typeof value === 'object') walkText(value, out);
      }
    }
  `;
  return `
    (function() {
      ${walkTextFn}
      function cleanText(value) {
        return String(value || '').replace(/\\u00a0/g, ' ').replace(/[ \\t]+\\n/g, '\\n').replace(/\\n{3,}/g, '\\n\\n').trim();
      }
      function pageText() {
        return cleanText((document.body && (document.body.innerText || document.body.textContent)) || '').slice(0, 1000);
      }
      function classifyBodyText(text) {
        if (/登录|登陆|请先登录|无权|权限|访问受限|验证码|安全验证|login|forbidden|permission/i.test(text)) return 'auth';
        if (/分享已取消|分享不存在|文件不存在|笔记不存在|页面不存在|已过期|不存在|not found|404/i.test(text)) return 'not_found';
        return '';
      }
      function findStoreState(value, depth, seen) {
        if (!value || typeof value !== 'object' || depth > 10) return null;
        if (seen.has(value)) return null;
        seen.add(value);
        if (value.storeState && typeof value.storeState === 'object') return value.storeState;
        if (value.content && value.content.data && typeof value.content.data === 'object') return value;
        var keys = Object.keys(value);
        for (var i = 0; i < keys.length; i += 1) {
          var found = findStoreState(value[keys[i]], depth + 1, seen);
          if (found) return found;
        }
        return null;
      }
      function findStoreFromFiber(fiber) {
        var cursor = fiber;
        var stack = [];
        while (cursor || stack.length) {
          if (!cursor) {
            cursor = stack.pop();
            continue;
          }
          var fromState = findStoreState(cursor.memoizedState, 0, new Set());
          if (fromState) return fromState;
          var fromProps = findStoreState(cursor.memoizedProps, 0, new Set());
          if (fromProps) return fromProps;
          if (cursor.sibling) stack.push(cursor.sibling);
          cursor = cursor.child;
        }
        return null;
      }
      var root = document.querySelector('#root');
      var body = pageText();
      var bodyKind = classifyBodyText(body);
      if (!root) {
        return [false, bodyKind || 'root_missing', body];
      }
      var reactKey = Object.keys(root).find(function(key) { return key.indexOf('__reactContainer$') === 0; });
      var fiber = (root._reactRootContainer && root._reactRootContainer._internalRoot && root._reactRootContainer._internalRoot.current)
        || (reactKey ? root[reactKey] : null);
      if (!fiber) {
        return [false, bodyKind || 'react_root_missing', body];
      }
      var store = findStoreFromFiber(fiber);
      if (!store) {
        return [false, bodyKind || 'store_missing', body];
      }
      var contentData = store.content && store.content.data;
      if (!contentData || typeof contentData !== 'object') {
        return [false, bodyKind || 'content_data_missing', body];
      }
      var title = cleanText(contentData.tl || document.querySelector('.file-name')?.textContent || document.title || '');
      var hasContentField = Object.prototype.hasOwnProperty.call(contentData, 'content');
      var rawContent = hasContentField ? String(contentData.content || '') : '';
      var content = '';
      if (rawContent) {
        try {
          var parsed = JSON.parse(rawContent);
          var parts = [];
          walkText(parsed, parts);
          content = cleanText(parts.join('\\n'));
        } catch (error) {
          content = cleanText(rawContent);
        }
      }
      var summary = '';
      var keywords = [];
      var ai = store.aiSummary;
      if (ai && ai.aiSummary) {
        try {
          var aiPayload = JSON.parse(ai.aiSummary);
          summary = cleanText(aiPayload.description || '');
          if (Array.isArray(aiPayload.keywords)) {
            for (var i = 0; i < aiPayload.keywords.length; i += 1) {
              var keyword = aiPayload.keywords[i];
              if (keyword && keyword.title) keywords.push(cleanText(((keyword.emoji || '') + ' ' + keyword.title).trim()));
            }
          }
        } catch {}
      }
      return [true, title, content, summary, keywords.join(' | '), contentData.ct || null, contentData.sz || null, hasContentField, rawContent.length, window.location.href];
    })()
  `;
}
171+
172+
/**
 * Turn the positional array returned by the in-page extractor into an output row.
 *
 * @param {*} payload - Raw evaluate result; unwrapped with `unwrapEvaluateResult`.
 * @param {string} sourceUrl - URL that was opened; used when the extractor
 *   reports no final location.
 * @returns {{title: string, content: string, summary: string, keywords: string,
 *   created_at: string, file_size: string, url: string}} The normalized row.
 * @throws {AuthRequiredError} When the extractor reports the `auth` reason.
 * @throws {EmptyResultError} When the extractor reports the `not_found` reason.
 * @throws {CommandExecutionError} For a malformed payload, any other failure
 *   reason, a missing title, a missing content field, or non-empty raw content
 *   that produced no readable text.
 */
function normalizeExtractionResult(payload, sourceUrl) {
  const data = unwrapEvaluateResult(payload);
  if (!Array.isArray(data)) {
    throw new CommandExecutionError('Youdao note extractor returned a malformed payload');
  }

  // Slot 0 is the success flag; on failure slot 1 carries the reason code.
  if (data[0] !== true) {
    const reason = typeof data[1] === 'string' && data[1].trim() ? data[1].trim() : 'unknown_parser_failure';
    if (reason === 'auth') {
      throw new AuthRequiredError('note.youdao.com', 'Youdao shared note requires login or additional permission');
    }
    if (reason === 'not_found') {
      throw new EmptyResultError('youdao note', 'The shared note is missing, expired, cancelled, or inaccessible.');
    }
    throw new CommandExecutionError(`Youdao note parser failed: ${reason}`);
  }

  const title = String(data[1] ?? '');
  const content = String(data[2] ?? '');
  const summary = String(data[3] ?? '');
  const keywords = String(data[4] ?? '');
  const createTime = data[5];
  const fileSize = data[6];
  const hasContentField = data[7] === true;
  const rawContentLength = Number(data[8] ?? 0);
  const finalUrl = String(data[9] || sourceUrl);

  if (!title) {
    throw new CommandExecutionError('Youdao note parser did not extract a title');
  }
  if (!hasContentField) {
    throw new CommandExecutionError('Youdao note parser did not find full note content in the page store');
  }
  // An empty note is fine; raw content that yields no text means parsing failed.
  if (rawContentLength > 0 && !content) {
    throw new CommandExecutionError('Youdao note parser found note content but extracted no readable text');
  }

  return {
    title,
    content,
    summary,
    keywords,
    created_at: formatYoudaoTimestamp(createTime),
    file_size: fileSize == null ? '' : String(fileSize),
    url: finalUrl,
  };
}
216+
217+
/**
 * `youdao note <url>` command registration.
 *
 * The handler validates the share URL, opens it in the browser page, waits for
 * the page to render, evaluates the extractor script, and returns a
 * single-element array holding the normalized row.
 */
const command = cli({
  site: 'youdao',
  name: 'note',
  access: 'read',
  description: 'Read a public shared Youdao Note',
  domain: 'share.note.youdao.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'url', positional: true, required: true, help: 'Full share URL of the Youdao Note' },
  ],
  columns: ['title', 'content', 'summary', 'keywords', 'created_at', 'file_size', 'url'],
  func: async function(page, kwargs) {
    // Throws ArgumentError before any navigation when the URL is not acceptable.
    const url = normalizeShareUrl(kwargs.url);

    try {
      await page.goto(url);
    } catch (error) {
      throw new CommandExecutionError(`Failed to open Youdao Note URL: ${error instanceof Error ? error.message : String(error)}`);
    }

    // Best-effort readiness: a failed selector wait falls back to a fixed wait,
    // and failures of the fixed waits are deliberately ignored.
    // NOTE(review): the selector list includes `body`, so this wait presumably
    // resolves as soon as any document exists — confirm that is intended.
    try {
      await page.wait({ selector: '#root, .file-name, body', timeout: 10 });
    } catch {
      await page.wait(3).catch(function() {});
    }
    // Extra settle time before reading the page store (units presumably seconds — TODO confirm).
    await page.wait(2).catch(function() {});

    let payload;
    try {
      payload = await page.evaluate(buildExtractorJs());
    } catch (error) {
      throw new CommandExecutionError(`Youdao note extractor failed: ${error instanceof Error ? error.message : String(error)}`);
    }

    return [normalizeExtractionResult(payload, url)];
  },
});
251+
252+
/** Internal helpers and the registered command, exposed for unit tests only. */
export const __test__ = {
  buildExtractorJs,
  command,
  formatYoudaoTimestamp,
  normalizeExtractionResult,
  normalizeShareUrl,
};

0 commit comments

Comments
 (0)