Skip to content

Commit 5553300

Browse files
feat(linux-do): split topic content into a dedicated command (#821)
* feat(linux-do): split topic content into a dedicated command Move the old main-post path out of linux-do topic so topic stays a summarized first-page reader while topic-content becomes the Markdown-focused entrypoint for full post bodies. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> * feat(linux-do): update topic content handling to include YAML front matter * fix(linux-do): update default output format to plain for topic-content rendering * fix(linux-do): replace js-yaml with inline YAML serialization for topic-content Adapters must only import node builtins, relative modules, or opencli public APIs. Hand-roll the simple front matter serialization to remove the third-party js-yaml dependency. * fix(linux-do): refine YAML quoting to only escape colons followed by space Colons in URLs (e.g. https://) are valid unquoted YAML values. Only quote when a colon is followed by a space or appears at end of line. --------- Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent d61dd7b commit 5553300

9 files changed

Lines changed: 304 additions & 22 deletions

File tree

README.zh-CN.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参
178178
| **jike** | `feed` `search` `create` `like` `comment` `repost` `notifications` `post` `topic` `user` | 浏览器 |
179179
| **jimeng** | `generate` `history` | 浏览器 |
180180
| **yollomi** | `generate` `video` `edit` `upload` `models` `remove-bg` `upscale` `face-swap` `restore` `try-on` `background` `object-remover` | 浏览器 |
181-
| **linux-do** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `user-posts` `user-topics` | 浏览器 |
181+
| **linux-do** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `topic-content` `user-posts` `user-topics` | 浏览器 |
182182
| **stackoverflow** | `hot` `search` `bounties` `unanswered` | 公开 |
183183
| **steam** | `top-sellers` | 公开 |
184184
| **weread** | `shelf` `search` `book` `highlights` `notes` `notebooks` `ranking` | 浏览器 |
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import { getRegistry } from '@jackwener/opencli/registry';
2+
import fs from 'node:fs';
3+
import { describe, expect, it } from 'vitest';
4+
import { __test__ } from './topic-content.js';
5+
6+
describe('linux-do topic-content', () => {
  // Every fixture uses the same creation timestamp.
  const CREATED_AT = '2025-04-05T10:00:00.000Z';

  /**
   * Build a topic payload whose only post is the main post (post_number 1).
   * `fields` supplies the per-test post attributes (username, raw, cooked, like_count).
   */
  const topicWithMainPost = (title: string, fields: Record<string, unknown>) => ({
    title,
    post_stream: {
      posts: [{ post_number: 1, ...fields, created_at: CREATED_AT }],
    },
  });

  it('prefers raw markdown when the topic payload includes it', () => {
    const payload = topicWithMainPost('Hello Linux.do', {
      username: 'neo',
      raw: '## Heading\n\n- one\n- two',
      cooked: '<h2>Heading</h2><ul><li>one</li><li>two</li></ul>',
      like_count: 7,
    });

    const { content } = __test__.extractTopicContent(payload, 1234);

    // Front matter fields first, then fragments of the raw Markdown body.
    const expectedSnippets = [
      '---',
      'title: Hello Linux.do',
      'author: neo',
      'likes: 7',
      'url: https://linux.do/t/1234',
      '## Heading',
      '- one',
    ];
    for (const snippet of expectedSnippets) {
      expect(content).toContain(snippet);
    }
  });

  it('falls back to cooked html and converts it to markdown', () => {
    const payload = topicWithMainPost('Converted Topic', {
      username: 'trinity',
      cooked: '<p>Hello <strong>world</strong></p><blockquote><p>quoted</p></blockquote>',
      like_count: 3,
    });

    const { content } = __test__.extractTopicContent(payload, 42);

    expect(content).toContain('Hello **world**');
    expect(content).toContain('> quoted');
  });

  it('registers topic-content with plain default output for markdown body rendering', () => {
    const registered = getRegistry().get('linux-do/topic-content');

    expect(registered?.defaultFormat).toBe('plain');
    expect(registered?.columns).toEqual(['content']);
  });

  it('keeps topic yaml as a summarized first-page reader after the split', () => {
    const yamlPath = new URL('./topic.yaml', import.meta.url);
    const topicYaml = fs.readFileSync(yamlPath, 'utf8');

    // The removed flag must be gone; the truncation and description must remain.
    expect(topicYaml).not.toContain('main_only');
    expect(topicYaml).toContain('slice(0, 200)');
    expect(topicYaml).toContain('帖子首页摘要和回复');
  });
});

clis/linux-do/topic-content.ts

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
2+
import { cli, Strategy } from '@jackwener/opencli/registry';
3+
import type { IPage } from '@jackwener/opencli/types';
4+
import { htmlToMarkdown, isRecord } from '@jackwener/opencli/utils';
5+
/** Host name passed to the command registration and to auth errors. */
const LINUX_DO_DOMAIN = 'linux.do';

/** Site origin used when building canonical topic URLs. */
const LINUX_DO_HOME = `https://${LINUX_DO_DOMAIN}`;
7+
8+
/** Single-column output row produced by the `topic-content` command. */
interface TopicContentRow {
  /** The rendered document: optional YAML front matter followed by the Markdown body. */
  content: string;
}

/** The subset of a topic post that this command reads. */
interface LinuxDoTopicPost {
  /** Position of the post inside the topic; the main post is number 1. */
  post_number?: number;
  username?: string;
  /** Markdown source of the post, when the payload includes it. */
  raw?: string;
  /** Rendered HTML of the post; used as a fallback when `raw` is unavailable. */
  cooked?: string;
  like_count?: number;
  created_at?: string;
}

/** Container for the posts carried by a topic payload. */
interface LinuxDoPostStream {
  posts?: LinuxDoTopicPost[];
}

/** The subset of the topic JSON payload that this command reads. */
interface LinuxDoTopicPayload {
  title?: string;
  post_stream?: LinuxDoPostStream;
}

/** Envelope returned by the in-page fetch script. */
interface FetchTopicResult {
  /** Mirrors `Response.ok`; `false` when the fetch itself threw. */
  ok: boolean;
  /** HTTP status code; absent when the fetch itself threw. */
  status?: number;
  /** Parsed JSON body, or `null` when the body could not be parsed. */
  data?: unknown;
  /** Failure description; an empty string when the body parsed successfully. */
  error?: string;
}
34+
35+
/**
 * Convert a timestamp string into the runtime's local date/time representation.
 *
 * @param utcStr - Timestamp string accepted by the `Date` constructor.
 * @returns An empty string for empty input, the input unchanged when it cannot
 *   be parsed, otherwise the result of `Date.prototype.toLocaleString()`.
 */
function toLocalTime(utcStr: string): string {
  if (utcStr) {
    const parsed = new Date(utcStr);
    const isValid = !Number.isNaN(parsed.getTime());
    // Unparseable input is passed through rather than rendered as "Invalid Date".
    return isValid ? parsed.toLocaleString() : utcStr;
  }
  return '';
}
40+
41+
/**
 * Narrow an untyped topic payload to the fields this command reads.
 *
 * Any field whose runtime type does not match the expected one becomes
 * `undefined`; array entries of `post_stream.posts` that are not records are
 * dropped.
 *
 * @param payload - Untyped value obtained from the topic JSON endpoint.
 * @returns The normalized payload, or `null` when `payload` is not a record.
 */
function normalizeTopicPayload(payload: unknown): LinuxDoTopicPayload | null {
  if (!isRecord(payload)) return null;

  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;
  const asNumber = (value: unknown): number | undefined =>
    typeof value === 'number' ? value : undefined;

  let postStream: LinuxDoTopicPayload['post_stream'];
  const rawStream = payload.post_stream;
  if (isRecord(rawStream)) {
    const rawPosts = rawStream.posts;
    postStream = {
      posts: Array.isArray(rawPosts)
        ? rawPosts.filter(isRecord).map((entry) => ({
            post_number: asNumber(entry.post_number),
            username: asString(entry.username),
            raw: asString(entry.raw),
            cooked: asString(entry.cooked),
            like_count: asNumber(entry.like_count),
            created_at: asString(entry.created_at),
          }))
        : undefined,
    };
  }

  return {
    title: asString(payload.title),
    post_stream: postStream,
  };
}
63+
64+
/**
 * Serialize a string as the value of a `key: value` YAML front matter line.
 *
 * The value is emitted as a plain scalar only when a YAML parser would read it
 * back as the same string; otherwise it is quoted.
 */
function quoteYamlFrontMatterValue(value: string): string {
  // Single-quoted scalars fold line breaks and cannot carry control characters.
  // A JSON string literal is a valid YAML double-quoted scalar and escapes them.
  if (/[\x00-\x1f]/.test(value)) return JSON.stringify(value);

  const needsQuote =
    // Indicator characters (quoted conservatively wherever they appear).
    /[#{}[\],&*?|>!%@`'"]/.test(value) ||
    // A colon only matters when followed by a space or at end of line (URLs stay plain).
    /: /.test(value) ||
    /:$/.test(value) ||
    // Leading/trailing whitespace would be stripped from a plain scalar.
    /^\s|\s$/.test(value) ||
    // A leading "- " would be read as a block sequence entry.
    /^-(?:\s|$)/.test(value) ||
    // Plain scalars that resolve to null/boolean instead of a string.
    /^(?:~|null|true|false|yes|no|on|off|y|n)$/i.test(value) ||
    // Plain scalars that resolve to a number.
    /^[-+]?(?:0x[\da-f_]+|0o[0-7_]+|(?:\d[\d_]*\.?[\d_]*|\.\d[\d_]*)(?:e[-+]?\d+)?|\.inf|\.nan)$/i.test(value) ||
    // Plain scalars that some parsers resolve to a timestamp.
    /^\d{4}-\d{1,2}-\d{1,2}(?:[Tt\s].*)?$/.test(value);

  return needsQuote ? `'${value.replace(/'/g, "''")}'` : value;
}

/**
 * Render the topic as a Markdown document with optional YAML front matter.
 *
 * Empty strings and non-finite `likes` are omitted from the front matter. When
 * no field remains, the front matter block is omitted entirely and only the
 * trimmed body is returned.
 *
 * @param params.title - Topic title.
 * @param params.author - Username of the main post's author.
 * @param params.likes - Like count of the main post.
 * @param params.createdAt - Creation time, already formatted for display.
 * @param params.url - Canonical topic URL.
 * @param params.body - Markdown body of the main post.
 * @returns The front matter (if any) and the body, separated by a blank line.
 */
function buildTopicMarkdownDocument(params: {
  title: string;
  author: string;
  likes?: number;
  createdAt: string;
  url: string;
  body: string;
}): string {
  const entries: [string, string | number | undefined][] = [
    ['title', params.title || undefined],
    ['author', params.author || undefined],
    ['likes', typeof params.likes === 'number' && Number.isFinite(params.likes) ? params.likes : undefined],
    ['createdAt', params.createdAt || undefined],
    ['url', params.url || undefined],
  ];

  const frontMatterLines: string[] = [];
  for (const [key, value] of entries) {
    if (value === undefined) continue;
    const rendered = typeof value === 'number' ? String(value) : quoteYamlFrontMatterValue(value);
    frontMatterLines.push(`${key}: ${rendered}`);
  }
  const frontMatter = frontMatterLines.join('\n');

  return [
    frontMatter ? `---\n${frontMatter}\n---` : '',
    params.body.trim(),
  ].filter(Boolean).join('\n\n').trim();
}
97+
98+
/**
 * Build the `topic-content` output row from an untyped topic payload.
 *
 * The main post is the one with `post_number === 1`. Its `raw` Markdown is
 * preferred; when that is missing or blank, the `cooked` HTML is converted to
 * Markdown instead.
 *
 * @param payload - Untyped topic payload.
 * @param id - Topic id, used for the canonical URL and in error messages.
 * @returns A single row whose `content` is the rendered Markdown document.
 * @throws CommandExecutionError when the payload is not a record.
 * @throws EmptyResultError when there is no main post or its body is empty.
 */
function extractTopicContent(payload: unknown, id: number): TopicContentRow {
  const topic = normalizeTopicPayload(payload);
  if (!topic) {
    throw new CommandExecutionError('linux.do returned an unexpected topic payload');
  }

  const posts = topic.post_stream?.posts ?? [];
  const mainPost = posts.find((post) => post.post_number === 1);
  if (!mainPost) {
    throw new EmptyResultError('linux-do/topic-content', `Could not find the main post for topic ${id}.`);
  }

  // Trim both sources before the emptiness check so that a whitespace-only
  // conversion result is reported as empty instead of producing a document
  // that contains front matter and no body.
  const rawBody = mainPost.raw?.trim() ?? '';
  const body = rawBody || htmlToMarkdown(mainPost.cooked ?? '').trim();

  if (!body) {
    throw new EmptyResultError('linux-do/topic-content', `Topic ${id} does not contain a readable main post body.`);
  }

  return {
    content: buildTopicMarkdownDocument({
      title: topic.title?.trim() ?? '',
      author: mainPost.username?.trim() ?? '',
      likes: typeof mainPost.like_count === 'number' ? mainPost.like_count : undefined,
      createdAt: toLocalTime(mainPost.created_at ?? ''),
      url: `${LINUX_DO_HOME}/t/${id}`,
      body,
    }),
  };
}
129+
130+
/**
 * Fetch the topic JSON (with raw Markdown included) from inside the browser page.
 *
 * The request runs in the page context with `credentials: 'include'` and uses a
 * relative URL, so it is resolved against whatever origin the page is on.
 *
 * @param page - Browser page used to evaluate the fetch script.
 * @param id - Topic id interpolated into the request path.
 * @returns The parsed JSON payload, untyped.
 * @throws AuthRequiredError on HTTP 401/403 or when the response body is not JSON.
 * @throws CommandExecutionError when the page returns nothing, the request
 *   fails, or the result carries any other error message.
 */
async function fetchTopicPayload(page: IPage, id: number): Promise<unknown> {
  const script = `(async () => {
    try {
      const res = await fetch('/t/${id}.json?include_raw=true', { credentials: 'include' });
      let data = null;
      try {
        data = await res.json();
      } catch (_error) {
        data = null;
      }
      return {
        ok: res.ok,
        status: res.status,
        data,
        error: data === null ? 'Response is not valid JSON' : '',
      };
    } catch (error) {
      return {
        ok: false,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  })()`;

  const outcome = (await page.evaluate(script)) as FetchTopicResult | null;
  if (!outcome) {
    throw new CommandExecutionError('linux.do returned an empty browser response');
  }

  const { ok, status, data, error } = outcome;

  // Both an explicit auth status and a non-JSON body are reported as a
  // missing or expired session.
  const rejectedByStatus = status === 401 || status === 403;
  const bodyIsNotJson = error === 'Response is not valid JSON';
  if (rejectedByStatus || bodyIsNotJson) {
    throw new AuthRequiredError(LINUX_DO_DOMAIN, 'linux.do requires an active signed-in browser session');
  }

  if (!ok) {
    const message = error || `linux.do request failed: HTTP ${status ?? 'unknown'}`;
    throw new CommandExecutionError(message);
  }

  if (error) {
    throw new CommandExecutionError(error, 'Please verify your linux.do session is still valid');
  }

  return data;
}
178+
179+
// Register `linux-do topic-content`: a browser-backed, cookie-authenticated
// command that returns one row with a single `content` column and prints it as
// plain text by default.
cli({
  site: 'linux-do',
  name: 'topic-content',
  description: 'Get the main topic body as Markdown',
  domain: LINUX_DO_DOMAIN,
  strategy: Strategy.COOKIE,
  browser: true,
  defaultFormat: 'plain',
  args: [
    {
      name: 'id',
      positional: true,
      type: 'int',
      required: true,
      help: 'Topic ID',
    },
  ],
  columns: ['content'],
  func: async (page: IPage, kwargs) => {
    const id = Number(kwargs.id);
    // Only positive integers are accepted as topic ids.
    const isValidId = Number.isInteger(id) && id > 0;
    if (!isValidId) {
      throw new CommandExecutionError(`Invalid linux.do topic id: ${String(kwargs.id ?? '')}`);
    }

    const payload = await fetchTopicPayload(page, id);
    const row = extractTopicContent(payload, id);
    return [row];
  },
});
201+
202+
/** Internal helpers exposed so unit tests can call them without going through the command. */
export const __test__ = {
  buildTopicMarkdownDocument: buildTopicMarkdownDocument,
  extractTopicContent: extractTopicContent,
  normalizeTopicPayload: normalizeTopicPayload,
  toLocalTime: toLocalTime,
};

clis/linux-do/topic.yaml

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
site: linux-do
22
name: topic
3-
description: linux.do 帖子详情和回复(首页
3+
description: linux.do 帖子首页摘要和回复(首屏
44
domain: linux.do
55
strategy: cookie
66
browser: true
@@ -15,17 +15,12 @@ args:
1515
type: int
1616
default: 20
1717
description: Number of posts
18-
main_only:
19-
type: bool
20-
default: false
21-
description: Only return the main post body without truncation
2218

2319
pipeline:
2420
- navigate: https://linux.do
2521

2622
- evaluate: |
2723
(async () => {
28-
const mainOnly = ${{ args.main_only }};
2924
const toLocalTime = (utcStr) => {
3025
if (!utcStr) return '';
3126
const date = new Date(utcStr);
@@ -50,16 +45,6 @@ pipeline:
5045
.replace(/\s+/g, ' ')
5146
.trim();
5247
const posts = data?.post_stream?.posts || [];
53-
if (mainOnly) {
54-
const mainPost = posts.find(p => p.post_number === 1);
55-
if (!mainPost) return [];
56-
return [{
57-
author: mainPost.username || '',
58-
content: mainPost.cooked || '',
59-
likes: mainPost.like_count || 0,
60-
created_at: toLocalTime(mainPost.created_at),
61-
}];
62-
}
6348
return posts.slice(0, ${{ args.limit }}).map(p => ({
6449
author: p.username,
6550
content: strip(p.cooked).slice(0, 200),

docs/adapters/browser/linux-do.md

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
| `opencli linux-do tags` | List popular tags |
1212
| `opencli linux-do search <query>` | Search topics |
1313
| `opencli linux-do topic <id>` | View topic posts |
14+
| `opencli linux-do topic-content <id>` | Read the main topic body as Markdown |
1415
| `opencli linux-do user-topics <username>` | Topics created by a user |
1516
| `opencli linux-do user-posts <username>` | Replies posted by a user |
1617

@@ -147,19 +148,35 @@ Output columns: `rank`, `title`, `views`, `likes`, `replies`, `url`
147148

148149
## topic
149150

150-
View posts within a topic (first page).
151+
View summarized first-page posts within a topic.
151152

152153
```bash
153154
opencli linux-do topic 1234
154155
opencli linux-do topic 1234 --limit 50
155-
opencli linux-do topic 1234 --main_only -f json | jq -r '.[0].content'
156156
```
157157

158158
Notes:
159-
- `--main_only` returns only the main post row and keeps the body untruncated
159+
- `content` is a plain-text summary extracted from each first-page post
160+
- Each summary is truncated to 200 characters
161+
- Use `opencli linux-do topic-content <id>` for the full main post body in Markdown
160162

161163
Output columns: `author`, `content`, `likes`, `created_at`
162164

165+
## topic-content
166+
167+
Read the main topic body as Markdown.
168+
169+
```bash
170+
opencli linux-do topic-content 1234
171+
opencli linux-do topic-content 1234 -f json
172+
```
173+
174+
Notes:
175+
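- Default output prints the Markdown document directly (YAML front matter with `title`, `author`, `likes`, `createdAt` and `url`, followed by the main post body) for copy/paste or piping into LLMs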
176+
- Use `-f json` if you want a machine-readable wrapper
177+
178+
Output columns: `content`
179+
163180
## user-topics
164181

165182
List topics created by a user.

docs/adapters/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Run `opencli list` for the live registry.
2828
| **[jike](./browser/jike)** | `feed` `search` `post` `topic` `user` `create` `comment` `like` `repost` `notifications` | 🔐 Browser |
2929
| **[jimeng](./browser/jimeng)** | `generate` `history` | 🔐 Browser |
3030
| **[yollomi](./browser/yollomi)** | `generate` `video` `edit` `upload` `models` `remove-bg` `upscale` `face-swap` `restore` `try-on` `background` `object-remover` | 🔐 Browser |
31-
| **[linux-do](./browser/linux-do)** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `user-posts` `user-topics` | 🔐 Browser |
31+
| **[linux-do](./browser/linux-do)** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `topic-content` `user-posts` `user-topics` | 🔐 Browser |
3232
| **[chaoxing](./browser/chaoxing)** | `assignments` `exams` | 🔐 Browser |
3333
| **[grok](./browser/grok)** | `ask` | 🔐 Browser |
3434
| **[gemini](./browser/gemini)** | `new` `ask` `image` `deep-research` `deep-research-result` | 🔐 Browser |

skills/opencli-usage/SKILL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ Usage: `opencli <site> <command> [args] [--limit N] [-f json|yaml|md|csv|table]`
9999
| **web** | `read` — any URL to Markdown |
100100
| **weixin** | `download` — 公众号 article to Markdown |
101101
| **v2ex** (browser) | `daily` `me` `notifications` |
102-
| **linux-do** (browser) | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `user-posts` `user-topics` |
102+
| **linux-do** (browser) | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `topic-content` `user-posts` `user-topics` |
103103
| **bloomberg** (browser) | `news` — full article reader |
104104
| **grok** | `ask` |
105105
| **doubao** | `status` `new` `send` `read` `ask` `detail` `history` `meeting-summary` `meeting-transcript` |

0 commit comments

Comments
 (0)