Skip to content

Commit 7b4e826

Browse files
committed
Implement support for pasting URLs into the prompt field to automatically fetch and convert web page content to Markdown context
1 parent a927939 commit 7b4e826

24 files changed

Lines changed: 588 additions & 17 deletions

apps/editor/package.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1875,12 +1875,15 @@
18751875
"@babel/preset-env": "^7.27.2",
18761876
"@babel/preset-react": "^7.27.1",
18771877
"@babel/preset-typescript": "^7.27.1",
1878+
"@types/dompurify": "^3.0.5",
18781879
"@types/he": "^1.2.3",
1880+
"@types/jsdom": "^21.1.7",
18791881
"@types/node": "20.x",
18801882
"@types/react": "^18.2.60",
18811883
"@types/react-dom": "^18.2.19",
18821884
"@types/vscode": "1.92.0",
18831885
"@types/ws": "^8.18.1",
1886+
"@types/turndown": "^5.0.5",
18841887
"@typescript-eslint/eslint-plugin": "^7.18.0",
18851888
"@typescript-eslint/parser": "^7.18.0",
18861889
"babel-loader": "^9.2.1",
@@ -1900,16 +1903,21 @@
19001903
"webpack-cli": "^6.0.1"
19011904
},
19021905
"dependencies": {
1906+
"@joplin/turndown": "^4.0.81",
1907+
"@joplin/turndown-plugin-gfm": "^1.0.63",
1908+
"@mozilla/readability": "^0.6.0",
19031909
"@svgr/webpack": "^8.1.0",
19041910
"@vscode/codicons": "^0.0.43",
19051911
"axios": "^1.10.0",
19061912
"bufferutil": "^4.0.9",
19071913
"classnames": "^2.5.1",
19081914
"dayjs": "^1.11.10",
19091915
"diff": "^8.0.2",
1916+
"dompurify": "^3.2.7",
19101917
"glob": "^11.0.3",
19111918
"he": "^1.2.0",
19121919
"ignore": "^6.0.2",
1920+
"jsdom": "^24.0.0",
19131921
"marked": "^15.0.12",
19141922
"path-browserify": "^1.0.1",
19151923
"react": "^18.2.0",

apps/editor/src/env.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// Shorthand ambient module declaration: it carries no type information, so
// everything imported from this package is typed as `any`.
declare module '@joplin/turndown'
2+
// Shorthand ambient module declaration: it carries no type information, so
// everything imported from this package is typed as `any`.
declare module '@joplin/turndown-plugin-gfm'

apps/editor/src/views/panel/backend/message-handlers/handle-code-completion.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import { replace_saved_context_symbol } from '@/views/panel/backend/utils/replac
2525
import { replace_skill_symbol } from '../utils/replace-skill-symbol'
2626
import { replace_image_symbol } from '../utils/replace-image-symbol'
2727
import { replace_document_symbol } from '../utils/replace-document-symbol'
28+
import { replace_website_symbol } from '../utils/replace-website-symbol'
2829

2930
const get_code_completion_config = async (
3031
api_providers_manager: ModelProvidersManager,
@@ -394,6 +395,12 @@ export const handle_code_completion = async (
394395
})
395396
}
396397

398+
if (processed_completion_instructions.includes('#Website(')) {
399+
processed_completion_instructions = await replace_website_symbol({
400+
instruction: processed_completion_instructions
401+
})
402+
}
403+
397404
const files_collector = new FilesCollector(
398405
panel_provider.workspace_provider,
399406
panel_provider.open_editors_provider

apps/editor/src/views/panel/backend/message-handlers/handle-copy-prompt.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { replace_saved_context_symbol } from '@/views/panel/backend/utils/replac
1111
import { replace_skill_symbol } from '@/views/panel/backend/utils/replace-skill-symbol'
1212
import { replace_image_symbol } from '@/views/panel/backend/utils/replace-image-symbol'
1313
import { replace_document_symbol } from '../utils/replace-document-symbol'
14+
import { replace_website_symbol } from '../utils/replace-website-symbol'
1415
import {
1516
code_at_cursor_instructions_for_panel,
1617
prune_context_instructions_prefix,
@@ -155,6 +156,12 @@ export const handle_copy_prompt = async (params: {
155156
})
156157
}
157158

159+
if (processed_completion_instructions.includes('#Website(')) {
160+
processed_completion_instructions = await replace_website_symbol({
161+
instruction: processed_completion_instructions
162+
})
163+
}
164+
158165
const missing_text_tag = processed_completion_instructions
159166
? `<missing_text>${processed_completion_instructions}</missing_text>`
160167
: '<missing_text>'
@@ -233,6 +240,12 @@ export const handle_copy_prompt = async (params: {
233240
})
234241
}
235242

243+
if (processed_instructions.includes('#Website(')) {
244+
processed_instructions = await replace_website_symbol({
245+
instruction: processed_instructions
246+
})
247+
}
248+
236249
let system_instructions_xml = ''
237250

238251
if (params.panel_provider.web_prompt_type == 'edit-context') {

apps/editor/src/views/panel/backend/message-handlers/handle-edit-context.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import { PanelProvider } from '@/views/panel/backend/panel-provider'
2525
import { replace_skill_symbol } from '@/views/panel/backend/utils/replace-skill-symbol'
2626
import { replace_image_symbol } from '@/views/panel/backend/utils/replace-image-symbol'
2727
import { replace_document_symbol } from '../utils/replace-document-symbol'
28+
import { replace_website_symbol } from '../utils/replace-website-symbol'
2829
import { apply_reasoning_effort } from '@/utils/apply-reasoning-effort'
2930
import { EditContextMessage } from '@/views/panel/types/messages'
3031
import { dictionary } from '@shared/constants/dictionary'
@@ -379,6 +380,12 @@ export const handle_edit_context = async (
379380
})
380381
}
381382

383+
if (processed_instructions.includes('#Website(')) {
384+
processed_instructions = await replace_website_symbol({
385+
instruction: processed_instructions
386+
})
387+
}
388+
382389
const is_prune_context = panel_provider.api_prompt_type == 'prune-context'
383390
const collected_files = await files_collector.collect_files({
384391
compact: is_prune_context
apps/editor/src/views/panel/backend/message-handlers/handle-paste-url.ts

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import { PasteUrlMessage } from '../../types/messages'
2+
import { PanelProvider } from '../panel-provider'
3+
import axios from 'axios'
4+
import { JSDOM } from 'jsdom'
5+
import { Readability, isProbablyReaderable } from '@mozilla/readability'
6+
import createDOMPurify from 'dompurify'
7+
import TurndownServiceJoplin from '@joplin/turndown'
8+
import * as turndownPluginGfm from '@joplin/turndown-plugin-gfm'
9+
import type TurndownService from 'turndown'
10+
import * as fs from 'fs'
11+
import * as os from 'os'
12+
import * as path from 'path'
13+
import * as crypto from 'crypto'
14+
15+
/**
 * Strips Markdown image syntax from `text` and collapses the blank lines the
 * removal leaves behind.
 *
 * Handles three shapes:
 *  - plain images: `![alt](src)`
 *  - images whose target contains one level of parentheses:
 *    `![alt](https://host/a_(b).png)`
 *  - linked images: `[![alt](src)](href)`, removed together with the wrapping
 *    link so that no empty `[](href)` is left over
 *
 * Ordinary links and bare `[](...)` sequences (e.g. C++ lambdas in page text)
 * are left untouched.
 *
 * @param text Markdown to clean
 * @returns the Markdown without images, with runs of 3+ newlines reduced to 2
 */
const remove_markdown_images = (text: string) => {
  // Linked images must go first; otherwise stripping the inner image would
  // leave an empty link behind.
  const without_linked_images = text.replace(
    /\[\s*!\[[^\]]*\]\((?:\([^()]*\)|[^)])*\)\s*\]\((?:\([^()]*\)|[^)])*\)/g,
    ''
  )
  // The target allows one nested "(...)" group so URLs containing parentheses
  // are consumed in full instead of being cut at the first ")".
  const without_images = without_linked_images.replace(
    /!\[[^\]]*\]\((?:\([^()]*\)|[^)])*\)/g,
    ''
  )
  return without_images.replace(/\n{3,}/g, '\n\n')
}
19+
20+
/**
 * Builds the HTML-to-Markdown converter used for fetched web pages.
 *
 * The service uses fenced code blocks and the GFM plugin, plus four custom
 * rules registered in this order:
 *  - `fencedCodeBlock`: `<pre>` containing `<code>` becomes a fenced block,
 *    tagged with the `language-*` class of the `<code>` element when present
 *  - `displayMath`: `span.katex-display` becomes a `$$ ... $$` block
 *  - `inlineMath`: `span.katex` becomes `$...$` (or `$$ ... $$` when it is the
 *    only child of a paragraph)
 *  - `stripElements`: `figure`, `picture` and `sup` produce no output
 *
 * @returns a newly configured Turndown service
 */
const create_turndown_service = () => {
  const turndown_service: TurndownService = new TurndownServiceJoplin({
    codeBlockStyle: 'fenced'
  })
  turndown_service.use(turndownPluginGfm.gfm)

  turndown_service.addRule('fencedCodeBlock', {
    filter: (node: any, options: any) => {
      return (
        options.codeBlockStyle == 'fenced' &&
        node.nodeName == 'PRE' &&
        Boolean(node.querySelector('code'))
      )
    },
    replacement: (_: any, node: any, options: any) => {
      const element = node as HTMLElement
      const language = (element
        .querySelector('code')
        ?.className.match(/language-(\S+)/) || [null, ''])[1]
      const code = element.textContent ?? ''

      // If the code itself contains a line starting with a fence (e.g. a
      // Markdown sample), the configured fence would close the block early.
      // Use a fence one character longer than the longest such run.
      const base_fence: string = options.fence
      const fence_char = base_fence.charAt(0)
      const fence_runs: string[] =
        code.match(new RegExp(`^${fence_char}{3,}`, 'gm')) ?? []
      const longest_run = fence_runs.reduce(
        (longest, run) => Math.max(longest, run.length),
        0
      )
      const fence =
        longest_run >= base_fence.length
          ? fence_char.repeat(longest_run + 1)
          : base_fence

      return '\n\n' + fence + language + '\n' + code + '\n' + fence + '\n\n'
    }
  })

  // Display math: the outer KaTeX wrapper for a block-level equation.
  turndown_service.addRule('displayMath', {
    filter(node) {
      return (
        node.nodeName == 'SPAN' &&
        (node as HTMLElement).classList.contains('katex-display')
      )
    },
    replacement(_, node) {
      // The "<annotation>" element holds the expression string, which is what
      // Markdown math expects.
      const annotation = (node as HTMLElement).querySelector(
        'annotation'
      )?.textContent
      if (!annotation) return ''
      return `$$\n${annotation}\n$$`
    }
  })

  // Inline math. Registered under its own key: the two math rules are
  // different rules and must not share a name.
  turndown_service.addRule('inlineMath', {
    filter(node) {
      return (
        node.nodeName == 'SPAN' &&
        (node as HTMLElement).classList.contains('katex')
      )
    },
    replacement(_, node) {
      // A formula that is the sole child of a paragraph is rendered as a block.
      const is_block =
        node.parentNode?.nodeName == 'P' &&
        node.parentNode.childNodes.length == 1
      const annotation = (node as HTMLElement).querySelector(
        'annotation'
      )?.textContent
      if (!annotation) return ''
      return is_block ? `$$ ${annotation} $$` : `$${annotation}$`
    }
  })

  // These elements are dropped from the output entirely.
  turndown_service.addRule('stripElements', {
    filter: ['figure', 'picture', 'sup'],
    replacement: () => ''
  })

  return turndown_service
}
92+
93+
/**
 * Runs `html` through DOMPurify and returns the sanitized markup.
 * DOMPurify needs a DOM implementation, so an empty jsdom window is created
 * for the call and closed again afterwards.
 */
const sanitize_html = (html: string) => {
  const purify_window = new JSDOM('').window
  try {
    return createDOMPurify(purify_window as any).sanitize(html)
  } finally {
    purify_window.close()
  }
}

/**
 * Converts a fetched HTML page into Markdown.
 *
 * The HTML is sanitized, checked with `isProbablyReaderable`, reduced to its
 * article with Readability, converted with Turndown and stripped of images.
 * The article title, when present, is prepended as a level-1 heading.
 *
 * @param html raw HTML of the page
 * @param url  page URL, passed to jsdom as the document URL
 * @returns the Markdown, or `null` when the page is not readerable or yields
 *          no content
 */
const convert_page_to_markdown = (html: string, url: string): string | null => {
  const dom = new JSDOM(sanitize_html(html), { url })
  try {
    const doc = dom.window.document
    if (!isProbablyReaderable(doc)) return null

    const article = new Readability(doc, { keepClasses: true }).parse()
    if (!article || !article.content) return null

    const article_dom = new JSDOM(article.content)
    try {
      const markdown = remove_markdown_images(
        create_turndown_service().turndown(article_dom.window.document.body)
      )
      if (!markdown || markdown.trim().length == 0) return null
      return article.title ? `# ${article.title}\n\n${markdown}` : markdown
    } finally {
      article_dom.window.close()
    }
  } finally {
    // Close jsdom windows explicitly so they are released promptly.
    dom.window.close()
  }
}

/**
 * Handles a URL pasted into the prompt field.
 *
 * The page is fetched, converted to Markdown and stored in the OS temp
 * directory as `cwc-website-<md5 of url>.txt`; `#Website(<url>)` is then
 * inserted at the cursor. If that file already exists the symbol is inserted
 * without fetching again. In every other case (non-http(s) URL, request
 * failure, non-200 or non-text response, page without readable content) the
 * raw URL is inserted instead.
 *
 * NOTE(review): the file name scheme is presumably shared with the code that
 * resolves `#Website(...)` symbols - keep both in sync.
 *
 * @param panel_provider panel used to insert text at the cursor position
 * @param message        message carrying the pasted `url`
 */
export const handle_paste_url = async (
  panel_provider: PanelProvider,
  message: PasteUrlMessage
) => {
  const url = message.url
  // Best effort: anything that goes wrong falls back to pasting the raw URL.
  let text_to_insert = url

  try {
    // Only fetch web pages; `new URL` also rejects malformed input.
    const protocol = new URL(url).protocol
    if (protocol == 'http:' || protocol == 'https:') {
      // md5 is used only to derive a stable file name, not for security.
      const hash = crypto.createHash('md5').update(url).digest('hex')
      const file_path = path.join(os.tmpdir(), `cwc-website-${hash}.txt`)

      if (fs.existsSync(file_path)) {
        text_to_insert = `#Website(${url})`
      } else {
        const response = await axios.get(url, {
          headers: {
            'User-Agent':
              'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36'
          },
          timeout: 5000
        })

        if (response.status == 200 && typeof response.data == 'string') {
          const markdown = convert_page_to_markdown(response.data, url)
          if (markdown !== null) {
            await fs.promises.writeFile(file_path, markdown, 'utf-8')
            text_to_insert = `#Website(${url})`
          }
        }
      }
    }
  } catch (error) {
    // Do not fail the paste, but leave a trace for debugging.
    console.error('Failed to convert pasted URL to website context:', error)
  }

  panel_provider.add_text_at_cursor_position(text_to_insert)
}

apps/editor/src/views/panel/backend/message-handlers/handle-preview-preset.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import { Preset } from '@shared/types/preset'
1313
import { replace_skill_symbol } from '@/views/panel/backend/utils/replace-skill-symbol'
1414
import { replace_image_symbol } from '@/views/panel/backend/utils/replace-image-symbol'
1515
import { replace_document_symbol } from '../utils/replace-document-symbol'
16+
import { replace_website_symbol } from '../utils/replace-website-symbol'
1617
import { apply_preset_affixes_to_instruction } from '@/utils/apply-preset-affixes'
1718
import { dictionary } from '@shared/constants/dictionary'
1819
import {
@@ -135,6 +136,12 @@ export const handle_preview_preset = async (
135136
})
136137
}
137138

139+
if (processed_completion_instructions.includes('#Website(')) {
140+
processed_completion_instructions = await replace_website_symbol({
141+
instruction: processed_completion_instructions
142+
})
143+
}
144+
138145
const missing_text_tag = processed_completion_instructions
139146
? `<missing_text>${processed_completion_instructions}</missing_text>`
140147
: '<missing_text>'
@@ -222,6 +229,12 @@ export const handle_preview_preset = async (
222229
})
223230
}
224231

232+
if (processed_instructions.includes('#Website(')) {
233+
processed_instructions = await replace_website_symbol({
234+
instruction: processed_instructions
235+
})
236+
}
237+
225238
let system_instructions_xml = ''
226239
if (panel_provider.web_prompt_type == 'edit-context') {
227240
const config = vscode.workspace.getConfiguration('codeWebChat')

apps/editor/src/views/panel/backend/message-handlers/handle-prune-context.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import { PanelProvider } from '@/views/panel/backend/panel-provider'
2121
import { replace_skill_symbol } from '@/views/panel/backend/utils/replace-skill-symbol'
2222
import { replace_image_symbol } from '@/views/panel/backend/utils/replace-image-symbol'
2323
import { replace_document_symbol } from '@/views/panel/backend/utils/replace-document-symbol'
24+
import { replace_website_symbol } from '@/views/panel/backend/utils/replace-website-symbol'
2425
import { apply_reasoning_effort } from '@/utils/apply-reasoning-effort'
2526
import { PruneContextMessage } from '@/views/panel/types/messages'
2627
import { dictionary } from '@shared/constants/dictionary'
@@ -358,6 +359,12 @@ export const handle_prune_context = async (
358359
})
359360
}
360361

362+
if (processed_instructions.includes('#Website(')) {
363+
processed_instructions = await replace_website_symbol({
364+
instruction: processed_instructions
365+
})
366+
}
367+
361368
const collected_files = await files_collector.collect_files({
362369
compact: true
363370
})

apps/editor/src/views/panel/backend/message-handlers/handle-send-to-browser.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
import { replace_skill_symbol } from '@/views/panel/backend/utils/replace-skill-symbol'
1313
import { replace_image_symbol } from '@/views/panel/backend/utils/replace-image-symbol'
1414
import { replace_document_symbol } from '../utils/replace-document-symbol'
15+
import { replace_website_symbol } from '../utils/replace-website-symbol'
1516
import {
1617
code_at_cursor_instructions_for_panel,
1718
prune_context_instructions_prefix,
@@ -182,6 +183,12 @@ export const handle_send_to_browser = async (params: {
182183
})
183184
}
184185

186+
if (processed_completion_instructions.includes('#Website(')) {
187+
processed_completion_instructions = await replace_website_symbol({
188+
instruction: processed_completion_instructions
189+
})
190+
}
191+
185192
const context_text = await files_collector.collect_files({
186193
exclude_path: active_path
187194
})
@@ -297,6 +304,12 @@ export const handle_send_to_browser = async (params: {
297304
})
298305
}
299306

307+
if (processed_instructions.includes('#Website(')) {
308+
processed_instructions = await replace_website_symbol({
309+
instruction: processed_instructions
310+
})
311+
}
312+
300313
let system_instructions_xml = ''
301314
if (params.panel_provider.web_prompt_type == 'edit-context') {
302315
const config = vscode.workspace.getConfiguration('codeWebChat')

apps/editor/src/views/panel/backend/message-handlers/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,4 @@ export * from './handle-save-prompt-image'
6565
export * from './handle-open-prompt-image'
6666
export * from './handle-save-prompt-document'
6767
export * from './handle-open-prompt-document'
68+
export * from './handle-paste-url'

0 commit comments

Comments
 (0)