|
1 | 1 | import React, { useState, useEffect, useRef } from "react"; |
2 | 2 | import styles from "./styles.module.css"; |
3 | | - |
4 | | -// Static selectors for content cleanup |
5 | | -const SELECTORS_TO_REMOVE = [ |
6 | | - ".theme-edit-this-page", |
7 | | - ".theme-last-updated", |
8 | | - ".pagination-nav", |
9 | | - ".theme-doc-breadcrumbs", |
10 | | - ".theme-doc-footer", |
11 | | - "button", |
12 | | - ".copy-code-button", |
13 | | - ".buttonGroup", |
14 | | - ".clean-btn", |
15 | | - ".theme-code-block-title", |
16 | | - ".line-number", |
17 | | -]; |
| 3 | +const { |
| 4 | + extractPageMarkdownFromDocument, |
| 5 | + getMarkdownRouteUrl, |
| 6 | +} = require("./htmlToMarkdown"); |
18 | 7 |
|
19 | 8 | // Utility function to merge custom styles with default classes |
20 | 9 | const mergeStyles = (defaultClassName, customStyleConfig = {}) => { |
@@ -49,7 +38,8 @@ const separatePositioningStyles = (styleObject = {}) => { |
49 | 38 |
|
50 | 39 | export default function CopyPageButton({ |
51 | 40 | customStyles = {}, |
52 | | - enabledActions = ['copy', 'view', 'chatgpt', 'claude', 'gemini'] |
| 41 | + enabledActions = ['copy', 'view', 'chatgpt', 'claude', 'gemini'], |
| 42 | + generateMarkdownRoutes = false |
53 | 43 | }) { |
54 | 44 | const [isOpen, setIsOpen] = useState(false); |
55 | 45 | const [pageContent, setPageContent] = useState(""); |
@@ -103,258 +93,8 @@ export default function CopyPageButton({ |
103 | 93 | } |
104 | 94 | }, []); |
105 | 95 |
|
106 | | - const convertToMarkdown = (element) => { |
107 | | - const cleanText = (text) => { |
108 | | - return text |
109 | | - .replace(/[\u200B-\u200D\uFEFF]/g, "") // Remove zero-width spaces |
110 | | - .replace(/\u00A0/g, " ") // Replace non-breaking spaces |
111 | | - .replace(/[\u2018\u2019]/g, "'") // Smart quotes |
112 | | - .replace(/[\u201C\u201D]/g, '"') |
113 | | - .replace(/​/g, "") // Clean encoding issues |
114 | | - .replace(/\s+/g, " ") // Normalize whitespace |
115 | | - .trim(); |
116 | | - }; |
117 | | - |
118 | | - const processNode = (node) => { |
119 | | - if (node.nodeType === Node.TEXT_NODE) { |
120 | | - return cleanText(node.textContent); |
121 | | - } |
122 | | - |
123 | | - if (node.nodeType === Node.ELEMENT_NODE) { |
124 | | - const tag = node.tagName.toLowerCase(); |
125 | | - const childResults = Array.from(node.childNodes).map((child) => |
126 | | - processNode(child) |
127 | | - ); |
128 | | - |
129 | | - // Join child results with intelligent spacing |
130 | | - let children = ""; |
131 | | - for (let i = 0; i < childResults.length; i++) { |
132 | | - const current = childResults[i]; |
133 | | - const previous = i > 0 ? childResults[i - 1] : ""; |
134 | | - |
135 | | - if (current) { |
136 | | - if ( |
137 | | - previous && |
138 | | - !previous.match(/[\s\n]$/) && |
139 | | - !current.match(/^[\s\n]/) && |
140 | | - previous.trim() && |
141 | | - current.trim() |
142 | | - ) { |
143 | | - children += " "; |
144 | | - } |
145 | | - children += current; |
146 | | - } |
147 | | - } |
148 | | - |
149 | | - switch (tag) { |
150 | | - case "h1": |
151 | | - return `\n# ${children.trim()}\n\n`; |
152 | | - case "h2": |
153 | | - return `\n## ${children.trim()}\n\n`; |
154 | | - case "h3": |
155 | | - return `\n### ${children.trim()}\n\n`; |
156 | | - case "h4": |
157 | | - return `\n#### ${children.trim()}\n\n`; |
158 | | - case "h5": |
159 | | - return `\n##### ${children.trim()}\n\n`; |
160 | | - case "h6": |
161 | | - return `\n###### ${children.trim()}\n\n`; |
162 | | - case "p": |
163 | | - return children.trim() ? `${children.trim()}\n\n` : "\n"; |
164 | | - case "strong": |
165 | | - case "b": |
166 | | - return `**${children}**`; |
167 | | - case "em": |
168 | | - case "i": |
169 | | - return `*${children}*`; |
170 | | - case "code": |
171 | | - if (node.parentElement?.tagName.toLowerCase() === "pre") { |
172 | | - return children; |
173 | | - } |
174 | | - const cleanInlineCode = children |
175 | | - .replace(/[\u200B-\u200D\uFEFF]/g, "") // Remove zero-width spaces |
176 | | - .replace(/\u00A0/g, " ") // Replace non-breaking spaces |
177 | | - .trim(); |
178 | | - return `\`${cleanInlineCode}\``; |
179 | | - case "pre": |
180 | | - const codeElement = node.querySelector("code"); |
181 | | - if (codeElement) { |
182 | | - const language = |
183 | | - (codeElement.className?.match(/language-(\w+)/) || |
184 | | - node.className?.match(/language-(\w+)/) || |
185 | | - codeElement.className?.match(/hljs-(\w+)/) || |
186 | | - codeElement.className?.match(/prism-(\w+)/) || |
187 | | - [])[1] || ""; |
188 | | - |
189 | | - let codeContent = ""; |
190 | | - |
191 | | - try { |
192 | | - // Method 1: Try to get content from data attributes (some themes store original content) |
193 | | - const originalContent = |
194 | | - codeElement.getAttribute("data-code") || |
195 | | - node.getAttribute("data-code") || |
196 | | - codeElement.getAttribute("data-raw"); |
197 | | - |
198 | | - if (originalContent) { |
199 | | - codeContent = originalContent; |
200 | | - } else { |
201 | | - // Method 2: Look for individual code lines in specific containers |
202 | | - const codeLines = codeElement.querySelectorAll( |
203 | | - "span[data-line], .token-line, .code-line, .highlight-line" |
204 | | - ); |
205 | | - if (codeLines.length > 0) { |
206 | | - codeContent = Array.from(codeLines) |
207 | | - .map((lineElement) => { |
208 | | - return lineElement?.textContent || ""; |
209 | | - }) |
210 | | - .join("\n"); |
211 | | - } else { |
212 | | - // Method 3: Look for div-based line structure |
213 | | - const codeLineDivs = codeElement.querySelectorAll("div"); |
214 | | - if (codeLineDivs.length > 0) { |
215 | | - codeContent = Array.from(codeLineDivs) |
216 | | - .map((lineDiv) => { |
217 | | - // Skip if this looks like a line number container |
218 | | - if ( |
219 | | - lineDiv.className?.includes("codeLineNumber") || |
220 | | - lineDiv.className?.includes("LineNumber") || |
221 | | - lineDiv.className?.includes("line-number") || |
222 | | - lineDiv.style?.userSelect === "none" |
223 | | - ) { |
224 | | - return null; |
225 | | - } |
226 | | - return lineDiv?.textContent || ""; |
227 | | - }) |
228 | | - .filter((line) => line !== null) |
229 | | - .join("\n"); |
230 | | - } else { |
231 | | - // Method 4: Direct text extraction with cleanup |
232 | | - let rawText = codeElement.textContent || ""; |
233 | | - |
234 | | - // Remove line numbers at the start of lines (common pattern: "1 ", "12 ", etc.) |
235 | | - rawText = rawText.replace(/^\d+\s+/gm, ""); |
236 | | - |
237 | | - // Remove copy button text and other UI elements |
238 | | - rawText = rawText.replace(/^Copy$/gm, ""); |
239 | | - rawText = rawText.replace(/^Copied!$/gm, ""); |
240 | | - rawText = rawText.replace( |
241 | | - /^\s*Copy to clipboard\s*$/gm, |
242 | | - "" |
243 | | - ); |
244 | | - |
245 | | - codeContent = rawText; |
246 | | - } |
247 | | - } |
248 | | - } |
249 | | - |
250 | | - // Final cleanup |
251 | | - codeContent = codeContent |
252 | | - .replace(/[\u200B-\u200D\uFEFF]/g, "") // Remove zero-width spaces |
253 | | - .replace(/\u00A0/g, " ") // Replace non-breaking spaces |
254 | | - .trim(); |
255 | | - |
256 | | - // Remove empty lines at start and end |
257 | | - codeContent = codeContent.replace(/^\n+|\n+$/g, ""); |
258 | | - } catch (error) { |
259 | | - // Fallback to simple text extraction if anything fails |
260 | | - codeContent = codeElement.textContent || ""; |
261 | | - } |
262 | | - |
263 | | - return `\n\`\`\`${language}\n${codeContent}\n\`\`\`\n\n`; |
264 | | - } |
265 | | - return `\n\`\`\`\n${children}\n\`\`\`\n\n`; |
266 | | - case "ul": |
267 | | - return `\n${children}`; |
268 | | - case "ol": |
269 | | - const items = Array.from(node.querySelectorAll("li")); |
270 | | - return ( |
271 | | - "\n" + |
272 | | - items |
273 | | - .map( |
274 | | - (item, index) => |
275 | | - `${index + 1}. ${processNode(item) |
276 | | - .replace(/^- /, "") |
277 | | - .trim()}\n` |
278 | | - ) |
279 | | - .join("") |
280 | | - ); |
281 | | - case "li": |
282 | | - return `- ${children.trim()}\n`; |
283 | | - case "a": |
284 | | - const href = node.getAttribute("href"); |
285 | | - if (href && !href.startsWith("#") && children.trim()) { |
286 | | - return `[${children.trim()}](${href})`; |
287 | | - } |
288 | | - return children; |
289 | | - case "br": |
290 | | - return "\n"; |
291 | | - case "blockquote": |
292 | | - return `\n> ${children.trim()}\n\n`; |
293 | | - case "table": |
294 | | - return `\n${children}\n`; |
295 | | - case "tr": |
296 | | - return `${children}\n`; |
297 | | - case "th": |
298 | | - case "td": |
299 | | - return `| ${children.trim()} `; |
300 | | - case "img": |
301 | | - const src = node.getAttribute("src"); |
302 | | - const alt = node.getAttribute("alt") || ""; |
303 | | - return src ? `![${alt}](${src})` : ""; |
304 | | - case "div": |
305 | | - case "section": |
306 | | - case "article": |
307 | | - // Handle admonitions |
308 | | - if (node.classList?.contains("admonition")) { |
309 | | - const type = |
310 | | - Array.from(node.classList) |
311 | | - .find((cls) => cls.startsWith("alert--")) |
312 | | - ?.replace("alert--", "") || "note"; |
313 | | - return `\n> **${type.toUpperCase()}**: ${children.trim()}\n\n`; |
314 | | - } |
315 | | - return children + "\n"; |
316 | | - default: |
317 | | - return children; |
318 | | - } |
319 | | - } |
320 | | - |
321 | | - return ""; |
322 | | - }; |
323 | | - |
324 | | - return processNode(element) |
325 | | - .replace(/\n{3,}/g, "\n\n") // Limit multiple newlines |
326 | | - .replace(/^\n+|\n+$/g, "") // Trim newlines |
327 | | - .trim(); |
328 | | - }; |
329 | | - |
330 | 96 | const extractPageContent = () => { |
331 | | - const mainContent = |
332 | | - document.querySelector("main article") || |
333 | | - document.querySelector("main .markdown"); |
334 | | - |
335 | | - if (!mainContent) { |
336 | | - const alternatives = document.querySelector("main") || document.querySelector("article") || document.querySelector(".main-wrapper"); |
337 | | - if (!alternatives) return ""; |
338 | | - } |
339 | | - |
340 | | - const targetElement = mainContent || document.querySelector("main") || document.querySelector("article"); |
341 | | - const clone = targetElement.cloneNode(true); |
342 | | - |
343 | | - // Remove unwanted elements |
344 | | - SELECTORS_TO_REMOVE.forEach((selector) => { |
345 | | - clone.querySelectorAll(selector).forEach((el) => el.remove()); |
346 | | - }); |
347 | | - |
348 | | - // Extract title from first H1 and remove it from content |
349 | | - const firstH1 = clone.querySelector("h1"); |
350 | | - const title = firstH1?.textContent.trim() || "Documentation Page"; |
351 | | - if (firstH1) { |
352 | | - firstH1.remove(); |
353 | | - } |
354 | | - |
355 | | - const content = convertToMarkdown(clone); |
356 | | - const currentUrl = window.location.href; |
357 | | - return `# ${title}\n\nURL: ${currentUrl}\n\n${content}`; |
| 97 | + return extractPageMarkdownFromDocument(document, window.location.href); |
358 | 98 | }; |
359 | 99 |
|
360 | 100 | const copyToClipboard = async (text) => { |
@@ -388,7 +128,9 @@ export default function CopyPageButton({ |
388 | 128 |
|
389 | 129 | const openInAI = (baseUrl, queryParam = 'q', extraParams = {}) => { |
390 | 130 | try { |
391 | | - const currentUrl = window.location.href; |
| 131 | + const currentUrl = generateMarkdownRoutes |
| 132 | + ? getMarkdownRouteUrl(window.location.href) |
| 133 | + : window.location.href; |
392 | 134 | const prompt = `Please read and explain this documentation page: ${currentUrl} |
393 | 135 |
|
394 | 136 | Please provide a clear summary and help me understand the key concepts covered in this documentation.`; |
|
0 commit comments