Skip to content

Commit 45829d2

Browse files
bingryan and claude authored
fix: preserve code blocks in embedded markdown files (#122)
Previously, embedded content was extracted from rendered HTML which converted code blocks (like mermaid) to their rendered form. This made it impossible to preserve the original code block syntax. Now reads raw markdown directly from source files, preserving all code blocks in their original format. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 375f99c commit 45829d2

1 file changed

Lines changed: 142 additions & 21 deletions

File tree

src/utils.ts

Lines changed: 142 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import * as path from "path";
22
import * as fs from "fs";
33
import md5 from "md5";
4-
import { TAbstractFile, TFile, TFolder, htmlToMarkdown, CachedMetadata } from "obsidian";
4+
import { TAbstractFile, TFile, TFolder, CachedMetadata } from "obsidian";
55

66
import {
77
ATTACHMENT_URL_REGEXP,
@@ -449,38 +449,159 @@ export async function tryCopyMarkdown(
449449
}
450450
}
451451

452+
/**
 * Cut one heading section out of raw markdown text.
 *
 * The section begins at the first heading whose text equals `heading`
 * (compared case-insensitively, with surrounding whitespace trimmed) and ends
 * just before the next heading of the same or a higher level, or at the end
 * of the text.
 *
 * Lines inside fenced code blocks (``` or ~~~) are never treated as headings,
 * so a `# comment` in an embedded script can neither start nor truncate a
 * section. Both LF and CRLF line endings are accepted; the result is always
 * joined with "\n".
 *
 * @param content - Full markdown text to search.
 * @param heading - Heading text to look for, without the leading `#` markers.
 * @returns The section including its heading line, or `null` if no heading matches.
 */
function extractHeadingSection(
  content: string,
  heading: string
): string | null {
  // Up to three spaces of indent, then a run of 3+ backticks or tildes.
  const fencePattern = /^ {0,3}(`{3,}|~{3,})(.*)$/;
  // The start heading must carry text; a terminating heading only needs "#… ".
  const targetPattern = /^(#{1,6})\s+(.+)$/;
  const anyHeadingPattern = /^(#{1,6})\s/;

  const target = heading.toLowerCase().trim();
  // Splitting on \r?\n keeps a trailing "\r" from defeating the `$` anchors.
  const lines = content.split(/\r?\n/);

  let openFence: string | null = null;
  let startIndex = -1;
  let headingLevel = 0;
  let endIndex = lines.length;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const fence = fencePattern.exec(line);

    if (openFence !== null) {
      // Inside a code block: only a bare fence of the same character and at
      // least the same length closes it; everything else is code.
      if (
        fence &&
        fence[1][0] === openFence[0] &&
        fence[1].length >= openFence.length &&
        fence[2].trim() === ""
      ) {
        openFence = null;
      }
      continue;
    }

    // A backtick fence may not have backticks in its info string; such a line
    // is inline code (e.g. ```x```), not the start of a block.
    if (fence && (fence[1][0] !== "`" || !fence[2].includes("`"))) {
      openFence = fence[1];
      continue;
    }

    if (startIndex === -1) {
      const match = targetPattern.exec(line);
      if (match && match[2].toLowerCase().trim() === target) {
        startIndex = i;
        headingLevel = match[1].length;
      }
      continue;
    }

    const next = anyHeadingPattern.exec(line);
    if (next && next[1].length <= headingLevel) {
      endIndex = i;
      break;
    }
  }

  if (startIndex === -1) {
    return null;
  }
  return lines.slice(startIndex, endIndex).join("\n");
}

/**
 * Extract heading content from a file (from the heading to the next heading
 * of the same or a higher level).
 *
 * @param plugin - Plugin instance whose vault is used to look up and read the file.
 * @param filePath - Vault path of the file to read.
 * @param heading - Heading text to find, without the leading `#` markers.
 * @returns The raw markdown of the section, or `null` when the path does not
 *   resolve to a file, the heading is not found, or reading fails (the error
 *   is logged to the console).
 */
async function getHeadingContent(
  plugin: MarkdownExportPlugin,
  filePath: string,
  heading: string
): Promise<string | null> {
  try {
    const file = plugin.app.vault.getAbstractFileByPath(filePath);
    if (!(file instanceof TFile)) {
      return null;
    }

    const content = await plugin.app.vault.cachedRead(file);
    return extractHeadingSection(content, heading);
  } catch (error) {
    console.error("Error getting heading content:", error);
    return null;
  }
}
514+
515+
/**
 * Get embed content by reading raw markdown from the source file.
 *
 * Reading the source (rather than rendered HTML) preserves code blocks such
 * as mermaid in their original format.
 *
 * The link is parsed with `parseEmbedLink`. A block reference takes
 * precedence over a heading reference; with neither, the whole file is
 * returned. The file is read here only in the whole-file case, since the
 * block and heading helpers are given the path and fetch the content
 * themselves.
 *
 * When `plugin.settings.removeYamlHeader` is set, matches of
 * `EMBED_METADATA_REGEXP` are removed from any non-empty result.
 *
 * @param plugin - Plugin instance providing the vault and settings.
 * @param embedLink - Embed link text, forwarded to `parseEmbedLink`.
 * @param currentPath - Path forwarded to `parseEmbedLink` alongside the link.
 * @returns The raw markdown, or `null` when the link yields no file path, the
 *   path does not resolve to a file, the block/heading helper returns `null`,
 *   or an error is thrown (the error is logged to the console).
 */
async function getEmbedContentFromSource(
  plugin: MarkdownExportPlugin,
  embedLink: string,
  currentPath: string
): Promise<string | null> {
  const parsed = parseEmbedLink(embedLink, currentPath);

  if (!parsed.filePath) {
    return null;
  }

  try {
    const file = plugin.app.vault.getAbstractFileByPath(parsed.filePath);
    if (!(file instanceof TFile)) {
      return null;
    }

    let content: string | null;
    if (parsed.blockId) {
      content = await getBlockContent(plugin, parsed.filePath, parsed.blockId);
    } else if (parsed.heading) {
      content = await getHeadingContent(
        plugin,
        parsed.filePath,
        parsed.heading
      );
    } else {
      content = await plugin.app.vault.cachedRead(file);
    }

    // Missing or empty content is passed through untouched; otherwise strip
    // the YAML header once, whichever branch produced the text.
    if (!content || !plugin.settings.removeYamlHeader) {
      return content;
    }
    return content.replace(EMBED_METADATA_REGEXP, "");
  } catch (error) {
    console.error("Error getting embed content from source:", error);
    return null;
  }
}
576+
452577
/**
 * Build a map from embed link to the quoted markdown it should expand to.
 *
 * Every match returned by `getEmbeds(content)` contributes its first capture
 * group as the link. The link's raw markdown is fetched with
 * `getEmbedContentFromSource`; links that resolve to `null` are left out of
 * the map. Resolved content is turned into a block quote by prefixing "> "
 * to the first line and to every line after a newline.
 *
 * @param plugin - Plugin instance forwarded to `getEmbedContentFromSource`.
 * @param content - Markdown text that is scanned for embeds.
 * @param path - Path forwarded to `getEmbedContentFromSource` with each link.
 * @returns A map keyed by embed link with the quoted content as value.
 */
export async function getEmbedMap(
  plugin: MarkdownExportPlugin,
  content: string,
  path: string
) {
  const quotedByLink = new Map();

  for (const match of await getEmbeds(content)) {
    const link = match[1];
    const source = await getEmbedContentFromSource(plugin, link, path);

    // Unresolvable embeds are skipped rather than mapped to an empty quote.
    if (source === null) {
      continue;
    }

    const quotedBody = source
      .replaceAll("# \n\n", "# ")
      .replaceAll("\n", "\n> ");
    quotedByLink.set(link, "> " + quotedBody);
  }

  return quotedByLink;
}

0 commit comments

Comments
 (0)