Skip to content

Commit 1c720f2

Browse files
authored
fix(markdown): stable round-trip for tables, captions, and audio (#2720)
1 parent 531ea32 commit 1c720f2

22 files changed

Lines changed: 936 additions & 118 deletions

File tree

packages/core/src/api/exporters/markdown/htmlToMarkdown.ts

Lines changed: 81 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -520,8 +520,10 @@ function serializeVideo(el: HTMLElement, ctx: SerializeContext): string {
520520
function serializeAudio(el: HTMLElement, ctx: SerializeContext): string {
521521
const src = el.getAttribute("src") || "";
522522
if (!src) {return "\n\n";}
523-
// Audio has no visible representation in markdown; output as link with empty text
524-
return ctx.indent + `[](${src})\n\n`;
523+
// Audio has no markdown syntax, so emit raw HTML. The markdown parser
524+
// passes <audio> blocks through verbatim and BlockNote's audio block parser
525+
// recognizes them, giving a clean round-trip.
526+
return ctx.indent + `<audio src="${escapeHtmlAttr(src)}" controls></audio>\n\n`;
525527
}
526528

527529
function serializeEmbed(el: HTMLElement, ctx: SerializeContext): string {
@@ -531,41 +533,97 @@ function serializeEmbed(el: HTMLElement, ctx: SerializeContext): string {
531533
}
532534

533535
function serializeFigure(el: HTMLElement, ctx: SerializeContext): string {
534-
let result = "";
535-
536-
// Find the media element
537536
const img = el.querySelector("img");
538537
const video = el.querySelector("video");
539538
const audio = el.querySelector("audio");
540539
const link = el.querySelector("a");
541540

541+
const figcaption = el.querySelector("figcaption");
542+
const captionText = figcaption?.textContent?.trim() || "";
543+
542544
if (img) {
543-
const src = img.getAttribute("src") || "";
544-
const alt = img.getAttribute("alt") || "";
545-
result += ctx.indent + `![${alt}](${src})\n\n`;
546-
} else if (video) {
545+
return serializeMediaFigure(
546+
"img",
547+
img.getAttribute("src") || "",
548+
img.getAttribute("alt") || "",
549+
captionText,
550+
ctx,
551+
);
552+
}
553+
if (video) {
547554
const src =
548555
video.getAttribute("src") || video.getAttribute("data-url") || "";
549556
const name =
550557
video.getAttribute("data-name") || video.getAttribute("title") || "";
551-
result += ctx.indent + `![${name}](${src})\n\n`;
552-
} else if (audio) {
553-
const src = audio.getAttribute("src") || "";
554-
result += ctx.indent + `[](${src})\n\n`;
555-
} else if (link) {
556-
result += serializeBlockLink(link as HTMLElement, ctx);
558+
return serializeMediaFigure("video", src, name, captionText, ctx);
559+
}
560+
if (audio) {
561+
return serializeMediaFigure(
562+
"audio",
563+
audio.getAttribute("src") || "",
564+
"",
565+
captionText,
566+
ctx,
567+
);
568+
}
569+
if (link) {
570+
return serializeBlockLink(link as HTMLElement, ctx);
557571
}
572+
return "";
573+
}
558574

559-
// Caption
560-
const figcaption = el.querySelector("figcaption");
561-
if (figcaption) {
562-
const caption = figcaption.textContent?.trim() || "";
563-
if (caption) {
564-
result += ctx.indent + caption + "\n\n";
565-
}
575+
function serializeMediaFigure(
576+
kind: "img" | "video" | "audio",
577+
src: string,
578+
descriptor: string,
579+
captionText: string,
580+
ctx: SerializeContext,
581+
): string {
582+
if (!src) {return "";}
583+
584+
// No caption + has a markdown shorthand → use it.
585+
if (!captionText && kind !== "audio") {
586+
return ctx.indent + `![${descriptor}](${src})\n\n`;
566587
}
567588

568-
return result;
589+
// The descriptor (alt / data-name) is dropped when it duplicates the
590+
// caption text; otherwise on round-trip both `name` and `caption` would
591+
// get set to the same string (BlockNote's HTML exporter writes alt =
592+
// name || caption, so a caption-only image has alt === figcaption text).
593+
const showDescriptor = descriptor && descriptor !== captionText;
594+
const descAttr =
595+
!showDescriptor
596+
? ""
597+
: kind === "img"
598+
? ` alt="${escapeHtmlAttr(descriptor)}"`
599+
: kind === "video"
600+
? ` data-name="${escapeHtmlAttr(descriptor)}"`
601+
: "";
602+
603+
const tag =
604+
kind === "img"
605+
? `<img${descAttr} src="${escapeHtmlAttr(src)}">`
606+
: `<${kind} src="${escapeHtmlAttr(src)}"${descAttr} controls></${kind}>`;
607+
608+
const captionPart = captionText
609+
? `<figcaption>${escapeHtmlText(captionText)}</figcaption>`
610+
: "";
611+
return ctx.indent + `<figure>${tag}${captionPart}</figure>\n\n`;
612+
}
613+
614+
function escapeHtmlAttr(value: string): string {
615+
return value
616+
.replace(/&/g, "&amp;")
617+
.replace(/"/g, "&quot;")
618+
.replace(/</g, "&lt;")
619+
.replace(/>/g, "&gt;");
620+
}
621+
622+
function escapeHtmlText(value: string): string {
623+
return value
624+
.replace(/&/g, "&amp;")
625+
.replace(/</g, "&lt;")
626+
.replace(/>/g, "&gt;");
569627
}
570628

571629
function serializeBlockLink(el: HTMLElement, ctx: SerializeContext): string {

packages/core/src/api/parsers/markdown/markdownToHtml.ts

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,13 @@ function parseImage(
321321
);
322322

323323
if (isVideoUrl(url)) {
324-
// Match remark-rehype behavior: data-name comes from the title, not alt
324+
// Use the alt text as the video's display name (falling back to the
325+
// title) so a video link written with the standard `![name](url)` form
326+
// round-trips into BlockNote's video block. Captioned videos go through
327+
// raw `<figure>` HTML instead, see htmlToMarkdown.serializeMediaFigure.
328+
const name = alt || title;
325329
return {
326-
html: `<video src="${escapeHtml(url)}"${title !== undefined ? ` data-name="${escapeHtml(title)}"` : ""} data-url="${escapeHtml(url)}" controls></video>`,
330+
html: `<video src="${escapeHtml(url)}"${name ? ` data-name="${escapeHtml(name)}"` : ""} data-url="${escapeHtml(url)}" controls></video>`,
327331
end: parenEnd + 1,
328332
};
329333
}
@@ -573,19 +577,21 @@ type Token =
573577
| RawHtmlToken;
574578

575579
/**
576-
* HTML block-level tag names (from the CommonMark type-6 list). When a line
577-
* starts with `<` followed by one of these tag names, the run of non-blank
578-
* lines is emitted verbatim as raw HTML rather than wrapped in a paragraph.
580+
* HTML block-level tag names (from the CommonMark type-6 list, plus `audio`
581+
* which BlockNote serializes as raw HTML since markdown has no shorthand
582+
* for it). When a line starts with `<` followed by one of these tag names,
583+
* the run of non-blank lines is emitted verbatim as raw HTML rather than
584+
* wrapped in a paragraph.
579585
*/
580586
const HTML_BLOCK_TAGS = new Set([
581-
"address", "article", "aside", "base", "basefont", "blockquote", "body",
582-
"caption", "center", "col", "colgroup", "dd", "details", "dialog", "dir",
583-
"div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form",
584-
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header",
585-
"hr", "html", "iframe", "legend", "li", "link", "main", "menu", "menuitem",
586-
"nav", "noframes", "ol", "optgroup", "option", "p", "param", "section",
587-
"source", "summary", "table", "tbody", "td", "tfoot", "th", "thead",
588-
"title", "tr", "track", "ul",
587+
"address", "article", "aside", "audio", "base", "basefont", "blockquote",
588+
"body", "caption", "center", "col", "colgroup", "dd", "details", "dialog",
589+
"dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer",
590+
"form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head",
591+
"header", "hr", "html", "iframe", "legend", "li", "link", "main", "menu",
592+
"menuitem", "nav", "noframes", "ol", "optgroup", "option", "p", "param",
593+
"section", "source", "summary", "table", "tbody", "td", "tfoot", "th",
594+
"thead", "title", "tr", "track", "ul",
589595
]);
590596

591597
function isHtmlBlockStart(line: string): boolean {
@@ -1140,21 +1146,28 @@ function getEffectiveListType(
11401146
function emitTable(table: TableToken): string {
11411147
let html = "<table>";
11421148

1143-
// Header row
1144-
html += "<thead><tr>";
1145-
for (let c = 0; c < table.headers.length; c++) {
1146-
const align = table.alignments[c];
1147-
const alignAttr = align ? ` align="${align}"` : "";
1148-
html += `<th${alignAttr}>${parseInline(table.headers[c])}</th>`;
1149+
// BlockNote tables have no required header row, but the markdown table
1150+
// syntax does. When we serialize a headerless BlockNote table to markdown
1151+
// we emit an empty header row; on re-parse, treat that empty header as
1152+
// "no header" so the round-trip is stable (issue #739).
1153+
const headerIsEmpty = table.headers.every((h) => h.trim() === "");
1154+
const colCount = table.headers.length;
1155+
1156+
if (!headerIsEmpty) {
1157+
html += "<thead><tr>";
1158+
for (let c = 0; c < colCount; c++) {
1159+
const align = table.alignments[c];
1160+
const alignAttr = align ? ` align="${align}"` : "";
1161+
html += `<th${alignAttr}>${parseInline(table.headers[c])}</th>`;
1162+
}
1163+
html += "</tr></thead>";
11491164
}
1150-
html += "</tr></thead>";
11511165

1152-
// Body rows
11531166
if (table.rows.length > 0) {
11541167
html += "<tbody>";
11551168
for (const row of table.rows) {
11561169
html += "<tr>";
1157-
for (let c = 0; c < table.headers.length; c++) {
1170+
for (let c = 0; c < colCount; c++) {
11581171
const cell = c < row.length ? row[c] : "";
11591172
const align = table.alignments[c];
11601173
const alignAttr = align ? ` align="${align}"` : "";
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
export const parseVideoElement = (videoElement: HTMLVideoElement) => {
22
const url = videoElement.src || undefined;
33
const previewWidth = videoElement.width || undefined;
4+
const name = videoElement.getAttribute("data-name") || undefined;
45

5-
return { url, previewWidth };
6+
return { url, previewWidth, name };
67
};

packages/server-util/src/context/__snapshots__/ServerBlockNoteEditor.test.ts.snap

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,7 @@ Paragraph
135135
136136
* list item
137137
138-
![Example](exampleURL)
139-
140-
Caption
138+
<figure><img alt="Example" src="exampleURL"><figcaption>Caption</figcaption></figure>
141139
142140
[Example](exampleURL)
143141
@@ -221,31 +219,14 @@ exports[`Test ServerBlockNoteEditor > converts to and from markdown (blocksToMar
221219
"id": "3",
222220
"props": {
223221
"backgroundColor": "default",
224-
"caption": "",
222+
"caption": "Caption",
225223
"name": "Example",
226224
"showPreview": true,
227225
"textAlignment": "left",
228226
"url": "exampleURL",
229227
},
230228
"type": "image",
231229
},
232-
{
233-
"children": [],
234-
"content": [
235-
{
236-
"styles": {},
237-
"text": "Caption",
238-
"type": "text",
239-
},
240-
],
241-
"id": "4",
242-
"props": {
243-
"backgroundColor": "default",
244-
"textAlignment": "left",
245-
"textColor": "default",
246-
},
247-
"type": "paragraph",
248-
},
249230
{
250231
"children": [],
251232
"content": [
@@ -261,7 +242,7 @@ exports[`Test ServerBlockNoteEditor > converts to and from markdown (blocksToMar
261242
"type": "link",
262243
},
263244
],
264-
"id": "5",
245+
"id": "4",
265246
"props": {
266247
"backgroundColor": "default",
267248
"textAlignment": "left",
@@ -278,7 +259,7 @@ exports[`Test ServerBlockNoteEditor > converts to and from markdown (blocksToMar
278259
"type": "text",
279260
},
280261
],
281-
"id": "6",
262+
"id": "5",
282263
"props": {
283264
"backgroundColor": "default",
284265
"textAlignment": "left",
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<div class="bn-block-group" data-node-type="blockGroup">
2+
<div class="bn-block-outer" data-node-type="blockOuter" data-id="1">
3+
<div class="bn-block" data-node-type="blockContainer" data-id="1">
4+
<div
5+
class="bn-block-content"
6+
data-content-type="image"
7+
data-url="exampleURL"
8+
data-file-block=""
9+
>
10+
<div
11+
class="bn-file-block-content-wrapper"
12+
style="position: relative; width: fit-content;"
13+
>
14+
<div class="bn-visual-media-wrapper">
15+
<img
16+
class="bn-visual-media"
17+
src="exampleURL"
18+
alt="BlockNote image"
19+
draggable="false"
20+
/>
21+
<div class="bn-resize-handle" style="left: 4px; display: none;"></div>
22+
<div class="bn-resize-handle" style="right: 4px; display: none;"></div>
23+
</div>
24+
</div>
25+
</div>
26+
</div>
27+
</div>
28+
</div>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<img src="exampleURL" alt="BlockNote image" data-url="exampleURL" />
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[](https://example.com/audio.mp3)
1+
<audio src="https://example.com/audio.mp3" controls></audio>
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[](https://example.com/audio.mp3)
1+
<audio src="https://example.com/audio.mp3" controls></audio>
Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
![example](exampleURL)
2-
3-
Caption
1+
<figure><img alt="example" src="exampleURL"><figcaption>Caption</figcaption></figure>
Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
![Caption](exampleURL)
1+
<figure><img src="exampleURL"><figcaption>Caption</figcaption></figure>
22

3-
Caption
4-
5-
![Caption](exampleURL)
6-
7-
Caption
3+
<figure><img src="exampleURL"><figcaption>Caption</figcaption></figure>

0 commit comments

Comments
 (0)