Skip to content

Commit f067aee

Browse files
nperez0111 and claude committed
fix: improve markdown parser robustness and update export snapshots
- Accept GFM tables with or without outer pipes
- Allow 0-3 leading spaces on fenced code blocks per CommonMark
- Strip optional titles from link URLs so they don't appear in href
- Use balanced bracket matching for image alt text
- Update 3 export snapshots for empty blocks after rebase

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 23a463f commit f067aee

9 files changed

Lines changed: 239 additions & 11 deletions

File tree

packages/core/src/api/parsers/markdown/markdownToHtml.ts

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,10 @@ function parseImage(
250250
start: number
251251
): { html: string; end: number } | null {
252252
// ![alt](url) or ![alt](url "title")
253-
const altStart = start + 2; // after ![
254-
const altEnd = text.indexOf("]", altStart);
253+
// Use balanced bracket matching to handle nested/escaped brackets in alt text
254+
const altEnd = findClosingBracket(text, start + 1);
255255
if (altEnd === -1) {return null;}
256+
const altStart = start + 2; // after ![
256257

257258
if (text[altEnd + 1] !== "(") {return null;}
258259

@@ -303,7 +304,7 @@ function parseLink(
303304
if (parenEnd === -1) {return null;}
304305

305306
const linkText = text.substring(textStart, textEnd);
306-
const url = text.substring(urlStart, parenEnd).trim();
307+
const url = extractDestination(text.substring(urlStart, parenEnd).trim());
307308

308309
return {
309310
html: `<a href="${escapeHtml(url)}">${parseInline(linkText)}</a>`,
@@ -343,6 +344,35 @@ function findClosingParen(text: string, openPos: number): number {
343344
return -1;
344345
}
345346

347+
/**
 * Extract the destination URL from the raw text found between the
 * parentheses of a link or image, dropping any optional title.
 *
 * Two destination forms are recognised:
 * - Angle-bracket form (`<url>`): the destination is everything up to the
 *   first unescaped `>`, so it may contain spaces. Anything after that
 *   bracket (e.g. a title) is discarded.
 * - Bare form (`url "title"`): the destination ends at the first unescaped
 *   whitespace character (space, tab, LF or CR).
 *
 * Backslash escapes are skipped while scanning but are left untouched in the
 * returned string. Leading whitespace is not skipped, so pass a trimmed
 * string.
 *
 * E.g., `<url>` → `url`, `url "title"` → `url`, `<a b> "t"` → `a b`
 *
 * @param raw - Text between the parentheses of a link or image.
 * @returns The destination without angle brackets or title, or `raw`
 *   unchanged when no delimiter is found.
 */
function extractDestination(raw: string): string {
  // Index of the first character at or after `from` that satisfies `isEnd`
  // and is not consumed by a preceding backslash escape; -1 when none exists.
  const findUnescaped = (
    from: number,
    isEnd: (ch: string) => boolean
  ): number => {
    for (let i = from; i < raw.length; i++) {
      const ch = raw.charAt(i);
      if (ch === "\\" && i + 1 < raw.length) {
        i++; // skip the escaped character
        continue;
      }
      if (isEnd(ch)) {
        return i;
      }
    }
    return -1;
  };

  // Angle-bracket destination: <url>. Stop at the first unescaped ">" rather
  // than trusting a trailing ">", which may belong to text after the
  // destination.
  if (raw.startsWith("<")) {
    const close = findUnescaped(1, (ch) => ch === ">");
    if (close !== -1) {
      return raw.substring(1, close);
    }
    // No closing bracket: fall through and treat it as a bare destination.
  }

  // Bare destination: split at the first unescaped whitespace to separate it
  // from the title. "\r" is included so CRLF input leaves no stray CR.
  const end = findUnescaped(
    0,
    (ch) => ch === " " || ch === "\t" || ch === "\n" || ch === "\r"
  );
  return end === -1 ? raw : raw.substring(0, end);
}
375+
346376
function parseDelimited(
347377
text: string,
348378
start: number,
@@ -478,8 +508,8 @@ function tokenize(markdown: string): Token[] {
478508
continue;
479509
}
480510

481-
// Fenced code block
482-
const fenceMatch = line.match(/^(`{3,}|~{3,})(.*)$/);
511+
// Fenced code block (0-3 leading spaces allowed per CommonMark)
512+
const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
483513
if (fenceMatch) {
484514
const fence = fenceMatch[1];
485515
const fenceChar = fence[0];
@@ -489,7 +519,7 @@ function tokenize(markdown: string): Token[] {
489519
i++;
490520
while (i < lines.length) {
491521
const closingMatch = lines[i].match(
492-
new RegExp(`^${fenceChar}{${fenceLen},}\\s*$`)
522+
new RegExp(`^ {0,3}${fenceChar}{${fenceLen},}\\s*$`)
493523
);
494524
if (closingMatch) {
495525
i++;
@@ -750,11 +780,15 @@ function tryParseTable(
750780
const headerLine = lines[start];
751781
const separatorLine = lines[start + 1];
752782

753-
// Check separator line format: | --- | --- | or | :--- | ---: |
754-
if (!/^\s*\|(\s*:?-+:?\s*\|)+\s*$/.test(separatorLine)) {return null;}
783+
// Check separator line format: | --- | --- | or --- | --- (outer pipes optional)
784+
// Must contain at least one pipe and only dashes, colons, pipes, and whitespace
785+
if (
786+
!separatorLine.includes("|") ||
787+
!/^\s*\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|?\s*$/.test(separatorLine)
788+
) {return null;}
755789

756-
// Check header line format: | ... | ... |
757-
if (!/^\s*\|(.+\|)+\s*$/.test(headerLine)) {return null;}
790+
// Check header line has at least one pipe (required to distinguish from plain text)
791+
if (!headerLine.includes("|")) {return null;}
758792

759793
const headers = parsePipeCells(headerLine);
760794
const alignments = parseAlignments(separatorLine);
@@ -763,7 +797,7 @@ function tryParseTable(
763797
let i = start + 2;
764798
while (i < lines.length) {
765799
const line = lines[i];
766-
if (!/^\s*\|(.+\|)+\s*$/.test(line)) {break;}
800+
if (!line.includes("|")) {break;}
767801
rows.push(parsePipeCells(line));
768802
i++;
769803
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[
2+
{
3+
"children": [],
4+
"content": [
5+
{
6+
"styles": {},
7+
"text": "const x = 1;",
8+
"type": "text",
9+
},
10+
],
11+
"id": "1",
12+
"props": {
13+
"language": "ts",
14+
},
15+
"type": "codeBlock",
16+
},
17+
]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[
2+
{
3+
"children": [],
4+
"content": undefined,
5+
"id": "1",
6+
"props": {
7+
"backgroundColor": "default",
8+
"caption": "",
9+
"name": "alt [with] brackets",
10+
"showPreview": true,
11+
"textAlignment": "left",
12+
"url": "https://example.com/image.png",
13+
},
14+
"type": "image",
15+
},
16+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[
2+
{
3+
"children": [],
4+
"content": [
5+
{
6+
"content": [
7+
{
8+
"styles": {},
9+
"text": "example",
10+
"type": "text",
11+
},
12+
],
13+
"href": "https://example.com",
14+
"type": "link",
15+
},
16+
],
17+
"id": "1",
18+
"props": {
19+
"backgroundColor": "default",
20+
"textAlignment": "left",
21+
"textColor": "default",
22+
},
23+
"type": "paragraph",
24+
},
25+
]
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
[
2+
{
3+
"children": [],
4+
"content": {
5+
"columnWidths": [
6+
undefined,
7+
undefined,
8+
],
9+
"headerCols": undefined,
10+
"headerRows": 1,
11+
"rows": [
12+
{
13+
"cells": [
14+
{
15+
"content": [
16+
{
17+
"styles": {},
18+
"text": "Col 1",
19+
"type": "text",
20+
},
21+
],
22+
"props": {
23+
"backgroundColor": "default",
24+
"colspan": 1,
25+
"rowspan": 1,
26+
"textAlignment": "left",
27+
"textColor": "default",
28+
},
29+
"type": "tableCell",
30+
},
31+
{
32+
"content": [
33+
{
34+
"styles": {},
35+
"text": "Col 2",
36+
"type": "text",
37+
},
38+
],
39+
"props": {
40+
"backgroundColor": "default",
41+
"colspan": 1,
42+
"rowspan": 1,
43+
"textAlignment": "left",
44+
"textColor": "default",
45+
},
46+
"type": "tableCell",
47+
},
48+
],
49+
},
50+
{
51+
"cells": [
52+
{
53+
"content": [
54+
{
55+
"styles": {},
56+
"text": "A",
57+
"type": "text",
58+
},
59+
],
60+
"props": {
61+
"backgroundColor": "default",
62+
"colspan": 1,
63+
"rowspan": 1,
64+
"textAlignment": "left",
65+
"textColor": "default",
66+
},
67+
"type": "tableCell",
68+
},
69+
{
70+
"content": [
71+
{
72+
"styles": {},
73+
"text": "B",
74+
"type": "text",
75+
},
76+
],
77+
"props": {
78+
"backgroundColor": "default",
79+
"colspan": 1,
80+
"rowspan": 1,
81+
"textAlignment": "left",
82+
"textColor": "default",
83+
},
84+
"type": "tableCell",
85+
},
86+
],
87+
},
88+
],
89+
"type": "tableContent",
90+
},
91+
"id": "1",
92+
"props": {
93+
"textColor": "default",
94+
},
95+
"type": "table",
96+
},
97+
]

tests/src/unit/core/formatConversion/parse/parseTestInstances.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1880,4 +1880,40 @@ Paragraph after table`,
18801880
},
18811881
executeTest: testParseMarkdown,
18821882
},
1883+
// Table without outer pipes (GFM allows optional outer pipes)
1884+
{
1885+
testCase: {
1886+
name: "tablePipeless",
1887+
content: `Col 1 | Col 2
1888+
----- | -----
1889+
A | B`,
1890+
},
1891+
executeTest: testParseMarkdown,
1892+
},
1893+
// Indented fenced code block (up to 3 leading spaces per CommonMark)
1894+
{
1895+
testCase: {
1896+
name: "codeBlockIndented",
1897+
content: ` \`\`\`ts
1898+
const x = 1;
1899+
\`\`\``,
1900+
},
1901+
executeTest: testParseMarkdown,
1902+
},
1903+
// Link with title (title should not appear in href)
1904+
{
1905+
testCase: {
1906+
name: "linkWithTitle",
1907+
content: `[example](https://example.com "Example Site")`,
1908+
},
1909+
executeTest: testParseMarkdown,
1910+
},
1911+
// Image with nested brackets in alt text
1912+
{
1913+
testCase: {
1914+
name: "imageNestedBracketsAlt",
1915+
content: `![alt [with] brackets](https://example.com/image.png)`,
1916+
},
1917+
executeTest: testParseMarkdown,
1918+
},
18831919
];

0 commit comments

Comments
 (0)