Skip to content

Commit 814f3e8

Browse files
committed
Update logic for lists and code fences
1 parent d4ac0a2 commit 814f3e8

4 files changed

Lines changed: 214 additions & 17 deletions

File tree

src/cmem/markdown/Markdown.stories.tsx

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,17 @@ A line with some <strong>HTML code</strong> inside.
7070

7171
export const CutOff = Template.bind({});
7272

73-
CutOff.args = {
74-
children: `This component renders Markdown content safely. It supports **GitHub Flavoured Markdown**, syntax highlighting for code blocks, and definition lists.
73+
const cutOffContent = `This component renders Markdown content safely. It supports **GitHub Flavoured Markdown**, syntax highlighting for code blocks, and definition lists.
7574
7675
You can:
7776
* configure _link targets_
7877
* add custom __rehype__ plugins
7978
* and filter content through an allowed elements list
80-
A third paragraph that will not appear once the cutOff limit is reached.`,
81-
cutOff: 300,
79+
A third paragraph that will not appear once the cutOff limit is reached.`;
80+
81+
CutOff.args = {
82+
children: cutOffContent,
83+
cutOff: cutOffContent.indexOf("filter"),
8284
};
8385

8486
export const CutOffWithCodeFence = Template.bind({});
@@ -100,6 +102,23 @@ This paragraph comes after the code block and should not appear when the cutOff
100102
cutOffSuffix: "...",
101103
};
102104

105+
const indentedCodeFenceContent = `Intro.
106+
107+
\`\`\`ts
108+
const first = 1;
109+
const second = 2;
110+
\`\`\`
111+
112+
Outro.`;
113+
114+
export const CutOffWithIndentedCodeFence = Template.bind({});
115+
116+
CutOffWithIndentedCodeFence.args = {
117+
children: indentedCodeFenceContent,
118+
cutOff: indentedCodeFenceContent.indexOf("first"),
119+
cutOffSuffix: "...",
120+
};
121+
103122
export const CutOffWithLinks = Template.bind({});
104123

105124
CutOffWithLinks.args = {

src/cmem/markdown/Markdown.test.tsx

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ describe("Markdown", () => {
1515
expect(container.textContent).not.toContain("https://example.com");
1616
});
1717

18-
it("renders a valid code block when cutOff falls inside a fenced code block", () => {
18+
it("backs up before a fenced code block when cutOff falls inside it after a paragraph boundary", () => {
1919
const content = [
2020
"Intro.",
2121
"",
@@ -30,11 +30,33 @@ describe("Markdown", () => {
3030

3131
const { container } = render(<Markdown cutOff={35}>{content}</Markdown>);
3232

33-
expect(container.querySelector("pre")).toBeTruthy();
34-
expect(container.textContent).toContain("const first");
33+
expect(container.querySelector("pre")).toBeFalsy();
34+
expect(container.textContent).toContain("Intro.");
35+
expect(container.textContent).not.toContain("const first");
3536
expect(container.textContent).not.toContain("Outro");
3637
});
3738

39+
it("backs up before a fenced code block when cutOff falls inside it after preceding text without a paragraph boundary", () => {
40+
const content = [
41+
"A short paragraph before the code block.",
42+
"Here is an important code example:",
43+
"```json",
44+
"{",
45+
' "host": "localhost"',
46+
"}",
47+
"```",
48+
"",
49+
"After fence.",
50+
].join("\n");
51+
52+
const { container } = render(<Markdown cutOff={content.indexOf("localhost")}>{content}</Markdown>);
53+
54+
expect(container.querySelector("pre")).toBeFalsy();
55+
expect(container.textContent).toContain("Here is an important code example:");
56+
expect(container.textContent).not.toContain("localhost");
57+
expect(container.textContent).not.toContain("After fence");
58+
});
59+
3860
it("renders a valid table when cutOff falls inside a markdown table", () => {
3961
const content = [
4062
"| Name | Value |",
@@ -54,6 +76,24 @@ describe("Markdown", () => {
5476
expect(container.textContent).not.toContain("After table");
5577
});
5678

79+
it("keeps a complete list when cutOff falls after it without a paragraph boundary", () => {
80+
const content = `This component renders Markdown content safely. It supports **GitHub Flavoured Markdown**, syntax highlighting for code blocks, and definition lists.
81+
82+
You can:
83+
* configure _link targets_
84+
* add custom __rehype__ plugins
85+
* and filter content through an allowed elements list
86+
A third paragraph that will not appear once the cutOff limit is reached.`;
87+
88+
const { container } = render(<Markdown cutOff={300}>{content}</Markdown>);
89+
90+
expect(container.textContent).toContain("You can:");
91+
expect(container.textContent).toContain("configure link targets");
92+
expect(container.textContent).toContain("add custom rehype plugins");
93+
expect(container.textContent).toContain("and filter content through an allowed elements list");
94+
expect(container.textContent).not.toContain("A third paragraph");
95+
});
96+
5797
it("keeps complete fenced blocks before a following link with display cutOff", () => {
5898
const content = [
5999
"A short paragraph before the code block.",

src/cmem/markdown/truncateMarkdown.test.ts

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,40 @@ describe("truncateMarkdown", () => {
3737
expect(result).toBe("abcdefghij\n\n...");
3838
});
3939

40-
it("closes a code fence when cutOff falls inside it", () => {
40+
it("backs up before a code fence when cutOff falls inside it after a paragraph boundary", () => {
4141
const content = ["Safe paragraph.", "", "```", "line one", "", "line two", "```", "", "After fence."].join(
4242
"\n",
4343
);
4444
const cutOff = content.indexOf("line one") + "line one".length;
4545
const result = truncateMarkdown(content, cutOff, "...");
46-
expect(result).toBe("Safe paragraph.\n\n```\nline one\n```\n\n...");
46+
expect(result).toBe("Safe paragraph.\n\n...");
4747
});
4848

49-
it("keeps the code fence valid when cutOff includes the full fenced content", () => {
49+
it("backs up before a code fence when cutOff includes partial fenced content after a paragraph boundary", () => {
5050
const content = ["Intro.", "", "```", "some code", "```", "", "Outro."].join("\n");
5151
const result = truncateMarkdown(content, content.indexOf("some code") + "some code".length, "...");
52+
expect(result).toBe("Intro.\n\n...");
53+
});
54+
55+
it("backs up before a code fence when cutOff falls inside it after preceding text without a paragraph boundary", () => {
56+
const content = [
57+
"A short paragraph before the code block.",
58+
"Here is an important code example:",
59+
"```json",
60+
"{",
61+
' "host": "localhost"',
62+
"}",
63+
"```",
64+
"",
65+
"After fence.",
66+
].join("\n");
67+
const result = truncateMarkdown(content, content.indexOf("localhost"), "...");
68+
expect(result).toBe("A short paragraph before the code block.\nHere is an important code example:\n\n...");
69+
});
70+
71+
it("keeps the full code fence when cutOff passes the closing fence", () => {
72+
const content = ["Intro.", "", "```", "some code", "```", "", "Outro."].join("\n");
73+
const result = truncateMarkdown(content, content.indexOf("Outro."), "...");
5274
expect(result).toBe("Intro.\n\n```\nsome code\n```\n\n...");
5375
});
5476

@@ -71,6 +93,54 @@ describe("truncateMarkdown", () => {
7193
expect(result).toBe("| Name | Value |\n| --- | --- |\n| first | row |\n\n...");
7294
});
7395

96+
it("keeps a complete list when cutOff falls after it without a paragraph boundary", () => {
97+
const content = [
98+
"You can:",
99+
" * configure _link targets_",
100+
" * add custom __rehype__ plugins",
101+
" * and filter content through an allowed elements list",
102+
"A third paragraph that continues after the list.",
103+
].join("\n");
104+
const result = truncateMarkdown(content, content.indexOf("continues"), "...");
105+
106+
expect(result).toBe(
107+
[
108+
"You can:",
109+
" * configure _link targets_",
110+
" * add custom __rehype__ plugins",
111+
" * and filter content through an allowed elements list",
112+
"",
113+
"...",
114+
].join("\n"),
115+
);
116+
});
117+
118+
it("backs up before the active list item when cutOff falls inside a list", () => {
119+
const content = [
120+
"You can:",
121+
" * configure _link targets_",
122+
" * add custom __rehype__ plugins",
123+
" * and filter content through an allowed elements list",
124+
"A third paragraph that continues after the list.",
125+
].join("\n");
126+
const result = truncateMarkdown(content, content.indexOf("rehype"), "...");
127+
128+
expect(result).toBe("You can:\n * configure _link targets_\n\n...");
129+
});
130+
131+
it("backs up before the first list item when cutOff falls inside it", () => {
132+
const content = [
133+
"You can:",
134+
" * configure _link targets_",
135+
" * add custom __rehype__ plugins",
136+
" * and filter content through an allowed elements list",
137+
"A third paragraph that continues after the list.",
138+
].join("\n");
139+
const result = truncateMarkdown(content, content.indexOf("configure"), "...");
140+
141+
expect(result).toBe("You can:\n\n...");
142+
});
143+
74144
it("does not cut inside inline markdown links", () => {
75145
const content = "Read [the guide](https://example.com/a/very/long/url) before continuing.";
76146
const result = truncateMarkdown(content, content.indexOf("example.com") + 4, "...");

src/cmem/markdown/truncateMarkdown.ts

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ interface ParagraphRange {
1717
end: number;
1818
}
1919

20+
/**
 * Character span of a single Markdown list item inside the raw content string.
 * Instances are produced by `getListItemRanges`.
 */
interface ListItemRange {
21+
// Offset of the first character of the line that opens the item (any leading whitespace included).
start: number;
22+
// Offset just past the item's last line: the position after its line break, or the content length
// when no line break follows.
end: number;
23+
}
24+
2025
const getFenceRanges = (content: string): FenceRange[] => {
2126
const fenceRanges: FenceRange[] = [];
2227
const fenceRegex = /^([`~]{3,})[^\n]*(?:\n|$)/gm;
@@ -115,10 +120,55 @@ const getLastParagraphRangeBeforeCutOff = (
115120
return cutPoint;
116121
};
117122

123+
/**
 * Scans `content` line by line and collects the character range of every list item.
 *
 * A line opens a list item when it consists of optional leading whitespace, a bullet (`-`, `+`, `*`) or
 * an ordered marker (digits followed by `.` or `)`), and at least one whitespace character, and when
 * `isInsideFence` does not report its start offset as fenced.
 * A non-blank line that begins with whitespace extends the currently open item; any other line
 * (including a blank one) closes it.
 *
 * @param content Raw Markdown text.
 * @param fenceRanges Fence ranges forwarded to `isInsideFence` (presumably the fenced code blocks of
 * `content` — confirm against `getFenceRanges`).
 * @returns The list item ranges in the order they appear in `content`.
 */
const getListItemRanges = (content: string, fenceRanges: FenceRange[]): ListItemRange[] => {
124+
const listItemRanges: ListItemRange[] = [];
125+
// Optional indentation, then a bullet or an ordered marker, then mandatory whitespace.
const listItemRegex = /^\s*(?:[-+*]\s+|\d+[.)]\s+)/;
126+
// The item currently being extended; undefined while the scan is outside of any list item.
let activeListItemRange: ListItemRange | undefined;
127+
let lineStart = 0;
128+
129+
while (lineStart < content.length) {
130+
const lineBreak = content.indexOf("\n", lineStart);
131+
// The last line may have no trailing line break; it then runs to the end of the content.
const lineEnd = lineBreak === -1 ? content.length : lineBreak;
132+
const nextLineStart = lineBreak === -1 ? content.length : lineBreak + 1;
133+
const line = content.slice(lineStart, lineEnd);
134+
// Marker-like lines are ignored when isInsideFence flags their start offset.
const isListItem = listItemRegex.test(line) && !isInsideFence(fenceRanges, lineStart);
135+
// Only indented, non-blank lines continue an open item.
const isListContinuation = Boolean(activeListItemRange && line.trim() && /^\s+/.test(line));
136+
137+
if (isListItem) {
138+
// A new marker closes the previous item before opening the next one.
if (activeListItemRange) {
139+
listItemRanges.push(activeListItemRange);
140+
}
141+
activeListItemRange = { start: lineStart, end: nextLineStart };
142+
} else if (activeListItemRange && isListContinuation) {
143+
// The range end includes the continuation line's line break.
activeListItemRange.end = nextLineStart;
144+
} else if (activeListItemRange) {
145+
// Blank or unindented non-marker line: the open item ends at its previously recorded end.
listItemRanges.push(activeListItemRange);
146+
activeListItemRange = undefined;
147+
}
148+
149+
lineStart = nextLineStart;
150+
}
151+
152+
// Flush an item that is still open when the content ends.
if (activeListItemRange) {
153+
listItemRanges.push(activeListItemRange);
154+
}
155+
156+
return listItemRanges;
157+
};
158+
159+
/**
 * Finds the end offset of the last list item that finishes at or before `cutOff`.
 *
 * @param listItemRanges List item ranges; the loop stops at the first range ending after `cutOff`, so
 * this assumes the ranges are ordered by ascending `end` (as `getListItemRanges` appends them in scan order).
 * @param cutOff Raw character offset of the intended cut.
 * @returns The `end` of the last qualifying range, or `-1` when no list item ends at or before `cutOff`.
 */
const getLastListItemRangeEndBeforeCutOff = (listItemRanges: ListItemRange[], cutOff: number): number => {
160+
// -1 signals that no list item boundary is available.
let cutPoint = -1;
161+
for (const listItemRange of listItemRanges) {
162+
// An item reaching past the cut-off cannot be kept whole; later ranges are not inspected.
if (listItemRange.end > cutOff) break;
163+
cutPoint = listItemRange.end;
164+
}
165+
return cutPoint;
166+
};
167+
118168
/**
119169
* Truncates a Markdown string at a safe raw boundary.
120-
* It keeps links atomic and closes partial fenced code blocks; display-length refinement is handled by
121-
* `truncateMarkdownDisplay`.
170+
* It keeps links atomic, prefers boundaries outside structured blocks, and closes a partial fenced code block only
171+
* when no safer boundary exists. Display-length refinement is handled by `truncateMarkdownDisplay`.
122172
*/
123173
export const truncateMarkdown = (content: string, cutOff: number, suffix?: string): string => {
124174
if (!cutOff || cutOff <= 0 || content.length <= cutOff) {
@@ -128,17 +178,13 @@ export const truncateMarkdown = (content: string, cutOff: number, suffix?: strin
128178
const appendSuffix = (truncated: string) => (suffix ? `${truncated.trimEnd()}\n\n${suffix}` : truncated.trimEnd());
129179
const fenceRanges = getFenceRanges(content);
130180
const linkRanges = getLinkRanges(content, fenceRanges);
181+
const listItemRanges = getListItemRanges(content, fenceRanges);
131182
const safeCutOff = moveCutOffOutsideLink(cutOff, linkRanges);
132183

133184
if (safeCutOff >= content.length) {
134185
return content;
135186
}
136187

137-
const activeFence = fenceRanges.find(({ start, end }) => safeCutOff > start && safeCutOff < end);
138-
if (activeFence) {
139-
return appendSuffix(truncateActiveFence(content, safeCutOff, activeFence));
140-
}
141-
142188
if (linkRanges.some(({ start }) => safeCutOff === start)) {
143189
return appendSuffix(content.slice(0, safeCutOff));
144190
}
@@ -148,6 +194,18 @@ export const truncateMarkdown = (content: string, cutOff: number, suffix?: strin
148194
}
149195

150196
let cutPoint = getLastParagraphRangeBeforeCutOff(getParagraphRanges(content), safeCutOff, fenceRanges);
197+
const listBoundary = getLastListItemRangeEndBeforeCutOff(listItemRanges, safeCutOff);
198+
if (listBoundary > cutPoint) {
199+
cutPoint = listBoundary;
200+
}
201+
202+
const activeListItem = listItemRanges.find(({ start, end }) => safeCutOff > start && safeCutOff < end);
203+
if (activeListItem) {
204+
const cutBeforeListItem = activeListItem.start > 0 && content.slice(0, activeListItem.start).trim().length > 0;
205+
if (cutPoint !== -1 || cutBeforeListItem) {
206+
return appendSuffix(content.slice(0, cutBeforeListItem ? activeListItem.start : cutPoint));
207+
}
208+
}
151209

152210
if (cutPoint === -1) {
153211
const lineBoundary = content.lastIndexOf("\n", safeCutOff);
@@ -156,6 +214,16 @@ export const truncateMarkdown = (content: string, cutOff: number, suffix?: strin
156214
}
157215
}
158216

217+
const activeFence = fenceRanges.find(({ start, end }) => safeCutOff > start && safeCutOff < end);
218+
if (activeFence) {
219+
const cutBeforeFence = activeFence.start > 0 && content.slice(0, activeFence.start).trim().length > 0;
220+
if (cutPoint !== -1 || cutBeforeFence) {
221+
return appendSuffix(content.slice(0, cutPoint !== -1 ? cutPoint : activeFence.start));
222+
}
223+
224+
return appendSuffix(truncateActiveFence(content, safeCutOff, activeFence));
225+
}
226+
159227
if (cutPoint === -1) {
160228
const lastSpace = content.lastIndexOf(" ", safeCutOff);
161229
cutPoint = lastSpace > 0 ? lastSpace : safeCutOff;

0 commit comments

Comments
 (0)