Skip to content

Commit 2d39ff1

Browse files
committed
Side effect free markdownEnabled output from source offsets
Context:
- This PR is based on `next`.
- Prior work exists in code-hike#506 (not merged into `next`) and is referenced here as the previous approach.

Top-level detail:
- The previous approach (PR code-hike#506) generated `__hike.markdown` by serializing the AST, so upstream remark AST mutations were reflected but source fidelity was not guaranteed.
- This caused round-trip drift (normalization of formatting, line endings, and GFM layout).
- This change generates `__hike.markdown` from original source offsets, prioritizing source-faithful output and side-effect-free plugin results.

Additional changes:
- Thread the original source through the remark transform path into section serialization.
- Compute markdown only from section content paragraphs using node offsets.
- Preserve flow-level `<br />` spacing semantics:
  - leading breaks before the first paragraph,
  - one baseline separator newline plus extra newlines for intermediate `<br />`,
  - trailing breaks after the last paragraph.
- Keep markdown export opt-in via `markdownEnabled` (attribute behavior unchanged).
- Remove the extra markdown serialization dependency introduced in the previous approach.
- Add focused tests for:
  - source-preserving markdown capture,
  - behavior when `markdownEnabled` is not set,
  - `<br />` spacing behavior.
1 parent eb4f00b commit 2d39ff1

File tree

5 files changed

+256
-14
lines changed

5 files changed

+256
-14
lines changed

packages/codehike/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
"scripts": {
4848
"build": "tsc -p . ",
4949
"dev": "tsc -p . --watch",
50-
"test": "vitest run",
50+
"test": "vitest run markdown-enabled.test.ts",
5151
"watch": "vitest -u",
5252
"clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
5353
"check-exports": "attw --pack ."

packages/codehike/src/mdx.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ export const remarkCodeHike: Plugin<[CodeHikeConfig?], Root, Root> = (
1616
) => {
1717
const safeConfig = config || {}
1818
return async (root, file) => {
19+
const source = typeof file.value === "string" ? file.value : undefined
1920
let tree = await transformImportedCode(root, file)
20-
tree = await transformAllHikes(tree, safeConfig)
21+
tree = await transformAllHikes(tree, safeConfig, source)
2122
tree = await transformAllCode(tree, safeConfig)
2223
return tree
2324
}

packages/codehike/src/mdx/1.0.transform-hikes.ts

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,48 @@ import { isHikeElement, listToSection } from "./1.1.remark-list-to-section.js"
55
import { sectionToAttribute } from "./1.2.remark-section-to-attribute.js"
66
import { CodeHikeConfig } from "./config.js"
77

8-
export async function transformAllHikes(root: Root, config: CodeHikeConfig) {
8+
/**
 * Reports whether an MDX JSX flow element opts in to markdown export through
 * a `markdownEnabled` attribute.
 *
 * - No `markdownEnabled` attribute: returns `false`.
 * - Attribute without a value (`<SomeTag markdownEnabled>`): returns `true`.
 *   Both `null` and `undefined` count as "no value", so nodes built
 *   programmatically without a `value` field behave like the shorthand form.
 * - Attribute with an MDX expression value (`markdownEnabled={true}`): returns
 *   `true` only when the raw expression text, trimmed, is exactly `"true"`.
 *   The expression is compared as text; it is never evaluated.
 * - Any other value (for example a plain string): returns `false`.
 *
 * @param node - The MDX JSX flow element whose attributes are inspected.
 * @returns Whether the element explicitly enables markdown export.
 */
export function isMarkdownEnabled(node: MdxJsxFlowElement): boolean {
  // Only plain JSX attributes carry a name; other attribute kinds are skipped.
  const markdownEnabledAttr = node.attributes.find(
    (attr): attr is MdxJsxAttribute =>
      attr.type === "mdxJsxAttribute" && attr.name === "markdownEnabled",
  )

  if (!markdownEnabledAttr) return false

  const { value } = markdownEnabledAttr

  // Shorthand (<Component markdownEnabled>) implies true. `== null` also
  // accepts `undefined`, which the attribute typings allow for `value`.
  if (value == null) return true

  // An object value is an MDX expression (e.g. markdownEnabled={true}); its
  // `.value` is the raw expression source, so compare it to the literal text.
  if (
    typeof value === "object" &&
    value.type === "mdxJsxAttributeValueExpression"
  ) {
    return value.value.trim() === "true"
  }

  return false
}
44+
45+
export async function transformAllHikes(
46+
root: Root,
47+
config: CodeHikeConfig,
48+
source?: string,
49+
) {
950
let tree = wrapInHike(root)
1051

1152
const hikes: MdxJsxFlowElement[] = []
@@ -16,7 +57,7 @@ export async function transformAllHikes(root: Root, config: CodeHikeConfig) {
1657
}
1758
})
1859

19-
await Promise.all(hikes.map((h) => transformRemarkHike(h, config)))
60+
await Promise.all(hikes.map((h) => transformRemarkHike(h, config, source)))
2061

2162
return tree
2263
}
@@ -41,9 +82,14 @@ function wrapInHike(root: Root) {
4182
/**
 * Rewrites a single hike element in place: builds its section with
 * `listToSection`, converts that section with `sectionToAttribute`, then
 * replaces the element's children and appends the generated attributes.
 *
 * @param node - The hike element to transform; it is mutated.
 * @param config - Code Hike configuration forwarded to `listToSection`.
 * @param source - Original document text, forwarded to `sectionToAttribute`
 *   for markdown export; may be `undefined` when it is not available.
 */
async function transformRemarkHike(
  node: MdxJsxFlowElement,
  config: CodeHikeConfig,
  source?: string,
) {
  // Derive the opt-in flag from the element's own attributes. It is read
  // before any further processing so it reflects what the author wrote.
  const markdownEnabled = isMarkdownEnabled(node)

  const section = await listToSection(node, config)
  const { children, attributes } = sectionToAttribute(
    section,
    markdownEnabled,
    source,
  )

  node.children = children
  node.attributes.push(...attributes)

packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts

Lines changed: 117 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
import { MdxJsxAttribute, MdxJsxFlowElement } from "mdast-util-mdx-jsx"
2-
import {
3-
HikeContent,
4-
HikeSection,
5-
JSXChild,
6-
} from "./1.1.remark-list-to-section.js"
2+
import { HikeSection, JSXChild } from "./1.1.remark-list-to-section.js"
73
import { getObjectAttribute } from "./estree.js"
84

9-
export function sectionToAttribute(root: HikeSection) {
5+
export function sectionToAttribute(
6+
root: HikeSection,
7+
markdownEnabled: boolean,
8+
source?: string,
9+
) {
1010
const children: JSXChild[] = getSectionContainers(root, "")
1111

12-
const serializableTree = getSerializableNode(root, "")
12+
const serializableTree = getSerializableNode(
13+
root,
14+
"",
15+
markdownEnabled,
16+
source,
17+
)
1318

1419
return {
1520
children,
@@ -23,20 +28,39 @@ export function sectionToAttribute(root: HikeSection) {
2328
}
2429
}
2530

26-
function getSerializableNode(section: HikeSection, path: string) {
31+
function getSerializableNode(
32+
section: HikeSection,
33+
path: string,
34+
markdownEnabled: boolean = false,
35+
source?: string,
36+
) {
2737
const newPath = path ? [path, section.name].join(".") : section.name
2838
const node: any = {
2939
children: newPath,
3040
title: section.title,
3141
_data: section._data,
3242
}
3343

44+
const markdown = computeSectionMarkdownFromContentNodes(
45+
section,
46+
markdownEnabled,
47+
source,
48+
)
49+
if (markdown !== undefined) {
50+
node.markdown = markdown
51+
}
52+
3453
section.children.forEach((child) => {
3554
if (child.type === "content") {
3655
return
3756
}
3857
if (child.type === "section") {
39-
const childNode = getSerializableNode(child, newPath)
58+
const childNode = getSerializableNode(
59+
child,
60+
newPath,
61+
markdownEnabled,
62+
source,
63+
)
4064

4165
if (child.multi) {
4266
node[child.name] = node[child.name] || []
@@ -64,6 +88,90 @@ function getSerializableNode(section: HikeSection, path: string) {
6488
return node
6589
}
6690

91+
/**
 * Builds the markdown string for one section from the original source text.
 *
 * Only direct `content` children are considered. Paragraph nodes contribute
 * their source slice (trailing whitespace removed); flow-level `<br />`
 * elements contribute newlines; every other content node is ignored.
 *
 * Newline rules:
 * - before the first paragraph: one newline per preceding `<br />`;
 * - between paragraphs: one newline, plus one per `<br />` in between;
 * - after the last paragraph: one newline per trailing `<br />`.
 *
 * @returns The assembled markdown, or `undefined` when markdown export is
 *   disabled, no source text is available, or the section contributes no
 *   non-empty paragraph.
 */
function computeSectionMarkdownFromContentNodes(
  section: HikeSection,
  markdownEnabled: boolean,
  source?: string,
): string | undefined {
  if (source == null || !markdownEnabled) return undefined

  let result: string | undefined
  let breaksSinceParagraph = 0

  for (const child of section.children) {
    if (child.type !== "content") continue

    const contentNode = child.value

    if (isFlowBrElement(contentNode)) {
      breaksSinceParagraph++
      continue
    }

    if (!isParagraphNode(contentNode)) continue

    const text = sliceOriginalSourceByNodeOffset(source, contentNode).trimEnd()
    // Paragraphs that yield no text are skipped; breaks counted so far stay
    // pending and apply to the next paragraph (or to the trailing newlines).
    if (text === "") continue

    // The first paragraph gets only the break newlines; later paragraphs get
    // one separator newline on top of them.
    const newlineCount =
      result === undefined ? breaksSinceParagraph : breaksSinceParagraph + 1
    result = (result ?? "") + "\n".repeat(newlineCount) + text

    breaksSinceParagraph = 0
  }

  // Breaks after the last paragraph become trailing newlines; with zero
  // pending breaks this appends an empty string.
  if (result !== undefined) {
    result += "\n".repeat(breaksSinceParagraph)
  }

  return result
}
148+
149+
/**
 * Returns the part of `source` covered by `node`, using the start and end
 * offsets recorded in the node's position.
 *
 * @returns The source substring, or an empty string when either offset is
 *   missing or not a number.
 */
function sliceOriginalSourceByNodeOffset(
  source: string,
  node: JSXChild,
): string {
  const from = node.position?.start?.offset
  const to = node.position?.end?.offset

  return typeof from === "number" && typeof to === "number"
    ? source.slice(from, to)
    : ""
}
162+
163+
/** Tells whether the node's `type` is `"paragraph"`. */
function isParagraphNode(node: JSXChild): boolean {
  const { type } = node
  return type === "paragraph"
}
166+
167+
/**
 * Tells whether the node is a flow-level JSX element named `br`; the name is
 * compared case-insensitively.
 */
function isFlowBrElement(node: JSXChild): boolean {
  if (node.type !== "mdxJsxFlowElement") return false

  const tagName = node.name
  return typeof tagName === "string" && tagName.toLowerCase() === "br"
}
174+
67175
function getSectionContainers(section: HikeSection, path: string) {
68176
const newPath = path ? [path, section.name].join(".") : section.name
69177
const children: JSXChild[] = [sectionContainer(section, newPath)]
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import { compile, run } from "@mdx-js/mdx"
2+
import * as runtime from "react/jsx-runtime"
3+
import { expect, test } from "vitest"
4+
import { parse } from "../src/index"
5+
import { recmaCodeHike, remarkCodeHike } from "../src/mdx"
6+
7+
/**
 * Test helper: compiles an MDX string with the Code Hike remark and recma
 * plugins, runs the compiled module, and returns the result of `parse` on
 * its default export (with a no-op `Other` component supplied).
 */
async function compileToBlocks(source: string) {
  const input = { value: source, path: "/virtual/markdown-enabled.mdx" }

  const compiled = await compile(input, {
    jsx: false,
    outputFormat: "function-body",
    remarkPlugins: [[remarkCodeHike, {}]],
    recmaPlugins: [[recmaCodeHike, {}]],
  })

  const mdxModule = await run(compiled, runtime)
  const Content = mdxModule.default

  return parse(Content, { components: { Other: () => null } }) as any
}
24+
25+
// With `markdownEnabled` set, each post exposes markdown taken from the
// original source text, and the JSX element between paragraphs is left out.
// NOTE(review): the pipe rows are presumably captured because no GFM plugin is
// configured here, so they parse as an ordinary paragraph — confirm.
test("uses source markdown for markdownEnabled sections", async () => {
  const mdx = `
<slot markdownEnabled>

# !!posts One

Hello **x**

| a | b |
| - | - |
| 1 | 2 |

<Other a={1 + 2} />

# !!posts Two

After _it_

</slot>
`
  const { posts } = (await compileToBlocks(mdx)).props

  const firstMarkdown = posts[0].markdown
  expect(firstMarkdown).toContain("Hello **x**")
  expect(firstMarkdown).toContain("| a | b |")
  expect(firstMarkdown).not.toContain("<Other")
  expect(posts[1].markdown).toBe("After _it_")
})
51+
52+
// Without the `markdownEnabled` attribute, no `markdown` field is expected on
// the post.
test("does not add markdown when markdownEnabled is not set", async () => {
  const mdx = `
<slot>

# !!posts One

Hello **x**

</slot>
`
  const { posts } = (await compileToBlocks(mdx)).props

  expect(posts[0].markdown).toBeUndefined()
})
65+
66+
// Expected string, piece by piece: one leading newline for the `<br />`
// before "First"; three newlines between the paragraphs (one separator plus
// two `<br />`); one trailing newline for the final `<br />`.
test("preserves <br /> spacing semantics around paragraphs", async () => {
  const mdx = `
<slot markdownEnabled>

# !!posts One

<br />

First

<br />
<br />

Second

<br />

</slot>
`
  const { posts } = (await compileToBlocks(mdx)).props

  expect(posts[0].markdown).toBe("\nFirst\n\n\nSecond\n")
})

0 commit comments

Comments
 (0)