11import { visit } from "unist-util-visit" ;
2+ import { fromMarkdown } from "mdast-util-from-markdown" ;
23import type { Plugin } from "unified" ;
34
45import type mdast from "mdast" ;
56import type { MdxJsxFlowElement } from "mdast-util-mdx-jsx" ;
67
78import { DEFAULT_CAPITALIZATIONS } from "./capitalizations.ts" ;
8- import { titleCase } from "./title-case.ts" ;
9+ import { collectTitleItemsFromChildren , titleCase } from "./title-case.ts" ;
910
10- // Matches a Markdown inline code span (backtick-wrapped) so titles can be
11- // split around code spans and left untouched.
12- const CODE_REGEX = / ( ` [ a - z 0 - 9 _ \- \s ] + ` ) / gi;
13-
14- // Title-case a standalone string (treated as a whole title), leaving inline
15- // code spans untouched. The plugin uses this for frontmatter and component
16- // title props; it's exported for callers who want title-casing outside a
17- // Markdown tree.
/**
 * Title-case a standalone string, treated as one whole Markdown title.
 *
 * The string is parsed as Markdown and only its text nodes are re-cased; the
 * source between them (inline code spans, emphasis delimiters, escapes, ...)
 * is copied through from `title` unchanged. The plugin uses this for
 * frontmatter and component title props; it's exported for callers who want
 * title-casing outside a remark plugin.
 *
 * @param title - Raw Markdown source of the title.
 * @param options - Optional settings. `special` is forwarded to `titleCase`
 *   and defaults to `DEFAULT_CAPITALIZATIONS`.
 * @returns `title` with each text segment passed through `titleCase`.
 */
export const capitalizeTitle = (
  title: string,
  { special = DEFAULT_CAPITALIZATIONS }: { special?: string[] } = {},
): string => {
  const tree = fromMarkdown(title);
  const items = collectTitleItemsFromChildren(tree.children);

  // Re-case the original source (via each text node's position offsets), NOT
  // node.value: value is unescaped (`a \* b` => `a * b`) while its position
  // spans the *escaped* source, so splicing value back would drop escapes and
  // shift later offsets. This relies on titleCase only flipping letter case
  // and copying all other characters through, so that escapes, `_`/`*`
  // delimiters, and exact spacing survive.
  let result = "";
  let cursor = 0;
  items.forEach((item, i) => {
    if (item.type !== "text") return;

    // Positional info is optional on unist nodes. Without both offsets there
    // is no source range to re-case, so leave this node's text as written
    // (it is copied verbatim by a later slice) instead of throwing.
    const start = item.node.position?.start.offset;
    const end = item.node.position?.end.offset;
    if (start === undefined || end === undefined) return;

    // Copy everything between the previous text node and this one untouched.
    result += title.slice(cursor, start);
    // First/last are judged by index in the flattened item list (which also
    // holds non-text items), the same way the heading pass does it.
    result += titleCase(title.slice(start, end), {
      special,
      isFirstTextNode: i === 0,
      isLastTextNode: i === items.length - 1,
      firstWordIsContinuation: item.firstWordIsContinuation,
    });
    cursor = end;
  });
  // Trailing source after the last text node (e.g. a closing delimiter).
  result += title.slice(cursor);
  return result;
};
3445
3546type AstroFrontmatterData = {
@@ -49,7 +60,7 @@ const plugin: Plugin<[PluginOptions?], mdast.Root> =
4960 special = DEFAULT_CAPITALIZATIONS ,
5061 componentNames = [ ] ,
5162 frontmatterTitle = true ,
52- } : PluginOptions = { } ) =>
63+ } = { } ) =>
5364 ( tree , file ) => {
5465 // If frontmatterTitle is true, it will also format the title in the
5566 // frontmatter, but only if Astro is exposing it.
@@ -61,29 +72,30 @@ const plugin: Plugin<[PluginOptions?], mdast.Root> =
6172 }
6273 }
6374
64- // Pass 1: every heading. A heading's text can be split across multiple text
65- // nodes (e.g. emphasis, links, inline code), so we collect them first to
66- // find out which is the first/last text node.
75+ // Pass 1: every heading. A heading's text can be split across multiple
76+ // phrasing nodes (emphasis, links, inline code), so we flatten them into
77+ // ordered title items first — that tells us which text node is first/last
78+ // and which ones continue a preceding code span ("`head`ing"). Headings
79+ // mutate text nodes in place; the serializer re-emits them (and normalizes
80+ // the body, e.g. `_em_` → `*em*`, which is the expected heading behavior).
6781 visit ( tree , "heading" , ( node ) => {
68- // Get every text node for this heading
69- const textNodes : { value ?: string } [ ] = [ ] ;
70- visit ( node , "text" , ( textNode ) => {
71- textNodes . push ( textNode ) ;
72- } ) ;
73- // Lowercase each node, but be mindful of which one is first/last
74- textNodes . forEach ( ( textNode , i ) => {
75- textNode . value = titleCase ( textNode . value ?? "" , {
82+ const items = collectTitleItemsFromChildren ( node . children ) ;
83+ items . forEach ( ( item , i ) => {
84+ if ( item . type !== "text" ) return ;
85+ item . node . value = titleCase ( item . node . value , {
7686 special,
7787 isFirstTextNode : i === 0 ,
78- isLastTextNode : i === textNodes . length - 1 ,
88+ isLastTextNode : i === items . length - 1 ,
89+ firstWordIsContinuation : item . firstWordIsContinuation ,
7990 } ) ;
8091 } ) ;
8192 } ) ;
8293 if ( componentNames . length === 0 ) {
8394 return ;
8495 }
8596 // Pass 2: title props of the named MDX components. Their value is one raw
86- // string, so capitalizeTitle handles code-span splitting itself.
97+ // string, so capitalizeTitle parses and re-cases it (code spans, emphasis,
98+ // escapes and all) on its own.
8799 visit (
88100 tree ,
89101 ( node ) : node is MdxJsxFlowElement => {
0 commit comments