Skip to content

Commit 54c5aa7

Browse files
authored
fix(toc): inject _Toc bookmarks so exported DOCX TOC links work without manual Update Table (#2431)
1 parent e631f4b commit 54c5aa7

5 files changed

Lines changed: 357 additions & 9 deletions

File tree

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import { describe, expect, it } from 'vitest';
2+
import { generateTocBookmarkName } from './toc-bookmark-sync.js';
3+
4+
describe('generateTocBookmarkName', () => {
  /** Asserts that two block IDs are encoded to two different bookmark names. */
  const expectDistinctNames = (firstId: string, secondId: string): void => {
    const a = generateTocBookmarkName(firstId);
    const b = generateTocBookmarkName(secondId);
    expect(a).not.toBe(b);
  };

  // --- Shape of the generated name -----------------------------------------

  it('produces a _Toc-prefixed name with only valid bookmark characters', () => {
    expect(generateTocBookmarkName('some-block-id')).toMatch(/^_Toc[a-zA-Z0-9_]+$/);
  });

  it('escapes hyphens in UUID-style block IDs', () => {
    const encoded = generateTocBookmarkName('ba2b746a-930a-4baf-93d2-4d65637194d1');
    expect(encoded).toBe('_Tocba2b746a_2d930a_2d4baf_2d93d2_2d4d65637194d1');
  });

  it('passes through pure alphanumeric paraId inputs unchanged', () => {
    const encoded = generateTocBookmarkName('41964671');
    expect(encoded).toBe('_Toc41964671');
  });

  it('escapes literal underscores to prevent ambiguity', () => {
    const encoded = generateTocBookmarkName('a_b');
    expect(encoded).toBe('_Toca__b');
  });

  // --- Determinism ---------------------------------------------------------

  it('is deterministic for the same input', () => {
    const firstCall = generateTocBookmarkName('abc-123');
    const secondCall = generateTocBookmarkName('abc-123');
    expect(firstCall).toBe(secondCall);
  });

  // --- Collision resistance ------------------------------------------------

  it('produces different names for different inputs', () => {
    expectDistinctNames('heading-1', 'heading-2');
  });

  it('does not collide for punctuation-folding pairs like p-1 vs p1', () => {
    expectDistinctNames('p-1', 'p1');
  });

  it('does not collide for underscore vs hyphen pairs like a_b vs a-b', () => {
    expectDistinctNames('a_b', 'a-b');
  });

  it('does not collide for inputs that collided under the old FNV-1a hash', () => {
    expectDistinctNames('id-u4-ehdfkc7l', 'id-f6q-l70lxz94');
  });

  it('does not collide for hyphenated paragraph IDs like P-ABCDEF01 vs PABCDEF01', () => {
    expectDistinctNames('P-ABCDEF01', 'PABCDEF01');
  });
});
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
/**
2+
* TOC bookmark synchronization — ensures `_Toc` bookmarks exist around
3+
* headings referenced by TOC entry hyperlinks.
4+
*
5+
* Word's TOC `<w:hyperlink w:anchor="...">` elements require matching
6+
* `<w:bookmarkStart w:name="...">` / `<w:bookmarkEnd>` pairs around
7+
* the target heading. Without them, TOC links in the exported DOCX
8+
* are broken until the user manually runs "Update Table" in Word.
9+
*
10+
* This module generates deterministic `_Toc`-prefixed bookmark names
11+
* and injects the bookmark nodes after TOC content is materialized.
12+
*/
13+
14+
import type { Node as ProseMirrorNode } from 'prosemirror-model';
15+
import type { Editor } from '../../core/Editor.js';
16+
17+
const TOC_BOOKMARK_PREFIX = '_Toc';

// ---------------------------------------------------------------------------
// Bookmark name generation
// ---------------------------------------------------------------------------

/**
 * Generates a deterministic `_Toc`-prefixed bookmark name from a block ID.
 *
 * The block ID is escaped (see {@link encodeBlockId}) so the result contains
 * only `[A-Za-z0-9_]` and is **injective** — no two distinct block IDs can
 * produce the same output.
 *
 * Encoding rules (using `_` as escape character, applied per UTF-16 code unit):
 * - ASCII letters and digits pass through unchanged
 * - `_` is escaped as `__` (escape-the-escape)
 * - Any other code unit up to U+00FF is escaped as `_xx` (two lowercase hex digits)
 * - Any code unit above U+00FF is escaped as `_uxxxx` (four lowercase hex digits)
 *
 * Examples:
 * - `ba2b746a-930a-...` → `_Tocba2b746a_2d930a_2d...`
 * - `p-1` → `_Tocp_2d1`
 * - `p1` → `_Tocp1` (no collision with `p-1`)
 * - `Ā` (U+0100) → `_Toc_u0100` (no collision with U+0010 followed by `0`)
 *
 * NOTE(review): an encoded UUID is 48 characters long. Word is reported to cap
 * bookmark names at 40 characters — confirm that Word resolves these anchors.
 *
 * @param blockId - Identifier of the heading paragraph the bookmark wraps.
 * @returns The bookmark name, always starting with `_Toc`.
 */
export function generateTocBookmarkName(blockId: string): string {
  return `${TOC_BOOKMARK_PREFIX}${encodeBlockId(blockId)}`;
}

/** Returns true when the UTF-16 code unit is an ASCII letter or digit. */
function isAsciiAlphanumericCode(code: number): boolean {
  return (
    (code >= 0x30 && code <= 0x39) || // 0-9
    (code >= 0x41 && code <= 0x5a) || // A-Z
    (code >= 0x61 && code <= 0x7a) // a-z
  );
}

/**
 * Injective encoding of a block ID into valid bookmark name characters.
 *
 * Uses `_` as the escape character. Every escape has a fixed width that is
 * determined by the character following `_`, which makes the output uniquely
 * decodable:
 * - `__`      → a literal `_`
 * - `_` + hex → exactly two hex digits (code units 0x00–0xFF)
 * - `_u`      → exactly four hex digits (code units 0x0100–0xFFFF)
 *
 * A variable-width hex escape would be ambiguous: U+0100 would encode to
 * `_100`, the same as U+0010 followed by the digit `0`.
 */
function encodeBlockId(input: string): string {
  let result = '';
  for (let i = 0; i < input.length; i++) {
    const code = input.charCodeAt(i);
    if (code === 0x5f) {
      // Escape the escape character itself.
      result += '__';
    } else if (isAsciiAlphanumericCode(code)) {
      result += input.charAt(i);
    } else if (code <= 0xff) {
      result += `_${code.toString(16).padStart(2, '0')}`;
    } else {
      // 'u' is not a hex digit, so this form cannot be confused with `_xx`.
      result += `_u${code.toString(16).padStart(4, '0')}`;
    }
  }
  return result;
}
61+
62+
// ---------------------------------------------------------------------------
63+
// Bookmark synchronization
64+
// ---------------------------------------------------------------------------
65+
66+
/**
 * Wraps every heading paragraph targeted by a TOC hyperlink in a `_Toc`
 * bookmark pair, adding only the pairs that are not in the document yet.
 *
 * Intended to run once the TOC content is already part of the editor state;
 * any missing `bookmarkStart` / `bookmarkEnd` nodes are added through a single
 * follow-up transaction.
 *
 * Returns without dispatching when:
 * - the schema has no `bookmarkStart` or `bookmarkEnd` node type,
 * - every required bookmark name is already present,
 * - none of the missing targets resolves to a paragraph,
 * - the built transaction did not change the document.
 *
 * @param editor - Editor whose current state is inspected and updated.
 * @param sources - TOC sources; only `sdBlockId` is read.
 */
export function syncTocBookmarks(editor: Editor, sources: Array<{ sdBlockId: string }>): void {
  const { schema, doc } = editor.state;
  const { bookmarkStart: startType, bookmarkEnd: endType } = schema.nodes;
  if (!startType || !endType) return;

  const required = deduplicateByBlockId(sources);
  const present = collectExistingTocBookmarkNames(doc);
  const absent = required.filter(({ bookmarkName }) => !present.has(bookmarkName));
  if (absent.length === 0) return;

  const insertions = resolveInsertionTargets(absent, buildBlockIdPositionMap(doc), doc);
  if (insertions.length === 0) return;

  const transaction = editor.state.tr;
  let lastUsedId = findMaxBookmarkId(doc);

  for (const target of insertions) {
    lastUsedId += 1;
    const id = String(lastUsedId);
    const closing = endType.create({ id });
    const opening = startType.create({ name: target.bookmarkName, id });

    // Order matters: the opening node goes in first. For an empty paragraph
    // both positions coincide, and because Mapping.map() resolves a position
    // at an insertion point to the right of the inserted content, the closing
    // node then lands after the opening one. Mapping also translates the
    // original-document positions past insertions made in earlier iterations.
    transaction.insert(transaction.mapping.map(target.contentStart), opening);
    transaction.insert(transaction.mapping.map(target.contentEnd), closing);
  }

  if (!transaction.docChanged) return;
  dispatchTransaction(editor, transaction);
}
113+
114+
// ---------------------------------------------------------------------------
115+
// Internal helpers
116+
// ---------------------------------------------------------------------------
117+
118+
/** A heading that needs a bookmark, paired with the name that bookmark must carry. */
interface TocBookmarkTarget {
  blockId: string;
  bookmarkName: string;
}

/**
 * Reduces the source list to one target per distinct `sdBlockId`, in first-seen
 * order, attaching the bookmark name generated for each ID.
 *
 * A name already claimed by a different block ID is skipped. The name
 * generator is documented as collision-free, so this check is purely a
 * safety net.
 */
function deduplicateByBlockId(sources: Array<{ sdBlockId: string }>): TocBookmarkTarget[] {
  const visitedIds = new Set<string>();
  const ownerByName = new Map<string, string>(); // bookmarkName → blockId that claimed it first
  const uniqueTargets: TocBookmarkTarget[] = [];

  for (const source of sources) {
    const blockId = source.sdBlockId;
    if (visitedIds.has(blockId)) {
      continue;
    }
    visitedIds.add(blockId);

    const bookmarkName = generateTocBookmarkName(blockId);
    const owner = ownerByName.get(bookmarkName);
    const claimedByAnotherBlock = owner !== undefined && owner !== blockId;

    if (!claimedByAnotherBlock) {
      ownerByName.set(bookmarkName, blockId);
      uniqueTargets.push({ blockId, bookmarkName });
    }
  }

  return uniqueTargets;
}
147+
148+
/**
 * Walks the whole document and gathers the `name` attribute of every
 * `bookmarkStart` node whose name begins with the TOC bookmark prefix.
 */
function collectExistingTocBookmarkNames(doc: ProseMirrorNode): Set<string> {
  const found = new Set<string>();

  doc.descendants((child) => {
    if (child.type.name !== 'bookmarkStart') return true;

    const candidate = child.attrs?.name as string | undefined;
    if (candidate?.startsWith(TOC_BOOKMARK_PREFIX)) {
      found.add(candidate);
    }
    return true; // keep descending
  });

  return found;
}
160+
161+
/**
 * Maps block IDs to the document position of the paragraph that carries them.
 *
 * Each paragraph is indexed under its primary ID (`sdBlockId`, falling back to
 * `paraId` when `sdBlockId` is null or undefined). When a paragraph carries
 * both attributes, its `paraId` is additionally registered as an alias so a
 * source that identifies the heading by `paraId` still resolves.
 *
 * Precedence on duplicate keys:
 * 1. The first paragraph whose primary ID equals the key wins.
 * 2. Aliases only fill keys that no primary ID claimed, first alias wins.
 *
 * @param doc - Document to scan.
 * @returns Map from block ID to the paragraph's position.
 */
function buildBlockIdPositionMap(doc: ProseMirrorNode): Map<string, number> {
  const map = new Map<string, number>();
  // Aliases are applied after the walk so they can never shadow a primary ID
  // that appears later in the document.
  const aliases: Array<[id: string, pos: number]> = [];

  doc.descendants((node, pos) => {
    if (node.type.name === 'paragraph') {
      const sdBlockId = node.attrs?.sdBlockId as string | undefined;
      const paraId = node.attrs?.paraId as string | undefined;

      const primaryId = sdBlockId ?? paraId;
      if (primaryId && !map.has(primaryId)) map.set(primaryId, pos);

      // paraId is only an alias when sdBlockId took the primary slot.
      if (sdBlockId != null && paraId && paraId !== sdBlockId) aliases.push([paraId, pos]);
    }
    return true;
  });

  for (const [id, pos] of aliases) {
    if (!map.has(id)) map.set(id, pos);
  }
  return map;
}
173+
174+
/** Where a single bookmark pair has to be inserted, in original-document positions. */
interface BookmarkInsertion {
  bookmarkName: string;
  /** Position of the first inline content inside the paragraph (paragraphPos + 1). */
  contentStart: number;
  /** Position just before the paragraph's closing boundary (paragraphPos + nodeSize - 1). */
  contentEnd: number;
}

/**
 * Turns the missing bookmark targets into concrete insertion ranges.
 *
 * Targets whose block ID has no known position, or whose position does not
 * hold a `paragraph` node, are dropped. The result is ordered by
 * `contentStart`, highest first.
 */
function resolveInsertionTargets(
  missing: TocBookmarkTarget[],
  positions: Map<string, number>,
  doc: ProseMirrorNode,
): BookmarkInsertion[] {
  const insertions = missing.flatMap(({ blockId, bookmarkName }): BookmarkInsertion[] => {
    const paragraphPos = positions.get(blockId);
    if (paragraphPos === undefined) return [];

    const paragraph = doc.nodeAt(paragraphPos);
    if (!paragraph || paragraph.type.name !== 'paragraph') return [];

    const contentStart = paragraphPos + 1;
    const contentEnd = paragraphPos + paragraph.nodeSize - 1;
    return [{ bookmarkName, contentStart, contentEnd }];
  });

  // Back-to-front order: an insertion never shifts the position of a
  // paragraph that is still waiting to be processed.
  insertions.sort((left, right) => right.contentStart - left.contentStart);
  return insertions;
}
212+
213+
/**
 * Returns the largest numeric `id` found on any `bookmarkStart` or
 * `bookmarkEnd` node, or -1 when no such node has a usable ID.
 *
 * String IDs are parsed as base-10 integers; numeric IDs are used as-is;
 * anything else is ignored.
 */
function findMaxBookmarkId(doc: ProseMirrorNode): number {
  const bookmarkTypes = ['bookmarkStart', 'bookmarkEnd'];
  let highest = -1;

  doc.descendants((child) => {
    if (bookmarkTypes.includes(child.type.name)) {
      const rawId = child.attrs?.id;

      let numericId = NaN;
      if (typeof rawId === 'string') {
        numericId = parseInt(rawId, 10);
      } else if (typeof rawId === 'number') {
        numericId = rawId;
      }

      // NaN never compares greater, so unparsable IDs fall through here.
      if (numericId > highest) highest = numericId;
    }
    return true;
  });

  return highest;
}
225+
226+
/**
 * Hands the transaction to the editor: `editor.dispatch` is used when it is a
 * function, otherwise `editor.view.dispatch` when that is a function. If
 * neither is available the transaction is dropped without error.
 */
function dispatchTransaction(editor: Editor, tr: unknown): void {
  if (typeof editor.dispatch === 'function') {
    editor.dispatch(tr as Parameters<Editor['dispatch']>[0]);
    return;
  }

  if (typeof editor.view?.dispatch !== 'function') return;
  editor.view.dispatch(tr as Parameters<NonNullable<Editor['view']>['dispatch']>[0]);
}

packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.test.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { describe, expect, it } from 'vitest';
22
import { buildTocEntryParagraphs, type TocSource } from './toc-entry-builder.js';
3+
import { generateTocBookmarkName } from './toc-bookmark-sync.js';
34
import type { TocSwitchConfig } from '@superdoc/document-api';
45

56
const BASE_SOURCE: TocSource = {
@@ -18,6 +19,37 @@ function makeConfig(display: TocSwitchConfig['display'] = {}): TocSwitchConfig {
1819
}
1920

2021
describe('buildTocEntryParagraphs', () => {
22+
describe('hyperlink anchors', () => {
  type MarkJson = { type: string; attrs: Record<string, unknown> };

  /** Returns the `link` mark on the first inline node of the first TOC entry, if any. */
  const findLinkMark = (paragraphs: ReturnType<typeof buildTocEntryParagraphs>): MarkJson | undefined => {
    const textNode = paragraphs[0]!.content[0] as { marks?: MarkJson[] };
    return textNode.marks?.find((m) => m.type === 'link');
  };

  it('uses a _Toc bookmark name as the hyperlink anchor, not the raw sdBlockId', () => {
    const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true }));
    const linkMark = findLinkMark(paragraphs);

    expect(linkMark).toBeDefined();
    expect(linkMark!.attrs.anchor).toMatch(/^_Toc[a-zA-Z0-9_]+$/);
    expect(linkMark!.attrs.anchor).toBe(generateTocBookmarkName(BASE_SOURCE.sdBlockId));
    expect(linkMark!.attrs.anchor).not.toBe(BASE_SOURCE.sdBlockId);
  });

  it('produces the same anchor for the same sdBlockId across calls', () => {
    const first = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true }));
    const second = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true }));

    const firstAnchor = findLinkMark(first)?.attrs.anchor;
    const secondAnchor = findLinkMark(second)?.attrs.anchor;

    // Guard against a vacuous pass: if no link mark were produced, both
    // anchors would be `undefined` and still compare equal.
    expect(typeof firstAnchor).toBe('string');
    expect(secondAnchor).toBe(firstAnchor);
  });

  it('does not add link mark when hyperlinks display option is false', () => {
    const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: false }));
    const textNode = paragraphs[0]!.content[0] as { marks?: unknown[] };
    expect(textNode.marks).toBeUndefined();
  });
});
52+
2153
describe('rightAlignPageNumbers', () => {
2254
it('adds a right-aligned tab stop when rightAlignPageNumbers is true', () => {
2355
const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ rightAlignPageNumbers: true }));

packages/super-editor/src/document-api-adapters/helpers/toc-entry-builder.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import type { Node as ProseMirrorNode } from 'prosemirror-model';
99
import type { TocSwitchConfig } from '@superdoc/document-api';
1010
import { parseTcInstruction } from '../../core/super-converter/field-references/shared/tc-switches.js';
1111
import { getHeadingLevel } from './node-address-resolver.js';
12+
import { generateTocBookmarkName } from './toc-bookmark-sync.js';
1213

1314
// ---------------------------------------------------------------------------
1415
// Source types
@@ -158,7 +159,7 @@ export interface EntryParagraphJson {
158159
* Each entry gets:
159160
* - Paragraph style: TOC{level}
160161
* - tocSourceId paragraph attribute (source heading/TC field's sdBlockId)
161-
* - Link mark with anchor pointing to source sdBlockId (when \h is set)
162+
* - Link mark with anchor pointing to a `_Toc`-prefixed bookmark name (when \h is set)
162163
* - Page number placeholder "0" with tocPageNumber mark
163164
* - Separator: custom (\p switch) or default tab
164165
*/
@@ -192,7 +193,7 @@ function buildEntryParagraph(source: TocSource, config: TocSwitchConfig): EntryP
192193
{
193194
type: 'link',
194195
attrs: {
195-
anchor: source.sdBlockId,
196+
anchor: generateTocBookmarkName(source.sdBlockId),
196197
rId: null,
197198
history: true,
198199
},

0 commit comments

Comments
 (0)