Skip to content

Commit 369b7e1

Browse files
VladaHarbour, caio-pizzol, and harbournick
authored
fix: normalize bookmarks in tables (#1892)
* fix: normalize bookmarks in tables
* fix: review comment
* fix: check for empty row
* ci: update spec review
* fix: consider colFirst/colLast attributes
* fix: add colFirst/colLast logic only to bookmarkStart
* fix: cleanup
* fix: minor fix for cell index calculation
* fix: review comments
---------
Co-authored-by: Caio Pizzol <caio@harbourshare.com>
Co-authored-by: Nick Bernal <nick@superdoc.dev>
1 parent d07b49d commit 369b7e1

2 files changed

Lines changed: 339 additions & 1 deletion

File tree

packages/super-editor/src/core/super-converter/v2/importer/docxImporter.js

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ export const createDocumentJson = (docx, converter, editor) => {
167167

168168
// Safety: drop any inline-only nodes that accidentally landed at the doc root
169169
parsedContent = filterOutRootInlineNodes(parsedContent);
170+
parsedContent = normalizeTableBookmarksInContent(parsedContent, editor);
170171
collapseWhitespaceNextToInlinePassthrough(parsedContent);
171172

172173
const result = {
@@ -844,6 +845,205 @@ export function filterOutRootInlineNodes(content = []) {
844845
return result;
845846
}
846847

848+
/**
 * Relocate bookmark markers that sit directly under a table node.
 *
 * Every node in `content` is passed through the per-node normalizer, which
 * moves bookmarkStart/bookmarkEnd markers found as direct table children into
 * a textblock inside a cell of an adjacent row.
 *
 * Background: some non-conformant DOCX producers emit bookmarks as direct
 * table children. Per ECMA-376 §17.13.6.2 they belong inside cells
 * (bookmarkStart) or as children of rows (bookmarkEnd). PM cannot hold them at
 * that level, so they are relocated for compatibility.
 *
 * @param {Array<{type: string, content?: any[], attrs?: any}>} content - Nodes to
 *   normalize. A non-array or empty value is returned unchanged.
 * @param {Editor} [editor] - Forwarded untouched to the per-node normalizer.
 * @returns {Array} A new array of normalized nodes, or `content` itself when
 *   there is nothing to process.
 */
export function normalizeTableBookmarksInContent(content = [], editor) {
  const hasNodes = Array.isArray(content) && content.length > 0;
  return hasNodes ? content.map((child) => normalizeTableBookmarksInNode(child, editor)) : content;
}
867+
868+
/**
 * Normalize a single node: fix up the node itself when it is a table, then
 * recurse into its children so nested tables are handled as well.
 *
 * @param {any} node - Candidate node; non-object values are returned as-is.
 * @param {Editor} [editor] - Forwarded to the table and content normalizers.
 * @returns {any} The same reference when nothing had to change, otherwise a
 *   shallow copy carrying the normalized `content`.
 */
function normalizeTableBookmarksInNode(node, editor) {
  const isObjectNode = Boolean(node) && typeof node === 'object';
  if (!isObjectNode) return node;

  // Tables are normalized first so the recursion below sees the relocated bookmarks.
  const current = node.type === 'table' ? normalizeTableBookmarksInTable(node, editor) : node;
  if (!Array.isArray(current.content)) return current;

  return { ...current, content: normalizeTableBookmarksInContent(current.content, editor) };
}
881+
882+
/**
 * Parse a column index attribute into a non-negative integer.
 *
 * @param {unknown} val - Raw attribute value (string or number).
 * @returns {number|null} The parsed index with negatives raised to 0, or null
 *   when the value is missing, empty, or not numeric.
 */
function parseColIndex(val) {
  const isMissing = val === null || val === undefined || val === '';
  if (isMissing) return null;

  const parsed = parseInt(String(val), 10);
  if (Number.isNaN(parsed)) return null;

  return Math.max(0, parsed);
}
887+
888+
/**
 * Resolve which cell of a row should receive a bookmark marker.
 *
 * colFirst/colLast are honoured only for bookmarkStart; bookmarkEnd always
 * goes to the first ('start') or last ('end') cell by position.
 *
 * @param {{type?: string, attrs?: any}} bookmarkNode
 * @param {'start'|'end'} position - Side of the row the marker attaches to.
 * @param {number} rowCellCount - Number of cells in the receiving row.
 * @returns {number} Zero-based cell index; 0 when the row has no cells.
 */
function getCellIndexForBookmark(bookmarkNode, position, rowCellCount) {
  if (!rowCellCount) return 0;

  const isStartSide = position === 'start';
  const lastIndex = rowCellCount - 1;
  const edgeIndex = isStartSide ? 0 : lastIndex;

  if (bookmarkNode?.type === 'bookmarkEnd') return edgeIndex;

  const attrs = bookmarkNode?.attrs ?? {};
  const rawCol = isStartSide ? attrs.colFirst : attrs.colLast;
  const requested = parseColIndex(rawCol);

  // Fall back to the row edge when no usable column is given; otherwise clamp into the row.
  return requested == null ? edgeIndex : Math.min(requested, lastIndex);
}
899+
900+
/**
 * Queue a bookmark marker for later insertion into a specific row cell.
 *
 * Mutates `rowCellInlines[rowIndex][position]`, appending the marker to the
 * list kept for the resolved cell index (creating that list on first use).
 *
 * @param {{ start: Record<number, unknown[]>, end: Record<number, unknown[]> }[]} rowCellInlines
 * @param {number} rowIndex - Index of the receiving row within `rowCellInlines`.
 * @param {'start'|'end'} position - Side of the row the marker attaches to.
 * @param {{type?: string, attrs?: any}} bookmarkNode
 * @param {number} rowCellCount - Cell count used to resolve the target cell.
 */
function addBookmarkToRowCellInlines(rowCellInlines, rowIndex, position, bookmarkNode, rowCellCount) {
  const targetCell = getCellIndexForBookmark(bookmarkNode, position, rowCellCount);
  const pendingByCell = rowCellInlines[rowIndex][position];
  const pending = pendingByCell[targetCell];

  if (pending) {
    pending.push(bookmarkNode);
  } else {
    pendingByCell[targetCell] = [bookmarkNode];
  }
}
906+
907+
/**
 * Insert the bookmark markers collected for one row into its cells.
 *
 * Cells are visited in ascending index order; within a cell the 'start'
 * markers are inserted before the 'end' markers.
 *
 * @param {object} rowNode - The tableRow node to update.
 * @param {{ start: Record<number, unknown[]>, end: Record<number, unknown[]> }} collected
 *   Markers keyed by cell index for each side of the row.
 * @param {Editor} [editor] - Forwarded to the insertion helper.
 * @returns {object} The updated row, or `rowNode` itself when nothing was queued.
 */
function applyBookmarksToRow(rowNode, { start: startByCell, end: endByCell }, editor) {
  const touchedCells = new Set();
  for (const key of Object.keys(startByCell)) touchedCells.add(Number(key));
  for (const key of Object.keys(endByCell)) touchedCells.add(Number(key));

  const orderedCells = Array.from(touchedCells).sort((a, b) => a - b);

  return orderedCells.reduce((row, cellIndex) => {
    const leading = startByCell[cellIndex];
    const trailing = endByCell[cellIndex];
    let nextRow = row;
    if (leading?.length) nextRow = insertInlineIntoRow(nextRow, leading, editor, 'start', cellIndex);
    if (trailing?.length) nextRow = insertInlineIntoRow(nextRow, trailing, editor, 'end', cellIndex);
    return nextRow;
  }, rowNode);
}
921+
922+
/**
 * Move bookmark markers that are direct children of a table into its rows.
 *
 * Placement rules (rows are the table's `tableRow` children, in order):
 * - bookmarkStart attaches to the 'start' side of the row that follows it;
 *   when no row follows, to the 'end' side of the preceding row.
 * - bookmarkEnd attaches to the 'end' side of the row that precedes it;
 *   when no row precedes, to the 'start' side of the following row.
 *
 * The cell count used to resolve the target cell is always read from the row
 * that actually receives the marker. Reading it from a neighbouring row would
 * misplace a bookmarkEnd that sits between two rows of different widths.
 *
 * @param {{type: string, content?: any[], attrs?: any}} tableNode
 * @param {Editor} [editor] - Forwarded to the row/cell insertion helpers.
 * @returns {object} `tableNode` unchanged when it is not a table with at least
 *   one row; otherwise a shallow copy whose content has the bookmark children
 *   removed and the rows replaced by their updated versions.
 */
function normalizeTableBookmarksInTable(tableNode, editor) {
  if (!tableNode || tableNode.type !== 'table' || !Array.isArray(tableNode.content)) return tableNode;

  const rows = tableNode.content.filter((child) => child?.type === 'tableRow');
  if (!rows.length) return tableNode;

  /** @type {{ start: Record<number, unknown[]>, end: Record<number, unknown[]> }[]} */
  const rowCellInlines = rows.map(() => ({
    start: /** @type {Record<number, unknown[]>} */ ({}),
    end: /** @type {Record<number, unknown[]>} */ ({}),
  }));

  // Pass 1: decide, for every bookmark child, which row/side/cell receives it.
  let rowCursor = 0; // number of rows seen so far
  for (const child of tableNode.content) {
    if (child?.type === 'tableRow') {
      rowCursor += 1;
      continue;
    }
    if (!isBookmarkNode(child)) continue;

    const prevRowIndex = rowCursor > 0 ? rowCursor - 1 : null;
    const nextRowIndex = rowCursor < rows.length ? rowCursor : null;

    // bookmarkStart prefers the following row, bookmarkEnd the preceding one.
    const attachToNext = child.type === 'bookmarkStart' ? nextRowIndex != null : prevRowIndex == null;
    const targetRowIndex = attachToNext ? nextRowIndex : prevRowIndex;
    if (targetRowIndex == null) continue;

    const position = attachToNext ? 'start' : 'end';
    // Measure the receiving row, not whichever neighbour happens to exist.
    const rowCellCount = rows[targetRowIndex]?.content?.length ?? 0;
    addBookmarkToRowCellInlines(rowCellInlines, targetRowIndex, position, child, rowCellCount);
  }

  const updatedRows = rows.map((row, index) => applyBookmarksToRow(row, rowCellInlines[index], editor));

  // Pass 2: rebuild the table content, swapping in updated rows and dropping
  // the bookmark children that were relocated above.
  rowCursor = 0;
  const content = [];
  for (const child of tableNode.content) {
    if (child?.type === 'tableRow') {
      content.push(updatedRows[rowCursor] ?? child);
      rowCursor += 1;
    } else if (!isBookmarkNode(child)) {
      content.push(child);
    }
  }

  return {
    ...tableNode,
    content,
  };
}
977+
978+
/**
 * Insert inline nodes into one cell of a table row.
 *
 * A row without cells gets a single new cell holding a paragraph with the
 * inline nodes. Otherwise the target cell is `cellIndex` clamped into the
 * row, or the first ('start') / last ('end') cell when no index is given.
 *
 * @param {object} rowNode - The tableRow node to update.
 * @param {unknown[]} inlineNodes - Inline nodes to insert.
 * @param {Editor} [editor] - Forwarded to the cell insertion helper.
 * @param {'start'|'end'} position - Side of the cell content to insert at.
 * @param {number} [cellIndex] - If set, insert into this cell; otherwise first (start) or last (end) cell.
 * @returns {object} A shallow copy of the row with the updated cell, or
 *   `rowNode` itself when nothing changed.
 */
function insertInlineIntoRow(rowNode, inlineNodes, editor, position, cellIndex) {
  if (!rowNode || !inlineNodes?.length) return rowNode;

  const cells = rowNode.content;
  const hasCells = Array.isArray(cells) && cells.length > 0;
  if (!hasCells) {
    // No cell to host the markers: synthesize one cell with one paragraph.
    const hostCell = {
      type: 'tableCell',
      content: [{ type: 'paragraph', content: inlineNodes }],
      attrs: {},
      marks: [],
    };
    return { ...rowNode, content: [hostCell] };
  }

  const lastCellIndex = cells.length - 1;
  let targetIndex;
  if (cellIndex != null) {
    targetIndex = Math.min(Math.max(0, cellIndex), lastCellIndex);
  } else if (position === 'end') {
    targetIndex = lastCellIndex;
  } else {
    targetIndex = 0;
  }

  const originalCell = cells[targetIndex];
  const replacementCell = insertInlineIntoCell(originalCell, inlineNodes, editor, position);
  if (replacementCell === originalCell) return rowNode;

  const nextCells = cells.slice();
  nextCells[targetIndex] = replacementCell;
  return { ...rowNode, content: nextCells };
}
1002+
1003+
/**
 * Locate the first (or last) textblock among a list of block nodes.
 *
 * @param {any[]} content - Block nodes to scan.
 * @param {Editor} [editor] - Forwarded to the textblock check.
 * @param {boolean} fromEnd - Scan backwards from the last node when truthy.
 * @returns {number} Index of the matching node, or -1 when none is found.
 */
function findTextblockIndex(content, editor, fromEnd) {
  const total = content.length;
  for (let offset = 0; offset < total; offset += 1) {
    const index = fromEnd ? total - 1 - offset : offset;
    if (isTextblockNode(content[index], editor)) return index;
  }
  return -1;
}
1012+
1013+
/**
 * Insert inline nodes into a table cell.
 *
 * For 'start' the nodes are prepended to the first textblock of the cell; for
 * 'end' they are appended to the last one. When the cell has no textblock, a
 * new paragraph holding the nodes is added at the matching edge of the cell.
 *
 * @param {object} cellNode - The tableCell node to update.
 * @param {unknown[]} inlineNodes - Inline nodes to insert.
 * @param {Editor} [editor] - Forwarded to the textblock lookup.
 * @param {'start'|'end'} position - Side of the cell content to insert at.
 * @returns {object} A shallow copy of the cell with updated content, or
 *   `cellNode` itself when there is nothing to insert.
 */
function insertInlineIntoCell(cellNode, inlineNodes, editor, position) {
  if (!cellNode || !inlineNodes?.length) return cellNode;

  const atEnd = position === 'end';
  const blocks = Array.isArray(cellNode.content) ? cellNode.content.slice() : [];
  const hostIndex = findTextblockIndex(blocks, editor, atEnd);

  if (hostIndex === -1) {
    // No textblock available: wrap the markers in a paragraph of their own.
    const wrapper = { type: 'paragraph', content: inlineNodes };
    if (atEnd) {
      blocks.push(wrapper);
    } else {
      blocks.unshift(wrapper);
    }
    return { ...cellNode, content: blocks };
  }

  const hostBlock = blocks[hostIndex] || { type: 'paragraph', content: [] };
  const existing = Array.isArray(hostBlock.content) ? hostBlock.content.slice() : [];
  let merged;
  if (atEnd) {
    merged = existing.concat(inlineNodes);
  } else {
    merged = inlineNodes.concat(existing);
  }

  blocks[hostIndex] = { ...hostBlock, content: merged };
  return { ...cellNode, content: blocks };
}
1033+
1034+
/**
 * Tell whether a node is a bookmark boundary marker.
 *
 * @param {{type?: string}|null|undefined} node
 * @returns {boolean} True for `bookmarkStart` and `bookmarkEnd` nodes.
 */
function isBookmarkNode(node) {
  switch (node?.type) {
    case 'bookmarkStart':
    case 'bookmarkEnd':
      return true;
    default:
      return false;
  }
}
1038+
1039+
/**
 * Tell whether a node can directly hold inline content.
 *
 * Uses the `isTextblock` flag of the matching node type in the editor schema
 * when that flag is a boolean; otherwise only `paragraph` counts.
 *
 * @param {{type?: string}|null|undefined} node
 * @param {Editor} [editor] - Optional; its `schema.nodes` map is consulted when present.
 * @returns {boolean}
 */
function isTextblockNode(node, editor) {
  const typeName = node?.type;
  if (!typeName) return false;

  const schemaFlag = editor?.schema?.nodes?.[typeName]?.isTextblock;
  return typeof schemaFlag === 'boolean' ? schemaFlag : typeName === 'paragraph';
}
1046+
8471047
/**
8481048
* Reconstruct original OOXML for preservable inline nodes using their attribute decoders.
8491049
*

packages/super-editor/src/core/super-converter/v2/importer/docxImporter.test.js

Lines changed: 139 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import { describe, it, expect } from 'vitest';
2-
import { collapseWhitespaceNextToInlinePassthrough, filterOutRootInlineNodes } from './docxImporter.js';
2+
import {
3+
collapseWhitespaceNextToInlinePassthrough,
4+
filterOutRootInlineNodes,
5+
normalizeTableBookmarksInContent,
6+
} from './docxImporter.js';
37

48
// Build a bare node of the given type with optional attrs and no marks.
const n = (type, attrs = {}) => {
  return { type, attrs, marks: [] };
};
59

@@ -178,3 +182,137 @@ describe('collapseWhitespaceNextToInlinePassthrough', () => {
178182
expect(tree[0].content[2].content[0].text).toBe('bar');
179183
});
180184
});
185+
186+
describe('normalizeTableBookmarksInContent', () => {
  // Node factories producing the plain-JSON shape the importer works with.
  const block = (type, content) => ({ type, content, attrs: {}, marks: [] });
  const table = (children) => block('table', children);
  const row = (cells) => block('tableRow', cells);
  const cell = (children) => block('tableCell', children);
  const paragraph = (inlines) => block('paragraph', inlines);
  const text = (value) => ({ type: 'text', text: value, marks: [] });
  const bookmark = (type, id, attrs) => ({ type, attrs: { id, ...attrs } });
  const bookmarkStart = (id, attrs = {}) => bookmark('bookmarkStart', id, attrs);
  const bookmarkEnd = (id, attrs = {}) => bookmark('bookmarkEnd', id, attrs);

  // True when the table still has a direct child of the given type.
  const hasDirectChild = (tableNode, type) => tableNode.content.some((node) => node.type === type);
  // Inline content of the first block inside the addressed cell.
  const firstBlockInlines = (tableNode, rowIndex = 0, cellIndex = 0) =>
    tableNode.content[rowIndex].content[cellIndex].content[0].content;

  it('moves leading bookmarkStart into the first cell paragraph', () => {
    const source = table([bookmarkStart('b1'), row([cell([paragraph([text('Cell')])])])]);

    const [normalizedTable] = normalizeTableBookmarksInContent([source]);

    expect(hasDirectChild(normalizedTable, 'bookmarkStart')).toBe(false);
    const [first, second] = firstBlockInlines(normalizedTable);
    expect(first).toMatchObject({ type: 'bookmarkStart', attrs: { id: 'b1' } });
    expect(second).toMatchObject({ type: 'text', text: 'Cell' });
  });

  it('moves trailing bookmarkEnd into the last cell paragraph', () => {
    const source = table([row([cell([paragraph([text('Cell')])])]), bookmarkEnd('b1')]);

    const [normalizedTable] = normalizeTableBookmarksInContent([source]);

    expect(hasDirectChild(normalizedTable, 'bookmarkEnd')).toBe(false);
    const [first, second] = firstBlockInlines(normalizedTable);
    expect(first).toMatchObject({ type: 'text', text: 'Cell' });
    expect(second).toMatchObject({ type: 'bookmarkEnd', attrs: { id: 'b1' } });
  });

  it('moves bookmarkStart and bookmarkEnd into the same cell when no textblocks exist', () => {
    const source = table([bookmarkStart('b1'), row([cell([])]), bookmarkEnd('b1')]);

    const [normalizedTable] = normalizeTableBookmarksInContent([source]);

    expect(hasDirectChild(normalizedTable, 'bookmarkStart')).toBe(false);
    expect(hasDirectChild(normalizedTable, 'bookmarkEnd')).toBe(false);

    expect(firstBlockInlines(normalizedTable)).toEqual([
      { type: 'bookmarkStart', attrs: { id: 'b1' } },
      { type: 'bookmarkEnd', attrs: { id: 'b1' } },
    ]);
  });

  it('anchors bookmark boundaries to adjacent rows when markers appear between rows', () => {
    const source = table([
      bookmarkStart('b1'),
      row([cell([paragraph([text('R1')])])]),
      bookmarkEnd('b1'),
      row([cell([paragraph([text('R2')])])]),
    ]);

    const [normalizedTable] = normalizeTableBookmarksInContent([source]);

    expect(firstBlockInlines(normalizedTable, 0)).toEqual([
      { type: 'bookmarkStart', attrs: { id: 'b1' } },
      { type: 'text', text: 'R1', marks: [] },
      { type: 'bookmarkEnd', attrs: { id: 'b1' } },
    ]);

    expect(firstBlockInlines(normalizedTable, 1)).toEqual([{ type: 'text', text: 'R2', marks: [] }]);
  });

  it('creates a cell when a row is empty', () => {
    const source = table([bookmarkStart('b1'), row([]), bookmarkEnd('b1')]);

    const [normalizedTable] = normalizeTableBookmarksInContent([source]);

    const createdCells = normalizedTable.content[0].content;
    expect(createdCells).toHaveLength(1);
    expect(createdCells[0].type).toBe('tableCell');

    expect(createdCells[0].content[0].content).toEqual([
      { type: 'bookmarkStart', attrs: { id: 'b1' } },
      { type: 'bookmarkEnd', attrs: { id: 'b1' } },
    ]);
  });

  it('places bookmarkStart in the cell indicated by colFirst when present; bookmarkEnd uses first/last cell only', () => {
    const twoCellRow = row([cell([paragraph([text('A')])]), cell([paragraph([text('B')])])]);
    const source = table([bookmarkStart('b1', { colFirst: '1' }), twoCellRow, bookmarkEnd('b1')]);

    const [normalizedTable] = normalizeTableBookmarksInContent([source]);

    expect(hasDirectChild(normalizedTable, 'bookmarkStart')).toBe(false);
    expect(hasDirectChild(normalizedTable, 'bookmarkEnd')).toBe(false);

    expect(firstBlockInlines(normalizedTable, 0, 0)).toEqual([{ type: 'text', text: 'A', marks: [] }]);

    const [opening, body, closing] = firstBlockInlines(normalizedTable, 0, 1);
    expect(opening).toMatchObject({ type: 'bookmarkStart', attrs: { id: 'b1', colFirst: '1' } });
    expect(body).toMatchObject({ type: 'text', text: 'B', marks: [] });
    expect(closing).toMatchObject({ type: 'bookmarkEnd', attrs: { id: 'b1' } });
  });

  it('normalizes bookmarks in a nested table (table inside a cell with bookmarks as direct children of inner table)', () => {
    const innerSource = table([bookmarkStart('n1'), row([cell([paragraph([text('Nested')])])]), bookmarkEnd('n1')]);
    const outerSource = table([row([cell([innerSource])])]);

    const [outer] = normalizeTableBookmarksInContent([outerSource]);
    // The inner table is the first block of the outer table's only cell.
    const inner = outer.content[0].content[0].content[0];

    expect(inner.type).toBe('table');
    expect(hasDirectChild(inner, 'bookmarkStart')).toBe(false);
    expect(hasDirectChild(inner, 'bookmarkEnd')).toBe(false);

    const [opening, body, closing] = firstBlockInlines(inner);
    expect(opening).toMatchObject({ type: 'bookmarkStart', attrs: { id: 'n1' } });
    expect(body).toMatchObject({ type: 'text', text: 'Nested', marks: [] });
    expect(closing).toMatchObject({ type: 'bookmarkEnd', attrs: { id: 'n1' } });
  });
});

0 commit comments

Comments
 (0)