@@ -167,6 +167,7 @@ export const createDocumentJson = (docx, converter, editor) => {
167167
168168 // Safety: drop any inline-only nodes that accidentally landed at the doc root
169169 parsedContent = filterOutRootInlineNodes ( parsedContent ) ;
170+ parsedContent = normalizeTableBookmarksInContent ( parsedContent , editor ) ;
170171 collapseWhitespaceNextToInlinePassthrough ( parsedContent ) ;
171172
172173 const result = {
@@ -844,6 +845,205 @@ export function filterOutRootInlineNodes(content = []) {
844845 return result ;
845846}
846847
/**
 * Normalize bookmark nodes that were parsed as direct children of a table.
 *
 * Every node in `content` is run through the per-node normalizer, which
 * relocates bookmarkStart/bookmarkEnd table children into a cell textblock
 * of an adjacent row and then recurses into nested content.
 *
 * Some non-conformant DOCX producers place bookmarks as direct table children.
 * Per ECMA-376 §17.13.6.2, they should be inside cells (bookmarkStart) or
 * as children of rows (bookmarkEnd). PM can't accept bookmarks as a direct
 * child of a table row either, which is why they are relocated into cells.
 *
 * @param {Array<{type: string, content?: any[], attrs?: any}>} content
 * @param {Editor} [editor] - Forwarded unchanged to the per-node normalizer.
 * @returns {Array} A new array of normalized nodes. An empty array or a
 *   non-array value is returned as-is.
 */
export function normalizeTableBookmarksInContent(content = [], editor) {
  const hasNodes = Array.isArray(content) && content.length > 0;
  if (!hasNodes) return content;

  const normalizeNode = (node) => normalizeTableBookmarksInNode(node, editor);
  return content.map(normalizeNode);
}
867+
/**
 * Normalize a single node, then recurse into its children.
 *
 * Table nodes are first passed through the table-level normalizer; afterwards
 * any node that has an array `content` gets a shallow copy whose content has
 * been normalized recursively.
 *
 * @param {*} node - Candidate node; falsy values and non-objects are returned untouched.
 * @param {Editor} [editor] - Forwarded to the table and content normalizers.
 * @returns {*} The original node when nothing applies, otherwise a shallow copy.
 */
function normalizeTableBookmarksInNode(node, editor) {
  const isObjectNode = Boolean(node) && typeof node === 'object';
  if (!isObjectNode) return node;

  // Fix up the table itself before descending, so nested tables inside the
  // (possibly rewritten) rows are visited by the recursion below.
  const current = node.type === 'table' ? normalizeTableBookmarksInTable(node, editor) : node;
  if (!Array.isArray(current.content)) return current;

  return { ...current, content: normalizeTableBookmarksInContent(current.content, editor) };
}
881+
/**
 * Parse a column-index value into a non-negative integer.
 *
 * @param {*} val - Raw value; it is stringified and parsed as a base-10 integer.
 * @returns {number|null} The parsed index with negatives clamped to 0, or
 *   `null` when the value is null/undefined, an empty string, or not numeric.
 */
function parseColIndex(val) {
  const isMissing = val == null || val === '';
  if (isMissing) return null;

  const parsed = Number.parseInt(String(val), 10);
  if (Number.isNaN(parsed)) return null;

  return Math.max(0, parsed);
}
887+
/**
 * Pick the cell index within a row that should receive a bookmark node.
 *
 * A `bookmarkEnd` node always maps to the edge cell for the given position
 * (first cell for 'start', last cell otherwise). Any other node consults
 * `attrs.colFirst` (position 'start') or `attrs.colLast` (otherwise); when
 * that attribute is missing or unparsable the edge cell is used, and a parsed
 * value is capped at the last cell of the row.
 *
 * @param {{type?: string, attrs?: {colFirst?: *, colLast?: *}}} bookmarkNode
 * @param {'start'|'end'} position
 * @param {number} rowCellCount - Number of cells in the target row.
 * @returns {number} Zero-based cell index; 0 when the row has no cells.
 */
function getCellIndexForBookmark(bookmarkNode, position, rowCellCount) {
  if (!rowCellCount) return 0;

  const wantsStart = position === 'start';
  const lastIndex = rowCellCount - 1;
  const edgeIndex = wantsStart ? 0 : lastIndex;

  if (bookmarkNode?.type === 'bookmarkEnd') return edgeIndex;

  const attrs = bookmarkNode?.attrs ?? {};
  const rawColumn = wantsStart ? attrs.colFirst : attrs.colLast;
  const column = parseColIndex(rawColumn);

  return column == null ? edgeIndex : Math.min(column, lastIndex);
}
899+
/**
 * Record a bookmark node for later insertion into a specific row and cell.
 *
 * Mutates `rowCellInlines[rowIndex][position]` by appending the node to the
 * list stored under the resolved cell index, creating that list on first use.
 *
 * @param {{start: Record<number, unknown[]>, end: Record<number, unknown[]>}[]} rowCellInlines
 * @param {number} rowIndex - Index of the row entry to update.
 * @param {'start'|'end'} position - Which per-cell map of the row entry to update.
 * @param {object} bookmarkNode - Node to record.
 * @param {number} rowCellCount - Cell count used to resolve the target cell index.
 * @returns {void}
 */
function addBookmarkToRowCellInlines(rowCellInlines, rowIndex, position, bookmarkNode, rowCellCount) {
  const cellIndex = getCellIndexForBookmark(bookmarkNode, position, rowCellCount);
  const nodesByCell = rowCellInlines[rowIndex][position];
  const pending = nodesByCell[cellIndex];

  if (pending) {
    pending.push(bookmarkNode);
    return;
  }
  nodesByCell[cellIndex] = [bookmarkNode];
}
906+
/**
 * Apply the collected start/end bookmark inlines to a single row.
 *
 * Cells are visited in ascending index order; for each cell the 'start'
 * nodes are inserted before the 'end' nodes.
 *
 * @param {object} rowNode - Row to update (not mutated here).
 * @param {{start: Record<number, unknown[]>, end: Record<number, unknown[]>}} inlines
 *   Per-cell lists of nodes to insert at the start / end of the cell.
 * @param {Editor} [editor] - Forwarded to the row insertion helper.
 * @returns {object} The resulting row; `rowNode` itself when nothing was inserted.
 */
function applyBookmarksToRow(rowNode, { start: startByCell, end: endByCell }, editor) {
  const touchedCells = new Set();
  for (const key of Object.keys(startByCell)) touchedCells.add(Number(key));
  for (const key of Object.keys(endByCell)) touchedCells.add(Number(key));

  const orderedCells = Array.from(touchedCells).sort((a, b) => a - b);

  return orderedCells.reduce((row, cellIndex) => {
    const startNodes = startByCell[cellIndex];
    const endNodes = endByCell[cellIndex];
    let nextRow = row;
    if (startNodes?.length) nextRow = insertInlineIntoRow(nextRow, startNodes, editor, 'start', cellIndex);
    if (endNodes?.length) nextRow = insertInlineIntoRow(nextRow, endNodes, editor, 'end', cellIndex);
    return nextRow;
  }, rowNode);
}
921+
/**
 * Move bookmark nodes that are direct children of a table into its rows.
 *
 * Placement rules:
 * - bookmarkStart goes to the start of a cell in the row that follows it, or
 *   to the end of a cell in the preceding row when no row follows.
 * - bookmarkEnd goes to the end of a cell in the row that precedes it, or to
 *   the start of a cell in the following row when no row precedes.
 *
 * Rows and other non-bookmark children keep their relative order; bookmark
 * children are removed from the table's own content.
 *
 * @param {{type: string, content?: any[]}} tableNode
 * @param {Editor} [editor] - Forwarded to the row update helper.
 * @returns {object} A new table node, or `tableNode` itself when it is not a
 *   table with array content and at least one row.
 */
function normalizeTableBookmarksInTable(tableNode, editor) {
  if (!tableNode || tableNode.type !== 'table' || !Array.isArray(tableNode.content)) return tableNode;

  const rows = tableNode.content.filter((child) => child?.type === 'tableRow');
  if (!rows.length) return tableNode;

  /** @type {{ start: Record<number, unknown[]>, end: Record<number, unknown[]> }[]} */
  const rowCellInlines = rows.map(() => ({
    start: /** @type {Record<number, unknown[]>} */ ({}),
    end: /** @type {Record<number, unknown[]>} */ ({}),
  }));
  let rowCursor = 0;

  // Collect bookmark positions per row/cell (no content array yet).
  for (const child of tableNode.content) {
    if (child?.type === 'tableRow') {
      rowCursor += 1;
      continue;
    }
    if (!isBookmarkNode(child)) continue;

    // rowCursor counts the rows already passed, so it is also the index of
    // the next row (when one exists).
    const prevRowIndex = rowCursor > 0 ? rowCursor - 1 : null;
    const nextRowIndex = rowCursor < rows.length ? rowCursor : null;

    // Starts prefer the following row, ends prefer the preceding row; each
    // falls back to the other side when its preferred row does not exist.
    const useNextRow = child.type === 'bookmarkStart' ? nextRowIndex != null : prevRowIndex == null;
    const targetRowIndex = useNextRow ? nextRowIndex : prevRowIndex;
    if (targetRowIndex == null) continue;

    // The cell count must come from the row that actually receives the
    // bookmark. Reading it from a neighbouring row puts the bookmark in the
    // wrong cell whenever adjacent rows have different numbers of cells.
    const rowCellCount = rows[targetRowIndex]?.content?.length ?? 0;
    addBookmarkToRowCellInlines(rowCellInlines, targetRowIndex, useNextRow ? 'start' : 'end', child, rowCellCount);
  }

  const updatedRows = rows.map((row, index) => applyBookmarksToRow(row, rowCellInlines[index], editor));

  // Rebuild the table content: swap in updated rows, drop the relocated
  // bookmarks, and keep every other child where it was.
  rowCursor = 0;
  const content = [];
  for (const child of tableNode.content) {
    if (child?.type === 'tableRow') {
      content.push(updatedRows[rowCursor] ?? child);
      rowCursor += 1;
    } else if (!isBookmarkNode(child)) {
      content.push(child);
    }
  }

  return {
    ...tableNode,
    content,
  };
}
977+
/**
 * Insert inline nodes into one cell of a table row.
 *
 * When the row has no cells, a new `tableCell` holding a single paragraph
 * with the inline nodes is created and becomes the row's only cell.
 *
 * @param {object} rowNode - Row to update (not mutated).
 * @param {Array} inlineNodes - Nodes to insert; nothing happens when empty.
 * @param {Editor} [editor] - Forwarded to the cell insertion helper.
 * @param {'start'|'end'} position - Passed to the cell insertion helper; also
 *   selects the last cell ('end') or first cell when `cellIndex` is not set.
 * @param {number} [cellIndex] - If set, insert into this cell (clamped to the
 *   row's cell range); otherwise first (start) or last (end) cell.
 * @returns {object} A shallow copy of the row with the updated cell, or
 *   `rowNode` itself when nothing changed.
 */
function insertInlineIntoRow(rowNode, inlineNodes, editor, position, cellIndex) {
  if (!rowNode || !inlineNodes?.length) return rowNode;

  const cells = rowNode.content;
  const hasCells = Array.isArray(cells) && cells.length > 0;
  if (!hasCells) {
    const paragraph = { type: 'paragraph', content: inlineNodes };
    return {
      ...rowNode,
      content: [{ type: 'tableCell', content: [paragraph], attrs: {}, marks: [] }],
    };
  }

  const lastCellIndex = cells.length - 1;
  let targetIndex;
  if (cellIndex != null) {
    targetIndex = Math.min(Math.max(0, cellIndex), lastCellIndex);
  } else if (position === 'end') {
    targetIndex = lastCellIndex;
  } else {
    targetIndex = 0;
  }

  const originalCell = cells[targetIndex];
  const patchedCell = insertInlineIntoCell(originalCell, inlineNodes, editor, position);
  if (patchedCell === originalCell) return rowNode;

  const patchedCells = cells.slice();
  patchedCells[targetIndex] = patchedCell;
  return { ...rowNode, content: patchedCells };
}
1002+
/**
 * Locate the first (or last) textblock node in a list of block nodes.
 *
 * @param {Array} content - Nodes to scan.
 * @param {Editor} [editor] - Forwarded to the textblock check.
 * @param {boolean} fromEnd - Scan backwards from the last node when truthy.
 * @returns {number} Index of the matching node, or -1 when none matches.
 */
function findTextblockIndex(content, editor, fromEnd) {
  if (fromEnd) {
    for (let i = content.length - 1; i > -1; i -= 1) {
      if (isTextblockNode(content[i], editor)) return i;
    }
    return -1;
  }

  const count = content.length;
  for (let i = 0; i < count; i += 1) {
    if (isTextblockNode(content[i], editor)) return i;
  }
  return -1;
}
1012+
/**
 * Insert inline nodes into a textblock of a table cell.
 *
 * For position 'end' the nodes are appended to the cell's last textblock;
 * otherwise they are prepended to its first textblock. When the cell has no
 * textblock, a new paragraph wrapping the nodes is added at the matching edge
 * of the cell.
 *
 * @param {object} cellNode - Cell to update (not mutated).
 * @param {Array} inlineNodes - Nodes to insert; nothing happens when empty.
 * @param {Editor} [editor] - Forwarded to the textblock lookup.
 * @param {'start'|'end'} position
 * @returns {object} A shallow copy of the cell with updated content, or
 *   `cellNode` itself when there is nothing to insert.
 */
function insertInlineIntoCell(cellNode, inlineNodes, editor, position) {
  if (!cellNode || !inlineNodes?.length) return cellNode;

  const atEnd = position === 'end';
  const blocks = Array.isArray(cellNode.content) ? cellNode.content.slice() : [];
  const blockIndex = findTextblockIndex(blocks, editor, atEnd);

  if (blockIndex === -1) {
    // No textblock to host the inlines: wrap them in a fresh paragraph.
    const wrapper = { type: 'paragraph', content: inlineNodes };
    if (atEnd) {
      blocks.push(wrapper);
    } else {
      blocks.unshift(wrapper);
    }
    return { ...cellNode, content: blocks };
  }

  const hostBlock = blocks[blockIndex] || { type: 'paragraph', content: [] };
  const existingInlines = Array.isArray(hostBlock.content) ? hostBlock.content : [];
  const mergedInlines = atEnd ? existingInlines.concat(inlineNodes) : inlineNodes.concat(existingInlines);

  blocks[blockIndex] = { ...hostBlock, content: mergedInlines };
  return { ...cellNode, content: blocks };
}
1033+
/**
 * Tell whether a node is a bookmark boundary marker.
 *
 * @param {{type?: string}|null|undefined} node
 * @returns {boolean} True when `node.type` is 'bookmarkStart' or 'bookmarkEnd'.
 */
function isBookmarkNode(node) {
  switch (node?.type) {
    case 'bookmarkStart':
    case 'bookmarkEnd':
      return true;
    default:
      return false;
  }
}
1038+
/**
 * Decide whether a JSON node is a textblock.
 *
 * When the editor schema has a node type of the same name exposing a boolean
 * `isTextblock`, that flag is returned. Otherwise only 'paragraph' counts as
 * a textblock.
 *
 * @param {{type?: string}|null|undefined} node
 * @param {Editor} [editor] - Optional; its `schema.nodes` map is consulted when present.
 * @returns {boolean}
 */
function isTextblockNode(node, editor) {
  const typeName = node?.type;
  if (!typeName) return false;

  const schemaFlag = editor?.schema?.nodes?.[typeName]?.isTextblock;
  return typeof schemaFlag === 'boolean' ? schemaFlag : typeName === 'paragraph';
}
1046+
8471047/**
8481048 * Reconstruct original OOXML for preservable inline nodes using their attribute decoders.
8491049 *
0 commit comments