Skip to content

Commit f891c40

Browse files
authored
feat(lists paste): handle paste from Google docs (#779)
* feat(lists paste): handle paste from Google docs
1 parent 5e944d2 commit f891c40

7 files changed

Lines changed: 218 additions & 11 deletions

File tree

packages/super-editor/src/core/InputRule.js

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { getTextContentFromNodes } from './helpers/getTextContentFromNodes.js';
77
import { isRegExp } from './utilities/isRegExp.js';
88
import { handleDocxPaste } from './inputRules/docx-paste/docx-paste.js';
99
import { flattenListsInHtml } from './inputRules/html/html-helpers.js';
10+
import { handleGoogleDocsHtml } from './inputRules/google-docs-paste/google-docs-paste.js';
1011

1112
export class InputRule {
1213
match;
@@ -231,15 +232,22 @@ export function isWordHtml(html) {
231232
);
232233
}
233234

235+
/**
 * Detects HTML copied from Google Docs by looking for the
 * `docs-internal-guid-` marker anywhere in the markup.
 *
 * @param {String} html The HTML string to inspect.
 * @returns {Boolean} True when the marker is present.
 */
function isGoogleDocsHtml(html) {
  const googleDocsMarker = /docs-internal-guid-/;
  return googleDocsMarker.test(html);
}
238+
234239
/**
235240
* Handle HTML paste events.
236241
*
237242
* @param {String} html The HTML string to be pasted.
238243
* @param {Editor} editor The editor instance.
244+
* @param {String} source HTML content source
239245
* @returns {Boolean} Returns true if the paste was handled.
240246
*/
241-
export function handleHtmlPaste(html, editor) {
242-
const cleanedHtml = htmlHandler(html, editor);
247+
export function handleHtmlPaste(html, editor, source) {
248+
let cleanedHtml;
249+
if (source === 'google-docs') cleanedHtml = handleGoogleDocsHtml(html, editor);
250+
else cleanedHtml = htmlHandler(html, editor);
243251
const doc = PMDOMParser.fromSchema(editor.schema).parse(cleanedHtml);
244252

245253
const { dispatch, state } = editor.view;
@@ -378,6 +386,8 @@ export function handleClipboardPaste({ editor, view }, html) {
378386
source = 'plain-text';
379387
} else if (isWordHtml(html)) {
380388
source = 'word-html';
389+
} else if (isGoogleDocsHtml(html)) {
390+
source = 'google-docs';
381391
} else {
382392
source = 'browser-html';
383393
}
@@ -391,6 +401,9 @@ export function handleClipboardPaste({ editor, view }, html) {
391401
if (editor.options.mode === 'docx') {
392402
return handleDocxPaste(html, editor, view);
393403
}
404+
break;
405+
case 'google-docs':
406+
return handleGoogleDocsHtml(html, editor, view);
394407
// falls through to browser-html handling when not in DOCX mode
395408
case 'browser-html':
396409
return handleHtmlPaste(html, editor);

packages/super-editor/src/core/helpers/orderedListUtils.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ const listIndexMap = {
2323

2424
/**
 * Builds the rendered list marker by substituting `%1`, `%2`, ... in the
 * level text with the matching entry of `values` (`%N` <- `values[N - 1]`).
 *
 * A `0` written directly in front of a placeholder (e.g. `0%1.`) is treated
 * as leading-zero padding: it is kept for values up to 9 and dropped for
 * larger values, so `0%1.` renders as `05.` and `10.`.
 * The padding decision is made on the template itself, so a substituted `0`
 * value or the padding of another level is never stripped by accident.
 *
 * Only the first occurrence of each placeholder is substituted, and
 * placeholders without a corresponding value are left untouched.
 *
 * @param {Array} values Counter values, one per list level (outermost first).
 * @param {String} lvlText Level text template containing `%N` placeholders.
 * @returns {String} The template with placeholders replaced.
 */
const createNumbering = (values, lvlText) => {
  // Nothing to substitute: hand the template back untouched.
  if (values.length === 0) return lvlText;

  const substituted = new Set();
  return lvlText.replace(/(0?)%([1-9])/g, (match, padding, digit) => {
    const index = Number(digit) - 1;
    if (index >= values.length || substituted.has(index)) return match;
    substituted.add(index);

    const value = values[index];
    return value > 9 ? String(value) : `${padding}${value}`;
  });
};
2929

packages/super-editor/src/core/helpers/pasteListHelpers.js

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
export const extractListLevelStyles = (cssText, listId, level) => {
2-
const pattern = new RegExp(`@list\\s+l${listId}:level${level}\\s*\\{([^}]+)\\}`, 'i');
1+
export const extractListLevelStyles = (cssText, listId, level, numId) => {
2+
const pattern = new RegExp(`@list\\s+l${listId}:level${level}(?:\\s+lfo${numId})?\\s*\\{([^}]+)\\}`, 'i');
33
const match = cssText.match(pattern);
44
if (!match) return null;
55

@@ -77,3 +77,30 @@ export const startHelperMap = new Map([
7777
['upperRoman', getStartNumberFromRoman],
7878
['bullet', () => 1],
7979
]);
80+
81+
// Lookup from the list-style-type value read off a pasted Google Docs list item
// to the numbering format name passed on as `fmt` for the new list definition.
export const googleNumDefMap = new Map(
  Object.entries({
    decimal: 'decimal',
    'decimal-leading-zero': 'decimal',
    'lower-alpha': 'lowerLetter',
    'upper-alpha': 'upperLetter',
    'lower-roman': 'lowerRoman',
    'upper-roman': 'upperRoman',
    bullet: 'bullet',
  }),
);
90+
91+
/**
 * Returns the level text (marker template) for a list pasted from Google Docs.
 *
 * - `decimal-leading-zero` -> `0%<level>.`
 * - `bullet`               -> the `w:lvlText` value of the matching level in the
 *                             first abstract numbering definition of the editor
 * - anything else          -> `%<level>.`
 *
 * The bullet definition is only looked up when it is actually needed, so
 * ordered formats no longer fail when the editor has no `w:lvl` entry for the
 * requested level.
 *
 * @param {String} fmt List format as found on the pasted list item.
 * @param {Number} level 1-based list level.
 * @param {Editor} editor The editor instance (read only for `bullet`).
 * @returns {String|undefined} The level text; `undefined` when a bullet
 *   definition for the level cannot be found.
 */
export const getLvlTextForGoogleList = (fmt, level, editor) => {
  switch (fmt) {
    case 'decimal-leading-zero':
      return `0%${level}.`;
    case 'bullet': {
      const bulletListDef = editor.converter.numbering.abstracts[0];
      // `w:ilvl` is stored as a 0-based string, `level` is 1-based.
      const bulletDefForLevel = bulletListDef?.elements?.find(
        (el) => el.name === 'w:lvl' && el.attributes?.['w:ilvl'] === (level - 1).toString(),
      );
      return bulletDefForLevel?.elements?.find((el) => el.name === 'w:lvlText')?.attributes?.['w:val'];
    }
    default:
      return `%${level}.`;
  }
};

packages/super-editor/src/core/inputRules/docx-paste/docx-paste.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ export const handleDocxPaste = (html, editor, view) => {
4444

4545
if (msoListMatch) {
4646
const [, abstractId, level, numId] = msoListMatch;
47-
const styles = extractListLevelStyles(css, abstractId, level);
47+
const styles = extractListLevelStyles(css, abstractId, level, numId) || {};
4848
let start, numFmt, lvlText;
4949

5050
if (type === 'listItem') {
@@ -58,7 +58,7 @@ export const handleDocxPaste = (html, editor, view) => {
5858
// Get numbering format from Word styles
5959
const msoNumFormat = styles['mso-level-number-format'] || 'decimal';
6060
numFmt = numDefMap.get(msoNumFormat);
61-
const punc = item.children[0]?.innerText?.slice(-1) || '.';
61+
const punc = item.innerText?.match(/^\s*[a-zA-Z0-9]+([.()])/i)?.[1] || '.';
6262
lvlText = numFmt === 'bullet' ? normalizeLvlTextChar(styles['mso-level-text']) : `%${level}${punc}`;
6363

6464
const startGetter = startHelperMap.get(numFmt);
@@ -83,6 +83,7 @@ export const handleDocxPaste = (html, editor, view) => {
8383

8484
transformWordLists(tempDiv, editor);
8585
const doc = DOMParser.fromSchema(editor.schema).parse(tempDiv);
86+
8687
tempDiv.remove();
8788

8889
const { dispatch } = editor.view;
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import { DOMParser } from 'prosemirror-model';
2+
import { convertEmToPt, sanitizeHtml } from '../../InputRule.js';
3+
import { ListHelpers } from '../../helpers/list-numbering-helpers.js';
4+
import { createSingleItemList } from '../html/html-helpers.js';
5+
import { getLvlTextForGoogleList, googleNumDefMap } from '../../helpers/pasteListHelpers.js';
6+
7+
/**
 * Main handler for pasted Google Docs content.
 *
 * Converts em sizes to pt, sanitizes the markup, merges lists that Google Docs
 * split into sequentially numbered fragments, flattens every list into
 * single-item lists, parses the result with the editor schema and replaces the
 * current selection with it.
 *
 * @param {string} html The string being pasted
 * @param {Editor} editor The SuperEditor instance
 * @param {Object} [view] The ProseMirror view whose state is used to build the
 *   transaction. Falls back to `editor.view` when omitted, so two-argument
 *   callers do not fail on an undefined view.
 * @returns {boolean} `true` when the transaction was dispatched, `false` when
 *   `editor.view` exposes no `dispatch`.
 */
export const handleGoogleDocsHtml = (html, editor, view) => {
  // Normalize sizing and strip unwanted markup before touching the lists
  const htmlWithPtSizing = convertEmToPt(html);
  const cleanedHtml = sanitizeHtml(htmlWithPtSizing).innerHTML;

  const tempDiv = document.createElement('div');
  tempDiv.innerHTML = cleanedHtml;

  // convert lists
  const htmlWithMergedLists = mergeSeparateLists(tempDiv);
  const flattenHtml = flattenListsInHtml(htmlWithMergedLists, editor);

  const doc = DOMParser.fromSchema(editor.schema).parse(flattenHtml);
  tempDiv.remove();

  const { dispatch } = editor.view;
  if (!dispatch) return false;

  const { state } = view ?? editor.view;
  dispatch(state.tr.replaceSelectionWith(doc, true));
  return true;
};
35+
36+
/**
 * Repeatedly flattens lists inside `container` until `findListToFlatten`
 * reports that none is left, then returns the same container.
 */
function flattenListsInHtml(container, editor) {
  for (
    let pendingList = findListToFlatten(container);
    pendingList;
    pendingList = findListToFlatten(container)
  ) {
    flattenFoundList(pendingList, editor);
  }

  return container;
}
48+
49+
/**
 * Returns the first <ol>/<ul> inside `container` that carries no
 * `data-list-id` attribute yet, or `null` when there is none.
 */
function findListToFlatten(container) {
  const unprocessedListSelector = 'ol:not([data-list-id]), ul:not([data-list-id])';
  const unprocessedList = container.querySelector(unprocessedListSelector);

  return unprocessedList ? unprocessedList : null;
}
59+
60+
/**
 * Flattens a single list by:
 * 1. Registering a new list definition (Google Docs lists carry no numId)
 * 2. Splitting multi-item lists into single-item lists
 * 3. Hoisting the list that directly follows an item next to that item, so it
 *    is picked up and flattened by a later pass
 *
 * A list without any <li> child is unwrapped (replaced by its own child
 * nodes) instead of being dereferenced, which keeps its content and lets the
 * calling loop make progress.
 *
 * @param {Element} listElem The <ol>/<ul> element to flatten.
 * @param {Editor} editor The editor instance.
 */
function flattenFoundList(listElem, editor) {
  const NodeInterface = editor.options.mockDocument
    ? editor.options.mockDocument.defaultView.Node
    : window.Node;

  const tag = listElem.tagName.toLowerCase();

  // Get all direct <li> children
  const items = Array.from(listElem.children).filter((c) => c.tagName.toLowerCase() === 'li');

  if (items.length === 0) {
    listElem.replaceWith(...Array.from(listElem.childNodes));
    return;
  }

  const [firstItem] = items;
  // A missing or non-numeric aria-level would otherwise become level 0 / ilvl "-1".
  const rootListLevel = Number(firstItem.getAttribute('aria-level')) || 1;
  const rootListFmt = firstItem.style['list-style-type'] || 'decimal';
  // NOTE(review): `start` is a string when read from the attribute and the number 1
  // otherwise; kept as-is because the expected type is not visible here.
  const start = listElem.getAttribute('start') || 1;

  // Google docs list doesn't have numId
  const rootNumId = ListHelpers.getNewListId(editor);

  ListHelpers.generateNewListDefinition({
    numId: rootNumId,
    listType: tag === 'ol' ? 'orderedList' : 'bulletList',
    editor,
    fmt: googleNumDefMap.get(rootListFmt),
    level: (rootListLevel - 1).toString(),
    start,
    text: getLvlTextForGoogleList(rootListFmt, rootListLevel, editor),
  });

  // Create single-item lists for each item
  const newLists = [];

  items.forEach((li) => {
    const level = (Number(li.getAttribute('aria-level')) || rootListLevel) - 1;
    const listLevel = [level + 1];

    // Only lists that are direct children of this list are hoisted. Deeper lists
    // already travel inside the deep clone of their parent; cloning them again
    // would duplicate their items.
    const siblingLists = getNestedLists([li.nextSibling]).filter((list) => list.parentNode === listElem);

    // Create a new single-item list for this li
    const newList = createSingleItemList({ li, tag, rootNumId, level, listLevel, editor, NodeInterface });
    newLists.push(newList);

    siblingLists.forEach((list) => {
      newLists.push(list.cloneNode(true));
      list.remove();
    });
  });

  // Replace the original list with the new single-item lists
  const parent = listElem.parentNode;
  const nextSibling = listElem.nextSibling;
  parent.removeChild(listElem);

  newLists.forEach((list) => {
    parent.insertBefore(list, nextSibling);
  });
}
125+
126+
/**
 * Collects every <ol>/<ul> found among `nodes`, each one immediately followed
 * by the lists found (recursively) among its own element children.
 * `null` entries are ignored.
 */
function getNestedLists(nodes) {
  const isList = (node) => node.tagName === 'OL' || node.tagName === 'UL';

  return Array.from(nodes)
    .filter((node) => node !== null)
    .flatMap((node) => (isList(node) ? [node, ...getNestedLists(node.children)] : []));
}
143+
144+
/**
 * Combines separate lists with a sequential `start` attribute into one list.
 * Google Docs list items can be presented as separate lists whose `start`
 * attributes continue the numbering of a first list that has no `start`.
 *
 * Works on a deep clone of `container`; the original is left untouched.
 * The n-th root-level <ol> carrying a `start` attribute is merged into the
 * first root-level <ol> without one when its `start` equals n + 1
 * (i.e. "2", "3", ...). When no such start-less list exists there is nothing
 * to merge into and the clone is returned unchanged.
 *
 * @param {Element} container Element holding the pasted markup.
 * @returns {Element} The (possibly modified) clone of `container`.
 */
function mergeSeparateLists(container) {
  const tempCont = container.cloneNode(true);

  const rootLevelLists = Array.from(tempCont.querySelectorAll('ol:not(ol ol):not(ul ol)'));
  const mainList = rootLevelLists.find((list) => !list.getAttribute('start'));
  if (!mainList) return tempCont;

  const listsWithStartAttr = rootLevelLists.filter((list) => list.getAttribute('start') !== null);
  for (const [index, item] of listsWithStartAttr.entries()) {
    if (item.getAttribute('start') === (index + 2).toString()) {
      mainList.append(...item.childNodes);
      item.remove();
    }
  }

  return tempCont;
}

packages/super-editor/src/core/inputRules/html/html-helpers.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ function flattenFoundList(listElem, editor, NodeInterface) {
105105
nestedLists.forEach((nl) => nl.parentNode.removeChild(nl));
106106

107107
// Create a new single-item list for this li
108-
const newList = createSingleItemList(li, tag, rootNumId, level, editor, NodeInterface);
108+
const newList = createSingleItemList({ li, tag, rootNumId, level, editor, NodeInterface });
109109
newLists.push(newList);
110110

111111
// Add the nested lists (they'll be processed in the next iteration)
@@ -127,7 +127,7 @@ function flattenFoundList(listElem, editor, NodeInterface) {
127127
/**
128128
* Creates a single-item list from an <li> element
129129
*/
130-
function createSingleItemList(li, tag, rootNumId, level, editor, NodeInterface) {
130+
export function createSingleItemList({ li, tag, rootNumId, level, listLevel, editor, NodeInterface }) {
131131
const localDoc = li.ownerDocument;
132132
const ELEMENT_NODE = NodeInterface.ELEMENT_NODE;
133133
const TEXT_NODE = NodeInterface.TEXT_NODE;
@@ -163,7 +163,7 @@ function createSingleItemList(li, tag, rootNumId, level, editor, NodeInterface)
163163

164164
newLi.setAttribute('data-num-fmt', listNumberingType);
165165
newLi.setAttribute('data-lvl-text', lvlText || '');
166-
newLi.setAttribute('data-list-level', JSON.stringify([level + 1]));
166+
newLi.setAttribute('data-list-level', JSON.stringify(listLevel || [level + 1]));
167167

168168
// Copy content from original li
169169
Array.from(li.childNodes).forEach((node) => {

packages/super-editor/src/core/super-converter/v2/importer/listImporter.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ export const generateListPath = (level, numId, styleId, levels, docx) => {
389389
if (iLvl > 0) {
390390
for (let i = iLvl; i >= 0; i--) {
391391
const { start: lvlStart } = getListLevelDefinitionTag(numId, i, styleId, docx);
392-
if (!levels[i]) levels[i] = Number(lvlStart);
392+
if (!levels[i]) levels[i] = Number(lvlStart) || 1;
393393
path.unshift(levels[i]);
394394
}
395395
}

0 commit comments

Comments
 (0)