Skip to content

Commit 167f5aa

Browse files
committed
fix(paste): detect heading levels from Google Docs styled paragraphs
1 parent b3a2912 commit 167f5aa

2 files changed

Lines changed: 146 additions & 1 deletion

File tree

packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.js

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@ import { createSingleItemList } from '../html/html-helpers.js';
55
import { getLvlTextForGoogleList, googleNumDefMap } from '../../helpers/pasteListHelpers.js';
66
import { wrapTextsInRuns } from '../docx-paste/docx-paste.js';
77

8+
// Minimum font size (in pt) for each heading tag, listed in descending order so
// that a first-match search returns the largest threshold a size satisfies.
const headingSizeMap = [
  [20, 'h1'],
  [16, 'h2'],
  [14, 'h3'],
  [12, 'h4'],
  [10, 'h5'],
].map(([minPt, tag]) => ({ minPt, tag }));

// Inline font-weight values that count as bold (matched case-insensitively).
const boldWeightRegex = /^(bold|700|800|900)$/i;
18+
819
/**
920
* Main handler for pasted Google Docs content.
1021
*
@@ -21,7 +32,9 @@ export const handleGoogleDocsHtml = (html, editor, view) => {
2132
const tempDiv = document.createElement('div');
2233
tempDiv.innerHTML = cleanedHtml;
2334

24-
const htmlWithMergedLists = mergeSeparateLists(tempDiv);
35+
const tempDivWithHeadings = convertStyledHeadings(tempDiv);
36+
37+
const htmlWithMergedLists = mergeSeparateLists(tempDivWithHeadings);
2538
const flattenHtml = flattenListsInHtml(htmlWithMergedLists, editor);
2639

2740
let doc = DOMParser.fromSchema(editor.schema).parse(flattenHtml);
@@ -253,3 +266,60 @@ function buildListPath(level, map) {
253266
}
254267
return path;
255268
}
269+
270+
/**
 * Converts Google Docs styled <p> elements that represent headings into proper
 * <h1>–<h5> tags before ProseMirror parsing.
 *
 * Google Docs may emit headings as <p> tags with inline font-size /
 * font-weight styling instead of semantic heading tags. A paragraph is
 * converted when getHeadingStyleProps reports it as bold with a pt font size
 * that reaches one of the thresholds in headingSizeMap (first match wins).
 * Paragraphs without any text are left untouched so that spacer lines do not
 * turn into empty headings.
 *
 * The container is modified in place: each matching <p> is replaced by a
 * heading element that carries the same attributes and the same child nodes.
 *
 * @param {HTMLElement} container Element whose descendant <p> tags are inspected.
 * @returns {HTMLElement} The same container, for call chaining.
 */
function convertStyledHeadings(container) {
  // Snapshot the matches into an array so replacing nodes cannot disturb iteration.
  const paragraphs = Array.from(container.querySelectorAll('p'));

  paragraphs.forEach((p) => {
    // A styled but empty paragraph would otherwise become an empty heading.
    if (p.textContent.trim() === '') return;

    const { fontSize, isBold } = getHeadingStyleProps(p);
    if (!isBold || fontSize === null) return;

    const match = headingSizeMap.find(({ minPt }) => fontSize >= minPt);
    if (!match) return;

    const heading = document.createElement(match.tag);
    Array.from(p.attributes).forEach((attr) => heading.setAttribute(attr.name, attr.value));
    // Move the existing child nodes rather than serialising and re-parsing them
    // through innerHTML; the spread copies the live NodeList before any node moves.
    heading.append(...p.childNodes);
    p.replaceWith(heading);
  });

  return container;
}
298+
299+
/**
 * Reads the font-size (in pt) and bold status used to decide whether a pasted
 * paragraph is a heading. Only inline styles are consulted.
 *
 * Font size: the element's own inline font-size wins; otherwise the inline
 * font-size of its first descendant <span> is used. This covers both Google
 * Docs style placements (style on <p> vs. style on inner <span>).
 *
 * Bold: when the element contains <span>s that carry text, every one of them
 * must resolve to a bold weight, where a span without its own font-weight
 * takes the nearest ancestor's value up to and including the element. This
 * keeps a body paragraph that merely starts with a bold run (e.g. "Note: …")
 * from being reported as bold. When there is no text-bearing span, the
 * element's own font-weight decides.
 *
 * @param {HTMLElement} el
 * @returns {{ fontSize: number|null, isBold: boolean }}
 */
function getHeadingStyleProps(el) {
  const spans = Array.from(el.querySelectorAll('span'));
  const fontSize = parsePtValue(el.style.fontSize) ?? parsePtValue(spans[0]?.style.fontSize);

  // Nearest inline font-weight between `node` and `el` (inclusive); '' when none is set.
  const resolveWeight = (node) => {
    for (let current = node; current; current = current.parentElement) {
      const weight = current.style?.fontWeight;
      if (weight) return weight;
      if (current === el) break;
    }
    return '';
  };

  // Empty spans contribute no visible text, so they must not influence the verdict.
  const textSpans = spans.filter((span) => (span.textContent || '').trim() !== '');
  const isBold =
    textSpans.length > 0
      ? textSpans.every((span) => boldWeightRegex.test(resolveWeight(span)))
      : boldWeightRegex.test(el.style.fontWeight || '');

  return { fontSize, isBold };
}
313+
314+
/**
 * Parses a CSS font-size value expressed in points, e.g. "20pt" → 20 or
 * "10.5pt" → 10.5. Surrounding whitespace and the letter case of the unit are
 * ignored.
 *
 * Returns null for anything that is not a single well-formed, non-negative
 * decimal number immediately followed by "pt": other units, keywords,
 * malformed numbers such as "1.2.3pt" or ".pt", empty strings and non-string
 * input. The result is never NaN, so callers can safely chain `?? fallback`.
 *
 * @param {string|undefined|null} cssValue
 * @returns {number|null}
 */
function parsePtValue(cssValue) {
  if (typeof cssValue !== 'string') return null;

  // Digits with an optional fraction ("20", "20.", "10.5") or a bare fraction (".5").
  const m = cssValue.trim().match(/^(\d+(?:\.\d*)?|\.\d+)pt$/i);
  if (!m) return null;

  const value = Number.parseFloat(m[1]);
  // Guards against absurdly long digit strings that overflow to Infinity.
  return Number.isFinite(value) ? value : null;
}

packages/super-editor/src/core/inputRules/google-docs-paste/google-docs-paste.test.js

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,79 @@ describe('handleGoogleDocsHtml', () => {
9595
expect(replaceSelectionWith).toHaveBeenCalledWith(parseResult, true);
9696
expect(dispatch).toHaveBeenCalledWith('next');
9797
});
98+
99+
describe('convertStyledHeadings', () => {
  // Minimal editor/view doubles exposing only the properties populated here.
  const buildHarness = (dispatch, replaceSelectionWith) => ({
    editor: { schema: {}, view: { dispatch }, options: {} },
    view: { state: { tr: { replaceSelectionWith } } },
  });

  // Runs the paste handler on `html` and returns the first argument of the
  // first call recorded on parseSpy.
  const parseHeadings = (html) => {
    const harness = buildHarness(
      vi.fn(),
      vi.fn(() => 'next'),
    );
    handleGoogleDocsHtml(html, harness.editor, harness.view);
    const [firstCall] = parseSpy.mock.calls;
    return firstCall[0];
  };

  // Trimmed text of the first element matching `selector`, or undefined when absent.
  const textOf = (root, selector) => root.querySelector(selector)?.textContent?.trim();

  it('converts bold <p> with large font-size to heading tags', () => {
    const dom = parseHeadings(`
      <p style="font-size:20pt;font-weight:700">Heading 1</p>
      <p style="font-size:16pt;font-weight:bold">Heading 2</p>
      <p style="font-size:14pt;font-weight:700">Heading 3</p>
      <p style="font-size:12pt;font-weight:700">Heading 4</p>
      <p style="font-size:11pt;font-weight:700">Heading 5</p>
    `);

    ['h1', 'h2', 'h3', 'h4', 'h5'].forEach((tag, index) => {
      expect(textOf(dom, tag)).toBe(`Heading ${index + 1}`);
    });
  });

  it('converts when style is on a child <span> instead of the <p>', () => {
    const dom = parseHeadings(`
      <p><span style="font-size:20pt;font-weight:700">Heading from span</span></p>
    `);

    expect(textOf(dom, 'h1')).toBe('Heading from span');
    expect(dom.querySelector('p')).toBeNull();
  });

  it('does not convert non-bold paragraphs', () => {
    const dom = parseHeadings(`<p style="font-size:20pt">Not a heading</p>`);

    expect(dom.querySelector('h1')).toBeNull();
    expect(textOf(dom, 'p')).toBe('Not a heading');
  });

  it('does not convert bold paragraphs with small font-size', () => {
    const dom = parseHeadings(`<p style="font-size:9pt;font-weight:700">Small bold</p>`);

    expect(dom.querySelector('h1,h2,h3,h4,h5')).toBeNull();
  });

  it('handles large font-sizes from alternate Google Docs themes (e.g. 24pt → h1)', () => {
    const dom = parseHeadings(`<p style="font-size:24pt;font-weight:700">Big Heading</p>`);

    expect(textOf(dom, 'h1')).toBe('Big Heading');
  });

  it('converts when font-size is on <p> but font-weight is only on the child <span>', () => {
    const dom = parseHeadings(`
      <p style="font-size:20pt"><span style="font-weight:700">Split style heading</span></p>
    `);

    expect(textOf(dom, 'h1')).toBe('Split style heading');
  });

  it('preserves attributes from the original <p> on the new heading element', () => {
    const dom = parseHeadings(`<p style="font-size:20pt;font-weight:700" data-custom="yes">With attr</p>`);

    expect(dom.querySelector('h1')?.getAttribute('data-custom')).toBe('yes');
  });
});
98173
});

0 commit comments

Comments
 (0)