-
Notifications
You must be signed in to change notification settings - Fork 137
fix(paste): detect heading levels from Google Docs styled paragraphs #2178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
167f5aa
fbc19c7
ab90150
0ec3a54
11c24d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -5,6 +5,17 @@ import { createSingleItemList } from '../html/html-helpers.js'; | |||||||||||||
| import { getLvlTextForGoogleList, googleNumDefMap } from '../../helpers/pasteListHelpers.js'; | ||||||||||||||
| import { wrapTextsInRuns } from '../docx-paste/docx-paste.js'; | ||||||||||||||
|
|
||||||||||||||
| // Ordered largest → smallest; first match wins. Each minPt is the minimum font size (in pt) mapped to that tag; thresholds presumably mirror Google Docs default heading sizes (H1=20pt, H2=16pt, …) — TODO confirm. | ||||||||||||||
| const headingSizeMap = [ | ||||||||||||||
| { minPt: 20, tag: 'h1' }, | ||||||||||||||
| { minPt: 16, tag: 'h2' }, | ||||||||||||||
| { minPt: 14, tag: 'h3' }, | ||||||||||||||
| { minPt: 12, tag: 'h4' }, | ||||||||||||||
| { minPt: 10, tag: 'h5' }, | ||||||||||||||
| ]; | ||||||||||||||
|
|
||||||||||||||
| const boldWeightRegex = /^(bold|700|800|900)$/i; | ||||||||||||||
|
|
||||||||||||||
| /** | ||||||||||||||
| * Main handler for pasted Google Docs content. | ||||||||||||||
| * | ||||||||||||||
|
|
@@ -21,7 +32,9 @@ export const handleGoogleDocsHtml = (html, editor, view) => { | |||||||||||||
| const tempDiv = document.createElement('div'); | ||||||||||||||
| tempDiv.innerHTML = cleanedHtml; | ||||||||||||||
|
|
||||||||||||||
| const htmlWithMergedLists = mergeSeparateLists(tempDiv); | ||||||||||||||
| const tempDivWithHeadings = convertStyledHeadings(tempDiv); | ||||||||||||||
|
|
||||||||||||||
| const htmlWithMergedLists = mergeSeparateLists(tempDivWithHeadings); | ||||||||||||||
|
Comment on lines
+36
to
+38
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Google Docs clipboard already uses
Suggested change
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hey @caio-pizzol, I want to make sure I'm not missing something here, because this seems to contradict the issue #2152 itself. The issue description states:
That's exactly the pattern this PR detects. So could you clarify? Did something change in how Google Docs serializes to clipboard, or was the issue description inaccurate? If Google Docs already outputs semantic heading tags upfront, then the issue itself would be invalid and need to be closed, but the repro steps suggest otherwise.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hey @ErickPetru, you're right to question it - the issue description is wrong, and that's on us for not verifying the assumption before writing it up. I just tested both paste flows with debug logging on the clipboard HTML:
Both sources already output proper heading tags in the clipboard. ProseMirror handles them natively — no conversion needed. The Sorry for the confusion on this - but appreciate your patience working through the reviews. If you're game, there are other |
||||||||||||||
| const flattenHtml = flattenListsInHtml(htmlWithMergedLists, editor); | ||||||||||||||
|
|
||||||||||||||
| let doc = DOMParser.fromSchema(editor.schema).parse(flattenHtml); | ||||||||||||||
|
|
@@ -253,3 +266,68 @@ function buildListPath(level, map) { | |||||||||||||
| } | ||||||||||||||
| return path; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
/**
 * Promotes styled <p> elements that look like headings to proper <h1>–<h5>
 * elements before ProseMirror parsing.
 *
 * A paragraph is promoted only when `getHeadingStyleProps` reports it as bold
 * with a pt font size that reaches one of the `headingSizeMap` thresholds
 * (the first matching entry decides the tag). Paragraphs whose direct parent
 * is an <li> are never touched. All attributes of the paragraph are carried
 * over to the new heading element.
 *
 * The container is modified in place.
 *
 * @param {HTMLElement} container - Root element holding the pasted markup.
 * @returns {HTMLElement} The same container, after the in-place replacements.
 */
function convertStyledHeadings(container) {
  const paragraphs = Array.from(container.querySelectorAll('p')).filter(
    (p) => p.parentElement?.tagName?.toLowerCase() !== 'li',
  );

  for (const p of paragraphs) {
    const { fontSize, isBold } = getHeadingStyleProps(p);
    if (!isBold || fontSize === null) continue;

    const match = headingSizeMap.find(({ minPt }) => fontSize >= minPt);
    if (!match) continue;

    const heading = document.createElement(match.tag);
    for (const { name, value } of Array.from(p.attributes)) {
      heading.setAttribute(name, value);
    }

    // Move the existing child nodes instead of copying innerHTML: this avoids
    // serializing and re-parsing the markup, so the subtree is kept exactly
    // as it was.
    heading.append(...p.childNodes);
    p.replaceWith(heading);
  }

  return container;
}
|
|
||||||||||||||
/**
 * Collects the inline-style hints used for heading detection: the font size
 * (in pt) and whether the text is bold.
 *
 * The element's own inline style is consulted first. When the element has
 * exactly one element child and that child is a <span>, the span's inline
 * style serves as a fallback for the font size and as a second source for the
 * bold check (style on <p> vs. style on the inner <span>).
 *
 * @param {HTMLElement} el - Element whose inline style is inspected.
 * @returns {{ fontSize: number|null, isBold: boolean }} Parsed pt size (null
 *   when no pt value is found) and the bold flag.
 */
function getHeadingStyleProps(el) {
  const onlyChild = el.children.length === 1 ? el.children[0] : null;
  const innerSpan = onlyChild?.tagName?.toLowerCase() === 'span' ? onlyChild : null;

  const ownSize = parsePtValue(el.style.fontSize);
  const fontSize = ownSize ?? parsePtValue(innerSpan?.style.fontSize);

  let isBold = boldWeightRegex.test(el.style.fontWeight || '');
  if (!isBold) {
    isBold = boldWeightRegex.test(innerSpan?.style.fontWeight || '');
  }

  return { fontSize, isBold };
}
|
|
||||||||||||||
/**
 * Parses a CSS font-size value expressed in pt units, e.g. "20pt" → 20 or
 * "13.5PT" → 13.5. Surrounding whitespace is ignored.
 *
 * Returns null for anything else: empty or missing input, non-string input,
 * other units, and malformed numbers such as ".pt" or "1.2.3pt" (which a
 * looser `[\d.]+` pattern would turn into NaN or a silently truncated value).
 *
 * @param {string|undefined} cssValue - Raw CSS value, typically `el.style.fontSize`.
 * @returns {number|null} The size in pt, or null when the value is not a valid pt length.
 */
function parsePtValue(cssValue) {
  if (typeof cssValue !== 'string' || cssValue === '') return null;

  // Digits with an optional fraction, or a bare fraction such as ".5".
  const m = /^(\d+(?:\.\d+)?|\.\d+)pt$/i.exec(cssValue.trim());
  return m ? Number.parseFloat(m[1]) : null;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A comment saying these numbers come from Google Docs default heading sizes (H1=20pt, H2=16pt, etc.) would help — it's not obvious where they came from otherwise.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good suggestion; what's obvious to one developer might not be to another.