From 81dca3ed434ad22d9c27d3da2ab69ec99627352c Mon Sep 17 00:00:00 2001 From: ella Date: Tue, 5 May 2026 14:27:52 +0200 Subject: [PATCH 1/2] Block parser: Share parsed innerHTML across validation fixes Invalid blocks went through `applyBuiltInValidationFixes` and the deprecation loop re-parsing the same `originalContent` repeatedly: each fix wrapped innerHTML in a synthetic `<div>
` and re-parsed to read a single root-element attribute, and every deprecation iteration re-parsed via `getBlockAttributes`. Parse `innerHTML` once in `parseRawBlock`, deep-clone the result so it's insulated from hpq's shared document body, and thread it down through the validation and deprecation paths. Fixes now read attributes directly off the pre-parsed root. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/blocks/README.md | 1 + .../parser/apply-block-deprecated-versions.ts | 24 ++++++--- .../parser/apply-built-in-validation-fixes.ts | 34 ++++++++++--- .../src/api/parser/fix-custom-classname.ts | 42 +++++++++------ .../src/api/parser/fix-global-attribute.ts | 24 ++++++--- .../src/api/parser/get-block-attributes.ts | 15 ++++-- packages/blocks/src/api/parser/index.ts | 51 +++++++++++++++---- 7 files changed, 141 insertions(+), 50 deletions(-) diff --git a/packages/blocks/README.md b/packages/blocks/README.md index ee80df05735c50..bf96cac02f3486 100644 --- a/packages/blocks/README.md +++ b/packages/blocks/README.md @@ -97,6 +97,7 @@ _Parameters_ - _blockTypeOrName_ `string | BlockType`: Block type or name. - _innerHTML_ `string | Node`: Raw block content. - _attributes_ `Record< string, unknown >`: Known block attributes (from delimiters). +- _parsedBody_ `Node`: Optional pre-parsed DOM node for innerHTML. When provided, the internal HTML parse is skipped. Useful for sharing a single parse across multiple callers operating on the same innerHTML string (e.g. block validation, deprecation iteration). Note: hpq uses a single shared document body, so a parsed node held across other parses will be detached. Detached nodes still respond correctly to attribute and class reads. 
_Returns_ diff --git a/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts b/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts index e4f5ce434f6adc..01957e8eadee31 100644 --- a/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts +++ b/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts @@ -22,18 +22,24 @@ function stubFalse(): boolean { * deprecated migrations applied, or the original block if it was both valid * and no eligible migrations exist. * - * @param block Parsed and invalid block object. - * @param rawBlock Raw block object. - * @param blockType Block type. This is normalize not necessary and - * can be inferred from the block name, - * but it's here for performance reasons. + * @param block Parsed and invalid block object. + * @param rawBlock Raw block object. + * @param blockType Block type. This is normalize not necessary and + * can be inferred from the block name, + * but it's here for performance reasons. + * @param parsedBody Pre-parsed DOM body for `block.originalContent`, if + * available. Shared across loop iterations (passed to + * `getBlockAttributes` to skip re-parsing, and to + * `applyBuiltInValidationFixes` which extracts the root + * element from it for the per-fix attribute reads). * * @return Migrated block object. */ export function applyBlockDeprecatedVersions( block: Block, rawBlock: RawBlock, - blockType: BlockType + blockType: BlockType, + parsedBody?: Element | null ): Block { const parsedAttributes = rawBlock.attrs ?? {}; const { deprecated: deprecatedDefinitions } = blockType; @@ -79,7 +85,8 @@ export function applyBlockDeprecatedVersions( attributes: getBlockAttributes( deprecatedBlockType, block.originalContent ?? '', - parsedAttributes + parsedAttributes, + parsedBody ), }; @@ -90,7 +97,8 @@ export function applyBlockDeprecatedVersions( if ( ! 
isValid ) { migratedBlock = applyBuiltInValidationFixes( migratedBlock, - deprecatedBlockType + deprecatedBlockType, + parsedBody ); [ isValid ] = validateBlock( migratedBlock, deprecatedBlockType ); } diff --git a/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts b/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts index dd1b4feca76042..c3b841edbeda91 100644 --- a/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts +++ b/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts @@ -23,25 +23,41 @@ const ANCHOR_ATTR_SCHEMA: BlockAttribute = { * Attempts to fix block invalidation by applying build-in validation fixes * like moving all extra classNames to the className attribute. * - * @param block block object. - * @param blockType Block type. This is normalize not necessary and - * can be inferred from the block name, - * but it's here for performance reasons. + * @param block block object. + * @param blockType Block type. This is normalize not necessary and + * can be inferred from the block name, + * but it's here for performance reasons. + * @param parsedBody Pre-parsed body element of `block.originalContent`, if + * available. When provided, the fixes read attributes + * directly off the body's first element child instead of + * re-parsing originalContent for each fix. * * @return Fixed block object */ export function applyBuiltInValidationFixes( block: Block, - blockType: BlockType + blockType: BlockType, + parsedBody?: Element | null ): Block { const { attributes, originalContent } = block; let updatedBlockAttributes = attributes; + // Extract the root element once: every fix below reads attributes off the + // block's outermost element. `undefined` here means "no pre-parsed body + // supplied" — the fixes will fall back to parsing originalContent + // themselves. `null` means "we have a parsed body but it has no element + // child" (e.g. text-only innerHTML). 
+ const rootElement = + parsedBody !== undefined + ? parsedBody?.firstElementChild ?? null + : undefined; + // Fix block invalidation for className attribute. updatedBlockAttributes = fixCustomClassname( attributes, blockType, - originalContent ?? '' + originalContent ?? '', + rootElement ); // Fix block invalidation for ariaLabel attribute. updatedBlockAttributes = fixGlobalAttribute( @@ -50,7 +66,8 @@ export function applyBuiltInValidationFixes( originalContent ?? '', 'ariaLabel', 'data-aria-label', - ARIA_LABEL_ATTR_SCHEMA + ARIA_LABEL_ATTR_SCHEMA, + rootElement ); // Fix block invalidation for anchor attribute. updatedBlockAttributes = fixGlobalAttribute( @@ -59,7 +76,8 @@ export function applyBuiltInValidationFixes( originalContent ?? '', 'anchor', 'data-anchor', - ANCHOR_ATTR_SCHEMA + ANCHOR_ATTR_SCHEMA, + rootElement ); return { diff --git a/packages/blocks/src/api/parser/fix-custom-classname.ts b/packages/blocks/src/api/parser/fix-custom-classname.ts index 795490ec34e04e..bed833fb4bc18d 100644 --- a/packages/blocks/src/api/parser/fix-custom-classname.ts +++ b/packages/blocks/src/api/parser/fix-custom-classname.ts @@ -3,15 +3,18 @@ */ import { hasBlockSupport } from '../registration'; import { getSaveContent } from '../serializer'; -import { parseWithAttributeSchema } from './get-block-attributes'; -import type { BlockAttribute, BlockType } from '../../types'; +import { parseHtml } from './get-block-attributes'; +import type { BlockType } from '../../types'; -const CLASS_ATTR_SCHEMA: BlockAttribute = { - type: 'string', - source: 'attribute', - selector: '[data-custom-class-name] > *', - attribute: 'class', -}; +function splitClassName( className: unknown ): string[] { + return typeof className === 'string' && className + ? 
className.trim().split( /\s+/ ) + : []; +} + +function getElementClasses( element: Element | null ): string[] { + return splitClassName( element?.getAttribute( 'class' ) ); +} /** * Given an HTML string, returns an array of class names assigned to the root @@ -22,12 +25,8 @@ const CLASS_ATTR_SCHEMA: BlockAttribute = { * @return Array of class names assigned to the root element. */ export function getHTMLRootElementClasses( innerHTML: string ): string[] { - const parsed = parseWithAttributeSchema( - `
<div data-custom-class-name>${ innerHTML }</div>
`, - CLASS_ATTR_SCHEMA - ) as string | undefined; - - return parsed ? parsed.trim().split( /\s+/ ) : []; + const root = ( parseHtml( innerHTML ) as Element )?.firstElementChild; + return getElementClasses( root ); } /** @@ -39,13 +38,17 @@ export function getHTMLRootElementClasses( innerHTML: string ): string[] { * @param blockAttributes Original block attributes. * @param blockType Block type settings. * @param innerHTML Original block markup. + * @param rootElement Pre-parsed root element of innerHTML, if available. + * When provided, avoids re-parsing innerHTML to read + * the actual classes. * * @return Filtered block attributes. */ export function fixCustomClassname( blockAttributes: Record< string, unknown >, blockType: BlockType, - innerHTML: string + innerHTML: string, + rootElement?: Element | null ): Record< string, unknown > { if ( ! hasBlockSupport( blockType, 'customClassName', true ) ) { return blockAttributes; @@ -59,8 +62,15 @@ export function fixCustomClassname( const { className: omittedClassName, ...attributesSansClassName } = modifiedBlockAttributes; const serialized = getSaveContent( blockType, attributesSansClassName ); + // `getHTMLRootElementClasses` writes the rendered output into hpq's + // shared document body. Callers that pass `rootElement` are insulated by + // the deep clone of `parsedBody` in `parseRawBlock`; without that clone, + // this parse would mutate the shared body under our feet. const defaultClasses = getHTMLRootElementClasses( serialized ); - const actualClasses = getHTMLRootElementClasses( innerHTML ); + const actualClasses = + rootElement !== undefined + ? getElementClasses( rootElement ) + : getHTMLRootElementClasses( innerHTML ); const customClasses = actualClasses.filter( ( className ) => ! 
defaultClasses.includes( className ) diff --git a/packages/blocks/src/api/parser/fix-global-attribute.ts b/packages/blocks/src/api/parser/fix-global-attribute.ts index e8d555aaa9ec74..790833c5619431 100644 --- a/packages/blocks/src/api/parser/fix-global-attribute.ts +++ b/packages/blocks/src/api/parser/fix-global-attribute.ts @@ -37,6 +37,9 @@ export function getHTMLRootElement( * @param supportKey The block support key to check and attribute key to set. * @param dataAttribute The data attribute name to use as wrapper. * @param attributeSchema The attribute schema configuration. + * @param rootElement Pre-parsed root element of innerHTML, if available. + * When provided, the attribute is read directly off the + * root element, avoiding a wrap-and-reparse. * * @return Filtered block attributes. */ @@ -46,17 +49,26 @@ export function fixGlobalAttribute( innerHTML: string, supportKey: string, dataAttribute: string, - attributeSchema: BlockAttribute + attributeSchema: BlockAttribute, + rootElement?: Element | null ): Record< string, unknown > { if ( ! hasBlockSupport( blockType, supportKey, false ) ) { return blockAttributes; } const modifiedBlockAttributes = { ...blockAttributes }; - const attributeValue = getHTMLRootElement( - innerHTML, - dataAttribute, - attributeSchema - ); + let attributeValue: unknown; + if ( rootElement !== undefined ) { + const attrName = attributeSchema.attribute as string | undefined; + attributeValue = attrName + ? rootElement?.getAttribute( attrName ) ?? 
undefined + : undefined; + } else { + attributeValue = getHTMLRootElement( + innerHTML, + dataAttribute, + attributeSchema + ); + } if ( attributeValue ) { modifiedBlockAttributes[ supportKey ] = attributeValue; } diff --git a/packages/blocks/src/api/parser/get-block-attributes.ts b/packages/blocks/src/api/parser/get-block-attributes.ts index 8cc4ddbebe4eb5..a2338b78b06ac0 100644 --- a/packages/blocks/src/api/parser/get-block-attributes.ts +++ b/packages/blocks/src/api/parser/get-block-attributes.ts @@ -259,7 +259,7 @@ export const matcherFromSource = memoize( * * @return Parsed DOM node. */ -function parseHtml( innerHTML: string | Node ): Node { +export function parseHtml( innerHTML: string | Node ): Node { return hpqParse( innerHTML, ( h: Node ) => h ); } @@ -287,15 +287,24 @@ export function parseWithAttributeSchema( * @param blockTypeOrName Block type or name. * @param innerHTML Raw block content. * @param attributes Known block attributes (from delimiters). + * @param parsedBody Optional pre-parsed DOM node for innerHTML. When + * provided, the internal HTML parse is skipped. Useful + * for sharing a single parse across multiple callers + * operating on the same innerHTML string (e.g. block + * validation, deprecation iteration). Note: hpq uses a + * single shared document body, so a parsed node held + * across other parses will be detached. Detached nodes + * still respond correctly to attribute and class reads. * * @return All block attributes. */ export function getBlockAttributes( blockTypeOrName: string | BlockType, innerHTML: string | Node, - attributes: Record< string, unknown > = {} + attributes: Record< string, unknown > = {}, + parsedBody?: Node ): Record< string, unknown > { - const doc = parseHtml( innerHTML ); + const doc = parsedBody ?? 
parseHtml( innerHTML ); const blockType = normalizeBlockType( blockTypeOrName ); const blockAttributes = Object.fromEntries( diff --git a/packages/blocks/src/api/parser/index.ts b/packages/blocks/src/api/parser/index.ts index efcbe4bfe4d1bf..e4b954707243e3 100644 --- a/packages/blocks/src/api/parser/index.ts +++ b/packages/blocks/src/api/parser/index.ts @@ -17,7 +17,7 @@ import { validateBlock } from '../validation'; import { createBlock } from '../factory'; import { convertLegacyBlockNameAndAttributes } from './convert-legacy-block'; import { serializeRawBlock } from './serialize-raw-block'; -import { getBlockAttributes } from './get-block-attributes'; +import { getBlockAttributes, parseHtml } from './get-block-attributes'; import { applyBlockDeprecatedVersions } from './apply-block-deprecated-versions'; import { applyBuiltInValidationFixes } from './apply-built-in-validation-fixes'; import type { Block, BlockType, RawBlock, ParseOptions } from '../../types'; @@ -129,11 +129,15 @@ function createMissingBlockType( rawBlock: RawBlock ): RawBlock { * * @param unvalidatedBlock * @param blockType + * @param parsedBody Pre-parsed body element of the block's + * originalContent, if available. Threaded down to the + * built-in fixes so they don't have to re-parse. * @return validated block, with auto-fixes if initially invalid */ function applyBlockValidation( unvalidatedBlock: Block, - blockType: BlockType + blockType: BlockType, + parsedBody?: Element | null ): Block { // Attempt to validate the block. const [ isValid ] = validateBlock( unvalidatedBlock, blockType ); @@ -146,7 +150,8 @@ function applyBlockValidation( // like custom classNames handling. const fixedBlock = applyBuiltInValidationFixes( unvalidatedBlock, - blockType + blockType, + parsedBody ); // Attempt to validate the block once again after the built-in fixes. 
const [ isFixedValid, validationIssues ] = validateBlock( @@ -197,25 +202,52 @@ export function parseRawBlock( return; } - // Parse inner blocks recursively. + // Parse inner blocks recursively. This must happen *before* parsing this + // block's HTML — hpq uses a single shared document body, so each parse + // clobbers the previous one. We need this block's parsed body to remain + // stable through getBlockAttributes / validation / deprecation, so the + // recursion (which clobbers the body for each inner block) has to run + // first. const parsedInnerBlocks = normalizedBlock.innerBlocks .map( ( innerBlock ) => parseRawBlock( innerBlock, options ) ) // See https://github.com/WordPress/gutenberg/pull/17164. .filter( ( innerBlock ) => !! innerBlock ); + // Parse this block's innerHTML once and share the result with attribute + // extraction, validation fixes, and deprecation handling. Capture the + // root element eagerly: subsequent parses (e.g. of freshly serialized + // content inside the validation fixes) detach this element from the + // shared body, but detached elements still respond correctly to + // attribute and class reads. + // Parse via hpq, then deep-clone so the captured body is independent of + // hpq's shared document. Subsequent parses elsewhere in the pipeline + // (notably `fixCustomClassname`'s fallback path, which renders and + // re-parses save content) reset hpq's shared body — without the clone, + // `parsedBody` would silently change content under our feet between + // deprecation iterations. + const innerHTML = normalizedBlock.innerHTML; + const sharedBody = parseHtml( innerHTML ) as Element; + const parsedBody = + ( sharedBody?.cloneNode( true ) as Element | null ) ?? null; + // Get the fully parsed block. 
const parsedBlock = createBlock( normalizedBlock.blockName!, getBlockAttributes( blockType, - normalizedBlock.innerHTML, - normalizedBlock.attrs + innerHTML, + normalizedBlock.attrs, + parsedBody ), parsedInnerBlocks ); - parsedBlock.originalContent = normalizedBlock.innerHTML; + parsedBlock.originalContent = innerHTML; - const validatedBlock = applyBlockValidation( parsedBlock, blockType ); + const validatedBlock = applyBlockValidation( + parsedBlock, + blockType, + parsedBody + ); const { validationIssues } = validatedBlock; // Run the block deprecation and migrations. @@ -225,7 +257,8 @@ export function parseRawBlock( const updatedBlock = applyBlockDeprecatedVersions( validatedBlock, normalizedBlock, - blockType + blockType, + parsedBody ); if ( ! updatedBlock.isValid ) { From 7d3e563bf0f0fc3f34b267e71b59f50b85b49248 Mon Sep 17 00:00:00 2001 From: ella Date: Tue, 5 May 2026 14:32:50 +0200 Subject: [PATCH 2/2] Block parser: Widen getBlockAttributes parsedBody to accept null `parseRawBlock` types the cloned body as `Element | null`, but `getBlockAttributes` previously only accepted `Node | undefined`, causing a TS2345 in `parseRawBlock` and the deprecation iteration. Allowing `null` lets callers thread their captured-or-null body directly without coercion. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/blocks/README.md | 2 +- packages/blocks/src/api/parser/get-block-attributes.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/blocks/README.md b/packages/blocks/README.md index bf96cac02f3486..29ca97aa183c06 100644 --- a/packages/blocks/README.md +++ b/packages/blocks/README.md @@ -97,7 +97,7 @@ _Parameters_ - _blockTypeOrName_ `string | BlockType`: Block type or name. - _innerHTML_ `string | Node`: Raw block content. - _attributes_ `Record< string, unknown >`: Known block attributes (from delimiters). -- _parsedBody_ `Node`: Optional pre-parsed DOM node for innerHTML. When provided, the internal HTML parse is skipped. 
Useful for sharing a single parse across multiple callers operating on the same innerHTML string (e.g. block validation, deprecation iteration). Note: hpq uses a single shared document body, so a parsed node held across other parses will be detached. Detached nodes still respond correctly to attribute and class reads. +- _parsedBody_ `Node | null`: Optional pre-parsed DOM node for innerHTML. When provided, the internal HTML parse is skipped. Useful for sharing a single parse across multiple callers operating on the same innerHTML string (e.g. block validation, deprecation iteration). Note: hpq uses a single shared document body, so a parsed node held across other parses will be detached. Detached nodes still respond correctly to attribute and class reads. _Returns_ diff --git a/packages/blocks/src/api/parser/get-block-attributes.ts b/packages/blocks/src/api/parser/get-block-attributes.ts index a2338b78b06ac0..0bd5bb3c676e66 100644 --- a/packages/blocks/src/api/parser/get-block-attributes.ts +++ b/packages/blocks/src/api/parser/get-block-attributes.ts @@ -302,7 +302,7 @@ export function getBlockAttributes( blockTypeOrName: string | BlockType, innerHTML: string | Node, attributes: Record< string, unknown > = {}, - parsedBody?: Node + parsedBody?: Node | null ): Record< string, unknown > { const doc = parsedBody ?? parseHtml( innerHTML ); const blockType = normalizeBlockType( blockTypeOrName );