diff --git a/packages/blocks/README.md b/packages/blocks/README.md
index ee80df05735c50..29ca97aa183c06 100644
--- a/packages/blocks/README.md
+++ b/packages/blocks/README.md
@@ -97,6 +97,7 @@ _Parameters_
- _blockTypeOrName_ `string | BlockType`: Block type or name.
- _innerHTML_ `string | Node`: Raw block content.
- _attributes_ `Record< string, unknown >`: Known block attributes (from delimiters).
+- _parsedBody_ `Node | null`: Optional pre-parsed DOM node for innerHTML. When provided, the internal HTML parse is skipped. Useful for sharing a single parse across callers operating on the same innerHTML string (e.g. block validation, deprecation iteration). Note: hpq parses into a single shared document body that each later parse overwrites, so deep-clone the node if it must outlive other parses. Detached elements still support attribute and class reads.
_Returns_
diff --git a/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts b/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts
index e4f5ce434f6adc..01957e8eadee31 100644
--- a/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts
+++ b/packages/blocks/src/api/parser/apply-block-deprecated-versions.ts
@@ -22,18 +22,24 @@ function stubFalse(): boolean {
* deprecated migrations applied, or the original block if it was both valid
* and no eligible migrations exist.
*
- * @param block Parsed and invalid block object.
- * @param rawBlock Raw block object.
- * @param blockType Block type. This is normalize not necessary and
- * can be inferred from the block name,
- * but it's here for performance reasons.
+ * @param block Parsed and invalid block object.
+ * @param rawBlock Raw block object.
+ * @param blockType Block type. This is normally not necessary and
+ * can be inferred from the block name,
+ * but it's here for performance reasons.
+ * @param parsedBody Pre-parsed DOM body for `block.originalContent`, if
+ * available. Shared across loop iterations (passed to
+ * `getBlockAttributes` to skip re-parsing, and to
+ * `applyBuiltInValidationFixes` which extracts the root
+ * element from it for the per-fix attribute reads).
*
* @return Migrated block object.
*/
export function applyBlockDeprecatedVersions(
block: Block,
rawBlock: RawBlock,
- blockType: BlockType
+ blockType: BlockType,
+ parsedBody?: Element | null
): Block {
const parsedAttributes = rawBlock.attrs ?? {};
const { deprecated: deprecatedDefinitions } = blockType;
@@ -79,7 +85,8 @@ export function applyBlockDeprecatedVersions(
attributes: getBlockAttributes(
deprecatedBlockType,
block.originalContent ?? '',
- parsedAttributes
+ parsedAttributes,
+ parsedBody
),
};
@@ -90,7 +97,8 @@ export function applyBlockDeprecatedVersions(
if ( ! isValid ) {
migratedBlock = applyBuiltInValidationFixes(
migratedBlock,
- deprecatedBlockType
+ deprecatedBlockType,
+ parsedBody
);
[ isValid ] = validateBlock( migratedBlock, deprecatedBlockType );
}
diff --git a/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts b/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts
index dd1b4feca76042..c3b841edbeda91 100644
--- a/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts
+++ b/packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts
@@ -23,25 +23,41 @@ const ANCHOR_ATTR_SCHEMA: BlockAttribute = {
* Attempts to fix block invalidation by applying build-in validation fixes
* like moving all extra classNames to the className attribute.
*
- * @param block block object.
- * @param blockType Block type. This is normalize not necessary and
- * can be inferred from the block name,
- * but it's here for performance reasons.
+ * @param block block object.
+ * @param blockType Block type. This is normally not necessary and
+ * can be inferred from the block name,
+ * but it's here for performance reasons.
+ * @param parsedBody Pre-parsed body element of `block.originalContent`, if
+ * available. When provided, the fixes read attributes
+ * directly off the body's first element child instead of
+ * re-parsing originalContent for each fix.
*
* @return Fixed block object
*/
export function applyBuiltInValidationFixes(
block: Block,
- blockType: BlockType
+ blockType: BlockType,
+ parsedBody?: Element | null
): Block {
const { attributes, originalContent } = block;
let updatedBlockAttributes = attributes;
+ // Extract the root element once: every fix below reads attributes off the
+ // block's outermost element. `undefined` here means "no pre-parsed body
+ // supplied" — the fixes will fall back to parsing originalContent
+ // themselves. `null` means the supplied body was `null` or has no element
+ // child (e.g. text-only innerHTML), so the fixes read no root attributes.
+ const rootElement =
+ parsedBody !== undefined
+ ? parsedBody?.firstElementChild ?? null
+ : undefined;
+
// Fix block invalidation for className attribute.
updatedBlockAttributes = fixCustomClassname(
attributes,
blockType,
- originalContent ?? ''
+ originalContent ?? '',
+ rootElement
);
// Fix block invalidation for ariaLabel attribute.
updatedBlockAttributes = fixGlobalAttribute(
@@ -50,7 +66,8 @@ export function applyBuiltInValidationFixes(
originalContent ?? '',
'ariaLabel',
'data-aria-label',
- ARIA_LABEL_ATTR_SCHEMA
+ ARIA_LABEL_ATTR_SCHEMA,
+ rootElement
);
// Fix block invalidation for anchor attribute.
updatedBlockAttributes = fixGlobalAttribute(
@@ -59,7 +76,8 @@ export function applyBuiltInValidationFixes(
originalContent ?? '',
'anchor',
'data-anchor',
- ANCHOR_ATTR_SCHEMA
+ ANCHOR_ATTR_SCHEMA,
+ rootElement
);
return {
diff --git a/packages/blocks/src/api/parser/fix-custom-classname.ts b/packages/blocks/src/api/parser/fix-custom-classname.ts
index 795490ec34e04e..bed833fb4bc18d 100644
--- a/packages/blocks/src/api/parser/fix-custom-classname.ts
+++ b/packages/blocks/src/api/parser/fix-custom-classname.ts
@@ -3,15 +3,18 @@
*/
import { hasBlockSupport } from '../registration';
import { getSaveContent } from '../serializer';
-import { parseWithAttributeSchema } from './get-block-attributes';
-import type { BlockAttribute, BlockType } from '../../types';
+import { parseHtml } from './get-block-attributes';
+import type { BlockType } from '../../types';
-const CLASS_ATTR_SCHEMA: BlockAttribute = {
- type: 'string',
- source: 'attribute',
- selector: '[data-custom-class-name] > *',
- attribute: 'class',
-};
+function splitClassName( className: unknown ): string[] {
+ return typeof className === 'string' && className
+ ? className.trim().split( /\s+/ )
+ : [];
+}
+
+function getElementClasses( element: Element | null ): string[] {
+ return splitClassName( element?.getAttribute( 'class' ) );
+}
/**
* Given an HTML string, returns an array of class names assigned to the root
@@ -22,12 +25,8 @@ const CLASS_ATTR_SCHEMA: BlockAttribute = {
* @return Array of class names assigned to the root element.
*/
export function getHTMLRootElementClasses( innerHTML: string ): string[] {
- const parsed = parseWithAttributeSchema(
- `
${ innerHTML }
`,
- CLASS_ATTR_SCHEMA
- ) as string | undefined;
-
- return parsed ? parsed.trim().split( /\s+/ ) : [];
+ const root = ( parseHtml( innerHTML ) as Element )?.firstElementChild;
+ return getElementClasses( root );
}
/**
@@ -39,13 +38,17 @@ export function getHTMLRootElementClasses( innerHTML: string ): string[] {
* @param blockAttributes Original block attributes.
* @param blockType Block type settings.
* @param innerHTML Original block markup.
+ * @param rootElement Pre-parsed root element of innerHTML, if available.
+ * When provided, avoids re-parsing innerHTML to read
+ * the actual classes.
*
* @return Filtered block attributes.
*/
export function fixCustomClassname(
blockAttributes: Record< string, unknown >,
blockType: BlockType,
- innerHTML: string
+ innerHTML: string,
+ rootElement?: Element | null
): Record< string, unknown > {
if ( ! hasBlockSupport( blockType, 'customClassName', true ) ) {
return blockAttributes;
@@ -59,8 +62,15 @@ export function fixCustomClassname(
const { className: omittedClassName, ...attributesSansClassName } =
modifiedBlockAttributes;
const serialized = getSaveContent( blockType, attributesSansClassName );
+ // `getHTMLRootElementClasses` writes the rendered output into hpq's
+ // shared document body. Callers that pass `rootElement` are insulated by
+ // the deep clone of `parsedBody` in `parseRawBlock`; without that clone,
+ // this parse would overwrite the body later reads of `rootElement` use.
const defaultClasses = getHTMLRootElementClasses( serialized );
- const actualClasses = getHTMLRootElementClasses( innerHTML );
+ const actualClasses =
+ rootElement !== undefined
+ ? getElementClasses( rootElement )
+ : getHTMLRootElementClasses( innerHTML );
const customClasses = actualClasses.filter(
( className ) => ! defaultClasses.includes( className )
diff --git a/packages/blocks/src/api/parser/fix-global-attribute.ts b/packages/blocks/src/api/parser/fix-global-attribute.ts
index e8d555aaa9ec74..790833c5619431 100644
--- a/packages/blocks/src/api/parser/fix-global-attribute.ts
+++ b/packages/blocks/src/api/parser/fix-global-attribute.ts
@@ -37,6 +37,9 @@ export function getHTMLRootElement(
* @param supportKey The block support key to check and attribute key to set.
* @param dataAttribute The data attribute name to use as wrapper.
* @param attributeSchema The attribute schema configuration.
+ * @param rootElement Pre-parsed root element of innerHTML, if available.
+ * When provided, the attribute is read directly off the
+ * root element, avoiding a wrap-and-reparse.
*
* @return Filtered block attributes.
*/
@@ -46,17 +49,26 @@ export function fixGlobalAttribute(
innerHTML: string,
supportKey: string,
dataAttribute: string,
- attributeSchema: BlockAttribute
+ attributeSchema: BlockAttribute,
+ rootElement?: Element | null
): Record< string, unknown > {
if ( ! hasBlockSupport( blockType, supportKey, false ) ) {
return blockAttributes;
}
const modifiedBlockAttributes = { ...blockAttributes };
- const attributeValue = getHTMLRootElement(
- innerHTML,
- dataAttribute,
- attributeSchema
- );
+ let attributeValue: unknown;
+ if ( rootElement !== undefined ) {
+ const attrName = attributeSchema.attribute as string | undefined;
+ attributeValue = attrName
+ ? rootElement?.getAttribute( attrName ) ?? undefined
+ : undefined;
+ } else {
+ attributeValue = getHTMLRootElement(
+ innerHTML,
+ dataAttribute,
+ attributeSchema
+ );
+ }
if ( attributeValue ) {
modifiedBlockAttributes[ supportKey ] = attributeValue;
}
diff --git a/packages/blocks/src/api/parser/get-block-attributes.ts b/packages/blocks/src/api/parser/get-block-attributes.ts
index 8cc4ddbebe4eb5..0bd5bb3c676e66 100644
--- a/packages/blocks/src/api/parser/get-block-attributes.ts
+++ b/packages/blocks/src/api/parser/get-block-attributes.ts
@@ -259,7 +259,7 @@ export const matcherFromSource = memoize(
*
* @return Parsed DOM node.
*/
-function parseHtml( innerHTML: string | Node ): Node {
+export function parseHtml( innerHTML: string | Node ): Node {
return hpqParse( innerHTML, ( h: Node ) => h );
}
@@ -287,15 +287,24 @@ export function parseWithAttributeSchema(
* @param blockTypeOrName Block type or name.
* @param innerHTML Raw block content.
* @param attributes Known block attributes (from delimiters).
+ * @param parsedBody Optional pre-parsed DOM node for innerHTML. When
+ * provided, the internal HTML parse is skipped. Useful
+ * for sharing a single parse across callers operating on
+ * the same innerHTML string (e.g. block validation,
+ * deprecation iteration). Note: hpq parses into a single
+ * shared document body that each later parse overwrites,
+ * so deep-clone the node if it must outlive other parses.
+ * Detached elements still support attribute and class reads.
*
* @return All block attributes.
*/
export function getBlockAttributes(
blockTypeOrName: string | BlockType,
innerHTML: string | Node,
- attributes: Record< string, unknown > = {}
+ attributes: Record< string, unknown > = {},
+ parsedBody?: Node | null
): Record< string, unknown > {
- const doc = parseHtml( innerHTML );
+ const doc = parsedBody ?? parseHtml( innerHTML );
const blockType = normalizeBlockType( blockTypeOrName );
const blockAttributes = Object.fromEntries(
diff --git a/packages/blocks/src/api/parser/index.ts b/packages/blocks/src/api/parser/index.ts
index efcbe4bfe4d1bf..e4b954707243e3 100644
--- a/packages/blocks/src/api/parser/index.ts
+++ b/packages/blocks/src/api/parser/index.ts
@@ -17,7 +17,7 @@ import { validateBlock } from '../validation';
import { createBlock } from '../factory';
import { convertLegacyBlockNameAndAttributes } from './convert-legacy-block';
import { serializeRawBlock } from './serialize-raw-block';
-import { getBlockAttributes } from './get-block-attributes';
+import { getBlockAttributes, parseHtml } from './get-block-attributes';
import { applyBlockDeprecatedVersions } from './apply-block-deprecated-versions';
import { applyBuiltInValidationFixes } from './apply-built-in-validation-fixes';
import type { Block, BlockType, RawBlock, ParseOptions } from '../../types';
@@ -129,11 +129,15 @@ function createMissingBlockType( rawBlock: RawBlock ): RawBlock {
*
* @param unvalidatedBlock
* @param blockType
+ * @param parsedBody Pre-parsed body element of the block's
+ * originalContent, if available. Threaded down to the
+ * built-in fixes so they don't have to re-parse.
* @return validated block, with auto-fixes if initially invalid
*/
function applyBlockValidation(
unvalidatedBlock: Block,
- blockType: BlockType
+ blockType: BlockType,
+ parsedBody?: Element | null
): Block {
// Attempt to validate the block.
const [ isValid ] = validateBlock( unvalidatedBlock, blockType );
@@ -146,7 +150,8 @@ function applyBlockValidation(
// like custom classNames handling.
const fixedBlock = applyBuiltInValidationFixes(
unvalidatedBlock,
- blockType
+ blockType,
+ parsedBody
);
// Attempt to validate the block once again after the built-in fixes.
const [ isFixedValid, validationIssues ] = validateBlock(
@@ -197,25 +202,52 @@ export function parseRawBlock(
return;
}
- // Parse inner blocks recursively.
+ // Parse inner blocks recursively. This must happen *before* parsing this
+ // block's HTML — hpq uses a single shared document body, so each parse
+ // clobbers the previous one. We need this block's parsed body to remain
+ // stable through getBlockAttributes / validation / deprecation, so the
+ // recursion (which clobbers the body for each inner block) has to run
+ // first.
const parsedInnerBlocks = normalizedBlock.innerBlocks
.map( ( innerBlock ) => parseRawBlock( innerBlock, options ) )
// See https://github.com/WordPress/gutenberg/pull/17164.
.filter( ( innerBlock ) => !! innerBlock );
+ // Parse this block's innerHTML once and share the result with attribute
+ // extraction, validation fixes, and deprecation handling.
+ //
+ // hpq parses into a single shared document body, so the node returned by
+ // `parseHtml` is overwritten by every later parse. Deep-clone it so the
+ // captured body is independent of hpq's shared document: subsequent
+ // parses elsewhere in the pipeline (notably `fixCustomClassname`, which
+ // renders and re-parses save content) reset the shared body, and without
+ // the clone `parsedBody` would silently change content under our feet
+ // between deprecation iterations. The root element is not captured
+ // here; `applyBuiltInValidationFixes` extracts it from this clone when
+ // it runs.
+ const innerHTML = normalizedBlock.innerHTML;
+ const sharedBody = parseHtml( innerHTML ) as Element;
+ const parsedBody =
+ ( sharedBody?.cloneNode( true ) as Element | null ) ?? null;
+
// Get the fully parsed block.
const parsedBlock = createBlock(
normalizedBlock.blockName!,
getBlockAttributes(
blockType,
- normalizedBlock.innerHTML,
- normalizedBlock.attrs
+ innerHTML,
+ normalizedBlock.attrs,
+ parsedBody
),
parsedInnerBlocks
);
- parsedBlock.originalContent = normalizedBlock.innerHTML;
+ parsedBlock.originalContent = innerHTML;
- const validatedBlock = applyBlockValidation( parsedBlock, blockType );
+ const validatedBlock = applyBlockValidation(
+ parsedBlock,
+ blockType,
+ parsedBody
+ );
const { validationIssues } = validatedBlock;
// Run the block deprecation and migrations.
@@ -225,7 +257,8 @@ export function parseRawBlock(
const updatedBlock = applyBlockDeprecatedVersions(
validatedBlock,
normalizedBlock,
- blockType
+ blockType,
+ parsedBody
);
if ( ! updatedBlock.isValid ) {