Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/blocks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ _Parameters_
- _blockTypeOrName_ `string | BlockType`: Block type or name.
- _innerHTML_ `string | Node`: Raw block content.
- _attributes_ `Record< string, unknown >`: Known block attributes (from delimiters).
- _parsedBody_ `Node | null`: Optional pre-parsed DOM node for innerHTML. When provided, the internal HTML parse is skipped. Useful for sharing a single parse across multiple callers operating on the same innerHTML string (e.g. block validation, deprecation iteration). Note: hpq parses into a single shared document body, so the contents of a node returned by one parse are replaced by the next parse. Clone the node (e.g. `cloneNode( true )`) if it has to outlive other parses. Cloned or detached nodes still respond correctly to attribute and class reads.

_Returns_

Expand Down
24 changes: 16 additions & 8 deletions packages/blocks/src/api/parser/apply-block-deprecated-versions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,24 @@ function stubFalse(): boolean {
* deprecated migrations applied, or the original block if it was both valid
* and no eligible migrations exist.
*
* @param block Parsed and invalid block object.
* @param rawBlock Raw block object.
* @param blockType Block type. This is normally not necessary and
* can be inferred from the block name,
* but it's here for performance reasons.
* @param block Parsed and invalid block object.
* @param rawBlock Raw block object.
* @param blockType Block type. This is normally not necessary and
* can be inferred from the block name,
* but it's here for performance reasons.
* @param parsedBody Pre-parsed DOM body for `block.originalContent`, if
* available. Shared across loop iterations (passed to
* `getBlockAttributes` to skip re-parsing, and to
* `applyBuiltInValidationFixes` which extracts the root
* element from it for the per-fix attribute reads).
*
* @return Migrated block object.
*/
export function applyBlockDeprecatedVersions(
block: Block,
rawBlock: RawBlock,
blockType: BlockType
blockType: BlockType,
parsedBody?: Element | null
): Block {
const parsedAttributes = rawBlock.attrs ?? {};
const { deprecated: deprecatedDefinitions } = blockType;
Expand Down Expand Up @@ -79,7 +85,8 @@ export function applyBlockDeprecatedVersions(
attributes: getBlockAttributes(
deprecatedBlockType,
block.originalContent ?? '',
parsedAttributes
parsedAttributes,
parsedBody
),
};

Expand All @@ -90,7 +97,8 @@ export function applyBlockDeprecatedVersions(
if ( ! isValid ) {
migratedBlock = applyBuiltInValidationFixes(
migratedBlock,
deprecatedBlockType
deprecatedBlockType,
parsedBody
);
[ isValid ] = validateBlock( migratedBlock, deprecatedBlockType );
}
Expand Down
34 changes: 26 additions & 8 deletions packages/blocks/src/api/parser/apply-built-in-validation-fixes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,41 @@ const ANCHOR_ATTR_SCHEMA: BlockAttribute = {
* Attempts to fix block invalidation by applying built-in validation fixes
* like moving all extra classNames to the className attribute.
*
* @param block block object.
* @param blockType Block type. This is normally not necessary and
* can be inferred from the block name,
* but it's here for performance reasons.
* @param block block object.
* @param blockType Block type. This is normally not necessary and
* can be inferred from the block name,
* but it's here for performance reasons.
* @param parsedBody Pre-parsed body element of `block.originalContent`, if
* available. When provided, the fixes read attributes
* directly off the body's first element child instead of
* re-parsing originalContent for each fix.
*
* @return Fixed block object
*/
export function applyBuiltInValidationFixes(
block: Block,
blockType: BlockType
blockType: BlockType,
parsedBody?: Element | null
): Block {
const { attributes, originalContent } = block;
let updatedBlockAttributes = attributes;

// Extract the root element once: every fix below reads attributes off the
// block's outermost element. `undefined` here means "no pre-parsed body
// supplied" — the fixes will fall back to parsing originalContent
// themselves. `null` means "we have a parsed body but it has no element
// child" (e.g. text-only innerHTML).
const rootElement =
parsedBody !== undefined
? parsedBody?.firstElementChild ?? null
: undefined;

// Fix block invalidation for className attribute.
updatedBlockAttributes = fixCustomClassname(
attributes,
blockType,
originalContent ?? ''
originalContent ?? '',
rootElement
);
// Fix block invalidation for ariaLabel attribute.
updatedBlockAttributes = fixGlobalAttribute(
Expand All @@ -50,7 +66,8 @@ export function applyBuiltInValidationFixes(
originalContent ?? '',
'ariaLabel',
'data-aria-label',
ARIA_LABEL_ATTR_SCHEMA
ARIA_LABEL_ATTR_SCHEMA,
rootElement
);
// Fix block invalidation for anchor attribute.
updatedBlockAttributes = fixGlobalAttribute(
Expand All @@ -59,7 +76,8 @@ export function applyBuiltInValidationFixes(
originalContent ?? '',
'anchor',
'data-anchor',
ANCHOR_ATTR_SCHEMA
ANCHOR_ATTR_SCHEMA,
rootElement
);

return {
Expand Down
42 changes: 26 additions & 16 deletions packages/blocks/src/api/parser/fix-custom-classname.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
*/
import { hasBlockSupport } from '../registration';
import { getSaveContent } from '../serializer';
import { parseWithAttributeSchema } from './get-block-attributes';
import type { BlockAttribute, BlockType } from '../../types';
import { parseHtml } from './get-block-attributes';
import type { BlockType } from '../../types';

const CLASS_ATTR_SCHEMA: BlockAttribute = {
type: 'string',
source: 'attribute',
selector: '[data-custom-class-name] > *',
attribute: 'class',
};
/**
 * Splits a `class` attribute value into its individual class names.
 *
 * @param className Candidate class attribute value. Anything other than a
 *                  string yields an empty list.
 *
 * @return Class names in source order; empty when the value is not a string,
 *         is empty, or consists solely of whitespace.
 */
function splitClassName( className: unknown ): string[] {
	if ( typeof className !== 'string' ) {
		return [];
	}
	// Trim before the emptiness check: splitting a whitespace-only value
	// would otherwise produce `[ '' ]`, i.e. a bogus empty class name.
	const trimmed = className.trim();
	return trimmed ? trimmed.split( /\s+/ ) : [];
}

/**
 * Reads the class names declared on an element's `class` attribute.
 *
 * @param element Element to inspect, or `null` when there is none.
 *
 * @return Class names produced by `splitClassName` for the attribute value.
 */
function getElementClasses( element: Element | null ): string[] {
	const classAttribute = element?.getAttribute( 'class' );
	return splitClassName( classAttribute );
}

/**
* Given an HTML string, returns an array of class names assigned to the root
Expand All @@ -22,12 +25,8 @@ const CLASS_ATTR_SCHEMA: BlockAttribute = {
* @return Array of class names assigned to the root element.
*/
export function getHTMLRootElementClasses( innerHTML: string ): string[] {
const parsed = parseWithAttributeSchema(
`<div data-custom-class-name>${ innerHTML }</div>`,
CLASS_ATTR_SCHEMA
) as string | undefined;

return parsed ? parsed.trim().split( /\s+/ ) : [];
const root = ( parseHtml( innerHTML ) as Element )?.firstElementChild;
return getElementClasses( root );
}

/**
Expand All @@ -39,13 +38,17 @@ export function getHTMLRootElementClasses( innerHTML: string ): string[] {
* @param blockAttributes Original block attributes.
* @param blockType Block type settings.
* @param innerHTML Original block markup.
* @param rootElement Pre-parsed root element of innerHTML, if available.
* When provided, avoids re-parsing innerHTML to read
* the actual classes.
*
* @return Filtered block attributes.
*/
export function fixCustomClassname(
blockAttributes: Record< string, unknown >,
blockType: BlockType,
innerHTML: string
innerHTML: string,
rootElement?: Element | null
): Record< string, unknown > {
if ( ! hasBlockSupport( blockType, 'customClassName', true ) ) {
return blockAttributes;
Expand All @@ -59,8 +62,15 @@ export function fixCustomClassname(
const { className: omittedClassName, ...attributesSansClassName } =
modifiedBlockAttributes;
const serialized = getSaveContent( blockType, attributesSansClassName );
// `getHTMLRootElementClasses` writes the rendered output into hpq's
// shared document body. Callers that pass `rootElement` are insulated by
// the deep clone of `parsedBody` in `parseRawBlock`; without that clone,
// this parse would mutate the shared body under our feet.
const defaultClasses = getHTMLRootElementClasses( serialized );
const actualClasses = getHTMLRootElementClasses( innerHTML );
const actualClasses =
rootElement !== undefined
? getElementClasses( rootElement )
: getHTMLRootElementClasses( innerHTML );

const customClasses = actualClasses.filter(
( className ) => ! defaultClasses.includes( className )
Expand Down
24 changes: 18 additions & 6 deletions packages/blocks/src/api/parser/fix-global-attribute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ export function getHTMLRootElement(
* @param supportKey The block support key to check and attribute key to set.
* @param dataAttribute The data attribute name to use as wrapper.
* @param attributeSchema The attribute schema configuration.
* @param rootElement Pre-parsed root element of innerHTML, if available.
* When provided, the attribute is read directly off the
* root element, avoiding a wrap-and-reparse.
*
* @return Filtered block attributes.
*/
Expand All @@ -46,17 +49,26 @@ export function fixGlobalAttribute(
innerHTML: string,
supportKey: string,
dataAttribute: string,
attributeSchema: BlockAttribute
attributeSchema: BlockAttribute,
rootElement?: Element | null
): Record< string, unknown > {
if ( ! hasBlockSupport( blockType, supportKey, false ) ) {
return blockAttributes;
}
const modifiedBlockAttributes = { ...blockAttributes };
const attributeValue = getHTMLRootElement(
innerHTML,
dataAttribute,
attributeSchema
);
let attributeValue: unknown;
if ( rootElement !== undefined ) {
const attrName = attributeSchema.attribute as string | undefined;
attributeValue = attrName
? rootElement?.getAttribute( attrName ) ?? undefined
: undefined;
} else {
attributeValue = getHTMLRootElement(
innerHTML,
dataAttribute,
attributeSchema
);
}
if ( attributeValue ) {
modifiedBlockAttributes[ supportKey ] = attributeValue;
}
Expand Down
15 changes: 12 additions & 3 deletions packages/blocks/src/api/parser/get-block-attributes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ export const matcherFromSource = memoize(
*
* @return Parsed DOM node.
*/
function parseHtml( innerHTML: string | Node ): Node {
export function parseHtml( innerHTML: string | Node ): Node {
return hpqParse( innerHTML, ( h: Node ) => h );
}

Expand Down Expand Up @@ -287,15 +287,24 @@ export function parseWithAttributeSchema(
* @param blockTypeOrName Block type or name.
* @param innerHTML Raw block content.
* @param attributes Known block attributes (from delimiters).
* @param parsedBody Optional pre-parsed DOM node for innerHTML. When
* provided, the internal HTML parse is skipped. Useful
* for sharing a single parse across multiple callers
* operating on the same innerHTML string (e.g. block
* validation, deprecation iteration). Note: hpq parses
* into a single shared document body, so the contents of
* a node returned by one parse are replaced by the next
* parse. Clone the node (e.g. `cloneNode( true )`) if it
* has to outlive other parses. Cloned or detached nodes
* still respond correctly to attribute and class reads.
*
* @return All block attributes.
*/
export function getBlockAttributes(
blockTypeOrName: string | BlockType,
innerHTML: string | Node,
attributes: Record< string, unknown > = {}
attributes: Record< string, unknown > = {},
parsedBody?: Node | null
): Record< string, unknown > {
const doc = parseHtml( innerHTML );
const doc = parsedBody ?? parseHtml( innerHTML );
const blockType = normalizeBlockType( blockTypeOrName );

const blockAttributes = Object.fromEntries(
Expand Down
51 changes: 42 additions & 9 deletions packages/blocks/src/api/parser/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import { validateBlock } from '../validation';
import { createBlock } from '../factory';
import { convertLegacyBlockNameAndAttributes } from './convert-legacy-block';
import { serializeRawBlock } from './serialize-raw-block';
import { getBlockAttributes } from './get-block-attributes';
import { getBlockAttributes, parseHtml } from './get-block-attributes';
import { applyBlockDeprecatedVersions } from './apply-block-deprecated-versions';
import { applyBuiltInValidationFixes } from './apply-built-in-validation-fixes';
import type { Block, BlockType, RawBlock, ParseOptions } from '../../types';
Expand Down Expand Up @@ -129,11 +129,15 @@ function createMissingBlockType( rawBlock: RawBlock ): RawBlock {
*
* @param unvalidatedBlock
* @param blockType
* @param parsedBody Pre-parsed body element of the block's
* originalContent, if available. Threaded down to the
* built-in fixes so they don't have to re-parse.
* @return validated block, with auto-fixes if initially invalid
*/
function applyBlockValidation(
unvalidatedBlock: Block,
blockType: BlockType
blockType: BlockType,
parsedBody?: Element | null
): Block {
// Attempt to validate the block.
const [ isValid ] = validateBlock( unvalidatedBlock, blockType );
Expand All @@ -146,7 +150,8 @@ function applyBlockValidation(
// like custom classNames handling.
const fixedBlock = applyBuiltInValidationFixes(
unvalidatedBlock,
blockType
blockType,
parsedBody
);
// Attempt to validate the block once again after the built-in fixes.
const [ isFixedValid, validationIssues ] = validateBlock(
Expand Down Expand Up @@ -197,25 +202,52 @@ export function parseRawBlock(
return;
}

// Parse inner blocks recursively.
// Parse inner blocks recursively. This must happen *before* parsing this
// block's HTML — hpq uses a single shared document body, so each parse
// clobbers the previous one. We need this block's parsed body to remain
// stable through getBlockAttributes / validation / deprecation, so the
// recursion (which clobbers the body for each inner block) has to run
// first.
const parsedInnerBlocks = normalizedBlock.innerBlocks
.map( ( innerBlock ) => parseRawBlock( innerBlock, options ) )
// See https://github.com/WordPress/gutenberg/pull/17164.
.filter( ( innerBlock ) => !! innerBlock );

// Parse this block's innerHTML once and share the result with attribute
// extraction, validation fixes, and deprecation handling.
//
// hpq parses into a single shared document body, so the parse result is
// deep-cloned to make the captured body independent of hpq's shared
// document. Subsequent parses elsewhere in the pipeline (notably
// `fixCustomClassname`, which renders and re-parses save content) reset
// hpq's shared body — without the clone, `parsedBody` would silently
// change content under our feet between deprecation iterations. The
// cloned nodes are never attached to a document, but detached elements
// still respond correctly to attribute and class reads.
const innerHTML = normalizedBlock.innerHTML;
const sharedBody = parseHtml( innerHTML ) as Element;
const parsedBody =
( sharedBody?.cloneNode( true ) as Element | null ) ?? null;

// Get the fully parsed block.
const parsedBlock = createBlock(
normalizedBlock.blockName!,
getBlockAttributes(
blockType,
normalizedBlock.innerHTML,
normalizedBlock.attrs
innerHTML,
normalizedBlock.attrs,
parsedBody
),
parsedInnerBlocks
);
parsedBlock.originalContent = normalizedBlock.innerHTML;
parsedBlock.originalContent = innerHTML;

const validatedBlock = applyBlockValidation( parsedBlock, blockType );
const validatedBlock = applyBlockValidation(
parsedBlock,
blockType,
parsedBody
);
const { validationIssues } = validatedBlock;

// Run the block deprecation and migrations.
Expand All @@ -225,7 +257,8 @@ export function parseRawBlock(
const updatedBlock = applyBlockDeprecatedVersions(
validatedBlock,
normalizedBlock,
blockType
blockType,
parsedBody
);

if ( ! updatedBlock.isValid ) {
Expand Down
Loading