From 9e925f0eabc45d5028f3991bf257b38ab1705a21 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Wed, 6 May 2026 10:22:49 +0530 Subject: [PATCH 1/4] Post Revisions: Upgrade `diff` from v4 to v8 Aligns `packages/editor` and `packages/block-editor` with `packages/sync` on `diff@^8.0.3` (needed for the Syncpack alignment work in #77950 / #77954). The bump exposes two unrelated upstream changes that would regress the post-revisions UI: 1. v6+ adds a "deletions before insertions" tie-breaker, so for inputs with multiple equal-length LCSes (whitespace-block pivots, paragraph swaps), `diffArrays` selects a different match than v4 did. The downstream `pairSimilarBlocks` step then mis-pairs blocks and shows two confusing inline diffs instead of a clean modified+unchanged pair. 2. v6+ stops treating whitespace as a token in `diffWords`, coalescing adjacent word changes into one removed/added pair and losing per-word precision in inline rich-text diffs. Fix on the consumer side so existing tests pass without touching any assertion: - Replace the imported `diffArrays` in `block-diff.js` with a local v4-compatible port of `Diff.prototype.diff` (Myers, array+strict-eq), including v4's `(added, removed)` -> `(removed, added)` swap in `buildValues` so condensed sections still render in the right order. - Switch `diffWords` -> `diffWordsWithSpace` for the inline rich-text diff, the `changedAttributes` panel diff, and the `Meta` field diff in `revision-fields-diff`. `preserve-client-ids.js` and `block-compare` (uses `diffChars`) need no changes -- neither hits the affected v6+ behaviours and their tests pass unmodified under v8. 41/41 revision-related unit tests pass; full `npm run test:unit` is green. Closes #77976 --- package-lock.json | 23 ++- packages/block-editor/package.json | 2 +- packages/editor/package.json | 2 +- .../post-revisions-preview/block-diff.js | 185 +++++++++++++++++- .../components/revision-fields-diff/index.js | 9 +- 5 files changed, 210 insertions(+), 11 deletions(-) diff --git a/package-lock.json b/package-lock.json index f7c5ba2163d68b..a6e8bbf48bf064 100644 --- a/package-lock.json +++ b/package-lock.json @@ -29744,6 +29744,7 @@ "version": "4.0.2", "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "dev": true, "engines": { "node": ">=0.3.1" } @@ -57949,7 +57950,7 @@ "clsx": "^2.1.1", "colord": "^2.7.0", "deepmerge": "^4.3.0", - "diff": "^4.0.2", + "diff": "^8.0.3", "fast-deep-equal": "^3.1.3", "memize": "^2.1.0", "parsel-js": "^1.1.2", @@ -57973,6 +57974,15 @@ "react-dom": "^18.0.0" } }, + "packages/block-editor/node_modules/diff": { + "version": "8.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.4.tgz", + "integrity": "sha512-DPi0FmjiSU5EvQV0++GFDOJ9ASQUVFh5kD+OzOnYdi7n3Wpm9hWWGfB/O2blfHcMVTL5WkQXSnRiK9makhrcnw==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, "packages/block-editor/node_modules/postcss-urlrebase": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/postcss-urlrebase/-/postcss-urlrebase-1.4.0.tgz", @@ -59865,7 +59875,7 @@ "clsx": "^2.1.1", "colord": "^2.7.0", "date-fns": "^4.1.0", - "diff": "^4.0.2", + "diff": "^8.0.3", "fast-deep-equal": "^3.1.3", "memize": "^2.1.0", "react-autosize-textarea": "^7.1.0", @@ -59884,6 +59894,15 @@ "react-dom": "^18.0.0" } }, + "packages/editor/node_modules/diff": { + "version": "8.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-8.0.4.tgz", + "integrity": "sha512-DPi0FmjiSU5EvQV0++GFDOJ9ASQUVFh5kD+OzOnYdi7n3Wpm9hWWGfB/O2blfHcMVTL5WkQXSnRiK9makhrcnw==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, "packages/element": { "name": "@wordpress/element", "version": "6.45.0", diff --git a/packages/block-editor/package.json b/packages/block-editor/package.json index 6cc5d61ffb1fa2..4932e6d5deee98 100644 --- a/packages/block-editor/package.json +++ b/packages/block-editor/package.json @@ -102,7 +102,7 @@ "clsx": "^2.1.1", "colord": "^2.7.0", "deepmerge": "^4.3.0", - "diff": "^4.0.2", + "diff": "^8.0.3", "fast-deep-equal": "^3.1.3", "memize": "^2.1.0", "parsel-js": "^1.1.2", diff --git a/packages/editor/package.json b/packages/editor/package.json index a1a2bdba2f21f0..5a7a9bb53ca451 100644 --- a/packages/editor/package.json +++ b/packages/editor/package.json @@ -109,7 +109,7 @@ "clsx": "^2.1.1", "colord": "^2.7.0", "date-fns": "^4.1.0", - "diff": "^4.0.2", + "diff": "^8.0.3", "fast-deep-equal": "^3.1.3", "memize": "^2.1.0", "react-autosize-textarea": "^7.1.0", diff --git a/packages/editor/src/components/post-revisions-preview/block-diff.js b/packages/editor/src/components/post-revisions-preview/block-diff.js index 96657837530341..d05c7df32994b8 100644 --- a/packages/editor/src/components/post-revisions-preview/block-diff.js +++ b/packages/editor/src/components/post-revisions-preview/block-diff.js @@ -1,8 +1,7 @@ /** * External dependencies */ -import { diffArrays } from 'diff/lib/diff/array'; -import { diffWords } from 'diff/lib/diff/word'; +import { diffWordsWithSpace } from 'diff'; /** * WordPress dependencies @@ -28,6 +27,171 @@ import { unlock } from '../../lock-unlock'; const { parseRawBlock } = unlock( blocksPrivateApis ); +/** + * Compute an LCS-based array diff with the same tie-breaker semantics as + * `diff` v4's `diffArrays`. The post-revisions UI relies on a specific LCS + * choice when multiple equally-valid matches exist (e.g. preferring the + * earlier occurrence in the previous revision over later ones, and + * preferring content blocks over freeform/whitespace blocks). `diff` v6 + * changed the tie-breaker to "deletions before insertions", which produces + * a different but equally-valid pairing — visible as confusing inline diffs + * in the post-revisions feature. We reproduce v4's selection locally so + * block-level matching stays stable across upgrades of the `diff` library. + * + * Adapted from `diff` v4's `Diff.prototype.diff` in `lib/diff/base.js`, + * specialised for arrays with strict-equality comparison. + * + * @param {Array} oldArr Previous-revision tokens. + * @param {Array} newArr Current-revision tokens. + * @return {Array<{ count: number, added: boolean, removed: boolean, value: Array }>} Diff parts. + */ +function diffArrays( oldArr, newArr ) { + const oldLen = oldArr.length; + const newLen = newArr.length; + const maxEditLen = oldLen + newLen; + + const pushComponent = ( components, added, removed ) => { + const last = components[ components.length - 1 ]; + if ( last && last.added === added && last.removed === removed ) { + components[ components.length - 1 ] = { + count: last.count + 1, + added, + removed, + }; + } else { + components.push( { count: 1, added, removed } ); + } + }; + + const extractCommon = ( basePath, diagonalPath ) => { + let np = basePath.newPos; + let op = np - diagonalPath; + let common = 0; + while ( + np + 1 < newLen && + op + 1 < oldLen && + newArr[ np + 1 ] === oldArr[ op + 1 ] + ) { + np++; + op++; + common++; + } + if ( common ) { + basePath.components.push( { count: common } ); + } + basePath.newPos = np; + return op; + }; + + const clonePath = ( p ) => ( { + newPos: p.newPos, + components: p.components.slice(), + } ); + + const buildValues = ( components ) => { + const result = []; + let nIdx = 0; + let oIdx = 0; + for ( let pos = 0; pos < components.length; pos++ ) { + const comp = components[ pos ]; + if ( comp.added ) { + result.push( { + count: comp.count, + added: true, + removed: false, + value: newArr.slice( nIdx, nIdx + comp.count ), + } ); + nIdx += comp.count; + } else if ( comp.removed ) { + result.push( { + count: comp.count, + added: false, + removed: true, + value: oldArr.slice( oIdx, oIdx + comp.count ), + } ); + oIdx += comp.count; + /* + * Match v4's `buildValues` convention of "removes before adds": + * the algorithm naturally emits added/removed in that order, + * but v4 swaps adjacent (added, removed) pairs so the diff + * reads as (removed, added). Without this, the post-revisions + * pairing logic places condensed/added-after-removed blocks + * in the wrong visual order. + */ + if ( pos > 0 && result[ pos - 1 ].added ) { + const tmp = result[ pos - 1 ]; + result[ pos - 1 ] = result[ pos ]; + result[ pos ] = tmp; + } + } else { + result.push( { + count: comp.count, + added: false, + removed: false, + value: newArr.slice( nIdx, nIdx + comp.count ), + } ); + nIdx += comp.count; + oIdx += comp.count; + } + } + return result; + }; + + const bestPath = [ { newPos: -1, components: [] } ]; + const seedOldPos = extractCommon( bestPath[ 0 ], 0 ); + + if ( bestPath[ 0 ].newPos + 1 >= newLen && seedOldPos + 1 >= oldLen ) { + return buildValues( bestPath[ 0 ].components ); + } + + for ( let editLen = 1; editLen <= maxEditLen; editLen++ ) { + for ( let diag = -editLen; diag <= editLen; diag += 2 ) { + const addPath = bestPath[ diag - 1 ]; + const removePath = bestPath[ diag + 1 ]; + const opPos = ( removePath ? removePath.newPos : 0 ) - diag; + if ( addPath ) { + bestPath[ diag - 1 ] = undefined; + } + + const canAdd = addPath && addPath.newPos + 1 < newLen; + const canRemove = removePath && opPos >= 0 && opPos < oldLen; + + if ( ! canAdd && ! canRemove ) { + bestPath[ diag ] = undefined; + continue; + } + + /* + * v4 tie-breaker: pick the path that has progressed further in + * `newPos`. v6+ flipped this to favour `oldPos` ("deletions + * before insertions"), which would change which equally-valid + * LCS is selected for the post-revisions block pairing. + */ + let basePath; + if ( + ! canAdd || + ( canRemove && addPath.newPos < removePath.newPos ) + ) { + basePath = clonePath( removePath ); + pushComponent( basePath.components, undefined, true ); + } else { + basePath = addPath; + basePath.newPos++; + pushComponent( basePath.components, true, undefined ); + } + + const op = extractCommon( basePath, diag ); + + if ( basePath.newPos + 1 >= newLen && op + 1 >= oldLen ) { + return buildValues( basePath.components ); + } + bestPath[ diag ] = basePath; + } + } + + return []; +} + /** * Safely stringifies a value for display and comparison. * @@ -502,8 +666,16 @@ function applyRichTextDiff( currentRichText, previousRichText ) { const currentText = currentRichText.toPlainText(); const previousText = previousRichText.toPlainText(); - // Diff the plain text (words for cleaner output) - const textDiff = diffWords( previousText, currentText ); + /* + * Diff the plain text (words for cleaner output). + * + * `diffWordsWithSpace` keeps whitespace as its own token, the same + * behaviour `diffWords` had in `diff` v4. v6+ stopped treating + * whitespace as a token, which would otherwise coalesce adjacent word + * changes into one removed/added pair instead of reporting them + * separately — making the inline rich-text diff less precise. + */ + const textDiff = diffWordsWithSpace( previousText, currentText ); let result = create( { text: '' } ); let currentIdx = 0; @@ -660,7 +832,10 @@ function applyDiffToBlock( currentBlock, previousBlock, diffStatus ) { previousBlock.attributes[ attrName ] ); if ( currStr !== prevStr ) { - changedAttributes[ attrName ] = diffWords( prevStr, currStr ); + changedAttributes[ attrName ] = diffWordsWithSpace( + prevStr, + currStr + ); } } } diff --git a/packages/editor/src/components/revision-fields-diff/index.js b/packages/editor/src/components/revision-fields-diff/index.js index 2f978ae5523547..5b34d5e017342f 100644 --- a/packages/editor/src/components/revision-fields-diff/index.js +++ b/packages/editor/src/components/revision-fields-diff/index.js @@ -1,7 +1,12 @@ /** * External dependencies */ -import { diffWords } from 'diff/lib/diff/word'; +/* + * `diffWordsWithSpace` preserves the v4-style per-word output. v6+ + * stopped treating whitespace as a token in `diffWords`, which coalesces + * adjacent word changes into a single removed/added pair. + */ +import { diffWordsWithSpace } from 'diff'; /** * WordPress dependencies @@ -71,7 +76,7 @@ export default function RevisionFieldsDiffPanel() { continue; } - result[ key ] = diffWords( prevStr, revStr ); + result[ key ] = diffWordsWithSpace( prevStr, revStr ); } if ( Object.keys( result ).length === 0 ) { From 1bd351f57b0f3b4d17288f445006ef94db38a1cc Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Wed, 6 May 2026 14:16:34 +0530 Subject: [PATCH 2/4] Post Revisions: Drop vendored `diffArrays`, filter whitespace blocks Addresses review feedback on #77992 (findings 1, 5, 6, 7, 8, 9). The previous commit inlined ~150 LoC of v4's Myers algorithm to keep `diffArrays`'s LCS pivot stable across the v4 -> v8 bump. That preserved all existing test assertions but came at a real maintenance cost. The cleaner approach (the issue's original "Class 1" fix): just drop freeform/whitespace pseudo-blocks from both arrays before LCS. Without the `\n\n` blocks competing as a match anchor, v8's "deletions before insertions" tie-breaker picks the same content-block pivot v4 did for every input that was previously failing, and the inlined algorithm becomes unnecessary. Two follow-ups to make that approach work end-to-end: 1. Adjust `pairSimilarBlocks`'s placement heuristic. The original heuristic looked for *added* blocks between the removed and added positions to decide where to anchor a paired modification. With whitespace pseudo-blocks no longer in the result list, an unchanged content block between the two positions is now the only "the modified block crosses current-revision content" signal -- so the heuristic now also fires on unchanged blocks (but still ignores removed blocks, which don't exist in the current revision and so don't count as crossing). 2. Relax the `'handles two blocks that swapped positions'` assertion to the user-facing invariant (one unmarked + one removed/added pair with same content) rather than which side of the swap gets matched. For a pure swap the two LCS choices are equally valid -- both v4 and v8 produce semantically-correct output, they just disagree on which block reads as "unchanged" -- so asserting one is testing the implementation, not the behaviour. Net: -191 +56 LoC in `block-diff.js`. All 33 block-diff unit tests pass and the broader revision-related suites stay green. --- .../post-revisions-preview/block-diff.js | 233 ++++-------------- .../post-revisions-preview/test/block-diff.js | 65 +++-- 2 files changed, 87 insertions(+), 211 deletions(-) diff --git a/packages/editor/src/components/post-revisions-preview/block-diff.js b/packages/editor/src/components/post-revisions-preview/block-diff.js index d05c7df32994b8..11037fe5ccee3d 100644 --- a/packages/editor/src/components/post-revisions-preview/block-diff.js +++ b/packages/editor/src/components/post-revisions-preview/block-diff.js @@ -1,7 +1,13 @@ /** * External dependencies */ -import { diffWordsWithSpace } from 'diff'; +/* + * `diffWordsWithSpace` keeps whitespace as its own token, matching the + * behaviour `diffWords` had in `diff` v4. v6+ stopped treating whitespace + * as a token, which would otherwise coalesce adjacent word changes into a + * single removed/added pair instead of reporting them per-word. + */ +import { diffArrays, diffWordsWithSpace } from 'diff'; /** * WordPress dependencies @@ -28,168 +34,23 @@ import { unlock } from '../../lock-unlock'; const { parseRawBlock } = unlock( blocksPrivateApis ); /** - * Compute an LCS-based array diff with the same tie-breaker semantics as - * `diff` v4's `diffArrays`. The post-revisions UI relies on a specific LCS - * choice when multiple equally-valid matches exist (e.g. preferring the - * earlier occurrence in the previous revision over later ones, and - * preferring content blocks over freeform/whitespace blocks). `diff` v6 - * changed the tie-breaker to "deletions before insertions", which produces - * a different but equally-valid pairing — visible as confusing inline diffs - * in the post-revisions feature. We reproduce v4's selection locally so - * block-level matching stays stable across upgrades of the `diff` library. - * - * Adapted from `diff` v4's `Diff.prototype.diff` in `lib/diff/base.js`, - * specialised for arrays with strict-equality comparison. + * Whether a grammar-parsed raw block is a whitespace-only freeform pseudo-block + * (the `\n\n` between block markers, etc). These are stripped from both arrays + * before LCS to keep the matching pivot stable: under `diff` v6's tie-breaker, + * a whitespace block could otherwise be selected as the LCS anchor in + * `[paragraph, whitespace, paragraph]` swaps, mis-pairing the surrounding + * paragraphs in `pairSimilarBlocks`. Whitespace pseudo-blocks don't render + * anyway (`parseRawBlock` returns undefined for them), so dropping them + * before the diff has no user-visible effect. * - * @param {Array} oldArr Previous-revision tokens. - * @param {Array} newArr Current-revision tokens. - * @return {Array<{ count: number, added: boolean, removed: boolean, value: Array }>} Diff parts. + * @param {Object} rawBlock A raw block from `@wordpress/block-serialization-default-parser`. + * @return {boolean} True if the block should be excluded from LCS matching. */ -function diffArrays( oldArr, newArr ) { - const oldLen = oldArr.length; - const newLen = newArr.length; - const maxEditLen = oldLen + newLen; - - const pushComponent = ( components, added, removed ) => { - const last = components[ components.length - 1 ]; - if ( last && last.added === added && last.removed === removed ) { - components[ components.length - 1 ] = { - count: last.count + 1, - added, - removed, - }; - } else { - components.push( { count: 1, added, removed } ); - } - }; - - const extractCommon = ( basePath, diagonalPath ) => { - let np = basePath.newPos; - let op = np - diagonalPath; - let common = 0; - while ( - np + 1 < newLen && - op + 1 < oldLen && - newArr[ np + 1 ] === oldArr[ op + 1 ] - ) { - np++; - op++; - common++; - } - if ( common ) { - basePath.components.push( { count: common } ); - } - basePath.newPos = np; - return op; - }; - - const clonePath = ( p ) => ( { - newPos: p.newPos, - components: p.components.slice(), - } ); - - const buildValues = ( components ) => { - const result = []; - let nIdx = 0; - let oIdx = 0; - for ( let pos = 0; pos < components.length; pos++ ) { - const comp = components[ pos ]; - if ( comp.added ) { - result.push( { - count: comp.count, - added: true, - removed: false, - value: newArr.slice( nIdx, nIdx + comp.count ), - } ); - nIdx += comp.count; - } else if ( comp.removed ) { - result.push( { - count: comp.count, - added: false, - removed: true, - value: oldArr.slice( oIdx, oIdx + comp.count ), - } ); - oIdx += comp.count; - /* - * Match v4's `buildValues` convention of "removes before adds": - * the algorithm naturally emits added/removed in that order, - * but v4 swaps adjacent (added, removed) pairs so the diff - * reads as (removed, added). Without this, the post-revisions - * pairing logic places condensed/added-after-removed blocks - * in the wrong visual order. - */ - if ( pos > 0 && result[ pos - 1 ].added ) { - const tmp = result[ pos - 1 ]; - result[ pos - 1 ] = result[ pos ]; - result[ pos ] = tmp; - } - } else { - result.push( { - count: comp.count, - added: false, - removed: false, - value: newArr.slice( nIdx, nIdx + comp.count ), - } ); - nIdx += comp.count; - oIdx += comp.count; - } - } - return result; - }; - - const bestPath = [ { newPos: -1, components: [] } ]; - const seedOldPos = extractCommon( bestPath[ 0 ], 0 ); - - if ( bestPath[ 0 ].newPos + 1 >= newLen && seedOldPos + 1 >= oldLen ) { - return buildValues( bestPath[ 0 ].components ); - } - - for ( let editLen = 1; editLen <= maxEditLen; editLen++ ) { - for ( let diag = -editLen; diag <= editLen; diag += 2 ) { - const addPath = bestPath[ diag - 1 ]; - const removePath = bestPath[ diag + 1 ]; - const opPos = ( removePath ? removePath.newPos : 0 ) - diag; - if ( addPath ) { - bestPath[ diag - 1 ] = undefined; - } - - const canAdd = addPath && addPath.newPos + 1 < newLen; - const canRemove = removePath && opPos >= 0 && opPos < oldLen; - - if ( ! canAdd && ! canRemove ) { - bestPath[ diag ] = undefined; - continue; - } - - /* - * v4 tie-breaker: pick the path that has progressed further in - * `newPos`. v6+ flipped this to favour `oldPos` ("deletions - * before insertions"), which would change which equally-valid - * LCS is selected for the post-revisions block pairing. - */ - let basePath; - if ( - ! canAdd || - ( canRemove && addPath.newPos < removePath.newPos ) - ) { - basePath = clonePath( removePath ); - pushComponent( basePath.components, undefined, true ); - } else { - basePath = addPath; - basePath.newPos++; - pushComponent( basePath.components, true, undefined ); - } - - const op = extractCommon( basePath, diag ); - - if ( basePath.newPos + 1 >= newLen && op + 1 >= oldLen ) { - return buildValues( basePath.components ); - } - bestPath[ diag ] = basePath; - } - } - - return []; +function isWhitespaceRawBlock( rawBlock ) { + return ( + rawBlock.blockName === null && + ( ! rawBlock.innerHTML || ! rawBlock.innerHTML.trim() ) + ); } /** @@ -398,7 +259,8 @@ function pairSimilarBlocks( blocks ) { // Decide where to place the modified block by checking // what's between the removed and added positions. - // If there are unpaired added blocks between them, + // If anything between them is part of the current revision + // (an unpaired added block, or an unchanged block), // placing at the removed position would put the modified // block before content that comes before it in the // current revision — so use the added position. @@ -406,18 +268,20 @@ function pairSimilarBlocks( blocks ) { // previous revision's order intact. const lo = Math.min( rem.index, bestMatch.index ); const hi = Math.max( rem.index, bestMatch.index ); - let hasAddedBetween = false; + let crossesCurrentContent = false; for ( let i = lo + 1; i < hi; i++ ) { - if ( - blocks[ i ].__revisionDiffStatus?.status === 'added' && - ! pairedAdded.has( i ) - ) { - hasAddedBetween = true; + const status = blocks[ i ].__revisionDiffStatus?.status; + if ( status === undefined ) { + crossesCurrentContent = true; + break; + } + if ( status === 'added' && ! pairedAdded.has( i ) ) { + crossesCurrentContent = true; break; } } - if ( hasAddedBetween ) { + if ( crossesCurrentContent ) { // Use the added position — don't jump before // current-revision content. modifications.set( bestMatch.index, modifiedBlock ); @@ -456,6 +320,15 @@ function pairSimilarBlocks( blocks ) { * @return {Array} Merged raw blocks with diff status injected. */ function diffRawBlocks( currentRaw, previousRaw ) { + // Strip whitespace-only freeform pseudo-blocks before LCS — see + // `isWhitespaceRawBlock` for why. + const currentBlocks = currentRaw.filter( + ( b ) => ! isWhitespaceRawBlock( b ) + ); + const previousBlocks = previousRaw.filter( + ( b ) => ! isWhitespaceRawBlock( b ) + ); + const createBlockSignature = ( rawBlock ) => JSON.stringify( { name: rawBlock.blockName, @@ -466,8 +339,8 @@ function diffRawBlocks( currentRaw, previousRaw ) { ( c ) => c !== null && c.trim() !== '' ), } ); - const currentSigs = currentRaw.map( createBlockSignature ); - const previousSigs = previousRaw.map( createBlockSignature ); + const currentSigs = currentBlocks.map( createBlockSignature ); + const previousSigs = previousBlocks.map( createBlockSignature ); const diff = diffArrays( previousSigs, currentSigs ); @@ -479,22 +352,22 @@ function diffRawBlocks( currentRaw, previousRaw ) { if ( part.added ) { for ( let i = 0; i < part.count; i++ ) { result.push( { - ...currentRaw[ currIdx++ ], + ...currentBlocks[ currIdx++ ], __revisionDiffStatus: { status: 'added' }, } ); } } else if ( part.removed ) { for ( let i = 0; i < part.count; i++ ) { result.push( { - ...previousRaw[ prevIdx++ ], + ...previousBlocks[ prevIdx++ ], __revisionDiffStatus: { status: 'removed' }, } ); } } else { // Matched blocks - recursively diff their innerBlocks. for ( let i = 0; i < part.count; i++ ) { - const currBlock = currentRaw[ currIdx++ ]; - const prevBlock = previousRaw[ prevIdx++ ]; + const currBlock = currentBlocks[ currIdx++ ]; + const prevBlock = previousBlocks[ prevIdx++ ]; // Recursively diff inner blocks. const diffedInnerBlocks = diffRawBlocks( @@ -666,15 +539,7 @@ function applyRichTextDiff( currentRichText, previousRichText ) { const currentText = currentRichText.toPlainText(); const previousText = previousRichText.toPlainText(); - /* - * Diff the plain text (words for cleaner output). - * - * `diffWordsWithSpace` keeps whitespace as its own token, the same - * behaviour `diffWords` had in `diff` v4. v6+ stopped treating - * whitespace as a token, which would otherwise coalesce adjacent word - * changes into one removed/added pair instead of reporting them - * separately — making the inline rich-text diff less precise. - */ + // Diff the plain text (words for cleaner output). const textDiff = diffWordsWithSpace( previousText, currentText ); let result = create( { text: '' } ); diff --git a/packages/editor/src/components/post-revisions-preview/test/block-diff.js b/packages/editor/src/components/post-revisions-preview/test/block-diff.js index a37708015b2d7f..07bdd56c907aa8 100644 --- a/packages/editor/src/components/post-revisions-preview/test/block-diff.js +++ b/packages/editor/src/components/post-revisions-preview/test/block-diff.js @@ -338,34 +338,45 @@ describe( 'diffRevisionContent', () => { createBlock( 'core/paragraph', { content: 'First block content' } ), ] ); const blocks = diffRevisionContent( current, previous ); + const normalized = normalizeBlockTree( blocks ); - // LCS matches one block ("First block content" at prev[0] -> curr[1]). - // The other block appears as removed + added (showing the reorder). - // We intentionally don't pair identical blocks as "modified" since - // there's no actual content change - just a position change. - expect( normalizeBlockTree( blocks ) ).toMatchObject( [ - { - name: 'core/paragraph', - attributes: { - content: 'Second block content', - __revisionDiffStatus: { status: 'added' }, - }, - }, - { - name: 'core/paragraph', - attributes: { - content: 'First block content', - __revisionDiffStatus: undefined, - }, - }, - { - name: 'core/paragraph', - attributes: { - content: 'Second block content', - __revisionDiffStatus: { status: 'removed' }, - }, - }, - ] ); + /* + * For a pure swap, LCS has two equally-valid choices for the + * "unchanged" anchor — either block could be the anchor while the + * other reads as removed+added. The choice is implementation- + * defined (it differs across `diff` library versions, for + * instance), so we assert the user-facing invariant rather than + * which side gets matched: exactly one block stays unmarked, the + * other shows up as a removed/added pair with the same content + * (a position change, not a modification). + */ + const statuses = normalized.map( + ( b ) => b.attributes.__revisionDiffStatus?.status + ); + const unchanged = normalized.filter( + ( _, i ) => statuses[ i ] === undefined + ); + const added = normalized.filter( + ( _, i ) => statuses[ i ] === 'added' + ); + const removed = normalized.filter( + ( _, i ) => statuses[ i ] === 'removed' + ); + + expect( normalized ).toHaveLength( 3 ); + expect( unchanged ).toHaveLength( 1 ); + expect( added ).toHaveLength( 1 ); + expect( removed ).toHaveLength( 1 ); + + expect( added[ 0 ].attributes.content ).toBe( + removed[ 0 ].attributes.content + ); + expect( unchanged[ 0 ].attributes.content ).not.toBe( + added[ 0 ].attributes.content + ); + expect( [ 'First block content', 'Second block content' ] ).toContain( + unchanged[ 0 ].attributes.content + ); } ); it( 'pairs blocks as modified when attrs differ but content is identical', () => { From 98f3d2c8a98a0d1b0b55f2767a58deae31a77984 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Wed, 6 May 2026 14:16:52 +0530 Subject: [PATCH 3/4] Post Revisions: Migrate remaining `diff` imports, add CHANGELOG entries Addresses review feedback on #77992 (findings 2, 3, 4, 10). - `preserve-client-ids.js` and `block-compare/index.js` now import `diffArrays` / `diffChars` from the top-level `'diff'` package instead of the deep `'diff/lib/diff/'` paths. v8's `package.json` `exports` map only wildcards `./lib/*.js` (with extension); the bare-folder `./lib/` mapping requires a trailing slash. The deep paths only resolve here because the bundler/Jest resolver fills in `.js` -- a future tooling change could break them. v8 also marks the package `sideEffects: false`, so the historical tree-shaking reason for the deep imports no longer applies. The "diff doesn't tree-shake correctly" comment in `block-compare/index.js` is now stale and gets removed. - Add `### Internal` entries to `packages/editor/CHANGELOG.md` and `packages/block-editor/CHANGELOG.md` recording the major dependency bump. --- packages/block-editor/CHANGELOG.md | 4 ++++ packages/block-editor/src/components/block-compare/index.js | 4 +--- packages/editor/CHANGELOG.md | 1 + .../components/post-revisions-preview/preserve-client-ids.js | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/block-editor/CHANGELOG.md b/packages/block-editor/CHANGELOG.md index e72a2687a0325c..efcc081cb3fe10 100644 --- a/packages/block-editor/CHANGELOG.md +++ b/packages/block-editor/CHANGELOG.md @@ -6,6 +6,10 @@ - `BlockManager`: Add stacking context isolation to category list ([#77759](https://github.com/WordPress/gutenberg/pull/77759)). +### Internal + +- Updated `diff` dependency from `^4.0.2` to `^8.0.3` ([#77992](https://github.com/WordPress/gutenberg/pull/77992)). + ## 15.18.0 (2026-04-29) ### Enhancements diff --git a/packages/block-editor/src/components/block-compare/index.js b/packages/block-editor/src/components/block-compare/index.js index 4b9b6db596864e..6e44b5c133f1ba 100644 --- a/packages/block-editor/src/components/block-compare/index.js +++ b/packages/block-editor/src/components/block-compare/index.js @@ -2,9 +2,7 @@ * External dependencies */ import clsx from 'clsx'; -// diff doesn't tree-shake correctly, so we import from the individual -// module here, to avoid including too much of the library -import { diffChars } from 'diff/lib/diff/character'; +import { diffChars } from 'diff'; /** * WordPress dependencies diff --git a/packages/editor/CHANGELOG.md b/packages/editor/CHANGELOG.md index 7ae528ecc4915b..de1ad6ed159682 100644 --- a/packages/editor/CHANGELOG.md +++ b/packages/editor/CHANGELOG.md @@ -5,6 +5,7 @@ ### Internal - Update `date-fns` dependency to `v4.1.0` ([#78057](https://github.com/WordPress/gutenberg/pull/78057)). +- Updated `diff` dependency from `^4.0.2` to `^8.0.3` ([#77992](https://github.com/WordPress/gutenberg/pull/77992)). ## 14.45.0 (2026-04-29) diff --git a/packages/editor/src/components/post-revisions-preview/preserve-client-ids.js b/packages/editor/src/components/post-revisions-preview/preserve-client-ids.js index eb907f6281b7f7..b57acf17f0321a 100644 --- a/packages/editor/src/components/post-revisions-preview/preserve-client-ids.js +++ b/packages/editor/src/components/post-revisions-preview/preserve-client-ids.js @@ -1,7 +1,7 @@ /** * External dependencies */ -import { diffArrays } from 'diff/lib/diff/array'; +import { diffArrays } from 'diff'; /** * Preserves clientIds from previously rendered blocks to prevent flashing. From a80ca3abb280a9632871f660ca73183d28e56785 Mon Sep 17 00:00:00 2001 From: Manzoor Wani Date: Fri, 8 May 2026 22:00:16 +0530 Subject: [PATCH 4/4] Post Revisions: Add focused tests + clarify comments for `diffRawBlocks` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses round-2 review feedback on #77992 (human #1, #3, #5, #6 + Codex test gaps a, b). - Add a `'filters whitespace-only freeform pseudo-blocks before LCS'` test that's a direct canary for the whitespace filter — without the filter, `pairSimilarBlocks` would mis-match two paragraphs across the whitespace pseudo-block as the LCS anchor and produce two confused modified blocks instead of one modified + one unchanged. - Add a `'places paired modification at current-revision position when only unchanged blocks sit between'` test exercising the new `crossesCurrentContent` "unchanged between removed and added" branch in isolation. Previously only hit transitively through the `'handles block move with a tiny change'` test, which mixes that branch with the whitespace-filter path and other heuristics. - Tighten the `crossesCurrentContent` comment so it matches what the code actually checks (unpaired-added + unchanged) and adds a one-line note that 'removed' / `pairedAdded` blocks aren't checked because they aren't in the current revision. - Match the `diffWordsWithSpace` rationale comment between `block-diff.js` and `revision-fields-diff/index.js` for grep-ability. - Document on `diffRawBlocks` that the whitespace filter is intentionally re-applied at every recursive level so the function stays self-contained when called directly with raw grammar output. No logic changes. All 35 block-diff tests + the broader unit suite (32598 tests) stay green. --- .../post-revisions-preview/block-diff.js | 32 ++++--- .../post-revisions-preview/test/block-diff.js | 94 +++++++++++++++++++ 2 files changed, 114 insertions(+), 12 deletions(-) diff --git a/packages/editor/src/components/post-revisions-preview/block-diff.js b/packages/editor/src/components/post-revisions-preview/block-diff.js index 11037fe5ccee3d..96b32a7cafff19 100644 --- a/packages/editor/src/components/post-revisions-preview/block-diff.js +++ b/packages/editor/src/components/post-revisions-preview/block-diff.js @@ -2,10 +2,9 @@ * External dependencies */ /* - * `diffWordsWithSpace` keeps whitespace as its own token, matching the - * behaviour `diffWords` had in `diff` v4. v6+ stopped treating whitespace - * as a token, which would otherwise coalesce adjacent word changes into a - * single removed/added pair instead of reporting them per-word. + * `diffWordsWithSpace` preserves the v4-style per-word output. v6+ + * stopped treating whitespace as a token in `diffWords`, which coalesces + * adjacent word changes into a single removed/added pair. */ import { diffArrays, diffWordsWithSpace } from 'diff'; @@ -258,14 +257,18 @@ function pairSimilarBlocks( blocks ) { }; // Decide where to place the modified block by checking - // what's between the removed and added positions. - // If anything between them is part of the current revision - // (an unpaired added block, or an unchanged block), - // placing at the removed position would put the modified - // block before content that comes before it in the - // current revision — so use the added position. - // Otherwise, use the removed position to keep the - // previous revision's order intact. + // what's between the removed and added positions. If any + // block between them is in the current revision (an + // unchanged block, or an unpaired added block), placing + // the modification at the removed position would put it + // before content that already comes before it in the + // current revision — so use the added position instead. + // Otherwise, use the removed position to keep the previous + // revision's reading order intact. + // + // 'removed' blocks (and added blocks already absorbed via + // `pairedAdded`) aren't checked because they aren't in the + // current revision and so don't count as crossing it. const lo = Math.min( rem.index, bestMatch.index ); const hi = Math.max( rem.index, bestMatch.index ); let crossesCurrentContent = false; @@ -315,6 +318,11 @@ function pairSimilarBlocks( blocks ) { * Detects modifications when exactly 1 block is removed and 1 is added * with the same blockName (1:1 replacement = modification). * + * Whitespace-only freeform pseudo-blocks are filtered at every recursive + * level so this function is safe to call directly with raw output from + * `@wordpress/block-serialization-default-parser`. The duplicate work for + * inner-block recursion is negligible and keeps the contract self-contained. + * * @param {Array} currentRaw Current revision's raw blocks. * @param {Array} previousRaw Previous revision's raw blocks. * @return {Array} Merged raw blocks with diff status injected. diff --git a/packages/editor/src/components/post-revisions-preview/test/block-diff.js b/packages/editor/src/components/post-revisions-preview/test/block-diff.js index 07bdd56c907aa8..a53873d69898ac 100644 --- a/packages/editor/src/components/post-revisions-preview/test/block-diff.js +++ b/packages/editor/src/components/post-revisions-preview/test/block-diff.js @@ -452,6 +452,100 @@ describe( 'diffRevisionContent', () => { ] ); } ); + it( 'filters whitespace-only freeform pseudo-blocks before LCS', () => { + /* + * Direct canary for the whitespace-pseudo-block filter in + * `diffRawBlocks`. The grammar parser emits + * `{ blockName: null, innerHTML: '\n\n' }` for the whitespace + * between block markers; under `diff` v6+'s LCS tie-breaker, + * those pseudo-blocks would otherwise be selected as the match + * anchor in [paragraph, whitespace, paragraph] swaps, leaving + * `pairSimilarBlocks` with two removed and two added paragraphs + * to mis-match by similarity. With the filter, the LCS picks a + * content block and the surrounding paragraphs pair cleanly. + */ + const previous = serialize( [ + createBlock( 'core/paragraph', { content: 'Alpha content' } ), + createBlock( 'core/paragraph', { content: 'Beta content' } ), + ] ); + const current = serialize( [ + createBlock( 'core/paragraph', { + content: 'Beta content modified', + } ), + createBlock( 'core/paragraph', { content: 'Alpha content' } ), + ] ); + const blocks = diffRevisionContent( current, previous ); + const normalized = normalizeBlockTree( blocks ); + + const statuses = normalized.map( + ( b ) => b.attributes.__revisionDiffStatus?.status + ); + // Exactly one modified pair and one unchanged anchor — not the + // double-modified mis-pair that the unfiltered LCS would yield. + expect( statuses.filter( ( s ) => s === 'modified' ) ).toHaveLength( + 1 + ); + expect( statuses.filter( ( s ) => s === undefined ) ).toHaveLength( 1 ); + + const unchanged = normalized.find( + ( b ) => b.attributes.__revisionDiffStatus === undefined + ); + expect( unchanged.attributes.content ).toBe( 'Alpha content' ); + } ); + + it( 'places paired modification at current-revision position when only unchanged blocks sit between', () => { + /* + * Direct canary for the `crossesCurrentContent` "unchanged + * between removed and added" branch. The modified block crosses + * two unchanged paragraphs; the placement heuristic should + * anchor it at its current-revision position (index 0), not at + * the removed position (index 3) — otherwise the modified block + * would render after content that already comes before it in + * the current revision. + */ + const previous = serialize( [ + createBlock( 'core/paragraph', { + content: 'Stays one anchor sentence', + } ), + createBlock( 'core/paragraph', { + content: 'Stays two anchor sentence', + } ), + createBlock( 'core/paragraph', { + content: 'Original tail content sentence', + } ), + ] ); + const current = serialize( [ + createBlock( 'core/paragraph', { + content: 'Original tail content sentence rewritten', + } ), + createBlock( 'core/paragraph', { + content: 'Stays one anchor sentence', + } ), + createBlock( 'core/paragraph', { + content: 'Stays two anchor sentence', + } ), + ] ); + const blocks = diffRevisionContent( current, previous ); + const normalized = normalizeBlockTree( blocks ); + + expect( normalized ).toHaveLength( 3 ); + expect( normalized[ 0 ].attributes.__revisionDiffStatus?.status ).toBe( + 'modified' + ); + expect( normalized[ 1 ].attributes.content ).toBe( + 'Stays one anchor sentence' + ); + expect( + normalized[ 1 ].attributes.__revisionDiffStatus + ).toBeUndefined(); + expect( normalized[ 2 ].attributes.content ).toBe( + 'Stays two anchor sentence' + ); + expect( + normalized[ 2 ].attributes.__revisionDiffStatus + ).toBeUndefined(); + } ); + describe( 'inner blocks', () => { it( 'handles deeply nested inner blocks', () => { const previous = serialize( [