11/**
22 * External dependencies
33 */
4- import { diffWordsWithSpace } from 'diff' ;
4+ /*
5+ * `diffWordsWithSpace` keeps whitespace as its own token, matching the
6+ * behaviour `diffWords` had in `diff` v4. v6+ stopped treating whitespace
7+ * as a token, which would otherwise coalesce adjacent word changes into a
8+ * single removed/added pair instead of reporting them per-word.
9+ */
10+ import { diffArrays , diffWordsWithSpace } from 'diff' ;
511
612/**
713 * WordPress dependencies
@@ -28,168 +34,23 @@ import { unlock } from '../../lock-unlock';
2834const { parseRawBlock } = unlock ( blocksPrivateApis ) ;
2935
3036/**
31- * Compute an LCS-based array diff with the same tie-breaker semantics as
32- * `diff` v4's `diffArrays`. The post-revisions UI relies on a specific LCS
33- * choice when multiple equally-valid matches exist (e.g. preferring the
34- * earlier occurrence in the previous revision over later ones, and
35- * preferring content blocks over freeform/whitespace blocks). `diff` v6
36- * changed the tie-breaker to "deletions before insertions", which produces
37- * a different but equally-valid pairing — visible as confusing inline diffs
38- * in the post-revisions feature. We reproduce v4's selection locally so
39- * block-level matching stays stable across upgrades of the `diff` library.
40- *
41- * Adapted from `diff` v4's `Diff.prototype.diff` in `lib/diff/base.js`,
42- * specialised for arrays with strict-equality comparison.
37+ * Whether a grammar-parsed raw block is a whitespace-only freeform pseudo-block
38+ * (the `\n\n` between block markers, etc). These are stripped from both arrays
39+ * before LCS to keep the matching pivot stable: under `diff` v6's tie-breaker,
40+ * a whitespace block could otherwise be selected as the LCS anchor in
41+ * `[paragraph, whitespace, paragraph]` swaps, mis-pairing the surrounding
42+ * paragraphs in `pairSimilarBlocks`. Whitespace pseudo-blocks don't render
43+ * anyway (`parseRawBlock` returns undefined for them), so dropping them
44+ * before the diff has no user-visible effect.
4345 *
44- * @param {Array } oldArr Previous-revision tokens.
45- * @param {Array } newArr Current-revision tokens.
46- * @return {Array<{ count: number, added: boolean, removed: boolean, value: Array }> } Diff parts.
46+ * @param {Object } rawBlock A raw block from `@wordpress/block-serialization-default-parser`.
47+ * @return {boolean } True if the block should be excluded from LCS matching.
4748 */
48- function diffArrays ( oldArr , newArr ) {
49- const oldLen = oldArr . length ;
50- const newLen = newArr . length ;
51- const maxEditLen = oldLen + newLen ;
52-
53- const pushComponent = ( components , added , removed ) => {
54- const last = components [ components . length - 1 ] ;
55- if ( last && last . added === added && last . removed === removed ) {
56- components [ components . length - 1 ] = {
57- count : last . count + 1 ,
58- added,
59- removed,
60- } ;
61- } else {
62- components . push ( { count : 1 , added, removed } ) ;
63- }
64- } ;
65-
66- const extractCommon = ( basePath , diagonalPath ) => {
67- let np = basePath . newPos ;
68- let op = np - diagonalPath ;
69- let common = 0 ;
70- while (
71- np + 1 < newLen &&
72- op + 1 < oldLen &&
73- newArr [ np + 1 ] === oldArr [ op + 1 ]
74- ) {
75- np ++ ;
76- op ++ ;
77- common ++ ;
78- }
79- if ( common ) {
80- basePath . components . push ( { count : common } ) ;
81- }
82- basePath . newPos = np ;
83- return op ;
84- } ;
85-
86- const clonePath = ( p ) => ( {
87- newPos : p . newPos ,
88- components : p . components . slice ( ) ,
89- } ) ;
90-
91- const buildValues = ( components ) => {
92- const result = [ ] ;
93- let nIdx = 0 ;
94- let oIdx = 0 ;
95- for ( let pos = 0 ; pos < components . length ; pos ++ ) {
96- const comp = components [ pos ] ;
97- if ( comp . added ) {
98- result . push ( {
99- count : comp . count ,
100- added : true ,
101- removed : false ,
102- value : newArr . slice ( nIdx , nIdx + comp . count ) ,
103- } ) ;
104- nIdx += comp . count ;
105- } else if ( comp . removed ) {
106- result . push ( {
107- count : comp . count ,
108- added : false ,
109- removed : true ,
110- value : oldArr . slice ( oIdx , oIdx + comp . count ) ,
111- } ) ;
112- oIdx += comp . count ;
113- /*
114- * Match v4's `buildValues` convention of "removes before adds":
115- * the algorithm naturally emits added/removed in that order,
116- * but v4 swaps adjacent (added, removed) pairs so the diff
117- * reads as (removed, added). Without this, the post-revisions
118- * pairing logic places condensed/added-after-removed blocks
119- * in the wrong visual order.
120- */
121- if ( pos > 0 && result [ pos - 1 ] . added ) {
122- const tmp = result [ pos - 1 ] ;
123- result [ pos - 1 ] = result [ pos ] ;
124- result [ pos ] = tmp ;
125- }
126- } else {
127- result . push ( {
128- count : comp . count ,
129- added : false ,
130- removed : false ,
131- value : newArr . slice ( nIdx , nIdx + comp . count ) ,
132- } ) ;
133- nIdx += comp . count ;
134- oIdx += comp . count ;
135- }
136- }
137- return result ;
138- } ;
139-
140- const bestPath = [ { newPos : - 1 , components : [ ] } ] ;
141- const seedOldPos = extractCommon ( bestPath [ 0 ] , 0 ) ;
142-
143- if ( bestPath [ 0 ] . newPos + 1 >= newLen && seedOldPos + 1 >= oldLen ) {
144- return buildValues ( bestPath [ 0 ] . components ) ;
145- }
146-
147- for ( let editLen = 1 ; editLen <= maxEditLen ; editLen ++ ) {
148- for ( let diag = - editLen ; diag <= editLen ; diag += 2 ) {
149- const addPath = bestPath [ diag - 1 ] ;
150- const removePath = bestPath [ diag + 1 ] ;
151- const opPos = ( removePath ? removePath . newPos : 0 ) - diag ;
152- if ( addPath ) {
153- bestPath [ diag - 1 ] = undefined ;
154- }
155-
156- const canAdd = addPath && addPath . newPos + 1 < newLen ;
157- const canRemove = removePath && opPos >= 0 && opPos < oldLen ;
158-
159- if ( ! canAdd && ! canRemove ) {
160- bestPath [ diag ] = undefined ;
161- continue ;
162- }
163-
164- /*
165- * v4 tie-breaker: pick the path that has progressed further in
166- * `newPos`. v6+ flipped this to favour `oldPos` ("deletions
167- * before insertions"), which would change which equally-valid
168- * LCS is selected for the post-revisions block pairing.
169- */
170- let basePath ;
171- if (
172- ! canAdd ||
173- ( canRemove && addPath . newPos < removePath . newPos )
174- ) {
175- basePath = clonePath ( removePath ) ;
176- pushComponent ( basePath . components , undefined , true ) ;
177- } else {
178- basePath = addPath ;
179- basePath . newPos ++ ;
180- pushComponent ( basePath . components , true , undefined ) ;
181- }
182-
183- const op = extractCommon ( basePath , diag ) ;
184-
185- if ( basePath . newPos + 1 >= newLen && op + 1 >= oldLen ) {
186- return buildValues ( basePath . components ) ;
187- }
188- bestPath [ diag ] = basePath ;
189- }
190- }
191-
192- return [ ] ;
49+ function isWhitespaceRawBlock ( rawBlock ) {
50+ return (
51+ rawBlock . blockName === null &&
52+ ( ! rawBlock . innerHTML || ! rawBlock . innerHTML . trim ( ) )
53+ ) ;
19354}
19455
19556/**
@@ -396,28 +257,32 @@ function pairSimilarBlocks( blocks ) {
396257 __previousRawBlock : rem . block ,
397258 } ;
398259
399- // Decide where to place the modified block by checking
400- // what's between the removed and added positions.
401- // If there are unpaired added blocks between them,
402- // placing at the removed position would put the modified
403- // block before content that comes before it in the
404- // current revision — so use the added position.
405- // Otherwise, use the removed position to keep the
406- // previous revision's order intact.
260+ /*
261+ * Decide where to place the modified block by checking what's
262+ * between the removed and added positions. If anything between
263+ * them is part of the current revision (an added block that
264+ * isn't paired, or an unchanged block), placing the modified
265+ * block at the removed position would push it before content
266+ * that comes before it in the current revision — so use the
267+ * added position. Otherwise, use the removed position to keep
268+ * the previous revision's reading order intact.
269+ */
407270 const lo = Math . min ( rem . index , bestMatch . index ) ;
408271 const hi = Math . max ( rem . index , bestMatch . index ) ;
409- let hasAddedBetween = false ;
272+ let crossesCurrentContent = false ;
410273 for ( let i = lo + 1 ; i < hi ; i ++ ) {
411- if (
412- blocks [ i ] . __revisionDiffStatus ?. status === 'added' &&
413- ! pairedAdded . has ( i )
414- ) {
415- hasAddedBetween = true ;
274+ const status = blocks [ i ] . __revisionDiffStatus ?. status ;
275+ if ( status === undefined ) {
276+ crossesCurrentContent = true ;
277+ break ;
278+ }
279+ if ( status === 'added' && ! pairedAdded . has ( i ) ) {
280+ crossesCurrentContent = true ;
416281 break ;
417282 }
418283 }
419284
420- if ( hasAddedBetween ) {
285+ if ( crossesCurrentContent ) {
421286 // Use the added position — don't jump before
422287 // current-revision content.
423288 modifications . set ( bestMatch . index , modifiedBlock ) ;
@@ -456,6 +321,15 @@ function pairSimilarBlocks( blocks ) {
456321 * @return {Array } Merged raw blocks with diff status injected.
457322 */
458323function diffRawBlocks ( currentRaw , previousRaw ) {
324+ // Strip whitespace-only freeform pseudo-blocks before LCS — see
325+ // `isWhitespaceRawBlock` for why.
326+ const currentBlocks = currentRaw . filter (
327+ ( b ) => ! isWhitespaceRawBlock ( b )
328+ ) ;
329+ const previousBlocks = previousRaw . filter (
330+ ( b ) => ! isWhitespaceRawBlock ( b )
331+ ) ;
332+
459333 const createBlockSignature = ( rawBlock ) =>
460334 JSON . stringify ( {
461335 name : rawBlock . blockName ,
@@ -466,8 +340,8 @@ function diffRawBlocks( currentRaw, previousRaw ) {
466340 ( c ) => c !== null && c . trim ( ) !== ''
467341 ) ,
468342 } ) ;
469- const currentSigs = currentRaw . map ( createBlockSignature ) ;
470- const previousSigs = previousRaw . map ( createBlockSignature ) ;
343+ const currentSigs = currentBlocks . map ( createBlockSignature ) ;
344+ const previousSigs = previousBlocks . map ( createBlockSignature ) ;
471345
472346 const diff = diffArrays ( previousSigs , currentSigs ) ;
473347
@@ -479,22 +353,22 @@ function diffRawBlocks( currentRaw, previousRaw ) {
479353 if ( part . added ) {
480354 for ( let i = 0 ; i < part . count ; i ++ ) {
481355 result . push ( {
482- ...currentRaw [ currIdx ++ ] ,
356+ ...currentBlocks [ currIdx ++ ] ,
483357 __revisionDiffStatus : { status : 'added' } ,
484358 } ) ;
485359 }
486360 } else if ( part . removed ) {
487361 for ( let i = 0 ; i < part . count ; i ++ ) {
488362 result . push ( {
489- ...previousRaw [ prevIdx ++ ] ,
363+ ...previousBlocks [ prevIdx ++ ] ,
490364 __revisionDiffStatus : { status : 'removed' } ,
491365 } ) ;
492366 }
493367 } else {
494368 // Matched blocks - recursively diff their innerBlocks.
495369 for ( let i = 0 ; i < part . count ; i ++ ) {
496- const currBlock = currentRaw [ currIdx ++ ] ;
497- const prevBlock = previousRaw [ prevIdx ++ ] ;
370+ const currBlock = currentBlocks [ currIdx ++ ] ;
371+ const prevBlock = previousBlocks [ prevIdx ++ ] ;
498372
499373 // Recursively diff inner blocks.
500374 const diffedInnerBlocks = diffRawBlocks (
@@ -666,15 +540,7 @@ function applyRichTextDiff( currentRichText, previousRichText ) {
666540 const currentText = currentRichText . toPlainText ( ) ;
667541 const previousText = previousRichText . toPlainText ( ) ;
668542
669- /*
670- * Diff the plain text (words for cleaner output).
671- *
672- * `diffWordsWithSpace` keeps whitespace as its own token, the same
673- * behaviour `diffWords` had in `diff` v4. v6+ stopped treating
674- * whitespace as a token, which would otherwise coalesce adjacent word
675- * changes into one removed/added pair instead of reporting them
676- * separately — making the inline rich-text diff less precise.
677- */
543+ // Diff the plain text (words for cleaner output).
678544 const textDiff = diffWordsWithSpace ( previousText , currentText ) ;
679545
680546 let result = create ( { text : '' } ) ;
0 commit comments