Skip to content

Commit 28f88a9

Browse files
committed
fix(diffing): gate paragraph identity on structural depth (IT-1065)
Diff replay was overwriting table-cell content with body-paragraph text when a comparison contained both a structural change (table removed) and trailing text changes. DOCX importers renumber w14:paraId values across structural changes, so cell-paragraph IDs in the source can collide with top-level paragraph IDs in the target. The flat sequence diff then paired them as the same paragraph and emitted modification ops anchored inside the table. Gate paragraphComparator and canTreatAsModification on depth equality before any identity signal. Cross-depth content-signature matches were the same hazard for blank or repeated-label paragraphs. Add a fixture pair derived from the IT-1065 reporter's documents and four unit tests covering paraId, content-signature, and similarity cross-depth rejection.
1 parent f86b762 commit 28f88a9

5 files changed

Lines changed: 54 additions & 0 deletions

File tree

packages/super-editor/src/editors/v1/extensions/diffing/algorithm/paragraph-diffing.test.js

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,35 @@ describe('paragraphComparator', () => {
159159
it('returns false for paragraphs with different identity signals', () => {
160160
expect(paragraphComparator({ fullText: 'one' }, { fullText: 'two' })).toBe(false);
161161
});
162+
163+
it('rejects paraId match across different depths', () => {
164+
// Regression: importers (e.g., Google Docs) renumber paraIds when structure
165+
// changes, so a table-cell paragraph (deeper) can collide with a body
166+
// paragraph (depth 0). Treat them as different paragraphs.
167+
const oldCell = createParagraphInfo({
168+
depth: 3,
169+
fullText: 'Milestone',
170+
node: createParagraphNode({ attrs: { paraId: '00000008' } }),
171+
});
172+
const newBody = createParagraphInfo({
173+
depth: 0,
174+
fullText: '',
175+
node: createParagraphNode({ attrs: { paraId: '00000008' } }),
176+
});
177+
expect(paragraphComparator(oldCell, newBody)).toBe(false);
178+
});
179+
180+
it('rejects content-signature match across different depths', () => {
181+
const oldCell = createParagraphInfo({ depth: 3, fullText: '' });
182+
const newBody = createParagraphInfo({ depth: 0, fullText: '' });
183+
expect(paragraphComparator(oldCell, newBody)).toBe(false);
184+
});
185+
186+
it('still matches by content signature at the same depth', () => {
187+
const oldParagraph = createParagraphInfo({ depth: 0, fullText: 'shared content' });
188+
const newParagraph = createParagraphInfo({ depth: 0, fullText: 'shared content' });
189+
expect(paragraphComparator(oldParagraph, newParagraph)).toBe(true);
190+
});
162191
});
163192

164193
describe('paragraph diff builders', () => {
@@ -488,4 +517,13 @@ describe('canTreatAsModification', () => {
488517
const b = { node: { attrs: {} }, fullText: 'dolor sit' };
489518
expect(canTreatAsModification(a, b)).toBe(false);
490519
});
520+
521+
it('rejects similarity-based pairing across different depths', () => {
522+
// Even with text similar enough to clear SIMILARITY_THRESHOLD, paragraphs
523+
// at different structural depths should not be re-paired as a single
524+
// modification - their replay anchor would land in the wrong context.
525+
const a = createParagraphInfo({ depth: 3, fullText: 'lorem ipsum dolor' });
526+
const b = createParagraphInfo({ depth: 0, fullText: 'lorem ipsum dolor!' });
527+
expect(canTreatAsModification(a, b)).toBe(false);
528+
});
491529
});

packages/super-editor/src/editors/v1/extensions/diffing/algorithm/paragraph-diffing.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,16 @@ export function shouldProcessEqualAsModification(
145145
*
146146
* The content signature covers both text and inline nodes (images, etc.),
147147
* so image-only paragraphs with different images are not falsely paired.
148+
*
149+
* Identity is gated on structural context (depth) first. Cross-depth pairing
150+
* via paraId or content-signature alone produces false identity when DOCX
151+
* importers renumber paraIds across structural changes, e.g. a table-cell
152+
* paragraph and a body paragraph sharing a paraId or both being empty.
148153
*/
149154
export function paragraphComparator(oldParagraph: ParagraphNodeInfo, newParagraph: ParagraphNodeInfo): boolean {
155+
if (oldParagraph?.depth !== newParagraph?.depth) {
156+
return false;
157+
}
150158
const oldId = oldParagraph?.node?.attrs?.paraId;
151159
const newId = newParagraph?.node?.attrs?.paraId;
152160
if (oldId && newId && oldId === newId) {
@@ -223,6 +231,9 @@ export function buildModifiedParagraphDiff(
223231
* Decides whether a delete/insert pair should be reinterpreted as a modification to minimize noisy diff output.
224232
*/
225233
export function canTreatAsModification(oldParagraph: ParagraphNodeInfo, newParagraph: ParagraphNodeInfo): boolean {
234+
if (oldParagraph?.depth !== newParagraph?.depth) {
235+
return false;
236+
}
226237
if (paragraphComparator(oldParagraph, newParagraph)) {
227238
return true;
228239
}

packages/super-editor/src/editors/v1/extensions/diffing/replayDiffs.test.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,11 @@ const getReplayFixturePairs = () => [
382382
['diff_before7.docx', 'diff_after7.docx'],
383383
['diff_before8.docx', 'diff_after8.docx'],
384384
['diff_before9.docx', 'diff_after9.docx'],
385+
// IT-1065: combined structural (table removal) + text change. Google Docs
386+
// renumbers paraIds across the structural change, so cell paragraphs in
387+
// before share paraIds with body paragraphs in after. Depth-gated identity
388+
// prevents false matches.
389+
['diff_before_it1065.docx', 'diff_after_it1065.docx'],
385390
];
386391

387392
/**
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)