Skip to content

Commit 57a3891

Browse files
authored
Merge pull request #3355 from superdoc-dev/caio/IT-1065-diffing-table-text
fix(diffing): gate paragraph identity on structural depth (SD-3174)
2 parents 004eac5 + ff97a7b commit 57a3891

5 files changed

Lines changed: 74 additions & 0 deletions

File tree

packages/super-editor/src/editors/v1/extensions/diffing/algorithm/paragraph-diffing.test.js

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,35 @@ describe('paragraphComparator', () => {
159159
it('returns false for paragraphs with different identity signals', () => {
160160
expect(paragraphComparator({ fullText: 'one' }, { fullText: 'two' })).toBe(false);
161161
});
162+
163+
it('rejects paraId match across different depths', () => {
164+
// Regression: importers (e.g., Google Docs) renumber paraIds when structure
165+
// changes, so a table-cell paragraph (deeper) can collide with a body
166+
// paragraph (depth 0). Treat them as different paragraphs.
167+
const oldCell = createParagraphInfo({
168+
depth: 3,
169+
fullText: 'Milestone',
170+
node: createParagraphNode({ attrs: { paraId: '00000008' } }),
171+
});
172+
const newBody = createParagraphInfo({
173+
depth: 0,
174+
fullText: '',
175+
node: createParagraphNode({ attrs: { paraId: '00000008' } }),
176+
});
177+
expect(paragraphComparator(oldCell, newBody)).toBe(false);
178+
});
179+
180+
it('rejects content-signature match across different depths', () => {
181+
const oldCell = createParagraphInfo({ depth: 3, fullText: '' });
182+
const newBody = createParagraphInfo({ depth: 0, fullText: '' });
183+
expect(paragraphComparator(oldCell, newBody)).toBe(false);
184+
});
185+
186+
it('still matches by content signature at the same depth', () => {
187+
const oldParagraph = createParagraphInfo({ depth: 0, fullText: 'shared content' });
188+
const newParagraph = createParagraphInfo({ depth: 0, fullText: 'shared content' });
189+
expect(paragraphComparator(oldParagraph, newParagraph)).toBe(true);
190+
});
162191
});
163192

164193
describe('paragraph diff builders', () => {
@@ -488,4 +517,13 @@ describe('canTreatAsModification', () => {
488517
const b = { node: { attrs: {} }, fullText: 'dolor sit' };
489518
expect(canTreatAsModification(a, b)).toBe(false);
490519
});
520+
521+
it('rejects similarity-based pairing across different depths', () => {
522+
// Even with text similar enough to clear SIMILARITY_THRESHOLD, paragraphs
523+
// at different structural depths should not be re-paired as a single
524+
// modification - their replay anchor would land in the wrong context.
525+
const a = createParagraphInfo({ depth: 3, fullText: 'lorem ipsum dolor' });
526+
const b = createParagraphInfo({ depth: 0, fullText: 'lorem ipsum dolor!' });
527+
expect(canTreatAsModification(a, b)).toBe(false);
528+
});
491529
});

packages/super-editor/src/editors/v1/extensions/diffing/algorithm/paragraph-diffing.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,16 @@ export function shouldProcessEqualAsModification(
145145
*
146146
* The content signature covers both text and inline nodes (images, etc.),
147147
* so image-only paragraphs with different images are not falsely paired.
148+
*
149+
* Identity is gated on structural context (depth) first. Cross-depth pairing
150+
* via paraId or content-signature alone produces false identity when DOCX
151+
* importers renumber paraIds across structural changes, e.g. a table-cell
152+
* paragraph and a body paragraph sharing a paraId or both being empty.
148153
*/
149154
export function paragraphComparator(oldParagraph: ParagraphNodeInfo, newParagraph: ParagraphNodeInfo): boolean {
155+
if (oldParagraph?.depth !== newParagraph?.depth) {
156+
return false;
157+
}
150158
const oldId = oldParagraph?.node?.attrs?.paraId;
151159
const newId = newParagraph?.node?.attrs?.paraId;
152160
if (oldId && newId && oldId === newId) {
@@ -223,6 +231,9 @@ export function buildModifiedParagraphDiff(
223231
* Decides whether a delete/insert pair should be reinterpreted as a modification to minimize noisy diff output.
224232
*/
225233
export function canTreatAsModification(oldParagraph: ParagraphNodeInfo, newParagraph: ParagraphNodeInfo): boolean {
234+
if (oldParagraph?.depth !== newParagraph?.depth) {
235+
return false;
236+
}
226237
if (paragraphComparator(oldParagraph, newParagraph)) {
227238
return true;
228239
}

packages/super-editor/src/editors/v1/extensions/diffing/replayDiffs.test.js

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,11 @@ const getReplayFixturePairs = () => [
382382
['diff_before7.docx', 'diff_after7.docx'],
383383
['diff_before8.docx', 'diff_after8.docx'],
384384
['diff_before9.docx', 'diff_after9.docx'],
385+
// IT-1065: combined structural (table removal) + text change. Google Docs
386+
// renumbers paraIds across the structural change, so cell paragraphs in
387+
// before share paraIds with body paragraphs in after. Depth-gated identity
388+
// prevents false matches.
389+
['diff_before_it1065.docx', 'diff_after_it1065.docx'],
385390
];
386391

387392
/**
@@ -453,6 +458,26 @@ describe('replayDifferences can()', () => {
453458
});
454459
});
455460
describe('replayDiffs tracked changes', runTrackedReplayDiffsSuite);
461+
describe('replayDiffs tracked structural replay', () => {
462+
it('replays the IT-1065 table/text collision fixture with tracked changes enabled', async () => {
463+
const testUser = { name: 'Test User', email: 'test@example.com' };
464+
const beforeEditor = await getEditorFromFixture('diff_before_it1065.docx', testUser);
465+
const afterEditor = await getEditorFromFixture('diff_after_it1065.docx');
466+
467+
try {
468+
const diff = beforeEditor.commands.compareDocuments(afterEditor);
469+
const success = beforeEditor.commands.replayDifferences(diff, { applyTrackedChanges: true });
470+
471+
expect(success).toBe(true);
472+
expect(getTrackChanges(beforeEditor.state).length).toBeGreaterThan(0);
473+
expect(beforeEditor.commands.acceptAllTrackedChanges()).toBe(true);
474+
expect(beforeEditor.state.doc.textContent).toBe(afterEditor.state.doc.textContent);
475+
} finally {
476+
beforeEditor.destroy?.();
477+
afterEditor.destroy?.();
478+
}
479+
});
480+
});
456481
describe('replayDiffs tracked-change ids', () => {
457482
it('keeps tracked mark ids populated for diff_before8 replay', async () => {
458483
await expectTrackedReplayMarksHaveIds('diff_before8.docx', 'diff_after8.docx');
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)