SD-2443 - fix: comments being merged by range when they shouldn't (#2735)

chittolinag · chittolina · caio-pizzol · web-flow · commit 45d03e15fedf · 2026-04-25T00:07:00.000Z
* fix: comments being merged

* fix: ooxml compliance

* test: added more tests around paraId and paraIdParent

* fix(comments-export): key commentsExtended.xml guard off file-set, not origin (SD-2443)

detectDocumentOrigin stamps every file missing commentsExtended.xml as
origin='google-docs' on import, including legacy Word range-based files,
so the previous exportStrategy !== 'google-docs' guard never fired for
the exact class of files SD-2443 targets. Testing 123.docx round-tripped
only because the pre-existing hasThreadedComments safeguard caught one
imported reply link; strip the parent links and the regression
returns.

Flips the two commentThreadingProfile integration tests and the
commentsExporter "still honors Google Docs export strategy" unit test
that codified the bug (they asserted commentsExtended.xml should be
dropped for comments.xml-only imports). Adds the range-based +
origin=google-docs + no parent links regression case, plus a
commentsExtended profile test so the override does not over-fire.

---------

Co-authored-by: Gabriel Chittolina <gabrielchittolina1@gmail.com>
Co-authored-by: Caio Pizzol <caio@superdoc.dev>
diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v2/exporter/commentsExporter.js b/packages/super-editor/src/editors/v1/core/super-converter/v2/exporter/commentsExporter.js
@@ -37,7 +37,6 @@ export const getCommentDefinition = (comment, commentId, allComments, editor) =>
   const attributes = {
     'w:id': String(commentId),
     'w:author': comment.creatorName || comment.importedAuthor?.name,
-    'w:email': comment.creatorEmail || comment.importedAuthor?.email,
     'w:date': toIsoNoFractional(comment.createdTime),
     'w:initials': getInitials(comment.creatorName),
     'w:done': comment.resolvedTime ? '1' : '0',
@@ -48,6 +47,7 @@ export const getCommentDefinition = (comment, commentId, allComments, editor) =>
     'custom:trackedChangeType': comment.trackedChangeType,
     'custom:trackedChangeDisplayType': comment.trackedChangeDisplayType || null,
     'custom:trackedDeletedText': comment.deletedText || null,
+    'custom:email': comment.creatorEmail || comment.importedAuthor?.email,
   };
 
   // Add the w15:paraIdParent attribute if the comment has a parent
@@ -132,7 +132,6 @@ export const updateCommentsXml = (commentDefs = [], commentsXml) => {
     commentDef.attributes = {
       'w:id': commentDef.attributes['w:id'],
       'w:author': commentDef.attributes['w:author'],
-      'w:email': commentDef.attributes['w:email'],
       'w:date': commentDef.attributes['w:date'],
       'w:initials': commentDef.attributes['w:initials'],
       'custom:internalId': commentDef.attributes['custom:internalId'],
@@ -141,6 +140,7 @@ export const updateCommentsXml = (commentDefs = [], commentsXml) => {
       'custom:trackedChangeType': commentDef.attributes['custom:trackedChangeType'],
       'custom:trackedChangeDisplayType': commentDef.attributes['custom:trackedChangeDisplayType'],
       'custom:trackedDeletedText': commentDef.attributes['custom:trackedDeletedText'],
+      'custom:email': commentDef.attributes['custom:email'],
       'xmlns:custom': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
     };
   });
@@ -400,10 +400,17 @@ export const prepareCommentsXmlFilesForExport = ({
   relationships.push(generateRelationship('comments.xml'));
   emittedTargets.add('comments.xml');
 
+  // Key off the file-set capability, not exportStrategy: the importer tags
+  // every file missing commentsExtended.xml as origin='google-docs', including
+  // legacy Word range-based files, so exportStrategy can't distinguish them.
+  const forceWordThreadingProfile =
+    threadingProfile?.defaultStyle === 'range-based' && threadingProfile?.fileSet?.hasCommentsExtended === false;
+  const effectiveThreadingProfile = forceWordThreadingProfile ? 'word' : threadingProfile || exportStrategy;
+
   const commentsExtendedXml = updateCommentsExtendedXml(
     commentsWithParaIds,
     updatedXml['word/commentsExtended.xml'],
-    threadingProfile || exportStrategy,
+    effectiveThreadingProfile,
   );
 
   // Only add the file and relationship if we're actually generating commentsExtended.xml
diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v2/exporter/commentsExporter.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v2/exporter/commentsExporter.test.js
@@ -381,12 +381,133 @@ describe('prepareCommentsXmlFilesForExport', () => {
       expect(result.removedTargets).toHaveLength(0);
     });
   });
+
+  describe('threading profile overrides', () => {
+    it('forces Word-style threading when profile is range-based and the import lacks commentsExtended.xml', () => {
+      const threadingProfile = {
+        defaultStyle: 'range-based',
+        mixed: false,
+        fileSet: {
+          hasCommentsExtended: false,
+          hasCommentsExtensible: false,
+          hasCommentsIds: false,
+        },
+      };
+
+      // Multiple unthreaded comments — exercises the scenario where the
+      // importer would otherwise guess thread parents from overlapping ranges.
+      const unthreadedComments = [
+        makeComment({ commentId: 'c1', commentParaId: 'AAAAAAA1' }),
+        makeComment({ commentId: 'c2', commentParaId: 'AAAAAAA2' }),
+        makeComment({ commentId: 'c3', commentParaId: 'AAAAAAA3' }),
+      ];
+      const unthreadedDefs = unthreadedComments.map((c, i) => makeCommentDef(String(i), c.commentParaId));
+
+      const result = prepareCommentsXmlFilesForExport({
+        convertedXml: makeConvertedXml(),
+        defs: unthreadedDefs,
+        commentsWithParaIds: unthreadedComments,
+        exportType: 'external',
+        threadingProfile,
+      });
+
+      const extXml = result.documentXml['word/commentsExtended.xml'];
+      expect(extXml).toBeDefined();
+      const rel = result.relationships.find((r) => r.attributes.Target === 'commentsExtended.xml');
+      expect(rel).toBeDefined();
+
+      // One w15:commentEx entry per comment, each with w15:paraId and NO
+      // w15:paraIdParent — the missing parent ids are what prevents the
+      // importer from reconstructing threads from overlapping ranges.
+      const entries = extXml.elements[0].elements;
+      expect(entries).toHaveLength(unthreadedComments.length);
+      const paraIds = new Set();
+      for (const entry of entries) {
+        expect(entry.name).toBe('w15:commentEx');
+        expect(entry.attributes['w15:paraId']).toBeDefined();
+        expect(entry.attributes['w15:paraIdParent']).toBeUndefined();
+        paraIds.add(entry.attributes['w15:paraId']);
+      }
+      expect(paraIds.size).toBe(unthreadedComments.length);
+    });
+
+    it('emits commentsExtended.xml for range-based files with no original extended part, even when every comment is tagged origin=google-docs', () => {
+      // Regression case: detectDocumentOrigin stamps every comment in a
+      // comments.xml-only file as origin='google-docs', including legacy Word
+      // range-based files. Without the fileSet-based guard, the exporter
+      // silently dropped commentsExtended.xml here and re-import rebuilt
+      // threads from range overlaps.
+      const threadingProfile = {
+        defaultStyle: 'range-based',
+        mixed: false,
+        fileSet: {
+          hasCommentsExtended: false,
+          hasCommentsExtensible: false,
+          hasCommentsIds: false,
+        },
+      };
+
+      const importedAsGoogleDocs = [
+        makeComment({ commentId: 'c1', commentParaId: '126B0C7F', origin: 'google-docs' }),
+        makeComment({ commentId: 'c2', commentParaId: '126B0C80', origin: 'google-docs' }),
+      ];
+      const importedDefs = [makeCommentDef('0', '126B0C7F'), makeCommentDef('1', '126B0C80')];
+
+      const result = prepareCommentsXmlFilesForExport({
+        convertedXml: makeConvertedXml(),
+        defs: importedDefs,
+        commentsWithParaIds: importedAsGoogleDocs,
+        exportType: 'external',
+        threadingProfile,
+      });
+
+      const extendedXml = result.documentXml['word/commentsExtended.xml'];
+      expect(extendedXml).toBeDefined();
+
+      const entries = extendedXml.elements[0].elements;
+      expect(entries).toHaveLength(2);
+      for (const entry of entries) {
+        expect(entry.attributes['w15:paraId']).toBeDefined();
+        expect(entry.attributes['w15:paraIdParent']).toBeUndefined();
+      }
+
+      const rel = result.relationships.find((r) => r.attributes.Target === 'commentsExtended.xml');
+      expect(rel).toBeDefined();
+    });
+
+    it('leaves existing commentsExtended profile untouched when the import already ships commentsExtended.xml', () => {
+      // The override keys off fileSet.hasCommentsExtended === false. When the
+      // import already carries commentsExtended.xml the importer classifies
+      // the profile as 'commentsExtended' and the existing export path owns
+      // it; the override must not re-enter.
+      const threadingProfile = {
+        defaultStyle: 'commentsExtended',
+        mixed: false,
+        fileSet: {
+          hasCommentsExtended: true,
+          hasCommentsExtensible: false,
+          hasCommentsIds: false,
+        },
+      };
+
+      const result = prepareCommentsXmlFilesForExport({
+        convertedXml: makeConvertedXml(),
+        defs,
+        commentsWithParaIds,
+        exportType: 'external',
+        threadingProfile,
+      });
+
+      expect(result.documentXml['word/commentsExtended.xml']).toBeDefined();
+    });
+  });
 });
 
 describe('getCommentDefinition', () => {
   it('preserves tracked change display metadata for exported tracked-change comments', () => {
     const definition = getCommentDefinition(
       makeComment({
+        creatorEmail: 'author@example.com',
         trackedChange: true,
         trackedChangeType: 'trackFormat',
         trackedChangeText: 'https://example.com',
@@ -400,6 +521,8 @@ describe('getCommentDefinition', () => {
     expect(definition.attributes['custom:trackedChangeType']).toBe('trackFormat');
     expect(definition.attributes['custom:trackedChangeText']).toBe('https://example.com');
     expect(definition.attributes['custom:trackedChangeDisplayType']).toBe('hyperlinkAdded');
+    expect(definition.attributes['custom:email']).toBe('author@example.com');
+    expect(definition.attributes['w:email']).toBeUndefined();
   });
 });
 
@@ -609,5 +732,31 @@ describe('updateCommentsXml', () => {
     const lastParagraph = updatedComment.elements[updatedComment.elements.length - 1];
 
     expect(lastParagraph.attributes['w14:paraId']).toBe('ABC12345');
+    expect(updatedComment.attributes['w:email']).toBeUndefined();
+    expect(updatedComment.attributes['custom:email']).toBeUndefined();
+  });
+
+  it('preserves custom author email attribute and omits w:email', () => {
+    const commentDef = {
+      type: 'element',
+      name: 'w:comment',
+      attributes: {
+        'w:id': '1',
+        'w:author': 'Author',
+        'w:initials': 'A',
+        'w15:paraId': 'EMAIL123',
+        'custom:email': 'author@example.com',
+      },
+      elements: [{ type: 'element', name: 'w:p', attributes: {}, elements: [] }],
+    };
+    const commentsXml = {
+      elements: [{ elements: [] }],
+    };
+
+    const result = updateCommentsXml([commentDef], commentsXml);
+    const updatedComment = result.elements[0].elements[0];
+
+    expect(updatedComment.attributes['w:email']).toBeUndefined();
+    expect(updatedComment.attributes['custom:email']).toBe('author@example.com');
   });
 });
diff --git a/packages/super-editor/src/editors/v1/tests/export/commentThreadingProfile.test.js b/packages/super-editor/src/editors/v1/tests/export/commentThreadingProfile.test.js
@@ -109,20 +109,21 @@ describe('Partial threading profile (nested-comments.docx)', () => {
 });
 
 // ---------------------------------------------------------------------------
-// Scenario 2 – Google Docs profile, no threading (comments.xml only)
-// gdocs-single-comment.docx has: comments.xml with 1 non-threaded comment.
-// No commentsExtended / commentsIds / commentsExtensible.
-// Since there are no threaded comments, the exporter should NOT fabricate
-// auxiliary files — the range-based threading model is preserved.
+// Scenario 2 – Range-based profile without a shipped commentsExtended.xml
+// (gdocs-single-comment.docx: comments.xml + 1 non-threaded comment, no
+// commentsExtended / commentsIds / commentsExtensible).
+// The exporter must synthesize commentsExtended.xml so re-import does not
+// reconstruct threads from range overlaps. commentsIds / commentsExtensible
+// stay absent: they were not in the import file-set.
 // ---------------------------------------------------------------------------
-describe('Google Docs profile without threading (gdocs-single-comment.docx)', () => {
+describe('Range-based profile without commentsExtended (gdocs-single-comment.docx)', () => {
   let docx, media, mediaFiles, fonts;
 
   beforeAll(async () => {
     ({ docx, media, mediaFiles, fonts } = await loadTestDataForEditorTests('gdocs-single-comment.docx'));
   });
 
-  it('emits only comments.xml — no auxiliary files fabricated', async () => {
+  it('synthesizes commentsExtended.xml and leaves commentsIds/Extensible absent', async () => {
     const { editor } = initTestEditor({ content: docx, media, mediaFiles, fonts });
 
     try {
@@ -135,19 +136,16 @@ describe('Google Docs profile without threading (gdocs-single-comment.docx)', ()
         getUpdatedDocs: true,
       });
 
-      // comments.xml must be present
       expect(updatedDocs['word/comments.xml']).toEqual(expect.any(String));
-
-      // The three auxiliary files must all be null (removed / never existed)
-      expect(updatedDocs['word/commentsExtended.xml']).toBeNull();
+      expect(updatedDocs['word/commentsExtended.xml']).toEqual(expect.any(String));
       expect(updatedDocs['word/commentsIds.xml']).toBeNull();
       expect(updatedDocs['word/commentsExtensible.xml']).toBeNull();
     } finally {
       editor.destroy();
     }
   });
 
-  it('produces a zip with only comments.xml', async () => {
+  it('produces a zip with comments.xml and the synthesized commentsExtended.xml', async () => {
     const { editor } = initTestEditor({ content: docx, media, mediaFiles, fonts });
 
     try {
@@ -161,13 +159,18 @@ describe('Google Docs profile without threading (gdocs-single-comment.docx)', ()
       const zip = await zipper.unzip(blob);
 
       expect(zip.file('word/comments.xml')).not.toBeNull();
-      expect(zip.file('word/commentsExtended.xml')).toBeNull();
+      expect(zip.file('word/commentsExtended.xml')).not.toBeNull();
       expect(zip.file('word/commentsIds.xml')).toBeNull();
       expect(zip.file('word/commentsExtensible.xml')).toBeNull();
 
+      const extendedXml = await zip.file('word/commentsExtended.xml').async('string');
+      const paraIdMatches = extendedXml.match(/w15:paraId="/g) ?? [];
+      expect(paraIdMatches.length).toBe(comments.length);
+      expect(extendedXml).not.toContain('w15:paraIdParent');
+
       const contentTypes = await zip.file('[Content_Types].xml').async('string');
       expect(contentTypes).toContain('/word/comments.xml');
-      expect(contentTypes).not.toContain('/word/commentsExtended.xml');
+      expect(contentTypes).toContain('/word/commentsExtended.xml');
       expect(contentTypes).not.toContain('/word/commentsIds.xml');
       expect(contentTypes).not.toContain('/word/commentsExtensible.xml');
     } finally {
diff --git a/packages/super-editor/src/editors/v1/tests/import/documentCommentsImporter.unit.test.js b/packages/super-editor/src/editors/v1/tests/import/documentCommentsImporter.unit.test.js
@@ -45,6 +45,7 @@ const buildDocx = ({ comments = [], extended = [], documentRanges = [] } = {}) =
       'custom:trackedChangeType': comment.trackedChangeType,
       'custom:trackedChangeDisplayType': comment.trackedChangeDisplayType,
       'custom:trackedDeletedText': comment.trackedDeletedText,
+      ...(comment.customEmail ? { 'custom:email': comment.customEmail } : {}),
     },
     elements: comment.elements ?? [{ fakeParaId: comment.paraId ?? `para-${comment.id}` }],
   }));
@@ -280,6 +281,22 @@ describe('importCommentData metadata parsing', () => {
     const [comment] = importCommentData({ docx });
     expect(comment.elements).toHaveLength(2);
   });
+
+  it('reads custom:email when w:email is absent', () => {
+    const docx = buildDocx({
+      comments: [
+        {
+          id: 6,
+          author: 'Custom Email',
+          customEmail: 'custom@example.com',
+        },
+      ],
+    });
+    delete docx['word/comments.xml'].elements[0].elements[0].attributes['w:email'];
+
+    const [comment] = importCommentData({ docx });
+    expect(comment.creatorEmail).toBe('custom@example.com');
+  });
 });
 
 describe('importCommentData extended metadata', () => {