Skip to content

Commit 8a8d6f5

Browse files
committed
fix(super-editor): preserve line breaks through DOCX export (SD-3278)
Multi-line text passed into text-mode mutations stored each newline as a raw \n inside one <w:t>, which Word collapses on open while SuperDoc renders it as a break. Convert newlines to lineBreak nodes at creation, and split any residual raw newline into <w:t>/<w:br/> within one run on export, so the break serializes as a Word-native <w:br/> (ECMA-376 17.3.3.1).
- buildTextWithTabs: normalize \n, \r\n, and \r to lineBreak nodes, but only when the parent admits a lineBreak child; text*-only parents (e.g. total-page-number) keep the raw newline
- materializeLineBreak: prefer lineBreak over hardBreak so a structural kind:'lineBreak' is a soft break, not a page break
- del-translator: rename every <w:t> in a split run to <w:delText> (17.3.3.7 requires delText for all deleted text)
1 parent 85802da commit 8a8d6f5

11 files changed

Lines changed: 716 additions & 41 deletions

File tree

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,18 @@ function decode(params) {
104104
return null;
105105
}
106106

107-
// ECMA-376 renames w:t → w:delText inside <w:del>. Other inline content —
108-
// w:noBreakHyphen, w:tab, w:br, etc. — stays as-is; the deletion is
109-
// conveyed by the <w:del> wrapper alone. Guard the rename so non-text
110-
// atoms inside <w:del> don't crash.
111-
const textNode = translatedTextNode.elements.find((n) => n.name === 'w:t');
112-
if (textNode) textNode.name = 'w:delText';
107+
// ECMA-376 (17.3.3.7) requires w:delText for ALL text runs inside <w:del>. A
108+
// single run can now hold multiple <w:t> siblings, because the newline export
109+
// safety net splits text around <w:br/> (e.g. <w:t>Alpha</w:t><w:br/><w:t>Beta</w:t>),
110+
// so rename every direct w:t, not just the first; a leftover <w:t> inside
111+
// <w:del> would not be treated as deleted. Other inline content
112+
// (w:noBreakHyphen, w:tab, w:br, etc.) stays as-is; the <w:del> wrapper alone
113+
// conveys the deletion.
114+
(translatedTextNode.elements || [])
115+
.filter((n) => n.name === 'w:t')
116+
.forEach((n) => {
117+
n.name = 'w:delText';
118+
});
113119

114120
return {
115121
name: 'w:del',

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,37 @@ describe('w:del translator', () => {
177177
expect(result.elements[0].elements[0].name).toBe('w:delText');
178178
});
179179

180+
it('renames every <w:t> in a multi-segment run to <w:delText> (newline split)', () => {
181+
const mockTrackedMark = {
182+
type: 'trackDelete',
183+
attrs: {
184+
id: '789',
185+
sourceId: '',
186+
author: 'Test',
187+
authorEmail: 'test@example.com',
188+
date: '2025-10-09T12:00:00Z',
189+
},
190+
};
191+
192+
// The newline export safety net produces one run with interleaved w:t/w:br;
193+
// every w:t inside <w:del> must become w:delText, not just the first.
194+
exportSchemaToJson.mockReturnValue({
195+
name: 'w:r',
196+
elements: [
197+
{ name: 'w:t', elements: [{ text: 'Alpha', type: 'text' }] },
198+
{ name: 'w:br' },
199+
{ name: 'w:t', elements: [{ text: 'Beta', type: 'text' }] },
200+
],
201+
});
202+
203+
const node = { type: 'text', text: 'Alpha\nBeta', marks: [mockTrackedMark] };
204+
const result = config.decode({ node });
205+
206+
const run = result.elements[0];
207+
expect(run.elements.map((n) => n.name)).toEqual(['w:delText', 'w:br', 'w:delText']);
208+
expect(run.elements.some((n) => n.name === 'w:t')).toBe(false);
209+
});
210+
180211
it('writes sourceId to w:id for round-trip fidelity', () => {
181212
const mockTrackedMark = {
182213
type: 'trackDelete',

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,38 @@ export function getTextNodeForExport(text, marks, params) {
4444
partPath: resolveExportPartPath(params),
4545
});
4646

47-
textNodes.push({
48-
name: 'w:t',
49-
elements: [{ text, type: 'text' }],
50-
attributes: nodeAttrs,
51-
});
47+
const textValue = typeof text === 'string' ? text : '';
48+
// Normalize CRLF/CR to LF so Windows line endings export Word-native breaks
49+
// too, rather than leaving a stray carriage return inside <w:t>.
50+
const normalizedText = textValue.includes('\r') ? textValue.replace(/\r\n?/g, '\n') : textValue;
51+
if (normalizedText.includes('\n')) {
52+
// Export safety net: a raw newline inside <w:t> is whitespace that Word
53+
// collapses on open (it is not the OOXML representation of a line break),
54+
// while SuperDoc still renders it as a break: the SD-3278
55+
// divergence. Emit a Word-native <w:br/> between
56+
// segments instead. Everything stays inside this single run so the
57+
// surrounding <w:ins>/<w:del> wrappers keep wrapping exactly one run.
58+
const segments = normalizedText.split('\n');
59+
segments.forEach((segment, index) => {
60+
if (segment.length > 0) {
61+
const segmentNeedsSpace = /^\s|\s$/.test(segment);
62+
textNodes.push({
63+
name: 'w:t',
64+
elements: [{ text: segment, type: 'text' }],
65+
attributes: segmentNeedsSpace ? { 'xml:space': 'preserve' } : null,
66+
});
67+
}
68+
if (index < segments.length - 1) {
69+
textNodes.push({ name: 'w:br' });
70+
}
71+
});
72+
} else {
73+
textNodes.push({
74+
name: 'w:t',
75+
elements: [{ text: normalizedText, type: 'text' }],
76+
attributes: nodeAttrs,
77+
});
78+
}
5279

5380
// For custom mark export, we need to add a bookmark start and end tag
5481
// And store attributes in the bookmark name

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,4 +134,80 @@ describe('getTextNodeForExport', () => {
134134
const runPropertiesChange = runProperties.elements.find((element) => element.name === 'w:rPrChange');
135135
expect(runPropertiesChange.attributes['w:id']).toBe('7');
136136
});
137+
138+
// SD-3278 export safety net: a raw newline left inside a PM text
139+
// node (e.g. from an imported .docx that stored breaks as literal '\n') must
140+
// export as a Word-native <w:br/>, not a collapsed newline inside <w:t>.
141+
describe('raw newline export safety net', () => {
142+
const contentElements = (result) => result.elements.filter((el) => el.name === 'w:t' || el.name === 'w:br');
143+
144+
it('exports a single newline as <w:t>/<w:br/>/<w:t> within one run', () => {
145+
const result = getTextNodeForExport('Alpha\nBeta', [], buildParams());
146+
expect(result.name).toBe('w:r');
147+
const content = contentElements(result);
148+
expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']);
149+
expect(content[0].elements[0].text).toBe('Alpha');
150+
expect(content[2].elements[0].text).toBe('Beta');
151+
});
152+
153+
it('never leaves a raw newline inside a <w:t>', () => {
154+
const result = getTextNodeForExport('Alpha\nBeta', [], buildParams());
155+
const texts = result.elements.filter((el) => el.name === 'w:t');
156+
expect(texts.some((el) => el.elements[0].text.includes('\n'))).toBe(false);
157+
});
158+
159+
it('emits a soft break (no w:type="page") for the <w:br/>', () => {
160+
const result = getTextNodeForExport('Alpha\nBeta', [], buildParams());
161+
const br = result.elements.find((el) => el.name === 'w:br');
162+
expect(br).toBeDefined();
163+
expect(br.attributes?.['w:type']).toBeUndefined();
164+
});
165+
166+
it('leaves newline-free text as a single <w:t> (unchanged)', () => {
167+
const result = getTextNodeForExport('hello world', [], buildParams());
168+
const content = contentElements(result);
169+
expect(content).toHaveLength(1);
170+
expect(content[0].name).toBe('w:t');
171+
expect(content[0].elements[0].text).toBe('hello world');
172+
});
173+
174+
it('emits a <w:br/> for each newline including leading, trailing, and consecutive newlines', () => {
175+
const result = getTextNodeForExport('\nA\n\nB\n', [], buildParams());
176+
const content = contentElements(result);
177+
expect(content.map((el) => el.name)).toEqual(['w:br', 'w:t', 'w:br', 'w:br', 'w:t', 'w:br']);
178+
const texts = content.filter((el) => el.name === 'w:t').map((el) => el.elements[0].text);
179+
expect(texts).toEqual(['A', 'B']);
180+
});
181+
182+
it('sets xml:space="preserve" only on segments with edge whitespace', () => {
183+
const result = getTextNodeForExport('Alpha \n Beta', [], buildParams());
184+
const texts = result.elements.filter((el) => el.name === 'w:t');
185+
expect(texts[0].elements[0].text).toBe('Alpha ');
186+
expect(texts[0].attributes).toEqual({ 'xml:space': 'preserve' });
187+
expect(texts[1].elements[0].text).toBe(' Beta');
188+
expect(texts[1].attributes).toEqual({ 'xml:space': 'preserve' });
189+
});
190+
191+
it('does not set xml:space on segments without edge whitespace', () => {
192+
const result = getTextNodeForExport('Alpha\nBeta', [], buildParams());
193+
const texts = result.elements.filter((el) => el.name === 'w:t');
194+
expect(texts[0].attributes).toBeNull();
195+
expect(texts[1].attributes).toBeNull();
196+
});
197+
198+
it('normalizes CRLF to a <w:br/> on export', () => {
199+
const content = contentElements(getTextNodeForExport('Alpha\r\nBeta', [], buildParams()));
200+
expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']);
201+
expect(content[0].elements[0].text).toBe('Alpha');
202+
expect(content[2].elements[0].text).toBe('Beta');
203+
});
204+
205+
it('normalizes a bare CR to a <w:br/> without leaving a stray carriage return in <w:t>', () => {
206+
const result = getTextNodeForExport('Alpha\rBeta', [], buildParams());
207+
const content = contentElements(result);
208+
expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']);
209+
const texts = result.elements.filter((el) => el.name === 'w:t');
210+
expect(texts.some((el) => el.elements[0].text.includes('\r'))).toBe(false);
211+
});
212+
});
137213
});

packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ import { describe, expect, it, vi } from 'vitest';
22
import { Fragment, Schema } from 'prosemirror-model';
33
import { buildTextWithTabs, parentAllowsNodeAt, textBetweenWithTabs } from './text-with-tabs.js';
44

5-
function makeRealSchema(options: { hasTab?: boolean; hasNoBreakHyphen?: boolean; hasGenericLeaf?: boolean } = {}) {
5+
function makeRealSchema(
6+
options: { hasTab?: boolean; hasLineBreak?: boolean; hasNoBreakHyphen?: boolean; hasGenericLeaf?: boolean } = {},
7+
) {
68
const nodes: Record<string, any> = {
79
doc: { content: 'paragraph+' },
810
paragraph: { group: 'block', content: 'inline*' },
@@ -13,6 +15,11 @@ function makeRealSchema(options: { hasTab?: boolean; hasNoBreakHyphen?: boolean;
1315
// Tab is non-leaf, which is why `textBetweenWithTabs` (not PM's built-in textBetween) is needed.
1416
nodes.tab = { group: 'inline', inline: true, atom: true, content: 'inline*' };
1517
}
18+
if (options.hasLineBreak) {
19+
// Mirrors the real extensions/line-break/line-break.js shape: inline atom
20+
// that disallows marks (`marks: ''`) and renders to <br> / exports to <w:br/>.
21+
nodes.lineBreak = { group: 'inline', inline: true, atom: true, marks: '' };
22+
}
1623
if (options.hasNoBreakHyphen) {
1724
// Mirrors the real extensions/no-break-hyphen schema: inline leaf atom with leafText.
1825
nodes.noBreakHyphen = { group: 'inline', inline: true, atom: true, leafText: () => '‑' };
@@ -99,6 +106,108 @@ describe('buildTextWithTabs', () => {
99106
expect(result.child(2).text).toBe('y');
100107
expect(result.child(2).marks.some((m: any) => m.type.name === 'bold')).toBe(true);
101108
});
109+
110+
// SD-3278: a literal '\n' inside a text node exports as a raw newline
111+
// inside <w:t>, which Word collapses. It must become a `lineBreak` node so the
112+
// exporter emits a Word-native <w:br/>.
113+
it('splits text around a single newline into text + lineBreak + text', () => {
114+
const schema = makeRealSchema({ hasLineBreak: true });
115+
const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined);
116+
expect(result).toBeInstanceOf(Fragment);
117+
const fragment = result as Fragment;
118+
expect(fragment.childCount).toBe(3);
119+
expect(fragment.child(0).text).toBe('Alpha');
120+
expect(fragment.child(1).type.name).toBe('lineBreak');
121+
expect(fragment.child(2).text).toBe('Beta');
122+
});
123+
124+
it('emits a lineBreak node even when the schema has no tab node type', () => {
125+
const schema = makeRealSchema({ hasLineBreak: true });
126+
const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined) as Fragment;
127+
expect(result).toBeInstanceOf(Fragment);
128+
expect(result.childCount).toBe(3);
129+
expect(result.child(1).type.name).toBe('lineBreak');
130+
});
131+
132+
it('keeps the raw newline in a single text node when the schema has no lineBreak node type', () => {
133+
const schema = makeRealSchema({ hasTab: true });
134+
const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined);
135+
expect((result as any).isText).toBe(true);
136+
expect((result as any).text).toBe('Alpha\nBeta');
137+
});
138+
139+
it('does not attach marks to the lineBreak node (its schema disallows marks)', () => {
140+
const schema = makeRealSchema({ hasLineBreak: true });
141+
const boldMark = schema.marks.bold.create();
142+
const result = buildTextWithTabs(schema, 'Alpha\nBeta', [boldMark]) as Fragment;
143+
expect(result.child(0).marks.some((m: any) => m.type.name === 'bold')).toBe(true);
144+
expect(result.child(1).type.name).toBe('lineBreak');
145+
expect(result.child(1).marks.length).toBe(0);
146+
expect(result.child(2).marks.some((m: any) => m.type.name === 'bold')).toBe(true);
147+
});
148+
149+
it('omits empty segments around leading, trailing, and consecutive newlines', () => {
150+
const schema = makeRealSchema({ hasLineBreak: true });
151+
const lead = buildTextWithTabs(schema, '\nfoo', undefined) as Fragment;
152+
expect(lead.childCount).toBe(2);
153+
expect(lead.child(0).type.name).toBe('lineBreak');
154+
expect(lead.child(1).text).toBe('foo');
155+
156+
const doubled = buildTextWithTabs(schema, 'a\n\nb', undefined) as Fragment;
157+
expect(doubled.childCount).toBe(4);
158+
expect(doubled.child(0).text).toBe('a');
159+
expect(doubled.child(1).type.name).toBe('lineBreak');
160+
expect(doubled.child(2).type.name).toBe('lineBreak');
161+
expect(doubled.child(3).text).toBe('b');
162+
});
163+
164+
it('interleaves tab and lineBreak nodes when both control characters are present', () => {
165+
const schema = makeRealSchema({ hasTab: true, hasLineBreak: true });
166+
const result = buildTextWithTabs(schema, 'a\tb\nc', undefined) as Fragment;
167+
expect(result.childCount).toBe(5);
168+
expect(result.child(0).text).toBe('a');
169+
expect(result.child(1).type.name).toBe('tab');
170+
expect(result.child(2).text).toBe('b');
171+
expect(result.child(3).type.name).toBe('lineBreak');
172+
expect(result.child(4).text).toBe('c');
173+
});
174+
175+
it('keeps the raw tab literal but still splits the newline when tabs are disallowed', () => {
176+
const schema = makeRealSchema({ hasTab: true, hasLineBreak: true });
177+
const result = buildTextWithTabs(schema, 'a\tb\nc', undefined, { parentAllowsTab: false }) as Fragment;
178+
expect(result.childCount).toBe(3);
179+
expect(result.child(0).text).toBe('a\tb');
180+
expect(result.child(1).type.name).toBe('lineBreak');
181+
expect(result.child(2).text).toBe('c');
182+
});
183+
184+
// Finding 4: generated/SDK text often uses CRLF; normalize CRLF and bare CR to
185+
// line breaks so no stray carriage return survives in a text segment.
186+
it('normalizes CRLF to a lineBreak node', () => {
187+
const schema = makeRealSchema({ hasLineBreak: true });
188+
const result = buildTextWithTabs(schema, 'Alpha\r\nBeta', undefined) as Fragment;
189+
expect(result.childCount).toBe(3);
190+
expect(result.child(0).text).toBe('Alpha');
191+
expect(result.child(1).type.name).toBe('lineBreak');
192+
expect(result.child(2).text).toBe('Beta');
193+
});
194+
195+
it('normalizes a bare CR to a lineBreak node without leaving a stray carriage return', () => {
196+
const schema = makeRealSchema({ hasLineBreak: true });
197+
const result = buildTextWithTabs(schema, 'Alpha\rBeta', undefined) as Fragment;
198+
expect(result.childCount).toBe(3);
199+
expect(result.child(0).text).toBe('Alpha');
200+
expect(result.child(1).type.name).toBe('lineBreak');
201+
expect(result.child(2).text).toBe('Beta');
202+
});
203+
204+
// Finding 1: a `text*`-only parent (e.g. total-page-number) rejects lineBreak.
205+
it('keeps the raw newline in a single text node when parentAllowsLineBreak is false', () => {
206+
const schema = makeRealSchema({ hasLineBreak: true });
207+
const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined, { parentAllowsLineBreak: false });
208+
expect((result as any).isText).toBe(true);
209+
expect((result as any).text).toBe('Alpha\nBeta');
210+
});
102211
});
103212

104213
describe('parentAllowsNodeAt', () => {

0 commit comments

Comments
 (0)