Skip to content

Commit 6eeb8d7

Browse files
authored
Merge pull request #3616 from xy200303/fix/nested-content-control-import
fix(super-editor): support nested content controls
2 parents 59c6da1 + bbb027d commit 6eeb8d7

11 files changed

Lines changed: 621 additions & 10 deletions

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const INLINE_FALLBACK_TYPES = new Set([
2424
'endnoteReference',
2525
'fieldAnnotation',
2626
'structuredContent',
27+
'image',
2728
'mathInline',
2829
'passthroughInline',
2930
'page-number',

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/helpers/is-inline-node.test.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ describe('isInlineNode', () => {
88
expect(isInlineNode({ type: 'bookmarkStart', attrs: { id: '1' } })).toBe(true);
99
expect(isInlineNode({ type: 'bookmarkEnd', attrs: { id: '1' } })).toBe(true);
1010
expect(isInlineNode({ type: 'tab' })).toBe(true);
11+
expect(isInlineNode({ type: 'image', attrs: { src: 'media/image1.png' } })).toBe(true);
1112
expect(isInlineNode({ type: 'footnoteReference', attrs: { id: '1' } })).toBe(true);
1213
});
1314

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.js

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,56 @@
11
import { parseAnnotationMarks } from './handle-annotation-node';
22
import { parseStrictStOnOff } from '../../../utils.js';
33
import { BLOCK_FIELD_XML_NAMES } from '../../../sd/shared/block-field-xml-names.js';
4+
import { isInlineNode } from '../../../helpers/is-inline-node.js';
5+
6+
// XML element names that canEmitInlineStructuredContent looks for among the
// import path entries when deciding whether an inline structuredContent is allowed.
const INLINE_CONTEXT_XML_NAMES = new Set(['w:p', 'w:r', 'w:hyperlink', 'w:smartTag']);
7+
8+
/**
 * Report whether `w:sdtContent` has a direct child that marks it as block content.
 *
 * A direct child counts when it is named `w:p` or `w:tbl`, or when its name is
 * listed in BLOCK_FIELD_XML_NAMES.
 *
 * @param {{ elements?: Array<{ name?: string } | null | undefined> } | null | undefined} sdtContent
 * @returns {boolean} true when at least one direct child is a block signal
 */
function hasDirectBlockSignal(sdtContent) {
  const isBlockChild = (child) => {
    const childName = child?.name;
    if (childName === 'w:p' || childName === 'w:tbl') return true;
    return BLOCK_FIELD_XML_NAMES.has(childName);
  };

  // The optional chain yields undefined when there is no elements list.
  const found = sdtContent?.elements?.some(isBlockChild);
  return found ?? false;
}
15+
16+
/**
 * Decide whether the import path permits emitting an inline `structuredContent`.
 *
 * Returns true when any path entry is named `w:sdtContent` or carries one of
 * the names in INLINE_CONTEXT_XML_NAMES.
 *
 * @param {Array<{ name?: string } | null | undefined> | null} [path] Ancestor entries of the node being imported.
 * @returns {boolean} true when an inline context entry is present on the path
 */
function canEmitInlineStructuredContent(path = []) {
  // A default parameter only replaces `undefined`. The handler elsewhere treats
  // `params.path` as possibly falsy, so guard `null` / non-array values here too
  // instead of throwing on `.some`.
  if (!Array.isArray(path)) return false;

  return path.some((entry) => {
    const name = entry?.name;
    return name === 'w:sdtContent' || INLINE_CONTEXT_XML_NAMES.has(name);
  });
}
19+
20+
/**
 * Report whether the translated content holds at least one non-inline node.
 *
 * Entries without a truthy `type` are ignored; every other entry is classified
 * by `isInlineNode(node, schema)`.
 *
 * @param {Array<object | null | undefined>} [content] Translated child nodes.
 * @param {object} [schema] Schema forwarded to `isInlineNode`.
 * @returns {boolean} true when some typed entry is not inline
 */
function hasTranslatedBlockContent(content = [], schema) {
  for (const candidate of content) {
    if (candidate?.type && !isInlineNode(candidate, schema)) {
      return true;
    }
  }
  return false;
}
23+
24+
/**
 * Group consecutive inline nodes into synthetic paragraph nodes.
 *
 * Walks `content` in order. Falsy entries are dropped. Each maximal run of
 * nodes for which `isInlineNode(node, schema)` is true is wrapped in a
 * `{ type: 'paragraph', attrs: null, content, marks: [] }` node; every other
 * node is passed through unchanged, in its original position.
 *
 * @param {Array<object | null | undefined>} [content] Translated child nodes.
 * @param {object} [schema] Schema forwarded to `isInlineNode`.
 * @returns {Array<object>} A new array with inline runs wrapped in paragraphs.
 */
function wrapInlineRunsAsParagraphs(content = [], schema) {
  const output = [];
  let inlineRun = [];

  // Close the current inline run (if any) by emitting it as one paragraph.
  const closeRun = () => {
    if (inlineRun.length === 0) return;
    const wrapper = {
      type: 'paragraph',
      attrs: null,
      content: inlineRun,
      marks: [],
    };
    output.push(wrapper);
    inlineRun = [];
  };

  for (const child of content) {
    if (!child) continue;

    if (!isInlineNode(child, schema)) {
      // A non-inline node ends the pending run and is kept as-is.
      closeRun();
      output.push(child);
    } else {
      inlineRun.push(child);
    }
  }

  closeRun();
  return output;
}
454

555
/**
656
* Detect the semantic control type from sdtPr child elements.
@@ -113,25 +163,25 @@ export function handleStructuredContentNode(params) {
113163
return null;
114164
}
115165

116-
const paragraph = sdtContent.elements?.find((el) => el.name === 'w:p');
117-
const table = sdtContent.elements?.find((el) => el.name === 'w:tbl');
118-
// SD-3005: a content control wrapping a block field (e.g. BIBLIOGRAPHY) has
119-
// no direct w:p after preprocessing — its child is an sd:* block node. It is
120-
// block content and must not be emitted as an inline structuredContent.
121-
const blockField = sdtContent.elements?.find((el) => BLOCK_FIELD_XML_NAMES.has(el?.name));
122166
const { marks } = parseAnnotationMarks(sdtContent);
123167
const translatedContent = nodeListHandler.handler({
124168
...params,
125169
nodes: sdtContent.elements,
126170
path: [...(params.path || []), sdtContent],
127171
});
128172

129-
const isBlockNode = paragraph || table || blockField;
173+
const schema = params.editor?.schema;
174+
const content = Array.isArray(translatedContent) ? translatedContent : [];
175+
const isBlockNode =
176+
hasTranslatedBlockContent(content, schema) ||
177+
hasDirectBlockSignal(sdtContent) ||
178+
!canEmitInlineStructuredContent(params.path);
130179
const sdtContentType = isBlockNode ? 'structuredContentBlock' : 'structuredContent';
180+
const normalizedContent = isBlockNode ? wrapInlineRunsAsParagraphs(content, schema) : content;
131181

132182
let result = {
133183
type: sdtContentType,
134-
content: translatedContent,
184+
content: normalizedContent,
135185
marks,
136186
attrs: {
137187
id: id?.attributes?.['w:val'] || null,

packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/sdt/helpers/handle-structured-content-node.test.js

Lines changed: 182 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
import { describe, it, expect, vi, beforeEach } from 'vitest';
1+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
22
import { handleStructuredContentNode } from './handle-structured-content-node';
33
import { parseAnnotationMarks } from './handle-annotation-node';
4+
import { defaultNodeListHandler } from '../../../../../v2/importer/docxImporter.js';
5+
import { initTestEditor } from '@tests/helpers/helpers.js';
46

57
// Mock dependencies
68
vi.mock('./handle-annotation-node', () => ({
@@ -31,6 +33,10 @@ describe('handleStructuredContentNode', () => {
3133
parseAnnotationMarks.mockReturnValue({ marks: [] });
3234
});
3335

36+
afterEach(() => {
37+
vi.restoreAllMocks();
38+
});
39+
3440
it('returns null when nodes array is empty', () => {
3541
const params = { nodes: [], nodeListHandler: mockNodeListHandler };
3642
const result = handleStructuredContentNode(params);
@@ -79,7 +85,7 @@ describe('handleStructuredContentNode', () => {
7985
const params = {
8086
nodes: [node],
8187
nodeListHandler: mockNodeListHandler,
82-
path: [],
88+
path: [{ name: 'w:p' }],
8389
};
8490

8591
parseAnnotationMarks.mockReturnValue({ marks: [{ type: 'bold' }] });
@@ -363,3 +369,177 @@ describe('handleStructuredContentNode', () => {
363369
});
364370
});
365371
});
372+
373+
// Regression suite for nested content controls. Each case builds raw OOXML-shaped
// JSON, imports it through defaultNodeListHandler, and checks the result against
// the test editor's schema.
describe('handleStructuredContentNode nested SDT import regression', () => {
  let editor;

  /** Build a `w:r` element holding a single `w:t` text child. */
  function makeRun(text) {
    const textElement = { name: 'w:t', elements: [{ type: 'text', text }] };
    return { name: 'w:r', elements: [textElement] };
  }

  /** Build a `w:p` element containing one text run. */
  function makeParagraph(text) {
    return { name: 'w:p', elements: [makeRun(text)] };
  }

  /** Build a `w:sdtPr` element carrying id, tag, alias, lock and a control-type marker. */
  function makeSdtPr({ id, tag, alias, lockMode = 'unlocked', controlType = 'w:richText' }) {
    const valueElement = (name, value) => ({ name, attributes: { 'w:val': value } });
    return {
      name: 'w:sdtPr',
      elements: [
        valueElement('w:id', id),
        valueElement('w:tag', tag),
        valueElement('w:alias', alias),
        valueElement('w:lock', lockMode),
        { name: controlType },
      ],
    };
  }

  /** Build a `w:sdt` element from its properties and the children of its `w:sdtContent`. */
  function makeSdt(props, contentElements) {
    const sdtContent = { name: 'w:sdtContent', elements: contentElements };
    return { name: 'w:sdt', elements: [makeSdtPr(props), sdtContent] };
  }

  /** Build a one-row, one-cell `w:tbl` whose cell holds a single paragraph. */
  function makeTable(text) {
    const tblPr = {
      name: 'w:tblPr',
      elements: [{ name: 'w:tblW', attributes: { 'w:w': '2400', 'w:type': 'dxa' } }],
    };
    const tblGrid = {
      name: 'w:tblGrid',
      elements: [{ name: 'w:gridCol', attributes: { 'w:w': '2400' } }],
    };
    const tcPr = {
      name: 'w:tcPr',
      elements: [{ name: 'w:tcW', attributes: { 'w:w': '2400', 'w:type': 'dxa' } }],
    };
    const cell = { name: 'w:tc', elements: [tcPr, makeParagraph(text)] };
    const row = { name: 'w:tr', elements: [cell] };
    return { name: 'w:tbl', elements: [tblPr, tblGrid, row] };
  }

  /** Run the given XML nodes through a fresh default node list handler with an empty path. */
  function runImport(nodes) {
    const nodeListHandler = defaultNodeListHandler();
    const importParams = {
      nodes,
      nodeListHandler,
      docx: {},
      editor,
      path: [],
    };
    return nodeListHandler.handler(importParams);
  }

  /** Assert that `content` forms a schema-valid doc and return the built node. */
  function assertSchemaValid(content) {
    let pmDoc;
    const buildAndCheck = () => {
      pmDoc = editor.schema.nodeFromJSON({ type: 'doc', content });
      pmDoc.check();
    };
    expect(buildAndCheck).not.toThrow();
    return pmDoc;
  }

  /** Depth-first, pre-order search of a JSON node tree; returns the first match or null. */
  function findJsonNode(root, matches) {
    if (!root) return null;
    if (matches(root)) return root;

    const children = root.content || [];
    for (let index = 0; index < children.length; index += 1) {
      const hit = findJsonNode(children[index], matches);
      if (hit) return hit;
    }
    return null;
  }

  /** Read the `w:val` attribute of the named element stored in a node's `attrs.sdtPr`. */
  function sdtPrValue(node, elementName) {
    const element = node.attrs.sdtPr?.elements?.find((el) => el.name === elementName);
    return element?.attributes?.['w:val'];
  }

  beforeEach(() => {
    const created = initTestEditor({
      isHeadless: true,
      loadFromSchema: true,
      content: { type: 'doc', content: [{ type: 'paragraph' }] },
    });
    editor = created.editor;
    parseAnnotationMarks.mockReturnValue({ marks: [] });
  });

  afterEach(() => {
    editor?.destroy();
    editor = null;
    vi.restoreAllMocks();
  });

  it('imports nested block SDT when outer sdtContent directly contains w:sdt wrapping a paragraph', () => {
    const innerProps = { id: 'inner-block', tag: 'inner-tag', alias: 'Inner Alias', lockMode: 'contentLocked' };
    const outerProps = { id: 'outer-block', tag: 'outer-tag', alias: 'Outer Alias', lockMode: 'sdtLocked' };
    const innerSdt = makeSdt(innerProps, [makeParagraph('Nested paragraph')]);
    const outerSdt = makeSdt(outerProps, [innerSdt]);

    const result = runImport([outerSdt]);

    expect(result).toHaveLength(1);
    const [outerNode] = result;
    expect(outerNode.type).toBe('structuredContentBlock');
    expect(outerNode.attrs).toMatchObject({
      id: 'outer-block',
      tag: 'outer-tag',
      alias: 'Outer Alias',
      lockMode: 'sdtLocked',
      controlType: 'richText',
    });

    const nested = outerNode.content?.[0];
    expect(nested?.type).toBe('structuredContentBlock');
    expect(nested.attrs).toMatchObject({
      id: 'inner-block',
      tag: 'inner-tag',
      alias: 'Inner Alias',
      lockMode: 'contentLocked',
      controlType: 'richText',
    });
    expect(sdtPrValue(nested, 'w:alias')).toBe('Inner Alias');

    assertSchemaValid(result);
  });

  it('wraps nested inline SDT safely when an outer block SDT also contains paragraph and table content', () => {
    const inlineProps = {
      id: 'inner-inline',
      tag: 'inline-tag',
      alias: 'Inline Alias',
      lockMode: 'sdtContentLocked',
    };
    const outerProps = { id: 'outer-mixed', tag: 'outer-mixed-tag', alias: 'Outer Mixed', lockMode: 'sdtLocked' };
    const inlineSdt = makeSdt(inlineProps, [makeRun('Inline value')]);
    const outerSdt = makeSdt(outerProps, [inlineSdt, makeParagraph('Outer paragraph'), makeTable('Cell text')]);

    const result = runImport([outerSdt]);

    expect(result).toHaveLength(1);
    const [outerNode] = result;
    expect(outerNode.type).toBe('structuredContentBlock');
    const childTypes = outerNode.content?.map((node) => node.type);
    expect(childTypes).toEqual(['paragraph', 'paragraph', 'table']);

    const isInnerInline = (node) => node.type === 'structuredContent' && node.attrs?.id === 'inner-inline';
    const nested = findJsonNode(outerNode, isInnerInline);
    expect(nested).toBeTruthy();
    expect(nested.attrs).toMatchObject({
      id: 'inner-inline',
      tag: 'inline-tag',
      alias: 'Inline Alias',
      lockMode: 'sdtContentLocked',
      controlType: 'richText',
    });
    expect(sdtPrValue(nested, 'w:lock')).toBe('sdtContentLocked');

    assertSchemaValid(result);
  });
});
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# SDT classification fixtures (PR #3616)
2+
3+
Real `.docx` fixtures that validate the nested content-control classifier in
4+
`super-converter/v3/handlers/w/sdt/`. Exercised by
5+
`tests/editor/sdt-nested-classification.test.js`.
6+
7+
The claim under test: block vs run/inline SDT classification is driven by the
8+
translated ProseMirror content shape plus import context, not only by the direct
9+
XML child names of `w:sdtContent`.
10+
11+
## Provenance and conformance
12+
13+
Each fixture's surrounding package (content types, rels, styles, theme, fonts, and
14+
image media) is taken verbatim from a Word-authored base already in this folder.
15+
Only `word/document.xml` is hand-authored to encode the exact OOXML shape, so the
16+
package stays valid while the structure is precise. All fixtures are therefore
17+
**schema-only** (hand-authored structure, not produced or validated by Word).
18+
19+
The **Conformance** column distinguishes shapes that are valid ECMA-376 from one that
20+
is deliberately malformed to exercise the PR's defensive normalization:
21+
22+
| Fixture | Conformance | Base package | Shape under `w:body` |
23+
|---|---|---|---|
24+
| `sdt-nested-block.docx` | conformant | `blank-doc.docx` | block `w:sdt` whose `w:sdtContent` directly contains a nested block `w:sdt` (no direct `w:p`) wrapping a paragraph. Legal: `EG_ContentBlockContent` permits `sdt`. |
25+
| `sdt-nested-inline.docx` | conformant | `blank-doc.docx` | `w:p` containing an inline `w:sdt` that contains a nested inline `w:sdt` of runs, between two text runs. Legal: `CT_SdtContentRun` is `EG_PContent`. |
26+
| `sdt-mixed-block.docx` | **defensive (malformed)** | `blank-doc.docx` | block `w:sdt` whose `w:sdtContent` holds a bare inline `w:sdt`, a `w:p`, and a `w:tbl`. The bare inline `w:sdt` is **non-conformant**: a `w:sdt` directly under block content is positionally `CT_SdtBlock`, whose content may not be a bare `w:r` (`EG_ContentBlockContent` allows only `customXml/sdt/p/tbl/EG_RunLevelElts`, and `EG_RunLevelElts` excludes `w:r`). Included on purpose to drive `wrapInlineRunsAsParagraphs`, which the PR uses to normalize bare inline content inside a block SDT. |
27+
| `sdt-inline-picture.docx` | conformant | `anchor_images.docx` (reuses `media/image1.png`, `rId4`) | `w:p` > inline `w:sdt` with `<w:picture/>` marker > `w:sdtContent` > `w:r` > `w:drawing`. Legal per ECMA-376 §17.5.2.24 (picture content control wrapping a single DrawingML picture). |
28+
29+
## Rebuild
30+
31+
```
32+
node packages/super-editor/src/editors/v1/tests/data/sdt-fixtures.generate.cjs
33+
```
34+
35+
The generator resolves all paths from its own location and reads the two base
36+
packages (`blank-doc.docx`, `anchor_images.docx`) from this folder, so it is
37+
portable. It re-reads each built file and asserts the intended shape. Set
38+
`SDT_FIXTURE_OUT=/some/dir` to write to a scratch dir instead of overwriting the
39+
committed fixtures (useful for a dry run). Regeneration is content-equivalent;
40+
only zip metadata may differ.
41+
42+
## Out of scope
43+
44+
Row-level SDTs (`w:tbl > w:sdt > w:sdtContent > w:tr`, Google Docs `goog_rdk_*`
45+
exports) are a separate table-walk concern tracked by SD-3118 / IT-1040. The real
46+
Google Docs artifact attached to those tickets should be used as that fixture, and
47+
its preservation checked through a Word round-trip before choosing transparent
48+
unwrap vs. `rowSdt` metadata. Not covered by these fixtures.

0 commit comments

Comments
 (0)