superdoc-dev
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/bibliography-preprocessor.js‎
Lines changed: 5 additions & 12 deletions b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/bibliography-preprocessor.js‎
Lines changed: 5 additions & 12 deletions
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/bibliography-preprocessor.test.js‎
Lines changed: 26 additions & 4 deletions b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/bibliography-preprocessor.test.js‎
Lines changed: 26 additions & 4 deletions
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/build-block-field-node.js‎
Lines changed: 32 additions & 0 deletions b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/build-block-field-node.js‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/build-block-field-node.test.js‎
Lines changed: 35 additions & 0 deletions b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/build-block-field-node.test.js‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/index-preprocessor.js‎
Lines changed: 2 additions & 12 deletions b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/index-preprocessor.js‎
Lines changed: 2 additions & 12 deletions
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/toa-preprocessor.js‎
Lines changed: 2 additions & 12 deletions b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/toa-preprocessor.js‎
Lines changed: 2 additions & 12 deletions
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/preProcessNodesForFldChar.js‎
Lines changed: 7 additions & 1 deletion b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/preProcessNodesForFldChar.js‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/preProcessNodesForFldChar.test.js‎
Lines changed: 62 additions & 1 deletion b/‎packages/super-editor/src/editors/v1/core/super-converter/field-references/preProcessNodesForFldChar.test.js‎
Lines changed: 62 additions & 1 deletion
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/v2/importer/docxImporter.js‎
Lines changed: 2 additions & 0 deletions b/‎packages/super-editor/src/editors/v1/core/super-converter/v2/importer/docxImporter.js‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎packages/super-editor/src/editors/v1/core/super-converter/v2/importer/paragraphNodeImporter.js‎
Lines changed: 1 addition & 1 deletion b/‎packages/super-editor/src/editors/v1/core/super-converter/v2/importer/paragraphNodeImporter.js‎
Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-import { normalizeFieldContentToParagraphs } from './normalize-field-content.js';
+import { buildBlockFieldNode } from './build-block-field-node.js';
 
 /**
  * Processes a BIBLIOGRAPHY instruction and creates an `sd:bibliography` node.
@@ -7,17 +7,10 @@ import { normalizeFieldContentToParagraphs } from './normalize-field-content.js'
  *
  * @param {import('../../v2/types/index.js').OpenXmlNode[]} nodesToCombine The nodes to combine.
  * @param {string} instrText The instruction text.
+ * @param {import('../../v2/docxHelper').ParsedDocx} [_docx] The docx object (unused).
+ * @param {Array<{type: string, text?: string}> | null} [instructionTokens] Raw instruction-run fragments, passed through to buildBlockFieldNode.
  * @returns {import('../../v2/types/index.js').OpenXmlNode[]}
  */
-export function preProcessBibliographyInstruction(nodesToCombine, instrText) {
-  return [
-    {
-      name: 'sd:bibliography',
-      type: 'element',
-      attributes: {
-        instruction: instrText,
-      },
-      elements: normalizeFieldContentToParagraphs(nodesToCombine),
-    },
-  ];
+export function preProcessBibliographyInstruction(nodesToCombine, instrText, _docx, instructionTokens = null) {
+  return buildBlockFieldNode('sd:bibliography', nodesToCombine, instrText, instructionTokens);
 }
@@ -29,16 +29,18 @@ describe('preProcessBibliographyInstruction', () => {
     // bibliography PM node declares `content: 'paragraph+'`, so emitting loose
     // runs as direct children crashes the schema. The preprocessor must group
     // adjacent inline nodes into a synthesized <w:p>.
-    const r1 = { name: 'w:r', type: 'element', elements: [{ name: 'w:t', elements: [{ text: 'Smith, J. (2024). ' }] }] };
+    const r1 = {
+      name: 'w:r',
+      type: 'element',
+      elements: [{ name: 'w:t', elements: [{ text: 'Smith, J. (2024). ' }] }],
+    };
     const r2 = { name: 'w:r', type: 'element', elements: [{ name: 'w:t', elements: [{ text: 'Document Formats.' }] }] };
 
     const result = preProcessBibliographyInstruction([r1, r2], 'BIBLIOGRAPHY \\l 1033 ');
 
     expect(result).toHaveLength(1);
     expect(result[0].name).toBe('sd:bibliography');
-    expect(result[0].elements).toEqual([
-      { name: 'w:p', type: 'element', elements: [r1, r2] },
-    ]);
+    expect(result[0].elements).toEqual([{ name: 'w:p', type: 'element', elements: [r1, r2] }]);
   });
 
   it('preserves existing w:p children as-is (multi-paragraph field)', () => {
@@ -63,4 +65,24 @@ describe('preProcessBibliographyInstruction', () => {
       { name: 'w:p', type: 'element', elements: [trailingRun] },
     ]);
   });
+
+  it('preserves instructionTokens so split instructions round-trip (SD-3066)', () => {
+    // Parity with index/toa: a BIBLIOGRAPHY instruction split across runs
+    // (e.g. 'BIBLIOGRAPHY ' + '\\l 1033 ') must keep its raw fragments so the
+    // exporter can rebuild the original runs instead of collapsing to one.
+    const instructionTokens = [
+      { type: 'text', text: 'BIBLIOGRAPHY ' },
+      { type: 'text', text: '\\l 1033 ' },
+    ];
+
+    const result = preProcessBibliographyInstruction([], 'BIBLIOGRAPHY \\l 1033', null, instructionTokens);
+
+    expect(result[0].attributes.instructionTokens).toEqual(instructionTokens);
+  });
+
+  it('omits instructionTokens when none are provided', () => {
+    const result = preProcessBibliographyInstruction([], 'BIBLIOGRAPHY');
+
+    expect(result[0].attributes).not.toHaveProperty('instructionTokens');
+  });
 });
@@ -0,0 +1,32 @@
+import { normalizeFieldContentToParagraphs } from './normalize-field-content.js';
+
+/**
+ * Build a block-level field node (`sd:bibliography`, `sd:index`,
+ * `sd:tableOfAuthorities`) from the runs a complex field collected between its
+ * `separate` and `end` fldChars.
+ *
+ * These three fields share one shape: an `sd:*` element carrying the raw
+ * instruction (plus its token fragments, when the instruction was split across
+ * runs) whose children are the field's generated paragraphs. The result is
+ * normalized so loose inline runs are wrapped into paragraphs, satisfying the
+ * `paragraph+` PM schema (see normalize-field-content / SD-3005).
+ *
+ * @param {string} xmlName The `sd:*` element name to emit.
+ * @param {import('../../v2/types/index.js').OpenXmlNode[]} nodesToCombine The collected result nodes.
+ * @param {string} instrText The field instruction text.
+ * @param {Array<{type: string, text?: string}> | null} [instructionTokens] Raw instruction-run fragments; the attribute is omitted when this is falsy.
+ * @returns {import('../../v2/types/index.js').OpenXmlNode[]} A one-element array containing the field node.
+ */
+export function buildBlockFieldNode(xmlName, nodesToCombine, instrText, instructionTokens = null) {
+  return [
+    {
+      name: xmlName,
+      type: 'element',
+      attributes: {
+        instruction: instrText,
+        ...(instructionTokens ? { instructionTokens } : {}), // key is left off entirely (not set to null) when no tokens
+      },
+      elements: normalizeFieldContentToParagraphs(nodesToCombine),
+    },
+  ];
+}
@@ -0,0 +1,35 @@
+import { describe, expect, it } from 'vitest';
+import { buildBlockFieldNode } from './build-block-field-node.js';
+
+describe('buildBlockFieldNode', () => {
+  it('emits the given sd:* element with the instruction and normalized paragraphs', () => {
+    const run = { name: 'w:r', type: 'element', elements: [] };
+
+    const result = buildBlockFieldNode('sd:index', [run], 'INDEX \\c 2');
+
+    expect(result).toEqual([
+      {
+        name: 'sd:index',
+        type: 'element',
+        attributes: { instruction: 'INDEX \\c 2' },
+        elements: [{ name: 'w:p', type: 'element', elements: [run] }],
+      },
+    ]);
+  });
+
+  it('includes instructionTokens only when provided', () => {
+    const tokens = [{ type: 'text', text: 'INDEX ' }, { type: 'tab' }];
+
+    const withTokens = buildBlockFieldNode('sd:tableOfAuthorities', [], 'INDEX', tokens);
+    expect(withTokens[0].attributes.instructionTokens).toEqual(tokens);
+
+    const withoutTokens = buildBlockFieldNode('sd:tableOfAuthorities', [], 'INDEX');
+    expect(withoutTokens[0].attributes).not.toHaveProperty('instructionTokens');
+  });
+
+  it('synthesizes an empty paragraph when there is no content', () => {
+    const result = buildBlockFieldNode('sd:bibliography', [], 'BIBLIOGRAPHY');
+
+    expect(result[0].elements).toEqual([{ name: 'w:p', type: 'element', elements: [] }]);
+  });
+});
@@ -1,4 +1,4 @@
-import { normalizeFieldContentToParagraphs } from './normalize-field-content.js';
+import { buildBlockFieldNode } from './build-block-field-node.js';
 
 /**
  * Processes an INDEX instruction and creates an `sd:index` node.
@@ -9,15 +9,5 @@ import { normalizeFieldContentToParagraphs } from './normalize-field-content.js'
  * @returns {import('../../v2/types/index.js').OpenXmlNode[]}
  */
 export function preProcessIndexInstruction(nodesToCombine, instrText, _docx, instructionTokens = null) {
-  return [
-    {
-      name: 'sd:index',
-      type: 'element',
-      attributes: {
-        instruction: instrText,
-        ...(instructionTokens ? { instructionTokens } : {}),
-      },
-      elements: normalizeFieldContentToParagraphs(nodesToCombine),
-    },
-  ];
+  return buildBlockFieldNode('sd:index', nodesToCombine, instrText, instructionTokens); // shared builder: same node shape as BIBLIOGRAPHY/TOA
 }
@@ -1,4 +1,4 @@
-import { normalizeFieldContentToParagraphs } from './normalize-field-content.js';
+import { buildBlockFieldNode } from './build-block-field-node.js';
 
 /**
  * Processes a TOA (Table of Authorities) instruction and creates an `sd:tableOfAuthorities` node.
@@ -12,15 +12,5 @@ import { normalizeFieldContentToParagraphs } from './normalize-field-content.js'
  * @returns {import('../../v2/types/index.js').OpenXmlNode[]}
  */
 export function preProcessToaInstruction(nodesToCombine, instrText, _docx, instructionTokens = null) {
-  return [
-    {
-      name: 'sd:tableOfAuthorities',
-      type: 'element',
-      attributes: {
-        instruction: instrText,
-        ...(instructionTokens ? { instructionTokens } : {}),
-      },
-      elements: normalizeFieldContentToParagraphs(nodesToCombine),
-    },
-  ];
+  return buildBlockFieldNode('sd:tableOfAuthorities', nodesToCombine, instrText, instructionTokens); // shared builder: same node shape as INDEX/BIBLIOGRAPHY
 }
@@ -178,7 +178,13 @@ export const preProcessNodesForFldChar = (nodes = [], docx) => {
           currentField.instructionTokens.push(...instructionTokens);
           const instrTextValue = instrTextEl?.elements?.[0]?.text;
           if (instrTextValue != null) {
-            currentField.instrText += `${instrTextValue} `;
+            // SD-3066: join instrText fragments verbatim. Word preserves the
+            // literal spaces inside each run, so an instruction split across
+            // runs (e.g. ' XE "' + 'Building Standard' + '" ') already carries
+            // its own separators. Injecting a space per fragment corrupted the
+            // entry text to 'XE " Building Standard "'. The leading/trailing
+            // whitespace is trimmed by finalizeField via `instrText.trim()`.
+            currentField.instrText += `${instrTextValue}`;
           }
           if (instructionTokens.some((token) => token.type === 'tab')) {
             currentField.instrText += '\t';
 
@@ -683,7 +683,11 @@ describe('preProcessNodesForFldChar', () => {
       {
         nodes: [{ name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'link text' }] }] }],
         fieldInfo: {
-          instrText: 'HYPERLINK "http://example.com"   ',
+          // SD-3066: verbatim concatenation of the two instrText runs
+          // ('HYPERLINK "http://example.com"' + ' ') is a single trailing
+          // space. The previous expectation of three spaces reflected the
+          // old per-fragment injected separator, not the literal source text.
+          instrText: 'HYPERLINK "http://example.com" ',
           instructionTokens: [
             { type: 'text', text: 'HYPERLINK "http://example.com"' },
             { type: 'text', text: ' ' },
@@ -746,6 +750,63 @@ describe('preProcessNodesForFldChar', () => {
     expect(processedNodes[0].attributes.instruction).toBe('XE "Term"');
   });
 
+  it('processes fldSimple INDEX fields, wrapping loose result runs in a paragraph (SD-3066)', () => {
+    // The ticket flags w:fldSimple as a primary INDEX signal. A fldSimple INDEX
+    // carries its generated entries as loose runs; the index PM node requires
+    // `paragraph+`, so the preprocessor must wrap them (normalizeFieldContentToParagraphs,
+    // the SD-3005 fix). This guards both the fldSimple dispatch and that wrapping.
+    const nodes = [
+      {
+        name: 'w:fldSimple',
+        attributes: { 'w:instr': 'INDEX \\c 2' },
+        elements: [
+          { name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'apple, 3' }] }] },
+          { name: 'w:r', elements: [{ name: 'w:t', elements: [{ type: 'text', text: 'banana, 5' }] }] },
+        ],
+      },
+    ];
+
+    const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx);
+    expect(processedNodes).toHaveLength(1);
+    expect(processedNodes[0].name).toBe('sd:index');
+    expect(processedNodes[0].attributes.instruction).toBe('INDEX \\c 2');
+    // Loose runs wrapped into a single paragraph so the PM `paragraph+` schema holds.
+    expect(processedNodes[0].elements).toHaveLength(1);
+    expect(processedNodes[0].elements[0].name).toBe('w:p');
+    expect(processedNodes[0].elements[0].elements).toHaveLength(2);
+  });
+
+  it('joins instruction text split across multiple instrText runs verbatim (SD-3066)', () => {
+    // Word commonly splits an XE instruction across runs, with the literal
+    // spaces preserved inside each run: ' XE "' + 'Building Standard' + '" '.
+    // The aggregated instruction must reconstruct the literal string, not
+    // inject a separator space per fragment (which produced
+    // 'XE " Building Standard "' with spurious internal spaces).
+    const nodes = [
+      { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'begin' } }] },
+      {
+        name: 'w:r',
+        elements: [
+          { name: 'w:instrText', attributes: { 'xml:space': 'preserve' }, elements: [{ type: 'text', text: ' XE "' }] },
+        ],
+      },
+      { name: 'w:r', elements: [{ name: 'w:instrText', elements: [{ type: 'text', text: 'Building Standard' }] }] },
+      {
+        name: 'w:r',
+        elements: [
+          { name: 'w:instrText', attributes: { 'xml:space': 'preserve' }, elements: [{ type: 'text', text: '" ' }] },
+        ],
+      },
+      { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'separate' } }] },
+      { name: 'w:r', elements: [{ name: 'w:fldChar', attributes: { 'w:fldCharType': 'end' } }] },
+    ];
+
+    const { processedNodes } = preProcessNodesForFldChar(nodes, mockDocx);
+    expect(processedNodes).toHaveLength(1);
+    expect(processedNodes[0].name).toBe('sd:indexEntry');
+    expect(processedNodes[0].attributes.instruction).toBe('XE "Building Standard"');
+  });
+
   it('passes field-sequence rPr into body NUMWORDS fields when cached-result runs have no styling', () => {
     const nodes = [
       {
 
@@ -33,6 +33,7 @@ import { tableNodeHandlerEntity } from './tableImporter.js';
 import { tableOfContentsHandlerEntity } from './tableOfContentsImporter.js';
 import { indexHandlerEntity, indexEntryHandlerEntity } from './indexImporter.js';
 import { bibliographyHandlerEntity } from './bibliographyImporter.js';
+import { tableOfAuthoritiesHandlerEntity } from './tableOfAuthoritiesImporter.js';
 import { preProcessNodesForFldChar } from '../../field-references';
 import { preProcessPageFieldsOnly } from '../../field-references/preProcessPageFieldsOnly.js';
 import { ensureNumberingCache } from './numberingCache.js';
@@ -344,6 +345,7 @@ export const defaultNodeListHandler = () => {
     tableOfContentsHandlerEntity,
     indexHandlerEntity,
     bibliographyHandlerEntity,
+    tableOfAuthoritiesHandlerEntity,
     indexEntryHandlerEntity,
     autoPageHandlerEntity,
     autoTotalPageCountEntity,
 
@@ -1,8 +1,8 @@
 // @ts-check
 import { translator as wPNodeTranslator } from '../../v3/handlers/w/p/index.js';
+import { BLOCK_FIELD_XML_NAMES } from '../../v3/handlers/sd/shared/block-field-xml-names.js';
 
 const PARAGRAPH_PROPERTIES_XML_NAME = 'w:pPr';
-const BLOCK_FIELD_XML_NAMES = new Set(['sd:tableOfContents', 'sd:index', 'sd:bibliography', 'sd:tableOfAuthorities']);
 
 const hasMeaningfulParagraphContent = (elements = []) =>
   elements.some((element) => element?.name && element.name !== PARAGRAPH_PROPERTIES_XML_NAME);