Skip to content

Commit 1efe0cc

Browse files
feat(math): implement m:acc accent converter (#2748)
* feat(math): implement m:acc accent converter (closes #2604)

  Made-with: Cursor

* fix(math): handle m:acc spec edge cases and render accents as stretchy forms

  - Distinguish the three m:chr states from ECMA-376 §22.1.2.20: element absent → default U+0302; element present with missing/empty m:val → character absent (render bare base, no <mover>); m:val set → use it. Previous code rendered a circumflex for the "character absent" case.
  - Map combining marks (U+0300–U+036F, U+20D6/7) to their ASCII-range accent equivalents (^, ~, ¯, arrows, …) before placing in <mo>, so MathML renderers can use the stretchy accent operators. Firefox and MathJax stretch these across wide bases; Chrome's MathML Core does not yet stretch accents but the output is semantically correct.
  - Return null when m:e is absent (invalid per CT_Acc), so malformed input is dropped silently to match Word's behavior.
  - Move the registry entry out of the "Not yet implemented" block in omml-to-mathml.ts.
  - Expand unit tests from 3 to 11 covering every spec branch, the spacing mapping, multi-run base wrapping, and the missing-m:e case.
  - Add a behavior test asserting <mover accent="true"> + spacing-form <mo> output.

  SD-2382

---------

Co-authored-by: Caio Pizzol <caiopizzol@icloud.com>
1 parent aa0d15d commit 1efe0cc

File tree

5 files changed

+265
-1
lines changed

5 files changed

+265
-1
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import type { MathObjectConverter } from '../types.js';
2+
3+
const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';
4+
5+
/** Default accent character when m:chr is absent (combining circumflex). */
6+
const DEFAULT_ACCENT_CHAR = '\u0302';
7+
8+
/**
9+
* Maps combining diacritical marks (which Word emits in m:chr) to their
10+
* non-combining accent equivalents, preferring characters that MathML Core's
11+
* operator dictionary registers as stretchy accents.
12+
*
13+
* Why: combining marks (U+0300–U+036F) placed bare inside <mo> render against
14+
* a dotted-circle placeholder in some engines. For the common accents we map
15+
* to ASCII/Latin-1 characters (`^`, `~`, `¯`, `¨`, `` ` ``, `´`); of these, `^`, `~`, and `¯`
16+
* are marked stretchy in the MathML Core operator dictionary, so MathML
17+
* renderers stretch them across wide bases (e.g. a tilde over "x+1"). For
18+
* accents without an ASCII-range equivalent we fall back to the Unicode
19+
* spacing modifier letter.
20+
*
21+
* Covers the accents Word's equation editor emits; anything outside this table
22+
* passes through unchanged.
23+
*/
24+
const COMBINING_TO_SPACING: Record<string, string> = {
25+
'\u0300': '\u0060', // grave → ` (U+0060)
26+
'\u0301': '\u00B4', // acute → ´ (U+00B4)
27+
'\u0302': '\u005E', // circumflex / hat → ^ (U+005E, stretchy)
28+
'\u0303': '\u007E', // tilde → ~ (U+007E, stretchy)
29+
'\u0304': '\u00AF', // macron → ¯ (U+00AF, stretchy)
30+
'\u0306': '\u02D8', // breve → ˘
31+
'\u0307': '\u02D9', // dot above → ˙
32+
'\u0308': '\u00A8', // diaeresis → ¨
33+
'\u030A': '\u02DA', // ring above → ˚
34+
'\u030B': '\u02DD', // double acute → ˝
35+
'\u030C': '\u02C7', // caron / háček → ˇ
36+
'\u20D6': '\u2190', // combining left arrow above → ← (U+2190, stretchy)
37+
'\u20D7': '\u2192', // combining right arrow above → → (U+2192, stretchy)
38+
};
39+
40+
/**
41+
* Convert m:acc (accent / diacritical mark) to MathML <mover accent="true">.
42+
*
43+
* OMML structure:
44+
* m:acc → m:accPr? (optional: m:chr@m:val), m:e (base expression, required)
45+
*
46+
* MathML output:
47+
* <mover accent="true">
48+
* <mrow>base</mrow>
49+
* <mo>accent-char</mo>
50+
* </mover>
51+
*
52+
* ECMA-376 §22.1.2.20 (chr) defines three m:chr states:
53+
* 1. m:chr element absent → default accent char (U+0302)
54+
* 2. m:chr present, m:val absent or empty → character is absent (render bare base)
55+
* 3. m:chr present, m:val = "x" → use x as the accent character
56+
*
57+
* When the accent character is absent, the base is returned wrapped in <mrow>
58+
* with no <mover> wrapper. When m:e itself is absent (invalid per the schema),
59+
* the converter returns null so the caller can drop the malformed element.
60+
*
61+
* @spec ECMA-376 §22.1.2.1 (acc), §22.1.2.2 (accPr), §22.1.2.20 (chr)
62+
*/
63+
export const convertAccent: MathObjectConverter = (node, doc, convertChildren) => {
64+
const elements = node.elements ?? [];
65+
const base = elements.find((e) => e.name === 'm:e');
66+
67+
// m:e is required by CT_Acc. Missing it means the input is malformed; decline
68+
// to render rather than emit a floating accent with no base.
69+
if (!base) return null;
70+
71+
const accPr = elements.find((e) => e.name === 'm:accPr');
72+
const chr = accPr?.elements?.find((e) => e.name === 'm:chr');
73+
const rawVal = chr?.attributes?.['m:val'];
74+
75+
// Resolve the accent character per §22.1.2.20.
76+
// - chr element absent → default U+0302
77+
// - chr present, m:val absent/"" → character absent (no accent)
78+
// - chr present, m:val = "x" → "x"
79+
const accentChar = chr === undefined ? DEFAULT_ACCENT_CHAR : rawVal && rawVal.length > 0 ? rawVal : '';
80+
81+
const baseRow = doc.createElementNS(MATHML_NS, 'mrow');
82+
baseRow.appendChild(convertChildren(base.elements ?? []));
83+
84+
if (!accentChar) {
85+
// No accent character: render the base alone.
86+
return baseRow;
87+
}
88+
89+
// Map combining marks to their spacing forms so MathML renderers can use the
90+
// stretchy accent operators. Non-combining or unmapped characters pass through.
91+
const renderChar = COMBINING_TO_SPACING[accentChar] ?? accentChar;
92+
93+
const mover = doc.createElementNS(MATHML_NS, 'mover');
94+
mover.setAttribute('accent', 'true');
95+
mover.appendChild(baseRow);
96+
97+
const mo = doc.createElementNS(MATHML_NS, 'mo');
98+
// stretchy is a hint: renderers that honor it (e.g. MathJax, Firefox's
99+
// accent-stretch path) will stretch the accent across wide bases. Chrome's
100+
// current MathML Core implementation ignores this for accent operators, so
101+
// the accent renders at glyph width there — acceptable baseline behavior.
102+
mo.setAttribute('stretchy', 'true');
103+
mo.textContent = renderChar;
104+
mover.appendChild(mo);
105+
106+
return mover;
107+
};

packages/layout-engine/painters/dom/src/features/math/converters/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export { convertDelimiter } from './delimiter.js';
1414
export { convertSubscript } from './subscript.js';
1515
export { convertSuperscript } from './superscript.js';
1616
export { convertSubSuperscript } from './sub-superscript.js';
17+
export { convertAccent } from './accent.js';
1718
export { convertPreSubSuperscript } from './pre-sub-superscript.js';
1819
export { convertRadical } from './radical.js';
1920
export { convertLowerLimit } from './lower-limit.js';

packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,6 +1899,130 @@ describe('m:func converter', () => {
18991899
});
19001900
});
19011901

1902+
describe('m:acc converter', () => {
1903+
// Helper: build an m:acc node with an optional accPr and a base string.
1904+
const buildAcc = (accPrElements: unknown[] | null, baseText: string | null, extraBaseRuns: string[] = []) => {
1905+
const elements: unknown[] = [];
1906+
if (accPrElements !== null) {
1907+
elements.push({ name: 'm:accPr', elements: accPrElements });
1908+
}
1909+
if (baseText !== null) {
1910+
const runs = [baseText, ...extraBaseRuns].map((t) => ({
1911+
name: 'm:r',
1912+
elements: [{ name: 'm:t', elements: [{ type: 'text', text: t }] }],
1913+
}));
1914+
elements.push({ name: 'm:e', elements: runs });
1915+
}
1916+
return { name: 'm:oMath', elements: [{ name: 'm:acc', elements }] };
1917+
};
1918+
1919+
it('converts accent with tilde to <mover accent="true">', () => {
1920+
const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u0303' } }], 'x'), doc);
1921+
expect(result).not.toBeNull();
1922+
const mover = result!.querySelector('mover');
1923+
expect(mover).not.toBeNull();
1924+
expect(mover!.getAttribute('accent')).toBe('true');
1925+
expect(mover!.children[0]!.textContent).toBe('x');
1926+
// Combining tilde (U+0303) is mapped to ASCII tilde (U+007E, "~") which
1927+
// MathML Core's operator dictionary marks as a stretchy accent.
1928+
const mo = mover!.querySelector('mo');
1929+
expect(mo!.textContent).toBe('\u007E');
1930+
});
1931+
1932+
it('defaults to circumflex when m:accPr is absent (spec §22.1.2.1)', () => {
1933+
const result = convertOmmlToMathml(buildAcc(null, 'a'), doc);
1934+
const mover = result!.querySelector('mover');
1935+
expect(mover).not.toBeNull();
1936+
expect(mover!.getAttribute('accent')).toBe('true');
1937+
// Combining circumflex (U+0302) maps to ASCII circumflex (U+005E, "^").
1938+
expect(mover!.querySelector('mo')!.textContent).toBe('\u005E');
1939+
});
1940+
1941+
it('defaults to circumflex when m:accPr is present but m:chr is absent (spec §22.1.2.20)', () => {
1942+
const result = convertOmmlToMathml(buildAcc([{ name: 'm:ctrlPr' }], 'a'), doc);
1943+
const mover = result!.querySelector('mover');
1944+
expect(mover).not.toBeNull();
1945+
expect(mover!.getAttribute('accent')).toBe('true');
1946+
expect(mover!.querySelector('mo')!.textContent).toBe('\u005E');
1947+
});
1948+
1949+
it('renders dot accent', () => {
1950+
const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u0307' } }], 'y'), doc);
1951+
const mover = result!.querySelector('mover');
1952+
expect(mover!.getAttribute('accent')).toBe('true');
1953+
// U+0307 → U+02D9 (spacing dot above) — no ASCII-range equivalent.
1954+
expect(mover!.querySelector('mo')!.textContent).toBe('\u02D9');
1955+
});
1956+
1957+
it('maps combining right-arrow (U+20D7) to stretchy right arrow (U+2192)', () => {
1958+
const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u20D7' } }], 'v'), doc);
1959+
expect(result!.querySelector('mover mo')!.textContent).toBe('\u2192');
1960+
});
1961+
1962+
it('passes unmapped accent characters through unchanged', () => {
1963+
// A character outside the combining→spacing table should pass through as-is.
1964+
const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '*' } }], 'x'), doc);
1965+
expect(result!.querySelector('mover mo')!.textContent).toBe('*');
1966+
});
1967+
1968+
// ── Spec §22.1.2.20: m:chr present with missing/empty m:val means the
1969+
// character is absent (not "use the default"). Render the base alone.
1970+
it('renders the base alone when m:chr is present with no m:val attribute', () => {
1971+
const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr' }], 'x'), doc);
1972+
expect(result).not.toBeNull();
1973+
// No <mover> wrapper — just the base inside an <mrow>.
1974+
expect(result!.querySelector('mover')).toBeNull();
1975+
expect(result!.textContent).toBe('x');
1976+
});
1977+
1978+
it('renders the base alone when m:chr has an explicitly empty m:val', () => {
1979+
const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '' } }], 'x'), doc);
1980+
expect(result).not.toBeNull();
1981+
expect(result!.querySelector('mover')).toBeNull();
1982+
expect(result!.textContent).toBe('x');
1983+
});
1984+
1985+
it('wraps multi-run base in <mrow> so a wide base like x+1 renders as a group', () => {
1986+
const result = convertOmmlToMathml(
1987+
buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u0303' } }], 'x', ['+', '1']),
1988+
doc,
1989+
);
1990+
const mover = result!.querySelector('mover');
1991+
expect(mover).not.toBeNull();
1992+
const baseRow = mover!.children[0]!;
1993+
expect(baseRow.tagName.toLowerCase()).toBe('mrow');
1994+
expect(baseRow.children.length).toBe(3);
1995+
expect(baseRow.textContent).toBe('x+1');
1996+
});
1997+
1998+
it('ignores non-chr siblings in m:accPr (e.g. m:ctrlPr)', () => {
1999+
const result = convertOmmlToMathml(
2000+
buildAcc([{ name: 'm:ctrlPr' }, { name: 'm:chr', attributes: { 'm:val': '\u0303' } }], 'x'),
2001+
doc,
2002+
);
2003+
const mover = result!.querySelector('mover');
2004+
expect(mover).not.toBeNull();
2005+
expect(mover!.children.length).toBe(2);
2006+
expect(mover!.querySelector('mo')!.textContent).toBe('\u007E');
2007+
});
2008+
2009+
it('returns null when m:e is absent (invalid per CT_Acc)', () => {
2010+
const omml = {
2011+
name: 'm:oMath',
2012+
elements: [
2013+
{
2014+
name: 'm:acc',
2015+
elements: [{ name: 'm:accPr', elements: [{ name: 'm:chr', attributes: { 'm:val': '\u0303' } }] }],
2016+
},
2017+
],
2018+
};
2019+
const result = convertOmmlToMathml(omml, doc);
2020+
// The outer <math> is produced only if it has children. With m:acc dropped,
2021+
// there are no math children, so convertOmmlToMathml returns null.
2022+
expect(result).toBeNull();
2023+
});
2024+
});
2025+
19022026
describe('m:limLow converter', () => {
19032027
it('converts m:limLow to <munder> with base and lower limit', () => {
19042028
// lim_{n→∞}

packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import {
1919
convertSubscript,
2020
convertSuperscript,
2121
convertSubSuperscript,
22+
convertAccent,
2223
convertPreSubSuperscript,
2324
convertRadical,
2425
convertLowerLimit,
@@ -43,6 +44,7 @@ export const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';
4344
const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
4445
// ── Implemented ──────────────────────────────────────────────────────────
4546
'm:r': convertMathRun,
47+
'm:acc': convertAccent, // Accent (diacritical mark above base)
4648
'm:bar': convertBar, // Bar (overbar/underbar)
4749
'm:d': convertDelimiter, // Delimiter (parentheses, brackets, braces)
4850
'm:f': convertFraction, // Fraction (numerator/denominator)
@@ -56,7 +58,6 @@ const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
5658
'm:sPre': convertPreSubSuperscript, // Pre-sub-superscript (left of base)
5759

5860
// ── Not yet implemented (community contributions welcome) ────────────────
59-
'm:acc': null, // Accent (diacritical mark above base)
6061
'm:borderBox': null, // Border box (border around math content)
6162
'm:box': null, // Box (invisible grouping container)
6263
'm:eqArr': null, // Equation array (vertical array of equations)

tests/behavior/tests/importing/math-equations.spec.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,37 @@ test.describe('math equation import and rendering', () => {
8888
}
8989
});
9090

91+
test('renders m:acc as <mover accent="true"> with spacing-form accent char', async ({ superdoc }) => {
92+
await superdoc.loadDocument(ALL_OBJECTS_DOC);
93+
await superdoc.waitForStable();
94+
95+
// The fixture has m:acc with m:chr m:val="U+0302" (combining circumflex).
96+
// convertAccent should:
97+
// 1. Produce a <mover accent="true"> wrapper
98+
// 2. Emit ASCII circumflex U+005E (not the combining U+0302) since that's
99+
// what MathML Core's operator dictionary marks as a stretchy accent.
100+
const accentData = await superdoc.page.evaluate(() => {
101+
const mover = document.querySelector('mover[accent="true"]');
102+
if (!mover) return null;
103+
const mo = mover.querySelector('mo');
104+
return {
105+
childCount: mover.children.length,
106+
baseText: mover.children[0]?.textContent,
107+
accentChar: mo?.textContent,
108+
accentCodepoint: mo?.textContent
109+
? 'U+' + (mo.textContent.codePointAt(0) ?? 0).toString(16).padStart(4, '0').toUpperCase()
110+
: null,
111+
};
112+
});
113+
114+
expect(accentData).not.toBeNull();
115+
expect(accentData!.childCount).toBe(2);
116+
expect(accentData!.baseText).toBe('x');
117+
// Combining circumflex (U+0302) in OMML must be rendered as ASCII circumflex (U+005E).
118+
expect(accentData!.accentChar).toBe('\u005E');
119+
expect(accentData!.accentCodepoint).toBe('U+005E');
120+
});
121+
91122
test('renders sub-superscript as <msubsup> with base, subscript, and superscript', async ({ superdoc }) => {
92123
await superdoc.loadDocument(ALL_OBJECTS_DOC);
93124
await superdoc.waitForStable();

0 commit comments

Comments
 (0)