Skip to content

Commit d57a971

Browse files
committed
feat: enhance symbol parsing to support emoji sequences and improve error handling for invalid symbols
1 parent 40de8f6 commit d57a971

1 file changed

Lines changed: 56 additions & 25 deletions

File tree

src/compute-engine/latex-syntax/parse-symbol.ts

Lines changed: 56 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,11 @@ function parseSymbolBody(parser: Parser): string | null {
159159
parser.nextToken();
160160
continue;
161161
}
162+
// Try emoji sequences first (they fail parseSymbolToken's XIDC check)
163+
if (EMOJIS.test(id + token)) {
164+
id += parser.nextToken();
165+
continue;
166+
}
162167
const next = parseSymbolToken(parser, { toplevel: false });
163168
if (next === null) return null;
164169
id += next;
@@ -221,6 +226,7 @@ function matchPrefixedSymbol(parser: Parser): string | null {
221226

222227
if (prefix === null) return null;
223228

229+
const start = parser.index;
224230
parser.nextToken();
225231
if (parser.match('<{>')) {
226232
// If the symbol starts with a digit,
@@ -245,7 +251,10 @@ function matchPrefixedSymbol(parser: Parser): string | null {
245251
}
246252

247253
body += parseSymbolBody(parser);
248-
if (body === null || !parser.match('<}>')) return null;
254+
if (body === null || !parser.match('<}>')) {
255+
parser.index = start;
256+
return null;
257+
}
249258
// Multi-character symbols do not need a prefix
250259
// if they are upright (that's their default presentation)
251260
if (prefix === '_upright' && body.length > 1) return body;
@@ -255,6 +264,7 @@ function matchPrefixedSymbol(parser: Parser): string | null {
255264
//
256265
// Not a prefixed symbol
257266
//
267+
parser.index = start;
258268
return null;
259269
}
260270

@@ -264,30 +274,51 @@ function matchPrefixedSymbol(parser: Parser): string | null {
264274
export function parseInvalidSymbol(parser: Parser): MathJsonExpression | null {
265275
const start = parser.index;
266276
const id = matchPrefixedSymbol(parser);
267-
if (id === null || isValidSymbol(id)) return null;
268-
269-
return parser.error(['invalid-symbol', { str: validateSymbol(id) }], start);
270-
271-
// const prefix =SYMBOL_PREFIX[parser.peek] ?? null;
272-
// if (prefix === null) return null;
273-
274-
// const start = parser.index;
275-
// parser.nextToken();
276-
// if (parser.match('<{>')) {
277-
// let level = 0;
278-
// while (!parser.atEnd && level === 0 && parser.peek !== '<}>') {
279-
// if (parser.peek === '<{>') level += 1;
280-
// if (parser.peek === '<}>') level -= 1;
281-
// parser.nextToken();
282-
// }
283-
// parser.match('<}>');
284-
// }
285-
// const s = parser.latex(start, parser.index);
286-
// if (isValidSymbo(s)) {
287-
// this.index = start;
288-
// return null;
289-
// }
290-
// return parser.error(['invalid-symbol', validateSymbol(s)], start);
277+
278+
if (id !== null) {
279+
if (isValidSymbol(id)) return null;
280+
return parser.error(['invalid-symbol', { str: validateSymbol(id) }], start);
281+
}
282+
283+
// matchPrefixedSymbol returned null — it may have partially consumed
284+
// tokens (prefix + '{') before failing on an invalid body character.
285+
// Reset and try a permissive fallback: consume the entire \mathrm{...}
286+
// group to report a proper error.
287+
parser.index = start;
288+
289+
const prefix = SYMBOL_PREFIX[parser.peek] ?? null;
290+
if (prefix === null) return null;
291+
292+
parser.nextToken();
293+
if (!parser.match('<{>')) {
294+
parser.index = start;
295+
return null;
296+
}
297+
298+
// Consume everything inside the braces (including nested braces)
299+
const bodyStart = parser.index;
300+
let level = 0;
301+
while (!parser.atEnd && !(level === 0 && parser.peek === '<}>')) {
302+
if (parser.peek === '<{>') level += 1;
303+
if (parser.peek === '<}>') level -= 1;
304+
parser.nextToken();
305+
}
306+
307+
// Extract the body content and check if it's actually invalid.
308+
// If it's a valid symbol (e.g., pure emoji), restore and return null
309+
// so other parsing paths can handle it.
310+
const bodyText = parser.latex(bodyStart, parser.index);
311+
if (isValidSymbol(bodyText)) {
312+
parser.index = start;
313+
return null;
314+
}
315+
316+
parser.match('<}>');
317+
318+
return parser.error(
319+
['invalid-symbol', { str: validateSymbol(bodyText) }],
320+
start
321+
);
291322
}
292323

293324
/**

0 commit comments

Comments
 (0)