Skip to content

Commit 91b4e28

Browse files
authored
fix: replace regex HTML pre-scan with linear scanner (#49)
* fix: avoid quadratic html-tag scan in ignoreHtmlTag
* fix: preserve tag recognition after unterminated HTML comments
  - Export MatchRange from utils so useCheckIsHtmlTag's import resolves (fixes TS2305 during vite-plugin-dts declaration emit).
  - Fall through to the normal <...> scan when <!-- has no matching --> instead of aborting the loop, restoring the original regex behavior.
  - Add regression tests for unterminated comments, a lone <!--, and empty <>.
1 parent 8adb359 commit 91b4e28

3 files changed

Lines changed: 65 additions & 5 deletions

File tree

packages/text-vide/src/__tests__/index.test.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,30 @@ describe('with html tags', () => {
321321
expect(textVide(text)).toBe(expected);
322322
});
323323

324+
it('preserves tag ranges after an unterminated HTML comment opener', () => {
325+
const text = 'aa <!-- bb <div>cd</div>';
326+
const expected = '<b>a</b>a <!-- bb <div><b>c</b>d</div>';
327+
expect(textVide(text)).toBe(expected);
328+
});
329+
330+
it('continues highlighting after an unterminated comment with multiple tags', () => {
331+
const text = 'x <!-- unterminated <div>ab</div> yy <i>cd</i>';
332+
const expected =
333+
'x <!-- unterminated <div><b>a</b>b</div> <b>y</b>y <i><b>c</b>d</i>';
334+
expect(textVide(text)).toBe(expected);
335+
});
336+
337+
it('treats an unterminated comment opener like a regular tag opener', () => {
338+
const text = 'a <!-- foo';
339+
const expected = 'a <!-- <b>fo</b>o';
340+
expect(textVide(text)).toBe(expected);
341+
});
342+
343+
it('ignores empty angle brackets', () => {
344+
const text = '<>';
345+
expect(textVide(text)).toBe('<>');
346+
});
347+
324348
it('complex html tags', () => {
325349
const text = `<div class="bionic-reader-container">
326350

packages/text-vide/src/useCheckIsHtmlTag.ts

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,44 @@
1-
import { extractMatchRangeList } from './utils';
1+
import { MatchRange } from './utils';
22

3-
const HTML_TAG_REGEX = /<!--[^]*?-->|<[^>]+>/g;
3+
/**
 * Collects the index ranges of HTML comments and tags in `text` using a
 * single left-to-right scan built on `indexOf`.
 *
 * Each range is `[openIndex, closeIndex]`, where `openIndex` is the position
 * of the opening `<` and `closeIndex` is the position of the final `>`
 * (i.e. the end index is inclusive).
 *
 * Matching rules, mirroring the former `/<!--[^]*?-->|<[^>]+>/g` regex:
 * - `<!-- ... -->` is one range ending at the first `-->` after the opener.
 * - Otherwise `<` up to the next `>` is one range, provided at least one
 *   character sits between them (`<>` is ignored).
 * - An unterminated `<!--` is handled like any other `<` opener.
 *
 * @param text - The string to scan.
 * @returns Ranges in ascending order of position; empty when none are found.
 */
const getHtmlTagRangeList = (text: string): MatchRange[] => {
  const htmlTagRangeList: MatchRange[] = [];
  // Becomes false after the first failed search for `-->`. Any later `<!--`
  // starts further right, so it cannot have a closer either; skipping the
  // repeated search keeps inputs with many unterminated openers linear.
  let mayHaveCommentCloser = true;
  let cursor = 0;

  while (cursor < text.length) {
    const openIndex = text.indexOf('<', cursor);
    if (openIndex === -1) {
      break;
    }

    if (mayHaveCommentCloser && text.startsWith('<!--', openIndex)) {
      const commentCloseIndex = text.indexOf('-->', openIndex + 4);
      if (commentCloseIndex !== -1) {
        // `+ 2` lands on the `>` of `-->` (inclusive end of the range).
        htmlTagRangeList.push([openIndex, commentCloseIndex + 2]);
        cursor = commentCloseIndex + 3;
        continue;
      }
      // Unterminated `<!--`: remember that no closer exists ahead, then fall
      // through and treat the `<` as a normal tag opener so subsequent tags
      // are still recognized.
      mayHaveCommentCloser = false;
    }

    const closeIndex = text.indexOf('>', openIndex + 1);
    if (closeIndex === -1) {
      // No `>` anywhere ahead, so no further tag can be completed.
      break;
    }

    // Skip empty `<>`, which the original `<[^>]+>` pattern never matched.
    if (closeIndex > openIndex + 1) {
      htmlTagRangeList.push([openIndex, closeIndex]);
    }

    cursor = closeIndex + 1;
  }

  return htmlTagRangeList;
};
439

540
export const useCheckIsHtmlTag = (text: string) => {
6-
const htmlTagMatchList = text.matchAll(HTML_TAG_REGEX);
7-
const htmlTagRangeList = extractMatchRangeList(htmlTagMatchList);
41+
const htmlTagRangeList = getHtmlTagRangeList(text);
842
const reversedHtmlTagRangeList = htmlTagRangeList.reverse();
943

1044
return (match: RegExpMatchArray) => {

packages/text-vide/src/utils.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
/** A pair of numeric indices locating a matched span within a string; the first element is the span's start index. */
export type MatchRange = [number, number];
2+
13
export const extractMatchRangeList = (
24
matchList: IterableIterator<RegExpMatchArray>,
3-
) =>
5+
): MatchRange[] =>
46
Array.from(matchList).map(match => {
57
const startIndex = match.index!;
68
const [matchedWord] = match;

0 commit comments

Comments
 (0)