Skip to content

Commit 6683c51

Browse files
authored
fix: make codeblock language detection performant (docmost#2032)
* fix: make codeblock language detection performant * lint
1 parent cc5c800 commit 6683c51

1 file changed

Lines changed: 99 additions & 56 deletions

File tree

Lines changed: 99 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,93 +1,130 @@
1-
import { findChildren } from '@tiptap/core'
2-
import type { Node as ProsemirrorNode } from '@tiptap/pm/model'
3-
import { Plugin, PluginKey } from '@tiptap/pm/state'
4-
import { Decoration, DecorationSet } from '@tiptap/pm/view'
1+
import { findChildren } from '@tiptap/core';
2+
import type { Node as ProsemirrorNode } from '@tiptap/pm/model';
3+
import { Plugin, PluginKey } from '@tiptap/pm/state';
4+
import { Decoration, DecorationSet } from '@tiptap/pm/view';
55
// @ts-ignore
6-
import highlight from 'highlight.js/lib/core'
6+
import highlight from 'highlight.js/lib/core';
77

8-
function parseNodes(nodes: any[], className: string[] = []): { text: string; classes: string[] }[] {
8+
function parseNodes(
9+
nodes: any[],
10+
className: string[] = [],
11+
): { text: string; classes: string[] }[] {
912
return nodes
10-
.map(node => {
11-
const classes = [...className, ...(node.properties ? node.properties.className : [])]
13+
.map((node) => {
14+
const classes = [
15+
...className,
16+
...(node.properties ? node.properties.className : []),
17+
];
1218

1319
if (node.children) {
14-
return parseNodes(node.children, classes)
20+
return parseNodes(node.children, classes);
1521
}
1622

1723
return {
1824
text: node.value,
1925
classes,
20-
}
26+
};
2127
})
22-
.flat()
28+
.flat();
2329
}
2430

2531
function getHighlightNodes(result: any) {
2632
// `.value` for lowlight v1, `.children` for lowlight v2
27-
return result.value || result.children || []
33+
return result.value || result.children || [];
2834
}
2935

3036
function registered(aliasOrLanguage: string) {
31-
return Boolean(highlight.getLanguage(aliasOrLanguage))
37+
return Boolean(highlight.getLanguage(aliasOrLanguage));
3238
}
3339

40+
// Max characters to sample for auto-detection to avoid performance issues with large code blocks
41+
const AUTO_DETECT_SAMPLE_SIZE = 3000;
42+
3443
function getDecorations({
3544
doc,
3645
name,
3746
lowlight,
3847
defaultLanguage,
3948
}: {
40-
doc: ProsemirrorNode
41-
name: string
42-
lowlight: any
43-
defaultLanguage: string | null | undefined
49+
doc: ProsemirrorNode;
50+
name: string;
51+
lowlight: any;
52+
defaultLanguage: string | null | undefined;
4453
}) {
45-
const decorations: Decoration[] = []
46-
47-
findChildren(doc, node => node.type.name === name).forEach(block => {
48-
let from = block.pos + 1
49-
const language = block.node.attrs.language || defaultLanguage
50-
const languages = lowlight.listLanguages()
51-
52-
const nodes =
53-
language && (languages.includes(language) || registered(language) || lowlight.registered?.(language))
54-
? getHighlightNodes(lowlight.highlight(language, block.node.textContent))
55-
: getHighlightNodes(lowlight.highlightAuto(block.node.textContent))
54+
const decorations: Decoration[] = [];
55+
56+
findChildren(doc, (node) => node.type.name === name).forEach((block) => {
57+
let from = block.pos + 1;
58+
const language = block.node.attrs.language || defaultLanguage;
59+
const languages = lowlight.listLanguages();
60+
const textContent = block.node.textContent;
61+
62+
let nodes;
63+
if (
64+
language &&
65+
(languages.includes(language) ||
66+
registered(language) ||
67+
lowlight.registered?.(language))
68+
) {
69+
nodes = getHighlightNodes(lowlight.highlight(language, textContent));
70+
} else {
71+
// For auto-detection, sample a limited portion to detect the language,
72+
// then highlight the full content with the detected language
73+
const sample =
74+
textContent.length > AUTO_DETECT_SAMPLE_SIZE
75+
? textContent.slice(0, AUTO_DETECT_SAMPLE_SIZE)
76+
: textContent;
77+
const autoResult = lowlight.highlightAuto(sample);
78+
const detectedLanguage = autoResult.data?.language;
79+
if (detectedLanguage && textContent.length > AUTO_DETECT_SAMPLE_SIZE) {
80+
nodes = getHighlightNodes(
81+
lowlight.highlight(detectedLanguage, textContent),
82+
);
83+
} else {
84+
nodes = getHighlightNodes(autoResult);
85+
}
86+
}
5687

57-
parseNodes(nodes).forEach(node => {
58-
const to = from + node.text.length
88+
parseNodes(nodes).forEach((node) => {
89+
const to = from + node.text.length;
5990

6091
if (node.classes.length) {
6192
const decoration = Decoration.inline(from, to, {
6293
class: node.classes.join(' '),
63-
})
94+
});
6495

65-
decorations.push(decoration)
96+
decorations.push(decoration);
6697
}
6798

68-
from = to
69-
})
70-
})
99+
from = to;
100+
});
101+
});
71102

72-
return DecorationSet.create(doc, decorations)
103+
return DecorationSet.create(doc, decorations);
73104
}
74105

75106
// eslint-disable-next-line @typescript-eslint/no-unsafe-function-type
76107
function isFunction(param: any): param is Function {
77-
return typeof param === 'function'
108+
return typeof param === 'function';
78109
}
79110

80111
export function LowlightPlugin({
81112
name,
82113
lowlight,
83114
defaultLanguage,
84115
}: {
85-
name: string
86-
lowlight: any
87-
defaultLanguage: string | null | undefined
116+
name: string;
117+
lowlight: any;
118+
defaultLanguage: string | null | undefined;
88119
}) {
89-
if (!['highlight', 'highlightAuto', 'listLanguages'].every(api => isFunction(lowlight[api]))) {
90-
throw Error('You should provide an instance of lowlight to use the code-block-lowlight extension')
120+
if (
121+
!['highlight', 'highlightAuto', 'listLanguages'].every((api) =>
122+
isFunction(lowlight[api]),
123+
)
124+
) {
125+
throw Error(
126+
'You should provide an instance of lowlight to use the code-block-lowlight extension',
127+
);
91128
}
92129

93130
const lowlightPlugin: Plugin<any> = new Plugin({
@@ -102,10 +139,16 @@ export function LowlightPlugin({
102139
defaultLanguage,
103140
}),
104141
apply: (transaction, decorationSet, oldState, newState) => {
105-
const oldNodeName = oldState.selection.$head.parent.type.name
106-
const newNodeName = newState.selection.$head.parent.type.name
107-
const oldNodes = findChildren(oldState.doc, node => node.type.name === name)
108-
const newNodes = findChildren(newState.doc, node => node.type.name === name)
142+
const oldNodeName = oldState.selection.$head.parent.type.name;
143+
const newNodeName = newState.selection.$head.parent.type.name;
144+
const oldNodes = findChildren(
145+
oldState.doc,
146+
(node) => node.type.name === name,
147+
);
148+
const newNodes = findChildren(
149+
newState.doc,
150+
(node) => node.type.name === name,
151+
);
109152

110153
if (
111154
transaction.docChanged &&
@@ -117,43 +160,43 @@ export function LowlightPlugin({
117160
// OR transaction has changes that completely encapsulate a node
118161
// (for example, a transaction that affects the entire document).
119162
// Such transactions can happen during collab syncing via y-prosemirror, for example.
120-
transaction.steps.some(step => {
163+
transaction.steps.some((step) => {
121164
// @ts-ignore
122165
return (
123166
// @ts-ignore
124167
step.from !== undefined &&
125168
// @ts-ignore
126169
step.to !== undefined &&
127-
oldNodes.some(node => {
170+
oldNodes.some((node) => {
128171
// @ts-ignore
129172
return (
130173
// @ts-ignore
131174
node.pos >= step.from &&
132175
// @ts-ignore
133176
node.pos + node.node.nodeSize <= step.to
134-
)
177+
);
135178
})
136-
)
179+
);
137180
}))
138181
) {
139182
return getDecorations({
140183
doc: transaction.doc,
141184
name,
142185
lowlight,
143186
defaultLanguage,
144-
})
187+
});
145188
}
146189

147-
return decorationSet.map(transaction.mapping, transaction.doc)
190+
return decorationSet.map(transaction.mapping, transaction.doc);
148191
},
149192
},
150193

151194
props: {
152195
decorations(state) {
153-
return lowlightPlugin.getState(state)
196+
return lowlightPlugin.getState(state);
154197
},
155198
},
156-
})
199+
});
157200

158-
return lowlightPlugin
159-
}
201+
return lowlightPlugin;
202+
}

0 commit comments

Comments
 (0)