-
Notifications
You must be signed in to change notification settings - Fork 481
Expand file tree
/
Copy pathlabel-templates.ts
More file actions
352 lines (330 loc) · 11.9 KB
/
Copy pathlabel-templates.ts
File metadata and controls
352 lines (330 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// An "auto" label: a template form of `LabelDescription` whose concrete
// instantiations are synthesized by matching its `funcPrefixTemplates`
// against function names in the profile. E.g. matching
// "mozilla::dom::{Class}_Binding::{method}(" against
// "mozilla::dom::EventTarget_Binding::addEventListener(JSContext*, ...)" will create
// an instantiation with Class="EventTarget" and method="addEventListener".
// `nameTemplate` and `funcPrefixTemplates` mirror the `name` and
// `funcPrefixes` of `LabelDescription`: each template expands, with the
// recovered variable values, into the corresponding literal field.
export type AutoLabel = {
// Template for the synthesized label's `name`, e.g. "set {Class}.{prop}".
// Only plain `{name}` placeholders are allowed here, no `{name:modifier}`:
// `expandPattern` throws if a placeholder carries a modifier, and also if
// it names a variable that the matching funcPrefixTemplate did not capture.
nameTemplate: string;
// A list of templates, each matched independently against funcNames; a
// successful match becomes one entry in the synthesized label's
// `funcPrefixes`. A single AutoLabel typically lists one template per
// engine (Gecko / Blink / WebKit) so the same logical label is
// discovered from any of the supported binding-name styles.
//
// Each template is compiled to a regex anchored at the start of the
// funcName only, so it matches a prefix. For a given funcName the
// templates are tried in list order and `discoverAutoLabels` stops at the
// first one that matches.
//
// Template variables may carry a modifier, written `{name:modifier}`,
// which controls both what the compiled regex accepts and how the
// captured text is transformed back before substitution into
// `nameTemplate`:
//
// - no modifier: matches PascalCase when the variable name starts with
// an uppercase letter (e.g. `{Class}` → `Element`), camelCase
// otherwise (e.g. `{method}` → `querySelector`). The captured text
// is substituted as-is.
// - `:pascal`: matches PascalCase; the first letter is lowercased on
// substitution. Used to recover Blink V8 binding method names
// (`SetSrc` in `SetSrcOperation` → `setSrc`).
// - `:blink_snake`: matches lowercase snake_case; on substitution the
// value is reassembled into PascalCase using `BLINK_SPECIAL_TOKENS`
// to recover acronym casing (`html_image_element` → `HTMLImageElement`).
//
// Any other modifier makes `reverseModifier` throw once a funcName
// matches the template.
funcPrefixTemplates: string[];
};
// An explicit label entry, either authored directly in a labels TOML file
// (`[[labels]]`) or synthesized by `discoverAutoLabels` from an
// `[[auto_labels]]` entry. A stack frame whose funcName starts with any
// string in `funcPrefixes` gets `name` attached as its label by
// `insertStackLabels`.
export type LabelDescription = {
// The label text. `resolveAllLabels` also uses it as the merge key:
// entries sharing a name are combined into one.
name: string;
// Literal (non-template) funcName prefixes that select this label.
funcPrefixes: string[];
};
// An AutoLabel with each of its `funcPrefixTemplates` compiled to a regex
// plus the ordered list of variables that regex captures (one entry per
// capture group, in match order).
type CompiledAutoEntry = {
// The AutoLabel this entry was compiled from; its `nameTemplate` is
// expanded once a funcName matches.
auto: AutoLabel;
// One compiled entry per template, in the same order as
// `auto.funcPrefixTemplates`.
funcPrefixTemplates: Array<{
// Prefix-matching regex (anchored at the start only).
regex: RegExp;
// `vars[i]` describes capture group `i + 1` of `regex`.
vars: Array<{ name: string; modifier: string | undefined }>;
}>;
};
// Allows mapping strings from auto-detected Blink DOM binding C++ functions
// to correctly-cased class names.
// For example, if we match `v8_{Class:blink_snake}` against `v8_dom_token_list`,
// we want to produce the class name "DOMTokenList" rather than "DomTokenList".
// Based on https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/build/scripts/blinkbuild/name_style_converter.py;l=10;drc=047c7dc4ee1ce908d7fea38ca063fa2f80f92c77
//
// The entries are listed longest-first. Within this file they are only
// consulted through `BLINK_TOKEN_BY_LOWER`, which looks up whole
// `_`-separated segments by their lowercased form. Where two entries differ
// only in case (`WebGL2`/`webgl2`, `ETC1`/`etc1`, `S3TC`/`s3tc`, `V8`/`v8`),
// the one listed first is the one that lookup keeps.
const BLINK_SPECIAL_TOKENS = [
'WebCodecs',
'WebSocket',
'String16',
'Float32',
'Float64',
'Base64',
'IFrame',
'Latin1',
'MathML',
'PlugIn',
'SQLite',
'Uint16',
'Uint32',
'WebGL2',
'webgl2',
'WebGPU',
'ASCII',
'CSSOM',
'CType',
'DList',
'Int16',
'Int32',
'MPath',
'OList',
'TSpan',
'UList',
'UTF16',
'Uint8',
'WebGL',
'XPath',
'ETC1',
'etc1',
'HTML',
'Int8',
'S3TC',
's3tc',
'SPv2',
'UTF8',
'sRGB',
'URLs',
'API',
'CSS',
'DNS',
'DOM',
'EXT',
'RTC',
'SVG',
'XSS',
'2D',
'AX',
'FE',
'JS',
'V0',
'V8',
'v8',
'XR',
];
// Lookup table from a lowercased token to its canonical casing,
// e.g. "urls" -> "URLs". When several entries of `BLINK_SPECIAL_TOKENS`
// lowercase to the same key, the earliest one in the list is kept.
const BLINK_TOKEN_BY_LOWER = BLINK_SPECIAL_TOKENS.reduce(
  (lookup, token) => {
    const key = token.toLowerCase();
    // Leave an already-registered key alone so the first spelling wins.
    return lookup.has(key) ? lookup : lookup.set(key, token);
  },
  new Map<string, string>()
);
/**
 * Rebuild the PascalCase name that a `:blink_snake` capture was derived
 * from, e.g. "html_div_element" becomes "HTMLDivElement".
 *
 * The snake_case form has lost its casing (`html_element` could have been
 * `HtmlElement` or `HTMLElement`), so each `_`-separated segment is first
 * looked up in `BLINK_TOKEN_BY_LOWER` to recover a canonical spelling.
 * Segments without an entry simply get their first character uppercased.
 * The segments are then concatenated with no separator.
 */
export function reverseBlinkSnake(value: string): string {
  let pascal = '';
  for (const segment of value.split('_')) {
    const canonical = BLINK_TOKEN_BY_LOWER.get(segment);
    if (canonical !== undefined) {
      pascal += canonical;
    } else {
      pascal += segment.charAt(0).toUpperCase() + segment.slice(1);
    }
  }
  return pascal;
}
/**
 * Undo the case transformation that `modifier` stands for, turning text
 * captured from a funcName into the form that gets substituted into a
 * label name. The modifiers are described on
 * `AutoLabel.funcPrefixTemplates`.
 *
 * - `undefined`: the value is returned unchanged.
 * - `'pascal'`: the first character is lowercased.
 * - `'blink_snake'`: delegated to `reverseBlinkSnake`.
 *
 * Throws an `Error` for any other modifier.
 */
export function reverseModifier(
  value: string,
  modifier: string | undefined
): string {
  if (modifier === undefined) {
    return value;
  }
  if (modifier === 'pascal') {
    return value.charAt(0).toLowerCase() + value.slice(1);
  }
  if (modifier === 'blink_snake') {
    return reverseBlinkSnake(value);
  }
  throw new Error(`Unknown template modifier: ${modifier}`);
}
// This regex matches `{name}` and `{name:modifier}` placeholders.
const TEMPLATE_VAR_RE = /\{(\w+)(?::(\w+))?\}/g;

/**
 * Substitute `{name}` placeholders in a label template. Used only to
 * produce human-readable label names like `Element.querySelector`;
 * modifier syntax (`{name:modifier}`) is not supported here.
 *
 * @param template - Label template containing zero or more `{name}`
 *   placeholders. Text outside placeholders is copied through unchanged.
 * @param vars - Values to substitute, keyed by placeholder name. Only the
 *   object's own properties are considered.
 * @returns The template with every placeholder replaced by its value.
 * @throws Error if a placeholder carries a modifier, or if it names a
 *   variable that is not an own property of `vars`.
 */
export function expandPattern(
  template: string,
  vars: Record<string, string>
): string {
  return template.replace(
    TEMPLATE_VAR_RE,
    (_match, name: string, modifier: string | undefined) => {
      if (modifier !== undefined) {
        throw new Error(
          `Template modifier ":${modifier}" is not supported in label names`
        );
      }
      // Own-property check rather than `in`: `in` also sees inherited keys,
      // so a placeholder such as `{toString}` or `{constructor}` would be
      // treated as provided and an Object.prototype member would be
      // substituted instead of reporting the missing variable.
      if (!Object.prototype.hasOwnProperty.call(vars, name)) {
        throw new Error(`Template variable "${name}" not provided`);
      }
      return vars[name];
    }
  );
}
/**
 * Pick the regex fragment (without the surrounding capture parentheses)
 * that a template variable should match, based on its name and modifier.
 */
function regexCharClassForVar(
  name: string,
  modifier: string | undefined
): string {
  switch (modifier) {
    case 'blink_snake':
      // snake_case identifier: starts with a lowercase letter, followed by
      // lowercase alphanumerics and optional `_`-separated alnum runs.
      return '[a-z][a-z0-9]*(?:_[a-z0-9]+)*';
    case 'pascal':
      // `:pascal` always captures a PascalCase identifier, whatever the
      // variable is called.
      return '[A-Z][A-Za-z0-9]*';
    default:
      // Otherwise the variable's own name decides the case style:
      // an uppercase initial means PascalCase, anything else camelCase.
      // Underscores are deliberately not accepted in either form: DOM
      // method/property names are camelCase, and allowing `_` would let
      // `{method}` swallow the `set_`/`get_` prefix of binding
      // setters/getters (matching
      // `mozilla::dom::Element_Binding::set_innerHTML(` as method=
      // `set_innerHTML` instead of leaving it for the dom_setter template).
      return /^[A-Z]/.test(name) ? '[A-Z][A-Za-z0-9]*' : '[a-z][A-Za-z0-9]*';
  }
}
/**
 * Backslash-escape every regex metacharacter in `s` so that the result,
 * embedded in a regex source, matches `s` literally.
 */
function escapeRegex(s: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
/**
 * Turn a template string into a prefix-matching regex plus the list of
 * variables it captures, in capture-group order.
 *
 * The literal text between placeholders is regex-escaped. Every `{name}`
 * or `{name:modifier}` placeholder becomes one capturing group whose
 * contents are chosen by `regexCharClassForVar`.
 *
 * E.g. "mozilla::dom::{Class}_Binding::set_{prop}(" compiles to
 * /^mozilla::dom::([A-Z][A-Za-z0-9]*)_Binding::set_([a-z][A-Za-z0-9]*)\(/
 * with vars [{ name: 'Class' }, { name: 'prop' }].
 *
 * The regex is anchored with `^` only, never `$`. A successful match
 * therefore yields, as `m[0]`, the literal funcName prefix that is stored
 * in `funcPrefixes`.
 */
export function compilePatternToRegex(pattern: string): {
  regex: RegExp;
  vars: Array<{ name: string; modifier: string | undefined }>;
} {
  const captures: Array<{ name: string; modifier: string | undefined }> = [];
  const pieces: string[] = ['^'];
  let cursor = 0;

  for (const placeholder of pattern.matchAll(TEMPLATE_VAR_RE)) {
    const start = placeholder.index!;
    const name = placeholder[1];
    // Group 2 is undefined when the placeholder has no `:modifier` part.
    const modifier: string | undefined = placeholder[2];

    // Literal text since the previous placeholder, then the capture group.
    pieces.push(escapeRegex(pattern.slice(cursor, start)));
    pieces.push(`(${regexCharClassForVar(name, modifier)})`);
    captures.push({ name, modifier });

    cursor = start + placeholder[0].length;
  }

  // Trailing literal text after the last placeholder.
  pieces.push(escapeRegex(pattern.slice(cursor)));

  return { regex: new RegExp(pieces.join('')), vars: captures };
}
/**
 * Compile every template of `auto` with `compilePatternToRegex`, keeping
 * the results in the same order as `auto.funcPrefixTemplates`.
 */
function compileAutoLabel(auto: AutoLabel): CompiledAutoEntry {
  const compiledTemplates: CompiledAutoEntry['funcPrefixTemplates'] = [];
  for (const template of auto.funcPrefixTemplates) {
    const compiledTemplate = compilePatternToRegex(template);
    compiledTemplates.push({
      regex: compiledTemplate.regex,
      vars: compiledTemplate.vars,
    });
  }
  return { auto, funcPrefixTemplates: compiledTemplates };
}
/**
 * Scan `funcNames` and build one label per distinct
 * (auto-label name template, expanded label name) pair produced by the
 * `[[auto_labels]]` entries.
 *
 * For each funcName and each auto label, the label's templates are tried
 * in order and only the first one that matches is used. The matched
 * prefix (`m[0]`) is added to the label's `funcPrefixes` unless it is
 * already there, so one label collects every observed spelling of the
 * same (Class, method) pair.
 *
 * Returns the labels in the order they were first discovered. Errors
 * thrown by `reverseModifier` or `expandPattern` propagate to the caller.
 */
export function discoverAutoLabels(
  autoLabels: AutoLabel[],
  funcNames: Iterable<string>
): LabelDescription[] {
  const compiledEntries = autoLabels.map((entry) => compileAutoLabel(entry));
  if (compiledEntries.length === 0) {
    return [];
  }

  const labelsByKey = new Map<string, LabelDescription>();

  for (const funcName of funcNames) {
    for (const { auto, funcPrefixTemplates } of compiledEntries) {
      for (const { regex, vars: captureVars } of funcPrefixTemplates) {
        const match = regex.exec(funcName);
        if (match === null) {
          continue;
        }
        const matchedPrefix = match[0];

        // Capture group i + 1 holds the raw text for captureVars[i].
        const values: Record<string, string> = {};
        captureVars.forEach(({ name, modifier }, groupIndex) => {
          values[name] = reverseModifier(match[groupIndex + 1], modifier);
        });

        const labelName = expandPattern(auto.nameTemplate, values);
        // The NUL separator keeps the template and the expanded name from
        // running together in the key.
        const key = [auto.nameTemplate, labelName].join('\0');

        const known = labelsByKey.get(key);
        if (known === undefined) {
          labelsByKey.set(key, {
            name: labelName,
            funcPrefixes: [matchedPrefix],
          });
        } else if (known.funcPrefixes.indexOf(matchedPrefix) === -1) {
          known.funcPrefixes.push(matchedPrefix);
        }

        // First matching template wins for this (auto, funcName).
        break;
      }
    }
  }

  return Array.from(labelsByKey.values());
}
/**
 * Resolve `autoLabels` against `funcNames`, then merge in `labels`.
 *
 * Labels are keyed by `name`. On a name collision (whether between two
 * auto-discovered labels, or between an auto-discovered label and an
 * explicit one), funcPrefixes are merged into a deduplicated union: each
 * distinct prefix appears once, in first-seen order. Two `autoLabels`
 * entries can legitimately produce the same label name from different
 * templates, e.g. a generic `{Class}.{method}` entry and a specific
 * `CanvasRenderingContext2D.{method}` entry both yielding
 * `CanvasRenderingContext2D.fill`, and we want every matched prefix to apply.
 *
 * @param autoLabels - Template labels to instantiate from `funcNames`.
 * @param labels - Explicit labels; merged after the auto-discovered ones.
 * @param funcNames - Function names to match the auto label templates against.
 * @returns One freshly created `LabelDescription` per distinct name, with
 *   auto-discovered names first, each in order of first appearance. The
 *   input `labels` entries are not mutated.
 */
export function resolveAllLabels(
  autoLabels: AutoLabel[],
  labels: LabelDescription[],
  funcNames: Iterable<string>
): LabelDescription[] {
  const auto = discoverAutoLabels(autoLabels, funcNames);

  // A Set per name gives the deduplicated union and keeps insertion order,
  // so the first-seen prefix order is preserved.
  const prefixesByName = new Map<string, Set<string>>();
  for (const { name, funcPrefixes } of [...auto, ...labels]) {
    let prefixes = prefixesByName.get(name);
    if (prefixes === undefined) {
      prefixes = new Set<string>();
      prefixesByName.set(name, prefixes);
    }
    // Add one at a time rather than spreading into a call, which could
    // exceed the engine's argument limit for very large prefix lists.
    for (const prefix of funcPrefixes) {
      prefixes.add(prefix);
    }
  }

  return Array.from(prefixesByName, ([name, prefixes]) => ({
    name,
    funcPrefixes: [...prefixes],
  }));
}