Skip to content

Commit b9ee107

Browse files
committed
feat(symbolization): print JS functions and remap PC locations
Mark function-entry PCs so libFuzzer emits NEW_FUNC lines for JavaScript targets and improve function names with class-qualified methods. Remap CJS and ESM print_pcs/print_funcs locations through inline or external source maps with generated-JS fallback, and replace sourceMappingURL extraction with linear-time parsing.
1 parent ad99c42 commit b9ee107

12 files changed

Lines changed: 783 additions & 52 deletions

packages/fuzzer/coverage.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ export class CoverageTracker {
9595
*
9696
* @param filename Source file path
9797
* @param funcNames Deduplicated function name table
98-
* @param entries Flat Int32Array: [edgeId, line, col, funcIdx, ...]
98+
* @param entries Flat Int32Array:
99+
* [edgeId, line, col, funcIdx, isFuncEntry, ...]
99100
* @param pcBase For ESM: the pcBase from createModuleCounters.
100101
* For CJS: pass 0 (edge IDs are already global PCs).
101102
*/

packages/fuzzer/shared/coverage.cpp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,23 @@ namespace {
2727
// We register an array of 8-bit coverage counters with libFuzzer. The array is
2828
// populated from JavaScript using Buffer.
2929
uint8_t *gCoverageCounters = nullptr;
30+
size_t gCoverageCountersSize = 0;
3031

3132
// The PC table is used by libFuzzer to keep track of program addresses
3233
// corresponding to coverage counters. The flags determine whether the
33-
// corresponding counter is the beginning of a function; we don't currently use
34-
// it.
34+
// corresponding counter is the beginning of a function.
3535
struct PCTableEntry {
3636
uintptr_t PC, PCFlags;
3737
};
3838

39+
struct ModulePCTable {
40+
uintptr_t basePC;
41+
size_t numEntries;
42+
PCTableEntry *entries;
43+
};
44+
45+
std::vector<ModulePCTable> gModulePCTables;
46+
3947
// The array of supplementary information for coverage counters. Each entry
4048
// corresponds to an entry in gCoverageCounters; since we don't know the actual
4149
// addresses of our counters in JS land, we fill this table with fake
@@ -54,6 +62,7 @@ void RegisterCoverageMap(const Napi::CallbackInfo &info) {
5462
auto buf = info[0].As<Napi::Buffer<uint8_t>>();
5563

5664
gCoverageCounters = reinterpret_cast<uint8_t *>(buf.Data());
65+
gCoverageCountersSize = buf.Length();
5766
// Fill the PC table with fake entries. The only requirement is that the fake
5867
// addresses must not collide with the locations of real counters (e.g., from
5968
// instrumented C++ code). Therefore, we just use the address of the counter
@@ -122,6 +131,7 @@ Napi::Value RegisterModuleCounters(const Napi::CallbackInfo &info) {
122131
__sanitizer_cov_8bit_counters_init(buf.Data(), buf.Data() + size);
123132
__sanitizer_cov_pcs_init(reinterpret_cast<uintptr_t *>(pcEntries),
124133
reinterpret_cast<uintptr_t *>(pcEntries + size));
134+
gModulePCTables.push_back({basePC, size, pcEntries});
125135

126136
return Napi::Number::New(info.Env(), static_cast<double>(basePC));
127137
}
@@ -155,6 +165,13 @@ uint32_t internString(const std::string &s) {
155165
return static_cast<uint32_t>(gStringTable.size() - 1);
156166
}
157167

168+
// Looks up the registered per-module PC table whose basePC equals the given
// value. Returns a pointer into gModulePCTables, or nullptr when no module
// was registered with that base.
ModulePCTable *findModulePCTable(uintptr_t basePC) {
  for (size_t i = 0; i < gModulePCTables.size(); ++i) {
    ModulePCTable &candidate = gModulePCTables[i];
    if (candidate.basePC != basePC) {
      continue;
    }
    return &candidate;
  }
  return nullptr;
}
174+
158175
// Undo libFuzzer's GetNextInstructionPc before lookup.
159176
uintptr_t toPCTablePC(uintptr_t symbolizerPC) {
160177
#if defined(__aarch64__) || defined(__arm__)
@@ -167,7 +184,8 @@ uintptr_t toPCTablePC(uintptr_t symbolizerPC) {
167184
} // namespace
168185

169186
// Called from JS: registerPCLocations(filename, funcNames[], entries[], pcBase)
170-
// entries is a flat Int32Array: [edgeId, line, col, funcIdx, ...]
187+
// entries is a flat Int32Array:
188+
// [edgeId, line, col, funcIdx, isFuncEntry, ...]
171189
// pcBase: for ESM pass the value returned by registerModuleCounters;
172190
// for CJS pass 0 (edge IDs are already global PCs).
173191
void RegisterPCLocations(const Napi::CallbackInfo &info) {
@@ -199,12 +217,14 @@ void RegisterPCLocations(const Napi::CallbackInfo &info) {
199217
bool isEsm = pcBase >= ESM_BASE;
200218
auto baseOffset = isEsm ? pcBase - ESM_BASE : pcBase;
201219
auto &locations = isEsm ? gEsmLocations : gCjsLocations;
220+
auto *modulePCTable = isEsm ? findModulePCTable(pcBase) : nullptr;
202221

203-
for (size_t i = 0; i + 3 < length; i += 4) {
222+
for (size_t i = 0; i + 4 < length; i += 5) {
204223
auto edgeId = static_cast<uint32_t>(data[i]);
205224
auto line = static_cast<uint32_t>(data[i + 1]);
206225
auto col = static_cast<uint32_t>(data[i + 2]);
207226
auto localFuncIdx = static_cast<uint32_t>(data[i + 3]);
227+
bool isFuncEntry = data[i + 4] != 0;
208228

209229
auto idx = baseOffset + edgeId;
210230
if (idx >= locations.size()) {
@@ -214,6 +234,16 @@ void RegisterPCLocations(const Napi::CallbackInfo &info) {
214234
uint32_t globalFuncIdx =
215235
localFuncIdx < funcIndices.size() ? funcIndices[localFuncIdx] : 0;
216236
locations[idx] = {fileIdx, globalFuncIdx, line, col};
237+
238+
if (!isFuncEntry) continue;
239+
240+
if (isEsm) {
241+
if (modulePCTable != nullptr && edgeId < modulePCTable->numEntries) {
242+
modulePCTable->entries[edgeId].PCFlags |= 1;
243+
}
244+
} else if (gPCEntries != nullptr && edgeId < gCoverageCountersSize) {
245+
gPCEntries[edgeId].PCFlags |= 1;
246+
}
217247
}
218248
}
219249

packages/instrumentor/SourceMapRegistry.ts

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
* limitations under the License.
1515
*/
1616

17+
import * as fs from "fs";
18+
import * as path from "path";
19+
import { fileURLToPath } from "url";
20+
1721
import { RawSourceMap } from "source-map";
1822
import sms from "source-map-support";
1923

@@ -39,6 +43,8 @@ const regex = RegExp(
3943
"mg",
4044
);
4145

46+
const URL_PREFIX = /^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//;
47+
4248
/**
4349
* Extracts the inline source map from a code string.
4450
*
@@ -54,14 +60,128 @@ export function extractInlineSourceMap(code: string): SourceMap | undefined {
5460
}
5561
}
5662

63+
/**
 * Returns the source map referenced by `code`.
 *
 * An inline (data URL) source map takes precedence; only when none is
 * found is the file-based `sourceMappingURL` comment consulted, resolved
 * relative to `filename`.
 */
export function extractSourceMap(
  code: string,
  filename: string,
): SourceMap | undefined {
  const inlineMap = extractInlineSourceMap(code);
  if (inlineMap != null) {
    return inlineMap;
  }
  return extractExternalSourceMap(code, filename);
}
75+
76+
/**
 * Loads the source map referenced by a file-based `sourceMappingURL`
 * comment in `code`.
 *
 * This is best effort: a missing directive, a `data:` URL, an unresolvable
 * or unreadable path, malformed JSON, or JSON that is not a source map
 * object all yield `undefined` instead of throwing.
 *
 * @param code Generated JavaScript that may end with a sourceMappingURL comment
 * @param filename Path of the file `code` was read from; relative map URLs
 *   are resolved against its directory
 * @returns The parsed source map, or `undefined` if none could be loaded
 */
function extractExternalSourceMap(
  code: string,
  filename: string,
): SourceMap | undefined {
  const sourceMapUrl = extractSourceMapUrl(code);
  if (!sourceMapUrl || sourceMapUrl.startsWith("data:")) {
    return undefined;
  }

  // Fragment and query parts are meaningless for a file on disk.
  const sanitizedUrl = sourceMapUrl.split("#", 1)[0].split("?", 1)[0];

  try {
    // Path resolution stays inside the try block so that a malformed URL
    // cannot abort instrumentation of the module.
    const mapPath = resolveSourceMapPath(filename, sanitizedUrl);
    if (!mapPath) {
      return undefined;
    }

    const parsed: unknown = JSON.parse(fs.readFileSync(mapPath, "utf8"));
    // Reject JSON that is syntactically valid but not a source map object;
    // consumers read `version` unconditionally.
    if (
      typeof parsed !== "object" ||
      parsed === null ||
      Array.isArray(parsed) ||
      (parsed as { version?: unknown }).version == null
    ) {
      return undefined;
    }
    return parsed as SourceMap;
  } catch {
    return undefined;
  }
}
98+
99+
/**
 * Returns the URL of the last `sourceMappingURL` directive in `code`.
 *
 * Lines are inspected from the end of the input towards the beginning and
 * the first directive encountered wins. Each character is visited a bounded
 * number of times, so the scan is linear in the length of `code`.
 *
 * @param code Source text to scan
 * @returns The directive's URL, or `undefined` if no line carries one
 */
function extractSourceMapUrl(code: string): string | undefined {
  // Exclusive end index of the line currently being inspected.
  let lineEnd = code.length;
  while (lineEnd > 0) {
    // -1 when the current line is the first one, which makes the slice
    // below start at index 0.
    const newlineIndex = code.lastIndexOf("\n", lineEnd - 1);

    // trim() also removes the "\r" of CRLF line endings.
    const sourceMapUrl = parseSourceMapDirective(
      code.slice(newlineIndex + 1, lineEnd).trim(),
    );
    if (sourceMapUrl) {
      return sourceMapUrl;
    }

    // Continue with the line that ends at this newline. The index strictly
    // decreases, so the loop terminates even when the input starts with a
    // newline (lastIndexOf clamps a negative position to 0 and would
    // otherwise keep finding that same newline).
    lineEnd = newlineIndex;
  }
  return undefined;
}

/**
 * Parses a single, already trimmed line as a source map directive.
 *
 * Accepted forms are `//# sourceMappingURL=<url>` and
 * `/*# sourceMappingURL=<url> *\/`, as well as the legacy `@` variants.
 *
 * @param line Trimmed line of source text
 * @returns The non-empty URL, or `undefined` if the line is not a directive
 */
function parseSourceMapDirective(line: string): string | undefined {
  let body: string;
  if (line.startsWith("//#") || line.startsWith("//@")) {
    body = line.slice(3);
  } else if (line.startsWith("/*#") || line.startsWith("/*@")) {
    // Drop the closing "*/" of a block comment when present.
    body = line.endsWith("*/") ? line.slice(3, -2) : line.slice(3);
  } else {
    return undefined;
  }

  const directive = "sourceMappingURL=";
  body = body.trimStart();
  if (!body.startsWith(directive)) {
    return undefined;
  }

  const sourceMapUrl = body.slice(directive.length).trim();
  return sourceMapUrl || undefined;
}
149+
150+
/**
 * Maps a `sourceMappingURL` value to a path on the local file system.
 *
 * - `file://` URLs are converted to a path.
 * - Any other URL with a scheme (for example `https://`) is not loadable
 *   from disk and yields `undefined`.
 * - Everything else is treated as a possibly percent-encoded path relative
 *   to the directory of `filename`.
 *
 * @param filename Path of the generated file containing the directive
 * @param sourceMapUrl URL taken from the directive, without query/fragment
 * @returns An absolute path, or `undefined` if the URL cannot be mapped
 */
function resolveSourceMapPath(
  filename: string,
  sourceMapUrl: string,
): string | undefined {
  if (!sourceMapUrl) {
    return undefined;
  }

  if (sourceMapUrl.startsWith("file://")) {
    try {
      return fileURLToPath(sourceMapUrl);
    } catch {
      // fileURLToPath throws for file URLs it cannot represent as a local
      // path (e.g. a non-local host or encoded path separators). Treat
      // these as "no source map" rather than failing the caller.
      return undefined;
    }
  }
  if (URL_PREFIX.test(sourceMapUrl)) {
    return undefined;
  }

  let decodedUrl = sourceMapUrl;
  try {
    decodedUrl = decodeURIComponent(sourceMapUrl);
  } catch {
    // Keep undecoded value if it contains invalid escapes.
  }

  return path.resolve(path.dirname(filename), decodedUrl);
}
174+
57175
export function toRawSourceMap(
58176
sourceMap?: SourceMap,
59177
): RawSourceMap | undefined {
60178
if (sourceMap) {
61179
return {
62180
version: sourceMap.version.toString(),
181+
file: sourceMap.file,
63182
sources: sourceMap.sources ?? [],
64183
names: sourceMap.names,
184+
sourceRoot: sourceMap.sourceRoot,
65185
sourcesContent: sourceMap.sourcesContent,
66186
mappings: sourceMap.mappings,
67187
};

packages/instrumentor/esm-loader.mts

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ const { sourceCodeCoverage } =
4343
require("./plugins/sourceCodeCoverage.js") as typeof import("./plugins/sourceCodeCoverage.js");
4444
const { functionHooks } =
4545
require("./plugins/functionHooks.js") as typeof import("./plugins/functionHooks.js");
46+
const { buildPCLocationBatches } =
47+
require("./pcLocationBatches.js") as typeof import("./pcLocationBatches.js");
48+
const { extractSourceMap, toRawSourceMap } =
49+
require("./SourceMapRegistry.js") as typeof import("./SourceMapRegistry.js");
4650

4751
// The loader thread has its own CJS module cache, so this is a
4852
// separate HookManager instance from the main thread's. We populate
@@ -136,6 +140,7 @@ export const load: LoadFn = async function load(url, context, nextLoad) {
136140

137141
function instrumentModule(code: string, filename: string): string | null {
138142
drainHookUpdates();
143+
const inputSourceMap = extractSourceMap(code, filename);
139144

140145
const fuzzerCoverage = esmCodeCoverage();
141146

@@ -163,6 +168,7 @@ function instrumentModule(code: string, filename: string): string | null {
163168
filename,
164169
sourceFileName: filename,
165170
sourceMaps: true,
171+
inputSourceMap: toRawSourceMap(inputSourceMap) as any,
166172
plugins,
167173
sourceType: "module",
168174
});
@@ -176,8 +182,6 @@ function instrumentModule(code: string, filename: string): string | null {
176182
if (edges === 0 || !transformed?.code) {
177183
return null;
178184
}
179-
const displayFilename = stripProjectRootPrefix(filename);
180-
181185
// Build a preamble that runs on the main thread before the module
182186
// body. It allocates the per-module coverage counter buffer and,
183187
// when a source map is available, registers it with the main-thread
@@ -188,18 +192,26 @@ function instrumentModule(code: string, filename: string): string | null {
188192
];
189193

190194
// Register edge-to-source mappings for PC symbolization.
191-
// Serialized as a flat array: [id, line, col, funcIdx, ...]
195+
// Serialized as a flat array:
196+
// [id, line, col, funcIdx, isFuncEntry, ...]
192197
const edgeEntries = fuzzerCoverage.edgeEntries();
193198
if (edgeEntries.length > 0) {
194-
const flat = edgeEntries.flat();
195199
const funcNames = fuzzerCoverage.funcNames();
196-
preambleLines.push(
197-
`Fuzzer.coverageTracker.registerPCLocations(` +
198-
`${JSON.stringify(displayFilename)},` +
199-
`${JSON.stringify(funcNames)},` +
200-
`new Int32Array(${JSON.stringify(flat)}),` +
201-
`__jazzer_pcBase);`,
200+
const batches = buildPCLocationBatches(
201+
edgeEntries,
202+
filename,
203+
inputSourceMap,
204+
stripProjectRootPrefix,
202205
);
206+
for (const batch of batches) {
207+
preambleLines.push(
208+
`Fuzzer.coverageTracker.registerPCLocations(` +
209+
`${JSON.stringify(batch.filename)},` +
210+
`${JSON.stringify(funcNames)},` +
211+
`new Int32Array(${JSON.stringify(Array.from(batch.entries))}),` +
212+
`__jazzer_pcBase);`,
213+
);
214+
}
203215
}
204216

205217
if (transformed.map) {

0 commit comments

Comments
 (0)