Skip to content

Commit c37aca5

Browse files
authored
New fuzzer mode: Fuzz against JavaScript (#8655)
The new fuzzer flag --fuzz-against-js tells the fuzzer that the wasm will only ever be run against JS, and will not be linked to other wasm or used in any other way. This lets the fuzzer make changes that are valid from JS's point of view, such as refining types on the boundary without changing the arity. For example, if we declare that we send JS an anyref, but the actual type we send is (ref $A), then we can refine the declared type to (ref $A) (or to any type between it and anyref). We can do this for both export results and import params, as in both cases we send values to JS and know their types. This is useful for fuzzers that generate JS and let Binaryen mutate the wasm: they can emit anyrefs on the boundary, and Binaryen will be able to add new GC types in the module and even refine the boundary to those types. Such a fuzzer does not even need to emit GC types itself (it can emit anyref and send only nulls).
1 parent 6b06b59 commit c37aca5

7 files changed

Lines changed: 526 additions & 0 deletions

File tree

scripts/fuzz_opt.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2202,6 +2202,7 @@ def do_handle_pair(self, input, before_wasm, after_wasm, opts):
22022202
input,
22032203
'-ttf',
22042204
'--fuzz-preserve-imports-exports',
2205+
'--fuzz-against-js',
22052206
'--initial-fuzz=' + wat_file,
22062207
'-o', pre_wasm,
22072208
'-g',

src/tools/fuzzing.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class TranslateToFuzzReader {
132132
void setPreserveImportsAndExports(bool preserveImportsAndExports_) {
133133
preserveImportsAndExports = preserveImportsAndExports_;
134134
}
135+
// Sets the againstJS flag from the given value.
void setAgainstJS(bool againstJS_) {
  againstJS = againstJS_;
}
135136
void setImportedModule(std::string importedModuleName);
136137

137138
void build();
@@ -159,6 +160,11 @@ class TranslateToFuzzReader {
159160
// existing testcase (using initial-content).
160161
bool preserveImportsAndExports = false;
161162

163+
// Whether the wasm will be used from JS and in no other way. This lets us
164+
// modify the wasm in ways that keep it valid from JS's point of view, but
165+
// which might cause issues when linked against wasm or used otherwise.
166+
bool againstJS = false;
167+
162168
// An optional module to import from.
163169
std::optional<Module> importedModule;
164170

@@ -409,6 +415,10 @@ class TranslateToFuzzReader {
409415
void fixAfterChanges(Function* func);
410416
void modifyInitialFunctions();
411417

418+
// Mutate the JS boundary, that is, make changes on the wasm side that JS
419+
// would not be broken by (JS does not care about types).
420+
void mutateJSBoundary();
421+
412422
// Note a global for use during code generation.
413423
void useGlobalLater(Global* global);
414424

src/tools/fuzzing/fuzzing.cpp

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "ir/glbs.h"
2020
#include "ir/iteration.h"
2121
#include "ir/local-structural-dominance.h"
22+
#include "ir/lubs.h"
2223
#include "ir/module-utils.h"
2324
#include "ir/names.h"
2425
#include "ir/subtype-exprs.h"
@@ -413,6 +414,10 @@ void TranslateToFuzzReader::build() {
413414
PassRunner runner(&wasm);
414415
ReFinalize().run(&runner, &wasm);
415416
ReFinalize().walkModuleCode(&wasm);
417+
418+
if (againstJS) {
419+
mutateJSBoundary();
420+
}
416421
}
417422

418423
void TranslateToFuzzReader::setupMemory() {
@@ -2389,6 +2394,240 @@ void TranslateToFuzzReader::modifyInitialFunctions() {
23892394
}
23902395
}
23912396

2397+
void TranslateToFuzzReader::mutateJSBoundary() {
2398+
assert(againstJS);
2399+
2400+
// Scan to find functions whose address is taken. We cannot modify their
2401+
// signatures at all.
2402+
2403+
struct FunctionInfo {
2404+
// Whether there are references to this function itself.
2405+
bool reffed = false;
2406+
2407+
// Calls to imports from this function.
2408+
std::vector<Call*> callImports;
2409+
};
2410+
2411+
using NameInfoMap = std::unordered_map<Name, FunctionInfo>;
2412+
2413+
struct FunctionInfoScanner
2414+
: public WalkerPass<PostWalker<FunctionInfoScanner>> {
2415+
// Not parallel for simplicity, see the map update below.
2416+
2417+
bool modifiesBinaryenIR() override { return false; }
2418+
2419+
NameInfoMap& map;
2420+
2421+
FunctionInfoScanner(NameInfoMap& map) : map(map) {}
2422+
2423+
std::unique_ptr<Pass> create() override {
2424+
return std::make_unique<FunctionInfoScanner>(map);
2425+
}
2426+
2427+
void visitCall(Call* curr) {
2428+
if (getModule()->getFunction(curr->target)->imported()) {
2429+
map[curr->target].callImports.push_back(curr);
2430+
}
2431+
2432+
// Return calls add a dependency similar to references: we cannot refine
2433+
// the callee without coordination with the caller.
2434+
if (curr->isReturn) {
2435+
map[curr->target].reffed = true;
2436+
}
2437+
}
2438+
2439+
void visitRefFunc(RefFunc* curr) { map[curr->func].reffed = true; }
2440+
};
2441+
2442+
NameInfoMap map;
2443+
FunctionInfoScanner scanner(map);
2444+
PassRunner runner(&wasm);
2445+
scanner.setModule(&wasm);
2446+
scanner.run(&runner, &wasm);
2447+
scanner.walkModuleCode(&wasm);
2448+
2449+
// If a function does not have its address taken, we can refine types. This is
2450+
// safe because we will still send and receive the right number of values (we
2451+
// are not changing the arity, which JS might notice). Each place we may
2452+
// refine, we are given the maximum refinement and pick a random type between
2453+
// it and the old type.
2454+
auto maybeRefine = [&](Type old, Type new_) {
2455+
if (!old.isRef()) {
2456+
return old;
2457+
}
2458+
2459+
// If this is unreachable code, we can still refine to the bottom.
2460+
if (new_ == Type::unreachable) {
2461+
new_ = Type(old.getHeapType().getBottom(), NonNullable);
2462+
}
2463+
2464+
// Find all heap types between the old and new, starting from new.
2465+
auto oldHeapType = old.getHeapType();
2466+
auto newHeapType = new_.getHeapType();
2467+
assert(HeapType::isSubType(newHeapType, oldHeapType));
2468+
std::vector<HeapType> options;
2469+
while (1) {
2470+
options.push_back(newHeapType);
2471+
// We cannot look at a bottom type's supers (there can be many, and the
2472+
// getSuperType() API doesn't return them), but can use
2473+
// interestingHeapSubTypes on the top.
2474+
if (newHeapType.isBottom()) {
2475+
for (auto type : interestingHeapSubTypes[newHeapType.getTop()]) {
2476+
options.push_back(type);
2477+
}
2478+
break;
2479+
}
2480+
// Continue until we reach the old type.
2481+
if (newHeapType == oldHeapType) {
2482+
break;
2483+
}
2484+
auto next = newHeapType.getSuperType();
2485+
assert(next);
2486+
newHeapType = *next;
2487+
}
2488+
newHeapType = pick(options);
2489+
2490+
// Pick the nullability.
2491+
auto oldNullability = old.getNullability();
2492+
auto newNullability = new_.getNullability();
2493+
if (newNullability != oldNullability) {
2494+
newNullability = getNullability();
2495+
}
2496+
2497+
// Pick the exactness.
2498+
auto oldExactness = old.getExactness();
2499+
auto newExactness = new_.getExactness();
2500+
// We can only be exact if we are using the new heap type: that type is
2501+
// exactly what is sent here, and no intermediate heap type would be valid.
2502+
// For example, given $A :> $B :> $C, then maybeRefine($A, exact $C) can
2503+
// return exact $C, but cannot return exact $B.
2504+
//
2505+
// Also, basic heap types cannot be exact.
2506+
if (newHeapType != new_.getHeapType() || newHeapType.isBasic()) {
2507+
newExactness = Inexact;
2508+
} else if (newExactness != oldExactness) {
2509+
// TODO: once getExactness() is fixed (see there), use that
2510+
newExactness = oneIn(2) ? Exact : Inexact;
2511+
}
2512+
2513+
return Type(newHeapType, newNullability, newExactness);
2514+
};
2515+
2516+
// Given a set of types (all params or all results), and an index among them,
2517+
// refine that index if we can. It is possible that no new types exist at all,
2518+
// if the code was unreachable and we noted nothing.
2519+
auto maybeRefineIndex = [&](Type oldTypes, LUBFinder newLUB, Index index) {
2520+
auto lub =
2521+
newLUB.noted() ? newLUB.getLUB()[index] : Type(Type::unreachable);
2522+
return maybeRefine(oldTypes[index], lub);
2523+
};
2524+
2525+
// First, refine params sent to imports. Gather the LUB sent to each import,
2526+
// and then refine.
2527+
std::unordered_map<Name, LUBFinder> paramLUBs;
2528+
for (auto& [_, info] : map) {
2529+
for (auto* call : info.callImports) {
2530+
auto declaredParams = wasm.getFunction(call->target)->getParams();
2531+
std::vector<Type> sent;
2532+
for (Index i = 0; i < call->operands.size(); i++) {
2533+
auto type = call->operands[i]->type;
2534+
if (type == Type::unreachable) {
2535+
// Nothing sent here. What we refine to must still validate, even
2536+
// though this call is unreachable. Using the non-nullable bottom type
2537+
// is valid, and has the fewest restrictions.
2538+
type = declaredParams[i];
2539+
if (type.isRef()) {
2540+
type = Type(type.getHeapType().getBottom(), NonNullable);
2541+
}
2542+
}
2543+
sent.push_back(type);
2544+
}
2545+
paramLUBs[call->target].note(Type(sent));
2546+
}
2547+
}
2548+
2549+
for (auto& func : wasm.functions) {
2550+
if (!func->imported()) {
2551+
continue;
2552+
}
2553+
// TODO: In the referenced case, we could consider using import/export
2554+
// wrappers and refining just there.
2555+
if (map[func->name].reffed) {
2556+
continue;
2557+
}
2558+
// Do not alter the signature of configureAll or other VM builtins. Changing
2559+
// these to something the VM does not expect will just cause it to
2560+
// immediately reject the module by trapping.
2561+
if (func->module.startsWith("wasm:")) {
2562+
continue;
2563+
}
2564+
2565+
auto oldParams = func->getParams();
2566+
if (oldParams == Type::none) {
2567+
continue;
2568+
}
2569+
2570+
// Refine.
2571+
auto lub = paramLUBs[func->name];
2572+
auto lubType = lub.getLUB();
2573+
// Either the LUB has the right data shape, or nothing was noted (this is
2574+
// unreachable).
2575+
assert(oldParams.size() == lubType.size() || !lub.noted());
2576+
std::vector<Type> newParams;
2577+
for (Index i = 0; i < lubType.size(); i++) {
2578+
newParams.push_back(maybeRefineIndex(oldParams, lub, i));
2579+
}
2580+
func->setParams(Type(newParams));
2581+
}
2582+
2583+
// Second, refine results sent from exports.
2584+
for (auto& exp : wasm.exports) {
2585+
if (exp->kind != ExternalKind::Function) {
2586+
continue;
2587+
}
2588+
auto name = *exp->getInternalName();
2589+
if (map[name].reffed) {
2590+
continue;
2591+
}
2592+
2593+
auto* func = wasm.getFunction(name);
2594+
auto oldResults = func->getResults();
2595+
if (oldResults == Type::none) {
2596+
continue;
2597+
}
2598+
2599+
// Refine.
2600+
auto lub = LUB::getResultsLUB(func, wasm);
2601+
auto lubType = lub.getLUB();
2602+
assert(oldResults.size() == lubType.size() || !lub.noted());
2603+
std::vector<Type> newResults;
2604+
for (Index i = 0; i < lubType.size(); i++) {
2605+
newResults.push_back(maybeRefineIndex(oldResults, lub, i));
2606+
}
2607+
func->setResults(Type(newResults));
2608+
}
2609+
2610+
// Update return types from calls to exports whose results we refined.
2611+
struct CallUpdater : public WalkerPass<PostWalker<CallUpdater>> {
2612+
bool isFunctionParallel() override { return true; }
2613+
2614+
std::unique_ptr<Pass> create() override {
2615+
return std::make_unique<CallUpdater>();
2616+
}
2617+
2618+
void visitCall(Call* curr) {
2619+
if (curr->type != Type::unreachable) {
2620+
curr->type = getModule()->getFunction(curr->target)->getResults();
2621+
}
2622+
}
2623+
} updater;
2624+
updater.setModule(&wasm);
2625+
updater.run(&runner, &wasm);
2626+
2627+
// Propagate after our changes.
2628+
ReFinalize().run(&runner, &wasm);
2629+
}
2630+
23922631
void TranslateToFuzzReader::dropToLog(Function* func) {
23932632
// Don't always do this.
23942633
if (oneIn(2)) {

src/tools/wasm-opt.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ int main(int argc, const char* argv[]) {
8787
bool fuzzMemory = true;
8888
bool fuzzOOB = true;
8989
bool fuzzPreserveImportsAndExports = false;
90+
bool fuzzAgainstJS = false;
9091
std::string fuzzImport;
9192
std::string emitSpecWrapper;
9293
std::string emitWasm2CWrapper;
@@ -212,6 +213,13 @@ For more on how to optimize effectively, see
212213
[&](Options* o, const std::string& arguments) {
213214
fuzzPreserveImportsAndExports = true;
214215
})
216+
.add(
217+
"--fuzz-against-js",
218+
"",
219+
"modify the wasm in valid ways that assume it is used only from JS",
220+
WasmOptOption,
221+
Options::Arguments::Zero,
222+
[&](Options* o, const std::string& arguments) { fuzzAgainstJS = true; })
215223
.add(
216224
"--fuzz-import",
217225
"",
@@ -349,6 +357,7 @@ For more on how to optimize effectively, see
349357
reader.setAllowMemory(fuzzMemory);
350358
reader.setAllowOOB(fuzzOOB);
351359
reader.setPreserveImportsAndExports(fuzzPreserveImportsAndExports);
360+
reader.setAgainstJS(fuzzAgainstJS);
352361
if (!fuzzImport.empty()) {
353362
reader.setImportedModule(fuzzImport);
354363
}

test/lit/help/wasm-opt.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@
7272
;; CHECK-NEXT: --fuzz-preserve-imports-exports don't add imports and exports in
7373
;; CHECK-NEXT: -ttf mode, and keep the start
7474
;; CHECK-NEXT:
75+
;; CHECK-NEXT: --fuzz-against-js modify the wasm in valid ways
76+
;; CHECK-NEXT: that assume it is used only from
77+
;; CHECK-NEXT: JS
78+
;; CHECK-NEXT:
7579
;; CHECK-NEXT: --fuzz-import a module to use as an import in
7680
;; CHECK-NEXT: -ttf mode
7781
;; CHECK-NEXT:

0 commit comments

Comments
 (0)