-
Notifications
You must be signed in to change notification settings - Fork 856
New fuzzer mode: Fuzz against JavaScript #8655
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 42 commits
91c30a0
65c2a31
d08c29f
f3a07c8
823c665
767c8b4
c7d7f1b
fc981c7
ebb1106
fa2da6e
2c3c192
53b1438
255250d
169f4c8
2fe14e0
300c953
39f57ee
cffacbc
41cd320
063b415
12d35ef
e5d4ea5
d968315
45ce1dc
c1fccda
a7b5bfd
35c498f
98e7b09
26d3f3e
c0c6dae
90c18af
f3d11a6
44fe786
79e65d7
98bd0ac
4179d31
2e2d028
a7bf270
1f7e875
06357e5
a079f2e
a7b5125
149f300
130029f
ca52c2d
c59b29b
ada1591
3ddc4fe
cd47cb6
59b11d3
1b857b1
ebe62e9
2f67d1d
239ee45
7a545b0
b94e9f7
4c24184
81e6fa8
7567bdd
b86b357
b479276
b324bfd
d07aea1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| #include "ir/glbs.h" | ||
| #include "ir/iteration.h" | ||
| #include "ir/local-structural-dominance.h" | ||
| #include "ir/lubs.h" | ||
| #include "ir/module-utils.h" | ||
| #include "ir/names.h" | ||
| #include "ir/subtype-exprs.h" | ||
|
|
@@ -413,6 +414,10 @@ void TranslateToFuzzReader::build() { | |
| PassRunner runner(&wasm); | ||
| ReFinalize().run(&runner, &wasm); | ||
| ReFinalize().walkModuleCode(&wasm); | ||
|
|
||
| if (againstJS) { | ||
| mutateJSBoundary(); | ||
| } | ||
| } | ||
|
|
||
| void TranslateToFuzzReader::setupMemory() { | ||
|
|
@@ -2389,6 +2394,221 @@ void TranslateToFuzzReader::modifyInitialFunctions() { | |
| } | ||
| } | ||
|
|
||
| void TranslateToFuzzReader::mutateJSBoundary() { | ||
| assert(againstJS); | ||
|
|
||
| // Scan to find functions whose address is taken. We cannot modify their | ||
| // signatures at all. | ||
|
|
||
| struct FunctionInfo { | ||
| // Whether there are references to this function itself. | ||
| bool reffed = false; | ||
|
|
||
| // Calls that target this function, recorded only when it is an import. | ||
| std::vector<Call*> callImports; | ||
| }; | ||
|
|
||
| using NameInfoMap = std::unordered_map<Name, FunctionInfo>; | ||
|
|
||
| struct FunctionInfoScanner | ||
| : public WalkerPass<PostWalker<FunctionInfoScanner>> { | ||
| // Not parallel for simplicity, see the map update below. | ||
|
|
||
| bool modifiesBinaryenIR() override { return false; } | ||
|
|
||
| NameInfoMap& map; | ||
|
|
||
| FunctionInfoScanner(NameInfoMap& map) : map(map) {} | ||
|
|
||
| std::unique_ptr<Pass> create() override { | ||
| return std::make_unique<FunctionInfoScanner>(map); | ||
| } | ||
|
|
||
| void visitCall(Call* curr) { | ||
| if (getModule()->getFunction(curr->target)->imported()) { | ||
| map[curr->target].callImports.push_back(curr); | ||
| } | ||
| } | ||
|
|
||
| void visitRefFunc(RefFunc* curr) { map[curr->func].reffed = true; } | ||
| }; | ||
|
|
||
| NameInfoMap map; | ||
| FunctionInfoScanner scanner(map); | ||
| PassRunner runner(&wasm); | ||
| scanner.setModule(&wasm); | ||
| scanner.run(&runner, &wasm); | ||
| scanner.walkModuleCode(&wasm); | ||
|
|
||
| // If a function does not have its address taken, we can refine types. This is | ||
| // safe because we will still send and receive the right number of values (we | ||
| // are not changing the arity, which JS might notice). Each place we may | ||
| // refine, we are given the maximum refinement and pick a random type between | ||
| // it and the old type. | ||
| auto maybeRefine = [&](Type old, Type new_) { | ||
| if (!new_.isRef()) { | ||
| // A non-reference like i32, or unreachable (no values reach this place), | ||
| // so it does not matter. | ||
| return old; | ||
| } | ||
|
|
||
| // Find all heap types between the old and new, starting from new. | ||
| auto oldHeapType = old.getHeapType(); | ||
| auto newHeapType = new_.getHeapType(); | ||
| assert(HeapType::isSubType(newHeapType, oldHeapType)); | ||
| std::vector<HeapType> options; | ||
| while (1) { | ||
| options.push_back(newHeapType); | ||
| // We cannot look at a bottom type's supers (there can be many, and the | ||
| // getSuperType() API doesn't return them). | ||
| // TODO: handle all possible supers. | ||
| if (newHeapType.isBottom()) { | ||
| options.push_back(oldHeapType); | ||
| break; | ||
| } | ||
| // Continue until we reach the old type. | ||
| if (newHeapType == oldHeapType) { | ||
| break; | ||
| } | ||
| auto next = newHeapType.getSuperType(); | ||
| assert(next); | ||
| newHeapType = *next; | ||
| } | ||
| newHeapType = pick(options); | ||
|
|
||
| // Pick the nullability. | ||
| auto oldNullability = old.getNullability(); | ||
| auto newNullability = new_.getNullability(); | ||
| if (newNullability != oldNullability) { | ||
| newNullability = getNullability(); | ||
| } | ||
|
|
||
| // Pick the exactness. | ||
| auto oldExactness = old.getExactness(); | ||
| auto newExactness = new_.getExactness(); | ||
| if (newExactness != oldExactness) { | ||
| // TODO: once getExactness() is fixed (see there), use that | ||
| newExactness = oneIn(2) ? Exact : Inexact; | ||
| } | ||
| // We can only be exact if we are using the new heap type: that type is | ||
| // exactly what is sent here, and no intermediate heap type would be valid. | ||
| // For example, given $A :> $B :> $C, then maybeRefine($A, exact $C) can | ||
| // return exact $C, but cannot return exact $B. | ||
| // | ||
| // Also, basic heap types cannot be exact. | ||
| if (newHeapType != new_.getHeapType() || newHeapType.isBasic()) { | ||
| newExactness = Inexact; | ||
| } | ||
|
Comment on lines
+2506
to
2511
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might as well check these conditions before burning a bit to generate a new exactness above.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bump.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok, saved that bit. |
||
|
|
||
| return Type(newHeapType, newNullability, newExactness); | ||
| }; | ||
|
|
||
| // First, refine params sent to imports. Gather the LUB sent to each import, | ||
| // and then refine. | ||
| std::unordered_map<Name, LUBFinder> paramLUBs; | ||
| for (auto& [_, info] : map) { | ||
| for (auto* call : info.callImports) { | ||
| auto declaredParams = wasm.getFunction(call->target)->getParams(); | ||
| std::vector<Type> sent; | ||
| for (Index i = 0; i < call->operands.size(); i++) { | ||
| auto type = call->operands[i]->type; | ||
| if (type == Type::unreachable) { | ||
| // Nothing sent here. What we refine to must still validate, even | ||
| // though this call is unreachable. Using the non-nullable bottom type | ||
| // is valid, and has the fewest restrictions. | ||
| type = declaredParams[i]; | ||
| if (type.isRef()) { | ||
| type = Type(type.getHeapType().getBottom(), NonNullable); | ||
| } | ||
| } | ||
|
tlively marked this conversation as resolved.
|
||
| sent.push_back(type); | ||
| } | ||
| paramLUBs[call->target].note(Type(sent)); | ||
| } | ||
| } | ||
|
|
||
| for (auto& func : wasm.functions) { | ||
| if (!func->imported()) { | ||
| continue; | ||
| } | ||
| // TODO: In the reffed case, we could consider using import/export wrappers | ||
|
kripken marked this conversation as resolved.
Outdated
|
||
| // and refining just there. | ||
| if (map[func->name].reffed) { | ||
| continue; | ||
| } | ||
|
tlively marked this conversation as resolved.
|
||
| // Do not alter the signature of configureAll or other VM builtins. Changing | ||
| // these to something the VM does not expect will just cause it to | ||
| // immediately reject the module by trapping. | ||
| if (func->module.startsWith("wasm:")) { | ||
| continue; | ||
| } | ||
|
|
||
| // Find the LUB, which is the most we can refine. | ||
| auto lub = paramLUBs[func->name]; | ||
| if (!lub.noted()) { | ||
| continue; | ||
| } | ||
|
|
||
| // Refine. | ||
| auto oldParams = func->getParams(); | ||
| auto lubType = lub.getLUB(); | ||
| assert(oldParams.size() == lubType.size()); | ||
| std::vector<Type> newParams; | ||
| for (Index i = 0; i < lubType.size(); i++) { | ||
| newParams.push_back(maybeRefine(oldParams[i], lubType[i])); | ||
| } | ||
| func->setParams(Type(newParams)); | ||
| } | ||
|
|
||
| // Second, refine results sent from exports. | ||
| for (auto& exp : wasm.exports) { | ||
| if (exp->kind != ExternalKind::Function) { | ||
| continue; | ||
| } | ||
| auto name = *exp->getInternalName(); | ||
| if (map[name].reffed) { | ||
| continue; | ||
| } | ||
|
|
||
| // Find the LUB. | ||
| auto* func = wasm.getFunction(name); | ||
| auto lub = LUB::getResultsLUB(func, wasm); | ||
| if (!lub.noted()) { | ||
| continue; | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could use non-nullable bottom reference types here, too.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I suppose, though in this case the import/export is never actually reached?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but who knows what might end up happening in the engine 🤷 Might as well exercise as many situations as we can. Without looking at the V8 code, it seems unlikely but plausible that there would be some different code path taken on traps or exceptions, or when doing a JSPI suspension or something, depending on the return type even when the function never does a normal return.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, but what would we refine it to? A totally random type..?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A random subtype of the old result type, yes. The
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok, added refinement of unreachable code down to the bottom. |
||
|
|
||
| // Refine. | ||
| auto oldResults = func->getResults(); | ||
| auto lubType = lub.getLUB(); | ||
| assert(oldResults.size() == lubType.size()); | ||
| std::vector<Type> newResults; | ||
| for (Index i = 0; i < lubType.size(); i++) { | ||
| newResults.push_back(maybeRefine(oldResults[i], lubType[i])); | ||
| } | ||
| func->setResults(Type(newResults)); | ||
| } | ||
|
|
||
| // Update return types from calls to exports whose results we refined. | ||
| struct CallUpdater : public WalkerPass<PostWalker<CallUpdater>> { | ||
| bool isFunctionParallel() override { return true; } | ||
|
|
||
| std::unique_ptr<Pass> create() override { | ||
| return std::make_unique<CallUpdater>(); | ||
| } | ||
|
|
||
| void visitCall(Call* curr) { | ||
| if (curr->type != Type::unreachable) { | ||
| curr->type = getModule()->getFunction(curr->target)->getResults(); | ||
| } | ||
| } | ||
| } updater; | ||
| updater.setModule(&wasm); | ||
| updater.run(&runner, &wasm); | ||
|
|
||
| // Propagate after our changes. | ||
| ReFinalize().run(&runner, &wasm); | ||
| } | ||
|
|
||
| void TranslateToFuzzReader::dropToLog(Function* func) { | ||
| // Don't always do this. | ||
| if (oneIn(2)) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| (module | ||
| ;; Two structs, A and B, each of which has a subtype. | ||
| (rec | ||
| (type $A (sub (struct))) | ||
| (type $A2 (sub $A (struct))) | ||
|
|
||
| (type $B (sub (struct))) | ||
| (type $B2 (sub $B(struct))) | ||
| ) | ||
|
|
||
| ;; Two imports, one which will be referenced. | ||
| (import "module" "base" (func $import (param i32 anyref) (result eqref))) | ||
| (import "module" "base" (func $import-reffed (param i32 anyref) (result eqref))) | ||
|
|
||
| ;; Two exports, one which will be referenced. | ||
|
|
||
| (func $export (export "export") (param $0 i32) (param $1 anyref) (result eqref) | ||
| ;; Add the refs. | ||
| (drop | ||
| (ref.func $import-reffed) | ||
| ) | ||
| (drop | ||
| (ref.func $export-reffed) | ||
| ) | ||
|
|
||
| ;; Call the imports. | ||
| (drop | ||
| (call $import | ||
| (i32.const 10) | ||
| ;; Send $A. We can refine the anyref to $A or $A2 (but not $B or $B2). | ||
| (struct.new $A) | ||
| ) | ||
| ) | ||
| (drop | ||
| (call $import-reffed | ||
| (i32.const 20) | ||
| (struct.new $A) | ||
| ) | ||
| ) | ||
|
|
||
| ;; Return $B. We can refine the eqref to $B or $B2 (but not $A or $A2). | ||
| (struct.new $B) | ||
| ) | ||
|
|
||
| (func $export-reffed (export "export-reffed") (param $0 i32) (param $1 anyref) (result eqref) | ||
| (struct.new $A) | ||
| ) | ||
| ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In cases where the old heap type is also bottom, this will end up with two copies of bottom in the options. Not incorrect, but wastes a bit of randomness.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, yeah, but keeping the code simple seems good enough here.