|
19 | 19 | #include "ir/glbs.h" |
20 | 20 | #include "ir/iteration.h" |
21 | 21 | #include "ir/local-structural-dominance.h" |
| 22 | +#include "ir/lubs.h" |
22 | 23 | #include "ir/module-utils.h" |
23 | 24 | #include "ir/names.h" |
24 | 25 | #include "ir/subtype-exprs.h" |
@@ -413,6 +414,10 @@ void TranslateToFuzzReader::build() { |
413 | 414 | PassRunner runner(&wasm); |
414 | 415 | ReFinalize().run(&runner, &wasm); |
415 | 416 | ReFinalize().walkModuleCode(&wasm); |
| 417 | + |
| 418 | + if (againstJS) { |
| 419 | + mutateJSBoundary(); |
| 420 | + } |
416 | 421 | } |
417 | 422 |
|
418 | 423 | void TranslateToFuzzReader::setupMemory() { |
@@ -2389,6 +2394,240 @@ void TranslateToFuzzReader::modifyInitialFunctions() { |
2389 | 2394 | } |
2390 | 2395 | } |
2391 | 2396 |
|
| 2397 | +void TranslateToFuzzReader::mutateJSBoundary() { |
| 2398 | + assert(againstJS); |
| 2399 | + |
| 2400 | + // Scan to find functions whose address is taken. We cannot modify their |
| 2401 | + // signatures at all. |
| 2402 | + |
| 2403 | + struct FunctionInfo { |
| 2404 | + // Whether there are references to this function itself. |
| 2405 | + bool reffed = false; |
| 2406 | + |
| 2407 | + // Calls to imports from this function. |
| 2408 | + std::vector<Call*> callImports; |
| 2409 | + }; |
| 2410 | + |
| 2411 | + using NameInfoMap = std::unordered_map<Name, FunctionInfo>; |
| 2412 | + |
| 2413 | + struct FunctionInfoScanner |
| 2414 | + : public WalkerPass<PostWalker<FunctionInfoScanner>> { |
| 2415 | + // Not parallel for simplicity, see the map update below. |
| 2416 | + |
| 2417 | + bool modifiesBinaryenIR() override { return false; } |
| 2418 | + |
| 2419 | + NameInfoMap& map; |
| 2420 | + |
| 2421 | + FunctionInfoScanner(NameInfoMap& map) : map(map) {} |
| 2422 | + |
| 2423 | + std::unique_ptr<Pass> create() override { |
| 2424 | + return std::make_unique<FunctionInfoScanner>(map); |
| 2425 | + } |
| 2426 | + |
| 2427 | + void visitCall(Call* curr) { |
| 2428 | + if (getModule()->getFunction(curr->target)->imported()) { |
| 2429 | + map[curr->target].callImports.push_back(curr); |
| 2430 | + } |
| 2431 | + |
| 2432 | + // Return calls add a dependency similar to references: we cannot refine |
| 2433 | + // the callee without coordination with the caller. |
| 2434 | + if (curr->isReturn) { |
| 2435 | + map[curr->target].reffed = true; |
| 2436 | + } |
| 2437 | + } |
| 2438 | + |
| 2439 | + void visitRefFunc(RefFunc* curr) { map[curr->func].reffed = true; } |
| 2440 | + }; |
| 2441 | + |
| 2442 | + NameInfoMap map; |
| 2443 | + FunctionInfoScanner scanner(map); |
| 2444 | + PassRunner runner(&wasm); |
| 2445 | + scanner.setModule(&wasm); |
| 2446 | + scanner.run(&runner, &wasm); |
| 2447 | + scanner.walkModuleCode(&wasm); |
| 2448 | + |
| 2449 | + // If a function does not have its address taken, we can refine types. This is |
| 2450 | + // safe because we will still send and receive the right number of values (we |
| 2451 | + // are not changing the arity, which JS might notice). Each place we may |
| 2452 | + // refine, we are given the maximum refinement and pick a random type between |
| 2453 | + // it and the old type. |
| 2454 | + auto maybeRefine = [&](Type old, Type new_) { |
| 2455 | + if (!old.isRef()) { |
| 2456 | + return old; |
| 2457 | + } |
| 2458 | + |
| 2459 | + // If this is unreachable code, we can still refine to the bottom. |
| 2460 | + if (new_ == Type::unreachable) { |
| 2461 | + new_ = Type(old.getHeapType().getBottom(), NonNullable); |
| 2462 | + } |
| 2463 | + |
| 2464 | + // Find all heap types between the old and new, starting from new. |
| 2465 | + auto oldHeapType = old.getHeapType(); |
| 2466 | + auto newHeapType = new_.getHeapType(); |
| 2467 | + assert(HeapType::isSubType(newHeapType, oldHeapType)); |
| 2468 | + std::vector<HeapType> options; |
| 2469 | + while (1) { |
| 2470 | + options.push_back(newHeapType); |
| 2471 | + // We cannot look at a bottom type's supers (there can be many, and the |
| 2472 | + // getSuperType() API doesn't return them), but can use |
| 2473 | + // interestingHeapSubTypes on the top. |
| 2474 | + if (newHeapType.isBottom()) { |
| 2475 | + for (auto type : interestingHeapSubTypes[newHeapType.getTop()]) { |
| 2476 | + options.push_back(type); |
| 2477 | + } |
| 2478 | + break; |
| 2479 | + } |
| 2480 | + // Continue until we reach the old type. |
| 2481 | + if (newHeapType == oldHeapType) { |
| 2482 | + break; |
| 2483 | + } |
| 2484 | + auto next = newHeapType.getSuperType(); |
| 2485 | + assert(next); |
| 2486 | + newHeapType = *next; |
| 2487 | + } |
| 2488 | + newHeapType = pick(options); |
| 2489 | + |
| 2490 | + // Pick the nullability. |
| 2491 | + auto oldNullability = old.getNullability(); |
| 2492 | + auto newNullability = new_.getNullability(); |
| 2493 | + if (newNullability != oldNullability) { |
| 2494 | + newNullability = getNullability(); |
| 2495 | + } |
| 2496 | + |
| 2497 | + // Pick the exactness. |
| 2498 | + auto oldExactness = old.getExactness(); |
| 2499 | + auto newExactness = new_.getExactness(); |
| 2500 | + // We can only be exact if we are using the new heap type: that type is |
| 2501 | + // exactly what is sent here, and no intermediate heap type would be valid. |
| 2502 | + // For example, given $A :> $B :> $C, then maybeRefine($A, exact $C) can |
| 2503 | + // return exact $C, but cannot return exact $B. |
| 2504 | + // |
| 2505 | + // Also, basic heap types cannot be exact. |
| 2506 | + if (newHeapType != new_.getHeapType() || newHeapType.isBasic()) { |
| 2507 | + newExactness = Inexact; |
| 2508 | + } else if (newExactness != oldExactness) { |
| 2509 | + // TODO: once getExactness() is fixed (see there), use that |
| 2510 | + newExactness = oneIn(2) ? Exact : Inexact; |
| 2511 | + } |
| 2512 | + |
| 2513 | + return Type(newHeapType, newNullability, newExactness); |
| 2514 | + }; |
| 2515 | + |
| 2516 | + // Given a set of types (all params or all results), and an index among them, |
| 2517 | + // refine that index if we can. It is possible that no new types exist at all, |
| 2518 | + // if the code was unreachable and we noted nothing. |
| 2519 | + auto maybeRefineIndex = [&](Type oldTypes, LUBFinder newLUB, Index index) { |
| 2520 | + auto lub = |
| 2521 | + newLUB.noted() ? newLUB.getLUB()[index] : Type(Type::unreachable); |
| 2522 | + return maybeRefine(oldTypes[index], lub); |
| 2523 | + }; |
| 2524 | + |
| 2525 | + // First, refine params sent to imports. Gather the LUB sent to each import, |
| 2526 | + // and then refine. |
| 2527 | + std::unordered_map<Name, LUBFinder> paramLUBs; |
| 2528 | + for (auto& [_, info] : map) { |
| 2529 | + for (auto* call : info.callImports) { |
| 2530 | + auto declaredParams = wasm.getFunction(call->target)->getParams(); |
| 2531 | + std::vector<Type> sent; |
| 2532 | + for (Index i = 0; i < call->operands.size(); i++) { |
| 2533 | + auto type = call->operands[i]->type; |
| 2534 | + if (type == Type::unreachable) { |
| 2535 | + // Nothing sent here. What we refine to must still validate, even |
| 2536 | + // though this call is unreachable. Using the non-nullable bottom type |
| 2537 | + // is valid, and has the fewest restrictions. |
| 2538 | + type = declaredParams[i]; |
| 2539 | + if (type.isRef()) { |
| 2540 | + type = Type(type.getHeapType().getBottom(), NonNullable); |
| 2541 | + } |
| 2542 | + } |
| 2543 | + sent.push_back(type); |
| 2544 | + } |
| 2545 | + paramLUBs[call->target].note(Type(sent)); |
| 2546 | + } |
| 2547 | + } |
| 2548 | + |
| 2549 | + for (auto& func : wasm.functions) { |
| 2550 | + if (!func->imported()) { |
| 2551 | + continue; |
| 2552 | + } |
| 2553 | + // TODO: In the referenced case, we could consider using import/export |
| 2554 | + // wrappers and refining just there. |
| 2555 | + if (map[func->name].reffed) { |
| 2556 | + continue; |
| 2557 | + } |
| 2558 | + // Do not alter the signature of configureAll or other VM builtins. Changing |
| 2559 | + // these to something the VM does not expect will just cause it to |
| 2560 | + // immediately reject the module by trapping. |
| 2561 | + if (func->module.startsWith("wasm:")) { |
| 2562 | + continue; |
| 2563 | + } |
| 2564 | + |
| 2565 | + auto oldParams = func->getParams(); |
| 2566 | + if (oldParams == Type::none) { |
| 2567 | + continue; |
| 2568 | + } |
| 2569 | + |
| 2570 | + // Refine. |
| 2571 | + auto lub = paramLUBs[func->name]; |
| 2572 | + auto lubType = lub.getLUB(); |
| 2573 | + // Either the LUB has the right data shape, or nothing was noted (this is |
| 2574 | + // unreachable). |
| 2575 | + assert(oldParams.size() == lubType.size() || !lub.noted()); |
| 2576 | + std::vector<Type> newParams; |
| 2577 | + for (Index i = 0; i < lubType.size(); i++) { |
| 2578 | + newParams.push_back(maybeRefineIndex(oldParams, lub, i)); |
| 2579 | + } |
| 2580 | + func->setParams(Type(newParams)); |
| 2581 | + } |
| 2582 | + |
| 2583 | + // Second, refine results sent from exports. |
| 2584 | + for (auto& exp : wasm.exports) { |
| 2585 | + if (exp->kind != ExternalKind::Function) { |
| 2586 | + continue; |
| 2587 | + } |
| 2588 | + auto name = *exp->getInternalName(); |
| 2589 | + if (map[name].reffed) { |
| 2590 | + continue; |
| 2591 | + } |
| 2592 | + |
| 2593 | + auto* func = wasm.getFunction(name); |
| 2594 | + auto oldResults = func->getResults(); |
| 2595 | + if (oldResults == Type::none) { |
| 2596 | + continue; |
| 2597 | + } |
| 2598 | + |
| 2599 | + // Refine. |
| 2600 | + auto lub = LUB::getResultsLUB(func, wasm); |
| 2601 | + auto lubType = lub.getLUB(); |
| 2602 | + assert(oldResults.size() == lubType.size() || !lub.noted()); |
| 2603 | + std::vector<Type> newResults; |
| 2604 | + for (Index i = 0; i < lubType.size(); i++) { |
| 2605 | + newResults.push_back(maybeRefineIndex(oldResults, lub, i)); |
| 2606 | + } |
| 2607 | + func->setResults(Type(newResults)); |
| 2608 | + } |
| 2609 | + |
| 2610 | + // Update return types from calls to exports whose results we refined. |
| 2611 | + struct CallUpdater : public WalkerPass<PostWalker<CallUpdater>> { |
| 2612 | + bool isFunctionParallel() override { return true; } |
| 2613 | + |
| 2614 | + std::unique_ptr<Pass> create() override { |
| 2615 | + return std::make_unique<CallUpdater>(); |
| 2616 | + } |
| 2617 | + |
| 2618 | + void visitCall(Call* curr) { |
| 2619 | + if (curr->type != Type::unreachable) { |
| 2620 | + curr->type = getModule()->getFunction(curr->target)->getResults(); |
| 2621 | + } |
| 2622 | + } |
| 2623 | + } updater; |
| 2624 | + updater.setModule(&wasm); |
| 2625 | + updater.run(&runner, &wasm); |
| 2626 | + |
| 2627 | + // Propagate after our changes. |
| 2628 | + ReFinalize().run(&runner, &wasm); |
| 2629 | +} |
| 2630 | + |
2392 | 2631 | void TranslateToFuzzReader::dropToLog(Function* func) { |
2393 | 2632 | // Don't always do this. |
2394 | 2633 | if (oneIn(2)) { |
|
0 commit comments