Skip to content

Commit 1e36622

Browse files
authored
[wasm-reduce] Empty functions with delta debugging (#8640)
Delta debugging is an algorithm for finding the minimal set of items necessary to preserve a condition. It generally works by using increasingly fine partitions of the original set of items, alternating between two kinds of attempts: keeping just one of the partitions, which makes rapid progress when it works, and keeping the complement of one of the partitions, which makes smaller changes that are more likely to work. Add a header containing a templatized delta debugging implementation, then use it in wasm-reduce to preserve the minimal number of function bodies necessary to reproduce the reduction condition. This should allow wasm-reduce to make much faster progress on emptying out functions in the common case and leave it much less work to do afterwards. Using delta debugging for deleting functions and performing other reduction operations is left as future work. Deleting functions in particular is challenging because it can involve reloading the module from the working file, which can change function names and thereby invalidate the function names that would be stored in the delta debugging partitions.
1 parent 74ca4eb commit 1e36622

4 files changed

Lines changed: 335 additions & 43 deletions

File tree

src/support/delta_debugging.h

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Copyright 2026 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef wasm_support_delta_debugging_h
18+
#define wasm_support_delta_debugging_h
19+
20+
#include <algorithm>
21+
#include <cassert>
22+
#include <vector>
23+
24+
namespace wasm {
25+
26+
// Use the delta debugging algorithm (ddmin; Zeller and Hildebrandt 2002,
// https://dl.acm.org/doi/10.1109/32.988498) to find a minimal set of items
// necessary to preserve some property. Returns that set of items, preserving
// their input order. The result is minimal in the sense that, as long as
// `tryPartition` answers consistently, removing any single remaining item
// would lose the property; it is not necessarily the smallest possible set.
//
// `tryPartition` should be callable with this signature:
//
//   bool tryPartition(size_t partitionIndex,
//                     size_t numPartitions,
//                     const std::vector<T>& partition)
//
// It should return true iff the property is preserved while keeping only
// `partition` items. `partitionIndex` and `numPartitions` describe the current
// partitioning and are useful for logging. When the complement of a partition
// is being tried, `partitionIndex` is the index of the partition that was left
// out. The very first call tries keeping nothing at all and is made as
// `tryPartition(0, 1, {})`.
//
// `tryPartition` may be any callable, including a generic lambda; the third
// argument is always passed as a `std::vector<T>`.
template<typename T, typename F>
std::vector<T> deltaDebugging(std::vector<T> items, const F& tryPartition) {
  if (items.empty()) {
    return items;
  }
  // First try removing everything. Spell out the vector type rather than
  // passing a bare `{}` so that callables with a deduced parameter type (e.g.
  // generic lambdas) can be used as well.
  if (tryPartition(size_t(0), size_t(1), std::vector<T>{})) {
    return {};
  }
  using Diff = typename std::vector<T>::difference_type;
  size_t numPartitions = 2;
  while (numPartitions <= items.size()) {
    // Split the items into `numPartitions` contiguous partitions whose sizes
    // differ by at most one. The loop condition guarantees there are at least
    // as many items as partitions, so no partition is empty.
    const size_t size = items.size();
    const size_t basePartitionSize = size / numPartitions;
    const size_t rem = size % numPartitions;
    std::vector<std::vector<T>> partitions;
    partitions.reserve(numPartitions);
    auto begin = items.begin();
    for (size_t i = 0; i < numPartitions; ++i) {
      // The first `rem` partitions absorb the remainder, one item each.
      auto end = begin + static_cast<Diff>(basePartitionSize + (i < rem ? 1 : 0));
      partitions.emplace_back(begin, end);
      begin = end;
    }
    assert(begin == items.end());
    assert(numPartitions == partitions.size());

    bool reduced = false;

    // Try keeping only one partition. Try each partition in turn.
    for (size_t i = 0; i < numPartitions; ++i) {
      if (tryPartition(i, numPartitions, partitions[i])) {
        // Big win: restart from the coarsest granularity on the survivor.
        items = std::move(partitions[i]);
        numPartitions = 2;
        reduced = true;
        break;
      }
    }
    if (reduced) {
      continue;
    }

    // Otherwise, try keeping the complement of a partition. Do not do this with
    // only two partitions because that would be no different from what we
    // already tried.
    if (numPartitions > 2) {
      for (size_t i = 0; i < numPartitions; ++i) {
        std::vector<T> complement;
        complement.reserve(items.size() - partitions[i].size());
        for (size_t j = 0; j < numPartitions; ++j) {
          if (j != i) {
            complement.insert(
              complement.end(), partitions[j].begin(), partitions[j].end());
          }
        }
        if (tryPartition(i, numPartitions, complement)) {
          // One partition is gone, so keep the same granularity over the
          // remaining ones.
          items = std::move(complement);
          numPartitions = std::max(numPartitions - 1, size_t(2));
          reduced = true;
          break;
        }
      }
      if (reduced) {
        continue;
      }
    }

    if (numPartitions == items.size()) {
      // Cannot further refine the partitions. We're done.
      break;
    }

    // Otherwise, make the partitions finer grained.
    numPartitions = std::min(items.size(), 2 * numPartitions);
  }
  return items;
}
118+
119+
} // namespace wasm
120+
121+
#endif // wasm_support_delta_debugging_h

src/tools/wasm-reduce/wasm-reduce.cpp

Lines changed: 116 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@
2929

3030
#include "ir/branch-utils.h"
3131
#include "ir/iteration.h"
32-
#include "ir/literal-utils.h"
3332
#include "ir/properties.h"
3433
#include "ir/utils.h"
3534
#include "pass.h"
3635
#include "support/colors.h"
3736
#include "support/command-line.h"
37+
#include "support/delta_debugging.h"
3838
#include "support/file.h"
3939
#include "support/hash.h"
4040
#include "support/path.h"
@@ -894,8 +894,105 @@ struct Reducer
894894
}
895895
}
896896

897-
// Reduces entire functions at a time. Returns whether we did a significant
898-
// amount of reduction that justifies doing even more.
897+
// Whether `body` is already trivial: a block with no children, a nop, or an
// unreachable.
bool isEmptyBody(Expression* body) {
  if (auto* block = body->dynCast<Block>()) {
    return block->list.empty();
  }
  return body->is<Nop>() || body->is<Unreachable>();
}
906+
907+
void reduceFunctionBodies() {
908+
std::cerr << "| try to remove function bodies\n";
909+
// Use function indices to speed up finding the complement of the kept
910+
// partition.
911+
std::vector<Index> nontrivialFuncIndices;
912+
nontrivialFuncIndices.reserve(module->functions.size());
913+
for (Index i = 0; i < module->functions.size(); ++i) {
914+
auto& func = module->functions[i];
915+
// Skip functions that already have trivial bodies.
916+
if (func->imported() || isEmptyBody(func->body)) {
917+
continue;
918+
}
919+
nontrivialFuncIndices.push_back(i);
920+
}
921+
// TODO: Use something other than an exception to implement early return.
922+
struct EarlyReturn {};
923+
try {
924+
deltaDebugging(
925+
nontrivialFuncIndices,
926+
[&](Index partitionIndex,
927+
Index numPartitions,
928+
const std::vector<Index>& partition) {
929+
// Stop early if the partition size is less than the square root of
930+
// the remaining set. We don't want to waste time on very fine-grained
931+
// partitions when we could switch to another reduction strategy
932+
// instead.
933+
if (size_t sqrtRemaining = std::sqrt(nontrivialFuncIndices.size());
934+
partition.size() > 0 && partition.size() < sqrtRemaining) {
935+
throw EarlyReturn{};
936+
}
937+
938+
std::cerr << "| try partition " << partitionIndex + 1 << " / "
939+
<< numPartitions << " (size " << partition.size() << ")\n";
940+
Index removedSize = nontrivialFuncIndices.size() - partition.size();
941+
std::vector<Expression*> oldBodies(removedSize);
942+
943+
// We first need to remove each non-kept function body, and later we
944+
// might need to restore the same function bodies. Abstract the logic
945+
// for iterating over these function bodies. `f` takes a Function* and
946+
// Expression*& for the stashed body.
947+
auto forEachRemovedFuncBody = [&](auto f) {
948+
Index bodyIndex = 0;
949+
Index nontrivialIndex = 0;
950+
Index partitionIndex = 0;
951+
while (nontrivialIndex < nontrivialFuncIndices.size()) {
952+
if (partitionIndex < partition.size() &&
953+
nontrivialFuncIndices[nontrivialIndex] ==
954+
partition[partitionIndex]) {
955+
// Kept, skip it.
956+
nontrivialIndex++;
957+
partitionIndex++;
958+
} else {
959+
// Removed, process it
960+
Index funcIndex = nontrivialFuncIndices[nontrivialIndex++];
961+
f(module->functions[funcIndex].get(), oldBodies[bodyIndex++]);
962+
}
963+
}
964+
assert(bodyIndex == removedSize);
965+
assert(partitionIndex == partition.size());
966+
};
967+
968+
// Stash the bodies.
969+
forEachRemovedFuncBody([&](Function* func, Expression*& oldBody) {
970+
oldBody = func->body;
971+
Builder builder(*module);
972+
if (func->getResults() == Type::none) {
973+
func->body = builder.makeNop();
974+
} else {
975+
func->body = builder.makeUnreachable();
976+
}
977+
});
978+
979+
if (!writeAndTestReduction()) {
980+
// Failure. Restore the bodies.
981+
forEachRemovedFuncBody([](Function* func, Expression*& oldBody) {
982+
func->body = oldBody;
983+
});
984+
return false;
985+
}
986+
987+
// Success!
988+
noteReduction(removedSize);
989+
nontrivialFuncIndices = partition;
990+
return true;
991+
});
992+
} catch (EarlyReturn) {
993+
}
994+
}
995+
899996
bool reduceFunctions() {
900997
// try to remove functions
901998
std::vector<Name> functionNames;
@@ -936,11 +1033,9 @@ struct Reducer
9361033
}
9371034
std::cerr << "| trying at i=" << i << " of size " << names.size()
9381035
<< "\n";
939-
// Try to remove functions and/or empty them. Note that
940-
// tryToRemoveFunctions() will reload the module if it fails, which means
941-
// function names may change - for that reason, run it second.
942-
justReduced = tryToEmptyFunctions(names) || tryToRemoveFunctions(names);
943-
if (justReduced) {
1036+
// Note that tryToRemoveFunctions() will reload the module if it fails,
1037+
// which means function names may change.
1038+
if (tryToRemoveFunctions(names)) {
9441039
noteReduction(names.size());
9451040
// Subtract 1 since the loop increments us anyhow by one: we want to
9461041
// skip over the skipped functions, and not any more.
@@ -967,8 +1062,11 @@ struct Reducer
9671062
assert(curr == module.get());
9681063
curr = nullptr;
9691064

1065+
reduceFunctionBodies();
1066+
9701067
// Reduction of entire functions at a time is very effective, and we do it
9711068
// with exponential growth and backoff, so keep doing it while it works.
1069+
// TODO: Figure out how to use delta debugging for this as well.
9721070
while (reduceFunctions()) {
9731071
}
9741072

@@ -1047,41 +1145,6 @@ struct Reducer
10471145
}
10481146
}
10491147

1050-
// Try to empty out the bodies of some functions.
1051-
bool tryToEmptyFunctions(std::vector<Name> names) {
1052-
std::vector<Expression*> oldBodies;
1053-
size_t actuallyEmptied = 0;
1054-
for (auto name : names) {
1055-
auto* func = module->getFunction(name);
1056-
auto* oldBody = func->body;
1057-
oldBodies.push_back(oldBody);
1058-
// Nothing to do for imported functions (body is nullptr) or for bodies
1059-
// that have already been as reduced as we can make them.
1060-
if (func->imported() || oldBody->is<Unreachable>() ||
1061-
oldBody->is<Nop>()) {
1062-
continue;
1063-
}
1064-
actuallyEmptied++;
1065-
bool useUnreachable = func->getResults() != Type::none;
1066-
if (useUnreachable) {
1067-
func->body = builder->makeUnreachable();
1068-
} else {
1069-
func->body = builder->makeNop();
1070-
}
1071-
}
1072-
if (actuallyEmptied > 0 && writeAndTestReduction()) {
1073-
std::cerr << "| emptied " << actuallyEmptied << " / "
1074-
<< names.size() << " functions\n";
1075-
return true;
1076-
} else {
1077-
// Restore the bodies.
1078-
for (size_t i = 0; i < names.size(); i++) {
1079-
module->getFunction(names[i])->body = oldBodies[i];
1080-
}
1081-
return false;
1082-
}
1083-
}
1084-
10851148
// Try to actually remove functions. If they are somehow referred to, we will
10861149
// get a validation error and undo it.
10871150
bool tryToRemoveFunctions(std::vector<Name> names) {
@@ -1504,10 +1567,20 @@ More documentation can be found at
15041567

15051568
bool stopping = false;
15061569

1570+
bool first = true;
15071571
while (1) {
15081572
Reducer reducer(
15091573
command, test, working, binary, deNan, verbose, debugInfo, options);
15101574

1575+
// For extremely large modules with slow reproduction commands, reducing
1576+
// function bodies first can be more effective than running passes. TODO:
1577+
// clean this up and reconsider the order of reducers.
1578+
if (first) {
1579+
reducer.loadWorking();
1580+
reducer.reduceFunctionBodies();
1581+
first = false;
1582+
}
1583+
15111584
// run binaryen optimization passes to reduce. passes are fast to run
15121585
// and can often reduce large amounts of code efficiently, as opposed
15131586
// to destructive reduction (i.e., that doesn't preserve correctness as

test/gtest/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set(unittest_SOURCES
1010
cast-check.cpp
1111
cfg.cpp
1212
dataflow.cpp
13+
delta_debugging.cpp
1314
dfa_minimization.cpp
1415
disjoint_sets.cpp
1516
graph.cpp

0 commit comments

Comments
 (0)