Skip to content

Commit 35ca7ce

Browse files
authored
[NFC] Refactor delta debugger utility to a struct (#8651)
The delta debugging utility was previously a free function that took a lambda for testing a partition and reporting whether it worked. Control flow was owned entirely by the utility, which meant that things like exiting early required exceptions. The user was also unable to, e.g., dynamically add new items to the set being reduced. Give users more control and flexibility by refactoring the delta debugging utility into a struct that implements the algorithm as a state machine. The struct exposes the current working and test sets, and the user says whether the current test set should be accepted or rejected.
1 parent 5210dee commit 35ca7ce

3 files changed

Lines changed: 326 additions & 196 deletions

File tree

src/support/delta_debugging.h

Lines changed: 125 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -21,100 +21,150 @@
2121
#include <cassert>
2222
#include <vector>
2323

24+
#include "support/index.h"
2425
namespace wasm {
2526

26-
// Use the delta debugging algorithm (Zeller 1999,
27+
// Use the delta debugging algorithm (Zeller 2002,
2728
// https://dl.acm.org/doi/10.1109/32.988498) to find the minimal set of
28-
// items necessary to preserve some property. Returns that minimal set of
29-
// items, preserving their input order. `tryPartition` should have this
30-
// signature:
31-
//
32-
// bool tryPartition(size_t partitionIndex,
33-
// size_t numPartitions,
34-
// const std::vector<T>& partition)
35-
//
36-
// It should return true iff the property is preserved while keeping only
37-
// `partition` items.
38-
template<typename T, typename F>
39-
std::vector<T> deltaDebugging(std::vector<T> items, const F& tryPartition) {
40-
if (items.empty()) {
41-
return items;
29+
// items necessary to preserve some property. `working` is the minimal set of
30+
// items found so far and `test` is the smaller set of items that should be
31+
// tested next. After testing, call `accept()`, `reject()`, or `resolve(bool
32+
// accepted)` to update the working and test sets appropriately.
33+
template<typename T> struct DeltaDebugger {
34+
std::vector<T> working;
35+
std::vector<T> test;
36+
37+
private:
38+
Index numPartitions = 1;
39+
Index currentPartition = 0;
40+
bool testingComplements = false;
41+
bool triedEmpty = false;
42+
bool isFinished = false;
43+
std::vector<std::vector<T>> partitions;
44+
45+
public:
46+
DeltaDebugger(std::vector<T> items) : working(std::move(items)) {}
47+
48+
bool finished() const {
49+
return isFinished || (triedEmpty && working.size() <= 1);
4250
}
43-
// First try removing everything.
44-
if (tryPartition(0, 1, {})) {
45-
return {};
51+
Index partitionCount() { return numPartitions; }
52+
Index partitionIndex() { return currentPartition; }
53+
54+
void accept() {
55+
if (finished()) {
56+
return;
57+
}
58+
59+
if (test.empty()) {
60+
triedEmpty = true;
61+
}
62+
63+
working = std::move(test);
64+
65+
// We might be finished now even if we weren't before.
66+
if (finished()) {
67+
return;
68+
}
69+
70+
if (!testingComplements) {
71+
numPartitions = 2;
72+
} else {
73+
numPartitions = std::max(numPartitions - 1, Index(2));
74+
}
75+
testingComplements = false;
76+
currentPartition = 0;
77+
updateTest();
4678
}
47-
size_t numPartitions = 2;
48-
while (numPartitions <= items.size()) {
49-
// Partition the items.
50-
std::vector<std::vector<T>> partitions;
51-
size_t size = items.size();
52-
size_t basePartitionSize = size / numPartitions;
53-
size_t rem = size % numPartitions;
54-
size_t idx = 0;
55-
for (size_t i = 0; i < numPartitions; ++i) {
56-
size_t partitionSize = basePartitionSize + (i < rem ? 1 : 0);
57-
if (partitionSize > 0) {
58-
std::vector<T> partition;
59-
partition.reserve(partitionSize);
60-
for (size_t j = 0; j < partitionSize; ++j) {
61-
partition.push_back(items[idx++]);
79+
80+
void reject() {
81+
if (test.empty()) {
82+
triedEmpty = true;
83+
numPartitions = 2;
84+
updateTest();
85+
return;
86+
}
87+
88+
if (finished()) {
89+
return;
90+
}
91+
92+
++currentPartition;
93+
if (currentPartition >= partitions.size()) {
94+
// No need to test complements if there are only two partitions, since
95+
// that is no different.
96+
if (!testingComplements && numPartitions > 2) {
97+
testingComplements = true;
98+
currentPartition = 0;
99+
} else {
100+
if (numPartitions >= working.size()) {
101+
isFinished = true;
102+
return;
62103
}
63-
partitions.emplace_back(std::move(partition));
104+
// Refine the partitions.
105+
numPartitions = std::min(Index(working.size()), 2 * numPartitions);
106+
testingComplements = false;
107+
currentPartition = 0;
64108
}
65109
}
66-
assert(numPartitions == partitions.size());
110+
updateTest();
111+
}
67112

68-
bool reduced = false;
113+
// Convenience wrapper for when there is already a bool determining whether to
114+
// accept or reject the current test sequence.
115+
void resolve(bool success) {
116+
if (success) {
117+
accept();
118+
} else {
119+
reject();
120+
}
121+
}
69122

70-
// Try keeping only one partition. Try each partition in turn.
71-
for (size_t i = 0; i < numPartitions; ++i) {
72-
if (tryPartition(i, numPartitions, partitions[i])) {
73-
items = std::move(partitions[i]);
74-
numPartitions = 2;
75-
reduced = true;
76-
break;
77-
}
123+
private:
124+
void updateTest() {
125+
if (finished()) {
126+
test.clear();
127+
return;
78128
}
79-
if (reduced) {
80-
continue;
129+
130+
if (currentPartition == 0 && !testingComplements) {
131+
generatePartitions();
81132
}
82133

83-
// Otherwise, try keeping the complement of a partition. Do not do this with
84-
// only two partitions because that would be no different from what we
85-
// already tried.
86-
if (numPartitions > 2) {
87-
for (size_t i = 0; i < numPartitions; ++i) {
88-
std::vector<T> complement;
89-
complement.reserve(items.size() - partitions[i].size());
90-
for (size_t j = 0; j < numPartitions; ++j) {
91-
if (j != i) {
92-
complement.insert(
93-
complement.end(), partitions[j].begin(), partitions[j].end());
94-
}
95-
}
96-
if (tryPartition(i, numPartitions, complement)) {
97-
items = std::move(complement);
98-
numPartitions = std::max(numPartitions - 1, size_t(2));
99-
reduced = true;
100-
break;
134+
if (!testingComplements) {
135+
test = partitions[currentPartition];
136+
} else {
137+
test.clear();
138+
test.reserve(working.size() - partitions[currentPartition].size());
139+
for (size_t i = 0; i < partitions.size(); ++i) {
140+
if (i != currentPartition) {
141+
test.insert(test.end(), partitions[i].begin(), partitions[i].end());
101142
}
102143
}
103-
if (reduced) {
104-
continue;
105-
}
106144
}
145+
}
107146

108-
if (numPartitions == items.size()) {
109-
// Cannot further refine the partitions. We're done.
110-
break;
111-
}
147+
void generatePartitions() {
148+
partitions.clear();
149+
size_t size = working.size();
150+
assert(numPartitions != 0 && numPartitions <= size);
112151

113-
// Otherwise, make the partitions finer grained.
114-
numPartitions = std::min(items.size(), 2 * numPartitions);
152+
size_t basePartitionSize = size / numPartitions;
153+
size_t rem = size % numPartitions;
154+
size_t idx = 0;
155+
for (size_t i = 0; i < numPartitions; ++i) {
156+
size_t partitionSize = basePartitionSize + (i < rem ? 1 : 0);
157+
if (partitionSize > 0) {
158+
std::vector<T> partition;
159+
partition.reserve(partitionSize);
160+
for (size_t j = 0; j < partitionSize; ++j) {
161+
partition.push_back(working[idx++]);
162+
}
163+
partitions.emplace_back(std::move(partition));
164+
}
165+
}
115166
}
116-
return items;
117-
}
167+
};
118168

119169
} // namespace wasm
120170

src/tools/wasm-reduce/wasm-reduce.cpp

Lines changed: 58 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -918,78 +918,67 @@ struct Reducer
918918
}
919919
nontrivialFuncIndices.push_back(i);
920920
}
921-
// TODO: Use something other than an exception to implement early return.
922-
struct EarlyReturn {};
923-
try {
924-
deltaDebugging(
925-
nontrivialFuncIndices,
926-
[&](Index partitionIndex,
927-
Index numPartitions,
928-
const std::vector<Index>& partition) {
929-
// Stop early if the partition size is less than the square root of
930-
// the remaining set. We don't want to waste time on very fine-grained
931-
// partitions when we could switch to another reduction strategy
932-
// instead.
933-
if (size_t sqrtRemaining = std::sqrt(nontrivialFuncIndices.size());
934-
partition.size() > 0 && partition.size() < sqrtRemaining) {
935-
throw EarlyReturn{};
936-
}
921+
DeltaDebugger<Index> dd(std::move(nontrivialFuncIndices));
922+
while (!dd.finished()) {
923+
// Stop early if the partition size is less than the square root of
924+
// the remaining set. We don't want to waste time on very fine-grained
925+
// partitions when we could switch to another reduction strategy
926+
// instead.
927+
if (size_t sqrtRemaining = std::sqrt(dd.working.size());
928+
dd.test.size() > 0 && dd.test.size() < sqrtRemaining) {
929+
break;
930+
}
937931

938-
std::cerr << "| try partition " << partitionIndex + 1 << " / "
939-
<< numPartitions << " (size " << partition.size() << ")\n";
940-
Index removedSize = nontrivialFuncIndices.size() - partition.size();
941-
std::vector<Expression*> oldBodies(removedSize);
942-
943-
// We first need to remove each non-kept function body, and later we
944-
// might need to restore the same function bodies. Abstract the logic
945-
// for iterating over these function bodies. `f` takes a Function* and
946-
// Expression*& for the stashed body.
947-
auto forEachRemovedFuncBody = [&](auto f) {
948-
Index bodyIndex = 0;
949-
Index nontrivialIndex = 0;
950-
Index partitionIndex = 0;
951-
while (nontrivialIndex < nontrivialFuncIndices.size()) {
952-
if (partitionIndex < partition.size() &&
953-
nontrivialFuncIndices[nontrivialIndex] ==
954-
partition[partitionIndex]) {
955-
// Kept, skip it.
956-
nontrivialIndex++;
957-
partitionIndex++;
958-
} else {
959-
// Removed, process it
960-
Index funcIndex = nontrivialFuncIndices[nontrivialIndex++];
961-
f(module->functions[funcIndex].get(), oldBodies[bodyIndex++]);
962-
}
963-
}
964-
assert(bodyIndex == removedSize);
965-
assert(partitionIndex == partition.size());
966-
};
967-
968-
// Stash the bodies.
969-
forEachRemovedFuncBody([&](Function* func, Expression*& oldBody) {
970-
oldBody = func->body;
971-
Builder builder(*module);
972-
if (func->getResults() == Type::none) {
973-
func->body = builder.makeNop();
974-
} else {
975-
func->body = builder.makeUnreachable();
976-
}
977-
});
978-
979-
if (!writeAndTestReduction()) {
980-
// Failure. Restore the bodies.
981-
forEachRemovedFuncBody([](Function* func, Expression*& oldBody) {
982-
func->body = oldBody;
983-
});
984-
return false;
932+
std::cerr << "| try partition " << dd.partitionIndex() + 1 << " / "
933+
<< dd.partitionCount() << " (size " << dd.test.size() << ")\n";
934+
Index removedSize = dd.working.size() - dd.test.size();
935+
std::vector<Expression*> oldBodies(removedSize);
936+
937+
// We first need to remove each non-kept function body, and later we
938+
// might need to restore the same function bodies. Abstract the logic
939+
// for iterating over these function bodies. `f` takes a Function* and
940+
// Expression*& for the stashed body.
941+
auto forEachRemovedFuncBody = [&](auto f) {
942+
Index bodyIndex = 0;
943+
Index workingIndex = 0;
944+
Index testIndex = 0;
945+
while (workingIndex < dd.working.size()) {
946+
if (testIndex < dd.test.size() &&
947+
dd.working[workingIndex] == dd.test[testIndex]) {
948+
// Kept, skip it.
949+
workingIndex++;
950+
testIndex++;
951+
} else {
952+
// Removed, process it
953+
Index funcIndex = dd.working[workingIndex++];
954+
f(module->functions[funcIndex].get(), oldBodies[bodyIndex++]);
985955
}
956+
}
957+
assert(bodyIndex == removedSize);
958+
assert(testIndex == dd.test.size());
959+
};
960+
961+
// Stash the bodies.
962+
forEachRemovedFuncBody([&](Function* func, Expression*& oldBody) {
963+
oldBody = func->body;
964+
Builder builder(*module);
965+
if (func->getResults() == Type::none) {
966+
func->body = builder.makeNop();
967+
} else {
968+
func->body = builder.makeUnreachable();
969+
}
970+
});
986971

987-
// Success!
988-
noteReduction(removedSize);
989-
nontrivialFuncIndices = partition;
990-
return true;
991-
});
992-
} catch (EarlyReturn) {
972+
if (!writeAndTestReduction()) {
973+
// Failure. Restore the bodies.
974+
forEachRemovedFuncBody(
975+
[](Function* func, Expression*& oldBody) { func->body = oldBody; });
976+
dd.reject();
977+
} else {
978+
// Success!
979+
noteReduction(removedSize);
980+
dd.accept();
981+
}
993982
}
994983
}
995984

0 commit comments

Comments
 (0)