Skip to content

Commit e251faf

Browse files
committed
Add compression-oriented function reordering pass
Implement the --reorder-functions-by-similarity optimization pass in wasm-opt. Gzip and Brotli compression algorithms rely on finding repetitive byte patterns inside a sliding window (e.g., 32KB for Gzip). If structurally similar functions are placed far apart in the Wasm binary, the compressor cannot detect matches across them. While the existing --reorder-functions pass sorts functions strictly by call frequency to shrink LEB128 indexes, it scatters mutually compressible functions and ultimately increases gzipped delivery sizes. This new pass traverses defined function bodies in post-order and extracts a similarity sorting key based on signature type IDs, local variable types, and structural opcode sequences. By sorting defined functions lexicographically by this key, structurally similar functions are physically grouped together in the output binary, placing mutually compressible bytes next to each other. Empirical benchmarks on real-world Flutter and Poppler Wasm examples show a significant improvement, saving up to 2.13% and 0.98%, respectively, in compressed delivery size compared to the baseline (no reordering).
1 parent d9fd5da commit e251faf

8 files changed

Lines changed: 283 additions & 0 deletions

File tree

src/passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ set(passes_SOURCES
115115
RemoveUnusedModuleElements.cpp
116116
RemoveUnusedTypes.cpp
117117
ReorderFunctions.cpp
118+
ReorderFunctionsBySimilarity.cpp
118119
ReorderGlobals.cpp
119120
ReorderLocals.cpp
120121
ReorderTypes.cpp
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
2+
* Copyright 2026 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
//
18+
// Sorts functions by structural similarity. This groups mutually-compressible
19+
// instruction sequences together, maximizing subsequent compression ratio
20+
// (e.g., Gzip/Brotli).
21+
//
22+
23+
#include <algorithm>
24+
#include <memory>
25+
#include <vector>
26+
27+
#include "ir/utils.h"
28+
#include "pass.h"
29+
#include "support/threads.h"
30+
#include "wasm.h"
31+
32+
namespace wasm {
33+
34+
// Post-order traversal visitor to extract instruction sequence
35+
struct OpcodeSequenceBuilder
36+
: public PostWalker<OpcodeSequenceBuilder,
37+
UnifiedExpressionVisitor<OpcodeSequenceBuilder>> {
38+
std::vector<uint32_t> sequence;
39+
const size_t max_len = 512;
40+
41+
void visitExpression(Expression* curr) {
42+
if (sequence.size() >= max_len) {
43+
return;
44+
}
45+
// Append the core expression ID
46+
sequence.push_back(curr->_id);
47+
48+
// Capture important immediate type/operator information
49+
// TODO: There's probably more data that would be useful to capture.
50+
if (auto* unary = curr->dynCast<Unary>()) {
51+
sequence.push_back(unary->op);
52+
} else if (auto* binary = curr->dynCast<Binary>()) {
53+
sequence.push_back(binary->op);
54+
} else if (auto* load = curr->dynCast<Load>()) {
55+
sequence.push_back(load->bytes);
56+
sequence.push_back(load->offset);
57+
} else if (auto* store = curr->dynCast<Store>()) {
58+
sequence.push_back(store->bytes);
59+
sequence.push_back(store->offset);
60+
} else if (auto* localGet = curr->dynCast<LocalGet>()) {
61+
sequence.push_back(localGet->type.getID());
62+
} else if (auto* localSet = curr->dynCast<LocalSet>()) {
63+
sequence.push_back(localSet->type.getID());
64+
} else if (auto* const_ = curr->dynCast<Const>()) {
65+
sequence.push_back(const_->type.getID());
66+
}
67+
}
68+
};
69+
70+
struct ReorderFunctionsBySimilarity : public Pass {
71+
bool requiresNonNullableLocalFixups() override { return false; }
72+
73+
void run(Module* module) override {
74+
// If the number of defined functions is small, similarity-based reordering
75+
// does not help and can regress size due to breaking natural call
76+
// proximity.
77+
size_t numDefined = 0;
78+
for (const auto& func : module->functions) {
79+
if (!func->imported()) {
80+
numDefined++;
81+
}
82+
}
83+
size_t minFunctions = 150;
84+
auto arg = getArgumentOrDefault("reorder-functions-by-similarity", "");
85+
if (!arg.empty()) {
86+
minFunctions = std::stoul(arg);
87+
}
88+
if (numDefined < minFunctions) {
89+
return;
90+
}
91+
92+
// 1. Separate imported and defined functions
93+
std::vector<std::unique_ptr<Function>> importedFuncs;
94+
std::vector<std::unique_ptr<Function>> definedFuncs;
95+
96+
for (auto& func : module->functions) {
97+
if (func->imported()) {
98+
importedFuncs.push_back(std::move(func));
99+
} else {
100+
definedFuncs.push_back(std::move(func));
101+
}
102+
}
103+
104+
// 2. Build keys for defined functions in parallel
105+
struct FunctionSortKey {
106+
std::unique_ptr<Function> func;
107+
std::string typeStr;
108+
std::vector<std::string> varsStrs;
109+
std::vector<uint32_t> opcodeSequence;
110+
size_t originalIndex;
111+
112+
bool operator<(const FunctionSortKey& other) const {
113+
if (typeStr != other.typeStr) {
114+
return typeStr < other.typeStr;
115+
}
116+
if (varsStrs != other.varsStrs) {
117+
return varsStrs < other.varsStrs;
118+
}
119+
if (opcodeSequence != other.opcodeSequence) {
120+
return opcodeSequence < other.opcodeSequence;
121+
}
122+
return originalIndex < other.originalIndex;
123+
}
124+
};
125+
126+
size_t numThreads = ThreadPool::get()->size();
127+
std::vector<std::function<ThreadWorkState()>> doWorkers;
128+
std::atomic<size_t> nextFunction(0);
129+
size_t numFunctions = definedFuncs.size();
130+
131+
std::vector<FunctionSortKey> keys(numFunctions);
132+
133+
for (size_t i = 0; i < numThreads; i++) {
134+
doWorkers.push_back([&]() {
135+
while (true) {
136+
auto index = nextFunction.fetch_add(1);
137+
if (index >= numFunctions) {
138+
return ThreadWorkState::Finished;
139+
}
140+
auto& func = definedFuncs[index];
141+
142+
FunctionSortKey key;
143+
key.typeStr = func->type.toString();
144+
145+
key.varsStrs.reserve(func->vars.size());
146+
for (auto var : func->vars) {
147+
key.varsStrs.push_back(var.toString());
148+
}
149+
150+
OpcodeSequenceBuilder builder;
151+
builder.walk(func->body);
152+
key.opcodeSequence = std::move(builder.sequence);
153+
154+
key.originalIndex = index;
155+
key.func = std::move(func);
156+
keys[index] = std::move(key);
157+
}
158+
});
159+
}
160+
ThreadPool::get()->work(doWorkers);
161+
162+
// 3. Sort defined functions by the similarity heuristic
163+
std::sort(keys.begin(), keys.end());
164+
165+
// 4. Re-assemble module->functions vector
166+
module->functions.clear();
167+
module->functions.reserve(importedFuncs.size() + keys.size());
168+
169+
for (auto& func : importedFuncs) {
170+
module->functions.push_back(std::move(func));
171+
}
172+
for (auto& key : keys) {
173+
module->functions.push_back(std::move(key.func));
174+
}
175+
}
176+
};
177+
178+
// Factory for the similarity-based function reordering pass. Returns a newly
// allocated pass object.
Pass* createReorderFunctionsBySimilarityPass() {
  auto* pass = new ReorderFunctionsBySimilarity();
  return pass;
}
181+
182+
} // namespace wasm

src/passes/pass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,9 @@ void PassRegistry::registerPasses() {
442442
registerPass("reorder-functions-by-name",
443443
"sorts functions by name (useful for debugging)",
444444
createReorderFunctionsByNamePass);
445+
registerPass("reorder-functions-by-similarity",
446+
"sorts functions by similarity to improve compression",
447+
createReorderFunctionsBySimilarityPass);
445448
registerPass("reorder-functions",
446449
"sorts functions by access frequency",
447450
createReorderFunctionsPass);

src/passes/passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ Pass* createRemoveUnusedNonFunctionModuleElementsPass();
144144
Pass* createRemoveUnusedNamesPass();
145145
Pass* createRemoveUnusedTypesPass();
146146
Pass* createReorderFunctionsByNamePass();
147+
Pass* createReorderFunctionsBySimilarityPass();
147148
Pass* createReorderFunctionsPass();
148149
Pass* createReorderGlobalsPass();
149150
Pass* createReorderGlobalsAlwaysPass();

test/lit/help/wasm-metadce.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,9 @@
418418
;; CHECK-NEXT: --reorder-functions-by-name sorts functions by name (useful
419419
;; CHECK-NEXT: for debugging)
420420
;; CHECK-NEXT:
421+
;; CHECK-NEXT: --reorder-functions-by-similarity sorts functions by similarity to
422+
;; CHECK-NEXT: improve compression
423+
;; CHECK-NEXT:
421424
;; CHECK-NEXT: --reorder-globals sorts globals by access
422425
;; CHECK-NEXT: frequency
423426
;; CHECK-NEXT:

test/lit/help/wasm-opt.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,9 @@
454454
;; CHECK-NEXT: --reorder-functions-by-name sorts functions by name (useful
455455
;; CHECK-NEXT: for debugging)
456456
;; CHECK-NEXT:
457+
;; CHECK-NEXT: --reorder-functions-by-similarity sorts functions by similarity to
458+
;; CHECK-NEXT: improve compression
459+
;; CHECK-NEXT:
457460
;; CHECK-NEXT: --reorder-globals sorts globals by access
458461
;; CHECK-NEXT: frequency
459462
;; CHECK-NEXT:

test/lit/help/wasm2js.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,9 @@
382382
;; CHECK-NEXT: --reorder-functions-by-name sorts functions by name (useful
383383
;; CHECK-NEXT: for debugging)
384384
;; CHECK-NEXT:
385+
;; CHECK-NEXT: --reorder-functions-by-similarity sorts functions by similarity to
386+
;; CHECK-NEXT: improve compression
387+
;; CHECK-NEXT:
385388
;; CHECK-NEXT: --reorder-globals sorts globals by access
386389
;; CHECK-NEXT: frequency
387390
;; CHECK-NEXT:
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
;; `--reorder-functions-by-similarity=0` sets the minimum-function threshold to zero, forcing the pass to reorder functions even in this small module.
2+
;; RUN: foreach %s %t wasm-opt -all --reorder-functions-by-similarity=0 -S -o - | filecheck %s
3+
4+
(module
;; Expected output order, as pinned by the expectations below: the two
;; (param i32) (result i32) functions come first, then the (result i32)
;; functions without locals ($body_add_2, $body_add_1, $body_sub), and last
;; the (result i32) functions that declare locals. $body_add_2 and
;; $body_add_1 have the same body shape, so they keep their input order.
5+
;; CHECK: (type $0 (func (result i32)))
6+
;; CHECK-NEXT: (type $1 (func (param i32) (result i32)))
7+
8+
;; CHECK: (func $sig_b (type $1) (param $0 i32) (result i32)
9+
;; CHECK-NEXT: (i32.const 100)
10+
;; CHECK-NEXT: )
11+
12+
;; CHECK: (func $sig_c (type $1) (param $0 i32) (result i32)
13+
;; CHECK-NEXT: (i32.const 200)
14+
;; CHECK-NEXT: )
15+
16+
;; CHECK: (func $body_add_2 (type $0) (result i32)
17+
;; CHECK-NEXT: (i32.add
18+
;; CHECK-NEXT: (i32.const 10)
19+
;; CHECK-NEXT: (i32.const 20)
20+
;; CHECK-NEXT: )
21+
;; CHECK-NEXT: )
22+
23+
;; CHECK: (func $body_add_1 (type $0) (result i32)
24+
;; CHECK-NEXT: (i32.add
25+
;; CHECK-NEXT: (i32.const 1)
26+
;; CHECK-NEXT: (i32.const 2)
27+
;; CHECK-NEXT: )
28+
;; CHECK-NEXT: )
29+
30+
;; CHECK: (func $body_sub (type $0) (result i32)
31+
;; CHECK-NEXT: (i32.sub
32+
;; CHECK-NEXT: (i32.const 1)
33+
;; CHECK-NEXT: (i32.const 2)
34+
;; CHECK-NEXT: )
35+
;; CHECK-NEXT: )
36+
37+
;; CHECK: (func $locals_a (type $0) (result i32)
38+
;; CHECK-NEXT: (local $0 i32)
39+
;; CHECK-NEXT: (local $1 f64)
40+
;; CHECK-NEXT: (i32.const 5)
41+
;; CHECK-NEXT: )
42+
43+
;; CHECK: (func $locals_b (type $0) (result i32)
44+
;; CHECK-NEXT: (local $0 i32)
45+
;; CHECK-NEXT: (local $1 f64)
46+
;; CHECK-NEXT: (i32.const 10)
47+
;; CHECK-NEXT: )
48+
49+
;; Functions in mixed order:
50+
51+
;; Signature A: (result i32)
52+
(func $body_sub (result i32)
53+
(i32.sub (i32.const 1) (i32.const 2))
54+
)
55+
56+
;; Signature B: (param i32) (result i32)
57+
(func $sig_b (param i32) (result i32)
58+
(i32.const 100)
59+
)
60+
61+
;; Signature A, same body shape as $body_add_1
62+
(func $body_add_2 (result i32)
63+
(i32.add (i32.const 10) (i32.const 20))
64+
)
65+
66+
;; Signature A, has local variables (i32 f64)
67+
(func $locals_a (result i32)
68+
(local i32 f64)
69+
(i32.const 5)
70+
)
71+
72+
;; Signature A, same body shape as $body_add_2
73+
(func $body_add_1 (result i32)
74+
(i32.add (i32.const 1) (i32.const 2))
75+
)
76+
77+
;; Signature A, has local variables (i32 f64), same as $locals_a
78+
(func $locals_b (result i32)
79+
(local i32 f64)
80+
(i32.const 10)
81+
)
82+
83+
;; Signature B: (param i32) (result i32), same as $sig_b
84+
(func $sig_c (param i32) (result i32)
85+
(i32.const 200)
86+
)
87+
)

0 commit comments

Comments
 (0)