Skip to content

Commit 024be09

Browse files
avrabeclaude
andauthored
fix(optimizer): optimized path block/br_if branch target lands mid-instruction (#483, #242) (#497)
On the optimized (non-relocatable) ARM path, a forward `block` + `br_if` lowered to a conditional branch whose target landed in the MIDDLE of an instruction and did not skip the block, silently miscompiling any such function (the `--relocatable` direct path was unaffected). Two root causes: 1. **Label id mismatch.** A `br`/`br_if` exiting a forward `block` targets the id pushed when the `Block` opened (the `Block` arm emits only a `Nop`). But the matching `End` emitted its label with the End's OWN id, so the branch resolved against an id no label carried and was left as the unpatched offset-0 placeholder. Fix: `End` now labels with the id of the block it closes (loop ends and the function-end keep their own id — a loop's target label is already at its start, and the function end is just the structural trailing label). 2. **Byte-size estimate gap.** The `byte_offsets` table that drives branch displacement sized `Strh`/`Strb`/`Ldrh`/`Ldrsh`/`Ldrb`/`Ldrsb` as the default 2 bytes, but the optimized path always addresses memory off a high base register (`ip`/r11), so the encoder emits the 4-byte `.w` form. Once the branch resolved (fix 1), the i32.store16 in the repro drifted the target by 2. Fix: size these as 4 (the optimized path never emits the 16-bit T1 form). Validation: new CI-gated unicorn differential (`block_brif_483_differential.py`) runs the forward-block repro AND a nested-block + `br` case through the optimized path and diffs linear memory vs wasmtime across both branch directions; fails without the fix (init_branch(0) wrongly executes the guarded stores). Frozen byte gate stays bit-identical (direct path untouched); full workspace suite green. The size-estimator also lacks correct arms for other variable-width ops (Cmp/Cmn/ Adds/Subs/Popcnt) that could drift a branch spanning them — filed separately as a table-completeness follow-up; not triggered by this repro. Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 887133e commit 024be09

4 files changed

Lines changed: 211 additions & 4 deletions

File tree

.github/workflows/ci.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,3 +380,41 @@ jobs:
380380
env:
381381
SYNTH: ./target/debug/synth
382382
run: python scripts/repro/callee_saved_490_differential.py
383+
384+
block-brif-483-oracle:
385+
name: optimized-path block/br_if lowering oracle
386+
# VCR-ORACLE-001 (#242, #483): EXECUTE optimized-path functions with forward
387+
# `block`/`br_if` and nested `block`+`br` under unicorn (UC_ARCH_ARM / Thumb)
388+
# and diff the resulting linear MEMORY vs wasmtime across both branch
389+
# directions. Guards the #483 fix (End labels with the closed block's id, and
390+
# the half/byte memory-op byte-size estimate) — before it, a forward br_if
391+
# resolved against an id no label held and landed mid-instruction, silently
392+
# miscompiling any non-relocatable function with that control flow. The frozen
393+
# byte gate only covers the `--relocatable` direct path, so nothing else
394+
# exercises the optimized path's branch resolution. Isolated job: emulation
395+
# deps pip-installed here ONLY.
396+
runs-on: ubuntu-latest
397+
steps:
398+
- uses: actions/checkout@v7
399+
- uses: dtolnay/rust-toolchain@stable
400+
- name: Cache Cargo dependencies
401+
uses: actions/cache@v5
402+
with:
403+
path: |
404+
~/.cargo/registry
405+
~/.cargo/git
406+
target/
407+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
408+
restore-keys: |
409+
${{ runner.os }}-cargo-
410+
- name: Build synth
411+
run: cargo build -p synth-cli
412+
- uses: actions/setup-python@v5
413+
with:
414+
python-version: "3.x"
415+
- name: Install emulation deps
416+
run: pip install wasmtime unicorn pyelftools
417+
- name: Run block/br_if lowering oracle
418+
env:
419+
SYNTH: ./target/debug/synth
420+
run: python scripts/repro/block_brif_483_differential.py

crates/synth-synthesis/src/optimizer_bridge.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,10 +1793,22 @@ impl OptimizerBridge {
17931793
}
17941794

17951795
// End: marks the end of a block/loop/function. No stack effect.
1796-
WasmOp::End => {
1797-
block_stack.pop();
1798-
Opcode::Label { id: inst_id }
1799-
}
1796+
//
1797+
// #483: a forward `block` is the branch target of any `br`/`br_if`
1798+
// that exits it — and those branches target the id pushed when the
1799+
// `Block` was opened (block_stack stores the Block's inst_id, and
1800+
// the `Block` arm emits only a `Nop`, no label). So the End that
1801+
// closes a block must emit its label with the BLOCK's id, not its
1802+
// own, or the branch resolves against an id no label carries and is
1803+
// left as the unpatched placeholder (offset 0 → a mid-instruction
1804+
// landing — the #483 miscompile). Loop ends and the function-end
1805+
// keep their own id: a loop's target label is already at its start
1806+
// (backward branch), and the function end is just the structural
1807+
// trailing label.
1808+
WasmOp::End => match block_stack.pop() {
1809+
Some((0, block_id)) => Opcode::Label { id: block_id },
1810+
_ => Opcode::Label { id: inst_id },
1811+
},
18001812

18011813
// Br: unconditional branch to label. No stack effect at IR
18021814
// level (the wasm validator handles unreachable stack after Br).
@@ -5342,6 +5354,20 @@ impl OptimizerBridge {
53425354
4
53435355
}
53445356
}
5357+
// #483: half/byte loads and stores. The optimized path always
5358+
// materializes a memory access against a HIGH base register
5359+
// (`ip`/r12 for a const address, r11 for the base-CSE linmem
5360+
// anchor), so the encoder always emits the 32-bit `.w` form — the
5361+
// 16-bit Thumb T1 encoding (low base + small aligned offset) is
5362+
// never produced here. Sizing these as the default 2 (the bug)
5363+
// drifts `byte_offsets` and miscompiles any branch that spans the
5364+
// access (the i32.store16 in the #483 `block`/`br_if` repro).
5365+
ArmOp::Strh { .. }
5366+
| ArmOp::Strb { .. }
5367+
| ArmOp::Ldrh { .. }
5368+
| ArmOp::Ldrsh { .. }
5369+
| ArmOp::Ldrb { .. }
5370+
| ArmOp::Ldrsb { .. } => 4,
53455371
// BL is always 32-bit
53465372
ArmOp::Bl { .. } => 4,
53475373
// MOV with high register (R8-R15) or large immediate needs MOVW (4 bytes)

scripts/repro/block_brif_483.wat

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
(module
2+
(memory 1)
3+
(export "memory" (memory 0))
4+
5+
;; #483 canonical repro: a forward `block` + `br_if` exit. The br_if target
6+
;; (block end) must skip the guarded stores when taken. i32.store16 inside the
7+
;; block is the size-estimator trigger (Strh encodes 4 bytes, was sized 2).
8+
(func (export "init_branch") (param $sel i32)
9+
(i32.store (i32.const 0) (i32.const 11))
10+
(i32.store (i32.const 4) (i32.const 22))
11+
(block $skip
12+
(br_if $skip (i32.eqz (local.get $sel)))
13+
(i32.store (i32.const 8) (i32.const 33))
14+
(i32.store16 (i32.const 12) (i32.const 44))))
15+
16+
;; Nested blocks + an unconditional `br 1` exiting the OUTER block from inside
17+
;; the inner one — exercises the `Br` path and label resolution across nested
18+
;; Ends (each End must carry the id of the block it closes). Kept light (single
19+
;; guarded word store) to isolate the branch resolution from spill/frame paths.
20+
(func (export "nested") (param $sel i32)
21+
(block $outer
22+
(block $inner
23+
(br_if $inner (local.get $sel)) ;; sel!=0 → skip to $inner end
24+
(br $outer)) ;; sel==0 → exit $outer entirely
25+
(i32.store (i32.const 24) (i32.const 77))) ;; reached only when sel!=0
26+
(i32.store (i32.const 32) (i32.const 99))) ;; always reached
27+
)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#!/usr/bin/env python3
2+
"""#483 (epic #242) — EXECUTION-validate optimized-path block/br_if lowering.
3+
4+
On the OPTIMIZED (non-`--relocatable`) ARM path, a forward `block` + `br_if`
5+
lowered to a conditional branch whose target landed MID-INSTRUCTION: the branch
6+
targeted the id pushed for the `Block` opener, but the only emitted label sat at
7+
the matching `End` carrying the End's *own* id — so the target resolved against
8+
an id no label held and was left as the unpatched offset-0 placeholder. A
9+
companion size-estimator gap (i32.store16 → `Strh`, a 4-byte `.w` encoding, was
10+
sized as 2) drifted `byte_offsets` so even a resolved branch missed by 2 bytes.
11+
12+
This harness compiles the fixture via the optimized path, runs each function
13+
under unicorn (UC_ARCH_ARM / Thumb) with the linear memory mapped at the absolute
14+
base the optimized path materializes, and asserts the resulting MEMORY is
15+
bit-identical to wasmtime ground truth across both branch directions (taken /
16+
not-taken). The functions write memory and return nothing, so memory is the
17+
observable. Before the fix, `init_branch(0)` (br_if taken → block skipped) wrote
18+
the guarded fields anyway.
19+
20+
Run (needs wasmtime + unicorn + pyelftools):
21+
SYNTH=./target/debug/synth python scripts/repro/block_brif_483_differential.py
22+
Exits nonzero on any mismatch.
23+
"""
24+
import os
25+
import subprocess
26+
import sys
27+
from pathlib import Path
28+
29+
import wasmtime
30+
from elftools.elf.elffile import ELFFile
31+
from unicorn import UC_ARCH_ARM, UC_MODE_THUMB, Uc
32+
from unicorn.arm_const import UC_ARM_REG_LR, UC_ARM_REG_R0, UC_ARM_REG_SP
33+
34+
WAT = Path(__file__).with_name("block_brif_483.wat")
35+
SYNTH = os.environ.get("SYNTH", "./target/release/synth")
36+
# Absolute linear-memory base the optimized (non-relocatable) path materializes
37+
# for a const address: `movw ip,#0x100; movt ip,#0x2000` → 0x20000100 (see #197).
38+
LINMEM = 0x20000100
39+
MEM_WINDOW = 64 # bytes of memory compared against ground truth
40+
# (function, [sel values exercising both branch directions])
41+
CASES = {"init_branch": [0, 1], "nested": [0, 1]}
42+
43+
44+
def compile_optimized(out):
45+
r = subprocess.run(
46+
[SYNTH, "compile", str(WAT), "-o", out, "-b", "arm",
47+
"--target", "cortex-m4", "--all-exports"],
48+
capture_output=True, text=True, env={"PATH": "/usr/bin:/bin"},
49+
)
50+
if r.returncode != 0:
51+
sys.exit(f"compile failed: {r.stderr}")
52+
53+
54+
def load(elf):
55+
f = ELFFile(open(elf, "rb"))
56+
text = f.get_section_by_name(".text")
57+
data, base = text.data(), text["sh_addr"]
58+
syms = {}
59+
for s in f.iter_sections():
60+
if s.header.sh_type == "SHT_SYMTAB":
61+
for sym in s.iter_symbols():
62+
if sym.name:
63+
syms[sym.name] = sym["st_value"]
64+
return data, base, syms
65+
66+
67+
def wasm_mem(func, sel):
68+
eng = wasmtime.Engine()
69+
mod = wasmtime.Module(eng, WAT.read_bytes())
70+
st = wasmtime.Store(eng)
71+
inst = wasmtime.Instance(st, mod, [])
72+
inst.exports(st)[func](st, sel)
73+
return bytes(inst.exports(st)["memory"].read(st, 0, MEM_WINDOW))
74+
75+
76+
def run_rv(code, base, addr, sel):
77+
mu = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
78+
mu.mem_map(base & ~0xFFFF, 0x20000)
79+
mu.mem_write(base, code)
80+
mu.mem_map(LINMEM & ~0xFFFF, 0x20000) # page-aligned region covering LINMEM
81+
mu.mem_map(0x90000, 0x10000)
82+
ret = 0x90100
83+
mu.mem_write(ret, b"\x00\xbf\x00\xbf")
84+
mu.reg_write(UC_ARM_REG_R0, sel & 0xFFFFFFFF)
85+
mu.reg_write(UC_ARM_REG_SP, 0x98000)
86+
mu.reg_write(UC_ARM_REG_LR, ret | 1)
87+
mu.emu_start(addr | 1, ret, timeout=5_000_000)
88+
return bytes(mu.mem_read(LINMEM, MEM_WINDOW))
89+
90+
91+
def main():
92+
if not os.path.exists(SYNTH):
93+
sys.exit(f"{SYNTH} not found — build synth first")
94+
elf = "/tmp/block_brif_483.elf"
95+
compile_optimized(elf)
96+
code, base, syms = load(elf)
97+
98+
fails = 0
99+
for func, sels in CASES.items():
100+
for sel in sels:
101+
gt = wasm_mem(func, sel)
102+
got = run_rv(code, base, syms[func], sel)
103+
ok = got == gt
104+
if not ok:
105+
fails += 1
106+
print(f"{'OK ' if ok else 'FAIL'} {func}(sel={sel})")
107+
if not ok:
108+
diff = [(i, gt[i], got[i]) for i in range(MEM_WINDOW) if gt[i] != got[i]]
109+
print(f" mem diffs (off, wasmtime, synth): {diff}")
110+
111+
print("ORACLE:", "PASS" if fails == 0 else f"FAIL ({fails})")
112+
sys.exit(1 if fails else 0)
113+
114+
115+
if __name__ == "__main__":
116+
main()

0 commit comments

Comments
 (0)