From ea42aa3bf33404b6d0169e35b70ea6fa3a508a1d Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Mon, 4 May 2026 10:53:41 -0700 Subject: [PATCH 1/3] interp: avoid repeated object clones on partial stores memoryView.store cloned the entire destination object on every partial store. Compiling a program that pulls in golang.org/x/text/collate hit this hard: its generated tables are initialized one element at a time, and each element store copied the whole global, making interp quadratic in the table size. Switch to copy-on-first-write per memory view: clone the object the first time this view writes to it, then mutate that private copy in place on later partial stores. Rollback is unaffected because revert still discards the view's objects wholesale. Two extra safety tweaks fall out of this: - Full overwrites now clone the incoming value, so the stored buffer can never alias state held by the caller. - Partial in-place stores snapshot the source buffer, so a store whose source is a load from the same object (overlapping or not) still sees the pre-store bytes. Add an interp golden test exercising both the overlapping load/store case and a cross-object aliased load/store case. --- interp/interp_test.go | 1 + interp/memory.go | 29 ++++++++++++++++++++++------- interp/testdata/store.ll | 32 ++++++++++++++++++++++++++++++++ interp/testdata/store.out.ll | 11 +++++++++++ 4 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 interp/testdata/store.ll create mode 100644 interp/testdata/store.out.ll diff --git a/interp/interp_test.go b/interp/interp_test.go index f5bdb81882..70fda4bb16 100644 --- a/interp/interp_test.go +++ b/interp/interp_test.go @@ -18,6 +18,7 @@ func TestInterp(t *testing.T) { "copy", "interface", "revert", + "store", "alloc", } { name := name // make local to this closure diff --git a/interp/memory.go b/interp/memory.go index 2812cd01c2..d7fe0d80b3 100644 --- a/interp/memory.go +++ b/interp/memory.go @@ -330,26 +330,41 @@ func (mv *memoryView) store(v value, p pointerValue) bool { if checks && mv.hasExternalLoadOrStore(p) { panic("interp: store to object with external load/store") } - obj := mv.get(p.index()) + index := p.index() + var obj object + writable := false + if mv.objects != nil { + obj, writable = mv.objects[index] + } + if !writable { + obj = mv.get(index) + } if obj.buffer == nil { // External global, return false (for a failure). return false } - if checks && p.offset()+v.len(mv.r) > obj.size { + valueLen := v.len(mv.r) + if checks && p.offset()+valueLen > obj.size { panic("interp: store out of bounds") } - if p.offset() == 0 && v.len(mv.r) == obj.buffer.len(mv.r) { - obj.buffer = v + if p.offset() == 0 && valueLen == obj.buffer.len(mv.r) { + obj.buffer = v.clone() } else { - obj = obj.clone() + if !writable { + obj = obj.clone() + } buffer := obj.buffer.asRawValue(mv.r) obj.buffer = buffer v := v.asRawValue(mv.r) - for i := uint32(0); i < v.len(mv.r); i++ { + if writable { + // A partial load from this object may share the destination buffer. + v.buf = append([]uint64(nil), v.buf...) + } + for i := uint32(0); i < valueLen; i++ { buffer.buf[p.offset()+i] = v.buf[i] } } - mv.put(p.index(), obj) + mv.put(index, obj) return true // success } diff --git a/interp/testdata/store.ll b/interp/testdata/store.ll new file mode 100644 index 0000000000..6d532f77f3 --- /dev/null +++ b/interp/testdata/store.ll @@ -0,0 +1,32 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--linux" + +@overlap.buf = global [4 x i8] c"\01\02\03\04" +@alias.src = global [4 x i8] c"\05\06\07\08" +@alias.dst = global [2 x i8] zeroinitializer + +define void @runtime.initAll() unnamed_addr { +entry: + call void @overlap.init(ptr undef) + call void @alias.init(ptr undef) + ret void +} + +define internal void @overlap.init(ptr %context) unnamed_addr { +entry: + %tail = getelementptr [4 x i8], ptr @overlap.buf, i32 0, i32 3 + store i8 9, ptr %tail + %val = load i16, ptr @overlap.buf + %dst = getelementptr [4 x i8], ptr @overlap.buf, i32 0, i32 1 + store i16 %val, ptr %dst + ret void +} + +define internal void @alias.init(ptr %context) unnamed_addr { +entry: + %src = getelementptr [4 x i8], ptr @alias.src, i32 0, i32 1 + %val = load i16, ptr %src + store i16 %val, ptr @alias.dst + store i8 9, ptr @alias.dst + ret void +} diff --git a/interp/testdata/store.out.ll b/interp/testdata/store.out.ll new file mode 100644 index 0000000000..1de806aa12 --- /dev/null +++ b/interp/testdata/store.out.ll @@ -0,0 +1,11 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--linux" + +@overlap.buf = local_unnamed_addr global [4 x i8] c"\01\01\02\09" +@alias.src = local_unnamed_addr global [4 x i8] c"\05\06\07\08" +@alias.dst = local_unnamed_addr global [2 x i8] c"\09\07" + +define void @runtime.initAll() unnamed_addr { +entry: + ret void +} From 86c183ae5deb67f94972d5aa2c8ca857e6d8e501 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Mon, 11 May 2026 13:40:58 -0700 Subject: [PATCH 2/3] interp: add regression test for partial store aliasing Adds a third init function to the store testdata: an initial partial store puts the buffer into the current memory view, then a partial load is followed by another partial store at the same offset, and finally the loaded value is written to a separate global. The expected output captures the current (incorrect) behavior of the in-place partial store optimization: the loaded value gets corrupted by the subsequent in-place store. The next commit fixes this. --- interp/testdata/store.ll | 17 +++++++++++++++++ interp/testdata/store.out.ll | 2 ++ 2 files changed, 19 insertions(+) diff --git a/interp/testdata/store.ll b/interp/testdata/store.ll index 6d532f77f3..d42010179b 100644 --- a/interp/testdata/store.ll +++ b/interp/testdata/store.ll @@ -4,11 +4,14 @@ target triple = "x86_64--linux" @overlap.buf = global [4 x i8] c"\01\02\03\04" @alias.src = global [4 x i8] c"\05\06\07\08" @alias.dst = global [2 x i8] zeroinitializer +@reload.buf = global [4 x i8] c"\01\02\03\04" +@reload.out = global [2 x i8] zeroinitializer define void @runtime.initAll() unnamed_addr { entry: call void @overlap.init(ptr undef) call void @alias.init(ptr undef) + call void @reload.init(ptr undef) ret void } @@ -30,3 +33,17 @@ entry: store i8 9, ptr @alias.dst ret void } + +define internal void @reload.init(ptr %context) unnamed_addr { +entry: + ; First store makes reload.buf writable in the current memory view. + %tail = getelementptr [4 x i8], ptr @reload.buf, i32 0, i32 3 + store i8 9, ptr %tail + ; Partial load whose result may share the underlying buffer. + %val = load i16, ptr @reload.buf + ; Subsequent in-place partial store; this must not corrupt %val. + store i8 99, ptr @reload.buf + ; Write the originally-loaded value to a separate global. + store i16 %val, ptr @reload.out + ret void +} diff --git a/interp/testdata/store.out.ll b/interp/testdata/store.out.ll index 1de806aa12..6c0f95052d 100644 --- a/interp/testdata/store.out.ll +++ b/interp/testdata/store.out.ll @@ -4,6 +4,8 @@ target triple = "x86_64--linux" @overlap.buf = local_unnamed_addr global [4 x i8] c"\01\01\02\09" @alias.src = local_unnamed_addr global [4 x i8] c"\05\06\07\08" @alias.dst = local_unnamed_addr global [2 x i8] c"\09\07" +@reload.buf = local_unnamed_addr global [4 x i8] c"c\02\03\09" +@reload.out = local_unnamed_addr global [2 x i8] c"c\02" define void @runtime.initAll() unnamed_addr { entry: From f5a2ef518ecfa1dd6b572c60679de989b590bc29 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Mon, 11 May 2026 13:42:53 -0700 Subject: [PATCH 3/3] interp: fix partial store aliasing with previously loaded values The optimization to skip cloning the destination object on partial stores when the buffer is already owned by the current memory view mutated obj.buffer.buf in place. The previous v.buf clone only handled the case where the store value itself aliased that buffer; it did not cover the more common case where an earlier partial load had returned a slice into the same buffer. The in-place store would then corrupt the previously loaded value, breaking code such as cloudflare/bn256 where the gfP arithmetic loads, computes, and stores within the same global during package initialization. Fix this in load instead: when the source object is owned by the current memory view, copy the loaded slice so the returned value is independent of the live buffer. The v.buf clone in store is no longer needed and is removed. Updates the regression test added in the previous commit to reflect the corrected behavior. --- interp/memory.go | 14 +++++++------- interp/testdata/store.out.ll | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/interp/memory.go b/interp/memory.go index d7fe0d80b3..11e9a92d16 100644 --- a/interp/memory.go +++ b/interp/memory.go @@ -316,10 +316,14 @@ func (mv *memoryView) load(p pointerValue, size uint32) value { panic("interp: load out of bounds") } v := obj.buffer.asRawValue(mv.r) - loadedValue := rawValue{ - buf: v.buf[p.offset() : p.offset()+size], + loadedBuf := v.buf[p.offset() : p.offset()+size] + if _, writable := mv.objects[p.index()]; writable { + // This object's buffer is owned by this view, which means a later + // store may mutate it in place (see store below). Copy the loaded + // slice so the returned value is not aliased with the live buffer. + loadedBuf = append([]uint64(nil), loadedBuf...) } - return loadedValue + return rawValue{buf: loadedBuf} } // Store to the value behind the given pointer. This overwrites the value in the @@ -356,10 +360,6 @@ func (mv *memoryView) store(v value, p pointerValue) bool { buffer := obj.buffer.asRawValue(mv.r) obj.buffer = buffer v := v.asRawValue(mv.r) - if writable { - // A partial load from this object may share the destination buffer. - v.buf = append([]uint64(nil), v.buf...) - } for i := uint32(0); i < valueLen; i++ { buffer.buf[p.offset()+i] = v.buf[i] } diff --git a/interp/testdata/store.out.ll b/interp/testdata/store.out.ll index 6c0f95052d..d86e5b0ccb 100644 --- a/interp/testdata/store.out.ll +++ b/interp/testdata/store.out.ll @@ -5,7 +5,7 @@ target triple = "x86_64--linux" @alias.src = local_unnamed_addr global [4 x i8] c"\05\06\07\08" @alias.dst = local_unnamed_addr global [2 x i8] c"\09\07" @reload.buf = local_unnamed_addr global [4 x i8] c"c\02\03\09" -@reload.out = local_unnamed_addr global [2 x i8] c"c\02" +@reload.out = local_unnamed_addr global [2 x i8] c"\01\02" define void @runtime.initAll() unnamed_addr { entry: