Skip to content

Commit 0e1e2f3

Browse files
committed
chore: width clean, fix builds, more comments
builds again
1 parent f654007 commit 0e1e2f3

18 files changed

Lines changed: 278 additions & 110 deletions

cmd/bee/cmd/start.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/ethersphere/bee/v2/pkg/accesscontrol"
2626
"github.com/ethersphere/bee/v2/pkg/bmt"
2727
"github.com/ethersphere/bee/v2/pkg/bmtpool"
28+
"github.com/ethersphere/bee/v2/pkg/keccak"
2829
chaincfg "github.com/ethersphere/bee/v2/pkg/config"
2930
"github.com/ethersphere/bee/v2/pkg/crypto"
3031
"github.com/ethersphere/bee/v2/pkg/keystore"
@@ -271,14 +272,17 @@ func buildBeeNode(ctx context.Context, c *command, cmd *cobra.Command, logger lo
271272

272273
useSIMD := c.config.GetBool(optionUseSIMD)
273274
if useSIMD {
274-
if runtime.GOOS != "linux" {
275-
return nil, errors.New("SIMD hashing can only be enabled on linux hosts")
275+
if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
276+
return nil, fmt.Errorf("SIMD hashing requires linux/amd64 (this build is %s/%s)", runtime.GOOS, runtime.GOARCH)
276277
}
277-
logger.Info("SIMD hashing enabled")
278-
bmt.SIMDOptIn = true
278+
if !keccak.HasSIMD() {
279+
return nil, errors.New("SIMD hashing requires a CPU with AVX2 or AVX-512; this CPU has neither")
280+
}
281+
bmt.SetSIMDOptIn(true)
279282
// Rebuild the global bmtpool instance so the new SIMDOptIn value
280283
// is reflected in the pool created for hot-path BMT hashing.
281284
bmtpool.Rebuild()
285+
logger.Info("SIMD hashing enabled", "batch_width", keccak.BatchWidth(), "avx512", keccak.HasAVX512())
282286
}
283287

284288
b, err := node.NewBee(ctx, c.config.GetString(optionNameP2PAddr), signerConfig.publicKey, signerConfig.signer, networkID, logger, signerConfig.libp2pPrivateKey, signerConfig.pssPrivateKey, signerConfig.session, &node.Options{

pkg/bmt/benchmark_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,15 @@ func BenchmarkBMT(b *testing.B) {
2727
benchmarkRefHasher(b, size)
2828
})
2929
b.Run(fmt.Sprintf("%v_size_%v", "BMT_Goroutine", size), func(b *testing.B) {
30-
prev := bmt.SIMDOptIn
31-
bmt.SIMDOptIn = false
32-
defer func() { bmt.SIMDOptIn = prev }()
30+
prev := bmt.SIMDOptIn()
31+
bmt.SetSIMDOptIn(false)
32+
defer bmt.SetSIMDOptIn(prev)
3333
benchmarkBMT(b, size)
3434
})
3535
b.Run(fmt.Sprintf("%v_size_%v", "BMT_SIMD", size), func(b *testing.B) {
36-
prev := bmt.SIMDOptIn
37-
bmt.SIMDOptIn = true
38-
defer func() { bmt.SIMDOptIn = prev }()
36+
prev := bmt.SIMDOptIn()
37+
bmt.SetSIMDOptIn(true)
38+
defer bmt.SetSIMDOptIn(prev)
3939
benchmarkBMT(b, size)
4040
})
4141
}

pkg/bmt/bmt.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package bmt
77
import (
88
"encoding/binary"
99
"hash"
10+
"sync/atomic"
1011

1112
"github.com/ethersphere/bee/v2/pkg/swarm"
1213
)
@@ -16,9 +17,18 @@ var (
1617
zerosection = make([]byte, 64)
1718
)
1819

19-
// SIMDOptIn is default false, meaning it is disabled by default unless the user turns it on with the
20-
// necessary CLI flag on the supported systems.
21-
var SIMDOptIn = false
20+
// simdOptIn controls whether NewPool and friends return a SIMD-backed BMT pool.
21+
// Default is false; cmd/bee flips it on startup after parsing --use-simd-hashing.
22+
// Accessed via SIMDOptIn / SetSIMDOptIn so concurrent reads during startup are
23+
// race-free.
24+
var simdOptIn atomic.Bool
25+
26+
// SIMDOptIn reports whether the SIMD hasher has been opted into.
27+
func SIMDOptIn() bool { return simdOptIn.Load() }
28+
29+
// SetSIMDOptIn sets the SIMD opt-in flag. Intended to be called once during
30+
// startup before the first NewPool call (cmd/bee calls it after flag parsing).
31+
func SetSIMDOptIn(b bool) { simdOptIn.Store(b) }
2232

2333
// LengthToSpan creates a binary data span size representation.
2434
// It is required for calculating the BMT hash.

pkg/bmt/bmt_test.go

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -272,39 +272,59 @@ func testHasherCorrectness(h bmt.Hasher, data []byte, n, count int) (err error)
272272
}
273273

274274
// TestGoroutineSIMDParity verifies that the goroutine and SIMD pools produce
275-
// identical root hashes for the same input, across a spread of write lengths.
276-
// The SIMD sub-test is a no-op on platforms where the dispatcher falls back to
277-
// the goroutine pool (non-linux/amd64 or CPU without AVX2).
275+
// identical root hashes for the same input across every segment count in
276+
// testSegmentCounts and a spread of write lengths. Iterating over the full
277+
// segment-count table exercises the degenerate 1-level and shallow-tree branches
278+
// of simdHasher.Hash that a fixed 128-segment test would miss.
279+
//
280+
// On platforms where the dispatcher falls back to the goroutine pool
281+
// (non-linux/amd64 or CPU without AVX2/AVX-512), both pools end up as goroutine
282+
// pools and the test degrades to a goroutine-vs-goroutine comparison.
283+
//
284+
// Sub-tests are intentionally not run in parallel: the SIMDOptIn flip is global
285+
// state and concurrent NewPool calls would see flapping values.
278286
func TestGoroutineSIMDParity(t *testing.T) {
279287
testData := testutil.RandBytesWithSeed(t, 4096, seed)
280-
lengths := []int{1, 31, 32, 33, 64, 128, 500, 1024, 2048, 4095, 4096}
281288

282-
prev := bmt.SIMDOptIn
283-
defer func() { bmt.SIMDOptIn = prev }()
289+
prev := bmt.SIMDOptIn()
290+
defer bmt.SetSIMDOptIn(prev)
284291

285-
bmt.SIMDOptIn = false
286-
gPool := bmt.NewPool(bmt.NewConf(testSegmentCount, 1))
287-
bmt.SIMDOptIn = true
288-
sPool := bmt.NewPool(bmt.NewConf(testSegmentCount, 1))
292+
// Representative write lengths; clamped per segment count below. 0 covers the
293+
// empty-input path, and the neighbourhood of section boundaries (31/32/33,
294+
// 63/64/65) tickles the padding/fall-through in Hash.
295+
lengths := []int{0, 1, 31, 32, 33, 63, 64, 65, 128, 500, 1024, 2048, 4095, 4096}
289296

290-
for _, n := range lengths {
291-
t.Run(fmt.Sprintf("len_%d", n), func(t *testing.T) {
292-
gh := gPool.Get()
293-
gHash, err := syncHash(gh, testData[:n])
294-
gPool.Put(gh)
295-
if err != nil {
296-
t.Fatal(err)
297-
}
297+
for _, count := range testSegmentCounts {
298+
t.Run(fmt.Sprintf("segments_%d", count), func(t *testing.T) {
299+
bmt.SetSIMDOptIn(false)
300+
gPool := bmt.NewPool(bmt.NewConf(count, 1))
301+
bmt.SetSIMDOptIn(true)
302+
sPool := bmt.NewPool(bmt.NewConf(count, 1))
303+
304+
maxLen := count * hashSize
305+
for _, n := range lengths {
306+
if n > maxLen {
307+
continue
308+
}
309+
t.Run(fmt.Sprintf("len_%d", n), func(t *testing.T) {
310+
gh := gPool.Get()
311+
gHash, err := syncHash(gh, testData[:n])
312+
gPool.Put(gh)
313+
if err != nil {
314+
t.Fatal(err)
315+
}
298316

299-
sh := sPool.Get()
300-
sHash, err := syncHash(sh, testData[:n])
301-
sPool.Put(sh)
302-
if err != nil {
303-
t.Fatal(err)
304-
}
317+
sh := sPool.Get()
318+
sHash, err := syncHash(sh, testData[:n])
319+
sPool.Put(sh)
320+
if err != nil {
321+
t.Fatal(err)
322+
}
305323

306-
if !bytes.Equal(gHash, sHash) {
307-
t.Fatalf("goroutine/simd mismatch at len=%d\n goroutine: %x\n simd: %x", n, gHash, sHash)
324+
if !bytes.Equal(gHash, sHash) {
325+
t.Fatalf("goroutine/simd mismatch at count=%d len=%d\n goroutine: %x\n simd: %x", count, n, gHash, sHash)
326+
}
327+
})
308328
}
309329
})
310330
}

pkg/bmt/dispatch_other.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
package bmt
88

99
// NewPool returns a BMT pool. On this platform only the goroutine implementation
10-
// is compiled in, so SIMDOptIn is ignored.
10+
// is compiled in, so SIMDOptIn() is ignored.
1111
func NewPool(c *Conf) Pool {
1212
return newGoroutinePool(c)
1313
}

pkg/bmt/dispatch_simd.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,27 @@ import (
1010
"github.com/ethersphere/bee/v2/pkg/keccak"
1111
)
1212

13-
// NewPool returns a BMT pool. If SIMDOptIn is true and the CPU exposes AVX2 or
14-
// AVX-512, a SIMD-batched pool is returned. Otherwise the goroutine-based pool
15-
// is returned (silent fallback).
13+
// NewPool returns a BMT pool. If SIMDOptIn() is true and the CPU exposes AVX2
14+
// or AVX-512, a SIMD-batched pool is returned. Otherwise the goroutine-based
15+
// pool is returned (silent fallback).
1616
func NewPool(c *Conf) Pool {
17-
if SIMDOptIn && keccak.HasSIMD() {
17+
if SIMDOptIn() && keccak.HasSIMD() {
1818
return newSIMDPool(c)
1919
}
2020
return newGoroutinePool(c)
2121
}
2222

2323
// NewHasher returns a standalone (non-pooled) BMT hasher.
2424
func NewHasher() Hasher {
25-
if SIMDOptIn && keccak.HasSIMD() {
25+
if SIMDOptIn() && keccak.HasSIMD() {
2626
return newSIMDHasher()
2727
}
2828
return newGoroutineHasher()
2929
}
3030

3131
// NewPrefixHasher returns a standalone BMT hasher with the given prefix.
3232
func NewPrefixHasher(prefix []byte) Hasher {
33-
if SIMDOptIn && keccak.HasSIMD() {
33+
if SIMDOptIn() && keccak.HasSIMD() {
3434
return newSIMDPrefixHasher(prefix)
3535
}
3636
return newGoroutinePrefixHasher(prefix)

pkg/bmt/hasher_simd.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,8 @@ func (h *simdHasher) Hash(b []byte) ([]byte, error) {
8989
}
9090
// zero-fill the tail of the buffer: every leaf section must carry
9191
// deterministic bytes, because SIMD batches hash whole sections at a time
92-
// without a "is this section occupied?" check.
93-
for i := h.size; i < h.maxSize; i++ {
94-
h.bmt.buffer[i] = 0
95-
}
92+
// without a "is this section occupied?" check. clear() lowers to memclr.
93+
clear(h.bmt.buffer[h.size:])
9694
// degenerate single-level tree: the whole buffer is already one section,
9795
// so there is nothing for SIMD to batch — just run the scalar hasher.
9896
if len(h.bmt.levels) == 1 {
@@ -111,6 +109,10 @@ func (h *simdHasher) Hash(b []byte) ([]byte, error) {
111109
return nil, err
112110
}
113111
// prepend the span and hash once more to produce the chunk address.
112+
// When a prefix is configured, baseHasher() returns a PrefixHasher whose
113+
// Reset re-absorbs the prefix, so the final digest is
114+
// keccak(prefix || span || rootHash) — the same wrap-per-level rule used
115+
// by hashSIMD at every internal level.
114116
return doHash(h.baseHasher(), h.span, rootHash)
115117
}
116118

@@ -120,7 +122,10 @@ func (h *simdHasher) HashPadded(b []byte) ([]byte, error) {
120122
return h.Hash(b)
121123
}
122124

123-
// Reset prepares the Hasher for reuse.
125+
// Reset prepares the Hasher for reuse. The internal data buffer is not zeroed;
126+
// any stale bytes past h.size are overwritten on the next Write and zero-filled
127+
// on demand by Hash, so post-Reset inspection of the buffer may still see
128+
// previous-chunk contents.
124129
func (h *simdHasher) Reset() {
125130
h.size = 0
126131
copy(h.span, zerospan)

pkg/bmt/pool_simd.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,9 @@ func newSIMDConf(prefix []byte, segmentCount, capacity int) *simdConf {
4949
maxSize: count * segmentSize,
5050
depth: depth,
5151
prefix: prefix,
52+
batchWidth: keccak.BatchWidth(),
5253
}
5354

54-
bw := keccak.BatchWidth()
55-
if bw == 0 {
56-
bw = 8
57-
}
58-
c.batchWidth = bw
59-
6055
zerohashes := make([][]byte, depth+1)
6156
zeros := make([]byte, segmentSize)
6257
zerohashes[0] = zeros

pkg/bmtpool/bmtpool.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616
const Capacity = 32
1717

1818
// instance holds the active bmt.Pool. Using an atomic pointer so Rebuild can
19-
// swap it in after cmd/bee sets bmt.SIMDOptIn during startup, without
19+
// swap it in after cmd/bee calls bmt.SetSIMDOptIn during startup, without
2020
// introducing lock contention on the hot Get/Put path.
2121
var instance atomic.Pointer[bmt.Pool]
2222

@@ -27,15 +27,17 @@ func init() {
2727
// synctest would otherwise trip "receive on synctest channel from outside
2828
// bubble" if the pool were first created inside a bubble.
2929
//
30-
// SIMDOptIn is still false here (flag hasn't been parsed). cmd/bee calls
31-
// Rebuild after flag parsing if the user opted in.
30+
// bmt.SIMDOptIn() is still false here (flag hasn't been parsed). cmd/bee
31+
// calls Rebuild after flag parsing if the user opted in.
3232
p := bmt.NewPool(bmt.NewConf(swarm.BmtBranches, Capacity))
3333
instance.Store(&p)
3434
}
3535

3636
// Rebuild discards the current pool and constructs a new one reading the
37-
// latest bmt.SIMDOptIn value. Intended to be called once during startup after
38-
// CLI flag parsing so that --use-simd-hashing takes effect.
37+
// latest bmt.SIMDOptIn() value. Must be called exactly once during startup
38+
// after CLI flag parsing and before the first external Get, so that
39+
// --use-simd-hashing takes effect and no in-flight hashers are referencing the
40+
// old pool's tree.
3941
func Rebuild() {
4042
p := bmt.NewPool(bmt.NewConf(swarm.BmtBranches, Capacity))
4143
instance.Store(&p)

pkg/keccak/keccak.go

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,29 @@
1-
// Copyright 2024 The Swarm Authors. All rights reserved.
2-
// Use of this source code is governed by a BSD-style
3-
// license that can be found in the LICENSE file.
4-
51
// Package keccak provides legacy Keccak-256 (Ethereum-compatible) hashing
62
// with SIMD acceleration via XKCP.
73
//
8-
// On amd64, the package automatically selects between AVX-512 (8-way parallel)
9-
// and AVX2 (4-way parallel) based on the CPU's capabilities.
4+
// Sum256x4 uses AVX2 (4-way parallel). Sum256x8 uses AVX-512 (8-way parallel).
5+
// The caller is responsible for dispatching to the correct function based on
6+
// CPU capabilities; Sum256x8 will crash on non-AVX-512 hardware.
7+
//
8+
// # Equal-length (or nil) input constraint
9+
//
10+
// All non-nil inputs passed to Sum256x4 / Sum256x8 in the same call MUST have
11+
// the same length. This is an intrinsic limit of the batched XKCP primitive:
12+
// KeccakP1600timesN_PermuteAll_24rounds advances the sponge state of every
13+
// lane in lockstep, so any lane that has already absorbed its final padding
14+
// block gets extra, unwanted permutations every time a longer lane absorbs
15+
// another block — silently corrupting its output.
16+
//
17+
// Nil (or zero-length) lanes are allowed for partial batches: they are
18+
// treated as no-op fillers so the caller can populate N_real < N lanes with
19+
// real data and ignore the remaining N - N_real output digests. The digests
20+
// produced for nil lanes are not meaningful and must not be consumed. Mixing
21+
// distinct non-zero lengths within one call is unsupported and will produce
22+
// wrong digests for the shorter lanes, even when every individual length
23+
// would work on its own in an all-same-length call.
1024
package keccak
1125

12-
import (
13-
"encoding/hex"
14-
15-
"golang.org/x/crypto/sha3"
16-
)
26+
import "encoding/hex"
1727

1828
// Hash256 represents a 32-byte Keccak-256 hash
1929
type Hash256 [32]byte
@@ -32,7 +42,7 @@ func HasAVX512() bool {
3242
// HasSIMD reports whether any SIMD-accelerated Keccak path is available
3343
// (AVX2 or AVX-512).
3444
func HasSIMD() bool {
35-
return hasAVX2
45+
return hasAVX2 || hasAVX512
3646
}
3747

3848
// BatchWidth returns the SIMD batch width: 8 for AVX-512, 4 for AVX2, or 0
@@ -47,22 +57,12 @@ func BatchWidth() int {
4757
return 0
4858
}
4959

50-
// Sum256 computes a single Keccak-256 hash (legacy, Ethereum-compatible).
51-
// Uses the best available implementation.
52-
func Sum256(data []byte) Hash256 {
53-
var out Hash256
54-
h := sha3.NewLegacyKeccak256()
55-
h.Write(data)
56-
copy(out[:], h.Sum(nil))
57-
return out
58-
}
59-
6060
// Sum256x4 computes 4 Keccak-256 hashes in parallel using AVX2.
61-
// Callers must check HasSIMD() first; invoking without AVX2 panics.
61+
//
62+
// All non-nil inputs MUST have the same length; nil (or zero-length) lanes
63+
// are allowed as partial-batch fillers whose output must be ignored. See the
64+
// package doc for the rationale.
6265
func Sum256x4(inputs [4][]byte) [4]Hash256 {
63-
if !hasAVX2 {
64-
panic("keccak: Sum256x4 requires AVX2; call HasSIMD() first")
65-
}
6666
var outputs [4]Hash256
6767
var inputsCopy [4][]byte
6868
copy(inputsCopy[:], inputs[:])
@@ -71,11 +71,12 @@ func Sum256x4(inputs [4][]byte) [4]Hash256 {
7171
}
7272

7373
// Sum256x8 computes 8 Keccak-256 hashes in parallel using AVX-512.
74-
// Callers must check HasAVX512() first; invoking without AVX-512 panics.
74+
// Must only be called on AVX-512-capable hardware.
75+
//
76+
// All non-nil inputs MUST have the same length; nil (or zero-length) lanes
77+
// are allowed as partial-batch fillers whose output must be ignored. See the
78+
// package doc for the rationale.
7579
func Sum256x8(inputs [8][]byte) [8]Hash256 {
76-
if !hasAVX512 {
77-
panic("keccak: Sum256x8 requires AVX-512; call HasAVX512() first")
78-
}
7980
var outputs [8]Hash256
8081
var inputsCopy [8][]byte
8182
copy(inputsCopy[:], inputs[:])

0 commit comments

Comments
 (0)