Skip to content

Commit 3be7e2d

Browse files
mem: Add faster tiered buffer pool (#8775)
This change adds a new tiered buffer pool that uses power-of-2 tier sizes. It reduces the lookup time for the relevant sizedBufferPool from $O(\log n)$ to $O(1)$, where n is the number of tiers. This creates constant-time lookups independent of the tier count, allowing users to add more tiers without performance overhead. ## Benchmarks Micro-benchmark that measures only the pool query performance, ignoring the allocation time: ```go func BenchmarkSearch(b *testing.B) { defaultBufferPoolSizes := make([]int, len(defaultBufferPoolSizeExponents)) for i, exp := range defaultBufferPoolSizeExponents { defaultBufferPoolSizes[i] = 1 << exp } b.Run("pool=Tiered", func(b *testing.B) { p := NewTieredBufferPool(defaultBufferPoolSizes...).(*tieredBufferPool) for b.Loop() { for size := range 1 << 19 { // One for get, one for put. _ = p.getPool(size) _ = p.getPool(size) } } }) b.Run("pool=BinaryTiered", func(b *testing.B) { p := NewBinaryTieredBufferPool(defaultBufferPoolSizeExponents...).(*binaryTieredBufferPool) for b.Loop() { for size := range 1 << 19 { _ = p.poolForGet(size) _ = p.poolForPut(size) } } }) } ``` With 5 tiers: ```sh go test -bench=BenchmarkSearch -count=10 -benchmem | benchstat -col '/pool' - goos: linux goarch: amd64 pkg: google.golang.org/grpc/mem cpu: Intel(R) Xeon(R) CPU @ 2.60GHz │ Tiered │ BinaryTiered │ │ sec/op │ sec/op vs base │ Search-48 5.353m ± 2% 2.036m ± 0% -61.97% (p=0.000 n=10) │ Tiered │ BinaryTiered │ │ B/op │ B/op vs base │ Search-48 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal │ Tiered │ BinaryTiered │ │ allocs/op │ allocs/op vs base │ Search-48 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal ``` With 9 tiers: ```sh go test -bench=BenchmarkSearch -count=10 -benchmem | benchstat -col '/pool' - goos: linux goarch: amd64 pkg: google.golang.org/grpc/mem cpu: Intel(R) Xeon(R) CPU @ 2.60GHz │ Tiered │ BinaryTiered │ │ sec/op │ sec/op vs base │ Search-48 5.659m ± 0% 2.035m ± 0% -64.04% (p=0.000 n=10) │ Tiered │ BinaryTiered │ │ B/op │ B/op vs base │ Search-48 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal │ Tiered │ BinaryTiered │ │ allocs/op │ allocs/op vs base │ Search-48 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal ``` RELEASE NOTES: * mem: Add faster tiered buffer pool. Use `NewBinaryTieredBufferPool` to create such pools. --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 13e2455 commit 3be7e2d

3 files changed

Lines changed: 319 additions & 11 deletions

File tree

mem/buffer_pool.go

Lines changed: 152 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
package mem
2020

2121
import (
22+
"fmt"
23+
"math/bits"
24+
"slices"
2225
"sort"
2326
"sync"
2427

@@ -38,20 +41,29 @@ type BufferPool interface {
3841
Put(*[]byte)
3942
}
4043

41-
const goPageSize = 4 << 10 // 4KiB. N.B. this must be a power of 2.
42-
43-
var defaultBufferPoolSizes = []int{
44-
256,
45-
goPageSize,
46-
16 << 10, // 16KB (max HTTP/2 frame size used by gRPC)
47-
32 << 10, // 32KB (default buffer size for io.Copy)
48-
1 << 20, // 1MB
49-
}
44+
const (
45+
goPageSizeExponent = 12
46+
goPageSize = 1 << goPageSizeExponent // 4KiB. N.B. this must be a power of 2.
47+
)
5048

51-
var defaultBufferPool BufferPool
49+
var (
50+
defaultBufferPoolSizeExponents = []uint8{
51+
8,
52+
goPageSizeExponent,
53+
14, // 16KB (max HTTP/2 frame size used by gRPC)
54+
15, // 32KB (default buffer size for io.Copy)
55+
20, // 1MB
56+
}
57+
defaultBufferPool BufferPool
58+
uintSize = bits.UintSize // use a variable for mocking during tests.
59+
)
5260

5361
func init() {
54-
defaultBufferPool = NewTieredBufferPool(defaultBufferPoolSizes...)
62+
var err error
63+
defaultBufferPool, err = NewBinaryTieredBufferPool(defaultBufferPoolSizeExponents...)
64+
if err != nil {
65+
panic(fmt.Sprintf("Failed to create default buffer pool: %v", err))
66+
}
5567

5668
internal.SetDefaultBufferPool = func(pool BufferPool) {
5769
defaultBufferPool = pool
@@ -109,6 +121,135 @@ func (p *tieredBufferPool) getPool(size int) BufferPool {
109121
return p.sizedPools[poolIdx]
110122
}
111123

124+
type binaryTieredBufferPool struct {
125+
// exponentToNextLargestPoolMap maps a power-of-two exponent (e.g., 12 for
126+
// 4KB) to the index of the next largest sizedBufferPool. This is used by
127+
// Get() to find the smallest pool that can satisfy a request for a given
128+
// size.
129+
exponentToNextLargestPoolMap []int
130+
// exponentToPreviousLargestPoolMap maps a power-of-two exponent to the
131+
// index of the previous largest sizedBufferPool. This is used by Put()
132+
// to return a buffer to the most appropriate pool based on its capacity.
133+
exponentToPreviousLargestPoolMap []int
134+
sizedPools []*sizedBufferPool
135+
fallbackPool simpleBufferPool
136+
maxPoolCap int // Optimization: Cache max capacity
137+
}
138+
139+
// NewBinaryTieredBufferPool returns a BufferPool backed by multiple sub-pools.
140+
// This structure enables O(1) lookup time for Get and Put operations.
141+
//
142+
// The arguments provided are the exponents for the buffer capacities (powers
143+
// of 2), not the raw byte sizes. For example, to create a pool of 16KB buffers
144+
// (2^14 bytes), pass 14 as the argument.
145+
func NewBinaryTieredBufferPool(powerOfTwoExponents ...uint8) (BufferPool, error) {
146+
slices.Sort(powerOfTwoExponents)
147+
powerOfTwoExponents = slices.Compact(powerOfTwoExponents)
148+
149+
// Determine the maximum exponent we need to support. This depends on the
150+
// word size (32-bit vs 64-bit).
151+
maxExponent := uintSize - 1
152+
indexOfNextLargestBit := slices.Repeat([]int{-1}, maxExponent+1)
153+
indexOfPreviousLargestBit := slices.Repeat([]int{-1}, maxExponent+1)
154+
155+
maxTier := 0
156+
pools := make([]*sizedBufferPool, 0, len(powerOfTwoExponents))
157+
158+
for i, exp := range powerOfTwoExponents {
159+
// Allocating slices of size > 2^maxExponent isn't possible on
160+
// maxExponent-bit machines.
161+
if int(exp) > maxExponent {
162+
return nil, fmt.Errorf("mem: allocating slice of size 2^%d is not possible", exp)
163+
}
164+
tierSize := 1 << exp
165+
pools = append(pools, newSizedBufferPool(tierSize))
166+
maxTier = max(maxTier, tierSize)
167+
168+
// Map the exact power of 2 to this pool index.
169+
indexOfNextLargestBit[exp] = i
170+
indexOfPreviousLargestBit[exp] = i
171+
}
172+
173+
// Fill gaps for Get() (Next Largest)
174+
// We iterate backwards. If current is empty, take the value from the right (larger).
175+
for i := maxExponent - 1; i >= 0; i-- {
176+
if indexOfNextLargestBit[i] == -1 {
177+
indexOfNextLargestBit[i] = indexOfNextLargestBit[i+1]
178+
}
179+
}
180+
181+
// Fill gaps for Put() (Previous Largest)
182+
// We iterate forwards. If current is empty, take the value from the left (smaller).
183+
for i := 1; i <= maxExponent; i++ {
184+
if indexOfPreviousLargestBit[i] == -1 {
185+
indexOfPreviousLargestBit[i] = indexOfPreviousLargestBit[i-1]
186+
}
187+
}
188+
189+
return &binaryTieredBufferPool{
190+
exponentToNextLargestPoolMap: indexOfNextLargestBit,
191+
exponentToPreviousLargestPoolMap: indexOfPreviousLargestBit,
192+
sizedPools: pools,
193+
maxPoolCap: maxTier,
194+
}, nil
195+
}
196+
197+
func (b *binaryTieredBufferPool) Get(size int) *[]byte {
198+
return b.poolForGet(size).Get(size)
199+
}
200+
201+
func (b *binaryTieredBufferPool) poolForGet(size int) BufferPool {
202+
if size == 0 || size > b.maxPoolCap {
203+
return &b.fallbackPool
204+
}
205+
206+
// Calculate the exponent of the smallest power of 2 >= size.
207+
// We subtract 1 from size to handle exact powers of 2 correctly.
208+
//
209+
// Examples:
210+
// size=16 (0b10000) -> size-1=15 (0b01111) -> bits.Len=4 -> Pool for 2^4
211+
// size=17 (0b10001) -> size-1=16 (0b10000) -> bits.Len=5 -> Pool for 2^5
212+
querySize := uint(size - 1)
213+
poolIdx := b.exponentToNextLargestPoolMap[bits.Len(querySize)]
214+
215+
return b.sizedPools[poolIdx]
216+
}
217+
218+
func (b *binaryTieredBufferPool) Put(buf *[]byte) {
219+
// We pass the capacity of the buffer, and not the size of the buffer here.
220+
// If we did the latter, all buffers would eventually move to the smallest
221+
// pool.
222+
b.poolForPut(cap(*buf)).Put(buf)
223+
}
224+
225+
func (b *binaryTieredBufferPool) poolForPut(bCap int) BufferPool {
226+
if bCap == 0 {
227+
return NopBufferPool{}
228+
}
229+
if bCap > b.maxPoolCap {
230+
return &b.fallbackPool
231+
}
232+
// Find the pool with the largest capacity <= bCap.
233+
//
234+
// We calculate the exponent of the largest power of 2 <= bCap.
235+
// bits.Len(x) returns the minimum number of bits required to represent x;
236+
// i.e. the number of bits up to and including the most significant bit.
237+
// Subtracting 1 gives the 0-based index of the most significant bit,
238+
// which is the exponent of the largest power of 2 <= bCap.
239+
//
240+
// Examples:
241+
// cap=16 (0b10000) -> Len=5 -> 5-1=4 -> 2^4
242+
// cap=15 (0b01111) -> Len=4 -> 4-1=3 -> 2^3
243+
largestPowerOfTwo := bits.Len(uint(bCap)) - 1
244+
poolIdx := b.exponentToPreviousLargestPoolMap[largestPowerOfTwo]
245+
// The buffer is smaller than the smallest power of 2, discard it.
246+
if poolIdx == -1 {
247+
// Buffer is smaller than our smallest pool bucket.
248+
return NopBufferPool{}
249+
}
250+
return b.sizedPools[poolIdx]
251+
}
252+
112253
// sizedBufferPool is a BufferPool implementation that is optimized for specific
113254
// buffer sizes. For example, HTTP/2 frames within gRPC have a default max size
114255
// of 16kb and a sizedBufferPool can be configured to only return buffers with a

mem/buffer_pool_internal_test.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/*
2+
*
3+
* Copyright 2026 gRPC authors.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
*/
18+
19+
package mem
20+
21+
import (
22+
"testing"
23+
)
24+
25+
func TestNewBinaryTieredBufferPool_WordSize(t *testing.T) {
26+
origUintSize := uintSize
27+
defer func() { uintSize = origUintSize }()
28+
29+
tests := []struct {
30+
name string
31+
wordSize int
32+
exponents []uint8
33+
wantErr bool
34+
}{
35+
{
36+
name: "32-bit_valid_exponent",
37+
wordSize: 32,
38+
exponents: []uint8{31},
39+
wantErr: false,
40+
},
41+
{
42+
name: "32-bit_invalid_exponent",
43+
wordSize: 32,
44+
exponents: []uint8{32},
45+
wantErr: true,
46+
},
47+
{
48+
name: "64-bit_valid_exponent",
49+
wordSize: 64,
50+
exponents: []uint8{63},
51+
wantErr: false,
52+
},
53+
{
54+
name: "64-bit_invalid_exponent",
55+
wordSize: 64,
56+
exponents: []uint8{64},
57+
wantErr: true,
58+
},
59+
}
60+
61+
for _, tt := range tests {
62+
t.Run(tt.name, func(t *testing.T) {
63+
uintSize = tt.wordSize
64+
pool, err := NewBinaryTieredBufferPool(tt.exponents...)
65+
if (err != nil) != tt.wantErr {
66+
t.Fatalf("NewBinaryTieredBufferPool() error = %t, wantErr %t", err, tt.wantErr)
67+
}
68+
if err != nil {
69+
return
70+
}
71+
bp := pool.(*binaryTieredBufferPool)
72+
if len(bp.exponentToNextLargestPoolMap) != tt.wordSize {
73+
t.Errorf("exponentToNextLargestPoolMap length = %d, want %d", len(bp.exponentToNextLargestPoolMap), tt.wordSize)
74+
}
75+
if len(bp.exponentToPreviousLargestPoolMap) != tt.wordSize {
76+
t.Errorf("exponentToPreviousLargestPoolMap length = %d, want %d", len(bp.exponentToPreviousLargestPoolMap), tt.wordSize)
77+
}
78+
})
79+
}
80+
}
81+
82+
// BenchmarkTieredPool benchmarks the performance of the tiered buffer pool
83+
// implementations, specifically focusing on the overhead of selecting the
84+
// correct bucket for a given size.
85+
func BenchmarkTieredPool(b *testing.B) {
86+
defaultBufferPoolSizes := make([]int, len(defaultBufferPoolSizeExponents))
87+
for i, exp := range defaultBufferPoolSizeExponents {
88+
defaultBufferPoolSizes[i] = 1 << exp
89+
}
90+
b.Run("pool=Tiered", func(b *testing.B) {
91+
p := NewTieredBufferPool(defaultBufferPoolSizes...).(*tieredBufferPool)
92+
for b.Loop() {
93+
for size := range 1 << 19 {
94+
// One for get, one for put.
95+
_ = p.getPool(size)
96+
_ = p.getPool(size)
97+
}
98+
}
99+
})
100+
101+
b.Run("pool=BinaryTiered", func(b *testing.B) {
102+
pool, err := NewBinaryTieredBufferPool(defaultBufferPoolSizeExponents...)
103+
if err != nil {
104+
b.Fatalf("Failed to create buffer pool: %v", err)
105+
}
106+
p := pool.(*binaryTieredBufferPool)
107+
for b.Loop() {
108+
for size := range 1 << 19 {
109+
_ = p.poolForGet(size)
110+
_ = p.poolForPut(size)
111+
}
112+
}
113+
})
114+
}
115+
116+
func TestNewBinaryTieredBufferPool_Duplicates(t *testing.T) {
117+
exponents := []uint8{1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1}
118+
pool, err := NewBinaryTieredBufferPool(exponents...)
119+
if err != nil {
120+
t.Fatalf("NewBinaryTieredBufferPool() error = %v", err)
121+
}
122+
binaryPool := pool.(*binaryTieredBufferPool)
123+
if len(binaryPool.sizedPools) != 6 {
124+
t.Errorf("sized buffer pool count = %d, want %d", len(binaryPool.sizedPools), 6)
125+
}
126+
}

mem/buffer_pool_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package mem_test
2020

2121
import (
2222
"bytes"
23+
"fmt"
2324
"testing"
2425
"unsafe"
2526

@@ -105,3 +106,43 @@ func (s) TestBufferPoolIgnoresShortBuffers(t *testing.T) {
105106
// pool, it could cause a panic.
106107
pool.Get(10)
107108
}
109+
110+
func TestBinaryBufferPool(t *testing.T) {
111+
poolSizes := []uint8{0, 2, 3, 4, 2, 3, 4} // duplicates will be ignored.
112+
113+
testCases := []struct {
114+
requestSize int
115+
wantCapacity int
116+
}{
117+
{requestSize: 0, wantCapacity: 0},
118+
{requestSize: 1, wantCapacity: 1},
119+
{requestSize: 2, wantCapacity: 4},
120+
{requestSize: 3, wantCapacity: 4},
121+
{requestSize: 4, wantCapacity: 4},
122+
{requestSize: 5, wantCapacity: 8},
123+
{requestSize: 6, wantCapacity: 8},
124+
{requestSize: 7, wantCapacity: 8},
125+
{requestSize: 8, wantCapacity: 8},
126+
{requestSize: 9, wantCapacity: 16},
127+
{requestSize: 15, wantCapacity: 16},
128+
{requestSize: 16, wantCapacity: 16},
129+
{requestSize: 17, wantCapacity: 4096}, // fallback pool returns sizes in multiples of 4096.
130+
}
131+
132+
for _, tc := range testCases {
133+
t.Run(fmt.Sprintf("requestSize=%d", tc.requestSize), func(t *testing.T) {
134+
pool, err := mem.NewBinaryTieredBufferPool(poolSizes...)
135+
if err != nil {
136+
t.Fatalf("Failed to create buffer pool: %v", err)
137+
}
138+
buf := pool.Get(tc.requestSize)
139+
if cap(*buf) != tc.wantCapacity {
140+
t.Errorf("Get(%d) returned buffer with capacity: %d, want %d", tc.requestSize, cap(*buf), tc.wantCapacity)
141+
}
142+
if len(*buf) != tc.requestSize {
143+
t.Errorf("Get(%d) returned buffer with length: %d, want %d", tc.requestSize, len(*buf), tc.requestSize)
144+
}
145+
pool.Put(buf)
146+
})
147+
}
148+
}

0 commit comments

Comments
 (0)