66package batching
77
88import (
9- "bytes"
109 "context"
1110 "encoding/json"
1211 "fmt"
@@ -17,79 +16,137 @@ import (
1716)
1817
1918const (
20- maxItemSize = 1 * 1024 * 1024
21- maxBatchSize = 5 * 1024 * 1024
22- maxItemsPerBatch = 1000
19+ defaultMaxItemSize = 1 * 1024 * 1024
20+ defaultMaxBatchSize = 5 * 1024 * 1024
21+ defaultMaxItemsPerBatch = 1000
2322)
2423
2524type Batcher struct {
26- batch [][]byte
27- batchSize int
25+ maxItemSize int
26+ maxBatchSize int
27+ maxItemsPerBatch int
28+ batch []json.RawMessage
29+ batchSize int
2830}
2931
30- func New () * Batcher {
31- return & Batcher {
32- batch : make ([][]byte , 0 , maxItemsPerBatch ),
32+ func New (opts ... Option ) * Batcher {
33+ b := & Batcher {
34+ maxItemSize : defaultMaxItemSize ,
35+ maxBatchSize : defaultMaxBatchSize ,
36+ maxItemsPerBatch : defaultMaxItemsPerBatch ,
37+ batchSize : 2 , // '[' and ']'
3338 }
39+
40+ for _ , opt := range opts {
41+ opt (b )
42+ }
43+
44+ b .batch = make ([]json.RawMessage , 0 , b .maxItemsPerBatch )
45+ return b
3446}
3547
36- func (b * Batcher ) Batch (ctx context.Context , in <- chan model.LogEntry , out chan <- [] byte ) error {
48+ func (b * Batcher ) Start (ctx context.Context , in <- chan model.LogEntry , out chan <- json. RawMessage ) error {
3749 for {
38- entry , ok , err := concurrent .SafeReader (ctx , in )
39- if err != nil {
40- return err
41- }
50+ v , ok , _ := concurrent .SafeReader (ctx , in )
4251 if ! ok {
43- return b .flush (ctx , out )
52+ batch , err := b .construct ()
53+ if err != nil {
54+ return err
55+ }
56+ if err = concurrent .SafeSender (ctx , out , batch ); err != nil {
57+ return err
58+ }
59+ break
4460 }
4561
46- data , err := json .Marshal (entry )
62+ item , err := json .Marshal (v )
4763 if err != nil {
4864 return fmt .Errorf ("marshal: %w" , err )
4965 }
5066
51- if len ( data ) > maxItemSize {
52- slog .Warn ("log entry exceeds max item size , dropping" ,
53- slog .Int ("size" , len (data )),
54- slog .Int ("max" , maxItemSize ),
67+ if ! b . valid ( item ) {
68+ slog .Warn ("invalid item, dropping" ,
69+ slog .Int ("size" , len (item )),
70+ slog .Int ("max" , b . maxItemSize ),
5571 )
5672 continue
5773 }
5874
59- if b .batchSize + len (data ) > maxBatchSize || len (b .batch ) >= maxItemsPerBatch {
60- if err := b .flush (ctx , out ); err != nil {
75+ if ok := b .add (item ); ! ok {
76+ batch , err := b .construct ()
77+ if err != nil {
78+ return err
79+ }
80+ if err = concurrent .SafeSender (ctx , out , batch ); err != nil {
6181 return err
6282 }
83+ _ = b .add (item )
84+ }
85+ }
86+ return nil
87+ }
88+
89+ func (b * Batcher ) StartSlice (items []json.RawMessage ) ([]json.RawMessage , error ) {
90+ var batchedItems []json.RawMessage
91+ for _ , item := range items {
92+ if ! b .valid (item ) {
93+ slog .Warn ("invalid item, dropping" ,
94+ slog .Int ("size" , len (item )),
95+ slog .Int ("max" , b .maxItemSize ),
96+ )
97+ continue
98+ }
99+
100+ if ok := b .add (item ); ! ok {
101+ batch , err := b .construct ()
102+ if err != nil {
103+ return nil , err
104+ }
105+ batchedItems = append (batchedItems , batch )
106+ _ = b .add (item )
63107 }
108+ }
109+
110+ batch , err := b .construct ()
111+ if err != nil {
112+ return nil , err
113+ }
64114
65- b . batch = append ( b . batch , data )
66- b . batchSize += len ( data )
115+ if batch != nil {
116+ batchedItems = append ( batchedItems , batch )
67117 }
118+ return batchedItems , nil
68119}
69120
70- func (b * Batcher ) flush ( ctx context. Context , out chan <- [] byte ) error {
71- if len (b .batch ) == 0 {
72- return nil
121+ func (b * Batcher ) add ( item json. RawMessage ) bool {
122+ if len (b .batch ) >= b . maxItemsPerBatch || b . batchSize + len ( item ) + 1 > b . maxBatchSize {
123+ return false
73124 }
74125
75- payload := assembleBatch (b .batch )
76- b .batch = b .batch [:0 ]
77- b .batchSize = 0
126+ b .batch = append (b .batch , item )
127+ b .batchSize += len (item ) + 1 // ','
128+ return true
129+ }
78130
79- return concurrent .SafeSender (ctx , out , payload )
131+ func (b * Batcher ) valid (item json.RawMessage ) bool {
132+ return len (item ) <= b .maxItemSize
80133}
81134
82- func assembleBatch (entries [][]byte ) []byte {
83- var buf bytes.Buffer
135+ func (b * Batcher ) construct () (json.RawMessage , error ) {
136+ if len (b .batch ) == 0 {
137+ return nil , nil
138+ }
84139
85- buf .WriteByte ('[' )
86- for i , entry := range entries {
87- if i > 0 {
88- buf .WriteByte (',' )
89- }
90- buf .Write (entry )
140+ batch , err := json .Marshal (& b .batch )
141+ if err != nil {
142+ return nil , fmt .Errorf ("marshal: %w" , err )
91143 }
92- buf .WriteByte (']' )
93144
94- return buf .Bytes ()
145+ b .reset ()
146+ return json .RawMessage (batch ), nil
147+ }
148+
149+ func (b * Batcher ) reset () {
150+ b .batch = b .batch [:0 ]
151+ b .batchSize = 2
95152}
0 commit comments