@@ -11,6 +11,7 @@ import (
1111
1212 ds "github.com/ipfs/go-datastore"
1313 "github.com/ipfs/go-datastore/query"
14+ "github.com/rs/zerolog"
1415 "google.golang.org/protobuf/proto"
1516
1617 coresequencer "github.com/evstack/ev-node/core/sequencer"
@@ -45,26 +46,34 @@ type BatchQueue struct {
4546 // inFlightPostponed holds txs that should be requeued on Ack.
4647 // Set via SetPostponed between Drain and Ack. Cleared only on successful Ack.
4748 inFlightPostponed [][]byte
48- // inFlightPostponedItem tracks a postponed batch already persisted during Ack
49- // so retries do not append duplicate postponed entries.
50- inFlightPostponedItem * queuedItem
49+ // postponedItem holds a postponed batch persisted to the WAL during Ack.
50+ // It is only prepended to the in-memory queue once Ack fully succeeds, so
51+ // a direct Ack retry does not persist a duplicate entry. If a Drain rolls
52+ // the in-flight state back instead, the entry is discarded again because
53+ // its txs are still covered by the rolled-back WAL entries.
54+ postponedItem * queuedItem
5155
5256 // txSeen is an in-memory dedup set keyed by sha256 hash of each tx.
5357 // hashes are added in AddBatch and removed on successful Ack.
5559 // prevents the reaper from enqueuing the same tx across multiple scrape cycles.
5559 txSeen map [[32 ]byte ]struct {}
5660
61+ // totalEnqueued counts batches ever enqueued via AddBatch. Monotonic,
62+ // never decremented, so callers can detect new enqueues race-free.
63+ totalEnqueued uint64
64+
5765 // Sequence numbers for generating new keys
5866 nextAddSeq uint64
5967 nextPrependSeq uint64
6068
61- mu sync.Mutex
62- db ds.Batching
69+ mu sync.Mutex
70+ db ds.Batching
71+ logger zerolog.Logger
6372}
6473
6574// NewBatchQueue creates a new BatchQueue with the specified maximum size.
6675// If maxSize is 0, the queue will be unlimited.
67- func NewBatchQueue (db ds.Batching , prefix string , maxSize int ) * BatchQueue {
76+ func NewBatchQueue (db ds.Batching , prefix string , maxSize int , logger zerolog. Logger ) * BatchQueue {
6877 return & BatchQueue {
6978 queue : make ([]queuedItem , 0 ),
7079 head : 0 ,
@@ -73,6 +82,7 @@ func NewBatchQueue(db ds.Batching, prefix string, maxSize int) *BatchQueue {
7382 db : store .NewPrefixKVStore (db , prefix ),
7483 nextAddSeq : initialSeqNum ,
7584 nextPrependSeq : initialSeqNum - 1 ,
85+ logger : logger ,
7686 }
7787}
7888
@@ -129,6 +139,7 @@ func (bq *BatchQueue) AddBatch(ctx context.Context, batch coresequencer.Batch) e
129139 bq .nextAddSeq ++
130140
131141 bq .queue = append (bq .queue , queuedItem {Batch : batch , Key : key })
142+ bq .totalEnqueued ++
132143
133144 return nil
134145}
@@ -149,7 +160,7 @@ func (bq *BatchQueue) Drain(ctx context.Context, maxBytes uint64) (*coresequence
149160 bq .mu .Lock ()
150161 defer bq .mu .Unlock ()
151162
152- bq .rollbackInFlightLocked ()
163+ bq .rollbackInFlightLocked (ctx )
153164
154165 if bq .head >= len (bq .queue ) {
155166 return & coresequencer.Batch {Transactions : nil }, nil
@@ -192,7 +203,7 @@ func (bq *BatchQueue) Drain(ctx context.Context, maxBytes uint64) (*coresequence
192203func (bq * BatchQueue ) SetPostponed (txs [][]byte ) {
193204 bq .mu .Lock ()
194205 defer bq .mu .Unlock ()
195- if bq .inFlightPostponedItem != nil {
206+ if bq .postponedItem != nil {
196207 return
197208 }
198209 bq .inFlightPostponed = txs
@@ -208,17 +219,16 @@ func (bq *BatchQueue) Ack(ctx context.Context) error {
208219
209220 // persist postponed txs BEFORE deleting source WAL entries.
210221 // if this fails the original entries still exist — no data loss.
211- if len (bq .inFlightPostponed ) > 0 && bq .inFlightPostponedItem == nil {
222+ // the item is only prepended to the in-memory queue after the WAL
223+ // deletes succeed, so a rollback never sees its txs twice.
224+ if len (bq .inFlightPostponed ) > 0 && bq .postponedItem == nil {
212225 batch := coresequencer.Batch {Transactions : bq .inFlightPostponed }
213226 key := seqToKey (bq .nextPrependSeq )
214227 if err := bq .persistBatch (ctx , batch , key ); err != nil {
215228 return fmt .Errorf ("failed to persist postponed txs: %w" , err )
216229 }
217230 bq .nextPrependSeq --
218-
219- item := queuedItem {Batch : batch , Key : key }
220- bq .prependItemLocked (item )
221- bq .inFlightPostponedItem = & item
231+ bq .postponedItem = & queuedItem {Batch : batch , Key : key }
222232 }
223233
224234 // delete WAL entries for committed inFlight items in one batch.
@@ -259,10 +269,15 @@ func (bq *BatchQueue) Ack(ctx context.Context) error {
259269 }
260270 }
261271
272+ // requeue the persisted postponed entry now that the commit is durable
273+ if bq .postponedItem != nil {
274+ bq .prependItemLocked (* bq .postponedItem )
275+ }
276+
262277 clear (bq .inFlight )
263278 bq .inFlight = bq .inFlight [:0 ]
264279 bq .inFlightPostponed = nil
265- bq .inFlightPostponedItem = nil
280+ bq .postponedItem = nil
266281
267282 return nil
268283}
@@ -279,12 +294,25 @@ func (bq *BatchQueue) prependItemLocked(item queuedItem) {
279294}
280295
281296// rollbackInFlightLocked moves un-acked inFlight items back to the front of the queue.
282- // Must be called with bq.mu held.
283- func (bq * BatchQueue ) rollbackInFlightLocked () {
297+ // Postponed state is discarded: the postponed txs are still covered by the
298+ // rolled-back WAL entries, so a persisted postponed entry would duplicate
299+ // them and is deleted (best-effort; Load dedups any leftover on restart).
300+ // The caller is expected to make a fresh SetPostponed decision after the
301+ // next Drain. Must be called with bq.mu held.
302+ func (bq * BatchQueue ) rollbackInFlightLocked (ctx context.Context ) {
284303 if len (bq .inFlight ) == 0 {
285304 return
286305 }
287306
307+ if bq .postponedItem != nil {
308+ if err := bq .db .Delete (ctx , ds .NewKey (bq .postponedItem .Key )); err != nil {
309+ bq .logger .Warn ().Err (err ).Str ("key" , bq .postponedItem .Key ).
310+ Msg ("failed to delete rolled-back postponed WAL entry" )
311+ }
312+ bq .postponedItem = nil
313+ }
314+ bq .inFlightPostponed = nil
315+
288316 if bq .head >= len (bq .inFlight ) {
289317 // enough head slots — fill them directly
290318 for i := len (bq .inFlight ) - 1 ; i >= 0 ; i -- {
@@ -337,13 +365,13 @@ func (bq *BatchQueue) dedupAndEnqueueLocked(ctx context.Context, batch coreseque
337365 switch {
338366 case len (filtered ) == 0 :
339367 if err := bq .db .Delete (ctx , ds .NewKey (key )); err != nil {
340- fmt . Printf ( "Error deleting duplicate WAL entry %s: %v \n " , key , err )
368+ bq . logger . Error (). Err ( err ). Str ( "key" , key ). Msg ( "failed to delete duplicate WAL entry" )
341369 }
342370 return
343371 case len (filtered ) < len (batch .Transactions ):
344372 batch = coresequencer.Batch {Transactions : filtered }
345373 if err := bq .persistBatch (ctx , batch , key ); err != nil {
346- fmt . Printf ( "Error rewriting partially duplicate WAL entry %s: %v \n " , key , err )
374+ bq . logger . Error (). Err ( err ). Str ( "key" , key ). Msg ( "failed to rewrite partially duplicate WAL entry" )
347375 }
348376 }
349377
@@ -355,6 +383,8 @@ func (bq *BatchQueue) dedupAndEnqueueLocked(ctx context.Context, batch coreseque
355383// may still hold entries whose txs were already committed in the last block.
356384// Entries are rewritten in place (or deleted when emptied) so a subsequent
357385// reload stays consistent. Returns the number of dropped transactions.
386+ // It must be called on a freshly loaded queue: only queued entries are
387+ // scanned, so any in-flight entries would be missed.
358388func (bq * BatchQueue ) DropIncluded (ctx context.Context , included [][]byte ) (int , error ) {
359389 bq .mu .Lock ()
360390 defer bq .mu .Unlock ()
@@ -410,7 +440,7 @@ func (bq *BatchQueue) Load(ctx context.Context) error {
410440 bq .txSeen = make (map [[32 ]byte ]struct {})
411441 bq .inFlight = nil
412442 bq .inFlightPostponed = nil
413- bq .inFlightPostponedItem = nil
443+ bq .postponedItem = nil
414444 bq .nextAddSeq = initialSeqNum
415445 bq .nextPrependSeq = initialSeqNum - 1
416446
@@ -426,7 +456,7 @@ func (bq *BatchQueue) Load(ctx context.Context) error {
426456 var legacyItems []queuedItem
427457 for result := range results .Next () {
428458 if result .Error != nil {
429- fmt . Printf ( " Error reading entry from datastore: %v \n " , result . Error )
459+ bq . logger . Error (). Err ( result . Error ). Msg ( "failed to read entry from datastore" )
430460 continue
431461 }
432462 // We care about the last part of the key (the sequence number)
@@ -436,7 +466,7 @@ func (bq *BatchQueue) Load(ctx context.Context) error {
436466 var pbBatch pb.Batch
437467 err := proto .Unmarshal (result .Value , & pbBatch )
438468 if err != nil {
439- fmt . Printf ( "Error decoding batch for key '%s': %v. Skipping entry. \n " , keyName , err )
469+ bq . logger . Error (). Err ( err ). Str ( " key" , keyName ). Msg ( "failed to decode batch, skipping entry" )
440470 continue
441471 }
442472
@@ -465,18 +495,18 @@ func (bq *BatchQueue) Load(ctx context.Context) error {
465495 if len (legacyItems ) == 0 {
466496 return nil
467497 }
468- fmt . Printf ( "Found %d legacy items to migrate... \n " , len ( legacyItems ) )
498+ bq . logger . Info (). Int ( "count" , len ( legacyItems )). Msg ( "found legacy items to migrate" )
469499
470500 for _ , item := range legacyItems {
471501 newKeyName := seqToKey (bq .nextAddSeq )
472502
473503 if err := bq .persistBatch (ctx , item .Batch , newKeyName ); err != nil {
474- fmt . Printf ( "Failed to migrate legacy item %s: %v \n " , item . Key , err )
504+ bq . logger . Error (). Err ( err ). Str ( "key" , item . Key ). Msg ( "failed to migrate legacy item" )
475505 continue
476506 }
477507
478508 if err := bq .db .Delete (ctx , ds .NewKey (item .Key )); err != nil {
479- fmt . Printf ( "Failed to delete legacy key %s after migration: %v \n " , item . Key , err )
509+ bq . logger . Error (). Err ( err ). Str ( "key" , item . Key ). Msg ( "failed to delete legacy key after migration" )
480510 }
481511
482512 bq .dedupAndEnqueueLocked (ctx , item .Batch , newKeyName )
@@ -493,6 +523,15 @@ func (bq *BatchQueue) Size() int {
493523 return len (bq .queue ) - bq .head + len (bq .inFlight )
494524}
495525
526+ // TotalEnqueued returns the monotonic count of batches enqueued via AddBatch,
527+ // read while holding bq.mu. Unlike Size it never decreases, so comparing two
528+ // snapshots reliably detects whether new batches were enqueued in between.
529+ func (bq * BatchQueue ) TotalEnqueued () uint64 {
530+ bq .mu .Lock () // serialize the read with AddBatch, which increments the counter under the same mutex
531+ defer bq .mu .Unlock ()
532+ return bq .totalEnqueued
533+ }
534+
496535// persistBatch persists a batch to the datastore with the given key
497536func (bq * BatchQueue ) persistBatch (ctx context.Context , batch coresequencer.Batch , key string ) error {
498537 pbBatch := & pb.Batch {
0 commit comments