Skip to content

Commit ee41ed6

Browse files
authored
core: abundant QBFT logging (#4381)
* core: abundant QBFT logging (#4380) Abundant QBFT debug logging as an attempt to diagnose failing attestations. category: misc ticket: none * Remove empty file
1 parent 9ec0589 commit ee41ed6

4 files changed

Lines changed: 73 additions & 0 deletions

File tree

core/consensus/qbft/qbft.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ func newDefinition(nodes int, subs func() []subscriber, roundTimer timer.RoundTi
115115
)
116116
},
117117

118+
LogDebug: func(ctx context.Context, _ core.Duty, _ int64, msg qbft.Msg[core.Duty, [32]byte], logMsg string) {
119+
if msg != nil {
120+
log.Debug(ctx, logMsg,
121+
z.Any("type", msg.Type()),
122+
z.I64("peer", msg.Source()),
123+
)
124+
} else {
125+
log.Debug(ctx, logMsg)
126+
}
127+
},
128+
118129
// Nodes is the number of nodes.
119130
Nodes: nodes,
120131

@@ -480,6 +491,12 @@ func (c *Consensus) runInstance(parent context.Context, duty core.Duty) (err err
480491
Receive: t.RecvBuffer(),
481492
}
482493

494+
log.Debug(ctx, "QBFT run",
495+
z.Any("peer", p2p.PeerName(c.p2pNode.ID())),
496+
z.Any("peers", c.peerLabels),
497+
z.Any("timer", string(roundTimer.Type())),
498+
)
499+
483500
// Run the algo, blocking until the context is cancelled.
484501
err = qbft.Run(ctx, def, qt, duty, peerIdx, inst.HashCh)
485502
if err != nil && !isContextErr(err) {

core/consensus/qbft/strategysim_internal_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ func newSimDefinition(nodes int, roundTimer timer.RoundTimer,
497497
},
498498
NewTimer: roundTimer.Timer,
499499
LogUnjust: func(context.Context, core.Duty, int64, qbft.Msg[core.Duty, [32]byte]) {},
500+
LogDebug: func(context.Context, core.Duty, int64, qbft.Msg[core.Duty, [32]byte], string) {},
500501
LogRoundChange: func(ctx context.Context, duty core.Duty, process,
501502
round, newRound int64, uponRule qbft.UponRule, msgs []qbft.Msg[core.Duty, [32]byte],
502503
) {

core/qbft/qbft.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ type Definition[I any, V comparable] struct {
4343
LogRoundChange func(ctx context.Context, instance I, process, round, newRound int64, uponRule UponRule, msgs []Msg[I, V])
4444
// LogUnjust allows debug logging of unjust messages.
4545
LogUnjust func(ctx context.Context, instance I, process int64, msg Msg[I, V])
46+
// LogDebug allows arbitrary debug logging.
47+
LogDebug func(ctx context.Context, instance I, process int64, msg Msg[I, V], logMsg string)
4648

4749
// Nodes is the total number of nodes/processes participating in consensus.
4850
Nodes int
@@ -265,52 +267,71 @@ func Run[I any, V comparable](ctx context.Context, d Definition[I, V], t Transpo
265267
// === Algorithm ===
266268

267269
{ // Algorithm 1:11
270+
d.LogDebug(ctx, instance, process, nil, "QBFT check if leader")
268271
if d.IsLeader(instance, round, process) { // Note round==1 at this point.
272+
d.LogDebug(ctx, instance, process, nil, "QBFT leader, broadcast own pre-prepare")
269273
err := broadcastOwnPrePrepare([]Msg[I, V]{}) // Empty justification since round==1
274+
d.LogDebug(ctx, instance, process, nil, "QBFT leader, broadcast own pre-prepare finished")
270275
if err != nil {
271276
return err
272277
}
273278
}
274279

280+
d.LogDebug(ctx, instance, process, nil, "QBFT new timer")
275281
timerChan, stopTimer = d.NewTimer(round)
276282
}
277283

284+
d.LogDebug(ctx, instance, process, nil, "QBFT start handling events")
278285
// Handle events until cancelled.
279286
for {
280287
var err error
281288

282289
select {
283290
case inputValue = <-inputValueCh:
291+
d.LogDebug(ctx, instance, process, nil, "QBFT new inputValue - step 1 received")
292+
284293
if isZeroVal(inputValue) {
285294
return errors.New("zero input value not supported")
286295
}
287296

288297
if ppjCache != nil {
289298
// Broadcast the pre-prepare now that we have a input value using the cached justification.
299+
d.LogDebug(ctx, instance, process, nil, "QBFT new inputValue - step 2 broadcast")
290300
err = broadcastMsg(MsgPrePrepare, inputValue, ppjCache)
301+
d.LogDebug(ctx, instance, process, nil, "QBFT new inputValue - step 3 broadcast finished")
291302
}
292303

293304
inputValueCh = nil // Don't read from this channel again.
305+
d.LogDebug(ctx, instance, process, nil, "QBFT new inputValue - step 2-4 finished")
294306

295307
case msg := <-t.Receive:
308+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - received")
296309
// Just send Qcommit if consensus already decided
297310
if len(qCommit) > 0 {
311+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - qCommits greater than 0")
298312
if msg.Source() != process && msg.Type() == MsgRoundChange { // Algorithm 3:17
313+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - consensus already decided, broadcast")
299314
err = broadcastMsg(MsgDecided, qCommit[0].Value(), qCommit)
315+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - consensus already decided, broadcast finished")
300316
}
301317

302318
break
303319
}
304320

305321
if !isJustified(d, instance, msg) { // Drop unjust messages
322+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - unjustified, dropping")
306323
d.LogUnjust(ctx, instance, process, msg)
307324
break
308325
}
309326

327+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - buffering")
310328
bufferMsg(msg)
329+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - buffered")
311330

331+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - classifying")
312332
rule, justification := classify(d, instance, round, process, buffer, msg)
313333
if rule == UponNothing || isDuplicatedRule(rule, msg.Round()) {
334+
d.LogDebug(ctx, instance, process, msg, "new QBFT message - classified as duplicate")
314335
// Do nothing more if no rule or duplicate rule was triggered
315336
break
316337
}
@@ -319,72 +340,98 @@ func Run[I any, V comparable](ctx context.Context, d Definition[I, V], t Transpo
319340

320341
switch rule {
321342
case UponJustifiedPrePrepare: // Algorithm 2:1
343+
d.LogDebug(ctx, instance, process, msg, "UponJustifiedPrePrepare QBFT step 1 - starting procedure")
322344
// Applicable to current or future rounds (since justified)
323345
changeRound(msg.Round(), rule)
324346

347+
d.LogDebug(ctx, instance, process, msg, "UponJustifiedPrePrepare QBFT step 2 - stop timer")
325348
stopTimer()
326349
timerChan, stopTimer = d.NewTimer(round)
327350

351+
d.LogDebug(ctx, instance, process, msg, "UponJustifiedPrePrepare QBFT step 3 - broadcast")
328352
err = broadcastMsg(MsgPrepare, msg.Value(), nil)
353+
d.LogDebug(ctx, instance, process, msg, "UponJustifiedPrePrepare QBFT step 3 - broadcast finished")
329354

330355
case UponQuorumPrepares: // Algorithm 2:4
356+
d.LogDebug(ctx, instance, process, msg, "UponQuorumPrepares QBFT step 1 - starting procedure")
331357
// Only applicable to current round
332358
preparedRound = round /* == msg.Round*/
333359
preparedValue = msg.Value()
334360
preparedJustification = justification
335361

362+
d.LogDebug(ctx, instance, process, msg, "UponQuorumPrepares QBFT step 2 - broadcast")
336363
err = broadcastMsg(MsgCommit, preparedValue, nil)
364+
d.LogDebug(ctx, instance, process, msg, "UponQuorumPrepares QBFT step 3 - broadcast finished")
337365

338366
case UponQuorumCommits, UponJustifiedDecided: // Algorithm 2:8
367+
d.LogDebug(ctx, instance, process, msg, "UponQuorumCommits, UponJustifiedDecided QBFT step 1 - starting procedure")
339368
// Applicable to any round (since can be justified)
340369
changeRound(msg.Round(), rule)
341370

342371
qCommit = justification
343372

373+
d.LogDebug(ctx, instance, process, msg, "UponQuorumCommits, UponJustifiedDecided QBFT step 2 - stop timer")
344374
stopTimer()
345375

346376
timerChan = nil
347377

378+
d.LogDebug(ctx, instance, process, msg, "UponQuorumCommits, UponJustifiedDecided QBFT step 3 - decide")
348379
d.Decide(ctx, instance, msg.Value(), justification)
380+
d.LogDebug(ctx, instance, process, msg, "UponQuorumCommits, UponJustifiedDecided QBFT step 4 - decided")
349381

350382
case UponFPlus1RoundChanges: // Algorithm 3:5
383+
d.LogDebug(ctx, instance, process, msg, "UponFPlus1RoundChanges QBFT step 1 - starting procedure")
351384
// Only applicable to future rounds
352385
changeRound(nextMinRound(d, justification, round /* < msg.Round */), rule)
353386

387+
d.LogDebug(ctx, instance, process, msg, "UponFPlus1RoundChanges QBFT step 2 - stop timer")
354388
stopTimer()
355389
timerChan, stopTimer = d.NewTimer(round)
356390

391+
d.LogDebug(ctx, instance, process, msg, "UponFPlus1RoundChanges QBFT step 3 - broadcast round change")
357392
err = broadcastRoundChange()
393+
d.LogDebug(ctx, instance, process, msg, "UponFPlus1RoundChanges QBFT step 4 - broadcasted round change")
358394

359395
case UponQuorumRoundChanges: // Algorithm 3:11
396+
d.LogDebug(ctx, instance, process, msg, "UponQuorumRoundChanges QBFT step 1 - starting procedure")
360397
// Only applicable to current round (round > 1)
361398
if _, pv, ok := getSingleJustifiedPrPv(d, justification); ok {
362399
// Send pre-prepare using prepared value (not our own input value)
400+
d.LogDebug(ctx, instance, process, msg, "UponQuorumRoundChanges QBFT step 2 - broadcast pre-prepare with prepared value")
363401
err = broadcastMsg(MsgPrePrepare, pv, justification)
364402
} else {
365403
// Send pre-prepare using our own input value
404+
d.LogDebug(ctx, instance, process, msg, "UponQuorumRoundChanges QBFT step 2 - broadcast pre-prepare with own input value")
366405
err = broadcastOwnPrePrepare(justification)
367406
}
407+
d.LogDebug(ctx, instance, process, msg, "UponQuorumRoundChanges QBFT step 3 - broadcasted pre-prepare")
368408
case UponUnjustQuorumRoundChanges:
409+
d.LogDebug(ctx, instance, process, msg, "UponUnjustQuorumRoundChanges QBFT step 1 - starting procedure")
369410
// Ignore bug or byzantine
370411

371412
default:
372413
panic("bug: invalid rule")
373414
}
374415

375416
case <-timerChan: // Algorithm 3:1
417+
d.LogDebug(ctx, instance, process, nil, "RoundTimeout QBFT - step 1 starting procedure")
376418
changeRound(round+1, UponRoundTimeout)
377419

420+
d.LogDebug(ctx, instance, process, nil, "RoundTimeout QBFT - step 2 stop timer")
378421
stopTimer()
379422
timerChan, stopTimer = d.NewTimer(round)
380423

424+
d.LogDebug(ctx, instance, process, nil, "RoundTimeout QBFT - step 3 broadcast round change")
381425
err = broadcastRoundChange()
426+
d.LogDebug(ctx, instance, process, nil, "RoundTimeout QBFT - step 4 broadcast finished")
382427

383428
case <-ctx.Done(): // Cancelled
429+
d.LogDebug(ctx, instance, process, nil, "QBFT event handling context done")
384430
return ctx.Err()
385431
}
386432

387433
if err != nil { // Errors are considered fatal.
434+
d.LogDebug(ctx, instance, process, nil, "QBFT fatal error")
388435
return err
389436
}
390437
}

core/qbft/qbft_internal_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,13 @@ func testQBFT(t *testing.T, test test) {
332332
t.Logf("Unjust: %#v", msg)
333333
cancel()
334334
},
335+
LogDebug: func(_ context.Context, instance int64, process int64, msg Msg[int64, int64], logMsg string) {
336+
if test.Fuzz {
337+
return // Ignore debug messages when fuzzing.
338+
}
339+
340+
t.Logf("Debug: %s - %#v", logMsg, msg)
341+
},
335342
Nodes: n,
336343
FIFOLimit: fifoLimit,
337344
}
@@ -747,4 +754,5 @@ var noopDef = Definition[int64, int64]{
747754
LogUponRule: func(context.Context, int64, int64, int64, Msg[int64, int64], UponRule) {},
748755
LogRoundChange: func(context.Context, int64, int64, int64, int64, UponRule, []Msg[int64, int64]) {},
749756
LogUnjust: func(context.Context, int64, int64, Msg[int64, int64]) {},
757+
LogDebug: func(context.Context, int64, int64, Msg[int64, int64], string) {},
750758
}

0 commit comments

Comments
 (0)