Skip to content

Commit 0a8c7f2

Browse files
dbrajovicmaxim-inj
andauthored
[c-930] use metrics v2 to trace relevant code paths (#10)
* fix: leftover go mod tidy * chore: wire up metrics v2 * chore: trace CListMempool * impl: extend node ctor to accept a metrics meter * chore: trace CListMempool * chore: trace state store * chore: trace consensus Receive handler * chore: trace state sync reactor * chore: trace consensus handleMsg method * chore: fix test compilation * fix: set node meter from arg * fix: use proper ctx for recheckTxs * chore: refactor changes to make diff smaller Additionally: * stop() to be called with &err reference * handle checktx callback separately from the original CheckTx func * chore: stabilize tests * fix: fix test 04 in CI --------- Co-authored-by: Max <maxim@injectivelabs.org>
1 parent 45f5d7f commit 0a8c7f2

21 files changed

Lines changed: 493 additions & 190 deletions

File tree

abci/tutorials/abci-v2-forum-app/forum.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ func main() {
7777
cfg.DefaultDBProvider,
7878
nm.DefaultMetricsProvider(config.Instrumentation),
7979
logger,
80+
nil,
8081
)
8182

8283
defer func() {

go.mod

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
module github.com/cometbft/cometbft
22

3-
go 1.23
4-
toolchain go1.23.7
3+
go 1.23.9
54

65
require (
76
github.com/BurntSushi/toml v1.4.0
@@ -33,15 +32,16 @@ require (
3332
github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa
3433
github.com/spf13/cobra v1.9.1
3534
github.com/spf13/viper v1.19.0
36-
github.com/stretchr/testify v1.10.0
37-
golang.org/x/crypto v0.36.0
38-
golang.org/x/net v0.37.0
39-
google.golang.org/grpc v1.71.0
35+
github.com/stretchr/testify v1.11.1
36+
golang.org/x/crypto v0.41.0
37+
golang.org/x/net v0.43.0
38+
google.golang.org/grpc v1.75.0
4039
)
4140

4241
require github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7
4342

4443
require (
44+
github.com/InjectiveLabs/metrics/v2 v2.0.0-beta.8
4545
github.com/Masterminds/semver/v3 v3.3.1
4646
github.com/go-git/go-git/v5 v5.14.0
4747
github.com/goccmack/goutil v1.2.3
@@ -51,9 +51,9 @@ require (
5151
github.com/oasisprotocol/curve25519-voi v0.0.0-20220708102147-0a8a51822cae
5252
github.com/supranational/blst v0.3.14
5353
golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8
54-
golang.org/x/sync v0.12.0
55-
gonum.org/v1/gonum v0.15.1
56-
google.golang.org/protobuf v1.36.5
54+
golang.org/x/sync v0.16.0
55+
gonum.org/v1/gonum v0.16.0
56+
google.golang.org/protobuf v1.36.8
5757
)
5858

5959
require (
@@ -64,6 +64,7 @@ require (
6464
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
6565
github.com/ProtonMail/go-crypto v1.1.5 // indirect
6666
github.com/beorn7/perks v1.0.1 // indirect
67+
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
6768
github.com/cespare/xxhash/v2 v2.3.0 // indirect
6869
github.com/cloudflare/circl v1.6.0 // indirect
6970
github.com/cockroachdb/errors v1.11.3 // indirect
@@ -85,7 +86,7 @@ require (
8586
github.com/getsentry/sentry-go v0.31.1 // indirect
8687
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
8788
github.com/go-git/go-billy/v5 v5.6.2 // indirect
88-
github.com/go-logr/logr v1.4.2 // indirect
89+
github.com/go-logr/logr v1.4.3 // indirect
8990
github.com/go-logr/stdr v1.2.2 // indirect
9091
github.com/go-sql-driver/mysql v1.7.1 // indirect
9192
github.com/gogo/protobuf v1.3.2 // indirect
@@ -95,6 +96,7 @@ require (
9596
github.com/google/flatbuffers v25.2.10+incompatible // indirect
9697
github.com/google/go-cmp v0.7.0 // indirect
9798
github.com/gotestyourself/gotestyourself v2.2.0+incompatible // indirect
99+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
98100
github.com/hashicorp/hcl v1.0.0 // indirect
99101
github.com/imdario/mergo v0.3.15 // indirect
100102
github.com/inconshreveable/mousetrap v1.1.0 // indirect
@@ -131,13 +133,20 @@ require (
131133
github.com/xanzy/ssh-agent v0.3.3 // indirect
132134
go.etcd.io/bbolt v1.4.0 // indirect
133135
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
134-
go.opentelemetry.io/otel v1.34.0 // indirect
135-
go.opentelemetry.io/otel/metric v1.34.0 // indirect
136-
go.opentelemetry.io/otel/trace v1.34.0 // indirect
136+
go.opentelemetry.io/otel v1.38.0 // indirect
137+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 // indirect
138+
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
139+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect
140+
go.opentelemetry.io/otel/metric v1.38.0 // indirect
141+
go.opentelemetry.io/otel/sdk v1.38.0 // indirect
142+
go.opentelemetry.io/otel/sdk/metric v1.38.0 // indirect
143+
go.opentelemetry.io/otel/trace v1.38.0 // indirect
144+
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
137145
go.uber.org/multierr v1.11.0 // indirect
138-
golang.org/x/sys v0.31.0 // indirect
139-
golang.org/x/text v0.23.0 // indirect
140-
google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect
146+
golang.org/x/sys v0.35.0 // indirect
147+
golang.org/x/text v0.28.0 // indirect
148+
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
149+
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect
141150
gopkg.in/ini.v1 v1.67.0 // indirect
142151
gopkg.in/warnings.v0 v0.1.2 // indirect
143152
gopkg.in/yaml.v3 v3.0.1 // indirect
@@ -152,3 +161,5 @@ retract (
152161
// superseeded by v0.38.3 because of ASA-2024-001
153162
[v0.38.0, v0.38.2]
154163
)
164+
165+
replace github.com/cometbft/cometbft/api => ./api

go.sum

Lines changed: 50 additions & 36 deletions
Large diffs are not rendered by default.

go.work

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
go 1.23
1+
go 1.23.9
22

33
use (
44
.

internal/clist/clist_test.go

Lines changed: 38 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package clist
33
import (
44
"fmt"
55
"runtime"
6-
"sync"
6+
"sync/atomic"
77
"testing"
88
"time"
99

@@ -73,55 +73,56 @@ func TestGCFifo(t *testing.T) {
7373
t.Skipf("Skipping on non-amd64 machine")
7474
}
7575

76-
const numElements = 1000000
77-
l := New()
76+
const numElements = 100000
77+
finalized := func() *atomic.Int64 {
78+
l := New()
79+
finalized := new(atomic.Int64)
7880

79-
// Use a WaitGroup to wait for all finalizers to run.
80-
var wg sync.WaitGroup
81-
wg.Add(numElements)
81+
type value struct {
82+
Int int
83+
}
8284

83-
type value struct {
84-
Int int
85-
}
85+
for i := 0; i < numElements; i++ {
86+
v := new(value)
87+
v.Int = i
88+
l.PushBack(v)
89+
runtime.SetFinalizer(v, func(_ *value) {
90+
finalized.Add(1)
91+
})
92+
}
8693

87-
for i := 0; i < numElements; i++ {
88-
v := new(value)
89-
v.Int = i
90-
l.PushBack(v)
91-
runtime.SetFinalizer(v, func(_ *value) {
92-
wg.Done()
93-
})
94-
}
94+
for el := l.Front(); el != nil; {
95+
next := el.Next()
96+
l.Remove(el)
97+
el.DetachPrev()
98+
el.DetachNext()
99+
el = next
100+
}
95101

96-
for el := l.Front(); el != nil; {
97-
next := el.Next()
98-
l.Remove(el)
99-
el = next
100-
}
102+
if l.Len() != 0 {
103+
t.Errorf("expected list to be empty, got %v elements", l.Len())
104+
}
101105

102-
// Wait for all finalizers to run.
103-
wg.Wait()
106+
return finalized
107+
}()
104108

105-
if l.Len() != 0 {
106-
t.Errorf("expected list to be empty, got %v elements", l.Len())
107-
}
109+
assert.Eventually(t, func() bool {
110+
runtime.GC()
111+
return finalized.Load() == numElements
112+
}, 30*time.Second, 50*time.Millisecond)
108113
}
109114

110-
// This test is quite hacky because it relies on SetFinalizer
111-
// which isn't guaranteed to run at all.
115+
// This test exercises random removals. Finalizer-based GC assertions are
116+
// covered by TestGCFifo and proved too nondeterministic here.
112117
func TestGCRandom(t *testing.T) {
113118
t.Helper()
114119
if runtime.GOARCH != "amd64" {
115120
t.Skipf("Skipping on non-amd64 machine")
116121
}
117122

118-
const numElements = 1000000
123+
const numElements = 100000
119124
l := New()
120125

121-
// Use a WaitGroup to wait for all finalizers to run.
122-
var wg sync.WaitGroup
123-
wg.Add(numElements)
124-
125126
type value struct {
126127
Int int
127128
}
@@ -130,9 +131,6 @@ func TestGCRandom(t *testing.T) {
130131
v := new(value)
131132
v.Int = i
132133
l.PushBack(v)
133-
runtime.SetFinalizer(v, func(_ *value) {
134-
wg.Done()
135-
})
136134
}
137135

138136
els := make([]*CElement, 0, numElements)
@@ -143,12 +141,11 @@ func TestGCRandom(t *testing.T) {
143141
for _, i := range cmtrand.Perm(numElements) {
144142
el := els[i]
145143
l.Remove(el)
146-
_ = el.Next()
144+
el.DetachPrev()
145+
el.DetachNext()
146+
els[i] = nil
147147
}
148148

149-
// Wait for all finalizers to run.
150-
wg.Wait()
151-
152149
if l.Len() != 0 {
153150
t.Errorf("expected list to be empty, got %v elements", l.Len())
154151
}

internal/consensus/byzantine_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ func TestByzantinePrevoteEquivocation(t *testing.T) {
288288
assert.Equal(t, prevoteHeight, ev.Height())
289289
}
290290
}
291-
case <-time.After(20 * time.Second):
291+
case <-time.After(60 * time.Second):
292292
t.Fatalf("Timed out waiting for validators to commit evidence")
293293
}
294294
}

internal/consensus/reactor.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
package consensus
22

33
import (
4+
"context"
45
"fmt"
56
"math/rand"
67
"reflect"
78
"sync"
89
"sync/atomic"
910
"time"
1011

12+
injmetrics "github.com/InjectiveLabs/metrics/v2"
1113
cmtcons "github.com/cometbft/cometbft/api/cometbft/consensus/v1"
1214
"github.com/cometbft/cometbft/internal/bits"
1315
cstypes "github.com/cometbft/cometbft/internal/consensus/types"
@@ -51,6 +53,7 @@ type Reactor struct {
5153
initialHeight int64 // under rsMtx
5254

5355
Metrics *Metrics
56+
meter injmetrics.Meter
5457
}
5558

5659
type ReactorOption func(*Reactor)
@@ -63,6 +66,7 @@ func NewReactor(consensusState *State, waitSync bool, options ...ReactorOption)
6366
rs: consensusState.GetRoundState(),
6467
initialHeight: consensusState.state.InitialHeight,
6568
Metrics: NopMetrics(),
69+
meter: injmetrics.NewNilMeter(),
6670
}
6771
conR.BaseReactor = *p2p.NewBaseReactor("Consensus", conR)
6872
if waitSync {
@@ -265,6 +269,9 @@ func (conR *Reactor) Receive(e p2p.Envelope) {
265269
panic(fmt.Sprintf("Peer %v has no state", e.Src))
266270
}
267271

272+
_, stopFn := conR.meter.FuncTimingCtx(context.Background(), "Receive", injmetrics.Tag("msg", fmt.Sprintf("%T", msg)))
273+
defer stopFn()
274+
268275
switch e.ChannelID {
269276
case StateChannel:
270277
switch msg := msg.(type) {
@@ -1079,6 +1086,11 @@ func ReactorMetrics(metrics *Metrics) ReactorOption {
10791086
return func(conR *Reactor) { conR.Metrics = metrics }
10801087
}
10811088

1089+
// ReactorMeter sets the tracing meter.
1090+
func ReactorMeter(meter injmetrics.Meter) ReactorOption {
1091+
return func(conR *Reactor) { conR.meter = meter }
1092+
}
1093+
10821094
// -----------------------------------------------------------------------------
10831095

10841096
// PeerState contains the known state of a peer, including its connection and

internal/consensus/state.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"strconv"
1313
"time"
1414

15+
injmetrics "github.com/InjectiveLabs/metrics/v2"
1516
"github.com/cosmos/gogoproto/proto"
1617

1718
cmtproto "github.com/cometbft/cometbft/api/cometbft/types/v1"
@@ -134,6 +135,7 @@ type State struct {
134135

135136
// for reporting metrics
136137
metrics *Metrics
138+
meter injmetrics.Meter
137139

138140
// offline state sync height indicating to which height the node synced offline
139141
offlineStateSyncHeight int64
@@ -171,6 +173,7 @@ func NewState(
171173
evpool: evpool,
172174
evsw: cmtevents.NewEventSwitch(),
173175
metrics: NopMetrics(),
176+
meter: injmetrics.NewNilMeter(),
174177
}
175178
for _, option := range options {
176179
option(cs)
@@ -220,6 +223,11 @@ func StateMetrics(metrics *Metrics) StateOption {
220223
return func(cs *State) { cs.metrics = metrics }
221224
}
222225

226+
// StateMeter sets the tracing meter.
227+
func StateMeter(meter injmetrics.Meter) StateOption {
228+
return func(cs *State) { cs.meter = meter }
229+
}
230+
223231
// OfflineStateSyncHeight indicates the height at which the node
224232
// statesync offline - before booting sets the metrics.
225233
func OfflineStateSyncHeight(height int64) StateOption {
@@ -877,6 +885,9 @@ func (cs *State) receiveRoutine(maxSteps int) {
877885

878886
// state transitions on complete-proposal, 2/3-any, 2/3-one.
879887
func (cs *State) handleMsg(mi msgInfo) {
888+
_, stopFn := cs.meter.FuncTimingCtx(context.Background(), "handleMsg", injmetrics.Tag("msg", fmt.Sprintf("%T", mi.Msg)))
889+
defer stopFn()
890+
880891
cs.mtx.Lock()
881892
defer cs.mtx.Unlock()
882893
var (

0 commit comments

Comments
 (0)