Skip to content

Commit e2685f8

Browse files
committed
Add Beholder Metrics
1 parent 3d8034a commit e2685f8

6 files changed

Lines changed: 347 additions & 75 deletions

File tree

metrics/logpoller.go

Lines changed: 86 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@ package metrics
22

33
import (
44
"context"
5+
"fmt"
56
"time"
67

7-
"go.opentelemetry.io/otel/metric"
8-
98
"github.com/prometheus/client_golang/prometheus"
109
"github.com/prometheus/client_golang/prometheus/promauto"
10+
"go.opentelemetry.io/otel/attribute"
11+
"go.opentelemetry.io/otel/metric"
12+
13+
"github.com/smartcontractkit/chainlink-common/pkg/beholder"
1114
)
1215

1316
type QueryType string
@@ -41,37 +44,102 @@ var (
4144
float64(2 * time.Second),
4245
float64(5 * time.Second),
4346
}
44-
LpQueryDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
47+
promLpQueryDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
4548
Name: "log_poller_query_duration",
4649
Help: "Measures duration of Log Poller's queries fetching logs",
4750
Buckets: sqlLatencyBuckets,
4851
}, []string{"chainFamily", "chainID", "query", "type"})
49-
LpQueryDataSets = promauto.NewGaugeVec(prometheus.GaugeOpts{
52+
promLpQueryDataSets = promauto.NewGaugeVec(prometheus.GaugeOpts{
5053
Name: "log_poller_query_dataset_size",
5154
Help: "Measures size of the datasets returned by Log Poller's queries",
5255
}, []string{"chainFamily", "chainID", "query", "type"})
53-
LpLogsInserted = promauto.NewCounterVec(prometheus.CounterOpts{
56+
promLpLogsInserted = promauto.NewCounterVec(prometheus.CounterOpts{
5457
Name: "log_poller_logs_inserted",
5558
Help: "Counter to track number of logs inserted by Log Poller",
5659
}, []string{"chainFamily", "chainID"})
57-
LpBlocksInserted = promauto.NewCounterVec(prometheus.CounterOpts{
60+
promLpBlocksInserted = promauto.NewCounterVec(prometheus.CounterOpts{
5861
Name: "log_poller_blocks_inserted",
5962
Help: "Counter to track number of blocks inserted by Log Poller",
6063
}, []string{"chainFamily", "chainID"})
6164
)
6265

63-
type GenericLogPollerORMMetrics interface {
64-
RecordQueryDuration(ctx context.Context, query QueryType, duration float64)
65-
RecordQueryDataSetsSize(ctx context.Context, query QueryType, size int)
66-
IncrementLogsInserted(ctx context.Context, numLogs int)
67-
IncrementBlocksInserted(ctx context.Context, numBlocks int)
66+
type GenericLogPollerMetrics interface {
67+
RecordQueryDuration(ctx context.Context, queryName string, queryType QueryType, duration float64)
68+
RecordQueryDatasetSize(ctx context.Context, queryName string, queryType QueryType, size int64)
69+
IncrementLogsInserted(ctx context.Context, numLogs int64)
70+
IncrementBlocksInserted(ctx context.Context, numBlocks int64)
71+
}
72+
73+
var _ GenericLogPollerMetrics = &logPollerMetrics{}
74+
75+
type logPollerMetrics struct {
76+
chainID string
77+
chainFamily string
78+
queryDuration metric.Float64Histogram
79+
queryDatasetsSize metric.Int64Gauge
80+
logsInserted metric.Int64Counter
81+
blocksInserted metric.Int64Counter
82+
}
83+
84+
func NewGenericLogPollerMetrics(chainID string, chainFamily string) (GenericLogPollerMetrics, error) {
85+
queryDuration, err := beholder.GetMeter().Float64Histogram("log_poller_query_duration")
86+
if err != nil {
87+
return nil, fmt.Errorf("failed to register logpoller query duration metric: %w", err)
88+
}
89+
90+
queryDatasetSize, err := beholder.GetMeter().Int64Gauge("log_poller_query_dataset_size")
91+
if err != nil {
92+
return nil, fmt.Errorf("failed to register query dataset size metric: %w", err)
93+
}
94+
95+
logsInserted, err := beholder.GetMeter().Int64Counter("log_poller_logs_inserted")
96+
if err != nil {
97+
return nil, fmt.Errorf("failed to register logs inserted metric: %w", err)
98+
}
99+
100+
blocksInserted, err := beholder.GetMeter().Int64Counter("log_poller_blocks_inserted")
101+
if err != nil {
102+
return nil, fmt.Errorf("failed to register blocks inserted metric: %w", err)
103+
}
104+
105+
return &logPollerMetrics{
106+
chainID: chainID,
107+
chainFamily: chainFamily,
108+
queryDuration: queryDuration,
109+
queryDatasetsSize: queryDatasetSize,
110+
logsInserted: logsInserted,
111+
blocksInserted: blocksInserted,
112+
}, nil
113+
}
114+
115+
func (m *logPollerMetrics) RecordQueryDuration(ctx context.Context, queryName string, queryType QueryType, duration float64) {
116+
promLpQueryDuration.WithLabelValues(m.chainFamily, m.chainID, queryName, string(queryType)).Observe(duration)
117+
m.queryDuration.Record(ctx, duration, metric.WithAttributes(
118+
attribute.String("chainFamily", m.chainFamily),
119+
attribute.String("chainID", m.chainID),
120+
attribute.String("query", queryName),
121+
attribute.String("type", string(queryType))))
122+
}
123+
124+
func (m *logPollerMetrics) RecordQueryDatasetSize(ctx context.Context, queryName string, queryType QueryType, size int64) {
125+
promLpQueryDataSets.WithLabelValues(m.chainFamily, m.chainID, queryName, string(queryType)).Add(float64(size))
126+
m.queryDatasetsSize.Record(ctx, size, metric.WithAttributes(
127+
attribute.String("chainFamily", m.chainFamily),
128+
attribute.String("chainID", m.chainID),
129+
attribute.String("query", queryName),
130+
attribute.String("type", string(queryType))))
131+
}
132+
133+
func (m *logPollerMetrics) IncrementLogsInserted(ctx context.Context, numLogs int64) {
134+
promLpLogsInserted.WithLabelValues(m.chainFamily, m.chainID).Add(float64(numLogs))
135+
m.logsInserted.Add(ctx, numLogs, metric.WithAttributes(
136+
attribute.String("chainFamily", m.chainFamily),
137+
attribute.String("chainID", m.chainID)))
68138
}
69139

70-
type logPollerORMMetrics struct {
71-
chainID string
72-
chainFamily string
73-
queryDuration metric.Float64Histogram
74-
queryDataSets metric.Int64Gauge
75-
logsInserted metric.Int64Counter
76-
blocksInserted metric.Int64Counter
140+
func (m *logPollerMetrics) IncrementBlocksInserted(ctx context.Context, numBlocks int64) {
141+
promLpBlocksInserted.WithLabelValues(m.chainFamily, m.chainID).Add(float64(numBlocks))
142+
m.blocksInserted.Add(ctx, numBlocks, metric.WithAttributes(
143+
attribute.String("chainFamily", m.chainFamily),
144+
attribute.String("chainID", m.chainID)))
77145
}

metrics/multinode.go

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
package metrics
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"github.com/prometheus/client_golang/prometheus"
7+
"github.com/prometheus/client_golang/prometheus/promauto"
8+
"github.com/smartcontractkit/chainlink-common/pkg/beholder"
9+
"go.opentelemetry.io/otel/attribute"
10+
"go.opentelemetry.io/otel/metric"
11+
)
12+
13+
var (
14+
// Node States
15+
promMultiNodeRPCNodeStates = promauto.NewGaugeVec(prometheus.GaugeOpts{
16+
Name: "multi_node_states",
17+
Help: "The number of RPC nodes currently in the given state for the given chain",
18+
}, []string{"network", "chainId", "state"})
19+
20+
// Node Verification
21+
promPoolRPCNodeVerifies = promauto.NewCounterVec(prometheus.CounterOpts{
22+
Name: "pool_rpc_node_verifies",
23+
Help: "The total number of chain ID verifications for the given RPC node",
24+
}, []string{"network", "chainID", "nodeName"})
25+
promPoolRPCNodeVerifiesFailed = promauto.NewCounterVec(prometheus.CounterOpts{
26+
Name: "pool_rpc_node_verifies_failed",
27+
Help: "The total number of failed chain ID verifications for the given RPC node",
28+
}, []string{"network", "chainID", "nodeName"})
29+
promPoolRPCNodeVerifiesSuccess = promauto.NewCounterVec(prometheus.CounterOpts{
30+
Name: "pool_rpc_node_verifies_success",
31+
Help: "The total number of successful chain ID verifications for the given RPC node",
32+
}, []string{"network", "chainID", "nodeName"})
33+
34+
// TODO: Should these all have network as well?
35+
// Node State Transitions
36+
promPoolRPCNodeTransitionsToAlive = promauto.NewCounterVec(prometheus.CounterOpts{
37+
Name: "pool_rpc_node_num_transitions_to_alive",
38+
Help: "Total number of times node has transitioned to Alive",
39+
}, []string{"chainID", "nodeName"})
40+
promPoolRPCNodeTransitionsToInSync = promauto.NewCounterVec(prometheus.CounterOpts{
41+
Name: "pool_rpc_node_num_transitions_to_in_sync",
42+
Help: "Total number of times node has transitioned from OutOfSync to Alive",
43+
}, []string{"chainID", "nodeName"})
44+
promPoolRPCNodeTransitionsToOutOfSync = promauto.NewCounterVec(prometheus.CounterOpts{
45+
Name: "pool_rpc_node_num_transitions_to_out_of_sync",
46+
Help: "Total number of times node has transitioned to OutOfSync",
47+
}, []string{"chainID", "nodeName"})
48+
promPoolRPCNodeTransitionsToUnreachable = promauto.NewCounterVec(prometheus.CounterOpts{
49+
Name: "pool_rpc_node_num_transitions_to_unreachable",
50+
Help: "Total number of times node has transitioned to Unreachable",
51+
}, []string{"chainID", "nodeName"})
52+
promPoolRPCNodeTransitionsToInvalidChainID = promauto.NewCounterVec(prometheus.CounterOpts{
53+
Name: "pool_rpc_node_num_transitions_to_invalid_chain_id",
54+
Help: "Total number of times node has transitioned to InvalidChainID",
55+
}, []string{"chainID", "nodeName"})
56+
promPoolRPCNodeTransitionsToUnusable = promauto.NewCounterVec(prometheus.CounterOpts{
57+
Name: "pool_rpc_node_num_transitions_to_unusable",
58+
Help: "Total number of times node has transitioned to Unusable",
59+
}, []string{"chainID", "nodeName"})
60+
promPoolRPCNodeTransitionsToSyncing = promauto.NewCounterVec(prometheus.CounterOpts{
61+
Name: "pool_rpc_node_num_transitions_to_syncing",
62+
Help: "Total number of times node has transitioned to Syncing",
63+
}, []string{"chainID", "nodeName"})
64+
)
65+
66+
type GenericMultiNodeMetrics interface {
67+
RecordNodeStates(ctx context.Context, state string, count int64)
68+
IncrementNodeVerifies(ctx context.Context, nodeName string)
69+
IncrementNodeVerifiesFailed(ctx context.Context, nodeName string)
70+
IncrementNodeVerifiesSuccess(ctx context.Context, nodeName string)
71+
IncrementNodeTransitionsToAlive(ctx context.Context, nodeName string)
72+
IncrementNodeTransitionsToInSync(ctx context.Context, nodeName string)
73+
IncrementNodeTransitionsToOutOfSync(ctx context.Context, nodeName string)
74+
IncrementNodeTransitionsToUnreachable(ctx context.Context, nodeName string)
75+
IncrementNodeTransitionsToInvalidChainID(ctx context.Context, nodeName string)
76+
IncrementNodeTransitionsToUnusable(ctx context.Context, nodeName string)
77+
IncrementNodeTransitionsToSyncing(ctx context.Context, nodeName string)
78+
}
79+
80+
var _ GenericMultiNodeMetrics = &multiNodeMetrics{}
81+
82+
type multiNodeMetrics struct {
83+
network string
84+
chainID string
85+
nodeStates metric.Int64Gauge
86+
nodeVerifies metric.Int64Counter
87+
nodeVerifiesFailed metric.Int64Counter
88+
nodeVerifiesSuccess metric.Int64Counter
89+
nodeTransitionsToAlive metric.Int64Counter
90+
nodeTransitionsToInSync metric.Int64Counter
91+
nodeTransitionsToOutOfSync metric.Int64Counter
92+
nodeTransitionsToUnreachable metric.Int64Counter
93+
nodeTransitionsToInvalidChainID metric.Int64Counter
94+
nodeTransitionsToUnusable metric.Int64Counter
95+
nodeTransitionsToSyncing metric.Int64Counter
96+
}
97+
98+
func NewGenericMultiNodeMetrics(network string, chainID string) (GenericMultiNodeMetrics, error) {
99+
nodeStates, err := beholder.GetMeter().Int64Gauge("multi_node_states")
100+
if err != nil {
101+
return nil, fmt.Errorf("failed to register multinode states metric: %w", err)
102+
}
103+
104+
nodeVerifies, err := beholder.GetMeter().Int64Counter("pool_rpc_node_verifies")
105+
if err != nil {
106+
return nil, fmt.Errorf("failed to register node verifies metric: %w", err)
107+
}
108+
109+
nodeVerifiesFailed, err := beholder.GetMeter().Int64Counter("pool_rpc_node_verifies_failed")
110+
if err != nil {
111+
return nil, fmt.Errorf("failed to register node verifies failed metric: %w", err)
112+
}
113+
114+
nodeVerifiesSuccess, err := beholder.GetMeter().Int64Counter("pool_rpc_node_verifies_success")
115+
if err != nil {
116+
return nil, fmt.Errorf("failed to register node verifies success metric: %w", err)
117+
}
118+
119+
nodeTransitionsToAlive, err := beholder.GetMeter().Int64Counter("pool_rpc_node_num_transitions_to_alive")
120+
if err != nil {
121+
return nil, fmt.Errorf("failed to register node transitions to alive metric: %w", err)
122+
}
123+
124+
nodeTransitionsToInSync, err := beholder.GetMeter().Int64Counter("pool_rpc_node_num_transitions_to_in_sync")
125+
if err != nil {
126+
return nil, fmt.Errorf("failed to register node transitions to in sync metric: %w", err)
127+
}
128+
129+
nodeTransitionsToOutOfSync, err := beholder.GetMeter().Int64Counter("pool_rpc_node_num_transitions_to_out_of_sync")
130+
if err != nil {
131+
return nil, fmt.Errorf("failed to register node transitions to out of sync metric: %w", err)
132+
}
133+
134+
nodeTransitionsToUnreachable, err := beholder.GetMeter().Int64Counter("pool_rpc_node_num_transitions_to_unreachable")
135+
if err != nil {
136+
return nil, fmt.Errorf("failed to register node transitions to unreachable metric: %w", err)
137+
}
138+
139+
nodeTransitionsToInvalidChainID, err := beholder.GetMeter().Int64Counter("pool_rpc_node_num_transitions_to_invalid_chain_id")
140+
if err != nil {
141+
return nil, fmt.Errorf("failed to register node transitions to invalid chain id metric: %w", err)
142+
}
143+
144+
nodeTransitionsToUnusable, err := beholder.GetMeter().Int64Counter("pool_rpc_node_num_transitions_to_unusable")
145+
if err != nil {
146+
return nil, fmt.Errorf("failed to register node transitions to unusable metric: %w", err)
147+
}
148+
149+
nodeTransitionsToSyncing, err := beholder.GetMeter().Int64Counter("pool_rpc_node_num_transitions_to_syncing")
150+
if err != nil {
151+
return nil, fmt.Errorf("failed to register node transitions to syncing metric: %w", err)
152+
}
153+
154+
return &multiNodeMetrics{
155+
network: network,
156+
chainID: chainID,
157+
nodeStates: nodeStates,
158+
nodeVerifies: nodeVerifies,
159+
nodeVerifiesFailed: nodeVerifiesFailed,
160+
nodeVerifiesSuccess: nodeVerifiesSuccess,
161+
nodeTransitionsToAlive: nodeTransitionsToAlive,
162+
nodeTransitionsToInSync: nodeTransitionsToInSync,
163+
nodeTransitionsToOutOfSync: nodeTransitionsToOutOfSync,
164+
nodeTransitionsToUnreachable: nodeTransitionsToUnreachable,
165+
nodeTransitionsToInvalidChainID: nodeTransitionsToInvalidChainID,
166+
nodeTransitionsToUnusable: nodeTransitionsToUnusable,
167+
nodeTransitionsToSyncing: nodeTransitionsToSyncing,
168+
}, nil
169+
}
170+
171+
func (m *multiNodeMetrics) RecordNodeStates(ctx context.Context, state string, count int64) {
172+
promMultiNodeRPCNodeStates.WithLabelValues(m.network, m.chainID, state).Set(float64(count))
173+
m.nodeStates.Record(ctx, count, metric.WithAttributes(
174+
attribute.String("network", m.network),
175+
attribute.String("chainID", m.chainID),
176+
attribute.String("state", state)))
177+
}
178+
179+
func (m *multiNodeMetrics) IncrementNodeVerifies(ctx context.Context, nodeName string) {
180+
promPoolRPCNodeVerifies.WithLabelValues(m.network, m.chainID, nodeName).Inc()
181+
m.nodeVerifies.Add(ctx, 1, metric.WithAttributes(
182+
attribute.String("network", m.network),
183+
attribute.String("chainID", m.chainID),
184+
attribute.String("nodeName", nodeName)))
185+
}
186+
187+
func (m *multiNodeMetrics) IncrementNodeVerifiesFailed(ctx context.Context, nodeName string) {
188+
promPoolRPCNodeVerifiesFailed.WithLabelValues(m.network, m.chainID, nodeName).Inc()
189+
m.nodeVerifiesFailed.Add(ctx, 1, metric.WithAttributes(
190+
attribute.String("network", m.network),
191+
attribute.String("chainID", m.chainID),
192+
attribute.String("nodeName", nodeName)))
193+
}
194+
195+
func (m *multiNodeMetrics) IncrementNodeVerifiesSuccess(ctx context.Context, nodeName string) {
196+
promPoolRPCNodeVerifiesSuccess.WithLabelValues(m.network, m.chainID, nodeName).Inc()
197+
m.nodeVerifiesSuccess.Add(ctx, 1, metric.WithAttributes(
198+
attribute.String("network", m.network),
199+
attribute.String("chainID", m.chainID),
200+
attribute.String("nodeName", nodeName)))
201+
}
202+
203+
func (m *multiNodeMetrics) IncrementNodeTransitionsToAlive(ctx context.Context, nodeName string) {
204+
promPoolRPCNodeTransitionsToAlive.WithLabelValues(m.network, m.chainID, nodeName).Inc()
205+
m.nodeTransitionsToAlive.Add(ctx, 1, metric.WithAttributes(
206+
attribute.String("network", m.network),
207+
attribute.String("chainID", m.chainID),
208+
attribute.String("nodeName", nodeName)))
209+
}
210+
211+
func (m *multiNodeMetrics) IncrementNodeTransitionsToInSync(ctx context.Context, nodeName string) {
212+
promPoolRPCNodeTransitionsToInSync.WithLabelValues(m.network, m.chainID, nodeName).Inc()
213+
m.nodeTransitionsToInSync.Add(ctx, 1, metric.WithAttributes(
214+
attribute.String("network", m.network),
215+
attribute.String("chainID", m.chainID),
216+
attribute.String("nodeName", nodeName)))
217+
}
218+
219+
func (m *multiNodeMetrics) IncrementNodeTransitionsToOutOfSync(ctx context.Context, nodeName string) {
220+
promPoolRPCNodeTransitionsToOutOfSync.WithLabelValues(m.network, m.chainID, nodeName).Inc()
221+
m.nodeTransitionsToOutOfSync.Add(ctx, 1, metric.WithAttributes(
222+
attribute.String("network", m.network),
223+
attribute.String("chainID", m.chainID),
224+
attribute.String("nodeName", nodeName)))
225+
}
226+
227+
func (m *multiNodeMetrics) IncrementNodeTransitionsToUnreachable(ctx context.Context, nodeName string) {
228+
promPoolRPCNodeTransitionsToUnreachable.WithLabelValues(m.network, m.chainID, nodeName).Inc()
229+
m.nodeTransitionsToUnreachable.Add(ctx, 1, metric.WithAttributes(
230+
attribute.String("network", m.network),
231+
attribute.String("chainID", m.chainID),
232+
attribute.String("nodeName", nodeName)))
233+
}
234+
235+
func (m *multiNodeMetrics) IncrementNodeTransitionsToInvalidChainID(ctx context.Context, nodeName string) {
236+
promPoolRPCNodeTransitionsToInvalidChainID.WithLabelValues(m.network, m.chainID, nodeName).Inc()
237+
m.nodeTransitionsToInvalidChainID.Add(ctx, 1, metric.WithAttributes(
238+
attribute.String("network", m.network),
239+
attribute.String("chainID", m.chainID),
240+
attribute.String("nodeName", nodeName)))
241+
}
242+
243+
func (m *multiNodeMetrics) IncrementNodeTransitionsToUnusable(ctx context.Context, nodeName string) {
244+
promPoolRPCNodeTransitionsToUnusable.WithLabelValues(m.network, m.chainID, nodeName).Inc()
245+
m.nodeTransitionsToUnusable.Add(ctx, 1, metric.WithAttributes(
246+
attribute.String("network", m.network),
247+
attribute.String("chainID", m.chainID),
248+
attribute.String("nodeName", nodeName)))
249+
}
250+
251+
func (m *multiNodeMetrics) IncrementNodeTransitionsToSyncing(ctx context.Context, nodeName string) {
252+
promPoolRPCNodeTransitionsToSyncing.WithLabelValues(m.network, m.chainID, nodeName).Inc()
253+
m.nodeTransitionsToSyncing.Add(ctx, 1, metric.WithAttributes(
254+
attribute.String("network", m.network),
255+
attribute.String("chainID", m.chainID),
256+
attribute.String("nodeName", nodeName)))
257+
}

0 commit comments

Comments
 (0)