Skip to content

Commit 7a913aa

Browse files
author
Larry Li
committed
update
1 parent 2925e01 commit 7a913aa

4 files changed

Lines changed: 112 additions & 188 deletions

File tree

docs/rpc_observability.md

Lines changed: 0 additions & 54 deletions
This file was deleted.

metrics/client.go

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
11
package metrics
22

33
import (
4+
"context"
5+
"fmt"
6+
"strconv"
47
"time"
58

69
"github.com/prometheus/client_golang/prometheus"
710
"github.com/prometheus/client_golang/prometheus/promauto"
11+
"go.opentelemetry.io/otel/attribute"
12+
"go.opentelemetry.io/otel/metric"
13+
14+
"github.com/smartcontractkit/chainlink-common/pkg/beholder"
815
)
916

1017
var (
1118
RPCCallLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
1219
Name: "rpc_call_latency",
13-
Help: "The duration of an RPC call in milliseconds",
20+
Help: "The duration of an RPC call in seconds",
1421
Buckets: []float64{
1522
float64(50 * time.Millisecond),
1623
float64(100 * time.Millisecond),
@@ -22,4 +29,100 @@ var (
2229
float64(8 * time.Second),
2330
},
2431
}, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "success", "rpcCallName"})
32+
33+
RPCCallErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
34+
Name: "rpc_call_errors_total",
35+
Help: "The total number of failed RPC calls",
36+
}, []string{"chainFamily", "chainID", "rpcUrl", "isSendOnly", "rpcCallName"})
37+
)
38+
39+
// Instrument names used when the Beholder meter is asked for the RPC latency
// histogram and the RPC error counter. They are spelled the same as the
// Prometheus collectors declared in this file.
const (
	rpcCallLatencyBeholder     = "rpc_call_latency"
	rpcCallErrorsTotalBeholder = "rpc_call_errors_total"
)
43+
44+
// RPCClientMetrics records RPC call latency and RPC call errors.
type RPCClientMetrics interface {
	// RecordRequest records the latency of a single RPC call identified by
	// callName. A non-nil err marks the call as failed, which additionally
	// increments rpc_call_errors_total.
	RecordRequest(ctx context.Context, callName string, latency time.Duration, err error)
}
50+
51+
var _ RPCClientMetrics = (*rpcClientMetrics)(nil)
52+
53+
type rpcClientMetrics struct {
54+
chainFamily string
55+
chainID string
56+
rpcURL string
57+
isSendOnly bool
58+
latency metric.Float64Histogram
59+
errorsTotal metric.Int64Counter
60+
}
61+
62+
// RPCClientMetricsConfig holds the label values that stay fixed for one RPC
// client instance. NewRPCClientMetrics copies them into the metrics recorder.
type RPCClientMetricsConfig struct {
	// ChainFamily is reported as the "chainFamily" label.
	ChainFamily string
	// ChainID is reported as the "chainID" label.
	ChainID string
	// RPCURL is reported as the "rpcUrl" label.
	RPCURL string
	// IsSendOnly is reported, formatted as "true"/"false", as the "isSendOnly" label.
	IsSendOnly bool
}
69+
70+
// NewRPCClientMetrics creates RPC client metrics that publish to Prometheus and Beholder.
71+
func NewRPCClientMetrics(cfg RPCClientMetricsConfig) (RPCClientMetrics, error) {
72+
latency, err := beholder.GetMeter().Float64Histogram(rpcCallLatencyBeholder)
73+
if err != nil {
74+
return nil, fmt.Errorf("failed to register RPC call latency metric: %w", err)
75+
}
76+
errorsTotal, err := beholder.GetMeter().Int64Counter(rpcCallErrorsTotalBeholder)
77+
if err != nil {
78+
return nil, fmt.Errorf("failed to register RPC call errors metric: %w", err)
79+
}
80+
return &rpcClientMetrics{
81+
chainFamily: cfg.ChainFamily,
82+
chainID: cfg.ChainID,
83+
rpcURL: cfg.RPCURL,
84+
isSendOnly: cfg.IsSendOnly,
85+
latency: latency,
86+
errorsTotal: errorsTotal,
87+
}, nil
88+
}
89+
90+
func (m *rpcClientMetrics) RecordRequest(ctx context.Context, callName string, latency time.Duration, err error) {
91+
successStr := "true"
92+
if err != nil {
93+
successStr = "false"
94+
}
95+
sendStr := strconv.FormatBool(m.isSendOnly)
96+
sec := latency.Seconds()
97+
98+
RPCCallLatency.WithLabelValues(m.chainFamily, m.chainID, m.rpcURL, sendStr, successStr, callName).Observe(sec)
99+
100+
latAttrs := metric.WithAttributes(
101+
attribute.String("chainFamily", m.chainFamily),
102+
attribute.String("chainID", m.chainID),
103+
attribute.String("rpcUrl", m.rpcURL),
104+
attribute.String("isSendOnly", sendStr),
105+
attribute.String("success", successStr),
106+
attribute.String("rpcCallName", callName),
107+
)
108+
m.latency.Record(ctx, sec, latAttrs)
109+
110+
if err != nil {
111+
RPCCallErrorsTotal.WithLabelValues(m.chainFamily, m.chainID, m.rpcURL, sendStr, callName).Inc()
112+
errAttrs := metric.WithAttributes(
113+
attribute.String("chainFamily", m.chainFamily),
114+
attribute.String("chainID", m.chainID),
115+
attribute.String("rpcUrl", m.rpcURL),
116+
attribute.String("isSendOnly", sendStr),
117+
attribute.String("rpcCallName", callName),
118+
)
119+
m.errorsTotal.Add(ctx, 1, errAttrs)
120+
}
121+
}
122+
123+
// NoopRPCClientMetrics is an implementation of the RecordRequest method that
// discards every measurement. Use it when metrics are disabled.
type NoopRPCClientMetrics struct{}

// RecordRequest ignores all of its arguments and does nothing.
func (NoopRPCClientMetrics) RecordRequest(context.Context, string, time.Duration, error) {}
127+
128+
var _ RPCClientMetrics = NoopRPCClientMetrics{}
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,29 @@ import (
44
"context"
55
"errors"
66
"testing"
7+
"time"
78

89
"github.com/stretchr/testify/require"
910
)
1011

1112
func TestNewRPCClientMetrics(t *testing.T) {
1213
m, err := NewRPCClientMetrics(RPCClientMetricsConfig{
13-
Env: "staging",
14-
Network: "ethereum",
14+
ChainFamily: "evm",
1515
ChainID: "1",
16-
RPCProvider: "primary",
16+
RPCURL: "http://localhost:8545",
17+
IsSendOnly: false,
1718
})
1819
require.NoError(t, err)
1920
require.NotNil(t, m)
2021

2122
ctx := context.Background()
22-
m.RecordRequest(ctx, "latest_block", 100.0, nil)
23-
m.RecordRequest(ctx, "latest_block", 50.0, errors.New("rpc error"))
23+
m.RecordRequest(ctx, "latest_block", 100*time.Millisecond, nil)
24+
m.RecordRequest(ctx, "latest_block", 50*time.Millisecond, errors.New("rpc error"))
2425
}
2526

2627
func TestNoopRPCClientMetrics_RecordRequest(t *testing.T) {
2728
var m NoopRPCClientMetrics
2829
ctx := context.Background()
29-
m.RecordRequest(ctx, "latest_block", 100.0, nil)
30-
m.RecordRequest(ctx, "latest_block", 50.0, errors.New("rpc error"))
31-
// Noop should not panic
30+
m.RecordRequest(ctx, "latest_block", 100*time.Millisecond, nil)
31+
m.RecordRequest(ctx, "latest_block", 50*time.Millisecond, errors.New("rpc error"))
3232
}

metrics/rpc_client.go

Lines changed: 0 additions & 125 deletions
This file was deleted.

0 commit comments

Comments
 (0)