Skip to content

Commit 3d5e5c7

Browse files
CRE-4405: Add gateway request tracing; use default HTTP transport (#22559)
CRE-4405: Add request tracing, add default transport to outgoing HTTP requests
1 parent fe3a328 commit 3d5e5c7

19 files changed

Lines changed: 280 additions & 20 deletions

File tree

core/cmd/shell.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ import (
5252
"github.com/smartcontractkit/chainlink/v2/core/services/ccv/ccvcommon"
5353
"github.com/smartcontractkit/chainlink/v2/core/services/chainlink"
5454
"github.com/smartcontractkit/chainlink/v2/core/services/cre"
55+
gatewaynetwork "github.com/smartcontractkit/chainlink/v2/core/services/gateway/network"
5556
"github.com/smartcontractkit/chainlink/v2/core/services/keystore"
5657
"github.com/smartcontractkit/chainlink/v2/core/services/llo"
5758
ocr3beholderwrapper "github.com/smartcontractkit/chainlink/v2/core/services/ocr3/beholderwrapper"
@@ -80,6 +81,7 @@ func metricViews() []sdkmetric.View {
8081
ccvcommon.MetricViews(),
8182
ocr3beholderwrapper.MetricViews(),
8283
ocr3_1beholderwrapper.MetricViews(),
84+
gatewaynetwork.HTTPClientMetricViews(),
8385
)
8486
}
8587

core/scripts/go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,9 @@ require (
639639

640640
replace github.com/fbsobreira/gotron-sdk => github.com/smartcontractkit/chainlink-tron/relayer/gotron-sdk v0.0.5-0.20260218133534-cbd44da2856b
641641

642+
// to be removed after https://github.com/doyensec/safeurl/pull/11 is merged
643+
replace github.com/doyensec/safeurl => github.com/cedric-cordenier/safeurl v0.0.0-20260525105509-613a4d94ca55
644+
642645
// moby/go-archive v0.2.0 removed deprecated aliases (archive.Gzip, etc.) that
643646
// docker/cli@v28.5.x still uses. docker/compose has not migrated to docker/cli v29
644647
// yet, so we pin to v0.1.0 which has both the old aliases and the new compression API.

core/scripts/go.sum

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/services/gateway/network/httpclient.go

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"io"
99
"maps"
1010
"net/http"
11+
"net/http/httptrace"
1112
"slices"
1213
"strings"
1314

@@ -169,9 +170,10 @@ func responseHeadersFromNetHeader(h http.Header) (map[string]string, map[string]
169170
}
170171

171172
type httpClient struct {
172-
client *safeurl.WrappedClient
173-
config HTTPClientConfig
174-
lggr logger.Logger
173+
client *safeurl.WrappedClient
174+
config HTTPClientConfig
175+
lggr logger.Logger
176+
metrics *httpClientMetrics
175177
}
176178

177179
// NewHTTPClient creates a new HTTPClient.
@@ -185,6 +187,12 @@ func NewHTTPClient(config HTTPClientConfig, lggr logger.Logger) (HTTPClient, err
185187
config.AllowedPorts = append(config.AllowedPorts, expanded...)
186188
}
187189
config.ApplyDefaults()
190+
191+
defaultTransport, ok := http.DefaultTransport.(*http.Transport)
192+
if !ok {
193+
return nil, errors.New("could not coerce http.DefaultTransport to *http.Transport")
194+
}
195+
188196
safeConfig := safeurl.
189197
GetConfigBuilder().
190198
SetAllowedIPs(config.AllowedIPs...).
@@ -194,12 +202,19 @@ func NewHTTPClient(config HTTPClientConfig, lggr logger.Logger) (HTTPClient, err
194202
SetBlockedIPs(config.BlockedIPs...).
195203
SetBlockedIPsCIDR(config.BlockedIPsCIDR...).
196204
SetCheckRedirect(disableRedirects).
205+
SetTransport(defaultTransport).
197206
Build()
198207

208+
metrics, err := newHTTPClientMetrics()
209+
if err != nil {
210+
return nil, err
211+
}
212+
199213
return &httpClient{
200-
config: config,
201-
client: safeurl.Client(safeConfig),
202-
lggr: lggr,
214+
config: config,
215+
client: safeurl.Client(safeConfig),
216+
lggr: lggr,
217+
metrics: metrics,
203218
}, nil
204219
}
205220

@@ -297,8 +312,13 @@ func (c *httpClient) Send(ctx context.Context, req HTTPRequest) (*HTTPResponse,
297312
timeoutCtx, cancel := context.WithTimeout(ctx, to)
298313
defer cancel()
299314

315+
requestStart := time.Now()
316+
trace, traceState := newClientTrace(ctx, req.Method, requestStart, c.metrics)
317+
timeoutCtx = httptrace.WithClientTrace(timeoutCtx, trace)
318+
300319
r, err := http.NewRequestWithContext(timeoutCtx, req.Method, req.URL, bytes.NewBuffer(req.Body))
301320
if err != nil {
321+
c.metrics.recordTotal(ctx, req.Method, 0, false, false, time.Since(requestStart))
302322
return nil, err
303323
}
304324
for k, values := range requestToNetHeader(req) {
@@ -309,6 +329,7 @@ func (c *httpClient) Send(ctx context.Context, req HTTPRequest) (*HTTPResponse,
309329

310330
resp, err := c.client.Do(r)
311331
if err != nil {
332+
c.metrics.recordTotal(ctx, req.Method, 0, false, traceState.connReused.Load(), time.Since(requestStart))
312333
if isBlockedRequest(err) {
313334
c.lggr.Warnw("HTTP request blocked", "err", err)
314335
return nil, fmt.Errorf("%w: %w", ErrBlockedRequest, err)
@@ -324,10 +345,13 @@ func (c *httpClient) Send(ctx context.Context, req HTTPRequest) (*HTTPResponse,
324345
reader := http.MaxBytesReader(nil, resp.Body, int64(n))
325346
body, err := io.ReadAll(reader)
326347
if err != nil {
348+
c.metrics.recordTotal(ctx, req.Method, resp.StatusCode, false, traceState.connReused.Load(), time.Since(requestStart))
327349
c.lggr.Errorw("failed to read HTTP response body", "err", err)
328350
return nil, errors.Join(err, ErrHTTPRead)
329351
}
330352

353+
c.metrics.recordTotal(ctx, req.Method, resp.StatusCode, true, traceState.connReused.Load(), time.Since(requestStart))
354+
331355
headers, multiHeaders := responseHeadersFromNetHeader(resp.Header)
332356
c.lggr.Debugw("received HTTP response", "statusCode", resp.StatusCode)
333357
return &HTTPResponse{
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package network
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strconv"
7+
"time"
8+
9+
"github.com/prometheus/client_golang/prometheus"
10+
"go.opentelemetry.io/otel/attribute"
11+
"go.opentelemetry.io/otel/metric"
12+
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
13+
14+
"github.com/smartcontractkit/chainlink-common/pkg/beholder"
15+
)
16+
17+
// TracePhase identifies a phase of the HTTP client request lifecycle as
18+
// observed via net/http/httptrace.
19+
type TracePhase string
20+
21+
// Phase values. recordPhase and recordTotal emit them as the "phase"
// attribute of the phase-duration histogram.
const (
22+
PhaseGetConn TracePhase = "get_conn"
23+
PhaseDNSLookup TracePhase = "dns_lookup"
24+
PhaseTCPConnect TracePhase = "tcp_connect"
25+
PhaseTLSHandshake TracePhase = "tls_handshake"
26+
PhaseWroteRequest TracePhase = "wrote_request"
27+
PhaseTimeToFirstByte TracePhase = "time_to_first_byte"
28+
// PhaseTotal is the phase recordTotal uses for the whole request lifetime.
PhaseTotal TracePhase = "total"
29+
)
30+
31+
// httpClientMetrics bundles the instruments used to record HTTP client
// request timings.
type httpClientMetrics struct {
32+
// phaseDuration is a histogram (unit "ms") of per-phase and total request durations.
phaseDuration metric.Int64Histogram
33+
}
34+
35+
// newHTTPClientMetrics creates the platform_gateway_http_client_phase_duration
// Int64 histogram on the beholder meter and returns it wrapped in an
// httpClientMetrics. It returns a wrapped error if the histogram cannot be
// created.
func newHTTPClientMetrics() (*httpClientMetrics, error) {
36+
phaseDuration, err := beholder.GetMeter().Int64Histogram(
37+
"platform_gateway_http_client_phase_duration",
38+
metric.WithUnit("ms"),
39+
metric.WithDescription("HTTP client request phase duration observed via httptrace. The count of phase=total observations is the request count, partitioned by method, statusCode, success, and connectionReused. Success does not imply a 2xx status code"),
40+
)
41+
if err != nil {
42+
return nil, fmt.Errorf("failed to create platform_gateway_http_client_phase_duration histogram: %w", err)
43+
}
44+
45+
return &httpClientMetrics{
46+
phaseDuration: phaseDuration,
47+
}, nil
48+
}
49+
50+
// recordPhase records d on the phase-duration histogram, tagged with the
// "method" and "phase" attributes.
func (m *httpClientMetrics) recordPhase(ctx context.Context, method string, phase TracePhase, d time.Duration) {
51+
// d.Milliseconds() is an integer count, so sub-millisecond phases are recorded as 0.
m.phaseDuration.Record(ctx, d.Milliseconds(), metric.WithAttributes(
52+
attribute.String("method", method),
53+
attribute.String("phase", string(phase)),
54+
))
55+
}
56+
57+
// recordTotal records the total request lifetime d, in whole milliseconds,
// under phase=total together with the result attributes statusCode, success,
// and connectionReused.
58+
// The histogram's count for phase=total doubles as the request counter.
59+
// Success means the request returned a response and Send read its body; it
// does not imply a 2xx statusCode. Send passes statusCode 0 when no response
// was received.
60+
func (m *httpClientMetrics) recordTotal(ctx context.Context, method string, statusCode int, success, connReused bool, d time.Duration) {
61+
m.phaseDuration.Record(ctx, d.Milliseconds(), metric.WithAttributes(
62+
attribute.String("method", method),
63+
attribute.String("phase", string(PhaseTotal)),
64+
attribute.String("statusCode", strconv.Itoa(statusCode)),
65+
attribute.String("success", strconv.FormatBool(success)),
66+
attribute.String("connectionReused", strconv.FormatBool(connReused)),
67+
))
68+
}
69+
70+
// HTTPClientMetricViews returns histogram bucket definitions for the HTTP client trace metrics.
71+
// Due to the OTEL specification, all histogram buckets must be defined when the beholder client is created.
72+
func HTTPClientMetricViews() []sdkmetric.View {
73+
return []sdkmetric.View{
74+
sdkmetric.NewView(
75+
// The name must stay identical to the instrument created in newHTTPClientMetrics.
sdkmetric.Instrument{Name: "platform_gateway_http_client_phase_duration"},
76+
sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
77+
// 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768 (ms)
78+
Boundaries: prometheus.ExponentialBuckets(1, 2, 16),
79+
}},
80+
),
81+
}
82+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package network
2+
3+
import (
4+
"context"
5+
"testing"
6+
"time"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
// TestNewHTTPClientMetrics checks that newHTTPClientMetrics returns a non-nil
// value and no error.
func TestNewHTTPClientMetrics(t *testing.T) {
12+
m, err := newHTTPClientMetrics()
13+
require.NoError(t, err)
14+
require.NotNil(t, m)
15+
}
16+
17+
// TestHTTPClientMetrics_RecordPhase is a smoke test: it calls recordPhase once
// for every TracePhase constant. The recorded values are not asserted.
func TestHTTPClientMetrics_RecordPhase(t *testing.T) {
18+
m, err := newHTTPClientMetrics()
19+
require.NoError(t, err)
20+
21+
ctx := context.Background()
22+
for _, phase := range []TracePhase{
23+
PhaseGetConn,
24+
PhaseDNSLookup,
25+
PhaseTCPConnect,
26+
PhaseTLSHandshake,
27+
PhaseWroteRequest,
28+
PhaseTimeToFirstByte,
29+
PhaseTotal,
30+
} {
31+
m.recordPhase(ctx, "GET", phase, 25*time.Millisecond)
32+
}
33+
}
34+
35+
// TestHTTPClientMetrics_RecordTotal is a smoke test: it calls recordTotal with
// a few method/status/success/reuse combinations. The recorded values are not
// asserted.
func TestHTTPClientMetrics_RecordTotal(t *testing.T) {
36+
m, err := newHTTPClientMetrics()
37+
require.NoError(t, err)
38+
39+
ctx := context.Background()
40+
m.recordTotal(ctx, "GET", 200, true, false, 25*time.Millisecond)
41+
m.recordTotal(ctx, "POST", 500, true, true, 100*time.Millisecond)
42+
// statusCode 0 with success=false models a request that produced no response.
m.recordTotal(ctx, "GET", 0, false, false, 5*time.Millisecond)
43+
}
44+
45+
// TestHTTPClientMetricViews checks that exactly one view is returned.
func TestHTTPClientMetricViews(t *testing.T) {
46+
views := HTTPClientMetricViews()
47+
require.Len(t, views, 1)
48+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package network
2+
3+
import (
4+
"context"
5+
"crypto/tls"
6+
"net/http/httptrace"
7+
"sync/atomic"
8+
"time"
9+
)
10+
11+
// requestTrace holds out-of-band signals captured during a single HTTP request
12+
// so the caller can include them in the final request metric.
13+
type requestTrace struct {
14+
// connReused is set to true by the GotConn callback when the obtained
// connection is reported as reused; otherwise it keeps its zero value (false).
connReused atomic.Bool
15+
}
16+
17+
// newClientTrace returns an httptrace.ClientTrace that records per-phase
18+
// durations on m, along with a requestTrace exposing connection reuse.
19+
//
20+
// Metrics recorded from paired start/end callbacks (GetConn, DNS, TCP connect,
21+
// TLS handshake) are phase-local durations. WroteRequest and
22+
// GotFirstResponseByte record cumulative elapsed time since requestStart.
23+
//
24+
// httptrace may invoke callbacks concurrently so each phase keeps its own atomic start timestamp.
//
// NOTE(review): each start callback overwrites the stored timestamp, so a
// phase whose start fires more than once is measured from the most recent
// start. net/http/httptrace documents ConnectStart/ConnectDone as possibly
// called multiple times — confirm this is acceptable.
25+
func newClientTrace(ctx context.Context, method string, requestStart time.Time, m *httpClientMetrics) (*httptrace.ClientTrace, *requestTrace) {
26+
rt := &requestTrace{}
27+
28+
var (
29+
getConnStart atomic.Pointer[time.Time]
30+
dnsStart atomic.Pointer[time.Time]
31+
connectStart atomic.Pointer[time.Time]
32+
tlsStart atomic.Pointer[time.Time]
33+
)
34+
35+
// storeNow stamps p with the current time.
storeNow := func(p *atomic.Pointer[time.Time]) {
36+
now := time.Now()
37+
p.Store(&now)
38+
}
39+
40+
// recordPhase records the time elapsed since the stored start timestamp.
// It is a no-op when the matching start callback never stored one.
recordPhase := func(start *atomic.Pointer[time.Time], phase TracePhase) {
41+
s := start.Load()
42+
if s == nil {
43+
return
44+
}
45+
m.recordPhase(ctx, method, phase, time.Since(*s))
46+
}
47+
48+
return &httptrace.ClientTrace{
49+
GetConn: func(string) {
50+
storeNow(&getConnStart)
51+
},
52+
GotConn: func(info httptrace.GotConnInfo) {
53+
recordPhase(&getConnStart, PhaseGetConn)
54+
// Only ever set to true; a non-reused connection leaves the flag at false.
if info.Reused {
55+
rt.connReused.Store(true)
56+
}
57+
},
58+
DNSStart: func(httptrace.DNSStartInfo) {
59+
storeNow(&dnsStart)
60+
},
61+
DNSDone: func(httptrace.DNSDoneInfo) {
62+
recordPhase(&dnsStart, PhaseDNSLookup)
63+
},
64+
ConnectStart: func(string, string) {
65+
storeNow(&connectStart)
66+
},
67+
ConnectDone: func(string, string, error) {
68+
recordPhase(&connectStart, PhaseTCPConnect)
69+
},
70+
TLSHandshakeStart: func() {
71+
storeNow(&tlsStart)
72+
},
73+
TLSHandshakeDone: func(tls.ConnectionState, error) {
74+
recordPhase(&tlsStart, PhaseTLSHandshake)
75+
},
76+
// The two callbacks below measure from requestStart, not from a paired start callback.
WroteRequest: func(httptrace.WroteRequestInfo) {
77+
m.recordPhase(ctx, method, PhaseWroteRequest, time.Since(requestStart))
78+
},
79+
GotFirstResponseByte: func() {
80+
m.recordPhase(ctx, method, PhaseTimeToFirstByte, time.Since(requestStart))
81+
},
82+
}, rt
83+
}

deployment/go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,3 +550,6 @@ require (
550550

551551
// gotron-sdk is no longer maintained
552552
replace github.com/fbsobreira/gotron-sdk => github.com/smartcontractkit/chainlink-tron/relayer/gotron-sdk v0.0.5-0.20260218133534-cbd44da2856b
553+
554+
// to be removed after https://github.com/doyensec/safeurl/pull/11 is merged
555+
replace github.com/doyensec/safeurl => github.com/cedric-cordenier/safeurl v0.0.0-20260525105509-613a4d94ca55

deployment/go.sum

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ require (
428428

429429
replace github.com/fbsobreira/gotron-sdk => github.com/smartcontractkit/chainlink-tron/relayer/gotron-sdk v0.0.5-0.20260218133534-cbd44da2856b
430430

431+
// to be removed after https://github.com/doyensec/safeurl/pull/11 is merged
432+
replace github.com/doyensec/safeurl => github.com/cedric-cordenier/safeurl v0.0.0-20260525105509-613a4d94ca55
433+
431434
tool github.com/smartcontractkit/chainlink-common/pkg/loop/cmd/loopinstall
432435

433436
tool github.com/smartcontractkit/chainlink-common/script/cmd/dependabot

0 commit comments

Comments
 (0)