Skip to content

Commit ad9cb32

Browse files
authored
Merge pull request #2329 from CortexFoundation/dev
p2p: fix dial metrics not picking up the right error
2 parents 4ff1266 + 87c1347 commit ad9cb32

3 files changed

Lines changed: 38 additions & 42 deletions

File tree

p2p/metrics.go

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ var (
4545
egressTrafficMeter = metrics.NewRegisteredMeter("p2p/egress", nil)
4646

4747
// general ingress/egress connection meters
48-
serveMeter metrics.Meter = metrics.NilMeter{}
49-
serveSuccessMeter metrics.Meter = metrics.NilMeter{}
50-
dialMeter metrics.Meter = metrics.NilMeter{}
51-
dialSuccessMeter metrics.Meter = metrics.NilMeter{}
52-
dialConnectionError metrics.Meter = metrics.NilMeter{}
48+
serveMeter = metrics.NewRegisteredMeter("p2p/serves", nil)
49+
serveSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success", nil)
50+
dialMeter = metrics.NewRegisteredMeter("p2p/dials", nil)
51+
dialSuccessMeter = metrics.NewRegisteredMeter("p2p/dials/success", nil)
52+
dialConnectionError = metrics.NewRegisteredMeter("p2p/dials/error/connection", nil) // dial timeout; no route to host; connection refused; network is unreachable
5353

5454
// count peers that stayed connected for at least 1 min
5555
serve1MinSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success/1min", nil)
@@ -61,49 +61,41 @@ var (
6161
dialSelf = metrics.NewRegisteredMeter("p2p/dials/error/self", nil)
6262
dialUselessPeer = metrics.NewRegisteredMeter("p2p/dials/error/useless", nil)
6363
dialUnexpectedIdentity = metrics.NewRegisteredMeter("p2p/dials/error/id/unexpected", nil)
64-
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil)
65-
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil)
66-
)
67-
68-
func init() {
69-
if !metrics.Enabled {
70-
return
71-
}
64+
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil) // EOF; connection reset during handshake; message too big; i/o timeout
65+
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil) // EOF
7266

73-
activePeerGauge = metrics.NewRegisteredGauge("p2p/peers", nil)
74-
activeInboundPeerGauge = metrics.NewRegisteredGauge("p2p/peers/inbound", nil)
75-
activeOutboundPeerGauge = metrics.NewRegisteredGauge("p2p/peers/outbound", nil)
76-
serveMeter = metrics.NewRegisteredMeter("p2p/serves", nil)
77-
serveSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success", nil)
78-
dialMeter = metrics.NewRegisteredMeter("p2p/dials", nil)
79-
dialSuccessMeter = metrics.NewRegisteredMeter("p2p/dials/success", nil)
80-
dialConnectionError = metrics.NewRegisteredMeter("p2p/dials/error/connection", nil)
81-
}
67+
// capture the rest of errors that are not handled by the above meters
68+
dialOtherError = metrics.NewRegisteredMeter("p2p/dials/error/other", nil)
69+
)
8270

83-
// markDialError matches errors that occur while setting up a dial connection
84-
// to the corresponding meter.
71+
// markDialError matches errors that occur while setting up a dial connection to the
72+
// corresponding meter. We don't maintain meters for evert possible error, just for
73+
// the most interesting ones.
8574
func markDialError(err error) {
8675
if !metrics.Enabled {
8776
return
8877
}
89-
if err2 := errors.Unwrap(err); err2 != nil {
90-
err = err2
91-
}
92-
switch err {
93-
case DiscTooManyPeers:
78+
79+
var reason DiscReason
80+
var handshakeErr *protoHandshakeError
81+
d := errors.As(err, &reason)
82+
switch {
83+
case d && reason == DiscTooManyPeers:
9484
dialTooManyPeers.Mark(1)
95-
case DiscAlreadyConnected:
85+
case d && reason == DiscAlreadyConnected:
9686
dialAlreadyConnected.Mark(1)
97-
case DiscSelf:
87+
case d && reason == DiscSelf:
9888
dialSelf.Mark(1)
99-
case DiscUselessPeer:
89+
case d && reason == DiscUselessPeer:
10090
dialUselessPeer.Mark(1)
101-
case DiscUnexpectedIdentity:
91+
case d && reason == DiscUnexpectedIdentity:
10292
dialUnexpectedIdentity.Mark(1)
103-
case errEncHandshakeError:
104-
dialEncHandshakeError.Mark(1)
105-
case errProtoHandshakeError:
93+
case errors.As(err, &handshakeErr):
10694
dialProtoHandshakeError.Mark(1)
95+
case errors.Is(err, errEncHandshakeError):
96+
dialEncHandshakeError.Mark(1)
97+
default:
98+
dialOtherError.Mark(1)
10799
}
108100
}
109101

p2p/server.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,15 @@ const (
6666
)
6767

6868
var (
69-
errServerStopped = errors.New("server stopped")
70-
errEncHandshakeError = errors.New("rlpx enc error")
71-
errProtoHandshakeError = errors.New("rlpx proto error")
69+
errServerStopped = errors.New("server stopped")
70+
errEncHandshakeError = errors.New("rlpx enc error")
7271
)
7372

73+
type protoHandshakeError struct{ err error }
74+
75+
func (e *protoHandshakeError) Error() string { return fmt.Sprintf("rlpx proto error: %v", e.err) }
76+
func (e *protoHandshakeError) Unwrap() error { return e.err }
77+
7478
// Server manages all peer connections.
7579
type Server struct {
7680
// Config fields may not be modified while the server is running.
@@ -907,7 +911,7 @@ func (srv *Server) setupConn(c *conn, dialDest *enode.Node) error {
907911
phs, err := c.doProtoHandshake(srv.ourHandshake)
908912
if err != nil {
909913
clog.Trace("Failed p2p handshake", "err", err)
910-
return fmt.Errorf("%w: %v", errProtoHandshakeError, err)
914+
return &protoHandshakeError{err: err}
911915
}
912916
if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) {
913917
clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID))

p2p/server_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,11 +410,11 @@ func TestServerSetupConn(t *testing.T) {
410410
wantCloseErr: DiscUnexpectedIdentity,
411411
},
412412
{
413-
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: errProtoHandshakeError},
413+
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: DiscTooManyPeers},
414414
dialDest: enode.NewV4(clientpub, nil, 0, 0),
415415
flags: dynDialedConn,
416416
wantCalls: "doEncHandshake,doProtoHandshake,close,",
417-
wantCloseErr: errProtoHandshakeError,
417+
wantCloseErr: DiscTooManyPeers,
418418
},
419419
{
420420
tt: &setupTransport{pubkey: srvpub, phs: protoHandshake{ID: crypto.FromECDSAPub(srvpub)[1:]}},

0 commit comments

Comments
 (0)