Skip to content

Commit cf4b382

Browse files
committed
mcp: log out-of-band errors instead of dropping them
Two errors that occur outside the request/response path were previously unobservable: - Keepalive ping failures (mcp/shared.go startKeepalive) silently closed the session with no record of why. - jsonrpc2 internal errors (mcp/transport.go connect) were printed via the global log.Printf, bypassing the configured slog.Logger. Both sites now report through the existing *slog.Logger that Server and Client already guarantee non-nil via ensureLogger. No new public API surface; the logger is threaded into the unexported helpers as a parameter. Includes a regression test (TestKeepAliveFailure_Logged) that asserts the keepalive failure produces a log line on the configured logger, and was verified to fail when the new log call is removed. Per @jba's suggestion on #865, this is the smaller no-new-API alternative to the ErrorHandler callback proposal. #865 remains open pending community input on whether a structured callback is also wanted. Refs #218
1 parent f36591b commit cf4b382

5 files changed

Lines changed: 82 additions & 9 deletions

File tree

mcp/client.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ func (c *Client) capabilities(protocolVersion string) *ClientCapabilities {
253253
// server, calls or notifications will return an error wrapping
254254
// [ErrConnectionClosed].
255255
func (c *Client) Connect(ctx context.Context, t Transport, opts *ClientSessionOptions) (cs *ClientSession, err error) {
256-
cs, err = connect(ctx, t, c, (*clientSessionState)(nil), nil)
256+
cs, err = connect(ctx, t, c, (*clientSessionState)(nil), nil, c.opts.Logger)
257257
if err != nil {
258258
return nil, err
259259
}
@@ -405,7 +405,7 @@ func (cs *ClientSession) registerElicitationWaiter(elicitationID string) (await
405405

406406
// startKeepalive starts the keepalive mechanism for this client session.
407407
func (cs *ClientSession) startKeepalive(interval time.Duration) {
408-
startKeepalive(cs, interval, &cs.keepaliveCancel)
408+
startKeepalive(cs, interval, &cs.keepaliveCancel, cs.client.opts.Logger)
409409
}
410410

411411
// AddRoots adds the given roots to the client,

mcp/mcp_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,6 +1817,70 @@ func TestKeepAliveFailure(t *testing.T) {
18171817
})
18181818
}
18191819

1820+
// TestKeepAliveFailure_Logged verifies that a keepalive ping failure is
1821+
// reported via the configured slog.Logger instead of being silently dropped.
1822+
// Regression test for #218.
1823+
func TestKeepAliveFailure_Logged(t *testing.T) {
1824+
synctest.Test(t, func(t *testing.T) {
1825+
ctx := context.Background()
1826+
1827+
ct, st := NewInMemoryTransports()
1828+
1829+
// Server without keepalive.
1830+
s := NewServer(testImpl, nil)
1831+
AddTool(s, greetTool(), sayHi)
1832+
ss, err := s.Connect(ctx, st, nil)
1833+
if err != nil {
1834+
t.Fatal(err)
1835+
}
1836+
1837+
// Client with a short keepalive and a logger that writes to a
1838+
// concurrency-safe buffer so we can assert on its output.
1839+
buf := &syncBuffer{}
1840+
clientOpts := &ClientOptions{
1841+
KeepAlive: 50 * time.Millisecond,
1842+
Logger: slog.New(slog.NewTextHandler(buf, &slog.HandlerOptions{Level: slog.LevelError})),
1843+
}
1844+
c := NewClient(testImpl, clientOpts)
1845+
cs, err := c.Connect(ctx, ct, nil)
1846+
if err != nil {
1847+
t.Fatal(err)
1848+
}
1849+
defer cs.Close()
1850+
1851+
synctest.Wait()
1852+
1853+
// Trigger a ping failure by closing the server side.
1854+
ss.Close()
1855+
1856+
time.Sleep(100 * time.Millisecond)
1857+
synctest.Wait()
1858+
1859+
got := buf.String()
1860+
if !strings.Contains(got, "keepalive ping failed") {
1861+
t.Errorf("expected keepalive failure to be logged, got log output:\n%s", got)
1862+
}
1863+
})
1864+
}
1865+
1866+
// syncBuffer is a goroutine-safe bytes.Buffer for capturing slog output in tests.
1867+
type syncBuffer struct {
1868+
mu sync.Mutex
1869+
buf bytes.Buffer
1870+
}
1871+
1872+
func (b *syncBuffer) Write(p []byte) (int, error) {
1873+
b.mu.Lock()
1874+
defer b.mu.Unlock()
1875+
return b.buf.Write(p)
1876+
}
1877+
1878+
func (b *syncBuffer) String() string {
1879+
b.mu.Lock()
1880+
defer b.mu.Unlock()
1881+
return b.buf.String()
1882+
}
1883+
18201884
func TestAddTool_DuplicateNoPanicAndNoDuplicate(t *testing.T) {
18211885
// Adding the same tool pointer twice should not panic and should not
18221886
// produce duplicates in the server's tool list.

mcp/server.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,7 @@ func (s *Server) Connect(ctx context.Context, t Transport, opts *ServerSessionOp
10261026
}
10271027

10281028
s.opts.Logger.Info("server connecting")
1029-
ss, err := connect(ctx, t, s, state, onClose)
1029+
ss, err := connect(ctx, t, s, state, onClose, s.opts.Logger)
10301030
if err != nil {
10311031
s.opts.Logger.Error("server connect error", "error", err)
10321032
return nil, err
@@ -1531,7 +1531,7 @@ func (ss *ServerSession) Wait() error {
15311531

15321532
// startKeepalive starts the keepalive mechanism for this server session.
15331533
func (ss *ServerSession) startKeepalive(interval time.Duration) {
1534-
startKeepalive(ss, interval, &ss.keepaliveCancel)
1534+
startKeepalive(ss, interval, &ss.keepaliveCancel, ss.server.opts.Logger)
15351535
}
15361536

15371537
// pageToken is the internal structure for the opaque pagination cursor.

mcp/shared.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,10 @@ type keepaliveSession interface {
582582
// startKeepalive starts the keepalive mechanism for a session.
583583
// It assigns the cancel function to the provided cancelPtr and starts a goroutine
584584
// that sends ping messages at the specified interval.
585-
func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr *context.CancelFunc) {
585+
//
586+
// logger must be non-nil; ping failures (which terminate the keepalive loop and
587+
// close the session) are reported via logger so they are not silently dropped.
588+
func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr *context.CancelFunc, logger *slog.Logger) {
586589
ctx, cancel := context.WithCancel(context.Background())
587590
// Assign cancel function before starting goroutine to avoid race condition.
588591
// We cannot return it because the caller may need to cancel during the
@@ -602,7 +605,9 @@ func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr
602605
err := session.Ping(pingCtx, nil)
603606
pingCancel()
604607
if err != nil {
605-
// Ping failed, close the session
608+
// Ping failed; log it before closing the session so the
609+
// failure is observable to operators. See #218.
610+
logger.Error("keepalive ping failed; closing session", "error", err)
606611
_ = session.Close()
607612
return
608613
}

mcp/transport.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"errors"
1111
"fmt"
1212
"io"
13-
"log"
13+
"log/slog"
1414
"net"
1515
"os"
1616
"sync"
@@ -152,7 +152,9 @@ type handler interface {
152152
handle(ctx context.Context, req *jsonrpc.Request) (any, error)
153153
}
154154

155-
func connect[H handler, State any](ctx context.Context, t Transport, b binder[H, State], s State, onClose func()) (H, error) {
155+
// connect wires a transport to a binder. logger must be non-nil; it receives
156+
// jsonrpc2 internal errors that would otherwise be dropped (see #218).
157+
func connect[H handler, State any](ctx context.Context, t Transport, b binder[H, State], s State, onClose func(), logger *slog.Logger) (H, error) {
156158
var zero H
157159
mcpConn, err := t.Connect(ctx)
158160
if err != nil {
@@ -178,7 +180,9 @@ func connect[H handler, State any](ctx context.Context, t Transport, b binder[H,
178180
OnDone: func() {
179181
b.disconnect(h)
180182
},
181-
OnInternalError: func(err error) { log.Printf("jsonrpc2 error: %v", err) },
183+
OnInternalError: func(err error) {
184+
logger.Error("jsonrpc2 internal error", "error", err)
185+
},
182186
})
183187
assert(preempter.conn != nil, "unbound preempter")
184188
return h, nil

0 commit comments

Comments
 (0)