Skip to content

Commit 1eedd3e

Browse files
gloursclaude
andcommitted
fix: don't cache transient errors in version negotiation
Replace sync.Once with sync.Mutex so that only successful version lookups are cached. Transient errors (context cancellation, network blips) are returned without caching, allowing subsequent calls to retry. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 659af68 commit 1eedd3e

2 files changed

Lines changed: 109 additions & 31 deletions

File tree

pkg/compose/compose.go

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -494,48 +494,61 @@ func (s *composeService) isSwarmEnabled(ctx context.Context) (bool, error) {
494494
return swarmEnabled.val, swarmEnabled.err
495495
}
496496

497+
// runtimeVersionCache caches a version string after a successful lookup.
498+
// Transient errors (including context cancellation) are not cached so that
499+
// subsequent calls can retry with a fresh context.
497500
type runtimeVersionCache struct {
498-
once sync.Once
499-
val string
500-
err error
501+
mu sync.Mutex
502+
val string
501503
}
502504

505+
// RuntimeVersion returns the raw API version reported by the daemon.
506+
// Callers that need the negotiated/effective client API version should use
507+
// CurrentAPIVersion instead.
503508
func (s *composeService) RuntimeVersion(ctx context.Context) (string, error) {
504-
// RuntimeVersion returns the raw API version reported by the daemon.
505-
// Callers that need the negotiated/effective client API version should use
506-
// CurrentAPIVersion instead.
507-
s.runtimeVersion.once.Do(func() {
508-
version, err := s.apiClient().ServerVersion(ctx, client.ServerVersionOptions{})
509-
if err != nil {
510-
s.runtimeVersion.err = err
511-
return
512-
}
513-
s.runtimeVersion.val = version.APIVersion
514-
})
515-
return s.runtimeVersion.val, s.runtimeVersion.err
509+
s.runtimeVersion.mu.Lock()
510+
defer s.runtimeVersion.mu.Unlock()
511+
if s.runtimeVersion.val != "" {
512+
return s.runtimeVersion.val, nil
513+
}
514+
version, err := s.apiClient().ServerVersion(ctx, client.ServerVersionOptions{})
515+
if err != nil {
516+
return "", err
517+
}
518+
s.runtimeVersion.val = version.APIVersion
519+
return s.runtimeVersion.val, nil
516520
}
517521

518522
// CurrentAPIVersion returns the API version currently used by the Docker client.
519523
// Trigger negotiation first so version-gated request shaping matches the version
520524
// that subsequent API calls will actually use.
525+
//
526+
// Lock ordering: currentAPIVersion.mu must be acquired before runtimeVersion.mu
527+
// (via the RuntimeVersion fallback). No code path should reverse this order.
521528
func (s *composeService) CurrentAPIVersion(ctx context.Context) (string, error) {
522-
s.currentAPIVersion.once.Do(func() {
523-
_, err := s.apiClient().Ping(ctx, client.PingOptions{NegotiateAPIVersion: true})
524-
if err != nil {
525-
s.currentAPIVersion.err = err
526-
return
527-
}
529+
s.currentAPIVersion.mu.Lock()
530+
defer s.currentAPIVersion.mu.Unlock()
531+
if s.currentAPIVersion.val != "" {
532+
return s.currentAPIVersion.val, nil
533+
}
528534

529-
version := s.apiClient().ClientVersion()
530-
if version != "" {
531-
s.currentAPIVersion.val = version
532-
return
533-
}
535+
_, err := s.apiClient().Ping(ctx, client.PingOptions{NegotiateAPIVersion: true})
536+
if err != nil {
537+
return "", err
538+
}
534539

535-
// Defensive fallback for unexpected client implementations or mocks that
536-
// do not populate ClientVersion after a successful negotiated ping.
537-
s.currentAPIVersion.val, s.currentAPIVersion.err = s.RuntimeVersion(ctx)
538-
})
540+
version := s.apiClient().ClientVersion()
541+
if version != "" {
542+
s.currentAPIVersion.val = version
543+
return s.currentAPIVersion.val, nil
544+
}
539545

540-
return s.currentAPIVersion.val, s.currentAPIVersion.err
546+
// Defensive fallback for unexpected client implementations or mocks that
547+
// do not populate ClientVersion after a successful negotiated ping.
548+
val, err := s.RuntimeVersion(ctx)
549+
if err != nil {
550+
return "", err
551+
}
552+
s.currentAPIVersion.val = val
553+
return s.currentAPIVersion.val, nil
541554
}

pkg/compose/convergence_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,3 +521,68 @@ func TestCurrentAPIVersionCachesNegotiation(t *testing.T) {
521521
assert.NilError(t, err)
522522
assert.Equal(t, version, "1.43")
523523
}
524+
525+
func TestRuntimeVersionRetriesOnTransientError(t *testing.T) {
526+
mockCtrl := gomock.NewController(t)
527+
defer mockCtrl.Finish()
528+
529+
apiClient := mocks.NewMockAPIClient(mockCtrl)
530+
cli := mocks.NewMockCli(mockCtrl)
531+
tested := &composeService{dockerCli: cli}
532+
533+
cli.EXPECT().Client().Return(apiClient).AnyTimes()
534+
535+
// First call: ServerVersion fails with a transient error
536+
firstCall := apiClient.EXPECT().ServerVersion(gomock.Any(), gomock.Any()).
537+
Return(client.ServerVersionResult{}, context.DeadlineExceeded).Times(1)
538+
539+
// Second call: succeeds
540+
apiClient.EXPECT().ServerVersion(gomock.Any(), gomock.Any()).
541+
Return(client.ServerVersionResult{APIVersion: "1.48"}, nil).Times(1).After(firstCall)
542+
543+
_, err := tested.RuntimeVersion(t.Context())
544+
assert.ErrorIs(t, err, context.DeadlineExceeded)
545+
546+
version, err := tested.RuntimeVersion(t.Context())
547+
assert.NilError(t, err)
548+
assert.Equal(t, version, "1.48")
549+
550+
// Third call returns cached value
551+
version, err = tested.RuntimeVersion(t.Context())
552+
assert.NilError(t, err)
553+
assert.Equal(t, version, "1.48")
554+
}
555+
556+
func TestCurrentAPIVersionRetriesOnTransientError(t *testing.T) {
557+
mockCtrl := gomock.NewController(t)
558+
defer mockCtrl.Finish()
559+
560+
apiClient := mocks.NewMockAPIClient(mockCtrl)
561+
cli := mocks.NewMockCli(mockCtrl)
562+
tested := &composeService{dockerCli: cli}
563+
564+
cli.EXPECT().Client().Return(apiClient).AnyTimes()
565+
566+
// First call: Ping fails with a transient error
567+
firstCall := apiClient.EXPECT().Ping(gomock.Any(), client.PingOptions{NegotiateAPIVersion: true}).
568+
Return(client.PingResult{}, context.DeadlineExceeded).Times(1)
569+
570+
// Second call: Ping succeeds after the transient failure
571+
apiClient.EXPECT().Ping(gomock.Any(), client.PingOptions{NegotiateAPIVersion: true}).
572+
Return(client.PingResult{APIVersion: "1.44"}, nil).Times(1).After(firstCall)
573+
apiClient.EXPECT().ClientVersion().Return("1.44").Times(1)
574+
575+
// First call should return the transient error
576+
_, err := tested.CurrentAPIVersion(t.Context())
577+
assert.ErrorIs(t, err, context.DeadlineExceeded)
578+
579+
// Second call should succeed — error was not cached
580+
version, err := tested.CurrentAPIVersion(t.Context())
581+
assert.NilError(t, err)
582+
assert.Equal(t, version, "1.44")
583+
584+
// Third call should return the cached value without calling Ping again
585+
version, err = tested.CurrentAPIVersion(t.Context())
586+
assert.NilError(t, err)
587+
assert.Equal(t, version, "1.44")
588+
}

0 commit comments

Comments
 (0)