Skip to content
This repository was archived by the owner on Jun 16, 2026. It is now read-only.

Commit 5900e78

Browse files
thaJeztahxenoscopic
authored andcommitted
simplify container start logic, taking advantage of idempotency
The start endpoint should be idempotent when trying to start a container that is already started (see [Daemon.containerStart][1], so we can take advantage of that. Once we either created the container, or received a "conflict" (name already in use), we can try starting the container. As there's a time window between a name being reserved and the container to exist (which may produce a 404 during that time window), we ignore "not found" errors, and retry the start. [1]: https://github.com/moby/moby/blob/5318877858c9fc94b5cccc3cf1a75d4ec46951b8/daemon/start.go#L76-L93 Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
1 parent d119b50 commit 5900e78

1 file changed

Lines changed: 21 additions & 37 deletions

File tree

pkg/standalone/containers.go

Lines changed: 21 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -174,36 +174,26 @@ func determineBridgeGatewayIP(ctx context.Context, dockerClient client.NetworkAP
174174

175175
// waitForContainerToStart waits for a container to start.
176176
func waitForContainerToStart(ctx context.Context, dockerClient client.ContainerAPIClient, containerID string) error {
177-
// Unfortunately the Docker API's /containers/{id}/wait API (and the
178-
// corresponding Client.ContainerWait method) don't allow waiting for
179-
// container startup, so instead we'll take a polling approach.
180177
for i := 10; i > 0; i-- {
181-
if status, err := dockerClient.ContainerInspect(ctx, containerID); err != nil {
182-
// There is a small gap between the time that a container ID and
183-
// name are registered and the time that the container is actually
184-
// created and shows up in container list and inspect requests:
185-
//
186-
// https://github.com/moby/moby/blob/de24c536b0ea208a09e0fff3fd896c453da6ef2e/daemon/container.go#L138-L156
187-
//
188-
// Given that multiple install operations tend to end up tightly
189-
// synchronized by the preceeding pull operation and that this
190-
// method is specifically designed to work around these race
191-
// conditions, we'll allow 404 errors to pass silently (at least up
192-
// until the polling time out - unfortunately we can't make the 404
193-
// acceptance window any smaller than that because the CUDA-based
194-
// containers are large and can take time to create).
195-
if !errdefs.IsNotFound(err) {
196-
return fmt.Errorf("unable to inspect container (%s): %w", containerID[:12], err)
197-
}
198-
} else {
199-
switch status.State.Status {
200-
case container.StateRunning:
201-
return nil
202-
case container.StateCreated, container.StateRestarting:
203-
// wait for container to start
204-
default:
205-
return fmt.Errorf("container is in unexpected state %q", status.State.Status)
206-
}
178+
err := dockerClient.ContainerStart(ctx, containerID, container.StartOptions{})
179+
if err == nil {
180+
return nil
181+
}
182+
// There is a small gap between the time that a container ID and
183+
// name are registered and the time that the container is actually
184+
// created and shows up in container list and inspect requests:
185+
//
186+
// https://github.com/moby/moby/blob/de24c536b0ea208a09e0fff3fd896c453da6ef2e/daemon/container.go#L138-L156
187+
//
188+
// Given that multiple install operations tend to end up tightly
189+
// synchronized by the preceeding pull operation and that this
190+
// method is specifically designed to work around these race
191+
// conditions, we'll allow 404 errors to pass silently (at least up
192+
// until the polling time out - unfortunately we can't make the 404
193+
// acceptance window any smaller than that because the CUDA-based
194+
// containers are large and can take time to create).
195+
if !errdefs.IsNotFound(err) {
196+
return err
207197
}
208198
if i > 1 {
209199
select {
@@ -278,19 +268,13 @@ func CreateControllerContainer(ctx context.Context, dockerClient *client.Client,
278268
// progress, then we wait for whichever install process creates the
279269
// container first and then wait for its container to be ready.
280270
resp, err := dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, controllerContainerName)
281-
if err != nil {
282-
if errdefs.IsConflict(err) {
283-
if err := waitForContainerToStart(ctx, dockerClient, controllerContainerName); err != nil {
284-
return fmt.Errorf("failed waiting for concurrent installation: %w", err)
285-
}
286-
return nil
287-
}
271+
if !errdefs.IsConflict(err) {
288272
return fmt.Errorf("failed to create container %s: %w", controllerContainerName, err)
289273
}
290274

291275
// Start the container.
292276
printer.Printf("Starting model runner container %s...\n", controllerContainerName)
293-
if err := dockerClient.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
277+
if err := waitForContainerToStart(ctx, dockerClient, controllerContainerName); err != nil {
294278
_ = dockerClient.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
295279
return fmt.Errorf("failed to start container %s: %w", controllerContainerName, err)
296280
}

0 commit comments

Comments
 (0)