Skip to content

Commit bde0a0a

Browse files
committed
Fix race condition with sending a request while container is stopping
Due to some quirks in the runtime, it's possible for the DO (Durable Object) to send a request to a container while it believes the container is running, only for the container to stop and the monitor promise to resolve while the request is still in flight. This results in an error, and instead of retrying we throw a 500 error. With this change, we recognize this case, restart the container, and retry the request. This is a band-aid solution, but we will follow up with some improvements to the runtime that will clean up the state management required in this DO class.
1 parent 0b6112f commit bde0a0a

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

src/lib/container.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ const isRuntimeSignalledError = (error: unknown): boolean =>
7777
// Returns true when `error` matches NOT_LISTENING_ERROR, as decided by isErrorOfType.
const isNotListeningError = (error: unknown): boolean => isErrorOfType(error, NOT_LISTENING_ERROR);
7878
// Returns true when `error` matches UNEXPECTED_EXIT_ERROR, as decided by isErrorOfType.
const isContainerExitNonZeroError = (error: unknown): boolean =>
7979
isErrorOfType(error, UNEXPECTED_EXIT_ERROR);
80+
/**
 * Reports whether `error` looks like a "container is not running" failure.
 *
 * The error is tested against each known message fragment via `isErrorOfType`;
 * the first match wins and the remaining fragments are not checked.
 * NOTE(review): presumably `isErrorOfType` does a substring match on the error
 * message — confirm against its definition.
 *
 * @param error - The caught value to classify; may be of any type.
 * @returns `true` if any fragment matches, otherwise `false`.
 */
const isContainerNotRunningError = (error: unknown): boolean => {
  for (const fragment of [
    'the container is not running',
    'not expected to be running',
    'consider calling start()',
  ]) {
    if (isErrorOfType(error, fragment)) {
      return true;
    }
  }
  return false;
};
8088

8189
function getExitCodeFromError(error: unknown): number | null {
8290
if (!(error instanceof Error)) {
@@ -721,6 +729,25 @@ export class Container<Env = unknown> extends DurableObject<Env> {
721729
throw e;
722730
}
723731

732+
// If container stopped during the request (e.g., sleepAfter expired), restart and retry
733+
if (!this.container.running || isContainerNotRunningError(e)) {
734+
try {
735+
await this.startAndWaitForPorts(port);
736+
return await tcpPort.fetch(containerUrl, request);
737+
} catch (retryError) {
738+
if (isNoInstanceError(retryError)) {
739+
return new Response(
740+
'There is no Container instance available at this time.\nThis is likely because you have reached your max concurrent instance count (set in wrangler config) or are you currently provisioning the Container.\nIf you are deploying your Container for the first time, check your dashboard to see provisioning status, this may take a few minutes.',
741+
{ status: 503 }
742+
);
743+
}
744+
return new Response(
745+
`Failed to restart container: ${retryError instanceof Error ? retryError.message : String(retryError)}`,
746+
{ status: 500 }
747+
);
748+
}
749+
}
750+
724751
// This error means that the container might've just restarted
725752
if (e.message.includes('Network connection lost.')) {
726753
return new Response('Container suddenly disconnected, try again', { status: 500 });

0 commit comments

Comments
 (0)