Skip to content

Commit df79ac5

Browse files
authored
Refactor container startup-failure detection into shared helpers (#4336)
* Initial plan * refactor: deduplicate startup failure helpers --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent 5475caf commit df79ac5

2 files changed

Lines changed: 49 additions & 61 deletions

File tree

src/container-lifecycle-branches.test.ts

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* - reportBlockedDomains "else" branch (domain allowed, standard port, other reason)
66
* - checkSquidLogs IPv6 target with non-numeric trailing segment
77
* - checkSquidLogs with no TCP_DENIED entries (empty log coverage)
8-
* - didApiProxyFailStartup with healthStatus === 'unhealthy' from inspect output
8+
* - didContainerFailStartup with healthStatus === 'unhealthy' from inspect output
99
*/
1010

1111
import { startContainers, runAgentCommand } from './container-lifecycle';
@@ -191,6 +191,37 @@ describe('container-lifecycle uncovered branches', () => {
191191
expect(upCalls).toHaveLength(2);
192192
});
193193

194+
describe('startContainers - squid unhealthy via inspect health status', () => {
195+
it('should retry when docker inspect reports squid running but unhealthy', async () => {
196+
// 1. docker rm (initial cleanup)
197+
mockExecaFn.mockResolvedValueOnce(makeExecaResult());
198+
// 2. docker compose up (first attempt — generic error)
199+
mockExecaFn.mockRejectedValueOnce(new Error('Command failed: docker compose up -d'));
200+
// 3. docker inspect awf-api-proxy -> healthy (not the failing container)
201+
mockExecaFn.mockResolvedValueOnce(makeExecaResult('running|healthy'));
202+
// 4. docker inspect awf-squid -> "running|unhealthy"
203+
mockExecaFn.mockResolvedValueOnce(makeExecaResult('running|unhealthy'));
204+
// 5. docker logs (diagnosis before retry)
205+
mockExecaFn.mockResolvedValueOnce(makeExecaResult('squid logs'));
206+
// 6. docker compose down (cleanup before retry)
207+
mockExecaFn.mockResolvedValueOnce(makeExecaResult());
208+
// 7. docker compose up (retry — succeeds)
209+
mockExecaFn.mockResolvedValueOnce(makeExecaResult());
210+
211+
await expect(startContainers(testDir, ['github.com'])).resolves.toBeUndefined();
212+
213+
const squidInspectCalls = mockExecaFn.mock.calls.filter((call: any[]) =>
214+
call[0] === 'docker' && Array.isArray(call[1]) && call[1][0] === 'inspect' && call[1][1] === 'awf-squid'
215+
);
216+
expect(squidInspectCalls).toHaveLength(1);
217+
218+
const upCalls = mockExecaFn.mock.calls.filter((call: any[]) =>
219+
call[0] === 'docker' && Array.isArray(call[1]) && call[1].includes('up')
220+
);
221+
expect(upCalls).toHaveLength(2);
222+
});
223+
});
224+
194225
it('should retry when docker inspect reports exited|unhealthy', async () => {
195226
// 1. docker rm (initial cleanup)
196227
mockExecaFn.mockResolvedValueOnce(makeExecaResult());

src/container-lifecycle.ts

Lines changed: 17 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -83,32 +83,32 @@ async function checkSquidLogs(workDir: string, proxyLogsDir?: string): Promise<{
8383
}
8484

8585
/**
86-
* Returns true when the Docker Compose error message indicates that the
87-
* api-proxy container specifically failed to start.
88-
* Docker emits "dependency failed to start: container <name> is unhealthy"
89-
* for healthcheck failures, and may emit "dependency failed to start:
90-
* container <name> exited (1)" for startup-time process exits.
86+
* Returns true when the Docker Compose error message indicates that a specific
87+
* container failed to start. Docker emits
88+
* "dependency failed to start: container <name> is unhealthy" for healthcheck
89+
* failures, and may emit "dependency failed to start: container <name> exited
90+
* (1)" for startup-time process exits.
9191
*/
92-
function isApiProxyStartupFailureError(errorMsg: string): boolean {
93-
if (!errorMsg.includes(API_PROXY_CONTAINER_NAME)) {
92+
function isContainerStartupFailureError(errorMsg: string, containerName: string): boolean {
93+
if (!errorMsg.includes(containerName)) {
9494
return false;
9595
}
9696
return errorMsg.includes('is unhealthy') || errorMsg.includes('exited (1)');
9797
}
9898

9999
/**
100100
* Some docker compose failures surface only as a generic execa error message
101-
* while the actionable api-proxy state is visible only via container inspect.
101+
* while the actionable container state is visible only via container inspect.
102102
*/
103-
async function didApiProxyFailStartup(errorMsg: string): Promise<boolean> {
104-
if (isApiProxyStartupFailureError(errorMsg)) {
103+
async function didContainerFailStartup(errorMsg: string, containerName: string): Promise<boolean> {
104+
if (isContainerStartupFailureError(errorMsg, containerName)) {
105105
return true;
106106
}
107107

108108
try {
109109
const result = await execa(
110110
'docker',
111-
['inspect', API_PROXY_CONTAINER_NAME, '--format', '{{.State.Status}}|{{if .State.Health}}{{.State.Health.Status}}{{end}}'],
111+
['inspect', containerName, '--format', '{{.State.Status}}|{{if .State.Health}}{{.State.Health.Status}}{{end}}'],
112112
{
113113
reject: false,
114114
env: getLocalDockerEnv(),
@@ -122,51 +122,7 @@ async function didApiProxyFailStartup(errorMsg: string): Promise<boolean> {
122122
const [containerStatus = '', healthStatus = ''] = result.stdout.trim().split('|');
123123
return containerStatus === 'exited' || healthStatus === 'unhealthy';
124124
} catch (error) {
125-
logger.debug(`Could not inspect ${API_PROXY_CONTAINER_NAME} after startup failure:`, error);
126-
return false;
127-
}
128-
}
129-
130-
/**
131-
* Returns true when the Docker Compose error message indicates that the
132-
* squid container specifically failed to start.
133-
* The squid proxy is occasionally flaky on busy CI runners; detecting its
134-
* startup failure separately allows the retry path to apply to it too.
135-
*/
136-
function isSquidStartupFailureError(errorMsg: string): boolean {
137-
if (!errorMsg.includes(SQUID_CONTAINER_NAME)) {
138-
return false;
139-
}
140-
return errorMsg.includes('is unhealthy') || errorMsg.includes('exited (1)');
141-
}
142-
143-
/**
144-
* Some docker compose failures surface only as a generic execa error message
145-
* while the actionable squid state is visible only via container inspect.
146-
*/
147-
async function didSquidFailStartup(errorMsg: string): Promise<boolean> {
148-
if (isSquidStartupFailureError(errorMsg)) {
149-
return true;
150-
}
151-
152-
try {
153-
const result = await execa(
154-
'docker',
155-
['inspect', SQUID_CONTAINER_NAME, '--format', '{{.State.Status}}|{{if .State.Health}}{{.State.Health.Status}}{{end}}'],
156-
{
157-
reject: false,
158-
env: getLocalDockerEnv(),
159-
}
160-
);
161-
162-
if (result.exitCode !== 0) {
163-
return false;
164-
}
165-
166-
const [containerStatus = '', healthStatus = ''] = result.stdout.trim().split('|');
167-
return containerStatus === 'exited' || healthStatus === 'unhealthy';
168-
} catch (error) {
169-
logger.debug(`Could not inspect ${SQUID_CONTAINER_NAME} after startup failure:`, error);
125+
logger.debug(`Could not inspect ${containerName} after startup failure:`, error);
170126
return false;
171127
}
172128
}
@@ -246,10 +202,11 @@ export async function startContainers(workDir: string, allowedDomains: string[],
246202
logger.success('Containers started successfully');
247203
} catch (firstError) {
248204
const firstErrorMsg = firstError instanceof Error ? firstError.message : String(firstError);
249-
const firstAttemptApiProxyStartupFailure = await didApiProxyFailStartup(firstErrorMsg);
205+
const firstAttemptApiProxyStartupFailure = await didContainerFailStartup(firstErrorMsg, API_PROXY_CONTAINER_NAME);
250206
// Only check squid if api-proxy didn't already claim the failure, so we
251207
// don't fire two inspect calls when api-proxy is the root cause.
252-
const firstAttemptSquidStartupFailure = !firstAttemptApiProxyStartupFailure && await didSquidFailStartup(firstErrorMsg);
208+
const firstAttemptSquidStartupFailure = !firstAttemptApiProxyStartupFailure
209+
&& await didContainerFailStartup(firstErrorMsg, SQUID_CONTAINER_NAME);
253210

254211
// When api-proxy or squid specifically fails to start, retry once.
255212
// Both containers are occasionally flaky on slow or busy CI runners:
@@ -274,7 +231,7 @@ export async function startContainers(workDir: string, allowedDomains: string[],
274231
return;
275232
} catch (retryError) {
276233
const retryErrorMsg = retryError instanceof Error ? retryError.message : String(retryError);
277-
if (await didApiProxyFailStartup(retryErrorMsg)) {
234+
if (await didContainerFailStartup(retryErrorMsg, API_PROXY_CONTAINER_NAME)) {
278235
// Surface api-proxy logs and emit a clear, unambiguous error so
279236
// downstream parse steps don't blame the model for never running.
280237
await logContainerLogsToStderr(API_PROXY_CONTAINER_NAME);
@@ -286,7 +243,7 @@ export async function startContainers(workDir: string, allowedDomains: string[],
286243
}
287244
// Dump squid container logs before falling through to the domain-blockage
288245
// diagnostic path, so that persistent squid failures are diagnosable.
289-
if (await didSquidFailStartup(retryErrorMsg)) {
246+
if (await didContainerFailStartup(retryErrorMsg, SQUID_CONTAINER_NAME)) {
290247
await logContainerLogsToStderr(SQUID_CONTAINER_NAME);
291248
}
292249
// Any remaining retry error (e.g. squid healthcheck or domain blockage) falls

0 commit comments

Comments
 (0)