diff --git a/.changeset/patch-fix-resume-auth-failure.md b/.changeset/patch-fix-resume-auth-failure.md new file mode 100644 index 00000000000..1bd3ed3c5f0 --- /dev/null +++ b/.changeset/patch-fix-resume-auth-failure.md @@ -0,0 +1,5 @@ +--- +"gh-aw": patch +--- + +Fix copilot-driver `--resume` authentication failures: retry with `--continue` instead of `--resume`, detect "No authentication information found" as non-retryable, add GITHUB_TOKEN/GH_TOKEN fallback for COPILOT_GITHUB_TOKEN, and log auth token availability for diagnostics. diff --git a/actions/setup/js/copilot_driver.cjs b/actions/setup/js/copilot_driver.cjs index 5a25ede73e2..7ce238f9284 100644 --- a/actions/setup/js/copilot_driver.cjs +++ b/actions/setup/js/copilot_driver.cjs @@ -9,12 +9,14 @@ * * Retry policy: * - If the process produced any output (hasOutput) and exits with a non-zero code, the - * session is considered partially executed. The driver retries with --resume so the + * session is considered partially executed. The driver retries with --continue so the * Copilot CLI can continue from where it left off. * - CAPIError 400 is a well-known transient failure mode and is logged explicitly, but * any partial-execution failure is retried — not just CAPIError 400. * - If the process produced no output (failed to start / auth error before any work), the * driver does not retry because there is nothing to resume. + * - "No authentication information found" errors are non-retryable: the absent token will + * remain absent on every subsequent attempt, so all further retries will also fail. * - Retries use exponential backoff: 5s → 10s → 20s (capped at 60s). * - Maximum 3 retry attempts after the initial run. * @@ -43,6 +45,11 @@ const CAPI_ERROR_400_PATTERN = /CAPIError:\s*400/; // This is a persistent policy configuration error — retrying will not help. const MCP_POLICY_BLOCKED_PATTERN = /MCP servers were blocked by policy:/; +// Pattern to detect missing authentication credentials. 
+// This error means no auth token is available in the environment; retrying will not help +// because the missing token will still be absent on every subsequent attempt. +const NO_AUTH_INFO_PATTERN = /No authentication information found/; + /** * Emit a timestamped diagnostic log line to stderr. * All driver messages are prefixed with "[copilot-driver]" so they are easy to @@ -73,6 +80,17 @@ function isMCPPolicyError(output) { return MCP_POLICY_BLOCKED_PATTERN.test(output); } +/** + * Determines if the collected output contains a "No authentication information found" error. + * This means no auth token (COPILOT_GITHUB_TOKEN, GH_TOKEN, or GITHUB_TOKEN) is available + * in the environment. Retrying will not help because the absent token will remain absent. + * @param {string} output - Collected stdout+stderr from the process + * @returns {boolean} + */ +function isNoAuthInfoError(output) { + return NO_AUTH_INFO_PATTERN.test(output); +} + /** * Sleep for a specified duration * @param {number} ms - Duration in milliseconds @@ -221,11 +239,11 @@ async function main() { const driverStartTime = Date.now(); for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - // Add --resume flag on retries so the copilot session resumes from where it left off - const currentArgs = attempt > 0 ? [...args, "--resume"] : args; + // Add --continue flag on retries so the copilot session continues from where it left off + const currentArgs = attempt > 0 ? [...args, "--continue"] : args; if (attempt > 0) { - log(`retry ${attempt}/${MAX_RETRIES}: sleeping ${delay}ms before next attempt with --resume`); + log(`retry ${attempt}/${MAX_RETRIES}: sleeping ${delay}ms before next attempt with --continue`); await sleep(delay); delay = Math.min(delay * BACKOFF_MULTIPLIER, MAX_DELAY_MS); log(`retry ${attempt}/${MAX_RETRIES}: woke up, next delay cap will be ${Math.min(delay * BACKOFF_MULTIPLIER, MAX_DELAY_MS)}ms`); @@ -241,13 +259,22 @@ async function main() { } // Determine whether to retry. 
- // Retry whenever the session was partially executed (hasOutput), using --resume so that + // Retry whenever the session was partially executed (hasOutput), using --continue so that // the Copilot CLI can continue from where it left off. CAPIError 400 is the well-known - // transient case, but any partial-execution failure is eligible for a resume retry. - // Exception: MCP policy errors are persistent configuration issues — never retry. + // transient case, but any partial-execution failure is eligible for a continue retry. + // Exceptions: MCP policy errors and auth errors are persistent — never retry. const isCAPIError = isTransientCAPIError(result.output); const isMCPPolicy = isMCPPolicyError(result.output); - log(`attempt ${attempt + 1} failed:` + ` exitCode=${result.exitCode}` + ` isCAPIError400=${isCAPIError}` + ` isMCPPolicyError=${isMCPPolicy}` + ` hasOutput=${result.hasOutput}` + ` retriesRemaining=${MAX_RETRIES - attempt}`); + const isAuthErr = isNoAuthInfoError(result.output); + log( + `attempt ${attempt + 1} failed:` + + ` exitCode=${result.exitCode}` + + ` isCAPIError400=${isCAPIError}` + + ` isMCPPolicyError=${isMCPPolicy}` + + ` isAuthError=${isAuthErr}` + + ` hasOutput=${result.hasOutput}` + + ` retriesRemaining=${MAX_RETRIES - attempt}` + ); // MCP policy errors are persistent — retrying will not help. if (isMCPPolicy) { @@ -255,9 +282,17 @@ async function main() { break; } + // Auth errors are persistent for the duration of the job — retrying will not help. + // "No authentication information found" means COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN + // are all absent or invalid. Retrying with --continue will produce the same auth failure. + if (isAuthErr) { + log(`attempt ${attempt + 1}: no authentication information found — not retrying (COPILOT_GITHUB_TOKEN, GH_TOKEN, and GITHUB_TOKEN are all absent or invalid)`); + break; + } + if (attempt < MAX_RETRIES && result.hasOutput) { const reason = isCAPIError ? 
"CAPIError 400 (transient)" : "partial execution"; - log(`attempt ${attempt + 1}: ${reason} — will retry with --resume (attempt ${attempt + 2}/${MAX_RETRIES + 1})`); + log(`attempt ${attempt + 1}: ${reason} — will retry with --continue (attempt ${attempt + 2}/${MAX_RETRIES + 1})`); continue; } diff --git a/actions/setup/js/copilot_driver.test.cjs b/actions/setup/js/copilot_driver.test.cjs index 37ff8f843c2..dc8f5799dfc 100644 --- a/actions/setup/js/copilot_driver.test.cjs +++ b/actions/setup/js/copilot_driver.test.cjs @@ -34,7 +34,7 @@ describe("copilot_driver.cjs", () => { }); }); - describe("retry policy: resume on partial execution", () => { + describe("retry policy: continue on partial execution", () => { // Inline the same retry-eligibility logic as the driver for unit testing. // The driver retries whenever the session produced output (hasOutput), regardless // of the specific error type. CAPIError 400 is just the well-known case. @@ -141,6 +141,84 @@ describe("copilot_driver.cjs", () => { }); }); + describe("no-auth-info detection pattern", () => { + const NO_AUTH_INFO_PATTERN = /No authentication information found/; + + it("matches the exact error from the issue report", () => { + const errorOutput = + "Error: No authentication information found.\n" + + "Copilot can be authenticated with GitHub using an OAuth Token or a Fine-Grained Personal Access Token.\n" + + "To authenticate, you can use any of the following methods:\n" + + " - Start 'copilot' and run the '/login' command\n" + + " - Set the COPILOT_GITHUB_TOKEN, GH_TOKEN, or GITHUB_TOKEN environment variable\n" + + " - Run 'gh auth login' to authenticate with the GitHub CLI"; + expect(NO_AUTH_INFO_PATTERN.test(errorOutput)).toBe(true); + }); + + it("matches when embedded in larger output after a long run", () => { + const output = "Some agent work output\nMore work\nNo authentication information found\nEnd"; + expect(NO_AUTH_INFO_PATTERN.test(output)).toBe(true); + }); + + it("does not match unrelated 
auth errors", () => { + expect(NO_AUTH_INFO_PATTERN.test("Access denied by policy settings")).toBe(false); + expect(NO_AUTH_INFO_PATTERN.test("Error: 401 Unauthorized")).toBe(false); + expect(NO_AUTH_INFO_PATTERN.test("Authentication failed")).toBe(false); + expect(NO_AUTH_INFO_PATTERN.test("CAPIError: 400 Bad Request")).toBe(false); + expect(NO_AUTH_INFO_PATTERN.test("")).toBe(false); + }); + }); + + describe("auth error prevents retry", () => { + // Inline the same retry logic as the driver, including auth error check + const MCP_POLICY_BLOCKED_PATTERN = /MCP servers were blocked by policy:/; + const NO_AUTH_INFO_PATTERN = /No authentication information found/; + const MAX_RETRIES = 3; + + /** + * @param {{hasOutput: boolean, exitCode: number, output: string}} result + * @param {number} attempt + * @returns {boolean} + */ + function shouldRetry(result, attempt) { + if (result.exitCode === 0) return false; + // MCP policy errors are persistent — never retry + if (MCP_POLICY_BLOCKED_PATTERN.test(result.output)) return false; + // Auth errors are persistent — never retry + if (NO_AUTH_INFO_PATTERN.test(result.output)) return false; + return attempt < MAX_RETRIES && result.hasOutput; + } + + it("does not retry when auth fails on first attempt (no real work done)", () => { + const result = { exitCode: 1, hasOutput: true, output: "Error: No authentication information found." }; + expect(shouldRetry(result, 0)).toBe(false); + }); + + it("does not retry when auth fails on a --continue attempt (the reported bug scenario)", () => { + // This replicates the issue: attempt 1 ran for 39 min then failed, + // attempt 2 (--continue) fails with auth error — should not retry attempts 3 & 4. + const resumeResult = { exitCode: 1, hasOutput: true, output: "Error: No authentication information found." 
}; + expect(shouldRetry(resumeResult, 1)).toBe(false); + expect(shouldRetry(resumeResult, 2)).toBe(false); + expect(shouldRetry(resumeResult, 3)).toBe(false); + }); + + it("does not retry auth error even when output is mixed with other content", () => { + const result = { exitCode: 1, hasOutput: true, output: "Some output\nError: No authentication information found.\nMore output" }; + expect(shouldRetry(result, 0)).toBe(false); + }); + + it("still retries non-auth errors with output (CAPIError 400)", () => { + const result = { exitCode: 1, hasOutput: true, output: "CAPIError: 400 Bad Request" }; + expect(shouldRetry(result, 0)).toBe(true); + }); + + it("still retries generic partial-execution errors with output", () => { + const result = { exitCode: 1, hasOutput: true, output: "Failed to get response from the AI model; retried 5 times" }; + expect(shouldRetry(result, 0)).toBe(true); + }); + }); + describe("retry configuration", () => { it("has sensible default values", () => { // These match the constants in copilot_driver.cjs