Skip to content

Commit 8b1faef

Browse files
chore: sync actions from gh-aw@v0.81.5 (#174)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 9fba95c commit 8b1faef

14 files changed

Lines changed: 302 additions & 53 deletions

setup/js/add_labels.cjs

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ const { MAX_LABELS } = require("./constants.cjs");
3333
const { createCountGatedHandler } = require("./handler_scaffold.cjs");
3434
const { withRetry, RATE_LIMIT_RETRY_CONFIG } = require("./error_recovery.cjs");
3535
const { resolveInvocationContext } = require("./invocation_context_helpers.cjs");
36-
const { normalizeIssueIntentLabelNames } = require("./issue_intents.cjs");
36+
const { hasIssueIntentsRuntimeFeature, normalizeIssueIntentLabelNames, normalizeIssueIntentLabelSpecs } = require("./issue_intents.cjs");
3737

3838
/**
3939
* Main handler factory for add_labels
@@ -92,9 +92,23 @@ const main = createCountGatedHandler({
9292
const contextType = effectiveContext.eventPayload?.pull_request ? "pull request" : "issue";
9393
const requestedLabels = message.labels ?? [];
9494
core.info(`Requested labels: ${JSON.stringify(requestedLabels)}`);
95+
const issueIntentsEnabled = hasIssueIntentsRuntimeFeature();
96+
/** @type {Map<string, {name: string, rationale?: string, confidence?: "LOW"|"MEDIUM"|"HIGH", suggest?: boolean}>} */
97+
const requestedLabelSpecByLowerName = new Map();
9598
let requestedLabelNames;
9699
try {
97-
requestedLabelNames = normalizeIssueIntentLabelNames(requestedLabels);
100+
if (issueIntentsEnabled) {
101+
const requestedLabelSpecs = normalizeIssueIntentLabelSpecs(requestedLabels);
102+
for (const labelSpec of requestedLabelSpecs) {
103+
const key = labelSpec.name.toLowerCase();
104+
if (!requestedLabelSpecByLowerName.has(key)) {
105+
requestedLabelSpecByLowerName.set(key, labelSpec);
106+
}
107+
}
108+
requestedLabelNames = requestedLabelSpecs.map(labelSpec => labelSpec.name);
109+
} else {
110+
requestedLabelNames = normalizeIssueIntentLabelNames(requestedLabels);
111+
}
98112
} catch (error) {
99113
const errorMessage = getErrorMessage(error);
100114
core.warning(`Invalid add_labels payload: ${errorMessage}`);
@@ -172,7 +186,9 @@ const main = createCountGatedHandler({
172186
};
173187
}
174188

175-
core.info(`Adding ${uniqueLabels.length} labels to ${contextType} #${itemNumber} in ${itemRepo}: ${JSON.stringify(uniqueLabels)}`);
189+
const labelsRequestPayload = issueIntentsEnabled ? uniqueLabels.map(name => requestedLabelSpecByLowerName.get(name.toLowerCase()) ?? { name }) : uniqueLabels;
190+
191+
core.info(`Adding ${uniqueLabels.length} labels to ${contextType} #${itemNumber} in ${itemRepo}: ${JSON.stringify(labelsRequestPayload)}`);
176192

177193
// If in staged mode, preview the labels without adding them
178194
if (isStaged) {
@@ -197,7 +213,7 @@ const main = createCountGatedHandler({
197213
owner: repoParts.owner,
198214
repo: repoParts.repo,
199215
issue_number: itemNumber,
200-
labels: uniqueLabels,
216+
labels: labelsRequestPayload,
201217
}),
202218
RATE_LIMIT_RETRY_CONFIG,
203219
`add_labels to ${contextType} #${itemNumber} in ${itemRepo}`

setup/js/claude_harness.cjs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ const {
4545
fetchAWFReflect,
4646
fetchModelsFromUrl,
4747
} = require("./awf_reflect.cjs");
48-
const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
48+
const { emitMissingToolPermissionIssue, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
4949
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
5050
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
5151
const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");
@@ -430,6 +430,14 @@ async function main() {
430430
}
431431

432432
if (hasNumerousPermissionDenied) {
433+
// If the agent already produced expected safe-outputs, the permission-denied
434+
// signals are from optional/exploratory commands — not from the core task work.
435+
// Suppress the terminal verdict and exit 0 to avoid a false-red run.
436+
if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
437+
log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
438+
lastExitCode = 0;
439+
break;
440+
}
433441
const deniedCommands = extractDeniedCommands(result.output);
434442
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
435443
log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
@@ -521,6 +529,7 @@ if (typeof module !== "undefined" && module.exports) {
521529
buildMissingToolPermissionIssuePayload,
522530
emitMissingToolPermissionIssue,
523531
hasNoopInSafeOutputs,
532+
hasExpectedSafeOutputs,
524533
};
525534
}
526535

setup/js/codex_harness.cjs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ const {
4444
fetchAWFReflect,
4545
fetchModelsFromUrl,
4646
} = require("./awf_reflect.cjs");
47-
const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
47+
const { emitMissingToolPermissionIssue, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
4848
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
4949
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
5050
const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");
@@ -492,6 +492,14 @@ async function main() {
492492
}
493493

494494
if (hasNumerousPermissionDenied) {
495+
// If the agent already produced expected safe-outputs, the permission-denied
496+
// signals are from optional/exploratory commands — not from the core task work.
497+
// Suppress the terminal verdict and exit 0 to avoid a false-red run.
498+
if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
499+
log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
500+
lastExitCode = 0;
501+
break;
502+
}
495503
const deniedCommands = extractDeniedCommands(result.output);
496504
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
497505
log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
@@ -553,6 +561,7 @@ if (typeof module !== "undefined" && module.exports) {
553561
getConfiguredOpenAIPortFromReflect,
554562
validateCodexOpenAIBaseURLFromReflect,
555563
hasNoopInSafeOutputs,
564+
hasExpectedSafeOutputs,
556565
};
557566
}
558567

setup/js/copilot_harness.cjs

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ const {
5858
fetchModelsFromUrl,
5959
resolveCopilotSDKCustomProviderFromReflect,
6060
} = require("./awf_reflect.cjs");
61-
const { runSafeOutputsCLI, buildMissingToolAlternatives, emitMissingToolPermissionIssue, emitInfrastructureIncomplete, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
61+
const { runSafeOutputsCLI, buildMissingToolAlternatives, emitMissingToolPermissionIssue, emitInfrastructureIncomplete, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
6262
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
6363
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
6464
const { isCAPIQuotaExceededError } = require("./detect_agent_errors.cjs");
@@ -447,6 +447,21 @@ function buildCopilotProxyAuthFailureDiagnostic(output, env = process.env, optio
447447
);
448448
}
449449

450+
/**
 * Decide whether an authentication failure is worth a one-time fresh-run retry.
 *
 * The result is `true` only when every step below succeeds:
 *   1. `hasOutput` is truthy,
 *   2. `isAuthenticationFailedError` recognises `output` as an authentication failure,
 *   3. `parseProviderAuthFailure` extracts a failure record from `output`, and
 *   4. `isLikelyAWFAPIProxyURL` accepts that record's `providerUrl`
 *      (i.e. the failure appears to come from the gh-aw API proxy).
 *
 * @param {string} output - Agent output to inspect.
 * @param {boolean} hasOutput - Whether the attempt produced output (partial execution).
 * @returns {boolean} `true` when the failure qualifies for the retry, otherwise `false`.
 */
function isRetryableProxyAuthenticationFailure(output, hasOutput) {
  // Short-circuit keeps the error matcher from running when there was no output at all.
  const failedAfterOutput = hasOutput && isAuthenticationFailedError(output);
  if (!failedAfterOutput) {
    return false;
  }

  const failure = parseProviderAuthFailure(output);
  return failure ? Boolean(isLikelyAWFAPIProxyURL(failure.providerUrl)) : false;
}
464+
450465
/**
451466
* Detect known Copilot error patterns for workflow outputs.
452467
* @param {string} output
@@ -823,6 +838,7 @@ async function main() {
823838
const isAuthErr = isNoAuthInfoError(result.output);
824839
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
825840
const proxyAuthDiagnostic = buildCopilotProxyAuthFailureDiagnostic(result.output, process.env);
841+
const retryableProxyAuthenticationFailure = isRetryableProxyAuthenticationFailure(result.output, result.hasOutput);
826842
const isNullTypeToolCall = isNullTypeToolCallError(result.output);
827843
const isSDKSessionIdleTimeout = isSDKSessionIdleTimeoutError(result.output);
828844
const isMCPGatewayShutdown = isMCPGatewayShutdownError(result.output);
@@ -882,16 +898,32 @@ async function main() {
882898
break;
883899
}
884900

885-
if (attempt === 0 && isAuthenticationFailed) {
901+
// attempt === 0 makes this a one-time fresh-run recovery path.
902+
if (attempt === 0 && retryableProxyAuthenticationFailure) {
903+
useContinueOnRetry = false;
904+
continueDisabledPermanently = true;
905+
log(`attempt ${attempt + 1}: provider authentication failed after partial execution - will retry once as fresh run to avoid losing completed agent work`);
906+
continue;
907+
}
908+
909+
if (isAuthenticationFailed) {
886910
if (proxyAuthDiagnostic) {
887-
log(`attempt ${attempt + 1}: ${proxyAuthDiagnostic} — not retrying (first-attempt auth failure is non-retryable)`);
911+
log(`attempt ${attempt + 1}: ${proxyAuthDiagnostic} — not retrying`);
888912
} else {
889-
log(`attempt ${attempt + 1}: authentication failed — not retrying (first-attempt auth failure is non-retryable)`);
913+
log(`attempt ${attempt + 1}: authentication failed — not retrying`);
890914
}
891915
break;
892916
}
893917

894918
if (hasNumerousPermissionDenied) {
919+
// If the agent already produced expected safe-outputs, the permission-denied
920+
// signals are from optional/exploratory commands — not from the core task work.
921+
// Suppress the terminal verdict and exit 0 to avoid a false-red run.
922+
if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
923+
log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
924+
lastExitCode = 0;
925+
break;
926+
}
895927
const deniedCommands = extractDeniedCommands(result.output);
896928
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
897929
log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
@@ -1035,6 +1067,7 @@ if (typeof module !== "undefined" && module.exports) {
10351067
buildCopilotSDKServerArgs,
10361068
getCopilotSDKServerPort,
10371069
hasNoopInSafeOutputs,
1070+
hasExpectedSafeOutputs,
10381071
isDetectionPhase,
10391072
isModelAvailableInReflectData,
10401073
isModelAvailableInReflectFile,
@@ -1043,6 +1076,7 @@ if (typeof module !== "undefined" && module.exports) {
10431076
detectCopilotErrors,
10441077
classifyCopilotFailure,
10451078
extractOutputTail,
1079+
isRetryableProxyAuthenticationFailure,
10461080
hasNumerousPermissionDeniedIssues,
10471081
INFERENCE_ACCESS_ERROR_PATTERN,
10481082
AGENTIC_ENGINE_TIMEOUT_PATTERN,

setup/js/detect_agent_errors.cjs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,9 @@ const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;
5757
// - "unknown model <id>"
5858
// - "model ... not found"
5959
// - "model ... does not exist"
60+
// - "Model not found" (standalone, e.g. AIC api-proxy 404: "404 Not Found: Model not found")
6061
const MODEL_NOT_SUPPORTED_PATTERN =
61-
/(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable))/i;
62+
/(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable)|404\b[^\n]*\bModel\s+not\s+found)/i;
6263

6364
// Pattern: Copilot/CAPI quota exhaustion and rate-limit responses.
6465
// Matches all observed forms:

setup/js/log_parser_bootstrap.cjs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,36 @@ async function runLogParser(options) {
7272
return count;
7373
}
7474

75+
/**
 * Returns true if the log entries show the agent ran at least one turn.
 *
 * "At least one turn" is used (rather than "all work finished") because the
 * log only records the turn count, not whether every intended task succeeded.
 * The check is sufficient to distinguish a post-completion MCP relaunch
 * failure (the agent was already executing) from a startup failure where the
 * MCP never launched and the agent ran zero turns.
 *
 * Handles both log formats:
 * - Legacy format (Codex, Copilot, etc.): { type: "result", num_turns: N }
 * - Copilot event format (Claude): { type: "session.result", data: { numTurns: N } }
 *
 * Anything that is not an array (including null/undefined) yields false, as do
 * entries that are not objects or whose turn count is not a positive number.
 *
 * @param {Array|null|undefined} entries
 * @returns {boolean}
 */
function agentRanToCompletion(entries) {
  // Array.isArray already rejects null/undefined, and `some` is false for an
  // empty array, so no separate emptiness checks are needed.
  if (!Array.isArray(entries)) {
    return false;
  }

  return entries.some(entry => {
    if (!entry || typeof entry !== "object") return false;

    // Pick the turn counter that belongs to the entry's format, if any.
    let turns;
    if (entry.type === "result") {
      turns = entry.num_turns; // Legacy format
    } else if (entry.type === "session.result") {
      turns = entry.data?.numTurns; // Copilot event format (Claude)
    }

    return typeof turns === "number" && turns > 0;
  });
}
104+
75105
try {
76106
const logPath = process.env.GH_AW_AGENT_OUTPUT;
77107
if (!logPath) {
@@ -309,6 +339,13 @@ async function runLogParser(options) {
309339
const failedServers = mcpFailures.join(", ");
310340
if (safeOutputEntriesCount > 0) {
311341
core.warning(`MCP server(s) failed to launch (${failedServers}), but agent completed with ${safeOutputEntriesCount} safe output ${safeOutputEntriesCount === 1 ? "entry" : "entries"}`);
342+
} else if (agentRanToCompletion(logEntries)) {
343+
// The agent ran turns to completion even though an MCP server failed to launch.
344+
// This is a post-completion relaunch/health-probe failure — the MCP server was
345+
// healthy during execution (the agent used it throughout the run) and the failure
346+
// occurred after the work was done. Treat as non-fatal so genuine task success
347+
// is not masked by a transient infrastructure event.
348+
core.warning(`MCP server(s) failed to launch (${failedServers}), but agent completed turns — treating as non-fatal post-completion relaunch`);
312349
} else {
313350
core.setFailed(`${ERR_API}: MCP server(s) failed to launch: ${failedServers}`);
314351
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// @ts-check
2+
3+
const fs = require("fs");
4+
const os = require("os");
5+
const path = require("path");
6+
7+
/**
 * Patch the AWF config file with chroot settings for ARC/DinD runners.
 *
 * Reads `<runnerTemp>/gh-aw/awf-config.json`, replaces its `chroot` key with
 * the binaries source path plus the current OS user's identity (from
 * `os.userInfo()`), and writes the resulting single-line JSON both back to
 * that file and to `<binariesSourcePath>/awf-config.json`.
 *
 * @param {Object} [options]
 * @param {string} [options.runnerTemp] Defaults to the RUNNER_TEMP environment variable.
 * @param {string} [options.binariesSourcePath] Defaults to GH_AW_CHROOT_BINARIES_SOURCE_PATH, then "/tmp/gh-aw".
 * @param {string} [options.identityHome] Defaults to GH_AW_CHROOT_IDENTITY_HOME, then "/tmp/gh-aw/home".
 * @returns {string} The patched JSON content
 * @throws {Error} When no runner temp directory is available, or when the
 *   existing config is not a JSON object. File-system and JSON parse errors
 *   from the underlying calls propagate unchanged.
 */
function patchAWFChrootConfig(options = {}) {
  const runnerTemp = options.runnerTemp || process.env.RUNNER_TEMP;
  if (!runnerTemp) {
    throw new Error("RUNNER_TEMP is required");
  }

  const binariesSourcePath = options.binariesSourcePath || process.env.GH_AW_CHROOT_BINARIES_SOURCE_PATH || "/tmp/gh-aw";
  const identityHome = options.identityHome || process.env.GH_AW_CHROOT_IDENTITY_HOME || "/tmp/gh-aw/home";
  const configPath = path.join(runnerTemp, "gh-aw", "awf-config.json");
  const artifactConfigPath = path.join(binariesSourcePath, "awf-config.json");

  const config = JSON.parse(fs.readFileSync(configPath, "utf8"));
  // A property set on an array is dropped by JSON.stringify, and one set on a
  // primitive is lost entirely, so anything but a plain object would produce
  // output without the chroot section. Fail loudly before touching any file.
  if (config === null || typeof config !== "object" || Array.isArray(config)) {
    throw new Error(`AWF config at ${configPath} must be a JSON object`);
  }

  const { username, uid, gid } = os.userInfo();
  config.chroot = {
    binariesSourcePath,
    identity: {
      user: username,
      uid,
      gid,
      home: identityHome,
    },
  };

  const output = `${JSON.stringify(config)}\n`;
  fs.writeFileSync(configPath, output);
  fs.writeFileSync(artifactConfigPath, output);
  return output;
}
44+
45+
// Run the patch only when this file is executed directly, not when it is required.
if (require.main === module) {
  try {
    patchAWFChrootConfig();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Attach the original error as `cause` so its type and stack trace survive the re-throw.
    throw new Error(`chroot config patch failed: ${message}`, { cause: error });
  }
}
53+
54+
module.exports = { patchAWFChrootConfig };

setup/js/pr_review_buffer.cjs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ const { isStagedMode } = require("./safe_output_helpers.cjs");
2525
const { generateWorkflowCallIdMarker, matchesWorkflowId } = require("./generate_footer.cjs");
2626
const { attachExecutionState, fetchPullRequestReviewState } = require("./safe_output_execution_metadata.cjs");
2727
const { withRetry, RATE_LIMIT_RETRY_CONFIG, isTransientError, sleep } = require("./error_recovery.cjs");
28+
const { ERR_API } = require("./error_codes.cjs");
2829

2930
const SUPERSEDE_REVIEW_MESSAGE = "Superseded by updated review from same workflow.";
3031
const MAX_SUPERSEDE_REVIEW_PAGES = 10;
@@ -126,7 +127,7 @@ function createReviewBuffer() {
126127
return await fetchPullRequestReviewState(github, repoParts, pullRequestNumber);
127128
} catch (error) {
128129
if (!isTransientError(error)) {
129-
throw new Error(`Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)} (non-transient)`, { cause: error });
130+
throw new Error(`${ERR_API}: Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)} (non-transient)`, { cause: error });
130131
}
131132
core.warning(`Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)}. Continuing without execution-state metadata.`);
132133
return null;

setup/js/safe_outputs_bootstrap.cjs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,36 @@ function bootstrapSafeOutputsServer(logger) {
4141
logger.debug("Loading safe-outputs configuration");
4242
const { config, outputFile } = loadConfig(logger);
4343

44+
enforceCreatePullRequestRuntimePolicy(config, logger);
45+
4446
// Load tools
4547
logger.debug("Loading safe-outputs tools");
4648
const tools = loadTools(logger);
4749

4850
return { config, outputFile, tools };
4951
}
5052

53+
/**
 * Refuse startup when runtime policy disables create-pull-request.
 *
 * Throws only when both conditions hold:
 *   - `config` has its own `create_pull_request` key, and
 *   - the GH_AW_POLICY_ALLOW_CREATE_PULL_REQUEST environment variable, trimmed
 *     and lower-cased, equals "false".
 * In every other case (key absent, variable unset, or any other value) the
 * function returns without side effects.
 *
 * @param {Record<string, any>} config
 * @param {Logger} logger Must provide `debugError(message)`; it receives the message before the throw.
 * @throws {Error} When create-pull-request is configured but disabled by policy.
 */
function enforceCreatePullRequestRuntimePolicy(config, logger) {
  const policyVarName = "GH_AW_POLICY_ALLOW_CREATE_PULL_REQUEST";

  // Only the snake_case key is checked: per the original note, loadConfig
  // normalises keys with k.replace(/-/g, '_') before the config reaches here.
  const createPullRequestConfigured = !!config && Object.prototype.hasOwnProperty.call(config, "create_pull_request");
  if (!createPullRequestConfigured) {
    return;
  }

  const rawValue = process.env[policyVarName];
  const normalizedValue = typeof rawValue === "string" ? rawValue.trim().toLowerCase() : "";
  if (normalizedValue !== "false") {
    return;
  }

  const message = `create-pull-request is disabled by runtime policy: ${policyVarName}=false. Remove safe-outputs.create-pull-request or set ${policyVarName}=true.`;
  logger.debugError(message);
  throw new Error(message);
}
73+
5174
/**
5275
* Delete the configuration file to ensure no secrets remain on disk.
5376
* This should be called after the server has been configured and started.
@@ -71,4 +94,5 @@ function cleanupConfigFile(logger) {
7194
module.exports = {
7295
bootstrapSafeOutputsServer,
7396
cleanupConfigFile,
97+
enforceCreatePullRequestRuntimePolicy,
7498
};

0 commit comments

Comments
 (0)