Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions setup/js/add_labels.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ const { MAX_LABELS } = require("./constants.cjs");
const { createCountGatedHandler } = require("./handler_scaffold.cjs");
const { withRetry, RATE_LIMIT_RETRY_CONFIG } = require("./error_recovery.cjs");
const { resolveInvocationContext } = require("./invocation_context_helpers.cjs");
const { normalizeIssueIntentLabelNames } = require("./issue_intents.cjs");
const { hasIssueIntentsRuntimeFeature, normalizeIssueIntentLabelNames, normalizeIssueIntentLabelSpecs } = require("./issue_intents.cjs");

/**
* Main handler factory for add_labels
Expand Down Expand Up @@ -92,9 +92,23 @@ const main = createCountGatedHandler({
const contextType = effectiveContext.eventPayload?.pull_request ? "pull request" : "issue";
const requestedLabels = message.labels ?? [];
core.info(`Requested labels: ${JSON.stringify(requestedLabels)}`);
const issueIntentsEnabled = hasIssueIntentsRuntimeFeature();
/** @type {Map<string, {name: string, rationale?: string, confidence?: "LOW"|"MEDIUM"|"HIGH", suggest?: boolean}>} */
const requestedLabelSpecByLowerName = new Map();
let requestedLabelNames;
try {
requestedLabelNames = normalizeIssueIntentLabelNames(requestedLabels);
if (issueIntentsEnabled) {
const requestedLabelSpecs = normalizeIssueIntentLabelSpecs(requestedLabels);
for (const labelSpec of requestedLabelSpecs) {
const key = labelSpec.name.toLowerCase();
if (!requestedLabelSpecByLowerName.has(key)) {
requestedLabelSpecByLowerName.set(key, labelSpec);
}
}
requestedLabelNames = requestedLabelSpecs.map(labelSpec => labelSpec.name);
} else {
requestedLabelNames = normalizeIssueIntentLabelNames(requestedLabels);
}
} catch (error) {
const errorMessage = getErrorMessage(error);
core.warning(`Invalid add_labels payload: ${errorMessage}`);
Expand Down Expand Up @@ -172,7 +186,9 @@ const main = createCountGatedHandler({
};
}

core.info(`Adding ${uniqueLabels.length} labels to ${contextType} #${itemNumber} in ${itemRepo}: ${JSON.stringify(uniqueLabels)}`);
const labelsRequestPayload = issueIntentsEnabled ? uniqueLabels.map(name => requestedLabelSpecByLowerName.get(name.toLowerCase()) ?? { name }) : uniqueLabels;

core.info(`Adding ${uniqueLabels.length} labels to ${contextType} #${itemNumber} in ${itemRepo}: ${JSON.stringify(labelsRequestPayload)}`);

// If in staged mode, preview the labels without adding them
if (isStaged) {
Expand All @@ -197,7 +213,7 @@ const main = createCountGatedHandler({
owner: repoParts.owner,
repo: repoParts.repo,
issue_number: itemNumber,
labels: uniqueLabels,
labels: labelsRequestPayload,
}),
RATE_LIMIT_RETRY_CONFIG,
`add_labels to ${contextType} #${itemNumber} in ${itemRepo}`
Expand Down
11 changes: 10 additions & 1 deletion setup/js/claude_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ const {
fetchAWFReflect,
fetchModelsFromUrl,
} = require("./awf_reflect.cjs");
const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { emitMissingToolPermissionIssue, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");
Expand Down Expand Up @@ -430,6 +430,14 @@ async function main() {
}

if (hasNumerousPermissionDenied) {
// If the agent already produced expected safe-outputs, the permission-denied
// signals are from optional/exploratory commands — not from the core task work.
// Suppress the terminal verdict and exit 0 to avoid a false-red run.
if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
lastExitCode = 0;
break;
}
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
Expand Down Expand Up @@ -521,6 +529,7 @@ if (typeof module !== "undefined" && module.exports) {
buildMissingToolPermissionIssuePayload,
emitMissingToolPermissionIssue,
hasNoopInSafeOutputs,
hasExpectedSafeOutputs,
};
}

Expand Down
11 changes: 10 additions & 1 deletion setup/js/codex_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ const {
fetchAWFReflect,
fetchModelsFromUrl,
} = require("./awf_reflect.cjs");
const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { emitMissingToolPermissionIssue, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");
Expand Down Expand Up @@ -492,6 +492,14 @@ async function main() {
}

if (hasNumerousPermissionDenied) {
// If the agent already produced expected safe-outputs, the permission-denied
// signals are from optional/exploratory commands — not from the core task work.
// Suppress the terminal verdict and exit 0 to avoid a false-red run.
if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
lastExitCode = 0;
break;
}
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
Expand Down Expand Up @@ -553,6 +561,7 @@ if (typeof module !== "undefined" && module.exports) {
getConfiguredOpenAIPortFromReflect,
validateCodexOpenAIBaseURLFromReflect,
hasNoopInSafeOutputs,
hasExpectedSafeOutputs,
};
}

Expand Down
42 changes: 38 additions & 4 deletions setup/js/copilot_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ const {
fetchModelsFromUrl,
resolveCopilotSDKCustomProviderFromReflect,
} = require("./awf_reflect.cjs");
const { runSafeOutputsCLI, buildMissingToolAlternatives, emitMissingToolPermissionIssue, emitInfrastructureIncomplete, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { runSafeOutputsCLI, buildMissingToolAlternatives, emitMissingToolPermissionIssue, emitInfrastructureIncomplete, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
const { isCAPIQuotaExceededError } = require("./detect_agent_errors.cjs");
Expand Down Expand Up @@ -447,6 +447,21 @@ function buildCopilotProxyAuthFailureDiagnostic(output, env = process.env, optio
);
}

/**
 * Decide whether an authentication failure merits a single fresh-run retry.
 *
 * The answer is true only when every one of these holds:
 *   - the attempt produced output (`hasOutput` is truthy),
 *   - `isAuthenticationFailedError` classifies the output as an authentication failure,
 *   - `parseProviderAuthFailure` extracts failure details from the output, and
 *   - `isLikelyAWFAPIProxyURL` accepts the provider URL found in those details.
 *
 * @param {string} output - Captured agent output to inspect.
 * @param {boolean} hasOutput - Whether the attempt produced any output before failing.
 * @returns {boolean} True when the failure looks like a gh-aw API proxy auth failure after partial execution.
 */
function isRetryableProxyAuthenticationFailure(output, hasOutput) {
  // Short-circuit: the output is not inspected at all when the attempt produced nothing.
  const failedAfterPartialRun = hasOutput && isAuthenticationFailedError(output);
  if (!failedAfterPartialRun) {
    return false;
  }

  const failureDetails = parseProviderAuthFailure(output);
  if (!failureDetails) {
    return false;
  }

  // Coerce so callers always receive a strict boolean.
  return Boolean(isLikelyAWFAPIProxyURL(failureDetails.providerUrl));
}

/**
* Detect known Copilot error patterns for workflow outputs.
* @param {string} output
Expand Down Expand Up @@ -823,6 +838,7 @@ async function main() {
const isAuthErr = isNoAuthInfoError(result.output);
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const proxyAuthDiagnostic = buildCopilotProxyAuthFailureDiagnostic(result.output, process.env);
const retryableProxyAuthenticationFailure = isRetryableProxyAuthenticationFailure(result.output, result.hasOutput);
const isNullTypeToolCall = isNullTypeToolCallError(result.output);
const isSDKSessionIdleTimeout = isSDKSessionIdleTimeoutError(result.output);
const isMCPGatewayShutdown = isMCPGatewayShutdownError(result.output);
Expand Down Expand Up @@ -882,16 +898,32 @@ async function main() {
break;
}

if (attempt === 0 && isAuthenticationFailed) {
// attempt === 0 makes this a one-time fresh-run recovery path.
if (attempt === 0 && retryableProxyAuthenticationFailure) {
useContinueOnRetry = false;
continueDisabledPermanently = true;
log(`attempt ${attempt + 1}: provider authentication failed after partial execution - will retry once as fresh run to avoid losing completed agent work`);
continue;
}

if (isAuthenticationFailed) {
if (proxyAuthDiagnostic) {
log(`attempt ${attempt + 1}: ${proxyAuthDiagnostic} — not retrying (first-attempt auth failure is non-retryable)`);
log(`attempt ${attempt + 1}: ${proxyAuthDiagnostic} — not retrying`);
} else {
log(`attempt ${attempt + 1}: authentication failed — not retrying (first-attempt auth failure is non-retryable)`);
log(`attempt ${attempt + 1}: authentication failed — not retrying`);
}
break;
}

if (hasNumerousPermissionDenied) {
// If the agent already produced expected safe-outputs, the permission-denied
// signals are from optional/exploratory commands — not from the core task work.
// Suppress the terminal verdict and exit 0 to avoid a false-red run.
if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
lastExitCode = 0;
break;
}
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
Expand Down Expand Up @@ -1035,6 +1067,7 @@ if (typeof module !== "undefined" && module.exports) {
buildCopilotSDKServerArgs,
getCopilotSDKServerPort,
hasNoopInSafeOutputs,
hasExpectedSafeOutputs,
isDetectionPhase,
isModelAvailableInReflectData,
isModelAvailableInReflectFile,
Expand All @@ -1043,6 +1076,7 @@ if (typeof module !== "undefined" && module.exports) {
detectCopilotErrors,
classifyCopilotFailure,
extractOutputTail,
isRetryableProxyAuthenticationFailure,
hasNumerousPermissionDeniedIssues,
INFERENCE_ACCESS_ERROR_PATTERN,
AGENTIC_ENGINE_TIMEOUT_PATTERN,
Expand Down
3 changes: 2 additions & 1 deletion setup/js/detect_agent_errors.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;
// - "unknown model <id>"
// - "model ... not found"
// - "model ... does not exist"
// - "Model not found" (standalone, e.g. AIC api-proxy 404: "404 Not Found: Model not found")
const MODEL_NOT_SUPPORTED_PATTERN =
/(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable))/i;
/(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable)|404\b[^\n]*\bModel\s+not\s+found)/i;

// Pattern: Copilot/CAPI quota exhaustion and rate-limit responses.
// Matches all observed forms:
Expand Down
37 changes: 37 additions & 0 deletions setup/js/log_parser_bootstrap.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,36 @@ async function runLogParser(options) {
return count;
}

/**
 * Report whether the parsed log shows that the agent executed at least one turn.
 *
 * The check is deliberately "one or more turns" rather than "all work done":
 * the log carries only a turn count, not a per-task success signal. A positive
 * turn count is still enough to tell apart an MCP relaunch failure that
 * happened once the agent was already executing from a startup failure where
 * the agent ran zero turns.
 *
 * Two record shapes are recognised:
 *   - Legacy format (Codex, Copilot, etc.): `{ type: "result", num_turns: N }`
 *   - Copilot event format (Claude): `{ type: "session.result", data: { numTurns: N } }`
 *
 * Anything that is not a non-empty array yields false; non-object entries are ignored.
 *
 * @param {Array|null|undefined} entries - Parsed log entries.
 * @returns {boolean} True when some entry records a numeric turn count greater than zero.
 */
function agentRanToCompletion(entries) {
  if (!Array.isArray(entries)) {
    return false;
  }

  // Only genuine numbers count; numeric strings and NaN are rejected.
  const isPositiveCount = value => typeof value === "number" && value > 0;

  for (const entry of entries) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    // Legacy format
    if (entry.type === "result" && isPositiveCount(entry.num_turns)) {
      return true;
    }
    // Copilot event format (Claude)
    if (entry.type === "session.result" && entry.data && isPositiveCount(entry.data.numTurns)) {
      return true;
    }
  }

  return false;
}

try {
const logPath = process.env.GH_AW_AGENT_OUTPUT;
if (!logPath) {
Expand Down Expand Up @@ -309,6 +339,13 @@ async function runLogParser(options) {
const failedServers = mcpFailures.join(", ");
if (safeOutputEntriesCount > 0) {
core.warning(`MCP server(s) failed to launch (${failedServers}), but agent completed with ${safeOutputEntriesCount} safe output ${safeOutputEntriesCount === 1 ? "entry" : "entries"}`);
} else if (agentRanToCompletion(logEntries)) {
// The agent ran turns to completion even though an MCP server failed to launch.
// This is a post-completion relaunch/health-probe failure — the MCP server was
// healthy during execution (the agent used it throughout the run) and the failure
// occurred after the work was done. Treat as non-fatal so genuine task success
// is not masked by a transient infrastructure event.
core.warning(`MCP server(s) failed to launch (${failedServers}), but agent completed turns — treating as non-fatal post-completion relaunch`);
} else {
core.setFailed(`${ERR_API}: MCP server(s) failed to launch: ${failedServers}`);
}
Expand Down
54 changes: 54 additions & 0 deletions setup/js/patch_awf_chroot_config.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// @ts-check

const fs = require("fs");
const os = require("os");
const path = require("path");

/**
 * Add chroot settings for ARC/DinD runners to the AWF config file.
 *
 * Reads `<runnerTemp>/gh-aw/awf-config.json`, sets (or replaces) its `chroot`
 * key with the binaries source path and the current OS user's identity, then
 * writes the serialized result back to that file and also to
 * `<binariesSourcePath>/awf-config.json`.
 *
 * Each option falls back to an environment variable, then to a built-in
 * default, whenever the preceding value is falsy:
 *   - runnerTemp:         `RUNNER_TEMP` (no default; required)
 *   - binariesSourcePath: `GH_AW_CHROOT_BINARIES_SOURCE_PATH`, then "/tmp/gh-aw"
 *   - identityHome:       `GH_AW_CHROOT_IDENTITY_HOME`, then "/tmp/gh-aw/home"
 *
 * @param {Object} [options]
 * @param {string} [options.runnerTemp] - Directory that contains `gh-aw/awf-config.json`.
 * @param {string} [options.binariesSourcePath] - Recorded in the config; also receives a copy of it.
 * @param {string} [options.identityHome] - Home directory recorded for the chroot identity.
 * @returns {string} The patched JSON content (single line, newline-terminated)
 * @throws {Error} When no runner temp directory is available from options or the environment.
 */
function patchAWFChrootConfig(options = {}) {
  const env = process.env;

  const runnerTemp = options.runnerTemp || env.RUNNER_TEMP;
  if (!runnerTemp) {
    throw new Error("RUNNER_TEMP is required");
  }

  const binariesSourcePath = options.binariesSourcePath || env.GH_AW_CHROOT_BINARIES_SOURCE_PATH || "/tmp/gh-aw";
  const identityHome = options.identityHome || env.GH_AW_CHROOT_IDENTITY_HOME || "/tmp/gh-aw/home";

  const sourceConfigFile = path.join(runnerTemp, "gh-aw", "awf-config.json");
  const artifactConfigFile = path.join(binariesSourcePath, "awf-config.json");

  // Load the existing config first so a missing or invalid file fails before anything is written.
  const awfConfig = JSON.parse(fs.readFileSync(sourceConfigFile, "utf8"));

  const { username, uid, gid } = os.userInfo();
  const identity = { user: username, uid, gid, home: identityHome };
  awfConfig.chroot = { binariesSourcePath, identity };

  const serialized = JSON.stringify(awfConfig) + "\n";
  // The runner copy is written first, then the copy next to the binaries.
  for (const target of [sourceConfigFile, artifactConfigFile]) {
    fs.writeFileSync(target, serialized);
  }
  return serialized;
}

// Run the patch only when this file is executed directly, not when it is
// loaded via require() by another module.
if (require.main === module) {
  try {
    patchAWFChrootConfig();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the original error attached as `cause` so its stack trace and any
    // error code remain visible after the message is re-wrapped.
    throw new Error(`chroot config patch failed: ${message}`, { cause: error });
  }
}

module.exports = { patchAWFChrootConfig };
3 changes: 2 additions & 1 deletion setup/js/pr_review_buffer.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const { isStagedMode } = require("./safe_output_helpers.cjs");
const { generateWorkflowCallIdMarker, matchesWorkflowId } = require("./generate_footer.cjs");
const { attachExecutionState, fetchPullRequestReviewState } = require("./safe_output_execution_metadata.cjs");
const { withRetry, RATE_LIMIT_RETRY_CONFIG, isTransientError, sleep } = require("./error_recovery.cjs");
const { ERR_API } = require("./error_codes.cjs");

const SUPERSEDE_REVIEW_MESSAGE = "Superseded by updated review from same workflow.";
const MAX_SUPERSEDE_REVIEW_PAGES = 10;
Expand Down Expand Up @@ -126,7 +127,7 @@ function createReviewBuffer() {
return await fetchPullRequestReviewState(github, repoParts, pullRequestNumber);
} catch (error) {
if (!isTransientError(error)) {
throw new Error(`Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)} (non-transient)`, { cause: error });
throw new Error(`${ERR_API}: Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)} (non-transient)`, { cause: error });
}
core.warning(`Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)}. Continuing without execution-state metadata.`);
return null;
Expand Down
24 changes: 24 additions & 0 deletions setup/js/safe_outputs_bootstrap.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,36 @@ function bootstrapSafeOutputsServer(logger) {
logger.debug("Loading safe-outputs configuration");
const { config, outputFile } = loadConfig(logger);

enforceCreatePullRequestRuntimePolicy(config, logger);

// Load tools
logger.debug("Loading safe-outputs tools");
const tools = loadTools(logger);

return { config, outputFile, tools };
}

/**
 * Abort startup when create-pull-request is configured but runtime policy forbids it.
 *
 * The policy is read from the `GH_AW_POLICY_ALLOW_CREATE_PULL_REQUEST`
 * environment variable. Only the value "false" (after trimming whitespace and
 * lower-casing) disables the feature; an unset variable or any other value
 * lets startup continue. Nothing happens when the config has no own
 * `create_pull_request` key.
 *
 * @param {Record<string, any>} config - Loaded safe-outputs configuration.
 * @param {Logger} logger - Receives the message via `debugError` before the throw.
 * @throws {Error} When create-pull-request is configured and the policy variable is "false".
 */
function enforceCreatePullRequestRuntimePolicy(config, logger) {
  const policyVarName = "GH_AW_POLICY_ALLOW_CREATE_PULL_REQUEST";

  const rawPolicy = process.env[policyVarName];
  const policyForbids = typeof rawPolicy === "string" && rawPolicy.trim().toLowerCase() === "false";
  if (!policyForbids) {
    return;
  }

  // config is always snake_case after loadConfig normalises keys (k.replace(/-/g, '_'))
  const featureConfigured = Boolean(config) && Object.prototype.hasOwnProperty.call(config, "create_pull_request");
  if (!featureConfigured) {
    return;
  }

  const message = `create-pull-request is disabled by runtime policy: ${policyVarName}=false. Remove safe-outputs.create-pull-request or set ${policyVarName}=true.`;
  logger.debugError(message);
  throw new Error(message);
}

/**
* Delete the configuration file to ensure no secrets remain on disk.
* This should be called after the server has been configured and started.
Expand All @@ -71,4 +94,5 @@ function cleanupConfigFile(logger) {
module.exports = {
bootstrapSafeOutputsServer,
cleanupConfigFile,
enforceCreatePullRequestRuntimePolicy,
};
Loading
Loading