github
diff --git a/‎setup/js/add_labels.cjs‎
Lines changed: 20 additions & 4 deletions b/‎setup/js/add_labels.cjs‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎setup/js/claude_harness.cjs‎
Lines changed: 10 additions & 1 deletion b/‎setup/js/claude_harness.cjs‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎setup/js/codex_harness.cjs‎
Lines changed: 10 additions & 1 deletion b/‎setup/js/codex_harness.cjs‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎setup/js/copilot_harness.cjs‎
Lines changed: 38 additions & 4 deletions b/‎setup/js/copilot_harness.cjs‎
Lines changed: 38 additions & 4 deletions
diff --git a/‎setup/js/detect_agent_errors.cjs‎
Lines changed: 2 additions & 1 deletion b/‎setup/js/detect_agent_errors.cjs‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎setup/js/log_parser_bootstrap.cjs‎
Lines changed: 37 additions & 0 deletions b/‎setup/js/log_parser_bootstrap.cjs‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎setup/js/patch_awf_chroot_config.cjs‎
Lines changed: 54 additions & 0 deletions b/‎setup/js/patch_awf_chroot_config.cjs‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎setup/js/pr_review_buffer.cjs‎
Lines changed: 2 additions & 1 deletion b/‎setup/js/pr_review_buffer.cjs‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎setup/js/safe_outputs_bootstrap.cjs‎
Lines changed: 24 additions & 0 deletions b/‎setup/js/safe_outputs_bootstrap.cjs‎
Lines changed: 24 additions & 0 deletions
@@ -33,7 +33,7 @@ const { MAX_LABELS } = require("./constants.cjs");
 const { createCountGatedHandler } = require("./handler_scaffold.cjs");
 const { withRetry, RATE_LIMIT_RETRY_CONFIG } = require("./error_recovery.cjs");
 const { resolveInvocationContext } = require("./invocation_context_helpers.cjs");
-const { normalizeIssueIntentLabelNames } = require("./issue_intents.cjs");
+const { hasIssueIntentsRuntimeFeature, normalizeIssueIntentLabelNames, normalizeIssueIntentLabelSpecs } = require("./issue_intents.cjs");
 
 /**
  * Main handler factory for add_labels
@@ -92,9 +92,23 @@ const main = createCountGatedHandler({
       const contextType = effectiveContext.eventPayload?.pull_request ? "pull request" : "issue";
       const requestedLabels = message.labels ?? [];
       core.info(`Requested labels: ${JSON.stringify(requestedLabels)}`);
+      const issueIntentsEnabled = hasIssueIntentsRuntimeFeature();
+      /** @type {Map<string, {name: string, rationale?: string, confidence?: "LOW"|"MEDIUM"|"HIGH", suggest?: boolean}>} */
+      const requestedLabelSpecByLowerName = new Map();
       let requestedLabelNames;
       try {
-        requestedLabelNames = normalizeIssueIntentLabelNames(requestedLabels);
+        if (issueIntentsEnabled) {
+          const requestedLabelSpecs = normalizeIssueIntentLabelSpecs(requestedLabels);
+          for (const labelSpec of requestedLabelSpecs) {
+            const key = labelSpec.name.toLowerCase();
+            if (!requestedLabelSpecByLowerName.has(key)) {
+              requestedLabelSpecByLowerName.set(key, labelSpec);
+            }
+          }
+          requestedLabelNames = requestedLabelSpecs.map(labelSpec => labelSpec.name);
+        } else {
+          requestedLabelNames = normalizeIssueIntentLabelNames(requestedLabels);
+        }
       } catch (error) {
         const errorMessage = getErrorMessage(error);
         core.warning(`Invalid add_labels payload: ${errorMessage}`);
@@ -172,7 +186,9 @@ const main = createCountGatedHandler({
         };
       }
 
-      core.info(`Adding ${uniqueLabels.length} labels to ${contextType} #${itemNumber} in ${itemRepo}: ${JSON.stringify(uniqueLabels)}`);
+      const labelsRequestPayload = issueIntentsEnabled ? uniqueLabels.map(name => requestedLabelSpecByLowerName.get(name.toLowerCase()) ?? { name }) : uniqueLabels;
+
+      core.info(`Adding ${uniqueLabels.length} labels to ${contextType} #${itemNumber} in ${itemRepo}: ${JSON.stringify(labelsRequestPayload)}`);
 
       // If in staged mode, preview the labels without adding them
       if (isStaged) {
@@ -197,7 +213,7 @@ const main = createCountGatedHandler({
               owner: repoParts.owner,
               repo: repoParts.repo,
               issue_number: itemNumber,
-              labels: uniqueLabels,
+              labels: labelsRequestPayload,
             }),
           RATE_LIMIT_RETRY_CONFIG,
           `add_labels to ${contextType} #${itemNumber} in ${itemRepo}`
 
@@ -45,7 +45,7 @@ const {
   fetchAWFReflect,
   fetchModelsFromUrl,
 } = require("./awf_reflect.cjs");
-const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
+const { emitMissingToolPermissionIssue, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
 const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
 const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
 const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");
@@ -430,6 +430,14 @@ async function main() {
     }
 
     if (hasNumerousPermissionDenied) {
+      // If the agent already produced expected safe-outputs, the permission-denied
+      // signals are from optional/exploratory commands — not from the core task work.
+      // Suppress the terminal verdict and exit 0 to avoid a false-red run.
+      if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
+        log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
+        lastExitCode = 0;
+        break;
+      }
       const deniedCommands = extractDeniedCommands(result.output);
       emitMissingToolPermissionIssue({ deniedCommands, logger: log });
       log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
@@ -521,6 +529,7 @@ if (typeof module !== "undefined" && module.exports) {
     buildMissingToolPermissionIssuePayload,
     emitMissingToolPermissionIssue,
     hasNoopInSafeOutputs,
+    hasExpectedSafeOutputs,
   };
 }
 
 
@@ -44,7 +44,7 @@ const {
   fetchAWFReflect,
   fetchModelsFromUrl,
 } = require("./awf_reflect.cjs");
-const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
+const { emitMissingToolPermissionIssue, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
 const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
 const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
 const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");
@@ -492,6 +492,14 @@ async function main() {
     }
 
     if (hasNumerousPermissionDenied) {
+      // If the agent already produced expected safe-outputs, the permission-denied
+      // signals are from optional/exploratory commands — not from the core task work.
+      // Suppress the terminal verdict and exit 0 to avoid a false-red run.
+      if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
+        log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
+        lastExitCode = 0;
+        break;
+      }
       const deniedCommands = extractDeniedCommands(result.output);
       emitMissingToolPermissionIssue({ deniedCommands, logger: log });
       log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
@@ -553,6 +561,7 @@ if (typeof module !== "undefined" && module.exports) {
     getConfiguredOpenAIPortFromReflect,
     validateCodexOpenAIBaseURLFromReflect,
     hasNoopInSafeOutputs,
+    hasExpectedSafeOutputs,
   };
 }
 
 
@@ -58,7 +58,7 @@ const {
   fetchModelsFromUrl,
   resolveCopilotSDKCustomProviderFromReflect,
 } = require("./awf_reflect.cjs");
-const { runSafeOutputsCLI, buildMissingToolAlternatives, emitMissingToolPermissionIssue, emitInfrastructureIncomplete, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
+const { runSafeOutputsCLI, buildMissingToolAlternatives, emitMissingToolPermissionIssue, emitInfrastructureIncomplete, hasExpectedSafeOutputs, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
 const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
 const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
 const { isCAPIQuotaExceededError } = require("./detect_agent_errors.cjs");
@@ -447,6 +447,21 @@ function buildCopilotProxyAuthFailureDiagnostic(output, env = process.env, optio
   );
 }
 
+/**
+ * Determine whether an authentication_failed error came from the gh-aw API proxy after
+ * partial execution, making a one-time fresh-run retry worthwhile.
+ *
+ * Returns true only when all three conditions hold:
+ *   1. `hasOutput` is truthy (the run produced output before failing),
+ *   2. `isAuthenticationFailedError(output)` matches, and
+ *   3. `parseProviderAuthFailure(output)` yields a failure whose `providerUrl` is accepted
+ *      by `isLikelyAWFAPIProxyURL`.
+ *
+ * This function does not limit how often a retry happens; the "one-time" aspect is
+ * enforced by the caller, which only consults the result on the first attempt.
+ *
+ * @param {string} output - Captured output of the attempt that failed.
+ * @param {boolean} hasOutput - Whether the attempt produced any output before failing.
+ * @returns {boolean} True when the failure qualifies for a fresh-run retry, false otherwise.
+ */
+function isRetryableProxyAuthenticationFailure(output, hasOutput) {
+  if (!hasOutput || !isAuthenticationFailedError(output)) {
+    return false;
+  }
+  const authFailure = parseProviderAuthFailure(output); // may be falsy when no provider failure can be parsed
+  return Boolean(authFailure && isLikelyAWFAPIProxyURL(authFailure.providerUrl)); // Boolean(...) guarantees a strict true/false result
+}
+
 /**
  * Detect known Copilot error patterns for workflow outputs.
  * @param {string} output
@@ -823,6 +838,7 @@ async function main() {
         const isAuthErr = isNoAuthInfoError(result.output);
         const isAuthenticationFailed = isAuthenticationFailedError(result.output);
         const proxyAuthDiagnostic = buildCopilotProxyAuthFailureDiagnostic(result.output, process.env);
+        const retryableProxyAuthenticationFailure = isRetryableProxyAuthenticationFailure(result.output, result.hasOutput);
         const isNullTypeToolCall = isNullTypeToolCallError(result.output);
         const isSDKSessionIdleTimeout = isSDKSessionIdleTimeoutError(result.output);
         const isMCPGatewayShutdown = isMCPGatewayShutdownError(result.output);
@@ -882,16 +898,32 @@ async function main() {
           break;
         }
 
-        if (attempt === 0 && isAuthenticationFailed) {
+        // attempt === 0 makes this a one-time fresh-run recovery path.
+        if (attempt === 0 && retryableProxyAuthenticationFailure) {
+          useContinueOnRetry = false;
+          continueDisabledPermanently = true;
+          log(`attempt ${attempt + 1}: provider authentication failed after partial execution - will retry once as fresh run to avoid losing completed agent work`);
+          continue;
+        }
+
+        if (isAuthenticationFailed) {
           if (proxyAuthDiagnostic) {
-            log(`attempt ${attempt + 1}: ${proxyAuthDiagnostic} — not retrying (first-attempt auth failure is non-retryable)`);
+            log(`attempt ${attempt + 1}: ${proxyAuthDiagnostic} — not retrying`);
           } else {
-            log(`attempt ${attempt + 1}: authentication failed — not retrying (first-attempt auth failure is non-retryable)`);
+            log(`attempt ${attempt + 1}: authentication failed — not retrying`);
           }
           break;
         }
 
         if (hasNumerousPermissionDenied) {
+          // If the agent already produced expected safe-outputs, the permission-denied
+          // signals are from optional/exploratory commands — not from the core task work.
+          // Suppress the terminal verdict and exit 0 to avoid a false-red run.
+          if (safeOutputsPath && hasExpectedSafeOutputs(safeOutputsPath, { logger: log })) {
+            log(`attempt ${attempt + 1}: detected numerous permission-denied issues but safe-outputs already contain expected output — suppressing terminal verdict (false-red: core work succeeded)`);
+            lastExitCode = 0;
+            break;
+          }
           const deniedCommands = extractDeniedCommands(result.output);
           emitMissingToolPermissionIssue({ deniedCommands, logger: log });
           log(`attempt ${attempt + 1}: detected numerous permission-denied issues — not retrying (classified as missing tool/permission issue)`);
@@ -1035,6 +1067,7 @@ if (typeof module !== "undefined" && module.exports) {
     buildCopilotSDKServerArgs,
     getCopilotSDKServerPort,
     hasNoopInSafeOutputs,
+    hasExpectedSafeOutputs,
     isDetectionPhase,
     isModelAvailableInReflectData,
     isModelAvailableInReflectFile,
@@ -1043,6 +1076,7 @@ if (typeof module !== "undefined" && module.exports) {
     detectCopilotErrors,
     classifyCopilotFailure,
     extractOutputTail,
+    isRetryableProxyAuthenticationFailure,
     hasNumerousPermissionDeniedIssues,
     INFERENCE_ACCESS_ERROR_PATTERN,
     AGENTIC_ENGINE_TIMEOUT_PATTERN,
 
@@ -57,8 +57,9 @@ const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;
 //   - "unknown model <id>"
 //   - "model ... not found"
 //   - "model ... does not exist"
+//   - "Model not found" (standalone, e.g. AIC api-proxy 404: "404 Not Found: Model not found")
 const MODEL_NOT_SUPPORTED_PATTERN =
-  /(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable))/i;
+  /(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable)|404\b[^\n]*\bModel\s+not\s+found)/i;
 
 // Pattern: Copilot/CAPI quota exhaustion and rate-limit responses.
 // Matches all observed forms:
 
@@ -72,6 +72,36 @@ async function runLogParser(options) {
     return count;
   }
 
+  /**
+   * Returns true if the log entries show the agent ran at least one turn.
+   *
+   * NOTE: despite the name, this does not verify that the run finished its work.
+   * It only looks for a result entry that reports a positive turn count.
+   *
+   * "At least one turn" is used (rather than "all work finished") because the
+   * log only records the turn count, not whether every intended task succeeded.
+   * The check is sufficient to distinguish a post-completion MCP relaunch
+   * failure (the agent was already executing) from a startup failure where the
+   * MCP never launched and the agent ran zero turns.
+   *
+   * Handles both log formats:
+   *   - Legacy format (Codex, Copilot, etc.): { type: "result", num_turns: N }
+   *   - Copilot event format (Claude): { type: "session.result", data: { numTurns: N } }
+   *
+   * @param {Array|null|undefined} entries - Parsed log entries; elements that are
+   *   null or not objects are ignored.
+   * @returns {boolean} True when at least one entry is a result record in either
+   *   format with a numeric turn count greater than zero; false for a missing,
+   *   non-array, or empty `entries` value, or when no such record exists.
+   */
+  function agentRanToCompletion(entries) {
+    if (!entries || !Array.isArray(entries) || entries.length === 0) {
+      return false;
+    }
+    return entries.some(e => {
+      if (!e || typeof e !== "object") return false; // skip null and primitive entries
+      // Legacy format
+      if (e.type === "result" && typeof e.num_turns === "number" && e.num_turns > 0) return true;
+      // Copilot event format (Claude)
+      if (e.type === "session.result" && e.data && typeof e.data.numTurns === "number" && e.data.numTurns > 0) return true;
+      return false;
+    });
+  }
+
   try {
     const logPath = process.env.GH_AW_AGENT_OUTPUT;
     if (!logPath) {
@@ -309,6 +339,13 @@ async function runLogParser(options) {
       const failedServers = mcpFailures.join(", ");
       if (safeOutputEntriesCount > 0) {
         core.warning(`MCP server(s) failed to launch (${failedServers}), but agent completed with ${safeOutputEntriesCount} safe output ${safeOutputEntriesCount === 1 ? "entry" : "entries"}`);
+      } else if (agentRanToCompletion(logEntries)) {
+        // The log records at least one agent turn even though an MCP server failed to launch.
+        // This is assumed to be a post-completion relaunch/health-probe failure: a server that
+        // never launched would have left the agent with zero turns, so the failure most likely
+        // occurred after the work was done.  Treat as non-fatal so genuine task success
+        // is not masked by a transient infrastructure event.
+        core.warning(`MCP server(s) failed to launch (${failedServers}), but agent completed turns — treating as non-fatal post-completion relaunch`);
       } else {
         core.setFailed(`${ERR_API}: MCP server(s) failed to launch: ${failedServers}`);
       }
 
@@ -0,0 +1,54 @@
+// @ts-check
+
+const fs = require("fs");
+const os = require("os");
+const path = require("path");
+
+/**
+ * Patch the AWF config file with chroot settings for ARC/DinD runners.
+ *
+ * Reads `<runnerTemp>/gh-aw/awf-config.json`, sets (or overwrites) its top-level
+ * `chroot` key, and writes the result as single-line JSON followed by a newline to
+ * two places: back to the config file itself and to `<binariesSourcePath>/awf-config.json`.
+ * The `chroot.identity` user, uid and gid come from `os.userInfo()` of the current process.
+ *
+ * Each option falls back with `||`, so an empty string is treated like a missing value.
+ *
+ * @param {Object} [options]
+ * @param {string} [options.runnerTemp] - Base directory of the config file.
+ *   Falls back to the RUNNER_TEMP environment variable.
+ * @param {string} [options.binariesSourcePath] - Value stored as `chroot.binariesSourcePath`
+ *   and directory of the second output file. Falls back to
+ *   GH_AW_CHROOT_BINARIES_SOURCE_PATH, then "/tmp/gh-aw".
+ * @param {string} [options.identityHome] - Value stored as `chroot.identity.home`.
+ *   Falls back to GH_AW_CHROOT_IDENTITY_HOME, then "/tmp/gh-aw/home".
+ * @returns {string} The patched JSON content
+ * @throws {Error} When no runner temp directory is available, or when reading, parsing
+ *   or writing the config fails (errors from `fs` and `JSON.parse` are not caught here).
+ */
+function patchAWFChrootConfig(options = {}) {
+  const runnerTemp = options.runnerTemp || process.env.RUNNER_TEMP;
+  if (!runnerTemp) {
+    throw new Error("RUNNER_TEMP is required");
+  }
+
+  const binariesSourcePath = options.binariesSourcePath || process.env.GH_AW_CHROOT_BINARIES_SOURCE_PATH || "/tmp/gh-aw";
+  const identityHome = options.identityHome || process.env.GH_AW_CHROOT_IDENTITY_HOME || "/tmp/gh-aw/home";
+  const configPath = path.join(runnerTemp, "gh-aw", "awf-config.json");
+  const artifactConfigPath = path.join(binariesSourcePath, "awf-config.json"); // second copy, written next to the binaries
+  const config = JSON.parse(fs.readFileSync(configPath, "utf8")); // the config file must already exist and contain valid JSON
+  const userInfo = os.userInfo();
+
+  config.chroot = {
+    binariesSourcePath,
+    identity: {
+      user: userInfo.username,
+      uid: userInfo.uid,
+      gid: userInfo.gid,
+      home: identityHome,
+    },
+  };
+
+  const output = `${JSON.stringify(config)}\n`; // compact JSON with a trailing newline
+  fs.writeFileSync(configPath, output);
+  fs.writeFileSync(artifactConfigPath, output); // does not create the target directory; it must already exist
+  return output;
+}
+
+if (require.main === module) {
+  try {
+    patchAWFChrootConfig();
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    throw new Error(`chroot config patch failed: ${message}`);
+  }
+}
+
+module.exports = { patchAWFChrootConfig };
@@ -25,6 +25,7 @@ const { isStagedMode } = require("./safe_output_helpers.cjs");
 const { generateWorkflowCallIdMarker, matchesWorkflowId } = require("./generate_footer.cjs");
 const { attachExecutionState, fetchPullRequestReviewState } = require("./safe_output_execution_metadata.cjs");
 const { withRetry, RATE_LIMIT_RETRY_CONFIG, isTransientError, sleep } = require("./error_recovery.cjs");
+const { ERR_API } = require("./error_codes.cjs");
 
 const SUPERSEDE_REVIEW_MESSAGE = "Superseded by updated review from same workflow.";
 const MAX_SUPERSEDE_REVIEW_PAGES = 10;
@@ -126,7 +127,7 @@ function createReviewBuffer() {
       return await fetchPullRequestReviewState(github, repoParts, pullRequestNumber);
     } catch (error) {
       if (!isTransientError(error)) {
-        throw new Error(`Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)} (non-transient)`, { cause: error });
+        throw new Error(`${ERR_API}: Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)} (non-transient)`, { cause: error });
       }
       core.warning(`Failed to capture ${phase} PR review state for #${pullRequestNumber}: ${getErrorMessage(error)}. Continuing without execution-state metadata.`);
       return null;
 
@@ -41,13 +41,36 @@ function bootstrapSafeOutputsServer(logger) {
   logger.debug("Loading safe-outputs configuration");
   const { config, outputFile } = loadConfig(logger);
 
+  enforceCreatePullRequestRuntimePolicy(config, logger);
+
   // Load tools
   logger.debug("Loading safe-outputs tools");
   const tools = loadTools(logger);
 
   return { config, outputFile, tools };
 }
 
+/**
+ * Refuse startup when runtime policy disables create-pull-request.
+ *
+ * The policy is read from the GH_AW_POLICY_ALLOW_CREATE_PULL_REQUEST environment
+ * variable. Only the value "false" (after trimming, case-insensitive) disables the
+ * feature; an unset variable or any other value is treated as allowed. The policy is
+ * enforced only when `config` has its own `create_pull_request` property, regardless
+ * of that property's value.
+ *
+ * @param {Record<string, any>} config - Loaded safe-outputs configuration; a falsy
+ *   value is treated as "create_pull_request not configured".
+ * @param {Logger} logger - Receives the refusal message via `debugError` before the throw.
+ * @returns {void}
+ * @throws {Error} When create_pull_request is configured and the policy variable is "false".
+ */
+function enforceCreatePullRequestRuntimePolicy(config, logger) {
+  const policyVarName = "GH_AW_POLICY_ALLOW_CREATE_PULL_REQUEST";
+  const rawValue = process.env[policyVarName];
+  const normalizedValue = typeof rawValue === "string" ? rawValue.trim().toLowerCase() : ""; // unset variable normalizes to ""
+  // config is always snake_case after loadConfig normalises keys (k.replace(/-/g, '_'))
+  const createPullRequestConfigured = !!config && Object.prototype.hasOwnProperty.call(config, "create_pull_request");
+
+  if (!createPullRequestConfigured || normalizedValue !== "false") {
+    return; // nothing to enforce: feature not configured, or policy is not explicitly "false"
+  }
+
+  const message = `create-pull-request is disabled by runtime policy: ${policyVarName}=false. ` + `Remove safe-outputs.create-pull-request or set ${policyVarName}=true.`;
+  logger.debugError(message);
+  throw new Error(message);
+}
+
 /**
  * Delete the configuration file to ensure no secrets remain on disk.
  * This should be called after the server has been configured and started.
@@ -71,4 +94,5 @@ function cleanupConfigFile(logger) {
 module.exports = {
   bootstrapSafeOutputsServer,
   cleanupConfigFile,
+  enforceCreatePullRequestRuntimePolicy,
 };