Skip to content

Commit 0ff18d9

Browse files
Copilot and pelikhan authored
Simplify retry options propagation and fix OpenAI fetch configuration (#1817)
* Initial plan
* Implement retry options propagation from PromptArgs to fetch calls
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* Fix retry options propagation: remove env.vars lookup and ensure fetch options pass-through
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* Address review comments: keep retries: 0 for OpenAIListModels, delete test file, add sample and documentation
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* Update CLI command documentation and refactor retry options handling across the codebase
* Refactor import statement for CancellationOptions to use type-only import
* Remove deprecated retry options demo scripts and consolidate retry configuration examples

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
Co-authored-by: Peli de Halleux <pelikhan@users.noreply.github.com>
1 parent 3cc364b commit 0ff18d9

13 files changed

Lines changed: 173 additions & 50 deletions

File tree

docs/public/genaiscript.d.ts

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/src/content/docs/reference/cli/commands.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,10 @@ Options:
9595
--teams-message Posts a message to the teams channel
9696
-j, --json emit full JSON response to output
9797
--fail-on-errors fails on detected annotation error
98-
--retry <number> number of retries (default: "10")
99-
--retry-delay <number> minimum delay between retries (default: "1000")
100-
--max-delay <number> maximum delay between retries (default: "60000")
101-
--max-retry-after <number> maximum retry-after delay in milliseconds before giving up (default: "300000")
98+
--retry <number> number of retries
99+
--retry-delay <number> minimum delay between retries
100+
--max-delay <number> maximum delay between retries
101+
--max-retry-after <number> maximum retry-after delay in milliseconds before giving up
102102
-l, --label <string> label for the run
103103
-t, --temperature <number> temperature for the run
104104
--top-p <number> top-p for the run

docs/src/content/docs/reference/scripts/metadata.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,40 @@ script({
259259
})
260260
```
261261

262+
### Retry options
263+
264+
You can configure retry behavior for failed LLM requests to improve reliability:
265+
266+
```js
267+
script({
268+
...,
269+
retries: 3, // Number of retry attempts (default: 10, per CHAT_COMPLETION_RETRY_DEFAULT)
270+
retryDelay: 1000, // Initial delay in ms between retries (default: 2000, per FETCH_RETRY_DELAY_DEFAULT)
271+
maxDelay: 5000, // Maximum delay in ms with exponential backoff (default: 10000)
272+
maxRetryAfter: 10000, // Maximum time in ms to respect retry-after headers (default: 10000)
273+
retryOn: [429, 500, 502, 503, 504], // HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
274+
})
275+
```
276+
277+
These retry options help handle:
278+
- **Rate limiting** (HTTP 429): Automatically waits for rate limit windows
279+
- **Server errors** (HTTP 5xx): Retries on temporary server issues
280+
- **Network failures**: Uses exponential backoff to avoid overwhelming services
281+
282+
Retry options can also be passed to `runPrompt()` calls to override script-level settings:
283+
284+
```js
285+
const { text } = await runPrompt(
286+
(_) => _.$`Summarize this text.`,
287+
{
288+
model: "small",
289+
retries: 2, // Override script retry settings
290+
retryDelay: 500, // Faster initial retry
291+
maxDelay: 3000, // Lower maximum delay
292+
}
293+
)
294+
```
295+
262296
### Other parameters
263297

264298
- `unlisted: true`, don't show it to the user in lists. Template `system.*` are automatically unlisted.

packages/api/src/run.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,6 @@ import {
100100
rmDir,
101101
tryStat,
102102
createGitIgnorer,
103-
OPENAI_MAX_RETRY_AFTER_DEFAULT,
104-
OPENAI_MAX_RETRY_DELAY,
105-
OPENAI_MAX_RETRY_COUNT,
106103
resolveRuntimeHost,
107104
} from "@genaiscript/core";
108105

@@ -196,9 +193,9 @@ export async function runScriptInternal(
196193
const excludedFiles = options.excludedFiles || [];
197194
const stream = !options.json;
198195
const retries = normalizeInt(options.retry);
199-
const retryDelay = normalizeInt(options.retryDelay) || OPENAI_MAX_RETRY_COUNT;
200-
const maxDelay = normalizeInt(options.maxDelay) || OPENAI_MAX_RETRY_DELAY;
201-
const maxRetryAfter = normalizeInt(options.maxRetryAfter) || OPENAI_MAX_RETRY_AFTER_DEFAULT;
196+
const retryDelay = normalizeInt(options.retryDelay);
197+
const maxDelay = normalizeInt(options.maxDelay);
198+
const maxRetryAfter = normalizeInt(options.maxRetryAfter);
202199
const outTrace = options.outTrace;
203200
const outOutput = options.outOutput;
204201
const outAnnotations = options.outAnnotations;

packages/cli/src/cli.ts

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@ import {
1414
DEBUG_SCRIPT_CATEGORY,
1515
GITHUB_REPO,
1616
MODEL_PROVIDERS,
17-
OPENAI_MAX_RETRY_COUNT,
18-
OPENAI_MAX_RETRY_DELAY,
19-
OPENAI_RETRY_DEFAULT_DEFAULT,
20-
OPENAI_MAX_RETRY_AFTER_DEFAULT,
2117
RUNTIME_ERROR_CODE,
2218
SERVER_PORT,
2319
TOOL_ID,
@@ -217,17 +213,12 @@ export async function cli(): Promise<void> {
217213
.option("--teams-message", "Posts a message to the teams channel")
218214
.option("-j, --json", "emit full JSON response to output")
219215
.option(`--fail-on-errors`, `fails on detected annotation error`)
220-
.option("--retry <number>", "number of retries", String(OPENAI_MAX_RETRY_COUNT))
221-
.option(
222-
"--retry-delay <number>",
223-
"minimum delay between retries",
224-
String(OPENAI_RETRY_DEFAULT_DEFAULT),
225-
)
226-
.option("--max-delay <number>", "maximum delay between retries", String(OPENAI_MAX_RETRY_DELAY))
216+
.option("--retry <number>", "number of retries")
217+
.option("--retry-delay <number>", "minimum delay between retries")
218+
.option("--max-delay <number>", "maximum delay between retries")
227219
.option(
228220
"--max-retry-after <number>",
229221
"maximum retry-after delay in milliseconds before giving up",
230-
String(OPENAI_MAX_RETRY_AFTER_DEFAULT),
231222
)
232223
.option("-l, --label <string>", "label for the run")
233224
.option("-t, --temperature <number>", "temperature for the run")
@@ -485,7 +476,9 @@ export async function cli(): Promise<void> {
485476
.option("--port <number>", `HTTP port number, default: ${SERVER_PORT}`)
486477
.option("-n, --network", "Opens HTTP server on 0.0.0.0 to make it accessible on the network")
487478
.alias("mcps")
488-
.description("Starts a Model Context Protocol server that exposes scripts as tools. Use --http for HTTP transport.")
479+
.description(
480+
"Starts a Model Context Protocol server that exposes scripts as tools. Use --http for HTTP transport.",
481+
)
489482
.action(startMcpServer);
490483
addModelOptions(mcp);
491484

packages/core/src/constants.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,7 @@ export const SARIFF_RULEID_PREFIX = "genaiscript/";
102102
export const SARIFF_BUILDER_URL = "https://github.com/microsoft/genaiscript/";
103103
export const SARIFF_BUILDER_TOOL_DRIVER_NAME = TOOL_ID;
104104

105-
export const OPENAI_MAX_RETRY_DELAY = 60000; // 60s
106-
export const OPENAI_MAX_RETRY_COUNT = 10;
107-
export const OPENAI_RETRY_DEFAULT_DEFAULT = 1000;
108-
export const OPENAI_MAX_RETRY_AFTER_DEFAULT = 300000; // 300s
105+
export const CHAT_COMPLETION_RETRY_DEFAULT = 10;
109106

110107
export const FETCH_RETRY_DEFAULT = 6;
111108
export const FETCH_RETRY_DELAY_DEFAULT = 2000;

packages/core/src/expander.ts

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@ import { resolveScript } from "./ast.js";
55
import { assert } from "./assert.js";
66
import type { MarkdownTrace } from "./trace.js";
77
import { errorMessage, isCancelError, NotSupportedError } from "./error.js";
8-
import { JS_REGEX, MAX_TOOL_CALLS, TS_IMPORT_REGEX } from "./constants.js";
8+
import {
9+
CHAT_COMPLETION_RETRY_DEFAULT,
10+
FETCH_RETRY_DELAY_DEFAULT,
11+
FETCH_RETRY_MAX_DELAY_DEFAULT,
12+
FETCH_RETRY_ON_DEFAULT,
13+
JS_REGEX,
14+
MAX_TOOL_CALLS,
15+
TS_IMPORT_REGEX,
16+
} from "./constants.js";
917
import {
1018
finalizeMessages,
1119
type PromptImage,
@@ -273,6 +281,14 @@ export async function expandTemplate(
273281
const disableChatPreview =
274282
options.disableChatPreview === true || template.disableChatPreview === true;
275283

284+
// Handle retry options from template
285+
const retryOn = options.retryOn ?? template.retryOn ?? FETCH_RETRY_ON_DEFAULT;
286+
const retries = options.retries ?? template.retries ?? CHAT_COMPLETION_RETRY_DEFAULT;
287+
const retryDelay = options.retryDelay ?? template.retryDelay ?? FETCH_RETRY_DELAY_DEFAULT;
288+
const maxDelay = options.maxDelay ?? template.maxDelay ?? FETCH_RETRY_MAX_DELAY_DEFAULT;
289+
const maxRetryAfter =
290+
options.maxRetryAfter ?? template.maxRetryAfter ?? FETCH_RETRY_MAX_DELAY_DEFAULT;
291+
276292
// finalize options
277293
env.meta.model = model;
278294
Object.freeze(env.meta);
@@ -453,5 +469,10 @@ export async function expandTemplate(
453469
metadata,
454470
fallbackTools: options.fallbackTools,
455471
disableChatPreview,
472+
retryOn,
473+
retries,
474+
retryDelay,
475+
maxDelay,
476+
maxRetryAfter,
456477
};
457478
}

packages/core/src/fetch.ts

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,26 @@ export async function createFetch(
107107
options?: TraceOptions & CancellationOptions & RetryOptions,
108108
): Promise<FetchType> {
109109
const {
110+
trace,
111+
cancellationToken,
110112
retries = FETCH_RETRY_DEFAULT,
111113
retryOn = FETCH_RETRY_ON_DEFAULT,
112-
trace,
113114
retryDelay = FETCH_RETRY_DELAY_DEFAULT,
114115
maxDelay = FETCH_RETRY_MAX_DELAY_DEFAULT,
115116
maxRetryAfter = FETCH_RETRY_MAX_RETRY_AFTER_DEFAULT,
116-
cancellationToken,
117117
} = options || {};
118118
const minDelay = FETCH_RETRY_MIN_DELAY_DEFAULT;
119119

120-
dbg(`create fetch`);
120+
dbg(
121+
`create fetch: retries: %d, retry on: %o, retry delay: %d, min delay: %d, max delay: %d, max retry after: %d`,
122+
retries,
123+
retryOn,
124+
retryDelay,
125+
minDelay,
126+
maxDelay,
127+
maxRetryAfter,
128+
);
129+
121130
// We create a proxy based on Node.js environment variables.
122131
const agent = await resolveHttpsProxyAgent();
123132

@@ -137,16 +146,6 @@ export async function createFetch(
137146
return crossFetchWithProxy;
138147
}
139148

140-
// Create a fetch function with retry logic
141-
dbgr(
142-
`retries: %d, retry on: %o, retry delay: %d, min delay: %d, max delay: %d, max retry after: %d`,
143-
retries,
144-
retryOn,
145-
retryDelay,
146-
minDelay,
147-
maxDelay,
148-
maxRetryAfter,
149-
);
150149
const fetchRetry = wrapFetch(crossFetchWithProxy, {
151150
retries,
152151
retryOn: (attempt, error, response) => {

packages/core/src/openai-chatcompletion.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ export const OpenAIv1ChatCompletion: ChatCompletionHandler = async (req, cfg, op
100100
requestOptions,
101101
partialCb,
102102
retries,
103+
retryOn,
103104
retryDelay,
104105
maxDelay,
105106
maxRetryAfter,
@@ -243,6 +244,7 @@ export const OpenAIv1ChatCompletion: ChatCompletionHandler = async (req, cfg, op
243244
const fetchRetry = await createFetch({
244245
trace,
245246
retries,
247+
retryOn,
246248
retryDelay,
247249
maxDelay,
248250
maxRetryAfter,

packages/core/src/openai-responses.ts

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,18 @@
1010
import OpenAI from "openai";
1111
import { genaiscriptDebug } from "./debug.js";
1212
import type { ChatCompletionHandler } from "./chat.js";
13-
import type { ChatCompletionMessageParam, ChatCompletionResponse } from "./chattypes.js";
13+
import type {
14+
ChatCompletionMessageParam,
15+
ChatCompletionResponse,
16+
ChatCompletionsOptions,
17+
} from "./chattypes.js";
1418
import { errorMessage, isCancelError } from "./error.js";
1519
import { createFetch } from "./fetch.js";
1620
import { logError } from "./util.js";
17-
import { checkCancelled } from "./cancellation.js";
21+
import { type CancellationOptions, checkCancelled } from "./cancellation.js";
1822
import { deleteUndefinedValues } from "./cleaners.js";
23+
import type { RetryOptions } from "./types.js";
24+
import type { MarkdownTrace } from "./trace.js";
1925
const dbg = genaiscriptDebug("openai:responses");
2026

2127
function statusToReason(
@@ -169,8 +175,8 @@ export const OpenAIv2ResponsesChatCompletion: ChatCompletionHandler = async (
169175
async function handleNonStreamingResponse(
170176
openai: OpenAI,
171177
request: OpenAI.Responses.ResponseCreateParams,
172-
options: any,
173-
trace: any,
178+
options: ChatCompletionsOptions & CancellationOptions & RetryOptions,
179+
trace: MarkdownTrace,
174180
): Promise<ChatCompletionResponse> {
175181
const { cancellationToken } = options;
176182

@@ -190,13 +196,16 @@ async function handleNonStreamingResponse(
190196
async function handleStreamingResponse(
191197
openai: OpenAI,
192198
request: OpenAI.Responses.ResponseCreateParams,
193-
options: any,
194-
trace: any,
199+
options: ChatCompletionsOptions & CancellationOptions & RetryOptions,
200+
trace: MarkdownTrace,
195201
): Promise<ChatCompletionResponse> {
196202
const { cancellationToken, partialCb } = options;
197203

198204
checkCancelled(cancellationToken);
199205

206+
let reasoningSoFar = "";
207+
let responseSoFar = "";
208+
let tokensSoFar = 0;
200209
const res: ChatCompletionResponse = {};
201210
try {
202211
const stream = await openai.responses.create({
@@ -222,8 +231,28 @@ async function handleStreamingResponse(
222231
case "response.created":
223232
Object.assign(res, responseToCompletion(chunk.response));
224233
break;
234+
case "response.reasoning_summary_text.delta":
235+
reasoningSoFar += chunk.delta;
236+
if (partialCb)
237+
partialCb({
238+
reasoningSoFar,
239+
tokensSoFar,
240+
responseSoFar,
241+
reasoningChunk: chunk.delta,
242+
responseChunk: undefined,
243+
inner: false,
244+
});
245+
break;
225246
case "response.output_text.delta":
226-
if (partialCb) partialCb({ text: chunk.delta });
247+
responseSoFar += chunk.delta;
248+
if (partialCb)
249+
partialCb({
250+
reasoningSoFar,
251+
responseChunk: chunk.delta,
252+
inner: false,
253+
tokensSoFar,
254+
responseSoFar,
255+
});
227256
trace?.appendContent(chunk.delta);
228257
break;
229258
case "response.refusal.done":

0 commit comments

Comments (0)