Skip to content

Commit 0ff18d9

Browse files
Copilot and pelikhan authored
Simplify retry options propagation and fix OpenAI fetch configuration (#1817)
* Initial plan
* Implement retry options propagation from PromptArgs to fetch calls
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* Fix retry options propagation: remove env.vars lookup and ensure fetch options pass-through
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* Address review comments: keep retries: 0 for OpenAIListModels, delete test file, add sample and documentation
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* Update CLI command documentation and refactor retry options handling across the codebase
* Refactor import statement for CancellationOptions to use type-only import
* Remove deprecated retry options demo scripts and consolidate retry configuration examples

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
Co-authored-by: Peli de Halleux <pelikhan@users.noreply.github.com>
1 parent 3cc364b commit 0ff18d9

13 files changed

Lines changed: 173 additions & 50 deletions

File tree

docs/public/genaiscript.d.ts

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/src/content/docs/reference/cli/commands.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,10 @@ Options:
9595
--teams-message Posts a message to the teams channel
9696
-j, --json emit full JSON response to output
9797
--fail-on-errors fails on detected annotation error
98-
--retry <number> number of retries (default: "10")
99-
--retry-delay <number> minimum delay between retries (default: "1000")
100-
--max-delay <number> maximum delay between retries (default: "60000")
101-
--max-retry-after <number> maximum retry-after delay in milliseconds before giving up (default: "300000")
98+
--retry <number> number of retries
99+
--retry-delay <number> minimum delay between retries
100+
--max-delay <number> maximum delay between retries
101+
--max-retry-after <number> maximum retry-after delay in milliseconds before giving up
102102
-l, --label <string> label for the run
103103
-t, --temperature <number> temperature for the run
104104
--top-p <number> top-p for the run

docs/src/content/docs/reference/scripts/metadata.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,40 @@ script({
259259
})
260260
```
261261

262+
### Retry options
263+
264+
You can configure retry behavior for failed LLM requests to improve reliability:
265+
266+
```js
267+
script({
268+
...,
269+
retries: 3, // Number of retry attempts (default: 10, per CHAT_COMPLETION_RETRY_DEFAULT)
270+
retryDelay: 1000, // Initial delay in ms between retries (default: 2000, per FETCH_RETRY_DELAY_DEFAULT)
271+
maxDelay: 5000, // Maximum delay in ms with exponential backoff (default: 10000)
272+
maxRetryAfter: 10000, // Maximum time in ms to respect retry-after headers (default: 10000)
273+
retryOn: [429, 500, 502, 503, 504], // HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
274+
})
275+
```
276+
277+
These retry options help handle:
278+
- **Rate limiting** (HTTP 429): Automatically waits for rate limit windows
279+
- **Server errors** (HTTP 5xx): Retries on temporary server issues
280+
- **Network failures**: Uses exponential backoff to avoid overwhelming services
281+
282+
Retry options can also be passed to `runPrompt()` calls to override script-level settings:
283+
284+
```js
285+
const { text } = await runPrompt(
286+
(_) => _.$`Summarize this text.`,
287+
{
288+
model: "small",
289+
retries: 2, // Override script retry settings
290+
retryDelay: 500, // Faster initial retry
291+
maxDelay: 3000, // Lower maximum delay
292+
}
293+
)
294+
```
295+
262296
### Other parameters
263297

264298
- `unlisted: true`, don't show it to the user in lists. Template `system.*` are automatically unlisted.

packages/api/src/run.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,6 @@ import {
100100
rmDir,
101101
tryStat,
102102
createGitIgnorer,
103-
OPENAI_MAX_RETRY_AFTER_DEFAULT,
104-
OPENAI_MAX_RETRY_DELAY,
105-
OPENAI_MAX_RETRY_COUNT,
106103
resolveRuntimeHost,
107104
} from "@genaiscript/core";
108105

@@ -196,9 +193,9 @@ export async function runScriptInternal(
196193
const excludedFiles = options.excludedFiles || [];
197194
const stream = !options.json;
198195
const retries = normalizeInt(options.retry);
199-
const retryDelay = normalizeInt(options.retryDelay) || OPENAI_MAX_RETRY_COUNT;
200-
const maxDelay = normalizeInt(options.maxDelay) || OPENAI_MAX_RETRY_DELAY;
201-
const maxRetryAfter = normalizeInt(options.maxRetryAfter) || OPENAI_MAX_RETRY_AFTER_DEFAULT;
196+
const retryDelay = normalizeInt(options.retryDelay);
197+
const maxDelay = normalizeInt(options.maxDelay);
198+
const maxRetryAfter = normalizeInt(options.maxRetryAfter);
202199
const outTrace = options.outTrace;
203200
const outOutput = options.outOutput;
204201
const outAnnotations = options.outAnnotations;

packages/cli/src/cli.ts

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@ import {
1414
DEBUG_SCRIPT_CATEGORY,
1515
GITHUB_REPO,
1616
MODEL_PROVIDERS,
17-
OPENAI_MAX_RETRY_COUNT,
18-
OPENAI_MAX_RETRY_DELAY,
19-
OPENAI_RETRY_DEFAULT_DEFAULT,
20-
OPENAI_MAX_RETRY_AFTER_DEFAULT,
2117
RUNTIME_ERROR_CODE,
2218
SERVER_PORT,
2319
TOOL_ID,
@@ -217,17 +213,12 @@ export async function cli(): Promise<void> {
217213
.option("--teams-message", "Posts a message to the teams channel")
218214
.option("-j, --json", "emit full JSON response to output")
219215
.option(`--fail-on-errors`, `fails on detected annotation error`)
220-
.option("--retry <number>", "number of retries", String(OPENAI_MAX_RETRY_COUNT))
221-
.option(
222-
"--retry-delay <number>",
223-
"minimum delay between retries",
224-
String(OPENAI_RETRY_DEFAULT_DEFAULT),
225-
)
226-
.option("--max-delay <number>", "maximum delay between retries", String(OPENAI_MAX_RETRY_DELAY))
216+
.option("--retry <number>", "number of retries")
217+
.option("--retry-delay <number>", "minimum delay between retries")
218+
.option("--max-delay <number>", "maximum delay between retries")
227219
.option(
228220
"--max-retry-after <number>",
229221
"maximum retry-after delay in milliseconds before giving up",
230-
String(OPENAI_MAX_RETRY_AFTER_DEFAULT),
231222
)
232223
.option("-l, --label <string>", "label for the run")
233224
.option("-t, --temperature <number>", "temperature for the run")
@@ -485,7 +476,9 @@ export async function cli(): Promise<void> {
485476
.option("--port <number>", `HTTP port number, default: ${SERVER_PORT}`)
486477
.option("-n, --network", "Opens HTTP server on 0.0.0.0 to make it accessible on the network")
487478
.alias("mcps")
488-
.description("Starts a Model Context Protocol server that exposes scripts as tools. Use --http for HTTP transport.")
479+
.description(
480+
"Starts a Model Context Protocol server that exposes scripts as tools. Use --http for HTTP transport.",
481+
)
489482
.action(startMcpServer);
490483
addModelOptions(mcp);
491484

packages/core/src/constants.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,7 @@ export const SARIFF_RULEID_PREFIX = "genaiscript/";
102102
export const SARIFF_BUILDER_URL = "https://github.com/microsoft/genaiscript/";
103103
export const SARIFF_BUILDER_TOOL_DRIVER_NAME = TOOL_ID;
104104

105-
export const OPENAI_MAX_RETRY_DELAY = 60000; // 60s
106-
export const OPENAI_MAX_RETRY_COUNT = 10;
107-
export const OPENAI_RETRY_DEFAULT_DEFAULT = 1000;
108-
export const OPENAI_MAX_RETRY_AFTER_DEFAULT = 300000; // 300s
105+
export const CHAT_COMPLETION_RETRY_DEFAULT = 10;
109106

110107
export const FETCH_RETRY_DEFAULT = 6;
111108
export const FETCH_RETRY_DELAY_DEFAULT = 2000;

packages/core/src/expander.ts

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@ import { resolveScript } from "./ast.js";
55
import { assert } from "./assert.js";
66
import type { MarkdownTrace } from "./trace.js";
77
import { errorMessage, isCancelError, NotSupportedError } from "./error.js";
8-
import { JS_REGEX, MAX_TOOL_CALLS, TS_IMPORT_REGEX } from "./constants.js";
8+
import {
9+
CHAT_COMPLETION_RETRY_DEFAULT,
10+
FETCH_RETRY_DELAY_DEFAULT,
11+
FETCH_RETRY_MAX_DELAY_DEFAULT,
12+
FETCH_RETRY_ON_DEFAULT,
13+
JS_REGEX,
14+
MAX_TOOL_CALLS,
15+
TS_IMPORT_REGEX,
16+
} from "./constants.js";
917
import {
1018
finalizeMessages,
1119
type PromptImage,
@@ -273,6 +281,14 @@ export async function expandTemplate(
273281
const disableChatPreview =
274282
options.disableChatPreview === true || template.disableChatPreview === true;
275283

284+
// Handle retry options from template
285+
const retryOn = options.retryOn ?? template.retryOn ?? FETCH_RETRY_ON_DEFAULT;
286+
const retries = options.retries ?? template.retries ?? CHAT_COMPLETION_RETRY_DEFAULT;
287+
const retryDelay = options.retryDelay ?? template.retryDelay ?? FETCH_RETRY_DELAY_DEFAULT;
288+
const maxDelay = options.maxDelay ?? template.maxDelay ?? FETCH_RETRY_MAX_DELAY_DEFAULT;
289+
const maxRetryAfter =
290+
options.maxRetryAfter ?? template.maxRetryAfter ?? FETCH_RETRY_MAX_DELAY_DEFAULT;
291+
276292
// finalize options
277293
env.meta.model = model;
278294
Object.freeze(env.meta);
@@ -453,5 +469,10 @@ export async function expandTemplate(
453469
metadata,
454470
fallbackTools: options.fallbackTools,
455471
disableChatPreview,
472+
retryOn,
473+
retries,
474+
retryDelay,
475+
maxDelay,
476+
maxRetryAfter,
456477
};
457478
}

packages/core/src/fetch.ts

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,26 @@ export async function createFetch(
107107
options?: TraceOptions & CancellationOptions & RetryOptions,
108108
): Promise<FetchType> {
109109
const {
110+
trace,
111+
cancellationToken,
110112
retries = FETCH_RETRY_DEFAULT,
111113
retryOn = FETCH_RETRY_ON_DEFAULT,
112-
trace,
113114
retryDelay = FETCH_RETRY_DELAY_DEFAULT,
114115
maxDelay = FETCH_RETRY_MAX_DELAY_DEFAULT,
115116
maxRetryAfter = FETCH_RETRY_MAX_RETRY_AFTER_DEFAULT,
116-
cancellationToken,
117117
} = options || {};
118118
const minDelay = FETCH_RETRY_MIN_DELAY_DEFAULT;
119119

120-
dbg(`create fetch`);
120+
dbg(
121+
`create fetch: retries: %d, retry on: %o, retry delay: %d, min delay: %d, max delay: %d, max retry after: %d`,
122+
retries,
123+
retryOn,
124+
retryDelay,
125+
minDelay,
126+
maxDelay,
127+
maxRetryAfter,
128+
);
129+
121130
// We create a proxy based on Node.js environment variables.
122131
const agent = await resolveHttpsProxyAgent();
123132

@@ -137,16 +146,6 @@ export async function createFetch(
137146
return crossFetchWithProxy;
138147
}
139148

140-
// Create a fetch function with retry logic
141-
dbgr(
142-
`retries: %d, retry on: %o, retry delay: %d, min delay: %d, max delay: %d, max retry after: %d`,
143-
retries,
144-
retryOn,
145-
retryDelay,
146-
minDelay,
147-
maxDelay,
148-
maxRetryAfter,
149-
);
150149
const fetchRetry = wrapFetch(crossFetchWithProxy, {
151150
retries,
152151
retryOn: (attempt, error, response) => {

packages/core/src/openai-chatcompletion.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ export const OpenAIv1ChatCompletion: ChatCompletionHandler = async (req, cfg, op
100100
requestOptions,
101101
partialCb,
102102
retries,
103+
retryOn,
103104
retryDelay,
104105
maxDelay,
105106
maxRetryAfter,
@@ -243,6 +244,7 @@ export const OpenAIv1ChatCompletion: ChatCompletionHandler = async (req, cfg, op
243244
const fetchRetry = await createFetch({
244245
trace,
245246
retries,
247+
retryOn,
246248
retryDelay,
247249
maxDelay,
248250
maxRetryAfter,

packages/core/src/openai-responses.ts

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,18 @@
1010
import OpenAI from "openai";
1111
import { genaiscriptDebug } from "./debug.js";
1212
import type { ChatCompletionHandler } from "./chat.js";
13-
import type { ChatCompletionMessageParam, ChatCompletionResponse } from "./chattypes.js";
13+
import type {
14+
ChatCompletionMessageParam,
15+
ChatCompletionResponse,
16+
ChatCompletionsOptions,
17+
} from "./chattypes.js";
1418
import { errorMessage, isCancelError } from "./error.js";
1519
import { createFetch } from "./fetch.js";
1620
import { logError } from "./util.js";
17-
import { checkCancelled } from "./cancellation.js";
21+
import { type CancellationOptions, checkCancelled } from "./cancellation.js";
1822
import { deleteUndefinedValues } from "./cleaners.js";
23+
import type { RetryOptions } from "./types.js";
24+
import type { MarkdownTrace } from "./trace.js";
1925
const dbg = genaiscriptDebug("openai:responses");
2026

2127
function statusToReason(
@@ -169,8 +175,8 @@ export const OpenAIv2ResponsesChatCompletion: ChatCompletionHandler = async (
169175
async function handleNonStreamingResponse(
170176
openai: OpenAI,
171177
request: OpenAI.Responses.ResponseCreateParams,
172-
options: any,
173-
trace: any,
178+
options: ChatCompletionsOptions & CancellationOptions & RetryOptions,
179+
trace: MarkdownTrace,
174180
): Promise<ChatCompletionResponse> {
175181
const { cancellationToken } = options;
176182

@@ -190,13 +196,16 @@ async function handleNonStreamingResponse(
190196
async function handleStreamingResponse(
191197
openai: OpenAI,
192198
request: OpenAI.Responses.ResponseCreateParams,
193-
options: any,
194-
trace: any,
199+
options: ChatCompletionsOptions & CancellationOptions & RetryOptions,
200+
trace: MarkdownTrace,
195201
): Promise<ChatCompletionResponse> {
196202
const { cancellationToken, partialCb } = options;
197203

198204
checkCancelled(cancellationToken);
199205

206+
let reasoningSoFar = "";
207+
let responseSoFar = "";
208+
let tokensSoFar = 0;
200209
const res: ChatCompletionResponse = {};
201210
try {
202211
const stream = await openai.responses.create({
@@ -222,8 +231,28 @@ async function handleStreamingResponse(
222231
case "response.created":
223232
Object.assign(res, responseToCompletion(chunk.response));
224233
break;
234+
case "response.reasoning_summary_text.delta":
235+
reasoningSoFar += chunk.delta;
236+
if (partialCb)
237+
partialCb({
238+
reasoningSoFar,
239+
tokensSoFar,
240+
responseSoFar,
241+
reasoningChunk: chunk.delta,
242+
responseChunk: undefined,
243+
inner: false,
244+
});
245+
break;
225246
case "response.output_text.delta":
226-
if (partialCb) partialCb({ text: chunk.delta });
247+
responseSoFar += chunk.delta;
248+
if (partialCb)
249+
partialCb({
250+
reasoningSoFar,
251+
responseChunk: chunk.delta,
252+
inner: false,
253+
tokensSoFar,
254+
responseSoFar,
255+
});
227256
trace?.appendContent(chunk.delta);
228257
break;
229258
case "response.refusal.done":

0 commit comments

Comments (0)