fix(providers): unify OPENAI_TIMEOUT_MS + AGENTMEMORY_LLM_TIMEOUT_MS (#446) (#453)

rohitg00 · web-flow · commit 49377db0db46 · 2026-05-17T12:41:30.000+01:00
* fix(providers): unify OPENAI_TIMEOUT_MS + AGENTMEMORY_LLM_TIMEOUT_MS (#446) v0.9.17 shipped OPENAI_TIMEOUT_MS scoped to the OpenAI LLM provider (inline AbortController, default 60s). PR #379 then shipped AGENTMEMORY_LLM_TIMEOUT_MS in the shared src/providers/_fetch.ts helper used by every other raw-fetch provider (Gemini, OpenRouter, MiniMax, OpenAI/Cohere/Voyage/OpenRouter embedding). Two env vars, same meaning, different names — ops confusion. Unify on the global name while keeping back-compat: 1. OPENAI_TIMEOUT_MS — OpenAI-scoped alias, takes precedence 2. AGENTMEMORY_LLM_TIMEOUT_MS — global fall-back across providers 3. 60 000 ms default The OpenAI LLM provider now routes through the shared fetchWithTimeout helper, dropping ~30 lines of duplicate AbortController + clearTimeout plumbing. Existing users with OPENAI_TIMEOUT_MS set keep the exact v0.9.17 behaviour; new users setting AGENTMEMORY_LLM_TIMEOUT_MS get the OpenAI LLM path covered too. README + .env.example now document AGENTMEMORY_LLM_TIMEOUT_MS as the canonical name and note OPENAI_TIMEOUT_MS as the OpenAI-scoped alias. 4 new precedence tests in test/fetch-timeout.test.ts cover all four env-var combinations. * test(providers): strict parse for OPENAI_TIMEOUT_MS env (CodeRabbit) CodeRabbit caught parseInt("30ms", 10) silently returning 30 in the timeout-resolve path. A user who mistypes the value (e.g. "60s" or "1_000") would silently get a 60 ms or 1 ms bound from input that should have been rejected outright. parsePositiveInt now rejects anything that isn't pure digits via /^\d+$/ (after trim). parseInt's lenience on trailing units / underscores / signs is gone — those values now fall through to AGENTMEMORY_LLM_TIMEOUT_MS (when set) or the 60s default instead of masquerading as an aggressive bound. New regression test covers "30ms", "1_000", "60s", "30abc", "-30", "0". Whitespace padding (e.g. " 30 ") is still accepted — that's normal env-var handling. 992/992 tests pass on the worktree.
diff --git a/.env.example b/.env.example
@@ -45,6 +45,12 @@
 
 # MAX_TOKENS=4096                                # Cap LLM completion tokens for compression / summarise calls
 
+# Outbound LLM / embedding timeout — shared across every raw-fetch provider
+# (Gemini, OpenRouter, MiniMax, OpenAI LLM, and OpenAI/Cohere/Voyage/OpenRouter
+# embedding). For back-compat with v0.9.17 the OpenAI LLM path also honours
+# OPENAI_TIMEOUT_MS, an OpenAI-scoped alias that takes precedence when set.
+# AGENTMEMORY_LLM_TIMEOUT_MS=60000                # Default: 60 000 ms (60 s)
+
 # Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk
 # child sessions). Off by default — the agent-sdk fallback can trigger
 # Stop-hook recursion (#149 follow-up) when invoked from inside Claude Code.
diff --git a/README.md b/README.md
@@ -1058,7 +1058,10 @@ Create `~/.agentmemory/.env`:
 #                                          # api-key header + api-version query param.
 # OPENAI_API_VERSION=2024-08-01-preview    # Optional: Azure api-version query param
 # OPENAI_MODEL=gpt-4o-mini                 # Optional: default model
-# OPENAI_TIMEOUT_MS=60000                  # Optional: outbound fetch timeout (default 60s)
+# OPENAI_TIMEOUT_MS=60000                  # Optional: OpenAI-scoped alias for the outbound fetch
+#                                          # timeout. Takes precedence over AGENTMEMORY_LLM_TIMEOUT_MS
+#                                          # for back-compat with v0.9.17. New configs should
+#                                          # prefer the global AGENTMEMORY_LLM_TIMEOUT_MS below.
 # OPENAI_REASONING_EFFORT=none             # Optional: "low" | "medium" | "high" | "none"
 #                                          # Honored only by OpenAI's reasoning models (o1, o3,
 #                                          # gpt-*-reasoning) and providers that mirror that
@@ -1083,7 +1086,11 @@ Create `~/.agentmemory/.env`:
 # Outbound LLM / embedding timeout
 # AGENTMEMORY_LLM_TIMEOUT_MS=60000       # Default: 60 000 ms (60 s). Applies to every
                                           # raw-fetch provider (Gemini, OpenRouter, MiniMax,
-                                          # OpenAI/Cohere/Voyage/OpenRouter embedding).
+                                          # OpenAI LLM, OpenAI/Cohere/Voyage/OpenRouter
+                                          # embedding). For the OpenAI LLM path, the
+                                          # OpenAI-scoped OPENAI_TIMEOUT_MS alias (above)
+                                          # takes precedence when set, for back-compat
+                                          # with v0.9.17.
                                           # Increase for slow networks or large batch calls;
                                           # decrease to fail-fast on rate-limit holds.
 
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
@@ -1,5 +1,6 @@
 import type { MemoryProvider } from "../types.js";
 import { getEnvVar } from "../config.js";
+import { fetchWithTimeout } from "./_fetch.js";
 
 const DEFAULT_BASE_URL = "https://api.openai.com";
 const DEFAULT_MODEL = "gpt-4o-mini";
@@ -25,7 +26,12 @@ const DEFAULT_AZURE_API_VERSION = "2024-08-01-preview";
  *                              Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
  *   OPENAI_MODEL             — model name (default: gpt-4o-mini)
  *   OPENAI_API_VERSION       — Azure api-version query param (default: 2024-08-01-preview)
- *   OPENAI_TIMEOUT_MS        — outbound fetch timeout in ms (default: 60000)
+ *   OPENAI_TIMEOUT_MS        — outbound fetch timeout in ms (OpenAI-scoped alias,
+ *                              takes precedence over AGENTMEMORY_LLM_TIMEOUT_MS
+ *                              for back-compat with the v0.9.17 shipping name).
+ *   AGENTMEMORY_LLM_TIMEOUT_MS — outbound fetch timeout in ms shared across all
+ *                              raw-fetch LLM + embedding providers. Used when
+ *                              OPENAI_TIMEOUT_MS is not set. Default: 60000.
  *   MAX_TOKENS               — max output tokens (default: from config or 4096)
  *   OPENAI_REASONING_EFFORT  — "low" | "medium" | "high" | "none"
  *                              Passthrough for reasoning models (e.g. Ollama Cloud
@@ -54,7 +60,7 @@ export class OpenAIProvider implements MemoryProvider {
       DEFAULT_BASE_URL
     ).replace(/\/+$/, "");
     this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined;
-    this.timeoutMs = parseTimeout(getEnvVar("OPENAI_TIMEOUT_MS"));
+    this.timeoutMs = resolveTimeout();
     this.azureApiVersion =
       getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION;
     this.isAzure = detectAzure(this.baseUrl);
@@ -107,33 +113,31 @@ export class OpenAIProvider implements MemoryProvider {
       body.reasoning_effort = this.reasoningEffort;
     }
 
-    // Bound the request with an AbortController so a hung provider
-    // can't stall the worker. The other raw-fetch providers
-    // (anthropic, gemini, openrouter, minimax) have the same gap
-    // tracked in a follow-up issue; this PR fixes it for the new
-    // surface only.
-    const ac = new AbortController();
-    const t = setTimeout(() => ac.abort(), this.timeoutMs);
+    // Bound the request via the shared fetchWithTimeout helper, which
+    // owns the AbortController + clearTimeout cleanup for every raw-fetch
+    // provider (minimax, openrouter, gemini, openrouter-embed, etc.).
+    // OPENAI_TIMEOUT_MS keeps its v0.9.17 meaning (OpenAI-scoped alias,
+    // takes precedence); when unset we fall through to
+    // AGENTMEMORY_LLM_TIMEOUT_MS and finally the 60s default. See #446.
     let response: Response;
     try {
-      response = await fetch(url, {
-        method: "POST",
-        headers: this.buildHeaders(),
-        body: JSON.stringify(body),
-        signal: ac.signal,
-      });
+      response = await fetchWithTimeout(
+        url,
+        {
+          method: "POST",
+          headers: this.buildHeaders(),
+          body: JSON.stringify(body),
+        },
+        this.timeoutMs,
+      );
     } catch (err) {
-      const aborted =
-        ac.signal.aborted ||
-        (err instanceof Error && err.name === "AbortError");
+      const aborted = err instanceof Error && err.name === "AbortError";
       if (aborted) {
         throw new Error(
-          `OpenAI API request timed out after ${this.timeoutMs}ms — set OPENAI_TIMEOUT_MS to raise the bound or check the provider status.`,
+          `OpenAI API request timed out after ${this.timeoutMs}ms — set OPENAI_TIMEOUT_MS (or AGENTMEMORY_LLM_TIMEOUT_MS) to raise the bound or check the provider status.`,
         );
       }
       throw err;
-    } finally {
-      clearTimeout(t);
     }
 
     if (!response.ok) {
@@ -160,10 +164,33 @@ export class OpenAIProvider implements MemoryProvider {
   }
 }
 
-function parseTimeout(raw: string | null | undefined): number {
-  if (!raw) return DEFAULT_TIMEOUT_MS;
-  const n = parseInt(raw, 10);
-  return Number.isFinite(n) && n > 0 ? n : DEFAULT_TIMEOUT_MS;
+// Resolves the outbound-fetch timeout for the OpenAI LLM path.
+// Precedence (preserving v0.9.17 behaviour):
+//   1. OPENAI_TIMEOUT_MS       — OpenAI-scoped alias (back-compat)
+//   2. AGENTMEMORY_LLM_TIMEOUT_MS — global LLM/embedding timeout (#446)
+//   3. 60 000 ms default
+function resolveTimeout(): number {
+  const openaiRaw = getEnvVar("OPENAI_TIMEOUT_MS");
+  const openai = parsePositiveInt(openaiRaw);
+  if (openai !== undefined) return openai;
+
+  const globalRaw = getEnvVar("AGENTMEMORY_LLM_TIMEOUT_MS");
+  const globalMs = parsePositiveInt(globalRaw);
+  if (globalMs !== undefined) return globalMs;
+
+  return DEFAULT_TIMEOUT_MS;
+}
+
+function parsePositiveInt(raw: string | null | undefined): number | undefined {
+  if (!raw) return undefined;
+  const trimmed = raw.trim();
+  // Reject malformed values like "30ms" or "1_000" — parseInt would
+  // silently return 30 / 1, swallowing user typos as valid timeouts.
+  // The regex enforces pure digits (no sign, no trailing units, no
+  // separators) before we hand off to Number.
+  if (!/^\d+$/.test(trimmed)) return undefined;
+  const n = Number(trimmed);
+  return Number.isFinite(n) && n > 0 ? n : undefined;
 }
 
 function detectAzure(baseUrl: string): boolean {
diff --git a/test/fetch-timeout.test.ts b/test/fetch-timeout.test.ts
@@ -2,6 +2,7 @@ import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
 import { fetchWithTimeout } from "../src/providers/_fetch.js";
 import { MinimaxProvider } from "../src/providers/minimax.js";
 import { OpenRouterProvider } from "../src/providers/openrouter.js";
+import { OpenAIProvider } from "../src/providers/openai.js";
 import { GeminiEmbeddingProvider } from "../src/providers/embedding/gemini.js";
 import { OpenAIEmbeddingProvider } from "../src/providers/embedding/openai.js";
 import { CohereEmbeddingProvider } from "../src/providers/embedding/cohere.js";
@@ -195,3 +196,83 @@ describe("Provider hang regression — OpenRouterEmbeddingProvider", () => {
     await expect(provider.embedBatch(["hello"])).rejects.toThrow();
   });
 });
+
+// ─────────────────────────────────────────────────────────────
+// #446 — OpenAI LLM provider env-var precedence
+//
+// v0.9.17 shipped OPENAI_TIMEOUT_MS (OpenAI-scoped). PR #379 then
+// shipped AGENTMEMORY_LLM_TIMEOUT_MS (shared). The provider now
+// honours both: OPENAI_TIMEOUT_MS wins for back-compat, with
+// AGENTMEMORY_LLM_TIMEOUT_MS as the global fall-back.
+// ─────────────────────────────────────────────────────────────
+describe("OpenAIProvider timeout env precedence (#446)", () => {
+  beforeEach(() => {
+    delete process.env["OPENAI_TIMEOUT_MS"];
+    delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"];
+    vi.spyOn(globalThis, "fetch").mockImplementation(hangingFetch as typeof fetch);
+  });
+  afterEach(() => {
+    vi.restoreAllMocks();
+    delete process.env["OPENAI_TIMEOUT_MS"];
+    delete process.env["AGENTMEMORY_LLM_TIMEOUT_MS"];
+  });
+
+  it("OPENAI_TIMEOUT_MS alone aborts the OpenAI LLM call", async () => {
+    process.env["OPENAI_TIMEOUT_MS"] = "30";
+    const provider = new OpenAIProvider("test-key", "gpt-4o-mini", 1024);
+    await expect(provider.compress("system", "user")).rejects.toThrow(
+      /timed out after 30ms/,
+    );
+  });
+
+  it("AGENTMEMORY_LLM_TIMEOUT_MS alone aborts the OpenAI LLM call", async () => {
+    process.env["AGENTMEMORY_LLM_TIMEOUT_MS"] = "30";
+    const provider = new OpenAIProvider("test-key", "gpt-4o-mini", 1024);
+    await expect(provider.compress("system", "user")).rejects.toThrow(
+      /timed out after 30ms/,
+    );
+  });
+
+  it("OPENAI_TIMEOUT_MS wins when both are set (back-compat)", async () => {
+    process.env["OPENAI_TIMEOUT_MS"] = "30";
+    // Set the global to a much larger value — if precedence is wrong,
+    // we'd time out at 5000ms and the test would hang past the 5s
+    // vitest default. We assert the message ms to lock the precedence.
+    process.env["AGENTMEMORY_LLM_TIMEOUT_MS"] = "5000";
+    const provider = new OpenAIProvider("test-key", "gpt-4o-mini", 1024);
+    await expect(provider.compress("system", "user")).rejects.toThrow(
+      /timed out after 30ms/,
+    );
+  });
+
+  it("falls back to the 60 000 ms default when neither is set", () => {
+    // We don't actually wait 60s — the provider stores timeoutMs at
+    // construction. Construct with neither env var set, then read the
+    // stored value directly; no request is issued in this test.
+    const provider = new OpenAIProvider("test-key", "gpt-4o-mini", 1024);
+    // Read the resolved timeout from the constructed instance. The
+    // class keeps `timeoutMs` private, so the test reaches in via a
+    // type cast; this is the same ms value the provider interpolates
+    // into its timeout error message, and it must be 60000.
+    const ms = (provider as unknown as { timeoutMs: number }).timeoutMs;
+    expect(ms).toBe(60_000);
+  });
+
+  it("rejects malformed env values like '30ms' or '1_000' (CodeRabbit catch)", () => {
+    // parseInt would have silently returned 30 / 1 for these typos —
+    // strict parse now rejects them and the provider falls back to
+    // the 60 000 ms default so a malformed env doesn't masquerade as
+    // an aggressive bound.
+    // Whitespace padding (" 30 ") is legitimate env-var handling — we trim
+    // before validating. Most cases below are typos parseInt would silently
+    // swallow; "-30" and "0" pin the sign / positivity rejection as well.
+    for (const bad of ["30ms", "1_000", "60s", "30abc", "-30", "0"]) {
+      process.env["OPENAI_TIMEOUT_MS"] = bad;
+      const provider = new OpenAIProvider("test-key", "gpt-4o-mini", 1024);
+      const ms = (provider as unknown as { timeoutMs: number }).timeoutMs;
+      expect(ms).toBe(60_000);
+      delete process.env["OPENAI_TIMEOUT_MS"];
+    }
+  });
+});
+