From 1b7d6de04d9fb724a850bb9b942f1ff58aaeace2 Mon Sep 17 00:00:00 2001
From: Hephaestus <hephaestus@aegis.dev>
Date: Tue, 23 Jun 2026 12:32:53 +0200
Subject: [PATCH 1/2] test(devops): add failing TDD spec for #4808 cron timeout
 override shim

Covers #4808 (Lane B of #4755). The release-please dispatch cron
ad1ab50a-dba8-40e2-a3de-ca2d2d09dba5 (issue-body nickname dbe0ed03)
times out per-provider during the sequential fallback chain because
each provider's per-call timeout is ~2.5min, too short for the
complex multi-step release-please pre-flight payload.

The upstream fix is openclaw/openclaw#95408 (per-agent
model.requestTimeoutSeconds, Lane C). Until that merges, we need a
workaround on the Aegis side: bump models.providers.<provider>.
timeoutSeconds for the 3 unique providers used by ag-hermes.

This commit adds a vitest spec that runs the bash script against
fixture OpenClaw configs to verify:
1. DRY-RUN does not modify the config
2. APPLY=1 sets timeoutSeconds on each target provider
3. TIMEOUT_SECONDS env var override
4. Idempotency (re-running is a no-op)
5. Skip semantics (providers already at-or-above target)
6. Scope (TARGET_PROVIDERS env var)
7. Error paths (missing config, invalid timeout, malformed config)
8. Partial success (missing target provider doesn't abort others)

The script itself is added in the next commit (green phase).
Expected: vitest currently fails because bash exits 127 on the missing
script - that's the red phase.
---
 .../add-cron-timeout-overrides.test.ts        | 258 ++++++++++++++++++
 1 file changed, 258 insertions(+)
 create mode 100644 scripts/devops/__tests__/add-cron-timeout-overrides.test.ts
diff --git a/scripts/devops/__tests__/add-cron-timeout-overrides.test.ts b/scripts/devops/__tests__/add-cron-timeout-overrides.test.ts
new file mode 100644
index 00000000..384b777b
--- /dev/null
+++ b/scripts/devops/__tests__/add-cron-timeout-overrides.test.ts
@@ -0,0 +1,258 @@
+/**
+ * Regression tests for scripts/devops/add-cron-timeout-overrides.sh
+ *
+ * Covers #4808 (Lane B of #4755). The script applies a per-provider
+ * `timeoutSeconds` override to the OpenClaw config so non-trivial isolated
+ * agentTurn cron payloads don't time out per-provider during the
+ * sequential fallback chain.
+ *
+ * These tests run the actual bash script against fixture OpenClaw config
+ * files in a temp directory. They verify:
+ *   1. DRY-RUN mode does NOT modify the config
+ *   2. APPLY=1 mode sets timeoutSeconds (default or TIMEOUT_SECONDS) on each target provider
+ *   3. Idempotency and skip semantics: providers already at-or-above target are left as-is
+ *   4. Scope: only TARGET_PROVIDERS are patched; a missing target doesn't abort the others
+ *   5. Error paths: missing config, invalid timeout value, config without models.providers
+ *
+ * Requires `bash` and `jq` on PATH (same as the script itself).
+ */
+import { execFileSync } from 'node:child_process';
+import { mkdtempSync, writeFileSync, readFileSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join, resolve } from 'node:path';
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+
+const REPO_ROOT = resolve(__dirname, '../../..'); // three levels above this test's directory
+const SCRIPT_PATH = join(REPO_ROOT, 'scripts/devops/add-cron-timeout-overrides.sh');
+/** The subset of the OpenClaw config shape that these tests write and read back. */
+interface OpenClawConfigFixture {
+  models: {
+    mode: string;
+    providers: Record<string, Record<string, unknown>>; // provider id -> provider settings
+  };
+}
+
+/**
+ * Builds a config fixture holding the script's three default target providers
+ * plus one unrelated provider. Each `overrides` entry replaces (or adds) a whole entry.
+ */
+function makeFixture(
+  overrides: Record<string, Record<string, unknown>> = {},
+): OpenClawConfigFixture {
+  const providers: Record<string, Record<string, unknown>> = {
+    'minimax-portal': { baseUrl: 'https://example.test' },
+    kimi: { baseUrl: 'https://example.test' },
+    zai: { baseUrl: 'https://example.test' },
+    'unrelated-provider': { baseUrl: 'https://example.test' },
+    ...overrides,
+  };
+  return { models: { mode: 'merge', providers } };
+}
+
+/** Runs the script with bash; never throws — a failed run comes back as a non-zero `status`. */
+function runScript(params: {
+  configPath: string;
+  env?: Record<string, string>;
+  apply?: boolean;
+}): { stdout: string; stderr: string; status: number } {
+  // Strip the script's own knobs from the inherited environment so a value exported
+  // in the invoking shell cannot change its defaults; only `params` may set them.
+  const knobs = new Set(['APPLY', 'TIMEOUT_SECONDS', 'TARGET_PROVIDERS']);
+  const env: NodeJS.ProcessEnv = {
+    ...Object.fromEntries(Object.entries(process.env).filter(([key]) => !knobs.has(key))),
+    OPENCLAW_CONFIG: params.configPath,
+    ...(params.apply ? { APPLY: '1' } : {}),
+    ...(params.env ?? {}),
+  };
+  try {
+    const stdout = execFileSync('bash', [SCRIPT_PATH], {
+      env,
+      encoding: 'utf8',
+      stdio: ['ignore', 'pipe', 'pipe'],
+    });
+    return { stdout, stderr: '', status: 0 };
+  } catch (err) {
+    const e = err as { stdout?: string; stderr?: string; status?: number };
+    return { stdout: e.stdout ?? '', stderr: e.stderr ?? '', status: e.status ?? 1 };
+  }
+}
+
+describe('add-cron-timeout-overrides.sh', () => {
+  let workDir: string;
+  // Fresh temp directory per test, removed afterwards, so cases cannot share config state.
+  beforeEach(() => {
+    workDir = mkdtempSync(join(tmpdir(), 'cron-timeout-shim-test-'));
+  });
+
+  afterEach(() => {
+    rmSync(workDir, { recursive: true, force: true });
+  });
+  // Writes `config` as pretty-printed JSON to <workDir>/openclaw.json and returns the path.
+  function writeFixture(config: OpenClawConfigFixture): string {
+    const path = join(workDir, 'openclaw.json');
+    writeFileSync(path, JSON.stringify(config, null, 2));
+    return path;
+  }
+  // Parses the config back from disk; the result is cast, not validated.
+  function readConfig(path: string): OpenClawConfigFixture {
+    return JSON.parse(readFileSync(path, 'utf8')) as OpenClawConfigFixture;
+  }
+
+  it('DRY-RUN mode does not modify the config', () => {
+    const configPath = writeFixture(makeFixture());
+    // `apply` is not passed, so runScript does not add APPLY to the environment.
+    const { stdout, status } = runScript({ configPath });
+
+    expect(status).toBe(0);
+    expect(stdout).toContain('DRY-RUN');
+
+    const config = readConfig(configPath);
+    expect(config.models.providers['minimax-portal'].timeoutSeconds).toBeUndefined();
+    expect(config.models.providers.kimi.timeoutSeconds).toBeUndefined();
+    expect(config.models.providers.zai.timeoutSeconds).toBeUndefined();
+  });
+
+  it('APPLY=1 sets timeoutSeconds on each target provider', () => {
+    const configPath = writeFixture(makeFixture());
+
+    const { stdout, status } = runScript({ configPath, apply: true });
+
+    expect(status).toBe(0);
+    expect(stdout).toContain('APPLY');
+
+    const config = readConfig(configPath);
+    expect(config.models.providers['minimax-portal'].timeoutSeconds).toBe(600);
+    expect(config.models.providers.kimi.timeoutSeconds).toBe(600);
+    expect(config.models.providers.zai.timeoutSeconds).toBe(600);
+  });
+
+  it('APPLY=1 with TIMEOUT_SECONDS uses the override value', () => {
+    const configPath = writeFixture(makeFixture());
+
+    const { status } = runScript({
+      configPath,
+      apply: true,
+      env: { TIMEOUT_SECONDS: '900' },
+    });
+
+    expect(status).toBe(0);
+    const config = readConfig(configPath);
+    expect(config.models.providers['minimax-portal'].timeoutSeconds).toBe(900);
+    expect(config.models.providers.zai.timeoutSeconds).toBe(900);
+  });
+
+  it('idempotent: re-running leaves the config unchanged after first apply', () => {
+    const configPath = writeFixture(makeFixture());
+
+    const first = runScript({ configPath, apply: true });
+    expect(first.status).toBe(0);
+    // Compare raw file contents so any rewrite, even a pure reformat, would be caught.
+    const afterFirst = readFileSync(configPath, 'utf8');
+
+    const second = runScript({ configPath, apply: true });
+    expect(second.status).toBe(0);
+    expect(second.stdout).toContain('Already at or above target (skipped): 3');
+
+    const afterSecond = readFileSync(configPath, 'utf8');
+    expect(afterSecond).toBe(afterFirst);
+  });
+
+  it('skips providers already at or above the target timeout', () => {
+    const configPath = writeFixture(
+      makeFixture({
+        'minimax-portal': { timeoutSeconds: 900 },
+      }),
+    );
+    // The override replaces the whole 'minimax-portal' entry; 900 is above the 600 asserted below.
+    const { stdout, status } = runScript({ configPath, apply: true });
+
+    expect(status).toBe(0);
+    const config = readConfig(configPath);
+    expect(config.models.providers['minimax-portal'].timeoutSeconds).toBe(900);
+    expect(config.models.providers.kimi.timeoutSeconds).toBe(600);
+    expect(config.models.providers.zai.timeoutSeconds).toBe(600);
+
+    expect(stdout).toContain('already has timeoutSeconds=900');
+  });
+
+  it('does not touch providers outside TARGET_PROVIDERS', () => {
+    const configPath = writeFixture(makeFixture());
+
+    const { status } = runScript({ configPath, apply: true });
+
+    expect(status).toBe(0);
+    const config = readConfig(configPath);
+    expect(config.models.providers['unrelated-provider'].timeoutSeconds).toBeUndefined();
+  });
+
+  it('TARGET_PROVIDERS env var scopes the patch', () => {
+    const configPath = writeFixture(makeFixture());
+
+    const { status } = runScript({
+      configPath,
+      apply: true,
+      env: { TARGET_PROVIDERS: 'zai' },
+    });
+
+    expect(status).toBe(0);
+    const config = readConfig(configPath);
+    expect(config.models.providers.zai.timeoutSeconds).toBe(600);
+    expect(config.models.providers['minimax-portal'].timeoutSeconds).toBeUndefined();
+    expect(config.models.providers.kimi.timeoutSeconds).toBeUndefined();
+  });
+
+  it('exits non-zero when config file is missing', () => {
+    const missing = join(workDir, 'does-not-exist.json');
+    const { status, stderr } = runScript({ configPath: missing });
+
+    expect(status).not.toBe(0);
+    expect(stderr).toContain('not found');
+  });
+
+  it('exits non-zero when TIMEOUT_SECONDS is invalid', () => {
+    const configPath = writeFixture(makeFixture());
+
+    const { status, stderr } = runScript({
+      configPath,
+      apply: true,
+      env: { TIMEOUT_SECONDS: 'not-a-number' },
+    });
+
+    expect(status).not.toBe(0);
+    expect(stderr).toContain('TIMEOUT_SECONDS must be a positive integer');
+  });
+
+  it('exits non-zero when config lacks models.providers object', () => {
+    const bogus = join(workDir, 'bogus.json');
+    writeFileSync(bogus, JSON.stringify({ meta: { foo: 'bar' } }));
+
+    const { status, stderr } = runScript({ configPath: bogus });
+
+    expect(status).not.toBe(0);
+    expect(stderr).toContain('does not have a models.providers object');
+  });
+
+  it('reports missing target provider in summary without aborting other updates', () => {
+    const fixture: OpenClawConfigFixture = {
+      models: {
+        mode: 'merge',
+        providers: {
+          'minimax-portal': { baseUrl: 'https://example.test' },
+          zai: { baseUrl: 'https://example.test' },
+        },
+      },
+    };
+    const configPath = writeFixture(fixture);
+
+    const { stdout, status } = runScript({ configPath, apply: true });
+
+    expect(status).toBe(0);
+    // The script uses an em-dash and 'not found' marker; assert on the stable parts.
+    expect(stdout).toMatch(/kimi\s+\S+\s+not found in models\.providers/);
+    expect(stdout).toContain('Provider not found in config: 1');
+
+    const config = readConfig(configPath);
+    expect(config.models.providers['minimax-portal'].timeoutSeconds).toBe(600);
+    expect(config.models.providers.zai.timeoutSeconds).toBe(600);
+  });
+});

From c4427ac87471caf478fb2caa201ca2416e801f97 Mon Sep 17 00:00:00 2001
From: Hephaestus <hephaestus@aegis.dev>
Date: Tue, 23 Jun 2026 12:33:24 +0200
Subject: [PATCH 2/2] feat(devops): add-cron-timeout-overrides.sh for #4808
 (Lane B of #4755)

Implements the aegis-side shim that raises the per-provider timeout
ceiling for non-trivial isolated agentTurn cron payloads (release-please
dispatch on ad1ab50a-dba8-40e2-a3de-ca2d2d09dba5).

The script applies models.providers.<provider>.timeoutSeconds to the 3
unique providers used by ag-hermes's fallback chain (minimax-portal,
kimi, zai). OpenClaw 2026.5.7 reads this knob at model-f6pqrkVH.js:348
(applyConfiguredProviderOverrides), so it takes effect on the next
gateway reload.

Key properties:
- Idempotent: re-running is a no-op once timeoutSeconds is at or above target
- DRY-RUN by default; APPLY=1 to actually patch
- TIMEOUT_SECONDS env var overrides the 600s default (4x the observed
  ~2.5min per-provider ceiling)
- TARGET_PROVIDERS env var scopes the patch (default: all 3 providers)
- OPENCLAW_CONFIG env var for non-default install paths
- jq-based atomic write via mktemp + mv (no shell-injection surface)
- Validates config has models.providers object before patching

The shim is global per-provider (not per-agent) because the OpenClaw
2026.5.7 schema only honors timeoutSeconds at the models.providers level.
This is acceptable because:
- Simple-payload crons complete well under 600s anyway
- The outer cron-level payload.timeoutSeconds is unchanged (each cron
  still has its own outer bound)
- The upstream fix openclaw/openclaw#95408 (per-agent
  model.requestTimeoutSeconds, Lane C, Hermes) will replace this once
  it merges + ships + this host upgrades

TDD discipline: the test commit 1b7d6de0 (red) verified all 11 cases
fail with status 127 (script not found). This commit (green) makes all
11 pass.

Companion docs: scripts/devops/README.md explains the problem, the
shim's safety rationale, and the operational steps to re-enable the
ad1ab50a cron after applying.

Companion example: examples/openclaw-agent/openclaw-cron-timeout.example.json
shows the config snippet for users who want to apply the override
manually instead of via the script.

Refs #4808, #4755 (Lane B), openclaw/openclaw#95408 (Lane C).
---
 .../openclaw-cron-timeout.example.json        |  19 ++
 scripts/devops/README.md                      | 115 +++++++++++
 scripts/devops/add-cron-timeout-overrides.sh  | 189 ++++++++++++++++++
 3 files changed, 323 insertions(+)
 create mode 100644 examples/openclaw-agent/openclaw-cron-timeout.example.json
 create mode 100644 scripts/devops/README.md
 create mode 100755 scripts/devops/add-cron-timeout-overrides.sh

diff --git a/examples/openclaw-agent/openclaw-cron-timeout.example.json b/examples/openclaw-agent/openclaw-cron-timeout.example.json
new file mode 100644
index 00000000..2f1c0cdd
--- /dev/null
+++ b/examples/openclaw-agent/openclaw-cron-timeout.example.json
@@ -0,0 +1,19 @@
+{
+  "$schema": "https://openclaw.dev/schemas/config.json",
+  "name": "aegis-cron-timeout-shim",
+  "description": "Reference OpenClaw config snippet demonstrating the models.providers.<provider>.timeoutSeconds knob that #4808 uses to raise the per-provider timeout ceiling for non-trivial isolated agentTurn cron payloads. Apply with scripts/devops/add-cron-timeout-overrides.sh.",
+  "models": {
+    "mode": "merge",
+    "providers": {
+      "minimax-portal": {
+        "timeoutSeconds": 600
+      },
+      "kimi": {
+        "timeoutSeconds": 600
+      },
+      "zai": {
+        "timeoutSeconds": 600
+      }
+    }
+  }
+}
diff --git a/scripts/devops/README.md b/scripts/devops/README.md
new file mode 100644
index 00000000..f4961196
--- /dev/null
+++ b/scripts/devops/README.md
@@ -0,0 +1,115 @@
+# Cron Timeout Override Shim
+
+**Issue:** [#4808](https://github.com/OneStepAt4time/aegis/issues/4808) — Lane B of [#4755](https://github.com/OneStepAt4time/aegis/issues/4755).
+
+## Problem
+
+Non-trivial `isolated agentTurn` cron payloads time out per-provider during
+the OpenClaw sequential fallback chain. Observed: each of the 5 fallback
+attempts hits its ~2.5min per-call timeout (≈ 13min in total) on complex
+multi-step workloads (release-please pre-flight). The cron fails with
+`FallbackSummaryError: All models failed (5)`.
+
+## Why this script exists
+
+The root fix is upstream ([openclaw/openclaw#95408](https://github.com/openclaw/openclaw/issues/95408) —
+per-agent `model.requestTimeoutSeconds`, Lane C, Hermes). Until that
+merges + ships + this host upgrades, we need a workaround on the Aegis
+side.
+
+The workaround: bump `models.providers.<provider>.timeoutSeconds` for the
+3 unique providers used by `ag-hermes` (the agent that runs the
+release-please cron). OpenClaw 2026.5.7 reads this knob at
+`model-f6pqrkVH.js:348` (`applyConfiguredProviderOverrides`).
+
+This script applies the override idempotently.
+
+## Why it's safe (global per-provider, not per-agent)
+
+The OpenClaw 2026.5.7 schema only honors `timeoutSeconds` at the
+`models.providers.<provider>` level, not per-agent. Setting it raises the
+ceiling for every agent that uses those providers. This is acceptable:
+
+- **Simple-payload crons** (watchdog, qa-scan, sentinel) complete in ~30s,
+  well under any reasonable `timeoutSeconds` value. The bump is invisible.
+- **Outer cron-level bound** (`payload.timeoutSeconds`) is unchanged.
+  Each cron still has its own outer timeout (e.g., 120s for watchdog,
+  900s for release-please). Bumping the inner per-provider timeout
+  doesn't extend those.
+- **Cost ceiling** is the same — the LLM call still pays per token, just
+  gets more wall-clock before giving up.
+
+The shim is documented as a workaround. Once Lane C merges + ships +
+this host upgrades, the override can be reverted by deleting the
+`timeoutSeconds` field from each provider in `~/.openclaw/openclaw.json`.
+
+## Usage
+
+```bash
+# DRY-RUN (default) — show what would change
+bash scripts/devops/add-cron-timeout-overrides.sh
+
+# Apply the default 600s (10min) override
+APPLY=1 bash scripts/devops/add-cron-timeout-overrides.sh
+
+# Apply a custom timeout
+TIMEOUT_SECONDS=900 APPLY=1 bash scripts/devops/add-cron-timeout-overrides.sh
+
+# Apply to a subset of providers
+TARGET_PROVIDERS="minimax-portal zai" APPLY=1 bash scripts/devops/add-cron-timeout-overrides.sh
+
+# Non-default install path
+OPENCLAW_CONFIG=/path/to/openclaw.json APPLY=1 bash scripts/devops/add-cron-timeout-overrides.sh
+```
+
+Default timeout: **600s (10min)** — 4× the observed ~2.5min per-provider
+ceiling, giving headroom for ~2× LLM round-trip variance.
+
+## Re-enabling the release-please cron
+
+After applying the override, the `ad1ab50a-dba8-40e2-a3de-ca2d2d09dba5`
+cron (release-please dispatch) can be re-enabled. The current state has
+it disabled with `sessionTarget: "session:agent:ag-hermes:..."` (named
+session, from Hephaestus's prior failed workaround on the named-session
+lock-in bug).
+
+The cron config update is manual at the `~/.openclaw/cron/jobs.json`
+level. Three changes required:
+
+1. Set `enabled: true`
+2. Change `sessionTarget` back to `"isolated"`
+3. Update the prompt to a current release-please dispatch (the current
+   one references issue #4708 and a 2026-06-16 memory file)
+
+The cron daemon picks up the change on its next read cycle (< 60s).
+
+## Restart the OpenClaw gateway
+
+The new `timeoutSeconds` takes effect on the next gateway reload. To
+pick up immediately:
+
+```bash
+openclaw gateway restart
+```
+
+Then trigger one manual isolated agentTurn run on `ad1ab50a` to verify
+the new ceiling holds.
+
+## Tests
+
+`scripts/devops/__tests__/add-cron-timeout-overrides.test.ts` covers:
+
+1. DRY-RUN does not modify the config
+2. APPLY=1 sets `timeoutSeconds` on each target provider
+3. `TIMEOUT_SECONDS` env var overrides the default
+4. Idempotency (re-running is a no-op)
+5. Skip semantics (providers already at-or-above target)
+6. Scope (`TARGET_PROVIDERS` env var)
+7. Error paths (missing config, invalid timeout, malformed config)
+8. Partial success (missing target provider doesn't abort other updates)
+
+Run with:
+
+```bash
+npx vitest run scripts/devops/__tests__/add-cron-timeout-overrides.test.ts
+```
diff --git a/scripts/devops/add-cron-timeout-overrides.sh b/scripts/devops/add-cron-timeout-overrides.sh
new file mode 100755
index 00000000..ae4cf132
--- /dev/null
+++ b/scripts/devops/add-cron-timeout-overrides.sh
@@ -0,0 +1,189 @@
+#!/usr/bin/env bash
+# scripts/devops/add-cron-timeout-overrides.sh
+#
+# Apply a per-provider `timeoutSeconds` override to the OpenClaw config so
+# non-trivial `isolated agentTurn` cron payloads don't time out per-provider
+# during the sequential fallback chain.
+#
+# Background — #4808 (Lane B of #4755, 2026-06-23): the release-please
+# dispatch cron `ad1ab50a-dba8-40e2-a3de-ca2d2d09dba5` (referred to in the
+# issue body as `dbe0ed03`, the leading-prefix nickname) was failing with
+# `FallbackSummaryError: All models failed (5)` because each provider in
+# the `ag-hermes` fallback chain timed out at ~2.5min before the LLM call
+# could complete for the complex release-please pre-flight payload.
+#
+# The upstream fix is `openclaw/openclaw#95408` (per-agent
+# `model.requestTimeoutSeconds`, Lane C, Hermes). This script implements
+# the aegis-side shim (Lane B) by raising `models.providers.<provider>.
+# timeoutSeconds` for the 3 unique providers used by `ag-hermes`'s
+# fallback chain: `minimax-portal`, `kimi`, `zai`. The OpenClaw 2026.5.7
+# runtime reads this knob at `model-f6pqrkVH.js:348`
+# (`applyConfiguredProviderOverrides`), so it takes effect on the next
+# gateway reload without code changes.
+#
+# The change is global per-provider (not per-agent), which is acceptable
+# because:
+#   - simple-payload crons (watchdog, qa-scan) complete in ~30s, well under
+#     any reasonable timeoutSeconds value
+#   - the cron-level `payload.timeoutSeconds` is the OUTER bound for each
+#     cron job; bumping per-provider timeoutSeconds doesn't extend those
+#   - the upstream Lane C fix will replace this shim with a per-agent
+#     knob once it merges; the shim is documented as a workaround
+#
+# Idempotent: re-running on an already-patched config is a no-op.
+#
+# Usage:
+#   bash scripts/devops/add-cron-timeout-overrides.sh                            # DRY-RUN, default 600s
+#   APPLY=1 bash scripts/devops/add-cron-timeout-overrides.sh                    # actually patch
+#   TIMEOUT_SECONDS=900 APPLY=1 bash scripts/devops/add-cron-timeout-overrides.sh  # custom timeout
+#   OPENCLAW_CONFIG=/path/to/openclaw.json bash ...                              # override config path
+#
+# Requires: jq for JSON parsing. No network. No OpenClaw gateway required
+# at apply time — the cron daemon picks up the new config on its next
+# reload cycle (typically < 60s).
+
+set -euo pipefail
+
+# ----------------------------------------------------------------------------
+# Configuration
+# ----------------------------------------------------------------------------
+
+# Default target providers — the 3 unique providers used by the `ag-hermes`
+# agent's 5-model fallback chain (per ~/.openclaw/openclaw.json agents[].id
+# == "ag-hermes" config):
+#   primary:   minimax-portal/MiniMax-M3
+#   fallbacks: kimi/kimi-code, zai/glm-5.1,
+#              minimax-portal/MiniMax-M2.7-highspeed, zai/glm-5-turbo
+# Override via TARGET_PROVIDERS env var (whitespace-separated).
+if [[ -n "${TARGET_PROVIDERS:-}" ]]; then
+    # read -d '' hits EOF (hence || true) and splits on whitespace without glob-expanding '*'.
+    read -r -d '' -a TARGET_PROVIDERS_ARR <<< "$TARGET_PROVIDERS" || true
+else
+    TARGET_PROVIDERS_ARR=(
+        minimax-portal
+        kimi
+        zai
+    )
+fi
+
+# Default timeout: 600s = 10min (4x the observed ~2.5min per-provider ceiling)
+DEFAULT_TIMEOUT_SECONDS=600
+
+# Override via TIMEOUT_SECONDS env var.
+TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-$DEFAULT_TIMEOUT_SECONDS}"
+
+# Validate timeout is a positive integer
+if ! [[ "$TIMEOUT_SECONDS" =~ ^[1-9][0-9]*$ ]]; then
+    echo "ERROR: TIMEOUT_SECONDS must be a positive integer (got: '$TIMEOUT_SECONDS')" >&2
+    exit 2
+fi
+
+# OpenClaw config path. Default: ~/.openclaw/openclaw.json. Override via
+# OPENCLAW_CONFIG env var (useful for testing or non-default installs).
+OPENCLAW_CONFIG="${OPENCLAW_CONFIG:-$HOME/.openclaw/openclaw.json}"
+
+# DRY_RUN by default; set APPLY=1 to actually patch.
+DRY_RUN=1
+if [[ "${APPLY:-0}" == "1" ]]; then
+    DRY_RUN=0
+fi
+
+# ----------------------------------------------------------------------------
+# Preflight
+# ----------------------------------------------------------------------------
+
+if ! command -v jq >/dev/null 2>&1; then
+    echo "ERROR: jq is required for JSON parsing" >&2
+    exit 2
+fi
+
+if [[ ! -f "$OPENCLAW_CONFIG" ]]; then
+    echo "ERROR: OpenClaw config not found at: $OPENCLAW_CONFIG" >&2
+    echo "Set OPENCLAW_CONFIG env var to the correct path." >&2
+    exit 2
+fi
+
+# Sanity: the config must be valid JSON and have a models.providers map
+if ! jq -e '.models.providers | type == "object"' "$OPENCLAW_CONFIG" >/dev/null 2>&1; then
+    echo "ERROR: $OPENCLAW_CONFIG does not have a models.providers object" >&2
+    echo "This doesn't look like an OpenClaw config." >&2
+    exit 2
+fi
+
+# ----------------------------------------------------------------------------
+# Main
+# ----------------------------------------------------------------------------
+
+echo "=== add-cron-timeout-overrides.sh ==="
+echo "Config:           $OPENCLAW_CONFIG"
+echo "Mode:             $([[ $DRY_RUN -eq 1 ]] && echo 'DRY-RUN (set APPLY=1 to patch)' || echo 'APPLY (config will be patched)')"
+echo "TimeoutSeconds:   $TIMEOUT_SECONDS"
+echo "Target providers: ${TARGET_PROVIDERS_ARR[*]}"
+echo ""
+
+UPDATED=0
+SKIPPED=0
+NOT_FOUND=0
+
+for provider in "${TARGET_PROVIDERS_ARR[@]}"; do
+    # Read current timeoutSeconds for this provider (null if unset)
+    current=$(jq -r --arg p "$provider" '.models.providers[$p].timeoutSeconds // null' "$OPENCLAW_CONFIG")
+    # Check provider exists in config
+    provider_exists=$(jq -r --arg p "$provider" 'has("models") and (.models.providers[$p] != null)' "$OPENCLAW_CONFIG")
+
+    if [[ "$provider_exists" != "true" ]]; then
+        echo "❌ $provider — not found in models.providers"
+        NOT_FOUND=$((NOT_FOUND + 1))
+        continue
+    fi
+
+    # Idempotency: skip if already at or above target. The stored value is only
+    # compared numerically once it is known to be a plain decimal integer.
+    if [[ "$current" =~ ^[0-9]+$ ]] && (( 10#$current >= TIMEOUT_SECONDS )); then
+        echo "⏭️  $provider — already has timeoutSeconds=$current (>= $TIMEOUT_SECONDS)"
+        SKIPPED=$((SKIPPED + 1))
+        continue
+    fi
+
+    if [[ $DRY_RUN -eq 1 ]]; then
+        action="would set"
+        if [[ "$current" != "null" ]]; then
+            action="would raise from $current to"
+        fi
+        echo "🔍 $provider — $action $TIMEOUT_SECONDS"
+    else
+        echo "✏️  $provider — setting timeoutSeconds=$TIMEOUT_SECONDS"
+        # Stage the rewrite beside the config so the final mv is a rename on the
+        # same filesystem; only the targeted provider entry is modified.
+        tmp=$(mktemp "${OPENCLAW_CONFIG}.XXXXXX")
+        jq --arg p "$provider" --argjson t "$TIMEOUT_SECONDS" \
+            '.models.providers[$p].timeoutSeconds = $t' \
+            "$OPENCLAW_CONFIG" > "$tmp" || { rm -f "$tmp"; exit 1; }
+        mv "$tmp" "$OPENCLAW_CONFIG"
+    fi
+    UPDATED=$((UPDATED + 1))
+done
+
+echo ""
+echo "=== Summary ==="
+echo "Would update / updated: $UPDATED"
+echo "Already at or above target (skipped): $SKIPPED"
+echo "Provider not found in config: $NOT_FOUND"
+echo ""
+
+if [[ $DRY_RUN -eq 1 ]]; then
+    echo "Re-run with APPLY=1 to actually patch:"
+    echo "  APPLY=1 bash $0"
+    echo ""
+    echo "Or with a custom timeout:"
+    echo "  TIMEOUT_SECONDS=900 APPLY=1 bash $0"
+    echo ""
+    echo "Or target a subset:"
+    echo "  TARGET_PROVIDERS=\"minimax-portal zai\" APPLY=1 bash $0"
+else
+    echo "Done. The OpenClaw gateway will pick up the new config on its next"
+    echo "reload cycle (typically < 60s, or restart with 'openclaw gateway restart')."
+    echo ""
+    echo "To verify, check the config directly:"
+    echo "  jq '.models.providers | to_entries | map({provider: .key, timeoutSeconds: .value.timeoutSeconds})' $OPENCLAW_CONFIG"
+fi