feat(orchestrator): executor drain-loop scheduler

gewenyu99 · claude · gewenyu99 · commit 5484f0b2e5b4 · 2026-06-17T11:39:41.000-04:00
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/src/lib/programs/orchestrator/__tests__/executor.test.ts b/src/lib/programs/orchestrator/__tests__/executor.test.ts
@@ -0,0 +1,150 @@
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import {
+  QueueStore,
+  type QueuedTask,
+  type TaskHandoff,
+} from '@lib/programs/orchestrator/queue';
+import { drainQueue, type RunTask } from '@lib/programs/orchestrator/executor';
+
+// Swap the analytics module for spies: the executor reports runner exceptions
+// through analytics.captureException, and the tests below assert on that call.
+jest.mock('@utils/analytics', () => ({
+  analytics: { captureException: jest.fn(), wizardCapture: jest.fn() },
+}));
+import { analytics } from '@utils/analytics';
+
+// Placeholder handoff passed to every q.complete() call in these tests.
+const HANDOFF: TaskHandoff = { goals: 'g', did: 'd', forNextAgent: 'n' };
+
+/**
+ * Creates a fresh, uniquely named directory under the OS temp dir and returns
+ * its path. Each test gets its own so queue state never leaks between tests.
+ */
+function tmpDir(): string {
+  const prefix = path.join(os.tmpdir(), 'executor-test-');
+  return fs.mkdtempSync(prefix);
+}
+
+/**
+ * Behavioral tests for drainQueue. Each test runs against an unmocked
+ * QueueStore rooted in a throwaway temp directory and supplies its own fake
+ * RunTask to play the part of the task agent.
+ */
+describe('drainQueue', () => {
+  let dir: string;
+  let q: QueueStore;
+
+  beforeEach(() => {
+    // Drop calls recorded by earlier tests so assertions on the analytics
+    // spies only see calls made by the test that is currently running.
+    jest.clearAllMocks();
+    dir = tmpDir();
+    q = new QueueStore(dir, 'run-1');
+  });
+
+  afterEach(() => fs.rmSync(dir, { recursive: true, force: true }));
+
+  // A well-behaved runner: reports completion for whatever task it is given.
+  const completing: RunTask = (task) => {
+    q.complete(task.id, HANDOFF);
+    return Promise.resolve();
+  };
+
+  it('runs a single task to done and drains', async () => {
+    const a = q.enqueue({ type: 'install' });
+    await drainQueue(q, completing, { maxStarts: 50 });
+    expect(q.get(a.id)?.status).toBe('done');
+    expect(q.isDrained()).toBe(true);
+  });
+
+  it('runs a dependent task only after its dependency completes', async () => {
+    const order: string[] = [];
+    const a = q.enqueue({ type: 'install' });
+    const b = q.enqueue({ type: 'init', dependsOn: [a.id] });
+    const runner: RunTask = (task) => {
+      order.push(task.type);
+      q.complete(task.id, HANDOFF);
+      return Promise.resolve();
+    };
+    await drainQueue(q, runner, { maxStarts: 50 });
+    expect(order).toEqual(['install', 'init']);
+    expect(q.get(b.id)?.status).toBe('done');
+  });
+
+  it('runs independent branches concurrently; the graph is the only schedule', async () => {
+    let active = 0;
+    let maxActive = 0;
+    const runner: RunTask = async (task) => {
+      active += 1;
+      maxActive = Math.max(maxActive, active);
+      await new Promise((r) => setTimeout(r, 5));
+      q.complete(task.id, HANDOFF);
+      active -= 1;
+    };
+    const a = q.enqueue({ type: 'install' });
+    const b = q.enqueue({ type: 'init' });
+    q.enqueue({ type: 'capture', dependsOn: [a.id, b.id] });
+    await drainQueue(q, runner, { maxStarts: 50 });
+    // install and init overlap; capture waits for both.
+    expect(maxActive).toBe(2);
+    expect(q.summary().done).toBe(3);
+  });
+
+  it('starts a dependent the moment its dependency finishes, not in waves', async () => {
+    // Ordered log of every runner start and finish, across all tasks.
+    const events: string[] = [];
+    const runner: RunTask = async (task) => {
+      events.push(`start:${task.type}`);
+      // slow holds the wave open; fast finishes early and unblocks after-fast.
+      const delay = task.type === 'slow' ? 30 : 5;
+      await new Promise((r) => setTimeout(r, delay));
+      events.push(`end:${task.type}`);
+      q.complete(task.id, HANDOFF);
+    };
+    q.enqueue({ type: 'slow' });
+    const fast = q.enqueue({ type: 'fast' });
+    q.enqueue({ type: 'after-fast', dependsOn: [fast.id] });
+    await drainQueue(q, runner, { maxStarts: 50 });
+    // after-fast started while slow was still running. A wave-based scheduler
+    // would only start it after slow had ended, and fail the ordering check.
+    const dependentStart = events.indexOf('start:after-fast');
+    expect(dependentStart).toBeGreaterThanOrEqual(0);
+    expect(dependentStart).toBeLessThan(events.indexOf('end:slow'));
+    expect(q.summary().done).toBe(3);
+  });
+
+  it('retries a task that ends without reporting, then fails it', async () => {
+    const a = q.enqueue({ type: 'install', maxAttempts: 2 });
+    const noReport: RunTask = async () => {
+      /* agent never calls complete_task */
+    };
+    await drainQueue(q, noReport, { maxStarts: 50 });
+    expect(q.get(a.id)?.status).toBe('failed');
+    expect(q.get(a.id)?.attempts).toBe(2);
+  });
+
+  it('succeeds on a retry within the attempt budget', async () => {
+    let calls = 0;
+    const a = q.enqueue({ type: 'install', maxAttempts: 3 });
+    const flaky: RunTask = (task: QueuedTask) => {
+      calls += 1;
+      if (calls >= 2) q.complete(task.id, HANDOFF);
+      return Promise.resolve();
+    };
+    await drainQueue(q, flaky, { maxStarts: 50 });
+    expect(q.get(a.id)?.status).toBe('done');
+    expect(calls).toBe(2);
+  });
+
+  it('captures and fails a task whose runner throws', async () => {
+    const a = q.enqueue({ type: 'install', maxAttempts: 1 });
+    const throwing: RunTask = () => Promise.reject(new Error('agent exploded'));
+    await drainQueue(q, throwing, { maxStarts: 50 });
+    expect(q.get(a.id)?.status).toBe('failed');
+    expect(analytics.captureException).toHaveBeenCalled();
+  });
+
+  it('does not run a task whose dependency failed', async () => {
+    const a = q.enqueue({ type: 'install', maxAttempts: 1 });
+    const b = q.enqueue({ type: 'init', dependsOn: [a.id] });
+    const runner: RunTask = (task) => {
+      if (task.type === 'init') q.complete(task.id, HANDOFF);
+      // install never reports, so it fails after its single attempt.
+      return Promise.resolve();
+    };
+    await drainQueue(q, runner, { maxStarts: 50 });
+    expect(q.get(a.id)?.status).toBe('failed');
+    expect(q.get(b.id)?.status).toBe('pending');
+    expect(q.isDrained()).toBe(true);
+  });
+
+  it('terminates via the start backstop instead of looping forever', async () => {
+    const a = q.enqueue({ type: 'install', maxAttempts: 999 });
+    const neverReports: RunTask = async () => {
+      /* would retry forever without the backstop */
+    };
+    await drainQueue(q, neverReports, { maxStarts: 3 });
+    expect(q.get(a.id)?.attempts).toBeLessThanOrEqual(3);
+  });
+});
diff --git a/src/lib/programs/orchestrator/executor.ts b/src/lib/programs/orchestrator/executor.ts
@@ -0,0 +1,115 @@
+/**
+ * The executor drains the queue. It starts every runnable task (dependencies
+ * satisfied) as soon as it becomes runnable — parallelism is decided by the
+ * task graph, not by an executor knob. Each task runs through an injected
+ * `runTask` function and reports its outcome via `complete_task`; a task that
+ * ends without reporting is retried while attempts remain, then failed. A
+ * `maxStarts` backstop guarantees termination.
+ *
+ * The drain loop is independent of how a task actually runs. `runTask` is
+ * injected: the real one spins up a fresh agent, the tests use a fake.
+ */
+import { analytics } from '../../../utils/analytics';
+import { logToFile } from '../../../utils/debug';
+import { TaskStatus, type QueueStore, type QueuedTask } from './queue';
+
+/**
+ * Per-task agent configuration the resolver produces from a task's type.
+ * Nothing in this module reads these fields; the drain loop only sees
+ * `RunTask`. NOTE(review): presumably consumed by the real `RunTask`
+ * implementation — confirm.
+ */
+export interface ResolvedTask {
+  /** Model for the task's agent (presumably a model identifier — confirm). */
+  model: string;
+  /** Tools the agent may use (presumably tool names — confirm). */
+  allowedTools: readonly string[];
+  /** Tools the agent must not use (presumably tool names — confirm). */
+  disallowedTools: readonly string[];
+  /** Mini-skills to install before the task runs (the HOW). */
+  skills: readonly string[];
+  /** Prompt text for the task's agent. */
+  prompt: string;
+}
+
+/**
+ * Resolves a queued task to what the agent needs. The real one is
+ * markdown-backed. It receives the task together with the store it lives in
+ * and returns synchronously. Not called by the drain loop in this module.
+ */
+export type TaskResolver = (
+  task: QueuedTask,
+  store: QueueStore,
+) => ResolvedTask;
+
+/** Runs one task's agent. It is expected to drive the task to a terminal state
+ *  (via the task agent calling complete_task). If the returned promise settles
+ *  while the task is still running, the executor requeues the task while
+ *  attempts remain and otherwise fails it. A rejection is logged and reported
+ *  to analytics by the executor, not rethrown. */
+export type RunTask = (task: QueuedTask) => Promise<void>;
+
+/** Limits applied to a single `drainQueue` call. */
+export interface DrainOptions {
+  /**
+   * Backstop against a pathological always-one-more-pending loop. Caps the
+   * total number of task starts in one drain; a retry of the same task counts
+   * as another start. Once the cap is hit no further task is started.
+   */
+  maxStarts: number;
+}
+
+/** Options `drainQueue` uses when the caller passes none. */
+export const DEFAULT_DRAIN_OPTIONS: DrainOptions = {
+  maxStarts: 200,
+};
+
+/**
+ * Runs a single attempt of one task and reconciles the queue afterwards.
+ *
+ * The task is started in the store before `runTask` is awaited. An exception
+ * from `runTask` is logged and reported to analytics but never rethrown. Once
+ * the runner has settled, the task is re-read from the store:
+ *  - still running (the agent never reported): requeued while attempts
+ *    remain, otherwise failed as `no-report`;
+ *  - failed with attempts remaining: requeued for another try;
+ *  - missing, or in any other state: left untouched.
+ *
+ * @param store - Queue holding the task; receives every state transition.
+ * @param runTask - Runner invoked once for this attempt.
+ * @param task - The task to run, as offered by the store.
+ */
+async function runOne(
+  store: QueueStore,
+  runTask: RunTask,
+  task: QueuedTask,
+): Promise<void> {
+  store.start(task.id);
+
+  try {
+    await runTask(task);
+  } catch (thrown) {
+    // A throw is not an outcome report. The status check below decides what
+    // happens to the task; here we only make sure the exception is visible.
+    logToFile(`[executor] runTask threw for ${task.type}:`, thrown);
+    const asError = thrown instanceof Error ? thrown : new Error(String(thrown));
+    analytics.captureException(asError, {
+      step: 'orchestrator_run_task',
+      task_type: task.type,
+    });
+  }
+
+  const settled = store.get(task.id);
+  if (!settled) return;
+
+  const canRetry = settled.attempts < settled.maxAttempts;
+
+  switch (settled.status) {
+    case TaskStatus.Running:
+      // The agent ended without calling complete_task. Retry or fail.
+      if (canRetry) {
+        store.requeue(task.id);
+      } else {
+        store.fail(task.id, {
+          type: 'no-report',
+          message: 'Task ended without calling complete_task.',
+        });
+      }
+      return;
+    case TaskStatus.Failed:
+      // A reported failure still gets another go while the budget allows.
+      if (canRetry) store.requeue(task.id);
+      return;
+    default:
+      return;
+  }
+}
+
+/**
+ * Drain the queue to a terminal state. Every runnable task starts the moment
+ * its dependencies finish; independent branches run concurrently. Returns when
+ * every task is done, failed, or blocked by a failed dependency, or when the
+ * start backstop trips.
+ *
+ * @param store - Queue to drain; supplies runnable tasks and records outcomes.
+ * @param runTask - Runs one task's agent; invoked once per start.
+ * @param opts - Drain limits; defaults to `DEFAULT_DRAIN_OPTIONS`.
+ * @returns A promise that resolves once nothing is in flight and nothing more
+ *   will be started. Tasks left unstarted by the backstop are not touched;
+ *   the trip is reported once through `logToFile`.
+ */
+export async function drainQueue(
+  store: QueueStore,
+  runTask: RunTask,
+  opts: DrainOptions = DEFAULT_DRAIN_OPTIONS,
+): Promise<void> {
+  const running = new Map<string, Promise<void>>();
+  // Number of tasks actually handed to runOne (retries count again).
+  let starts = 0;
+  let backstopLogged = false;
+
+  for (;;) {
+    for (const task of store.nextRunnable()) {
+      if (starts + 1 > opts.maxStarts) {
+        // Backstop: stop launching. Log once so a drain that ends with
+        // runnable tasks still unstarted can be explained from the log.
+        if (!backstopLogged) {
+          backstopLogged = true;
+          logToFile(
+            `[executor] maxStarts backstop reached after ${starts} starts; not starting ${task.type} or any later task.`,
+          );
+        }
+        break;
+      }
+      starts += 1;
+      // runOne marks the task in_progress synchronously, so the next
+      // nextRunnable() call no longer offers it.
+      const p = runOne(store, runTask, task).finally(() =>
+        running.delete(task.id),
+      );
+      running.set(task.id, p);
+    }
+    if (running.size === 0) break;
+    // Wake on the first finish; it may have unblocked dependents or requeued.
+    await Promise.race(running.values());
+  }
+}