Skip to content

Commit 0a6d6f1

Browse files
committed
feat(supervisor): pass name, metadata, and resources in compute restore request
Restore calls now send a request body containing the runner name, environment-override metadata, CPU, and memory, so that the agent can inject them before the VM resumes. The runner fetches these overrides from TRIGGER_METADATA_URL at restore time. The runnerId is derived once per restore cycle as runner-{runIdShort}-{checkpointSuffix}, matching iceman's pattern.
1 parent c1511f9 commit 0a6d6f1

File tree

2 files changed

+46
-8
lines changed

2 files changed

+46
-8
lines changed

apps/supervisor/src/index.ts

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,23 @@ class ManagedSupervisor {
225225

226226
if (this.isComputeMode && this.computeManager) {
227227
try {
228-
const didRestore = await this.computeManager.restore(checkpoint.location);
228+
// Derive runnerId unique per restore cycle (matches iceman's pattern)
229+
const runIdShort = message.run.friendlyId.replace("run_", "");
230+
const checkpointSuffix = checkpoint.id.slice(-8);
231+
const runnerId = `runner-${runIdShort}-${checkpointSuffix}`;
232+
233+
const didRestore = await this.computeManager.restore({
234+
snapshotId: checkpoint.location,
235+
runnerId,
236+
runFriendlyId: message.run.friendlyId,
237+
snapshotFriendlyId: message.snapshot.friendlyId,
238+
machine: message.run.machine,
239+
});
229240

230241
if (didRestore) {
231-
this.logger.log("Compute restore successful", { runId: message.run.id });
242+
this.logger.log("Compute restore successful", { runId: message.run.id, runnerId });
232243
} else {
233-
this.logger.error("Compute restore failed", { runId: message.run.id });
244+
this.logger.error("Compute restore failed", { runId: message.run.id, runnerId });
234245
}
235246
} catch (error) {
236247
this.logger.error("Failed to restore run (compute)", { error });

apps/supervisor/src/workloadManager/compute.ts

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,34 +247,61 @@ export class ComputeWorkloadManager implements WorkloadManager {
247247
return true;
248248
}
249249

250-
async restore(snapshotId: string): Promise<boolean> {
251-
const url = `${this.opts.gatewayUrl}/api/snapshots/${snapshotId}/restore`;
250+
async restore(opts: {
251+
snapshotId: string;
252+
runnerId: string;
253+
runFriendlyId: string;
254+
snapshotFriendlyId: string;
255+
machine: { cpu: number; memory: number };
256+
}): Promise<boolean> {
257+
const url = `${this.opts.gatewayUrl}/api/snapshots/${opts.snapshotId}/restore`;
258+
259+
const metadata: Record<string, string> = {
260+
TRIGGER_RUNNER_ID: opts.runnerId,
261+
TRIGGER_RUN_ID: opts.runFriendlyId,
262+
TRIGGER_SNAPSHOT_ID: opts.snapshotFriendlyId,
263+
TRIGGER_SUPERVISOR_API_PROTOCOL: this.opts.workloadApiProtocol,
264+
TRIGGER_SUPERVISOR_API_PORT: String(this.opts.workloadApiPort),
265+
TRIGGER_SUPERVISOR_API_DOMAIN: this.opts.workloadApiDomain ?? "",
266+
TRIGGER_WORKER_INSTANCE_NAME: env.TRIGGER_WORKER_INSTANCE_NAME,
267+
};
252268

253269
const [error, response] = await tryCatch(
254270
fetch(url, {
255271
method: "POST",
256272
headers: this.authHeaders,
257273
signal: AbortSignal.timeout(this.opts.gatewayTimeoutMs),
274+
body: JSON.stringify({
275+
name: opts.runnerId,
276+
metadata,
277+
cpu: opts.machine.cpu,
278+
memory_mb: opts.machine.memory * 1024,
279+
}),
258280
})
259281
);
260282

261283
if (error) {
262284
this.logger.error("restore request failed", {
263-
snapshotId,
285+
snapshotId: opts.snapshotId,
286+
runnerId: opts.runnerId,
264287
error: error instanceof Error ? error.message : String(error),
265288
});
266289
return false;
267290
}
268291

269292
if (!response.ok) {
270293
this.logger.error("restore request rejected", {
271-
snapshotId,
294+
snapshotId: opts.snapshotId,
295+
runnerId: opts.runnerId,
272296
status: response.status,
273297
});
274298
return false;
275299
}
276300

277-
this.logger.info("restore request success", { snapshotId });
301+
this.logger.info("restore request success", {
302+
snapshotId: opts.snapshotId,
303+
runnerId: opts.runnerId,
304+
});
278305
return true;
279306
}
280307
}

0 commit comments

Comments
 (0)