Skip to content

Commit c1021f2

Browse files
committed
fix: bound trace context map, gate on compute mode, use machine preset for templates
1 parent 641d6a3 commit c1021f2

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed

apps/supervisor/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ class ManagedSupervisor {
293293
return;
294294
}
295295

296-
if (env.COMPUTE_TRACE_SPANS_ENABLED) {
296+
if (this.isComputeMode && env.COMPUTE_TRACE_SPANS_ENABLED) {
297297
const traceparent =
298298
message.run.traceContext &&
299299
"traceparent" in message.run.traceContext &&

apps/supervisor/src/workloadServer/index.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
113113
>();
114114

115115
private readonly workerClient: SupervisorHttpClient;
116+
// Bounded map for trace contexts used by compute snapshot spans.
117+
// Entries are added on dequeue and consumed on snapshot callback, which may arrive
118+
// hours later after a checkpoint/restore cycle. Using a capped map avoids unbounded
119+
// growth while keeping recent contexts available. Oldest entries are evicted first.
120+
private static readonly MAX_TRACE_CONTEXTS = 10_000;
116121
private readonly runTraceContexts = new Map<string, RunTraceContext>();
117122
private readonly snapshotDelayWheel?: TimerWheel<DelayedSnapshot>;
118123

@@ -821,6 +826,14 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
821826
}
822827

823828
registerRunTraceContext(runFriendlyId: string, ctx: RunTraceContext) {
829+
// Evict the oldest entry (first key in Map iteration order) if we've hit the cap
830+
if (this.runTraceContexts.size >= WorkloadServer.MAX_TRACE_CONTEXTS) {
831+
const firstKey = this.runTraceContexts.keys().next().value;
832+
if (firstKey) {
833+
this.runTraceContexts.delete(firstKey);
834+
}
835+
}
836+
824837
this.runTraceContexts.set(runFriendlyId, ctx);
825838
}
826839

@@ -829,6 +842,11 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
829842
}
830843

831844
async stop() {
845+
// Intentionally drop pending snapshots rather than dispatching them. The supervisor
846+
// is shutting down, so our callback URL will be dead by the time the gateway responds.
847+
// Runners detect the supervisor is gone and reconnect to a new instance, which
848+
// re-triggers the snapshot workflow. Snapshots are an optimization, not a correctness
849+
// requirement - runs continue fine without them.
832850
const remaining = this.snapshotDelayWheel?.stop() ?? [];
833851
if (remaining.length > 0) {
834852
this.logger.info("Snapshot delay wheel stopped, dropped pending snapshots", {

apps/webapp/app/v3/services/computeTemplateCreation.server.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { ComputeClient, stripImageDigest } from "@internal/compute";
2+
import { machinePresetFromName } from "~/v3/machinePresets.server";
23
import { env } from "~/env.server";
34
import { logger } from "~/services/logger.server";
45
import type { PrismaClientOrTransaction } from "~/db.server";
@@ -158,10 +159,13 @@ export class ComputeTemplateCreationService {
158159
}
159160

160161
try {
162+
// Templates are resource-agnostic - the cpu/memory values passed below don't affect template content.
163+
const machine = machinePresetFromName("small-1x");
164+
161165
await this.client.templates.create({
162166
image: stripImageDigest(imageReference),
163-
cpu: 0.5,
164-
memory_mb: 512,
167+
cpu: machine.cpu,
168+
memory_mb: machine.memory * 1024,
165169
background: options?.background,
166170
});
167171
return { success: true };

0 commit comments

Comments
 (0)