feat(supervisor): schedule-tree node affinity

myftija · myftija · commit 72f14f8fd4d3 · 2026-03-25T16:57:37.000+01:00
diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts
@@ -117,6 +117,13 @@ const Env = z.object({
   KUBERNETES_PROJECT_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(50),
   KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY: z.string().trim().min(1).default("kubernetes.io/hostname"),
 
+  // Schedule affinity settings - runs from schedule trees prefer a dedicated pool. Label key/value are trimmed so stray whitespace cannot break node label matching.
+  KUBERNETES_SCHEDULE_AFFINITY_ENABLED: BoolEnv.default(false),
+  KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY: z.string().trim().default("node.cluster.x-k8s.io/machinepool"),
+  KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE: z.string().trim().default("scheduled-runs"),
+  KUBERNETES_SCHEDULE_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(80),
+  KUBERNETES_SCHEDULE_ANTI_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(20),
+
   // Placement tags settings
   PLACEMENT_TAGS_ENABLED: BoolEnv.default(false),
   PLACEMENT_TAGS_PREFIX: z.string().default("node.cluster.x-k8s.io"),
diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts
@@ -267,6 +267,7 @@ class ManagedSupervisor {
           snapshotId: message.snapshot.id,
           snapshotFriendlyId: message.snapshot.friendlyId,
           placementTags: message.placementTags,
+          annotations: message.run.annotations,
         });
 
         // Disabled for now
diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts
@@ -120,7 +120,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
           },
           spec: {
             ...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags),
-            affinity: this.#getAffinity(opts.machine, opts.projectId),
+            affinity: this.#getAffinity(opts),
             terminationGracePeriodSeconds: 60 * 60,
             containers: [
               {
@@ -335,13 +335,22 @@ export class KubernetesWorkloadManager implements WorkloadManager {
     };
   }
 
+  #isScheduledRun(opts: WorkloadManagerCreateOptions): boolean {
+    return opts.annotations?.rootTriggerSource === "schedule"; // false when annotations are absent
+  }
+
   #getSharedLabels(opts: WorkloadManagerCreateOptions): Record<string, string> {
     return {
       env: opts.envId,
       envtype: this.#envTypeToLabelValue(opts.envType),
       org: opts.orgId,
       project: opts.projectId,
       machine: opts.machine.name,
+      // We intentionally emit a two-valued label ("true" / "false") rather than exposing the full
+      // trigger source (e.g. sdk, api, cli, mcp, schedule) to keep label cardinality low in metrics.
+      // The schedule vs non-schedule distinction is all we need for the current metrics
+      // and pool-level scheduling decisions; finer-grained source breakdowns live in run annotations.
+      scheduled: String(this.#isScheduledRun(opts)),
     };
   }
 
@@ -390,16 +399,37 @@ export class KubernetesWorkloadManager implements WorkloadManager {
     return preset.name.startsWith("large-");
   }
 
-  #getAffinity(preset: MachinePreset, projectId: string): k8s.V1Affinity | undefined {
-    const nodeAffinity = this.#getNodeAffinityRules(preset);
-    const podAffinity = this.#getProjectPodAffinity(projectId);
-
-    if (!nodeAffinity && !podAffinity) {
+  #getAffinity(opts: WorkloadManagerCreateOptions): k8s.V1Affinity | undefined {
+    const largeNodeAffinity = this.#getNodeAffinityRules(opts.machine);
+    const scheduleNodeAffinity = this.#getScheduleNodeAffinityRules(this.#isScheduledRun(opts));
+    const podAffinity = this.#getProjectPodAffinity(opts.projectId);
+
+    // Soft (preferred) terms are concatenated from both the machine rules and the schedule rules.
+    const preferred = [
+      ...(largeNodeAffinity?.preferredDuringSchedulingIgnoredDuringExecution ?? []),
+      ...(scheduleNodeAffinity?.preferredDuringSchedulingIgnoredDuringExecution ?? []),
+    ];
+    // Hard (required) terms are taken from the large machine rules only (non-large runs must stay
+    // off the large pool); schedule affinity is soft both ways, so it never contributes here.
+    const required = [
+      ...(largeNodeAffinity?.requiredDuringSchedulingIgnoredDuringExecution?.nodeSelectorTerms ?? []),
+    ];
+
+    const hasNodeAffinity = preferred.length > 0 || required.length > 0;
+
+    if (!hasNodeAffinity && !podAffinity) {
       return undefined;
     }
 
     return {
-      ...(nodeAffinity && { nodeAffinity }),
+      ...(hasNodeAffinity && {
+        nodeAffinity: {
+          ...(preferred.length > 0 && { preferredDuringSchedulingIgnoredDuringExecution: preferred }),
+          ...(required.length > 0 && {
+            requiredDuringSchedulingIgnoredDuringExecution: { nodeSelectorTerms: required },
+          }),
+        },
+      }),
       ...(podAffinity && { podAffinity }),
     };
   }
@@ -447,6 +477,50 @@ export class KubernetesWorkloadManager implements WorkloadManager {
     };
   }
 
+  /**
+   * Builds the soft node-affinity rule that steers a run relative to the schedule pool.
+   *
+   * Scheduled runs get a weighted preference for nodes whose pool label matches the
+   * configured value (`In`); all other runs get a weighted preference for nodes that
+   * do not match it (`NotIn`). Both directions are preferences, never requirements.
+   *
+   * The two cases differ only in weight and operator, so a single term is built
+   * instead of two near-identical object literals.
+   *
+   * @param isScheduledRun - whether the run was identified as a scheduled run
+   * @returns the preferred node affinity, or `undefined` when the feature is disabled
+   *   or the pool label key or value is empty
+   */
+  #getScheduleNodeAffinityRules(isScheduledRun: boolean): k8s.V1NodeAffinity | undefined {
+    const poolLabelKey = env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY;
+    const poolLabelValue = env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE;
+
+    // An empty key or value cannot form a usable match expression, so treat it as disabled.
+    if (!env.KUBERNETES_SCHEDULE_AFFINITY_ENABLED || !poolLabelKey || !poolLabelValue) {
+      return undefined;
+    }
+
+    return {
+      preferredDuringSchedulingIgnoredDuringExecution: [
+        {
+          // Scheduled runs are pulled toward the pool; everything else is nudged away from it.
+          weight: isScheduledRun
+            ? env.KUBERNETES_SCHEDULE_AFFINITY_WEIGHT
+            : env.KUBERNETES_SCHEDULE_ANTI_AFFINITY_WEIGHT,
+          preference: {
+            matchExpressions: [
+              {
+                key: poolLabelKey,
+                operator: isScheduledRun ? "In" : "NotIn",
+                values: [poolLabelValue],
+              },
+            ],
+          },
+        },
+      ],
+    };
+  }
+
   #getProjectPodAffinity(projectId: string): k8s.V1PodAffinity | undefined {
     if (!env.KUBERNETES_PROJECT_AFFINITY_ENABLED) {
       return undefined;
diff --git a/apps/supervisor/src/workloadManager/types.ts b/apps/supervisor/src/workloadManager/types.ts
@@ -1,4 +1,4 @@
-import type { EnvironmentType, MachinePreset, PlacementTag } from "@trigger.dev/core/v3";
+import type { EnvironmentType, MachinePreset, PlacementTag, RunAnnotations } from "@trigger.dev/core/v3";
 
 export interface WorkloadManagerOptions {
   workloadApiProtocol: "http" | "https";
@@ -35,4 +35,5 @@ export interface WorkloadManagerCreateOptions {
   runFriendlyId: string;
   snapshotId: string;
   snapshotFriendlyId: string;
+  annotations?: RunAnnotations;
 }
diff --git a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts
@@ -1,7 +1,7 @@
 import type { BillingCache } from "../billingCache.js";
 import { startSpan } from "@internal/tracing";
 import { assertExhaustive, tryCatch } from "@trigger.dev/core";
-import { DequeuedMessage, RetryOptions } from "@trigger.dev/core/v3";
+import { DequeuedMessage, RetryOptions, RunAnnotations } from "@trigger.dev/core/v3";
 import { placementTag } from "@trigger.dev/core/v3/serverOnly";
 import { getMaxDuration } from "@trigger.dev/core/v3/isomorphic";
 import {
@@ -575,6 +575,7 @@ export class DequeueSystem {
                   // Keeping this for backwards compatibility, but really this should be called workerQueue
                   masterQueue: lockedTaskRun.workerQueue,
                   traceContext: lockedTaskRun.traceContext as Record<string, unknown>,
+                  annotations: RunAnnotations.safeParse(lockedTaskRun.annotations).data,
                 },
                 environment: {
                   id: lockedTaskRun.runtimeEnvironment.id,
diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts
@@ -1,6 +1,7 @@
 import { z } from "zod";
 import { Enum, MachinePreset, RuntimeEnvironmentType, TaskRunExecution } from "./common.js";
 import { EnvironmentType } from "./schemas.js";
+import { RunAnnotations } from "./api.js";
 import type * as DB_TYPES from "@trigger.dev/database";
 
 export const TaskRunExecutionStatus = {
@@ -259,6 +260,7 @@ export const DequeuedMessage = z.object({
     attemptNumber: z.number(),
     masterQueue: z.string(),
     traceContext: z.record(z.unknown()),
+    annotations: RunAnnotations.optional(),
   }),
   environment: z.object({
     id: z.string(),