refactor(webapp): prefix mollifier env vars with TRIGGER_

d-cs · d-cs · commit 948309234a8e · 2026-05-15T17:12:29.000+01:00
All MOLLIFIER_* env vars renamed to TRIGGER_MOLLIFIER_*. The mollifier
primitive is generic — buffer + drainer + trip evaluator with no
trigger-specific assumptions at the redis-worker layer — but this
PR's webapp wiring is specifically the trigger-task mollifier, with
PII-sensitive payload handling and trigger-flow semantics. If we later
mollify another surface (deploys, schedules, etc.) those will want
their own env-var namespace; pre-prefixing now avoids a breaking
rename later.

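Renames are mechanical: schema keys in env.server.ts, env.* references
across the v3/mollifier* modules, and a handful of doc-comment
mentions. The bootstrap fallback that has DRAINER_ENABLED default to
the ENABLED value is updated to read TRIGGER_MOLLIFIER_ENABLED from
process.env too. There is no fallback to the old MOLLIFIER_* names, so
any deployment that already sets them must rename them. Code-side
naming (classes, file names, the literal word "mollifier") stays
unchanged — the rename is env-var only, apart from drainer comments
that now point at mollifierDrainerWorker.server.ts instead of
worker.server.ts.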
diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
@@ -1052,46 +1052,46 @@ const EnvironmentSchema = z
       .optional()
       .transform((v) => v ?? process.env.REDIS_PASSWORD),
     COMMON_WORKER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
-    MOLLIFIER_ENABLED: z.string().default("0"),
+    TRIGGER_MOLLIFIER_ENABLED: z.string().default("0"),
     // Separate switch for the drainer (consumer side) so it can be split
     // off onto a dedicated worker service. Unset → inherits
-    // MOLLIFIER_ENABLED, so single-container self-hosters don't have to
+    // TRIGGER_MOLLIFIER_ENABLED, so single-container self-hosters don't have to
     // flip two switches. In multi-replica deployments, set this to "0"
     // explicitly on every replica except the one dedicated drainer
     // service — otherwise every replica's polling loop races for the
-    // same buffer entries. `MOLLIFIER_ENABLED` is still the master kill
-    // switch; setting this to "1" while `MOLLIFIER_ENABLED` is "0" is a
+    // same buffer entries. `TRIGGER_MOLLIFIER_ENABLED` is still the master kill
+    // switch; setting this to "1" while `TRIGGER_MOLLIFIER_ENABLED` is "0" is a
     // no-op because the gate-side singleton refuses to construct a
     // buffer when the system is off.
-    MOLLIFIER_DRAINER_ENABLED: z.string().default(process.env.MOLLIFIER_ENABLED ?? "0"),
-    MOLLIFIER_SHADOW_MODE: z.string().default("0"),
-    MOLLIFIER_REDIS_HOST: z
+    TRIGGER_MOLLIFIER_DRAINER_ENABLED: z.string().default(process.env.TRIGGER_MOLLIFIER_ENABLED ?? "0"),
+    TRIGGER_MOLLIFIER_SHADOW_MODE: z.string().default("0"),
+    TRIGGER_MOLLIFIER_REDIS_HOST: z
       .string()
       .optional()
       .transform((v) => v ?? process.env.REDIS_HOST),
-    MOLLIFIER_REDIS_PORT: z.coerce
+    TRIGGER_MOLLIFIER_REDIS_PORT: z.coerce
       .number()
       .optional()
       .transform(
         (v) => v ?? (process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT) : undefined),
       ),
-    MOLLIFIER_REDIS_USERNAME: z
+    TRIGGER_MOLLIFIER_REDIS_USERNAME: z
       .string()
       .optional()
       .transform((v) => v ?? process.env.REDIS_USERNAME),
-    MOLLIFIER_REDIS_PASSWORD: z
+    TRIGGER_MOLLIFIER_REDIS_PASSWORD: z
       .string()
       .optional()
       .transform((v) => v ?? process.env.REDIS_PASSWORD),
-    MOLLIFIER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
-    MOLLIFIER_TRIP_WINDOW_MS: z.coerce.number().int().positive().default(200),
-    MOLLIFIER_TRIP_THRESHOLD: z.coerce.number().int().positive().default(100),
-    MOLLIFIER_HOLD_MS: z.coerce.number().int().positive().default(500),
-    MOLLIFIER_DRAIN_CONCURRENCY: z.coerce.number().int().positive().default(50),
-    MOLLIFIER_ENTRY_TTL_S: z.coerce.number().int().positive().default(600),
-    MOLLIFIER_DRAIN_MAX_ATTEMPTS: z.coerce.number().int().positive().default(3),
-    MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS: z.coerce.number().int().positive().default(30_000),
-    MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK: z.coerce.number().int().positive().default(500),
+    TRIGGER_MOLLIFIER_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
+    TRIGGER_MOLLIFIER_TRIP_WINDOW_MS: z.coerce.number().int().positive().default(200),
+    TRIGGER_MOLLIFIER_TRIP_THRESHOLD: z.coerce.number().int().positive().default(100),
+    TRIGGER_MOLLIFIER_HOLD_MS: z.coerce.number().int().positive().default(500),
+    TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY: z.coerce.number().int().positive().default(50),
+    TRIGGER_MOLLIFIER_ENTRY_TTL_S: z.coerce.number().int().positive().default(600),
+    TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS: z.coerce.number().int().positive().default(3),
+    TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS: z.coerce.number().int().positive().default(30_000),
+    TRIGGER_MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK: z.coerce.number().int().positive().default(500),
 
     BATCH_TRIGGER_PROCESS_JOB_VISIBILITY_TIMEOUT_MS: z.coerce
       .number()
diff --git a/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts b/apps/webapp/app/v3/mollifier/mollifierBuffer.server.ts
@@ -0,0 +1,32 @@
+import { MollifierBuffer } from "@trigger.dev/redis-worker";
+import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
+import { singleton } from "~/utils/singleton";
+
+// DI seam type for consumers (e.g. triggerTask.server.ts) that need a
+// nullable buffer accessor at construction time.
+export type MollifierGetBuffer = () => MollifierBuffer | null;
+
+function initializeMollifierBuffer(): MollifierBuffer {
+  logger.debug("Initializing mollifier buffer", {
+    host: env.TRIGGER_MOLLIFIER_REDIS_HOST,
+  });
+
+  return new MollifierBuffer({
+    redisOptions: {
+      keyPrefix: "",
+      host: env.TRIGGER_MOLLIFIER_REDIS_HOST,
+      port: env.TRIGGER_MOLLIFIER_REDIS_PORT,
+      username: env.TRIGGER_MOLLIFIER_REDIS_USERNAME,
+      password: env.TRIGGER_MOLLIFIER_REDIS_PASSWORD,
+      enableAutoPipelining: true,
+      ...(env.TRIGGER_MOLLIFIER_REDIS_TLS_DISABLED === "true" ? {} : { tls: {} }),
+    },
+    entryTtlSeconds: env.TRIGGER_MOLLIFIER_ENTRY_TTL_S,
+  });
+}
+
+export function getMollifierBuffer(): MollifierBuffer | null {
+  if (env.TRIGGER_MOLLIFIER_ENABLED !== "1") return null;
+  return singleton("mollifierBuffer", initializeMollifierBuffer);
+}
diff --git a/apps/webapp/app/v3/mollifier/mollifierDrainer.server.ts b/apps/webapp/app/v3/mollifier/mollifierDrainer.server.ts
@@ -11,8 +11,8 @@ function initializeMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload>
   if (!buffer) {
     // Unreachable in normal config: getMollifierDrainer() gates on the
     // same env flag as getMollifierBuffer(). If we hit this, fail loud
-    // — the operator has set MOLLIFIER_ENABLED=1 on a worker pod but
-    // the buffer can't initialise (e.g. MOLLIFIER_REDIS_HOST resolves
+    // — the operator has set TRIGGER_MOLLIFIER_ENABLED=1 on a worker pod but
+    // the buffer can't initialise (e.g. TRIGGER_MOLLIFIER_REDIS_HOST resolves
     // to nothing). Crashing surfaces the misconfig immediately rather
     // than silently leaving entries un-drained.
     throw new Error("MollifierDrainer initialised without a buffer — env vars inconsistent");
@@ -24,7 +24,7 @@ function initializeMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload>
   // polling with no SIGTERM handler registered by the caller — exactly
   // the failure mode the validation is supposed to prevent.
   //
-  // The SIGTERM handler in worker.server.ts is sync fire-and-forget:
+  // The SIGTERM handler in mollifierDrainerWorker.server.ts is sync fire-and-forget:
   // `drainer.stop({ timeoutMs })` returns a promise that keeps the event
   // loop alive, but in cluster mode the primary runs its own
   // GRACEFUL_SHUTDOWN_TIMEOUT and will call `process.exit(0)`
@@ -34,17 +34,17 @@ function initializeMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload>
   // its own teardown after the drainer settles.
   const shutdownMarginMs = 1_000;
   if (
-    env.MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS >=
+    env.TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS >=
     env.GRACEFUL_SHUTDOWN_TIMEOUT - shutdownMarginMs
   ) {
     throw new Error(
-      `MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS (${env.MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS}) must be at least ${shutdownMarginMs}ms below GRACEFUL_SHUTDOWN_TIMEOUT (${env.GRACEFUL_SHUTDOWN_TIMEOUT}); otherwise the primary's hard exit shadows the drainer's deadline.`,
+      `TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS (${env.TRIGGER_MOLLIFIER_DRAIN_SHUTDOWN_TIMEOUT_MS}) must be at least ${shutdownMarginMs}ms below GRACEFUL_SHUTDOWN_TIMEOUT (${env.GRACEFUL_SHUTDOWN_TIMEOUT}); otherwise the primary's hard exit shadows the drainer's deadline.`,
     );
   }
 
   logger.debug("Initializing mollifier drainer", {
-    concurrency: env.MOLLIFIER_DRAIN_CONCURRENCY,
-    maxAttempts: env.MOLLIFIER_DRAIN_MAX_ATTEMPTS,
+    concurrency: env.TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY,
+    maxAttempts: env.TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS,
   });
 
   // Phase 1 handler: no-op ack. The trigger has ALREADY been written to
@@ -74,9 +74,9 @@ function initializeMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload>
         payloadHash,
       });
     },
-    concurrency: env.MOLLIFIER_DRAIN_CONCURRENCY,
-    maxAttempts: env.MOLLIFIER_DRAIN_MAX_ATTEMPTS,
-    maxOrgsPerTick: env.MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK,
+    concurrency: env.TRIGGER_MOLLIFIER_DRAIN_CONCURRENCY,
+    maxAttempts: env.TRIGGER_MOLLIFIER_DRAIN_MAX_ATTEMPTS,
+    maxOrgsPerTick: env.TRIGGER_MOLLIFIER_DRAIN_MAX_ORGS_PER_TICK,
     // A no-op handler shouldn't throw, but if something does (e.g. an
     // unexpected deserialise failure), don't loop — let it FAIL terminally
     // so the entry is observable in metrics.
@@ -88,12 +88,12 @@ function initializeMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload>
 
 // Returns a configured-but-stopped drainer. Callers MUST register their
 // SIGTERM / SIGINT shutdown handlers before invoking `drainer.start()` —
-// see `apps/webapp/app/services/worker.server.ts`. Starting inside the
-// singleton factory would put the polling loop ahead of handler
-// registration, leaving a narrow window where a SIGTERM landing between
-// `start()` and `process.once("SIGTERM", ...)` would skip the graceful
-// stop. The split is intentional.
+// see `apps/webapp/app/v3/mollifierDrainerWorker.server.ts`. Starting
+// inside the singleton factory would put the polling loop ahead of
+// handler registration, leaving a narrow window where a SIGTERM landing
+// between `start()` and `process.once("SIGTERM", ...)` would skip the
+// graceful stop. The split is intentional.
 export function getMollifierDrainer(): MollifierDrainer<BufferedTriggerPayload> | null {
-  if (env.MOLLIFIER_ENABLED !== "1") return null;
+  if (env.TRIGGER_MOLLIFIER_ENABLED !== "1") return null;
   return singleton("mollifierDrainer", initializeMollifierDrainer);
 }
diff --git a/apps/webapp/app/v3/mollifier/mollifierGate.server.ts b/apps/webapp/app/v3/mollifier/mollifierGate.server.ts
@@ -0,0 +1,202 @@
+import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
+import { FEATURE_FLAG, FeatureFlagCatalog } from "~/v3/featureFlags";
+import { getMollifierBuffer } from "./mollifierBuffer.server";
+import { createRealTripEvaluator } from "./mollifierTripEvaluator.server";
+import {
+  recordDecision,
+  type DecisionOutcome,
+  type DecisionReason,
+} from "./mollifierTelemetry.server";
+
+// `count` is the fleet-wide fixed-window counter for the env (INCR with a
+// PEXPIRE armed on the first tick of each window — see
+// `mollifierEvaluateTrip` in `packages/redis-worker/src/mollifier/buffer.ts`).
+// All webapp replicas pointing at the same Redis share the key
+// `mollifier:rate:${envId}`, so the threshold is the fleet-wide ceiling
+// rather than a per-instance one. At a window boundary an env can briefly
+// admit up to ~2x threshold across the fleet before tripping (fixed-window
+// not sliding-window). The tripped marker is refreshed on every overage
+// call, so a sustained burst holds the divert state until the rate falls
+// below threshold within a window.
+export type TripDecision =
+  | { divert: false }
+  | {
+      divert: true;
+      reason: "per_env_rate";
+      count: number;
+      threshold: number;
+      windowMs: number;
+      holdMs: number;
+    };
+
+export type GateOutcome =
+  | { action: "pass_through" }
+  | { action: "mollify"; decision: Extract<TripDecision, { divert: true }> }
+  | { action: "shadow_log"; decision: Extract<TripDecision, { divert: true }> };
+
+export type GateInputs = {
+  envId: string;
+  orgId: string;
+  taskId: string;
+  // Org-scoped flag overrides — taken from `Organization.featureFlags` on the
+  // AuthenticatedEnvironment at the call site. The repo-wide `flag()` helper
+  // queries the global `FeatureFlag` table; passing per-org overrides lets the
+  // mollifier opt in a single org without touching the global row, matching
+  // the pattern used by `canAccessAi`, `canAccessPrivateConnections`, and the
+  // compute-template beta gate.
+  orgFeatureFlags: Record<string, unknown> | null;
+};
+
+export type TripEvaluator = (inputs: GateInputs) => Promise<TripDecision>;
+
+// DI seam type for consumers (e.g. triggerTask.server.ts) that inject the
+// gate at construction time. Deliberately narrower than `evaluateGate`'s
+// real signature — no `deps` param — because consumers only call it with
+// inputs and rely on the module-level defaults.
+export type MollifierEvaluateGate = (inputs: GateInputs) => Promise<GateOutcome>;
+
+export type GateDependencies = {
+  isMollifierEnabled: () => boolean;
+  isShadowModeOn: () => boolean;
+  resolveOrgFlag: (inputs: GateInputs) => Promise<boolean>;
+  evaluator: TripEvaluator;
+  logShadow: (
+    inputs: GateInputs,
+    decision: Extract<TripDecision, { divert: true }>,
+  ) => void;
+  logMollified: (
+    inputs: GateInputs,
+    decision: Extract<TripDecision, { divert: true }>,
+  ) => void;
+  recordDecision: (outcome: DecisionOutcome, reason?: DecisionReason) => void;
+};
+
+// `options` is a thunk so env reads happen per-evaluation, not at module load.
+// Don't "simplify" to a plain object — Phase 2 dynamic config relies on the
+// gate observing whichever env values are live at trigger time.
+const defaultEvaluator = createRealTripEvaluator({
+  getBuffer: () => getMollifierBuffer(),
+  options: () => ({
+    windowMs: env.TRIGGER_MOLLIFIER_TRIP_WINDOW_MS,
+    threshold: env.TRIGGER_MOLLIFIER_TRIP_THRESHOLD,
+    holdMs: env.TRIGGER_MOLLIFIER_HOLD_MS,
+  }),
+});
+
+function logDivertDecision(
+  message: "mollifier.would_mollify" | "mollifier.mollified",
+  inputs: GateInputs,
+  decision: Extract<TripDecision, { divert: true }>,
+): void {
+  logger.info(message, {
+    envId: inputs.envId,
+    orgId: inputs.orgId,
+    taskId: inputs.taskId,
+    reason: decision.reason,
+    count: decision.count,
+    threshold: decision.threshold,
+    windowMs: decision.windowMs,
+    holdMs: decision.holdMs,
+  });
+}
+
+// Resolve the per-org mollifier flag purely from the in-memory
+// `Organization.featureFlags` JSON. No DB query — `triggerTask` is the
+// trigger hot path and the webapp CLAUDE.md forbids adding Prisma calls
+// there. The fleet-wide kill switch lives in `TRIGGER_MOLLIFIER_ENABLED`; rollout
+// is per-org via the JSON, matching the pattern used by `canAccessAi`,
+// `hasComputeAccess`, etc. There is no global `FeatureFlag` table read
+// in this path by design.
+export function makeResolveMollifierFlag(): (inputs: GateInputs) => Promise<boolean> {
+  return (inputs) => {
+    const override = inputs.orgFeatureFlags?.[FEATURE_FLAG.mollifierEnabled];
+    if (override !== undefined) {
+      const parsed = FeatureFlagCatalog[FEATURE_FLAG.mollifierEnabled].safeParse(override);
+      if (parsed.success) {
+        return Promise.resolve(parsed.data);
+      }
+    }
+    return Promise.resolve(false);
+  };
+}
+
+const resolveMollifierFlag = makeResolveMollifierFlag();
+
+export const defaultGateDependencies: GateDependencies = {
+  isMollifierEnabled: () => env.TRIGGER_MOLLIFIER_ENABLED === "1",
+  isShadowModeOn: () => env.TRIGGER_MOLLIFIER_SHADOW_MODE === "1",
+  resolveOrgFlag: resolveMollifierFlag,
+  evaluator: defaultEvaluator,
+  logShadow: (inputs, decision) =>
+    logDivertDecision("mollifier.would_mollify", inputs, decision),
+  logMollified: (inputs, decision) =>
+    logDivertDecision("mollifier.mollified", inputs, decision),
+  recordDecision,
+};
+
+export async function evaluateGate(
+  inputs: GateInputs,
+  deps: Partial<GateDependencies> = {},
+): Promise<GateOutcome> {
+  const d = { ...defaultGateDependencies, ...deps };
+
+  if (!d.isMollifierEnabled()) {
+    d.recordDecision("pass_through");
+    return { action: "pass_through" };
+  }
+
+  // Fail open: the default resolver reads in-memory JSON (no DB), but an
+  // injected `resolveOrgFlag` may throw — that must never block triggers.
+  // Mirrors the fail-open posture in `mollifierTripEvaluator.server.ts`.
+  let orgFlagEnabled: boolean;
+  try {
+    orgFlagEnabled = await d.resolveOrgFlag(inputs);
+  } catch (error) {
+    logger.warn("mollifier.resolve_org_flag_failed", {
+      envId: inputs.envId,
+      orgId: inputs.orgId,
+      taskId: inputs.taskId,
+      error: error instanceof Error ? error.message : String(error),
+    });
+    orgFlagEnabled = false;
+  }
+  const shadowOn = d.isShadowModeOn();
+
+  if (!orgFlagEnabled && !shadowOn) {
+    d.recordDecision("pass_through");
+    return { action: "pass_through" };
+  }
+
+  // Fail open on evaluator errors too. The default `createRealTripEvaluator`
+  // catches its own errors and returns `{ divert: false }`, but injected or
+  // future evaluators may not — keep the contract symmetric with the org
+  // flag resolution above so the trigger hot path can never be broken by a
+  // gate-internal failure.
+  let decision: TripDecision;
+  try {
+    decision = await d.evaluator(inputs);
+  } catch (error) {
+    logger.warn("mollifier.evaluator_failed", {
+      envId: inputs.envId,
+      orgId: inputs.orgId,
+      taskId: inputs.taskId,
+      error: error instanceof Error ? error.message : String(error),
+    });
+    decision = { divert: false };
+  }
+  if (!decision.divert) {
+    d.recordDecision("pass_through");
+    return { action: "pass_through" };
+  }
+
+  if (orgFlagEnabled) {
+    d.logMollified(inputs, decision);
+    d.recordDecision("mollify", decision.reason);
+    return { action: "mollify", decision };
+  }
+
+  d.logShadow(inputs, decision);
+  d.recordDecision("shadow_log", decision.reason);
+  return { action: "shadow_log", decision };
+}
diff --git a/apps/webapp/app/v3/mollifierDrainerWorker.server.ts b/apps/webapp/app/v3/mollifierDrainerWorker.server.ts
diff --git a/apps/webapp/test/mollifierGate.test.ts b/apps/webapp/test/mollifierGate.test.ts