Skip to content

Commit 969973c

Browse files
committed
fix: downgrade queue size limit errors to warnings
Queue limit ServiceValidationErrors were being logged at error level, consuming ~1.37M Sentry events/week (66% of total budget). These are expected validation rejections, not bugs. - Add logLevel property to ServiceValidationError (webapp + run-engine) - Set logLevel: warn on all queue limit throws - Schedule engine: detect queue limit failures and log as warn - Redis-worker: respect logLevel on thrown errors refs TRI-7911
1 parent 1cfc296 commit 969973c

File tree

10 files changed

+77
-20
lines changed

10 files changed

+77
-20
lines changed

apps/webapp/app/runEngine/services/triggerTask.server.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,9 @@ export class RunEngineTriggerTaskService {
263263

264264
if (!queueSizeGuard.ok) {
265265
throw new ServiceValidationError(
266-
`Cannot trigger ${taskId} as the queue size limit for this environment has been reached. The maximum size is ${queueSizeGuard.maximumSize}`
266+
`Cannot trigger ${taskId} as the queue size limit for this environment has been reached. The maximum size is ${queueSizeGuard.maximumSize}`,
267+
undefined,
268+
"warn"
267269
);
268270
}
269271
}

apps/webapp/app/v3/scheduleEngine.server.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { ScheduleEngine } from "@internal/schedule-engine";
2+
import type { TriggerScheduledTaskErrorType } from "@internal/schedule-engine";
23
import { stringifyIO } from "@trigger.dev/core/v3";
34
import { prisma } from "~/db.server";
45
import { env } from "~/env.server";
@@ -8,6 +9,7 @@ import { singleton } from "~/utils/singleton";
89
import { TriggerTaskService } from "./services/triggerTask.server";
910
import { meter, tracer } from "./tracer.server";
1011
import { workerQueue } from "~/services/worker.server";
12+
import { ServiceValidationError } from "./services/common.server";
1113

1214
export const scheduleEngine = singleton("ScheduleEngine", createScheduleEngine);
1315

@@ -111,9 +113,20 @@ function createScheduleEngine() {
111113

112114
return { success: !!result };
113115
} catch (error) {
116+
const errorMessage = error instanceof Error ? error.message : String(error);
117+
let errorType: TriggerScheduledTaskErrorType = "SYSTEM_ERROR";
118+
119+
if (
120+
error instanceof ServiceValidationError &&
121+
errorMessage.includes("queue size limit for this environment has been reached")
122+
) {
123+
errorType = "QUEUE_LIMIT";
124+
}
125+
114126
return {
115127
success: false,
116-
error: error instanceof Error ? error.message : String(error),
128+
error: errorMessage,
129+
errorType,
117130
};
118131
}
119132
},

apps/webapp/app/v3/services/batchTriggerV3.server.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,9 @@ export class BatchTriggerV3Service extends BaseService {
249249

250250
if (!queueSizeGuard.isWithinLimits) {
251251
throw new ServiceValidationError(
252-
`Cannot trigger ${newRunCount} tasks as the queue size limit for this environment has been reached. The maximum size is ${queueSizeGuard.maximumSize}`
252+
`Cannot trigger ${newRunCount} tasks as the queue size limit for this environment has been reached. The maximum size is ${queueSizeGuard.maximumSize}`,
253+
undefined,
254+
"warn"
253255
);
254256
}
255257

apps/webapp/app/v3/services/common.server.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
export type ServiceValidationErrorLevel = "error" | "warn" | "info";
2+
13
export class ServiceValidationError extends Error {
2-
constructor(message: string, public status?: number) {
4+
constructor(
5+
message: string,
6+
public status?: number,
7+
public logLevel?: ServiceValidationErrorLevel
8+
) {
39
super(message);
410
this.name = "ServiceValidationError";
511
}

apps/webapp/app/v3/services/triggerTaskV1.server.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,9 @@ export class TriggerTaskServiceV1 extends BaseService {
134134

135135
if (!queueSizeGuard.isWithinLimits) {
136136
throw new ServiceValidationError(
137-
`Cannot trigger ${taskId} as the queue size limit for this environment has been reached. The maximum size is ${queueSizeGuard.maximumSize}`
137+
`Cannot trigger ${taskId} as the queue size limit for this environment has been reached. The maximum size is ${queueSizeGuard.maximumSize}`,
138+
undefined,
139+
"warn"
138140
);
139141
}
140142
}

internal-packages/run-engine/src/engine/errors.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,14 @@ export function runStatusFromError(
6969
}
7070
}
7171

72+
export type ServiceValidationErrorLevel = "error" | "warn" | "info";
73+
7274
export class ServiceValidationError extends Error {
7375
constructor(
7476
message: string,
7577
public status?: number,
76-
public metadata?: Record<string, unknown>
78+
public metadata?: Record<string, unknown>,
79+
public logLevel?: ServiceValidationErrorLevel
7780
) {
7881
super(message);
7982
this.name = "ServiceValidationError";

internal-packages/schedule-engine/src/engine/index.ts

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -497,17 +497,28 @@ export class ScheduleEngine {
497497

498498
span.setAttribute("trigger_success", true);
499499
} else {
500-
this.logger.error("Failed to trigger scheduled task", {
501-
instanceId: params.instanceId,
502-
taskIdentifier: instance.taskSchedule.taskIdentifier,
503-
durationMs: triggerDuration,
504-
error: result.error,
505-
});
500+
const isQueueLimit = result.errorType === "QUEUE_LIMIT";
501+
502+
if (isQueueLimit) {
503+
this.logger.warn("Scheduled task trigger skipped due to queue limit", {
504+
instanceId: params.instanceId,
505+
taskIdentifier: instance.taskSchedule.taskIdentifier,
506+
durationMs: triggerDuration,
507+
error: result.error,
508+
});
509+
} else {
510+
this.logger.error("Failed to trigger scheduled task", {
511+
instanceId: params.instanceId,
512+
taskIdentifier: instance.taskSchedule.taskIdentifier,
513+
durationMs: triggerDuration,
514+
error: result.error,
515+
});
516+
}
506517

507518
this.scheduleExecutionFailureCounter.add(1, {
508519
environment_type: environmentType,
509520
schedule_type: scheduleType,
510-
error_type: "task_failure",
521+
error_type: isQueueLimit ? "queue_limit" : "task_failure",
511522
});
512523

513524
span.setAttribute("trigger_success", false);

internal-packages/schedule-engine/src/engine/types.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,14 @@ export type TriggerScheduledTaskParams = {
2424
exactScheduleTime?: Date;
2525
};
2626

27+
export type TriggerScheduledTaskErrorType = "QUEUE_LIMIT" | "SYSTEM_ERROR";
28+
2729
export interface TriggerScheduledTaskCallback {
28-
(params: TriggerScheduledTaskParams): Promise<{ success: boolean; error?: string }>;
30+
(params: TriggerScheduledTaskParams): Promise<{
31+
success: boolean;
32+
error?: string;
33+
errorType?: TriggerScheduledTaskErrorType;
34+
}>;
2935
}
3036

3137
export interface ScheduleEngineOptions {

internal-packages/schedule-engine/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ export type {
33
ScheduleEngineOptions,
44
TriggerScheduleParams,
55
TriggerScheduledTaskCallback,
6+
TriggerScheduledTaskErrorType,
67
} from "./engine/types.js";

packages/redis-worker/src/worker.ts

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,8 @@ class Worker<TCatalog extends WorkerCatalog> {
895895
const errorMessage = error instanceof Error ? error.message : String(error);
896896

897897
const shouldLogError = catalogItem.logErrors ?? true;
898+
const errorLogLevel =
899+
error && typeof error === "object" && "logLevel" in error ? error.logLevel : undefined;
898900

899901
const logAttributes = {
900902
name: this.options.name,
@@ -906,10 +908,14 @@ class Worker<TCatalog extends WorkerCatalog> {
906908
errorMessage,
907909
};
908910

909-
if (shouldLogError) {
910-
this.logger.error(`Worker error processing item`, logAttributes);
911-
} else {
911+
if (!shouldLogError) {
912912
this.logger.info(`Worker failed to process item`, logAttributes);
913+
} else if (errorLogLevel === "warn") {
914+
this.logger.warn(`Worker error processing item`, logAttributes);
915+
} else if (errorLogLevel === "info") {
916+
this.logger.info(`Worker error processing item`, logAttributes);
917+
} else {
918+
this.logger.error(`Worker error processing item`, logAttributes);
913919
}
914920

915921
// Attempt requeue logic.
@@ -922,13 +928,18 @@ class Worker<TCatalog extends WorkerCatalog> {
922928
const retryDelay = calculateNextRetryDelay(retrySettings, newAttempt);
923929

924930
if (!retryDelay) {
925-
if (shouldLogError) {
926-
this.logger.error(`Worker item reached max attempts. Moving to DLQ.`, {
931+
if (!shouldLogError || errorLogLevel === "info") {
932+
this.logger.info(`Worker item reached max attempts. Moving to DLQ.`, {
933+
...logAttributes,
934+
attempt: newAttempt,
935+
});
936+
} else if (errorLogLevel === "warn") {
937+
this.logger.warn(`Worker item reached max attempts. Moving to DLQ.`, {
927938
...logAttributes,
928939
attempt: newAttempt,
929940
});
930941
} else {
931-
this.logger.info(`Worker item reached max attempts. Moving to DLQ.`, {
942+
this.logger.error(`Worker item reached max attempts. Moving to DLQ.`, {
932943
...logAttributes,
933944
attempt: newAttempt,
934945
});

0 commit comments

Comments
 (0)