Skip to content

Commit 2e94f50

Browse files
authored
chore: add low tps model alerts (anomalyco#27055)
1 parent 4498fc9 commit 2e94f50

2 files changed

Lines changed: 91 additions & 6 deletions

File tree

infra/monitoring.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,34 @@ const providerHttpErrorsQuery = (product: "go" | "zen") => {
111111
}).json
112112
}
113113

114+
/**
 * Builds the Honeycomb query specification JSON used by the low-TPS model triggers.
 *
 * The query is broken down by `model` and only looks at `completions` events whose
 * user agent contains "opencode", whose status is in [200, 400), and that carry a
 * `tps.output` value. The `isGoTier` filter selects the requested product.
 *
 * @param product - "go" matches events with `isGoTier = "true"`, "zen" matches `"false"`.
 * @returns The `.json` output of `honeycomb.getQuerySpecificationOutput`.
 */
const modelLowTpsQuery = (product: "go" | "zen") => {
  const goTierFlag = product === "go" ? "true" : "false"

  // The same filter list is applied to both calculations so that TOTAL and TPS
  // are computed over the same set of events.
  const completionFilters = [
    { column: "model", op: "exists" },
    { column: "event_type", op: "=", value: "completions" },
    { column: "user_agent", op: "contains", value: "opencode" },
    { column: "isGoTier", op: "=", value: goTierFlag },
    { column: "status", op: ">=", value: "200" },
    { column: "status", op: "<", value: "400" },
    { column: "tps.output", op: "exists" },
  ]

  const querySpecification = honeycomb.getQuerySpecificationOutput({
    breakdowns: ["model"],
    calculations: [
      { op: "COUNT", name: "TOTAL", filterCombination: "AND", filters: completionFilters },
      { op: "P50", name: "TPS", column: "tps.output", filterCombination: "AND", filters: completionFilters },
    ],
    // LOW_TPS is the P50 of tps.output when at least 100 events matched; otherwise
    // it is the constant 999 (presumably a sentinel that keeps low-volume models
    // above any alert threshold — confirm against the triggers using this query).
    formulas: [{ name: "LOW_TPS", expression: "IF(GTE($TOTAL, 100), $TPS, 999)" }],
    // NOTE(review): presumably seconds (a 15-minute window) — confirm against the provider docs.
    timeRange: 900,
  })

  return querySpecification.json
}
141+
114142
new honeycomb.Trigger("IncreasedModelHttpErrorsGo", {
115143
name: "Increased Model HTTP Errors [Go]",
116144
description,
@@ -149,6 +177,46 @@ new honeycomb.Trigger("IncreasedModelHttpErrorsZen", {
149177
],
150178
})
151179

180+
// Low-TPS trigger for the Go product. It is created disabled and evaluates the
// query built by modelLowTpsQuery("go"); the threshold matches results below 20.
new honeycomb.Trigger("LowModelTpsGo", {
  disabled: true,
  name: "Low Model TPS [Go]",
  description,
  queryJson: modelLowTpsQuery("go"),
  alertType: "on_change",
  // NOTE(review): presumably seconds between evaluations — confirm against the provider docs.
  frequency: 300,
  thresholds: [{ op: "<", value: 20, exceededLimit: 1 }],
  recipients: [
    {
      id: webhookRecipient.id,
      // Passes a `type` variable of "model_low_tps" along with the notification.
      notificationDetails: [{ variables: [{ name: "type", value: "model_low_tps" }] }],
    },
  ],
})
199+
200+
// Low-TPS trigger for the Zen product. It is created disabled and evaluates the
// query built by modelLowTpsQuery("zen"); the threshold matches results below 20.
new honeycomb.Trigger("LowModelTpsZen", {
  disabled: true,
  name: "Low Model TPS [Zen]",
  description,
  queryJson: modelLowTpsQuery("zen"),
  alertType: "on_change",
  // NOTE(review): presumably seconds between evaluations — confirm against the provider docs.
  frequency: 300,
  thresholds: [{ op: "<", value: 20, exceededLimit: 1 }],
  recipients: [
    {
      id: webhookRecipient.id,
      // Passes a `type` variable of "model_low_tps" along with the notification.
      notificationDetails: [{ variables: [{ name: "type", value: "model_low_tps" }] }],
    },
  ],
})
219+
152220
new honeycomb.Trigger("IncreasedProviderHttpErrorsGo", {
153221
name: "Increased Provider HTTP Errors [Go]",
154222
description,

packages/console/app/src/routes/honeycomb/webhook.ts

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,19 @@ const basePayload = z.object({
1212
url: z.string(),
1313
})
1414

15-
const groups = z.object({ group: z.object({ key: z.string(), value: z.string() }).array() }).array()
15+
// Schema for the `groups` array of a webhook payload. Each entry has a list of
// key/value pairs under `group` and an optional `result` that may be a number,
// a string, null, or absent.
const groups = z.array(
  z.object({
    result: z.union([z.number(), z.string()]).nullish(),
    group: z.array(z.object({ key: z.string(), value: z.string() })),
  }),
)
1618

1719
const honeycombWebhookPayload = z.discriminatedUnion("type", [
1820
basePayload.extend({
1921
type: z.literal("model_http_errors"),
2022
groups,
2123
}),
24+
basePayload.extend({
25+
type: z.literal("model_low_tps"),
26+
groups,
27+
}),
2228
basePayload.extend({
2329
type: z.literal("provider_http_errors"),
2430
groups,
@@ -29,14 +35,25 @@ const honeycombWebhookPayload = z.discriminatedUnion("type", [
2935
])
3036

3137
const postDiscordMessage = async (payload: z.infer<typeof honeycombWebhookPayload>) => {
32-
const group =
33-
payload.type === "model_http_errors" ? "model" : payload.type === "provider_http_errors" ? "provider" : undefined
34-
const names = payload.type === "custom" ? [] : payload.groups.flatMap((item) => item.group.map((g) => g.value))
38+
const names =
39+
payload.type === "custom"
40+
? []
41+
: payload.groups.flatMap((item) =>
42+
item.group.map((g) => {
43+
const result = item.result == null ? undefined : Number(item.result)
44+
return `- ${g.value}${
45+
result !== undefined && Number.isFinite(result)
46+
? payload.type === "model_low_tps"
47+
? ` (${Math.round(result)} TPS)`
48+
: ` (${Math.round(result * 100)}% errors)`
49+
: ""
50+
}`
51+
}),
52+
)
3553

3654
const content = [
3755
`[**${payload.isTest ? "[TEST] " : ""}${payload.name ?? "Honeycomb alert"}**](${payload.url})`,
38-
group && names.length > 0 ? `Affected ${group}s:` : undefined,
39-
...names.map((name) => `- ${name}`),
56+
...names,
4057
"",
4158
`<@&${DISCORD_ALERT_ROLE_ID}>`,
4259
]

0 commit comments

Comments
 (0)