Set up alerts based on user-facing symptoms (SLO violations) rather than system metrics (CPU usage).
import { Effect, Metric, Duration, Ref } from "effect"
// ============================================
// 1. Define alertable conditions
// ============================================
interface Alert {
  readonly name: string
  readonly severity: "critical" | "warning" | "info"
  readonly message: string
  readonly timestamp: Date
  readonly labels: Record<string, string>
}
interface AlertRule {
  readonly name: string
  readonly condition: Effect.Effect<boolean>
  readonly severity: "critical" | "warning" | "info"
  readonly message: string
  readonly labels: Record<string, string>
  readonly forDuration: Duration.DurationInput
}
// ============================================
// 2. Define alert rules
// ============================================
const createAlertRules = (metrics: {
  errorRate: () => Effect.Effect<number>
  latencyP99: () => Effect.Effect<number>
  availability: () => Effect.Effect<number>
}): AlertRule[] => [
  {
    name: "HighErrorRate",
    condition: metrics.errorRate().pipe(Effect.map((rate) => rate > 0.01)),
    severity: "critical",
    message: "Error rate exceeds 1%",
    labels: { team: "backend", service: "api" },
    forDuration: "5 minutes",
  },
  {
    name: "HighLatency",
    condition: metrics.latencyP99().pipe(Effect.map((p99) => p99 > 2)),
    severity: "warning",
    message: "P99 latency exceeds 2 seconds",
    labels: { team: "backend", service: "api" },
    forDuration: "10 minutes",
  },
  {
    name: "LowAvailability",
    condition: metrics.availability().pipe(Effect.map((avail) => avail < 99.9)),
    severity: "critical",
    message: "Availability below 99.9% SLO",
    labels: { team: "backend", service: "api" },
    forDuration: "5 minutes",
  },
  {
    name: "ErrorBudgetLow",
    condition: Effect.succeed(false), // Implement based on error budget calc
    severity: "warning",
    message: "Error budget below 25%",
    labels: { team: "backend", service: "api" },
    forDuration: "0 seconds",
  },
]
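// Example wiring (a sketch): derive the rule inputs from in-process Effect metrics.
// The counter names are assumptions for illustration; latencyP99 and availability
// are stubbed here and would come from a latency histogram and an SLO store.
const requestCount = Metric.counter("http_requests_total")
const errorCount = Metric.counter("http_errors_total")
const exampleMetrics = {
  errorRate: () =>
    Effect.all([Metric.value(errorCount), Metric.value(requestCount)]).pipe(
      Effect.map(([errors, total]) =>
        total.count === 0 ? 0 : errors.count / total.count
      )
    ),
  latencyP99: () => Effect.succeed(0.35),
  availability: () => Effect.succeed(99.95),
}
const exampleRules = createAlertRules(exampleMetrics)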
// ============================================
// 3. Alert manager
// ============================================
interface AlertState {
  readonly firing: Map<string, { since: Date; alert: Alert }>
  readonly resolved: Alert[]
}
const makeAlertManager = Effect.gen(function* () {
  const state = yield* Ref.make<AlertState>({
    firing: new Map(),
    resolved: [],
  })
  // Note: rule.forDuration is not enforced here; a fuller implementation would
  // only treat an alert as firing once `since` is older than the rule's forDuration.
  const checkRule = (rule: AlertRule) =>
    Effect.gen(function* () {
      const isTriggered = yield* rule.condition
      yield* Ref.update(state, (s) => {
        const firing = new Map(s.firing)
        const resolved = [...s.resolved]
        const key = rule.name
        if (isTriggered) {
          if (!firing.has(key)) {
            // New alert: record when it started firing
            firing.set(key, {
              since: new Date(),
              alert: {
                name: rule.name,
                severity: rule.severity,
                message: rule.message,
                timestamp: new Date(),
                labels: rule.labels,
              },
            })
          }
        } else if (firing.has(key)) {
          // Alert resolved: move it out of the firing set
          const prev = firing.get(key)!
          resolved.push({
            ...prev.alert,
            message: `[RESOLVED] ${prev.alert.message}`,
            timestamp: new Date(),
          })
          firing.delete(key)
        }
        return { firing, resolved }
      })
    })
  const getActiveAlerts = () =>
    Ref.get(state).pipe(
      Effect.map((s) => Array.from(s.firing.values()).map((f) => f.alert))
    )
  const getRecentResolved = () =>
    Ref.get(state).pipe(Effect.map((s) => s.resolved.slice(-10)))
  return {
    checkRule,
    getActiveAlerts,
    getRecentResolved,
  }
})
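// Minimal usage sketch: evaluate the rules once against stubbed metric values
// (chosen so HighErrorRate trips) and log which alerts are firing.
const checkOnce = Effect.gen(function* () {
  const manager = yield* makeAlertManager
  const demoRules = createAlertRules({
    errorRate: () => Effect.succeed(0.02), // 2% error rate, above the 1% threshold
    latencyP99: () => Effect.succeed(0.4),
    availability: () => Effect.succeed(99.95),
  })
  yield* Effect.forEach(demoRules, (rule) => manager.checkRule(rule))
  const active = yield* manager.getActiveAlerts()
  yield* Effect.log(`Firing: ${active.map((a) => a.name).join(", ")}`)
})
// Effect.runPromise(checkOnce) should log: Firing: HighErrorRate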
// ============================================
// 4. Alert notification
// ============================================
interface NotificationChannel {
  readonly send: (alert: Alert) => Effect.Effect<void>
}
const slackChannel: NotificationChannel = {
  send: (alert) =>
    Effect.gen(function* () {
      const emoji =
        alert.severity === "critical"
          ? "🔴"
          : alert.severity === "warning"
            ? "🟡"
            : "🔵"
      yield* Effect.log(`${emoji} [${alert.severity.toUpperCase()}] ${alert.name}`).pipe(
        Effect.annotateLogs({
          message: alert.message,
          labels: JSON.stringify(alert.labels),
        })
      )
      // In real implementation: call Slack API
    }),
}
const pagerDutyChannel: NotificationChannel = {
  send: (alert) =>
    Effect.gen(function* () {
      if (alert.severity === "critical") {
        yield* Effect.log("PagerDuty: Creating incident").pipe(
          Effect.annotateLogs({ alert: alert.name })
        )
        // In real implementation: call PagerDuty API
      }
    }),
}
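// A sketch of a webhook-backed channel: the incoming-webhook URL is supplied by the
// caller (an assumption for illustration), and failures are only logged so that one
// bad notification cannot break the evaluation loop.
const slackWebhookChannel = (webhookUrl: string): NotificationChannel => ({
  send: (alert) =>
    Effect.gen(function* () {
      yield* Effect.tryPromise(() =>
        fetch(webhookUrl, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            text: `[${alert.severity.toUpperCase()}] ${alert.name}: ${alert.message}`,
          }),
        })
      ).pipe(
        Effect.catchAll((error) => Effect.logError("Slack notification failed", error))
      )
    }),
})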
// ============================================
// 5. Alert evaluation loop
// ============================================
const runAlertEvaluation = (
  rules: AlertRule[],
  channels: NotificationChannel[],
  interval: Duration.DurationInput
) =>
  Effect.gen(function* () {
    const alertManager = yield* makeAlertManager
    const previousAlerts = yield* Ref.make(new Set<string>())
    yield* Effect.forever(
      Effect.gen(function* () {
        // Check all rules
        for (const rule of rules) {
          yield* alertManager.checkRule(rule)
        }
        // Get current active alerts
        const active = yield* alertManager.getActiveAlerts()
        const current = new Set(active.map((a) => a.name))
        const previous = yield* Ref.get(previousAlerts)
        // Find newly firing alerts
        for (const alert of active) {
          if (!previous.has(alert.name)) {
            // New alert - send notifications
            for (const channel of channels) {
              yield* channel.send(alert)
            }
          }
        }
        yield* Ref.set(previousAlerts, current)
        yield* Effect.sleep(interval)
      })
    )
  })
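// Putting it together (a sketch): evaluate every 30 seconds and notify both channels.
// The loop never completes on its own, so in a real service it would be forked
// alongside the main program, e.g. Effect.runFork(alertingDaemon).
const alertingDaemon = runAlertEvaluation(
  createAlertRules(exampleMetrics),
  [slackChannel, pagerDutyChannel],
  "30 seconds"
)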
// ============================================
// 6. Prometheus alerting rules (YAML)
// ============================================
const prometheusAlertRules = `
groups:
- name: effect-app-alerts
rules:
- alert: HighErrorRate
expr: |
sum(rate(http_errors_total[5m]))
/
sum(rate(http_requests_total[5m]))
> 0.01
for: 5m
labels:
severity: critical
annotations:
summary: "High error rate detected"
description: "Error rate is {{ $value | humanizePercentage }}"
- alert: HighLatency
expr: |
histogram_quantile(0.99,
sum(rate(http_request_duration_seconds_bucket[5m])) by (le)
) > 2
for: 10m
labels:
severity: warning
annotations:
summary: "High P99 latency"
description: "P99 latency is {{ $value }}s"
- alert: SLOViolation
expr: |
sum(rate(http_requests_total{status!~"5.."}[30m]))
/
sum(rate(http_requests_total[30m]))
< 0.999
for: 5m
labels:
severity: critical
annotations:
summary: "SLO violation"
description: "Availability is {{ $value | humanizePercentage }}"
`