-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathslow_ping.analyzer.ts
More file actions
105 lines (93 loc) · 4.43 KB
/
slow_ping.analyzer.ts
File metadata and controls
105 lines (93 loc) · 4.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// Copyright (c) 2026 Azimutt, Inc.
// Licensed under the MIT License. See LICENSES/MIT.txt in the repository root.
import * as z from 'zod'
import { indexBy } from '@azimutt/utils'
import {
AlertId,
AlertStarted,
AnalyzerCtx,
AnalyzerPlugin,
AnalyzerPluginId,
CollectorId,
diffBy,
Duration,
durationToMillis,
ExecutionId,
ExecutionSuccess,
InstanceId,
instanceLabel,
JobId,
router,
zodErrorToString,
} from '@azimutt/inspector-core'
import PingCollector from './ping.collector'
// Analyzer configuration schema: a single `absolute` threshold expressed as a `Duration`.
// Built with `z.strictObject` and tagged with a schema id through `.meta`.
const SlowPingAnalyzerConfig = z.strictObject({ absolute: Duration }).meta({ id: 'SlowPingAnalyzerConfig' })
type SlowPingAnalyzerConfig = z.infer<typeof SlowPingAnalyzerConfig>
// Result schema of one analyzer run: the alerts that were created and the ids of the alerts that were resolved.
const SlowPingAnalyzerResult = z.strictObject({ alertsCreated: AlertStarted.array(), alertsResolved: AlertId.array() }).meta({ id: 'SlowPingAnalyzerResult' })
type SlowPingAnalyzerResult = z.infer<typeof SlowPingAnalyzerResult>
// Issue name set on every alert created by this analyzer (also used to build the alerts link in the plugin doc).
const SlowPingAnalyzerIssue = 'slow_ping'
// Plugin id of this analyzer, validated by `AnalyzerPluginId`; used as the plugin `module` and to look up its open alerts.
const SlowPingAnalyzerModule = AnalyzerPluginId.parse('ext.slow_ping')
/**
 * Analyzer plugin that opens and resolves `slow_ping` alerts from the latest ping collector results.
 *
 * `analyze` loads the last ping executions of every instance, keeps the pings whose duration exceeds
 * the configured `absolute` threshold, and diffs them (by instance) against this analyzer's unresolved alerts:
 * entries only on the ping side become new alerts, entries only on the alert side are resolved.
 * NOTE(review): this relies on `diffBy` returning `left`/`right`/`both` as "only in first"/"only in second"/"in both" — confirm.
 * NOTE(review): one execution can yield several ping results for the same instance; whether that can produce
 * several alerts for one instance depends on how `diffBy` handles duplicate keys — confirm.
 */
const SlowPingAnalyzer: AnalyzerPlugin<SlowPingAnalyzerResult, SlowPingAnalyzerConfig> = {
  module: SlowPingAnalyzerModule,
  description: 'Open an alert if the ping duration is above the absolute threshold.',
  doc: `Get the ${PingCollector.module} last results (for all monitored instances) and check which ones are above the absolute threshold.
It creates, keep open or close alerts depending on instances breaking the threshold.
The threshold is configured with the \`absolute\` property, which is a duration (ex: in millis if number or with unit like \`1s\`).
Generates ${[SlowPingAnalyzerIssue].map(issue => `[${issue}](${router.alerts({ issue })})`).join(', ')} alerts.`,
  config: SlowPingAnalyzerConfig,
  analyze: async (ctx: AnalyzerCtx<SlowPingAnalyzerConfig>): Promise<SlowPingAnalyzerResult> => {
    // Collect the ids of the last ping collector executions, across all instances.
    const execInfosByInstance = await ctx.getCollectorsLastResultInfos()
    const pingExecIds = Object.values(execInfosByInstance).flatMap(infos =>
      infos.filter(info => info.module === PingCollector.module).map(info => info.id)
    )

    // Load those executions and flatten them into individual ping measurements.
    const pingExecs = await ctx.getExecutionResults({ ids: pingExecIds })
    const pings: PingResult[] = pingExecs.flatMap(executionToResults)

    // Keep only the pings strictly above the configured threshold.
    const thresholdMs = durationToMillis(ctx.config.absolute)
    const slowPings = pings.filter(ping => ping.duration > thresholdMs)

    // Compare slow pings with the currently open alerts of this analyzer, matching on the instance.
    const openAlerts = await ctx.getAlerts({ module: SlowPingAnalyzerModule, resolved: false })
    const diff = diffBy(slowPings, p => p.instance, openAlerts, a => a.instance!)

    // Instances are only needed to build a readable alert title.
    const instanceList = await ctx.getInstances(Object.keys(execInfosByInstance))
    const instances = indexBy(instanceList, i => i.id)

    // Slow pings without an open alert: create one alert each.
    const [alertsCreated] = await ctx.createAlerts(
      diff.left.map(p => ({
        issue: SlowPingAnalyzerIssue,
        title: `Slow ping on ${instanceLabel(instances[p.instance] || { id: p.instance })}`,
        reason: `A slow ping can indicate a network issue or a global performance problem on the instance.`,
        explanation: `Ping duration is above the absolute threshold of ${ctx.config.absolute}`,
        resolution: `Check the instance and fix the issue`,
        severity: 'warning',
        instance: p.instance,
        execution: p.execution,
        props: { startedAt: p.date },
      }))
    )

    // Open alerts without a matching slow ping: resolve them and report their ids.
    const [resolved] = await ctx.resolveAlerts(diff.right)
    const alertsResolved = resolved.map(a => a.id)

    // TODO: update ongoing alerts (diff.both) with last ping duration, e.g.:
    // await Promise.all(diff.both.map(({left: p, right: a}) => ctx.storage.updateAlert(a.id, { ... })))
    return { alertsCreated, alertsResolved }
  },
}

export default SlowPingAnalyzer
/**
 * Turns a successful collector execution into a flat list of ping measurements.
 *
 * The execution is validated against `PingCollectorExecution`; when validation fails the problem is
 * logged with `console.error` and an empty list is returned, so a malformed execution is skipped
 * instead of failing the whole analysis.
 *
 * @param exec execution to read ping results from
 * @returns one `PingResult` per item of the execution `result`, each carrying the execution id, instance and start date
 */
function executionToResults(exec: ExecutionSuccess): PingResult[] {
  const res = PingCollectorExecution.safeParse(exec)

  // Guard: not a valid ping execution, report it and contribute nothing.
  if (!res.success) {
    console.error(`SlowPingAnalyzer can't parse collector result for execution ${exec.id}: ${zodErrorToString(res.error, PingCollectorExecution, 'PingCollectorExecution', exec)}`)
    return []
  }

  const { id: execution, instance, startedAt: date, result } = res.data
  return result.map(({ duration }) => ({ execution, instance, duration, date }))
}
// Schema of the execution fields this analyzer reads from a ping collector execution.
// `executionToResults` validates executions against it with `safeParse` before extracting pings.
const PingCollectorExecution = z.object({
id: ExecutionId,
job: JobId,
startedAt: z.date(),
instance: InstanceId,
collector: CollectorId,
// An execution carries a list of ping collector results, not a single one.
result: PingCollector.result.array(),
})
type PingCollectorExecution = z.infer<typeof PingCollectorExecution>
// Flattened ping measurement: one entry per item of an execution `result`, built by `executionToResults`.
type PingResult = {
// Id of the execution the measurement comes from.
execution: ExecutionId,
// Instance the execution ran against; used as the key when matching pings with open alerts.
instance: InstanceId,
// Ping duration; compared against `durationToMillis(...)` in the analyzer, so presumably milliseconds — TODO confirm.
duration: number,
// Taken from the execution `startedAt`.
date: Date,
}