|
1 | 1 | import * as client from 'prom-client'; |
2 | | -import os from 'os'; |
3 | | -import { nanoid } from 'nanoid'; |
| 2 | +import * as http from 'http'; |
4 | 3 | import createLogger from './logger'; |
5 | 4 |
|
// Bind address used when PROMETHEUS_METRICS_HOST is not provided.
const DEFAULT_METRICS_HOST = '0.0.0.0';
// URL path used when PROMETHEUS_METRICS_PATH is not provided.
const DEFAULT_METRICS_PATH = '/metrics';

// Inclusive bounds accepted for PROMETHEUS_METRICS_PORT.
const MIN_PORT = 1;
const MAX_PORT = 65535;

// HTTP status codes sent by the metrics endpoint.
const HTTP_OK = 200;
const HTTP_NOT_FOUND = 404;
const HTTP_INTERNAL_SERVER_ERROR = 500;

// Registry that receives the default metrics below and is exported to callers.
const register = new client.Registry();
const logger = createLogger();

// Metrics HTTP server currently tracked by this module, or null when there is none.
let metricsServer: http.Server | null = null;
// Name of the worker the tracked server was started for ('' when there is none).
let currentWorkerName = '';

client.collectDefaultMetrics({ register });

export { register, client };
19 | 22 |
|
/**
 * Resolve the metrics endpoint port from `PROMETHEUS_METRICS_PORT`.
 *
 * @returns the configured port, or `null` when the variable is unset or empty,
 * or when it is not an integer between MIN_PORT and MAX_PORT (a warning is
 * logged in that last case).
 */
function getMetricsPort(): number | null {
  const { PROMETHEUS_METRICS_PORT: rawPort } = process.env;

  // Unset and empty are both treated as "not configured" and are not warned about.
  if (rawPort === undefined || rawPort === '') {
    return null;
  }

  const port = Number(rawPort);
  const isValidPort = Number.isInteger(port) && port >= MIN_PORT && port <= MAX_PORT;

  if (isValidPort) {
    return port;
  }

  logger.warn(`[metrics] invalid PROMETHEUS_METRICS_PORT="${rawPort}"; expected an integer between ${MIN_PORT} and ${MAX_PORT}`);

  return null;
}
| 43 | + |
/**
 * Resolve the metrics endpoint path from `PROMETHEUS_METRICS_PATH`.
 *
 * Unset, empty and whitespace-only values fall back to DEFAULT_METRICS_PATH
 * (whitespace-only values are also reported with a warning). Surrounding
 * whitespace is trimmed, and a value without a leading slash gets one
 * prepended, with a warning.
 *
 * @returns path that always starts with `/`.
 */
function getMetricsPath(): string {
  const rawPath = process.env.PROMETHEUS_METRICS_PATH;

  if (rawPath === undefined || rawPath === '') {
    return DEFAULT_METRICS_PATH;
  }

  const trimmedPath = rawPath.trim();

  if (trimmedPath === '') {
    logger.warn(`[metrics] invalid PROMETHEUS_METRICS_PATH="${rawPath}", fallback to ${DEFAULT_METRICS_PATH}`);

    return DEFAULT_METRICS_PATH;
  }

  if (trimmedPath.startsWith('/')) {
    return trimmedPath;
  }

  const normalizedPath = `/${trimmedPath}`;

  logger.warn(`[metrics] normalized PROMETHEUS_METRICS_PATH from "${rawPath}" to "${normalizedPath}"`);

  return normalizedPath;
}
39 | 72 |
|
/**
 * Stop HTTP metrics endpoint.
 *
 * Does nothing when no server is tracked. The module state (`metricsServer`,
 * `currentWorkerName`) is cleared only if it still refers to the server being
 * stopped, so a server registered in the meantime is never forgotten by a
 * late callback.
 */
export function stopMetricsServer(): void {
  const serverToStop = metricsServer;

  if (!serverToStop) {
    return;
  }

  const stoppedWorkerName = currentWorkerName;

  // Forget the server, unless another one has been registered since.
  const releaseState = (): void => {
    if (metricsServer === serverToStop) {
      metricsServer = null;
      currentWorkerName = '';
    }
  };

  const closeServer = (): void => {
    serverToStop.close((error?: Error) => {
      // close() reports an error only when the server was not running, so in
      // both outcomes the server no longer accepts connections. Keeping it
      // tracked would make every later start look like a duplicate.
      releaseState();

      if (error) {
        logger.error(`[metrics] failed to stop endpoint for worker=${stoppedWorkerName}: ${error.message}`);

        return;
      }

      logger.info(`[metrics] stopped endpoint for worker=${stoppedWorkerName}`);
    });
  };

  if (!serverToStop.listening) {
    // listen() is asynchronous, so `listening` is also false for a server
    // whose listen() call has not completed yet. Without this hook such a
    // server would start listening after being forgotten and keep the port
    // bound. For a server that is already closed the listener never fires.
    serverToStop.once('listening', closeServer);

    logger.info(`[metrics] endpoint already stopped for worker=${stoppedWorkerName}`);
    releaseState();

    return;
  }

  closeServer();
}
53 | 110 |
|
/**
 * Start HTTP metrics endpoint for scraper-based monitoring.
 *
 * Nothing is started unless `getMetricsPort()` returns a port. Only one
 * server is tracked at a time: a call made while one is tracked is logged and
 * ignored.
 *
 * Routes served:
 * - `/-/healthy` (any method) answers `200 ok`;
 * - `GET` on the configured metrics path answers with the rendered registry;
 * - anything else answers `404 not found`.
 *
 * @param workerName - name of the worker for default metric labels.
 * @returns function that stops the endpoint (always `stopMetricsServer`),
 * returned even when no server was started so callers can call it
 * unconditionally.
 */
export function startMetricsServer(workerName: string): () => void {
  const port = getMetricsPort();

  if (port === null) {
    return stopMetricsServer;
  }

  if (metricsServer) {
    logger.warn(`[metrics] endpoint is already started for worker=${currentWorkerName}, skip duplicate start for worker=${workerName}`);

    return stopMetricsServer;
  }

  // `||` rather than `??`: an empty PROMETHEUS_METRICS_HOST also falls back to the default.
  const host = process.env.PROMETHEUS_METRICS_HOST || DEFAULT_METRICS_HOST;
  const path = getMetricsPath();

  register.setDefaultLabels({ worker: workerName });

  const server = http.createServer(async (request, response) => {
    // Match on the path only; the query string is ignored.
    const requestPath = request.url?.split('?')[0];

    if (requestPath === '/-/healthy') {
      response.writeHead(HTTP_OK, { 'Content-Type': 'text/plain' });
      response.end('ok');

      return;
    }

    if (request.method !== 'GET' || requestPath !== path) {
      response.writeHead(HTTP_NOT_FOUND, { 'Content-Type': 'text/plain' });
      response.end('not found');

      return;
    }

    try {
      // Render before touching the response. Once the 200 header is written
      // a rendering failure can no longer be reported as a 500, because a
      // second writeHead() throws ERR_HTTP_HEADERS_SENT and would surface as
      // an unhandled rejection of this async handler.
      const body = await register.metrics();

      response.writeHead(HTTP_OK, { 'Content-Type': register.contentType });
      response.end(body);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);

      logger.error(`[metrics] failed to render metrics: ${message}`);

      if (response.headersSent) {
        // The failure happened while sending; the status cannot be changed
        // any more, so just finish the response.
        response.end();

        return;
      }

      response.writeHead(HTTP_INTERNAL_SERVER_ERROR, { 'Content-Type': 'text/plain' });
      response.end('metrics error');
    }
  });

  server.on('error', (error) => {
    logger.error(`[metrics] endpoint error for worker=${workerName}: ${error.message}`);

    // Stop tracking the failed server so a later start is not rejected as a
    // duplicate, but never drop a different server registered since.
    if (metricsServer === server) {
      metricsServer = null;
      currentWorkerName = '';
    }
  });

  // Register before listen() so a duplicate start is detected even while
  // listening is still being set up.
  metricsServer = server;
  currentWorkerName = workerName;

  server.listen(port, host, () => {
    logger.info(`[metrics] endpoint started for worker=${workerName} at http://${host}:${port}${path}`);
  });

  return stopMetricsServer;
}
0 commit comments