Skip to content

Commit 5092983

Browse files
committed
feat: implement monitoring templates
1 parent cd58fb0 commit 5092983

3 files changed

Lines changed: 346 additions & 1 deletion

File tree

docker-compose.monitoring.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ services:
2424
networks:
2525
- monitoring
2626
ports:
27-
- "127.0.0.1:9200:8080"
27+
- "127.0.0.1:9202:8080"
2828
volumes:
2929
- /:/rootfs:ro
3030
- /var/run:/var/run:ro
@@ -58,6 +58,7 @@ services:
5858
volumes:
5959
- grafana-data:/var/lib/grafana
6060
- ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
61+
- ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
6162
environment:
6263
GF_SECURITY_ADMIN_USER: '${GRAFANA_ADMIN_USER:-admin}'
6364
GF_SECURITY_ADMIN_PASSWORD: '${GRAFANA_ADMIN_PASSWORD}'
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
{
2+
"uid": "prostaff-containers",
3+
"title": "ProStaff - Containers",
4+
"tags": ["prostaff", "docker"],
5+
"timezone": "browser",
6+
"refresh": "30s",
7+
"time": { "from": "now-3h", "to": "now" },
8+
"schemaVersion": 38,
9+
"panels": [
10+
{
11+
"id": 1, "type": "table", "title": "Container Status",
12+
"gridPos": { "x": 0, "y": 0, "w": 24, "h": 7 },
13+
"options": { "sortBy": [{ "displayName": "CPU %", "desc": true }] },
14+
"fieldConfig": {
15+
"defaults": { "custom": { "align": "auto" } },
16+
"overrides": [
17+
{ "matcher": { "id": "byName", "options": "CPU %" }, "properties": [{ "id": "unit", "value": "percent" }, { "id": "custom.displayMode", "value": "color-background-solid" },
18+
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 50 }, { "color": "red", "value": 80 }] } }
19+
]},
20+
{ "matcher": { "id": "byName", "options": "Memory" }, "properties": [{ "id": "unit", "value": "bytes" }] },
21+
{ "matcher": { "id": "byName", "options": "Mem %" }, "properties": [{ "id": "unit", "value": "percent" }, { "id": "custom.displayMode", "value": "color-background-solid" },
22+
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 60 }, { "color": "red", "value": 85 }] } }
23+
]}
24+
]
25+
},
26+
"targets": [
27+
{
28+
"expr": "sort_desc(sum by (name) (rate(container_cpu_usage_seconds_total{name!=\"\",name!~\".*pause.*\"}[5m])) * 100)",
29+
"legendFormat": "{{name}}", "instant": true, "format": "table", "refId": "CPU"
30+
},
31+
{
32+
"expr": "sort_desc(sum by (name) (container_memory_usage_bytes{name!=\"\",name!~\".*pause.*\"}))",
33+
"legendFormat": "{{name}}", "instant": true, "format": "table", "refId": "MEM"
34+
}
35+
],
36+
"transformations": [
37+
{ "id": "merge", "options": {} },
38+
{ "id": "organize", "options": { "renameByName": { "Value #CPU": "CPU %", "Value #MEM": "Memory", "name": "Container" } } }
39+
],
40+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
41+
},
42+
{
43+
"id": 2, "type": "timeseries", "title": "CPU % — ProStaff Core",
44+
"gridPos": { "x": 0, "y": 7, "w": 12, "h": 7 },
45+
"fieldConfig": { "defaults": { "unit": "percent" } },
46+
"options": { "tooltip": { "mode": "multi" } },
47+
"targets": [
48+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*x8ogsg0s4gws0840w8kksokk-api.*\"}[5m]) * 100", "legendFormat": "api" },
49+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*x8ogsg0s4gws0840w8kksokk-sidekiq.*\"}[5m]) * 100", "legendFormat": "sidekiq" },
50+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*events-ocosg.*\"}[5m]) * 100", "legendFormat": "events" },
51+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*x8ogsg0s4gws0840w8kksokk-redis.*\"}[5m]) * 100", "legendFormat": "redis" },
52+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*x8ogsg0s4gws0840w8kksokk-postgres.*\"}[5m]) * 100", "legendFormat": "postgres" }
53+
],
54+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
55+
},
56+
{
57+
"id": 3, "type": "timeseries", "title": "CPU % — Infraestrutura",
58+
"gridPos": { "x": 12, "y": 7, "w": 12, "h": 7 },
59+
"fieldConfig": { "defaults": { "unit": "percent" } },
60+
"options": { "tooltip": { "mode": "multi" } },
61+
"targets": [
62+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*elasticsearch.*\"}[5m]) * 100", "legendFormat": "elasticsearch" },
63+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*scraper-api.*\"}[5m]) * 100", "legendFormat": "scraper-api" },
64+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*gateway.*\"}[5m]) * 100", "legendFormat": "gateway" },
65+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*ai-service.*\"}[5m]) * 100", "legendFormat": "ml" },
66+
{ "expr": "rate(container_cpu_usage_seconds_total{name=~\".*coolify$\"}[5m]) * 100", "legendFormat": "coolify" }
67+
],
68+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
69+
},
70+
{
71+
"id": 4, "type": "timeseries", "title": "Memória — ProStaff Core",
72+
"gridPos": { "x": 0, "y": 14, "w": 12, "h": 7 },
73+
"fieldConfig": { "defaults": { "unit": "bytes" } },
74+
"options": { "tooltip": { "mode": "multi" } },
75+
"targets": [
76+
{ "expr": "container_memory_usage_bytes{name=~\".*x8ogsg0s4gws0840w8kksokk-api.*\"}", "legendFormat": "api" },
77+
{ "expr": "container_memory_usage_bytes{name=~\".*x8ogsg0s4gws0840w8kksokk-sidekiq.*\"}", "legendFormat": "sidekiq" },
78+
{ "expr": "container_memory_usage_bytes{name=~\".*events-ocosg.*\"}", "legendFormat": "events" },
79+
{ "expr": "container_memory_usage_bytes{name=~\".*x8ogsg0s4gws0840w8kksokk-postgres.*\"}", "legendFormat": "postgres" },
80+
{ "expr": "container_memory_usage_bytes{name=~\".*x8ogsg0s4gws0840w8kksokk-redis.*\"}", "legendFormat": "redis" }
81+
],
82+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
83+
},
84+
{
85+
"id": 5, "type": "timeseries", "title": "Memória — Infraestrutura",
86+
"gridPos": { "x": 12, "y": 14, "w": 12, "h": 7 },
87+
"fieldConfig": { "defaults": { "unit": "bytes" } },
88+
"options": { "tooltip": { "mode": "multi" } },
89+
"targets": [
90+
{ "expr": "container_memory_usage_bytes{name=~\".*elasticsearch.*\"}", "legendFormat": "elasticsearch" },
91+
{ "expr": "container_memory_usage_bytes{name=~\".*kibana.*\"}", "legendFormat": "kibana" },
92+
{ "expr": "container_memory_usage_bytes{name=~\".*scraper-api.*\"}", "legendFormat": "scraper-api" },
93+
{ "expr": "container_memory_usage_bytes{name=~\".*ai-service.*\"}", "legendFormat": "ml" },
94+
{ "expr": "container_memory_usage_bytes{name=~\".*coolify$\"}", "legendFormat": "coolify" }
95+
],
96+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
97+
},
98+
{
99+
"id": 6, "type": "timeseries", "title": "Network Receive — por container",
100+
"gridPos": { "x": 0, "y": 21, "w": 12, "h": 7 },
101+
"fieldConfig": { "defaults": { "unit": "Bps" } },
102+
"options": { "tooltip": { "mode": "multi" } },
103+
"targets": [
104+
{ "expr": "rate(container_network_receive_bytes_total{name=~\".*x8ogsg0s4gws0840w8kksokk-api.*\"}[5m])", "legendFormat": "api" },
105+
{ "expr": "rate(container_network_receive_bytes_total{name=~\".*x8ogsg0s4gws0840w8kksokk-sidekiq.*\"}[5m])", "legendFormat": "sidekiq" },
106+
{ "expr": "rate(container_network_receive_bytes_total{name=~\".*events-ocosg.*\"}[5m])", "legendFormat": "events" },
107+
{ "expr": "rate(container_network_receive_bytes_total{name=~\".*scraper-api.*\"}[5m])", "legendFormat": "scraper-api" },
108+
{ "expr": "rate(container_network_receive_bytes_total{name=~\".*elasticsearch.*\"}[5m])", "legendFormat": "elasticsearch" }
109+
],
110+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
111+
},
112+
{
113+
"id": 7, "type": "timeseries", "title": "Network Transmit — por container",
114+
"gridPos": { "x": 12, "y": 21, "w": 12, "h": 7 },
115+
"fieldConfig": { "defaults": { "unit": "Bps" } },
116+
"options": { "tooltip": { "mode": "multi" } },
117+
"targets": [
118+
{ "expr": "rate(container_network_transmit_bytes_total{name=~\".*x8ogsg0s4gws0840w8kksokk-api.*\"}[5m])", "legendFormat": "api" },
119+
{ "expr": "rate(container_network_transmit_bytes_total{name=~\".*x8ogsg0s4gws0840w8kksokk-sidekiq.*\"}[5m])", "legendFormat": "sidekiq" },
120+
{ "expr": "rate(container_network_transmit_bytes_total{name=~\".*events-ocosg.*\"}[5m])", "legendFormat": "events" },
121+
{ "expr": "rate(container_network_transmit_bytes_total{name=~\".*scraper-api.*\"}[5m])", "legendFormat": "scraper-api" },
122+
{ "expr": "rate(container_network_transmit_bytes_total{name=~\".*elasticsearch.*\"}[5m])", "legendFormat": "elasticsearch" }
123+
],
124+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
125+
},
126+
{
127+
"id": 8, "type": "stat", "title": "Containers rodando",
128+
"gridPos": { "x": 0, "y": 28, "w": 6, "h": 3 },
129+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value" },
130+
"fieldConfig": {
131+
"defaults": { "unit": "short",
132+
"thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }
133+
}
134+
},
135+
"targets": [{ "expr": "count(container_last_seen{name!=\"\",name!~\".*pause.*\"})", "legendFormat": "" }],
136+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
137+
},
138+
{
139+
"id": 9, "type": "timeseries", "title": "Container Restarts",
140+
"gridPos": { "x": 6, "y": 28, "w": 18, "h": 5 },
141+
"fieldConfig": { "defaults": { "unit": "short" } },
142+
"options": { "tooltip": { "mode": "multi" } },
143+
"targets": [
144+
{ "expr": "changes(container_start_time_seconds{name!=\"\",name!~\".*pause.*\"}[1h]) > 0", "legendFormat": "{{name}}" }
145+
],
146+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
147+
}
148+
],
149+
"templating": {
150+
"list": [
151+
{
152+
"name": "DS_PROMETHEUS",
153+
"type": "datasource",
154+
"pluginId": "prometheus", "query": "prometheus",
155+
"current": { "text": "Prometheus", "value": "Prometheus" }
156+
}
157+
]
158+
}
159+
}
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
{
2+
"uid": "prostaff-host-overview",
3+
"title": "ProStaff - Host Overview",
4+
"tags": ["prostaff", "host"],
5+
"timezone": "browser",
6+
"refresh": "30s",
7+
"time": { "from": "now-3h", "to": "now" },
8+
"schemaVersion": 38,
9+
"panels": [
10+
{
11+
"id": 1, "type": "stat", "title": "Uptime",
12+
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 3 },
13+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "none", "textMode": "auto" },
14+
"fieldConfig": { "defaults": { "unit": "s" } },
15+
"targets": [{ "expr": "time() - node_boot_time_seconds", "legendFormat": "" }],
16+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
17+
},
18+
{
19+
"id": 2, "type": "stat", "title": "CPU Cores",
20+
"gridPos": { "x": 4, "y": 0, "w": 3, "h": 3 },
21+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "none" },
22+
"fieldConfig": { "defaults": { "unit": "short" } },
23+
"targets": [{ "expr": "count(count by (cpu) (node_cpu_seconds_total))", "legendFormat": "" }],
24+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
25+
},
26+
{
27+
"id": 3, "type": "stat", "title": "Total RAM",
28+
"gridPos": { "x": 7, "y": 0, "w": 4, "h": 3 },
29+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "none" },
30+
"fieldConfig": { "defaults": { "unit": "bytes" } },
31+
"targets": [{ "expr": "node_memory_MemTotal_bytes", "legendFormat": "" }],
32+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
33+
},
34+
{
35+
"id": 4, "type": "stat", "title": "Disk Total (/)",
36+
"gridPos": { "x": 11, "y": 0, "w": 4, "h": 3 },
37+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "none" },
38+
"fieldConfig": { "defaults": { "unit": "bytes" } },
39+
"targets": [{ "expr": "node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}", "legendFormat": "" }],
40+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
41+
},
42+
{
43+
"id": 5, "type": "stat", "title": "TCP Connections",
44+
"gridPos": { "x": 15, "y": 0, "w": 4, "h": 3 },
45+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "none" },
46+
"fieldConfig": { "defaults": { "unit": "short" } },
47+
"targets": [{ "expr": "node_sockstat_TCP_inuse", "legendFormat": "" }],
48+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
49+
},
50+
{
51+
"id": 6, "type": "timeseries", "title": "CPU Usage %",
52+
"gridPos": { "x": 0, "y": 3, "w": 12, "h": 7 },
53+
"fieldConfig": {
54+
"defaults": { "unit": "percent", "min": 0, "max": 100,
55+
"thresholds": { "mode": "absolute", "steps": [
56+
{ "color": "green", "value": null },
57+
{ "color": "yellow", "value": 70 },
58+
{ "color": "red", "value": 90 }
59+
]}
60+
}
61+
},
62+
"options": { "tooltip": { "mode": "multi" } },
63+
"targets": [
64+
{ "expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "CPU Avg %" },
65+
{ "expr": "avg(rate(node_cpu_seconds_total{mode=\"user\"}[5m])) * 100", "legendFormat": "User" },
66+
{ "expr": "avg(rate(node_cpu_seconds_total{mode=\"system\"}[5m])) * 100", "legendFormat": "System" },
67+
{ "expr": "avg(rate(node_cpu_seconds_total{mode=\"iowait\"}[5m])) * 100", "legendFormat": "IOWait" }
68+
],
69+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
70+
},
71+
{
72+
"id": 7, "type": "timeseries", "title": "Load Average",
73+
"gridPos": { "x": 12, "y": 3, "w": 12, "h": 7 },
74+
"fieldConfig": { "defaults": { "unit": "short" } },
75+
"options": { "tooltip": { "mode": "multi" } },
76+
"targets": [
77+
{ "expr": "node_load1", "legendFormat": "1m" },
78+
{ "expr": "node_load5", "legendFormat": "5m" },
79+
{ "expr": "node_load15", "legendFormat": "15m" },
80+
{ "expr": "count(count by (cpu) (node_cpu_seconds_total))", "legendFormat": "CPU cores" }
81+
],
82+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
83+
},
84+
{
85+
"id": 8, "type": "gauge", "title": "Memory Used %",
86+
"gridPos": { "x": 0, "y": 10, "w": 4, "h": 5 },
87+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "minVizWidth": 75 },
88+
"fieldConfig": {
89+
"defaults": { "unit": "percent", "min": 0, "max": 100,
90+
"thresholds": { "mode": "absolute", "steps": [
91+
{ "color": "green", "value": null },
92+
{ "color": "yellow", "value": 75 },
93+
{ "color": "red", "value": 90 }
94+
]}
95+
}
96+
},
97+
"targets": [{ "expr": "(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100", "legendFormat": "" }],
98+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
99+
},
100+
{
101+
"id": 9, "type": "timeseries", "title": "Memory Breakdown",
102+
"gridPos": { "x": 4, "y": 10, "w": 20, "h": 5 },
103+
"fieldConfig": { "defaults": { "unit": "bytes" } },
104+
"options": { "tooltip": { "mode": "multi" } },
105+
"targets": [
106+
{ "expr": "node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes", "legendFormat": "Used" },
107+
{ "expr": "node_memory_Buffers_bytes + node_memory_Cached_bytes", "legendFormat": "Buffers+Cache" },
108+
{ "expr": "node_memory_MemAvailable_bytes", "legendFormat": "Available" },
109+
{ "expr": "node_memory_SwapTotal_bytes - node_memory_SwapFree_bytes", "legendFormat": "Swap Used" }
110+
],
111+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
112+
},
113+
{
114+
"id": 10, "type": "gauge", "title": "Disk Used % (/)",
115+
"gridPos": { "x": 0, "y": 15, "w": 4, "h": 5 },
116+
"options": { "reduceOptions": { "calcs": ["lastNotNull"] } },
117+
"fieldConfig": {
118+
"defaults": { "unit": "percent", "min": 0, "max": 100,
119+
"thresholds": { "mode": "absolute", "steps": [
120+
{ "color": "green", "value": null },
121+
{ "color": "yellow", "value": 70 },
122+
{ "color": "red", "value": 85 }
123+
]}
124+
}
125+
},
126+
"targets": [{ "expr": "(1 - node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}) * 100", "legendFormat": "" }],
127+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
128+
},
129+
{
130+
"id": 11, "type": "timeseries", "title": "Disk I/O",
131+
"gridPos": { "x": 4, "y": 15, "w": 10, "h": 5 },
132+
"fieldConfig": { "defaults": { "unit": "Bps" } },
133+
"options": { "tooltip": { "mode": "multi" } },
134+
"targets": [
135+
{ "expr": "rate(node_disk_read_bytes_total{device=~\"sd.*|vd.*|nvme.*\"}[5m])", "legendFormat": "Read {{device}}" },
136+
{ "expr": "rate(node_disk_written_bytes_total{device=~\"sd.*|vd.*|nvme.*\"}[5m])", "legendFormat": "Write {{device}}" }
137+
],
138+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
139+
},
140+
{
141+
"id": 12, "type": "timeseries", "title": "Network I/O (eth0)",
142+
"gridPos": { "x": 14, "y": 15, "w": 10, "h": 5 },
143+
"fieldConfig": { "defaults": { "unit": "Bps" } },
144+
"options": { "tooltip": { "mode": "multi" } },
145+
"targets": [
146+
{ "expr": "rate(node_network_receive_bytes_total{device=\"eth0\"}[5m])", "legendFormat": "In" },
147+
{ "expr": "rate(node_network_transmit_bytes_total{device=\"eth0\"}[5m])", "legendFormat": "Out" }
148+
],
149+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
150+
},
151+
{
152+
"id": 13, "type": "timeseries", "title": "Resource Pressure (stall time)",
153+
"gridPos": { "x": 0, "y": 20, "w": 12, "h": 5 },
154+
"fieldConfig": { "defaults": { "unit": "percentunit" } },
155+
"options": { "tooltip": { "mode": "multi" } },
156+
"targets": [
157+
{ "expr": "rate(node_pressure_cpu_waiting_seconds_total[5m])", "legendFormat": "CPU pressure" },
158+
{ "expr": "rate(node_pressure_io_stalled_seconds_total[5m])", "legendFormat": "IO stalled" },
159+
{ "expr": "rate(node_pressure_memory_stalled_seconds_total[5m])", "legendFormat": "Memory stalled" }
160+
],
161+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
162+
},
163+
{
164+
"id": 14, "type": "timeseries", "title": "Open File Descriptors",
165+
"gridPos": { "x": 12, "y": 20, "w": 12, "h": 5 },
166+
"fieldConfig": { "defaults": { "unit": "short" } },
167+
"options": { "tooltip": { "mode": "multi" } },
168+
"targets": [
169+
{ "expr": "node_filefd_allocated", "legendFormat": "Allocated" },
170+
{ "expr": "node_filefd_maximum", "legendFormat": "Max" }
171+
],
172+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }
173+
}
174+
],
175+
"templating": {
176+
"list": [
177+
{
178+
"name": "DS_PROMETHEUS",
179+
"type": "datasource",
180+
"pluginId": "prometheus", "query": "prometheus",
181+
"current": { "text": "Prometheus", "value": "Prometheus" }
182+
}
183+
]
184+
}
185+
}

0 commit comments

Comments
 (0)