Skip to content

Commit 584ad7d

Browse files
committed
feat: add uptime monitors with datadog synthetics
1 parent fe2896e commit 584ad7d

3 files changed

Lines changed: 80 additions & 0 deletions

File tree

pillar/dev/secrets/datadog.sls

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
datadog_api_key: deadbeefdeadbeefdeadbeefdeadbeefdeadbeef
2+
datadog_app_key: deadbeefdeadbeefdeadbeefdeadbeefdeadbeef

salt/datadog/synthetics.sls

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
{% set minion_id = grains['id'] %}
2+
{% set fqdn = grains['fqdn'] %}
3+
{% set api_key = pillar.get('datadog_api_key') %}
4+
{% set app_key = pillar.get('datadog_app_key') %}
5+
{# Query the Datadog API only when both keys are set. pillar.get() returns None for a missing key, and concatenating None into the header string would abort rendering of this state. Without keys, fall back to an empty dict so the later .get('dict', {}) lookups yield empty results. #}
{% set datadog_locations = salt['http.query']('https://api.datadoghq.com/api/v1/synthetics/locations', header_list=['DD-API-KEY: ' + api_key, 'DD-APPLICATION-KEY: ' + app_key], decode=True) if api_key and app_key else {} %}
6+
{% set existing_monitors = salt['http.query']('https://api.datadoghq.com/api/v1/synthetics/tests', header_list=['DD-API-KEY: ' + api_key, 'DD-APPLICATION-KEY: ' + app_key], decode=True) if api_key and app_key else {} %}
7+
{% set monitor_name = minion_id + ' HTTP Health' %}
8+
{% set monitor_exists = existing_monitors.get('dict', {}).get('tests', []) | selectattr('name', 'equalto', monitor_name) | list | length > 0 %}
9+
10+
# NOTE: this fails to capture multi-host minions (bugs has bugs.python, bugs.jython, bugs.roundup and
11+
# codespeed has speed.python and speed.pypy)
12+
{% set web_roles = ['loadbalancer', 'docs', 'downloads', 'hg', 'moin', 'planet', 'bugs', 'buildbot', 'codespeed', 'pythontest'] %}
13+
{% set matched_roles = [] %}
14+
{% for role in web_roles %}
15+
{% if salt["match.compound"](pillar["roles"][role]["pattern"]) %}
16+
{% set _ = matched_roles.append(role) %}
17+
{% endif %}
18+
{% endfor %}
19+
{% set is_web_minion = matched_roles|length > 0 %}
20+
{% set is_loadbalancer = 'loadbalancer' in matched_roles %}
21+
22+
# hit the haproxy status page for loadbalancers or the root for other web minions
23+
# web minions also have a _haproxy_status endpoint, but I'm not sure whether that needs to be checked.
24+
{% set health_url = "https://" + fqdn + ("/_haproxy_status" if is_loadbalancer else "/") %}
25+
26+
{% if is_web_minion and api_key and app_key and not monitor_exists %}
27+
create-synthetics-monitor-{{ minion_id }}:
28+
http.query:
29+
- name: https://api.datadoghq.com/api/v1/synthetics/tests
30+
- method: POST
31+
- header_list:
32+
- "DD-API-KEY: {{ api_key }}"
33+
- "DD-APPLICATION-KEY: {{ app_key }}"
34+
- "Content-Type: application/json"
35+
- data: |
36+
{
37+
"name": "{{ minion_id }} HTTP Health",
38+
"type": "api",
39+
"subtype": "http",
40+
"config": {
41+
"request": {
42+
"url": "{{ health_url }}",
43+
"method": "GET",
44+
"timeout": 30
45+
},
46+
"assertions": [
47+
{
48+
"type": "statusCode",
49+
"operator": "is",
50+
"target": 200
51+
},
52+
{
53+
"type": "responseTime",
54+
"operator": "lessThan",
55+
"target": 2000
56+
}
57+
]
58+
},
59+
"locations": {{ datadog_locations.get('dict', {}).get('locations', []) | map(attribute='id') | list | tojson }},
60+
"options": {
61+
"tick_every": 60,
62+
"min_failure_duration": 180,
63+
"min_location_failed": 5,
64+
"retry": {
65+
"count": 1,
66+
"interval": 300
67+
}
68+
},
69+
"message": "{{ minion_id }} is down in 5 or more locations! @pagerduty-Datadog",
70+
"tags": [
71+
"minion_id:{{ minion_id }}",
72+
"auto_created:salt.synthetics.sls"
73+
]
74+
}
75+
- status: 200
76+
{% endif %}

salt/top.sls

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ base:
1717
- tls
1818
- rsyslog
1919
- datadog
20+
- datadog.synthetics
21+
- secrets.datadog
2022
- base.motd
2123
- base.swap
2224

0 commit comments

Comments
 (0)