Skip to content

Commit b5d57e2

Browse files
feat: poll testnet status and open infra recovery issues (#742)
* ansible: codify dashmon status monitoring access * feat: poll testnet status and open infra recovery issues * fix: shorten dashmon authorized key ansible line --------- Co-authored-by: dashinfraclaw <dashinfraclaw@users.noreply.github.com>
1 parent 555e290 commit b5d57e2

14 files changed

Lines changed: 707 additions & 5 deletions

File tree

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
# Polls the testnet status API on a schedule (or on manual dispatch) and opens
# recovery issues by running bin/poll-testnet-status.js.
name: Poll Testnet Status

on:
  workflow_dispatch:
  schedule:
    # Minute 0 of every second hour.
    - cron: '0 */2 * * *'

permissions:
  contents: read
  issues: write

jobs:
  poll-testnet-status:
    name: Poll testnet status and open recovery issues
    runs-on: ubuntu-22.04
    timeout-minutes: 10
    # All runs share one concurrency group and a run that is already in
    # progress is never cancelled by a newer one.
    concurrency:
      group: poll-testnet-status
      cancel-in-progress: false

    env:
      # Token used for creating issues in the recovery issue repository.
      GH_TOKEN: ${{ secrets.INFRA_ISSUES_TOKEN }}
      TESTNET_RECOVERY_ASSIGNEE: dashinfraclaw
      TESTNET_RECOVERY_ISSUE_REPOSITORY: dashpay/infra

    steps:
      # Fail fast: this check needs nothing from the repository, so run it
      # before spending time on checkout, Node.js setup and `npm ci`.
      - name: Validate cross-repo issue token
        run: |
          if [[ -z "${GH_TOKEN}" ]]; then
            echo "INFRA_ISSUES_TOKEN secret is required to create issues in ${TESTNET_RECOVERY_ISSUE_REPOSITORY}"
            exit 1
          fi

      - name: Check out repo
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          # Quoted so the version stays a string.
          node-version: '20'

      - name: Install dependencies
        run: npm ci

      - name: Poll status API and open recovery issues
        run: node bin/poll-testnet-status.js

ansible/roles/status_dashboard/defaults/main.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@
33
status_dashboard_image: dashpay/status:latest
44
status_dashboard_port: 3010
55
status_dashboard_path: "{{ dashd_home }}/status_dashboard"
6+
status_dashboard_ssh_private_key_path: "{{ lookup('env', 'STATUS_DASHBOARD_SSH_KEY_PATH') | default('~/.ssh/dashmon-testnet', true) }}"
7+
status_dashboard_ssh_user: dashmon
68
status_dashboard_poll_interval: 10000
79
status_dashboard_poll_concurrency: 20

ansible/roles/status_dashboard/tasks/main.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
dest: "{{ status_dashboard_path }}/inventory"
1313
mode: "0644"
1414

15-
- name: Copy SSH deploy key for status dashboard
15+
- name: Copy SSH monitoring key for status dashboard
1616
ansible.builtin.copy:
17-
src: "{{ lookup('env', 'PRIVATE_KEY_PATH') | default('~/.ssh/evo-app-deploy.rsa', true) }}"
17+
src: "{{ status_dashboard_ssh_private_key_path }}"
1818
dest: "{{ status_dashboard_path }}/ssh_key"
1919
mode: "0600"
2020
owner: root

ansible/roles/status_dashboard/templates/docker-compose.yml.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ services:
1010
environment:
1111
- INVENTORY_PATH=/app/data/inventory
1212
- SSH_KEY_PATH=/app/data/ssh_key
13-
- SSH_USER=ubuntu
13+
- SSH_USER={{ status_dashboard_ssh_user }}
1414
- SSH_COMMAND=/usr/local/bin/dashmon-check
1515
- SSH_PORT=22
1616
- POLL_INTERVAL_MS={{ status_dashboard_poll_interval }}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
---

# Default variables for the dashmon status monitoring setup.
# NOTE(review): the file path is not shown in this view; presumably this is
# the status_monitoring role's defaults/main.yml - confirm.

# Account name of the monitoring user.
status_monitoring_user: dashmon
# Home directory of the monitoring user, derived from the account name.
status_monitoring_home: "/home/{{ status_monitoring_user }}"
# Absolute path of the monitoring command.
status_monitoring_forced_command: /usr/local/bin/dashmon-check

ansible/roles/status_monitoring/files/dashmon-check.sh

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,29 @@ set -euo pipefail
77

88
if [[ -f /home/dashmate/.dashmate/config.json ]]; then
99
# HP masternode: dashmate status as the dashmate user
10-
sudo -u dashmate dashmate status 2>&1 || true
10+
sudo -u dashmate dashmate status 2>&1
11+
echo "===TENDERDASH==="
12+
# Query Tenderdash RPC for proposer info (localhost only, no sudo needed)
13+
python3 -c '
14+
import json, urllib.request
15+
try:
16+
def fetch(path):
17+
return json.loads(urllib.request.urlopen(
18+
"http://127.0.0.1:36657" + path, timeout=5
19+
).read())
20+
validators = fetch("/validators?per_page=100")
21+
sorted_ptx = sorted(v["pro_tx_hash"] for v in validators["validators"])
22+
block = fetch("/block")
23+
header = block["block"]["header"]
24+
cur_prop = header["proposer_pro_tx_hash"]
25+
height = int(header["height"])
26+
idx = sorted_ptx.index(cur_prop)
27+
next_prop = sorted_ptx[(idx + 1) % len(sorted_ptx)]
28+
print(json.dumps({"currentProposer": cur_prop,
29+
"nextProposer": next_prop, "platformHeight": height}))
30+
except Exception as e:
31+
print(json.dumps({"error": str(e)}))
32+
' 2>/dev/null || echo '{"error":"tenderdash-unavailable"}'
1133
echo "===SYSMETRICS==="
1234
else
1335
# Regular masternode: dash-cli as the ubuntu user
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
# /etc/sudoers.d/dashmon
# Allow the dashmon user to run read-only monitoring commands only.
# Both rule sets are present on all nodes; unused rules are harmless.
# Every rule is NOPASSWD, so sudo does not prompt dashmon for a password.

# HP masternodes: dashmate status as the dashmate user
dashmon ALL=(dashmate) NOPASSWD: /usr/bin/dashmate status

# Regular masternodes: dash-cli commands as the ubuntu user
dashmon ALL=(ubuntu) NOPASSWD: /usr/local/bin/dash-cli getblockchaininfo
dashmon ALL=(ubuntu) NOPASSWD: /usr/local/bin/dash-cli masternode status
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOZRnc5hqc+WjCLt9PHiVVfFPfkWSlWNscOwSZrUnRAu dashmon-readonly@testnet-dashboard
Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,50 @@
11
---
22

# Create the monitoring account (name from status_monitoring_user) with a
# bash login shell and a home directory.
- name: Create dashmon monitoring user
  ansible.builtin.user:
    name: "{{ status_monitoring_user }}"
    shell: /bin/bash
    # "!" is not a valid password hash, so the account has no usable password.
    # NOTE(review): access is then presumably SSH-key only - confirm.
    password: "!"
    create_home: true
9+
# Ensure the monitoring user's ~/.ssh directory exists, owned by that user and
# accessible to nobody else (mode 0700).
- name: Create dashmon SSH directory
  ansible.builtin.file:
    path: "{{ status_monitoring_home }}/.ssh"
    state: directory
    owner: "{{ status_monitoring_user }}"
    group: "{{ status_monitoring_user }}"
    # Quoted so the mode is passed as a string, not parsed as a number.
    mode: "0700"
17+
# Write the monitoring user's authorized_keys file as a single entry: the
# comma-joined key options followed by the public key read from the role's
# files/dashmon-testnet.pub.
- name: Install dashmon authorized key with forced command
  ansible.builtin.copy:
    dest: "{{ status_monitoring_home }}/.ssh/authorized_keys"
    # Folded scalar (>-): the two content lines are joined with one space and
    # the trailing newline is stripped, yielding "<options> <public key>".
    content: >-
      {{ status_monitoring_authorized_key_options | join(',') }}
      {{ lookup('file', role_path + '/files/dashmon-testnet.pub') | trim }}
    owner: "{{ status_monitoring_user }}"
    group: "{{ status_monitoring_user }}"
    mode: "0600"
  vars:
    # Options placed in front of the key: a forced command plus the
    # no-* restrictions on forwarding and PTY allocation.
    status_monitoring_authorized_key_options:
      - 'command="{{ status_monitoring_forced_command }}"'
      - no-port-forwarding
      - no-X11-forwarding
      - no-agent-forwarding
      - no-pty
34+
335
- name: Copy dashmon-check monitoring script
436
ansible.builtin.copy:
537
src: dashmon-check.sh
6-
dest: /usr/local/bin/dashmon-check
38+
dest: "{{ status_monitoring_forced_command }}"
739
mode: "0755"
840
owner: root
941
group: root
42+
# Install the dashmon sudoers drop-in from the role's dashmon-sudoers file,
# owned by root with mode 0440.
- name: Install dashmon sudoers rules
  ansible.builtin.copy:
    src: dashmon-sudoers
    dest: /etc/sudoers.d/dashmon
    mode: "0440"
    owner: root
    group: root
    # Syntax-check the candidate file with visudo; %s is replaced by the path
    # of the file being validated.
    validate: /usr/sbin/visudo -cf %s

bin/poll-testnet-status.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
/* eslint-disable no-console */

const {
  pollTestnetStatus,
} = require('../lib/testnetStatus/pollTestnetStatus');

/**
 * Run a single testnet status poll and print a summary of the result.
 *
 * Reads `expectedNodeCount`, `incidentCount`, `skippedIncidents` and
 * `createdIssues` from the value resolved by `pollTestnetStatus()`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const result = await pollTestnetStatus();

  console.log(`Checked ${result.expectedNodeCount} expected testnet masternodes.`);
  console.log(`Detected ${result.incidentCount} active incidents.`);

  // Incidents for which no new issue was opened
  for (const incident of result.skippedIncidents) {
    console.log(`Skipped existing issue for ${incident.nodeName} (${incident.observedState}).`);
  }

  for (const createdIssue of result.createdIssues) {
    if (createdIssue.dryRun) {
      // Dry run: only report what would have been created
      console.log(`Would create issue for ${createdIssue.nodeName} (${createdIssue.observedState}).`);
    } else {
      console.log(
        `Created recovery issue for ${createdIssue.nodeName} `
        + `(${createdIssue.observedState}): ${createdIssue.issueUrl}`,
      );
    }
  }
}

main().catch((error) => {
  // A rejection value is not guaranteed to be an Error (it may be a string,
  // undefined, ...), so fall back to its string form instead of printing
  // "undefined" or throwing inside the handler.
  const message = (error && error.message) || String(error);

  console.error(message);
  process.exit(1);
});

0 commit comments

Comments
 (0)