-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommands.sh
More file actions
227 lines (194 loc) · 11.2 KB
/
commands.sh
File metadata and controls
227 lines (194 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/bin/bash
# Commands executed during the lab
# ========================================================================
# Task 1: Install and Configure the Monitoring Stack / Step 1.1: Install InfluxDB
# ========================================================================
sudo apt update
sudo apt install -y wget curl bc ca-certificates
wget https://dl.influxdata.com/influxdb/releases/influxdb2-2.7.4-amd64.deb
sudo apt install -y ./influxdb2-2.7.4-amd64.deb
curl -O https://dl.influxdata.com/influxdb/releases/influxdb2-client-2.7.5-linux-amd64.tar.gz
tar xvf influxdb2-client-2.7.5-linux-amd64.tar.gz
sudo install -m 0755 influx /usr/local/bin/influx
sudo systemctl start influxdb
sudo systemctl enable influxdb
# ========================================================================
# Task 1: Install and Configure the Monitoring Stack / Step 1.2: Initialize InfluxDB
# ========================================================================
# NOTE(review): the username, password and token below are hard-coded lab
# credentials. Do not reuse them outside a throwaway lab environment.
export INFLUX_TOKEN="energy-operator-token-1234567890"
# Initial setup: creates the admin user, the org, and the energy_metrics
# bucket with a 168h retention, using the token exported above.
influx setup \
  --username admin \
  --password adminpassword123 \
  --token "$INFLUX_TOKEN" \
  --org energyorg \
  --bucket energy_metrics \
  --retention 168h \
  --force
# Persist the token for future shells. Append only when the exact line is not
# already present, so re-running this step does not pile up duplicate exports.
token_export_line="export INFLUX_TOKEN=\"${INFLUX_TOKEN}\""
if ! grep -qxF -- "$token_export_line" ~/.bashrc 2>/dev/null; then
  printf '%s\n' "$token_export_line" >> ~/.bashrc
fi
# INFLUX_TOKEN is already exported in this shell, so later commands do not
# depend on this reload; it is kept to mirror the lab steps.
source ~/.bashrc
# ========================================================================
# Task 1: Install and Configure the Monitoring Stack / Step 1.3: Install Telegraf
# ========================================================================
# Download the Telegraf 1.29.1 package, then install it from the local file.
telegraf_deb="telegraf_1.29.1-1_amd64.deb"
wget "https://dl.influxdata.com/telegraf/releases/${telegraf_deb}"
sudo apt install -y "./${telegraf_deb}"
# ========================================================================
# Task 1: Install and Configure the Monitoring Stack / Step 1.4: Configure Telegraf for Energy Metrics Collection
# ========================================================================
# Interactive step: opens the Telegraf configuration in nano for manual editing.
telegraf_conf_path=/etc/telegraf/telegraf.conf
sudo nano "$telegraf_conf_path"
# ========================================================================
# Task 1: Install and Configure the Monitoring Stack / Step 1.5: Create Simulated Energy Data Source
# ========================================================================
# Interactive step: the simulator script body is written by hand in nano.
simulator_path=/usr/local/bin/energy_simulator.sh
simulator_log=/var/log/energy_data.log
sudo nano "$simulator_path"
# ------------------------------------------------------------------------
# Make the simulator executable, make sure its log file exists with mode 644,
# then launch it detached in the background with all output discarded.
# ------------------------------------------------------------------------
sudo chmod +x "$simulator_path"
sudo touch "$simulator_log"
sudo chmod 644 "$simulator_log"
nohup sudo "$simulator_path" > /dev/null 2>&1 &
# ========================================================================
# Task 1: Install and Configure the Monitoring Stack / Step 1.6: Start Telegraf
# ========================================================================
# Start the service, register it for boot, then print its current state.
for telegraf_action in start enable; do
  sudo systemctl "$telegraf_action" telegraf
done
sudo systemctl status telegraf --no-pager
# ========================================================================
# Task 1: Install and Configure the Monitoring Stack / Step 1.7: Verify Data Ingestion
# ========================================================================
# Flux query: up to 10 energy_consumption rows from the last five minutes.
ingest_check_flux='from(bucket: "energy_metrics")
|> range(start: -5m)
|> filter(fn: (r) => r._measurement == "energy_consumption")
|> limit(n: 10)'
influx query "$ingest_check_flux" \
  --org energyorg \
  --token "$INFLUX_TOKEN"
# ========================================================================
# Task 2: Configure Threshold Detection and Alerting / Step 2.1: Install Kapacitor
# ========================================================================
# Download the Kapacitor 1.7.0 package, then install it from the local file.
kapacitor_deb="kapacitor_1.7.0_amd64.deb"
wget "https://dl.influxdata.com/kapacitor/releases/${kapacitor_deb}"
sudo apt install -y "./${kapacitor_deb}"
# ========================================================================
# Task 2: Configure Threshold Detection and Alerting / Step 2.2: Configure Kapacitor
# ========================================================================
# Create the log directory and hand it to the kapacitor user and group.
kapacitor_logs=/var/log/kapacitor
sudo mkdir -p "$kapacitor_logs"
sudo chown kapacitor:kapacitor "$kapacitor_logs"
# ------------------------------------------------------------------------
# Interactive step: edit the Kapacitor configuration by hand in nano.
# ------------------------------------------------------------------------
sudo nano /etc/kapacitor/kapacitor.conf
# ------------------------------------------------------------------------
# Start Kapacitor, register it for boot, then print its current state.
# ------------------------------------------------------------------------
for kapacitor_action in start enable; do
  sudo systemctl "$kapacitor_action" kapacitor
done
sudo systemctl status kapacitor --no-pager
# ========================================================================
# Task 2: Configure Threshold Detection and Alerting / Step 2.3: Create Threshold Detection Script
# ========================================================================
# Interactive step: the TICK script is written by hand in nano.
tick_script=~/energy_threshold_alert.tick
nano "$tick_script"
# ========================================================================
# Task 2: Configure Threshold Detection and Alerting / Step 2.4: Define and Enable the Alert Task
# ========================================================================
# Take the first column of the first row of the bucket listing as the bucket ID.
BUCKET_ID=$(influx bucket list --name energy_metrics --hide-headers | awk 'NR==1 {print $1}')
echo "$BUCKET_ID"
# Map the v1-style database/retention-policy pair energy_metrics/autogen onto
# that bucket and mark the mapping as the default.
dbrp_args=(
  --db energy_metrics
  --rp autogen
  --bucket-id "$BUCKET_ID"
  --default
)
influx v1 dbrp create "${dbrp_args[@]}"
# ------------------------------------------------------------------------
# Register the TICK script as a batch task bound to energy_metrics.autogen.
# ------------------------------------------------------------------------
define_args=(
  -tick "$tick_script"
  -type batch
  -dbrp energy_metrics.autogen
)
kapacitor define energy_alert "${define_args[@]}"
# ------------------------------------------------------------------------
# Enable the task, then print its definition and status.
# ------------------------------------------------------------------------
kapacitor enable energy_alert
kapacitor show energy_alert
# ========================================================================
# Task 2: Configure Threshold Detection and Alerting / Step 2.5: Create Alert Log Directory
# ========================================================================
# Make sure the alert log directory exists and belongs to the kapacitor user
# and group, then restart the service.
alert_log_dir=/var/log/kapacitor
sudo mkdir -p "$alert_log_dir"
sudo chown kapacitor:kapacitor "$alert_log_dir"
sudo systemctl restart kapacitor
# ========================================================================
# Task 2: Configure Threshold Detection and Alerting / Step 2.6: Trigger Test Alerts
# ========================================================================
# Append ten line-protocol samples (power_watts=135i) to the energy data log,
# one every two seconds, each stamped with the current time in nanoseconds.
for ((sample = 0; sample < 10; sample++)); do
  now_ns=$(date +%s%N)
  printf '%s\n' "energy_consumption,location=datacenter,rack=A1 power_watts=135i,voltage=120i,current=1.125 ${now_ns}" \
    | sudo tee -a /var/log/energy_data.log > /dev/null
  sleep 2
done
# ========================================================================
# Task 2: Configure Threshold Detection and Alerting / Step 2.7: Monitor Alerts
# ========================================================================
# Follow the alert log live, but only for a bounded time: a bare `tail -f`
# never exits, so every command after it in this script would be unreachable.
# ALERT_TAIL_SECONDS (default 30) sets how long to follow; set it to 0 to
# follow indefinitely (a timeout duration of 0 disables the limit).
sudo timeout "${ALERT_TAIL_SECONDS:-30}" tail -f /var/log/kapacitor/energy_alerts.log
# ------------------------------------------------------------------------
# Print Kapacitor's general statistics and the list of defined tasks.
# ------------------------------------------------------------------------
kapacitor stats general
kapacitor list tasks
# ========================================================================
# Verification / Verify Data Ingestion
# ========================================================================
# Last 50 journal entries for the telegraf unit.
sudo journalctl --unit=telegraf --lines=50 --no-pager
# ------------------------------------------------------------------------
# Count energy_consumption points from the last ten minutes, per location.
# ------------------------------------------------------------------------
ingest_count_flux='from(bucket: "energy_metrics")
|> range(start: -10m)
|> filter(fn: (r) => r._measurement == "energy_consumption")
|> group(columns: ["location"])
|> count()'
influx query "$ingest_count_flux" \
  --org energyorg \
  --token "$INFLUX_TOKEN"
# ========================================================================
# Verification / Verify Threshold Detection
# ========================================================================
# Case-insensitive search of the alert log for "crit" or "warn" entries.
alerts_log=/var/log/kapacitor/energy_alerts.log
grep --extended-regexp --ignore-case "crit|warn" "$alerts_log"
# ------------------------------------------------------------------------
# Show the task's "Status" line plus the five lines that follow it.
# ------------------------------------------------------------------------
kapacitor show energy_alert | grep --after-context=5 "Status"
# ========================================================================
# Verification / Complete System Check
# ========================================================================
# Interactive step: the verification script is written by hand in nano.
verify_script=~/verify_lab.sh
nano "$verify_script"
# ------------------------------------------------------------------------
# Make it executable and run it
# ------------------------------------------------------------------------
chmod +x "$verify_script"
"$verify_script"
# ========================================================================
# Troubleshooting Tips / Issue: Telegraf not collecting data
# ========================================================================
# Run Telegraf once in test mode against the installed configuration.
telegraf --config /etc/telegraf/telegraf.conf --test
# Sample output captured during the lab (line protocol). It is kept as
# comments because the shell would otherwise try to run each line as a command:
#   cpu,cpu=cpu-total,host=ip-172-31-10-187 usage_idle=96.2015503875969,usage_user=1.937984496124031,usage_system=1.8604651162790697 1744466268000000000
#   mem,host=ip-172-31-10-187 used_percent=23.948230127170916,total=4028628992i,available=3064027136i,used=964601856i 1744466268000000000
#   energy_consumption,host=ip-172-31-10-187,location=datacenter,rack=A1 current=1.125,power_watts=135i,voltage=120i 1744466268000000000
# ========================================================================
# Troubleshooting Tips / Issue: No alerts generated
# ========================================================================
# Last 100 journal entries for the kapacitor unit.
sudo journalctl --unit=kapacitor --lines=100 --no-pager
# ------------------------------------------------------------------------
# Print the alert task's definition and current status.
# ------------------------------------------------------------------------
kapacitor show energy_alert
# ========================================================================
# Troubleshooting Tips / Issue: InfluxDB connection errors
# ========================================================================
# Current state of the influxdb service.
sudo systemctl status influxdb --no-pager
# ------------------------------------------------------------------------
# Listening TCP sockets (numeric, with owning process), filtered for 8086.
# ------------------------------------------------------------------------
sudo ss --tcp --listening --numeric --processes | grep 8086
# ------------------------------------------------------------------------
# Reminder of where to review token permissions.
# ------------------------------------------------------------------------
printf '%s\n' \
  "Check token permissions in InfluxDB UI:" \
  "http://localhost:8086"