Skip to content

Commit 2ff63a0

Browse files
committed
v2.1.17
check for traceroute health even for nodes not running the software
1 parent 56b3d99 commit 2ff63a0

4 files changed

Lines changed: 78 additions & 46 deletions

File tree

meshtastic_listener/__main__.py

Lines changed: 68 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
MessageReceived, NodeBase, WaypointPayload,
1616
DevicePayload, TransmissionPayload, EnvironmentPayload,
1717
NodeHealthCheck, InsufficientDataError,
18-
AdvertiseInstancePayload
18+
AdvertiseInstancePayload, AlertSettings
1919
)
2020
from meshtastic_listener.utils import coords_int_to_float, load_node_env_var, system_stats
2121

@@ -273,7 +273,7 @@ def __traceroute_upstream__(self) -> None:
273273
This function is designed to run in a thread in a loop.
274274
'''
275275

276-
favorites = self.db.select_favorite_nodes()
276+
favorites = self.db.get_favorite_nodes()
277277
if len(favorites) > 0:
278278
logging.info(f'Favorite nodes set to: {[self.__sanitize_string__(str(f.longName)) for f in favorites]}')
279279
else:
@@ -328,6 +328,62 @@ def __advertise_instance__(self) -> None:
328328
self.__check_listener_instances__()
329329
self.__sleep_with_exit__(60)
330330

def __check_traceroute_responses__(self, alert_settings: AlertSettings, lookback_ts: int) -> None:
    '''
    Warn admins about favorite nodes whose traceroute success rate is low.

    For every node returned by self.db.get_favorite_nodes(), fetch the traceroutes
    sent from the local node to that node since lookback_ts. A traceroute counts as
    successful when its rxTime is set. When at least 3 attempts exist and the success
    percentage is at or below alert_settings.tracerouteFailureThreshold, the alert is
    logged as a warning and sent through self.__notify_admins__ with priority=True.

    alert_settings: settings object providing tracerouteFailureThreshold (a percentage).
    lookback_ts: lower bound passed to the DB query as lookback_ts.
    '''
    # Fewer attempts than this are never alerted on (also covers the zero-attempt case,
    # which would otherwise divide by zero below).
    min_attempts = 3

    for node in self.db.get_favorite_nodes():
        traceroute_results = self.db.get_traceroute_results_by_node(
            source_id=self.local_node_id,
            target_id=node.nodeNum,
            lookback_ts=lookback_ts
        )
        total = len(traceroute_results)
        if total < min_attempts:
            continue

        # A traceroute row without an rxTime never received a response.
        successful = sum(1 for t in traceroute_results if t.rxTime is not None)
        rate = successful / total * 100

        if rate <= alert_settings.tracerouteFailureThreshold:
            alert_msg = f'Low Traceroute Success Rate to favorite node {node.nodeNum} ({self.__sanitize_string__(str(node.longName))}): {rate:.2f}% over last {total} attempts.'
            logging.warning(alert_msg)
            self.__notify_admins__(alert_msg, priority=True)
350+
def __create_node_health_alert__(self, alert_settings: AlertSettings, health_check_stats: NodeHealthCheck) -> str:
351+
alert_context = ''
352+
353+
### CHANNEL UTILIZATION ###
354+
if health_check_stats.channelUsage >= alert_settings.channelUsageThreshold:
355+
alert_context += f'High Channel Usage: {health_check_stats.channelUsage}%\n'
356+
357+
### TRACEROUTE SUCCESS RATE ###
358+
trace_avg = health_check_stats.tracerouteStatistics.average()
359+
if trace_avg <= alert_settings.tracerouteFailureThreshold and health_check_stats.tracerouteStatistics.total >= 30:
360+
# 30 for minimum statistical significance
361+
alert_context += f'Low TR Success Rate: {trace_avg}%\n'
362+
363+
### TEMPERATURE ###
364+
if health_check_stats.environmentMetrics.temperature is not None:
365+
# https://helium.nebra.com/datasheets/hotspots/outdoor/Nebra%20Outdoor%20Hotspot%20Datasheet.pdf
366+
# the rated ambient operating temperature for the Nebra Outdoor Miner is -20C to 80C
367+
# give a buffer of +-20C for high and low temp warnings
368+
if health_check_stats.environmentMetrics.temperature >= alert_settings.highTemperatureThreshold:
369+
alert_context += f'High Temperature: {health_check_stats.environmentMetrics.temperature}°C\n'
370+
elif health_check_stats.environmentMetrics.temperature <= alert_settings.lowTemperatureThreshold:
371+
alert_context += f'Low Temperature: {health_check_stats.environmentMetrics.temperature}°C\n'
372+
373+
### HUMIDITY ###
374+
if health_check_stats.environmentMetrics.relativeHumidity is not None:
375+
if health_check_stats.environmentMetrics.relativeHumidity >= alert_settings.highHumidityThreshold:
376+
alert_context += f'High Humidity: {health_check_stats.environmentMetrics.relativeHumidity}%\n'
377+
378+
### SYSTEM STATS ###
379+
if health_check_stats.systemResources.cpuUsagePercent >= alert_settings.cpuUsageThreshold:
380+
alert_context += f'High CPU Usage: {health_check_stats.systemResources.cpuUsagePercent}%\n'
381+
if health_check_stats.systemResources.memoryUsagePercent >= alert_settings.memoryUsageThreshold:
382+
alert_context += f'High Memory Usage: {health_check_stats.systemResources.memoryUsagePercent}%\n'
383+
384+
return alert_context
385+
386+
331387
def __check_node_health__(self) -> None:
332388
'''
333389
Using the software host node ID, pull the last n hours of metrics and see what general trends are.
@@ -341,11 +397,14 @@ def __check_node_health__(self) -> None:
341397
while not self.shutdown_flag.is_set():
342398
try:
343399
settings = self.db.get_alert_settings()
344-
logging.debug(f'Fetched alert settings from DB: {settings.model_dump()}')
345-
346400
now = time.time()
347401
lookback_ts = int(now - timedelta(hours=lookback_hours).total_seconds())
348402

403+
self.__check_traceroute_responses__(
404+
alert_settings=settings,
405+
lookback_ts=lookback_ts
406+
)
407+
349408
health_check_stats = NodeHealthCheck(
350409
nodeNum=self.local_node_id,
351410
startTs=lookback_ts,
@@ -354,7 +413,7 @@ def __check_node_health__(self) -> None:
354413
node_num=self.local_node_id,
355414
lookback_ts=lookback_ts
356415
),
357-
tracerouteStatistics=self.db.return_traceroute_success_rate(
416+
tracerouteStatistics=self.db.get_traceroute_success_rate(
358417
from_id=self.local_node_id,
359418
lookback_ts=lookback_ts
360419
),
@@ -365,39 +424,10 @@ def __check_node_health__(self) -> None:
365424
systemResources=system_stats()
366425
)
367426

368-
alert_context = ''
369-
370-
### CHANNEL UTILIZATION ###
371-
if health_check_stats.channelUsage >= settings.channelUsageThreshold:
372-
alert_context += f'High Channel Usage: {health_check_stats.channelUsage}%\n'
373-
374-
### TRACEROUTE SUCCESS RATE ###
375-
trace_avg = health_check_stats.tracerouteStatistics.average()
376-
if trace_avg <= settings.tracerouteFailureThreshold and health_check_stats.tracerouteStatistics.total >= 30:
377-
# 30 for minimum statistical significance
378-
alert_context += f'Low TR Success Rate: {trace_avg}%\n'
379-
380-
### TEMPERATURE ###
381-
if health_check_stats.environmentMetrics.temperature is not None:
382-
# https://helium.nebra.com/datasheets/hotspots/outdoor/Nebra%20Outdoor%20Hotspot%20Datasheet.pdf
383-
# the rated ambient operating temperature for the Nebra Outdoor Miner is -20C to 80C
384-
# give a buffer of +-20C for high and low temp warnings
385-
if health_check_stats.environmentMetrics.temperature >= settings.highTemperatureThreshold:
386-
alert_context += f'High Temperature: {health_check_stats.environmentMetrics.temperature}°C\n'
387-
elif health_check_stats.environmentMetrics.temperature <= settings.lowTemperatureThreshold:
388-
alert_context += f'Low Temperature: {health_check_stats.environmentMetrics.temperature}°C\n'
389-
390-
### HUMIDITY ###
391-
if health_check_stats.environmentMetrics.relativeHumidity is not None:
392-
if health_check_stats.environmentMetrics.relativeHumidity >= settings.highHumidityThreshold:
393-
alert_context += f'High Humidity: {health_check_stats.environmentMetrics.relativeHumidity}%\n'
394-
395-
### SYSTEM STATS ###
396-
if health_check_stats.systemResources.cpuUsagePercent >= settings.cpuUsageThreshold:
397-
alert_context += f'High CPU Usage: {health_check_stats.systemResources.cpuUsagePercent}%\n'
398-
if health_check_stats.systemResources.memoryUsagePercent >= settings.memoryUsageThreshold:
399-
alert_context += f'High Memory Usage: {health_check_stats.systemResources.memoryUsagePercent}%\n'
400-
427+
alert_context = self.__create_node_health_alert__(
428+
alert_settings=settings,
429+
health_check_stats=health_check_stats
430+
)
401431
if alert_context != '':
402432
self.__notify_admins__(f'Node: {self.interface.getLongName()}\n{alert_context.strip()}', priority=True)
403433

meshtastic_listener/listener_db/listener_db.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -533,17 +533,19 @@ def insert_traceroute_attempt(self, source_node: int, id: int, toId: int) -> Non
533533
)
534534
session.commit()
535535

536-
def retrieve_traceroute_results(self) -> list[Traceroute]:
536+
def get_traceroute_results_by_node(self, source_id: int, target_id: int, lookback_ts: int = 0) -> list[Traceroute]:
537537
with self.session() as session:
538538
return session.query(
539539
Traceroute
540540
).filter(
541-
Traceroute.tracerouteDetails.isnot(None)
541+
Traceroute.txTime >= lookback_ts,
542+
Traceroute.fromId == source_id,
543+
Traceroute.toId == target_id
542544
).order_by(
543545
Traceroute.rxTime.desc()
544546
).all()
545547

546-
def return_traceroute_success_rate(self, from_id: int, lookback_ts: int = 0) -> TracerouteStatistics:
548+
def get_traceroute_success_rate(self, from_id: int, lookback_ts: int = 0) -> TracerouteStatistics:
547549
'''
548550
Given all traceroutes sent by this node, return the percentage of responses
549551
'''
@@ -572,15 +574,15 @@ def return_traceroute_success_rate(self, from_id: int, lookback_ts: int = 0) ->
572574
avgTraceDuration=round(avg, 2) if durations else 0.0
573575
)
574576

575-
def select_favorite_nodes(self) -> list[Node]:
577+
def get_favorite_nodes(self) -> list[Node]:
576578
'''
577-
Returns all nodes marked as favorite nodes
579+
Returns all nodes marked as favorite nodes or listener nodes
578580
'''
579581
with self.session() as session:
580582
return session.query(
581583
Node
582584
).filter(
583-
Node.isFavorite == True
585+
(Node.isFavorite == True) | (Node.isHost == True)
584586
).order_by(
585587
Node.lastHeard.desc()
586588
).all()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "meshtatic_listener"
3-
version = "2.1.16"
3+
version = "2.1.17"
44
description = "A server for connecting to a Meshtastic device and responding to commands."
55
authors = [{ name = "Michael Gillett", email = "51103663+migillett@users.noreply.github.com" }]
66
requires-python = ">=3.10,<3.14"

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)