Skip to content

Commit d669cbf

Browse files
committed
feat(systemd-units-failed): show failed unit names in first output line (#967)
1 parent 0b5d0c4 commit d669cbf

File tree

4 files changed, with 37 additions and 31 deletions.

4 files changed

+37
-31
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ Monitoring Plugins:
8585
* redis-status, valkey-status: modernize code and unify both plugins again after [PR #954](https://github.com/Linuxfabrik/monitoring-plugins/pull/954)
8686
* rocketchat-stats: improve output
8787
* statuspal: replace `flatdict` dependency with a recursive approach ([#1044](https://github.com/Linuxfabrik/monitoring-plugins/issues/1044))
88+
* systemd-units-failed: show failed unit names in the first output line for better dashboard and SMS alert readability ([#967](https://github.com/Linuxfabrik/monitoring-plugins/issues/967))
8889
* updates: adapt to updated powershell.py library
8990

9091

check-plugins/systemd-units-failed/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ options:
4545
Output:
4646

4747
```text
48-
There is 1 failed unit.
48+
1 failed unit: ipmievd.service
4949
50-
unit load active sub description
51-
---- ---- ------ --- -----------
52-
ipmievd.service loaded failed failed Ipmievd Daemon
50+
unit ! load ! active ! sub ! description
51+
----------------+--------+--------+--------+----------------
52+
ipmievd.service ! loaded ! failed ! failed ! Ipmievd Daemon
5353
```
5454

5555

check-plugins/systemd-units-failed/systemd-units-failed

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,20 @@
1111
"""See the check's README for more details.
1212
"""
1313

14-
import argparse # pylint: disable=C0413
15-
import fnmatch # pylint: disable=C0413
16-
import sys # pylint: disable=C0413
17-
18-
import lib.args # pylint: disable=C0413
19-
import lib.base # pylint: disable=C0413
20-
import lib.shell # pylint: disable=C0413
21-
import lib.lftest # pylint: disable=C0413
22-
import lib.txt # pylint: disable=C0413
23-
from lib.globals import (STATE_OK, STATE_UNKNOWN, # pylint: disable=C0413
14+
import argparse
15+
import fnmatch
16+
import sys
17+
18+
import lib.args
19+
import lib.base
20+
import lib.lftest
21+
import lib.shell
22+
import lib.txt
23+
from lib.globals import (STATE_OK, STATE_UNKNOWN,
2424
STATE_WARN)
2525

2626
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
27-
__version__ = '2025100601'
27+
__version__ = '2026040801'
2828

2929
DESCRIPTION = """Warns on any failed systemd units."""
3030

@@ -39,7 +39,7 @@ def parse_args():
3939
parser.add_argument(
4040
'-V', '--version',
4141
action='version',
42-
version='%(prog)s: v{} by {}'.format(__version__, __author__)
42+
version=f'%(prog)s: v{__version__} by {__author__}'
4343
)
4444

4545
parser.add_argument(
@@ -52,7 +52,10 @@ def parse_args():
5252

5353
parser.add_argument(
5454
'--ignore',
55-
help='Ignore a unit, for example "dhcpd.service" (repeating). Supports glob according to https://docs.python.org/3/library/fnmatch.html. Default: %(default)s',
55+
help='Ignore a unit, for example "dhcpd.service" (repeating). '
56+
'Supports glob according to '
57+
'https://docs.python.org/3/library/fnmatch.html. '
58+
'Default: %(default)s',
5659
dest='IGNORE',
5760
default=DEFAULT_IGNORE,
5861
action='append',
@@ -103,7 +106,7 @@ def main():
103106
if line.startswith('*'):
104107
offset = 1
105108
unit = line.split()
106-
if any([fnmatch.fnmatchcase(unit[0 + offset], ignore) for ignore in args.IGNORE]):
109+
if any(fnmatch.fnmatchcase(unit[0 + offset], ignore) for ignore in args.IGNORE):
107110
continue
108111
count += 1
109112
table_data.append({
@@ -115,25 +118,27 @@ def main():
115118
})
116119
if count > 0:
117120
state = STATE_WARN
118-
msg = 'There {} {} failed {}.\n'.format(
119-
lib.txt.pluralize('', count, 'is,are'),
120-
count,
121-
lib.txt.pluralize('unit', count),
122-
)
121+
unit_names = ', '.join(row['unit'] for row in table_data)
122+
msg = f'{count} failed {lib.txt.pluralize("unit", count)}: {unit_names}\n'
123123
table = lib.base.get_table(
124124
table_data,
125125
['unit', 'load', 'active', 'sub', 'description'],
126126
['unit', 'load', 'active', 'sub', 'description'],
127127
)
128128

129-
perfdata = lib.base.get_perfdata('systemd-units-failed', count, None, 1, None, 0, None)
129+
perfdata = lib.base.get_perfdata(
130+
'systemd-units-failed',
131+
count,
132+
warn=1,
133+
_min=0,
134+
)
130135

131136
# over and out
132-
lib.base.oao('{}\n\n{}'.format(msg, table), state, perfdata, always_ok=args.ALWAYS_OK)
137+
lib.base.oao(f'{msg}\n\n{table}', state, perfdata, always_ok=args.ALWAYS_OK)
133138

134139

135140
if __name__ == '__main__':
136141
try:
137142
main()
138-
except Exception: # pylint: disable=W0703
143+
except Exception:
139144
lib.base.cu()

check-plugins/systemd-units-failed/unit-test/run

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,19 @@ class TestCheck(unittest.TestCase):
3232

3333
def test_if_check_runs_EXAMPLE02(self):
3434
stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(self.check + ' --test=stdout/EXAMPLE02,,0'))
35-
self.assertRegex(stdout, r'There is 1 failed unit.')
35+
self.assertIn('1 failed unit: openipmi.service', stdout)
3636
self.assertEqual(stderr, '')
3737
self.assertEqual(retc, STATE_WARN)
3838

3939
def test_if_check_runs_EXAMPLE03(self):
4040
stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(self.check + ' --test=stdout/EXAMPLE03,,0'))
41-
self.assertRegex(stdout, r'There are 2 failed units.')
41+
self.assertIn('2 failed units: openipmi.service, smartd.service', stdout)
4242
self.assertEqual(stderr, '')
4343
self.assertEqual(retc, STATE_WARN)
4444

4545
def test_if_check_runs_EXAMPLE03_ignore1(self):
4646
stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(self.check + ' --ignore=openipmi.service --test=stdout/EXAMPLE03,,0'))
47-
self.assertRegex(stdout, r'There is 1 failed unit.')
47+
self.assertIn('1 failed unit: smartd.service', stdout)
4848
self.assertEqual(stderr, '')
4949
self.assertEqual(retc, STATE_WARN)
5050

@@ -62,13 +62,13 @@ class TestCheck(unittest.TestCase):
6262

6363
def test_if_check_runs_EXAMPLE04(self):
6464
stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(self.check + ' --test=stdout/EXAMPLE04,,0'))
65-
self.assertRegex(stdout, r'There is 1 failed unit.')
65+
self.assertIn('1 failed unit: chronyd.service', stdout)
6666
self.assertEqual(stderr, '')
6767
self.assertEqual(retc, STATE_WARN)
6868

6969
def test_if_check_runs_EXAMPLE05(self):
7070
stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(self.check + ' --test=stdout/EXAMPLE05,,0'))
71-
self.assertRegex(stdout, r'There are 3 failed units.')
71+
self.assertIn('3 failed units: tomcat8.service, varnish.service, varnishlog.service', stdout)
7272
self.assertRegex(stdout, r'unit ! load ! active ! sub ! description')
7373
self.assertRegex(stdout, r'tomcat8.service ! not-found ! failed ! failed ! tomcat8.service')
7474
self.assertRegex(stdout, r'varnish.service ! loaded ! failed ! failed ! Varnish HTTP accelerator')

0 commit comments

Comments
 (0)