Linuxfabrik
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎check-plugins/docker-info/docker-info‎
Lines changed: 1 addition & 1 deletion b/‎check-plugins/docker-info/docker-info‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎check-plugins/docker-stats/README.md‎
Lines changed: 9 additions & 8 deletions b/‎check-plugins/docker-stats/README.md‎
Lines changed: 9 additions & 8 deletions
diff --git a/‎check-plugins/docker-stats/docker-stats‎
Lines changed: 17 additions & 28 deletions b/‎check-plugins/docker-stats/docker-stats‎
Lines changed: 17 additions & 28 deletions
diff --git a/‎check-plugins/docker-stats/unit-test/run‎
Lines changed: 6 additions & 6 deletions b/‎check-plugins/docker-stats/unit-test/run‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎check-plugins/podman-info/README.md‎
Lines changed: 2 additions & 0 deletions b/‎check-plugins/podman-info/README.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎check-plugins/podman-info/podman-info‎
Lines changed: 1 addition & 1 deletion b/‎check-plugins/podman-info/podman-info‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎check-plugins/podman-stats/README.md‎
Lines changed: 15 additions & 8 deletions b/‎check-plugins/podman-stats/README.md‎
Lines changed: 15 additions & 8 deletions
@@ -132,6 +132,8 @@ Monitoring Plugins:
 * by-ssh: add missing `--verbose` parameter
 * cpu-usage: fix false 100% readings on Windows with 64+ cores caused by all-zero CPU time samples from psutil ([#626](https://github.com/Linuxfabrik/monitoring-plugins/issues/626))
 * docker-stats: fix memory perfdata using CPU thresholds instead of memory thresholds
+* docker-stats: replace per-container perfdata with aggregate metrics (`containers_running`, `cpu`)
+* podman-stats: use `podman stats --format '{{json .}}'` for precise numeric values; aggregate perfdata includes block I/O and network I/O totals
 * file-age: handle `FileNotFoundError` race condition when files disappear on busy file systems
 * fs-ro: ignore `/run/credentials` (https://systemd.io/CREDENTIALS/)
 * keycloak-stats: fix incorrect symlink for lib
 
@@ -75,7 +75,7 @@ def main():
             lib.shell.shell_exec('docker info'),
         )
         if retc != 0:
-            lib.base.cu(f'{stderr}\n{stdout}')
+            lib.base.oao(f'{stderr}\n{stdout}', STATE_CRIT)
         if 'server version:' not in stdout.lower():
             lib.base.cu(
                 'Unable to parse docker info output.'
 
@@ -83,19 +83,20 @@ myconti_ds_1              ! 0.0   ! 11.42
 
 ## States
 
-Alerts if
+* CRIT on `docker info` or `docker stats` return codes != 0
+* WARN if any container CPU usage is above the warning CPU threshold during the last n checks (default: 5)
+* CRIT if any container CPU usage is above the critical CPU threshold during the last n checks (default: 5)
+* WARN or CRIT if any container memory usage is above the memory thresholds
 
-* any container memory usage is above the memory thresholds
-* any container cpu usage is above the cpu thresholds during the last n checks (default: 5)
+CPU usage is normalized by dividing by the number of host CPUs, so 100% means all host CPUs are fully utilized. On an 8-core system, a container using one core at full capacity would show 12.5%. Memory usage is relative to the container's memory limit if one is set, otherwise relative to the total host memory.
 
 
 ## Perfdata / Metrics
 
-| Name                         | Type       | Description                        |
-|------------------------------|------------|------------------------------------|
-| cpu                          | Number     | Number of Host CPUs                |
-| \<containername\>\_cpu_usage | Percentage | Container's CPU usage (normalized) |
-| \<containername\>\_mem_usage | Percentage | Container's memory usage (Percent) |
+| Name               | Type   | Description                  |
+|--------------------|--------|------------------------------|
+| containers_running | Number | Number of running containers |
+| cpu                | Number | Number of Host CPUs          |
 
 
 ## Credits, License
 
@@ -24,7 +24,7 @@ from lib.globals import (STATE_CRIT, STATE_OK,
                           STATE_UNKNOWN, STATE_WARN)
 
 __author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
-__version__ = '2026040802'
+__version__ = '2026040803'
 
 DESCRIPTION = """This check prints cpu and memory statistics for all running Docker
                 containers, using the "docker stats" command. Container CPU usage is divided
@@ -166,7 +166,7 @@ def main():
             lib.shell.shell_exec('docker info'),
         )
         if retc != 0:
-            lib.base.cu(f'{stderr}\n{stdout}')
+            lib.base.oao(f'{stderr}\n{stdout}', STATE_CRIT)
         if 'server version:' not in stdout.lower():
             lib.base.cu(
                 'Unable to parse docker info output.'
@@ -181,7 +181,7 @@ def main():
             lib.shell.shell_exec('docker stats --no-stream'),
         )
         if retc != 0:
-            lib.base.cu(stderr)
+            lib.base.oao(stderr, STATE_CRIT)
     else:
         # do not call the command, put in test data
         host_cpus = 1
@@ -190,11 +190,7 @@ def main():
     # init some vars
     msg = ''
     state = STATE_OK
-    perfdata = lib.base.get_perfdata(
-        'cpu',
-        host_cpus,
-        _min=0,
-    )
+    perfdata = ''
     table_values = []
 
     # analyze data
@@ -209,22 +205,13 @@ def main():
             if not args.FULL_NAME:
                 name = shorten(name)
             cpu_percent = container[2]
-            mem_usage = container[3]
             mem_percent = container[6]
         except Exception:
             continue
 
         # divide by number of cores (got by docker info)
         cpu_usage = round(float(cpu_percent.replace('%', '').strip()) / host_cpus, 1)
-        perfdata += lib.base.get_perfdata(
-            f'{name}_cpu_usage',
-            cpu_usage,
-            uom='%',
-            warn=args.WARN_CPU,
-            crit=args.CRIT_CPU,
-            _min=0,
-            _max=100,
-        )
+        mem_usage = round(float(mem_percent.replace('%', '').strip()), 1)
 
         # save trend data to local sqlite database, limited to "count" rows max.
         lib.base.coe(
@@ -246,16 +233,6 @@ def main():
         state = lib.base.get_worst(cpu_state, state)
 
         # alert when container mem_usage is exceeded
-        mem_usage = float(mem_percent.replace('%', '').strip())
-        perfdata += lib.base.get_perfdata(
-            f'{name}_mem_usage',
-            mem_usage,
-            uom='%',
-            warn=args.WARN_MEM,
-            crit=args.CRIT_MEM,
-            _min=0,
-            _max=100,
-        )
         mem_state = lib.base.get_state(mem_usage, args.WARN_MEM, args.CRIT_MEM)
         if mem_state != STATE_OK:
             msg += f'"{name}" memory {mem_usage}% {lib.base.state2str(mem_state)}, '
@@ -271,6 +248,18 @@ def main():
     lib.db_sqlite.commit(conn)
     lib.db_sqlite.close(conn)
 
+    # build perfdata
+    perfdata += lib.base.get_perfdata(
+        'containers_running',
+        len(table_values),
+        _min=0,
+    )
+    perfdata += lib.base.get_perfdata(
+        'cpu',
+        host_cpus,
+        _min=0,
+    )
+
     # create output
     if state == STATE_OK:
         msg = f'Everything is ok, {len(table_values)} containers checked.\n\n'
 
@@ -57,9 +57,9 @@ class TestCheck(unittest.TestCase):
         self.assertIn('Everything is ok,', stdout)
         self.assertIn('Container     ! CPU % ! Mem %', stdout)
         self.assertIn('--------------+-------+------', stdout)
-        self.assertIn('elasticsearch ! 188.8 ! 16.73', stdout)
-        self.assertIn('graylog       ! 204.2 ! 5.69', stdout)
-        self.assertIn('mongo         ! 0.3   ! 1.95', stdout)
+        self.assertIn('elasticsearch ! 188.8 ! 16.7', stdout)
+        self.assertIn('graylog       ! 204.2 ! 5.7', stdout)
+        self.assertIn('mongo         ! 0.3   ! 1.9', stdout)
         self.assertEqual(stderr, '')
         self.assertEqual(retc, STATE_OK)
 
@@ -68,9 +68,9 @@ class TestCheck(unittest.TestCase):
         self.assertIn('Everything is ok,', stdout)
         self.assertIn('Container                                                           ! CPU % ! Mem %', stdout)
         self.assertIn('--------------------------------------------------------------------+-------+------', stdout)
-        self.assertIn('runner-7ayh6h5f-project-107-concurrent-0-37b2c7aee9359db9-build     ! 95.0  ! 1.22 ', stdout)
-        self.assertIn('runner-7ayh6h5f-project-19-concurrent-0-99f0211c36d59d01-build      ! 59.5  ! 0.99 ', stdout)
-        self.assertIn('runner-7ayh6h5f-project-49-concurrent-0-e180afe41fc754dc-predefined ! 79.5  ! 0.15', stdout)
+        self.assertIn('runner-7ayh6h5f-project-107-concurrent-0-37b2c7aee9359db9-build     ! 95.0  ! 1.2', stdout)
+        self.assertIn('runner-7ayh6h5f-project-19-concurrent-0-99f0211c36d59d01-build      ! 59.5  ! 1.0', stdout)
+        self.assertIn('runner-7ayh6h5f-project-49-concurrent-0-e180afe41fc754dc-predefined ! 79.5  ! 0.1', stdout)
         self.assertEqual(stderr, '')
         self.assertEqual(retc, STATE_OK)
 
 
@@ -51,6 +51,8 @@ Output:
 
 ## States
 
+* WARN on `podman info` warnings
+* CRIT on `podman info` errors
 * CRIT on `podman info` return codes != 0
 
 
 
@@ -77,7 +77,7 @@ def main():
             lib.shell.shell_exec('podman info --format json'),
         )
         if retc != 0:
-            lib.base.cu(f'{stderr}\n{stdout}')
+            lib.base.oao(f'{stderr}\n{stdout}', STATE_CRIT)
         try:
             result = json.loads(stdout)
         except Exception:
 
@@ -84,19 +84,26 @@ myconti_ds_1              ! 0.0   ! 11.42
 
 ## States
 
-Alerts if
+* CRIT on `podman info` or `podman stats` return codes != 0
+* WARN if any container CPU usage is above the warning CPU threshold during the last n checks (default: 5)
+* CRIT if any container CPU usage is above the critical CPU threshold during the last n checks (default: 5)
+* WARN or CRIT if any container memory usage is above the memory thresholds
 
-* any container memory usage is above the memory thresholds
-* any container cpu usage is above the cpu thresholds during the last n checks (default: 5)
+CPU usage is normalized by dividing by the number of host CPUs, so 100% means all host CPUs are fully utilized. On an 8-core system, a container using one core at full capacity would show 12.5%. Memory usage is relative to the container's memory limit if one is set, otherwise relative to the total host memory.
 
 
 ## Perfdata / Metrics
 
-| Name                         | Type       | Description                        |
-|------------------------------|------------|------------------------------------|
-| cpu                          | Number     | Number of Host CPUs                |
-| \<containername\>\_cpu_usage | Percentage | Container's CPU usage (normalized) |
-| \<containername\>\_mem_usage | Percentage | Container's memory usage (Percent) |
+| Name               | Type   | Description                                              |
+|--------------------|--------|----------------------------------------------------------|
+| block_input        | Bytes  | Total data read from block device across all containers  |
+| block_output       | Bytes  | Total data written to block device across all containers |
+| containers_running | Number | Number of running containers                             |
+| cpu                | Number | Number of Host CPUs                                      |
+| images             | Number | Number of images                                         |
+| net_rx             | Bytes  | Total network bytes received across all containers       |
+| net_tx             | Bytes  | Total network bytes transmitted across all containers    |
+| ram                | Bytes  | Total Host Memory                                        |
 
 
 ## Credits, License
Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,7 @@ def main():`
`75`	`75`	`lib.shell.shell_exec('docker info'),`
`76`	`76`	`)`
`77`	`77`	`if retc != 0:`
`78`		`- lib.base.cu(f'{stderr}\n{stdout}')`
	`78`	`+ lib.base.oao(f'{stderr}\n{stdout}', STATE_CRIT)`
`79`	`79`	`if 'server version:' not in stdout.lower():`
`80`	`80`	`lib.base.cu(`
`81`	`81`	`'Unable to parse docker info output.'`
Original file line number	Diff line number	Diff line change
`@@ -77,7 +77,7 @@ def main():`
`77`	`77`	`lib.shell.shell_exec('podman info --format json'),`
`78`	`78`	`)`
`79`	`79`	`if retc != 0:`
`80`		`- lib.base.cu(f'{stderr}\n{stdout}')`
	`80`	`+ lib.base.oao(f'{stderr}\n{stdout}', STATE_CRIT)`
`81`	`81`	`try:`
`82`	`82`	`result = json.loads(stdout)`
`83`	`83`	`except Exception:`