11#!/usr/bin/env python3
2- # logging.basicConfig(level=logging.DEBUG)
2+
33import argparse
44import json
55import logging
88import re
99import socket
1010import stat
11+ import sys
1112import traceback
1213from collections import deque , namedtuple , UserDict , defaultdict
1314from concurrent import futures
@@ -138,13 +139,13 @@ def http_response(self, request, response):
138139 https_response = http_response
139140
140141 @staticmethod
141- def _get_outh2_token (www_authenticate_header ):
142+ def _get_oauth2_token (www_authenticate_header ):
142143 auth_fields = dict (re .findall (r"""(?:(?P<key>[^ ,=]+)="([^"]+)")""" , www_authenticate_header ))
143144
144145 auth_url = "{realm}?scope={scope}&service={service}" .format (
145- realm = auth_fields [ 'realm' ] ,
146- scope = auth_fields [ 'scope' ] ,
147- service = auth_fields [ 'service' ] ,
146+ realm = auth_fields . get ( 'realm' ) ,
147+ scope = auth_fields . get ( 'scope' ) ,
148+ service = auth_fields . get ( 'service' ) ,
148149 )
149150 token_request = Request (auth_url )
150151 token_request .add_header ("Content-Type" , "application/x-www-form-urlencoded; charset=utf-8" )
@@ -160,7 +161,7 @@ def process_oauth2(self, request, response, www_authenticate_header):
160161 raise HTTPError (full_url , 401 , "Stopping Oauth2 failure loop for {}" .format (full_url ),
161162 response .headers , response )
162163
163- auth_token = self ._get_outh2_token (www_authenticate_header )
164+ auth_token = self ._get_oauth2_token (www_authenticate_header )
164165
165166 request .add_unredirected_header ('Authorization' , 'Bearer ' + auth_token )
166167 return self .parent .open (request , timeout = request .timeout )
@@ -276,16 +277,31 @@ def evaluate_numeric_thresholds(container, value, thresholds, name, short_name,
@lru_cache(maxsize=None)
def get_url(url):
    """Fetch ``url`` and return ``(parsed_body, http_status)``.

    Results are memoised per URL by ``lru_cache``, so a given URL is only
    requested once per run.

    If opening the URL or reading the reply raises ``URLError``, an UNKNOWN
    result is recorded, the collected results are printed and the process
    exits with the current global ``rc``; in that case nothing is returned.

    NOTE(review): because this handler exits, a ``URLError`` raised here
    never reaches callers that wrap registry lookups in their own
    ``except URLError`` block, and the message always says "daemon" even
    when ``url`` points somewhere else. Confirm that this is intended.
    """
    logger.debug("get_url: {}".format(url))
    try:
        reply = better_urllib_get.open(url, timeout=timeout)
        logger.debug("get_url: {} {}".format(url, reply.status))
        parsed = process_urllib_response(reply)
        return parsed, reply.status
    except URLError as e:
        unknown(f'Failed to connect to daemon: {e.reason}.')
        # We have no result, so we can just exit
        print_results()
        sys.exit(rc)
282289
283290
def process_urllib_response(response):
    """Read ``response`` and return its body parsed as JSON.

    The body is read in full, decoded as UTF-8, logged at debug level and
    passed to ``json.loads``.

    If the body is not valid UTF-8 or not valid JSON, an UNKNOWN result is
    recorded, the collected results are printed and the process exits with
    the current global ``rc``; in that case nothing is returned.
    """
    raw = response.read()
    try:
        body = raw.decode('utf-8')
        logger.debug(body)
        return json.loads(body)
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Undecodable bytes are just as unusable as malformed JSON, so both
        # are reported the same way instead of surfacing a traceback.
        unknown('Unable to parse response.')
        print_results()
        sys.exit(rc)
289305
290306
291307def get_container_info (name ):
@@ -358,26 +374,39 @@ def normalize_image_name_to_manifest_url(image_name, insecure_registries):
358374
359375 # Registry query url
360376 scheme = 'http' if parsed_url .registry .lower () in lower_insecure else 'https'
361- url = '{scheme}://{registry}/v2/{image_name}/manifests/{image_tag} ' .format (scheme = scheme ,
377+ url = '{scheme}://{registry}/v2/{image_name}/manifests' .format (scheme = scheme ,
362378 registry = parsed_url .registry ,
363- image_name = parsed_url .name ,
364- image_tag = parsed_url .tag )
365- return url , parsed_url .registry
379+ image_name = parsed_url .name )
380+ image_tag = parsed_url .tag
381+
382+ return url , image_tag , parsed_url .registry
366383
367384
368385# Auth servers seem picky about being hit too hard. Can't figure out why. ;)
369386# As a result, it is best to single-thread this check
370387# This is based on https://docs.docker.com/registry/spec/auth/token/#requesting-a-token
def get_digest_from_registry(url, image_tag, image_arch):
    """Return the image config digest the registry reports for a tag.

    ``url`` is the manifests base URL (without a trailing reference),
    ``image_tag`` is the reference appended to it for the first request, and
    ``image_arch`` is the architecture to select when the registry answers
    with a list of per-platform manifests.

    When the first response contains a ``manifests`` key, the entry matching
    ``image_arch`` is looked up and its digest is requested in a second call.

    Returns the value of ``config['digest']`` from the final manifest, or
    ``None`` if that key is absent.

    Raises ``RegistryError`` if any response has a status other than 200, or
    if the manifest list has no usable entry for ``image_arch``.
    """
    logger.debug("get_digest_from_registry")
    # query registry
    # TODO: Handle logging in if needed
    registry_info, status_code = get_url(url='{}/{}'.format(url, image_tag))
    if status_code != 200:
        raise RegistryError(response=registry_info)

    if 'manifests' in registry_info:
        digest = find_digest_for_architecture(registry_info['manifests'], image_arch)
        if digest is None:
            # Without this guard the next request would go to "<url>/None".
            raise RegistryError(response=registry_info)
        registry_info, status_code = get_url(url='{}/{}'.format(url, digest))
        if status_code != 200:
            raise RegistryError(response=registry_info)

    return registry_info['config'].get('digest', None)
380404
def find_digest_for_architecture(manifests, image_arch):
    """Return the digest of the first manifest built for ``image_arch``.

    ``manifests`` is an iterable of dicts; an entry matches when its
    ``platform`` dict has an ``architecture`` equal to ``image_arch``.
    Entries with no ``platform`` dict or no ``architecture`` key are skipped
    rather than raising.

    Returns the matching entry's ``digest`` value (``None`` if that entry has
    no digest), or ``None`` when nothing matches or ``manifests`` is empty or
    ``None``.
    """
    for manifest in manifests or ():
        platform = manifest.get('platform')
        if not isinstance(platform, dict):
            continue
        if 'architecture' in platform and platform['architecture'] == image_arch:
            return manifest.get('digest')
    return None
381410
382411def set_rc (new_rc ):
383412 global rc
@@ -520,7 +549,14 @@ def check_memory(container, thresholds):
520549 inspection = get_stats (container )
521550
522551 # Subtracting cache to match what `docker stats` does.
523- adjusted_usage = inspection ['memory_stats' ]['usage' ] - inspection ['memory_stats' ]['stats' ]['total_cache' ]
552+ adjusted_usage = inspection ['memory_stats' ]['usage' ]
553+ if 'total_cache' in inspection ['memory_stats' ]['stats' ]:
554+ # CGroups v1 - https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
555+ adjusted_usage -= inspection ['memory_stats' ]['stats' ]['total_cache' ]
556+ elif 'inactive_file' in inspection ['memory_stats' ]['stats' ]:
557+ # CGroups v2 - https://www.kernel.org/doc/Documentation/cgroup-v2.txt
558+ adjusted_usage -= inspection ['memory_stats' ]['stats' ]['inactive_file' ]
559+
524560 if thresholds .units == '%' :
525561 max = 100
526562 usage = int (100 * adjusted_usage / inspection ['memory_stats' ]['limit' ])
@@ -618,10 +654,14 @@ def check_version(container, insecure_registries):
618654 unknown ('"{}" has last no repository tag. Is this anywhere else?' .format (container ))
619655 return
620656
621- url , registry = normalize_image_name_to_manifest_url (image_urls [0 ], insecure_registries )
622- logger .debug ("Looking up image digest here {}" .format (url ))
657+
658+ container_image = get_container_info (container )['Image' ]
659+ image_arch = get_image_info (container_image )['Architecture' ]
660+
661+ url , image_tag , registry = normalize_image_name_to_manifest_url (image_urls [0 ], insecure_registries )
662+ logger .debug ("Looking up image digest here {}/{}" .format (url , image_tag ))
623663 try :
624- registry_hash = get_digest_from_registry (url )
664+ registry_hash = get_digest_from_registry (url , image_tag , image_arch )
625665 except URLError as e :
626666 if hasattr (e .reason , 'reason' ) and e .reason .reason == 'UNKNOWN_PROTOCOL' :
627667 unknown (
@@ -630,12 +670,12 @@ def check_version(container, insecure_registries):
630670 return
631671 elif hasattr (e .reason , 'strerror' ) and e .reason .strerror == 'nodename nor servname provided, or not known' :
632672 unknown (
633- "Cannot reach registry for {} at {}" .format (container , url ))
673+ "Cannot reach registry for {} at {}/{} " .format (container , url , image_tag ))
634674 return
635675 else :
636676 raise e
637677 except RegistryError as e :
638- unknown ("Cannot check version, couldn't retrieve digest for {} while checking {}." .format (container , url ))
678+ unknown ("Cannot check version, couldn't retrieve digest for {} while checking {}/{} ." .format (container , url , image_tag ))
639679 return
640680 logger .debug ("Image digests, local={} remote={}" .format (image_id , registry_hash ))
641681 if registry_hash == image_id :
@@ -766,7 +806,7 @@ def process_args(args):
766806 action = 'store' ,
767807 type = str ,
768808 metavar = 'WARN:CRIT' ,
769- help = 'Check cpu usage percentage taking into account any limits.' )
809+ help = 'Check cpu usage percentage taking into account any limits. Valid values are 0 - 100. ' )
770810
771811 # Memory
772812 parser .add_argument ('--memory' ,
@@ -842,13 +882,22 @@ def process_args(args):
842882 action = 'store_true' ,
843883 help = 'Suppress performance data. Reduces output when performance data is not being used.' )
844884
885+ # Debug logging
886+ parser .add_argument ('--debug' ,
887+ dest = 'debug' ,
888+ action = 'store_true' ,
889+ help = 'Enable debug logging.' )
890+
845891 parser .add_argument ('-V' , action = 'version' , version = '%(prog)s {}' .format (__version__ ))
846892
847893 if len (args ) == 0 :
848894 parser .print_help ()
849895
850896 parsed_args = parser .parse_args (args = args )
851897
898+ if parsed_args .debug :
899+ logging .basicConfig (level = logging .DEBUG )
900+
852901 global timeout
853902 timeout = parsed_args .timeout
854903
@@ -892,10 +941,10 @@ def print_results():
892941 if len (filtered_messages ) == 0 :
893942 messages_concat = 'OK'
894943 else :
895- messages_concat = '; ' .join (filtered_messages )
944+ messages_concat = '\n ' .join (filtered_messages )
896945
897946 else :
898- messages_concat = '; ' .join (messages )
947+ messages_concat = '\n ' .join (messages )
899948
900949 if no_performance or len (performance_data ) == 0 :
901950 print (messages_concat )
@@ -919,7 +968,7 @@ def perform_checks(raw_args):
919968 no_ok = args .no_ok
920969
921970 global no_performance
922- no_performance = args .no_ok
971+ no_performance = args .no_performance
923972
924973 if socketfile_permissions_failure (args ):
925974 unknown ("Cannot access docker socket file. User ID={}, socket file={}" .format (os .getuid (), args .connection ))
@@ -934,7 +983,6 @@ def perform_checks(raw_args):
934983 return
935984
936985 # Here is where all the work happens
937- #############################################################################################
938986 containers = get_containers (args .containers , args .present )
939987
940988 if len (containers ) == 0 and not args .present :
0 commit comments