11"""Manage authentication flow for FastAPI endpoints with K8S/OCP."""
22
33import os
4+ from http import HTTPStatus
45from typing import Optional , Self , cast
56
67import kubernetes .client
2930)
3031
3132
32- class ClusterIDUnavailableError (Exception ):
33- """Cluster ID is not available."""
class K8sAuthenticationError(Exception):
    """Base exception for Kubernetes authentication errors.

    Every other exception in this hierarchy derives from this class, so
    catching it handles all of them.
    """


class K8sAPIConnectionError(K8sAuthenticationError):
    """Cannot connect to Kubernetes API server.

    Indicates transient failures that may be resolved by retrying.
    Maps to HTTP 503 Service Unavailable.
    """


class K8sConfigurationError(K8sAuthenticationError):
    """Kubernetes cluster configuration issue.

    Indicates persistent configuration problems requiring admin intervention.
    Maps to HTTP 500 Internal Server Error.
    """


class ClusterVersionNotFoundError(K8sConfigurationError):
    """ClusterVersion resource not found in OpenShift cluster.

    Raised when the ClusterVersion custom resource does not exist (HTTP 404).
    """


class ClusterVersionPermissionError(K8sConfigurationError):
    """No permission to access ClusterVersion resource.

    Raised when RBAC denies access to the ClusterVersion resource (HTTP 403).
    """


class InvalidClusterVersionError(K8sConfigurationError):
    """ClusterVersion resource has invalid structure or missing required fields.

    Raised when the ClusterVersion exists but 'spec' is missing or not a
    mapping, or 'spec.clusterID' is missing, empty, or not a string.
    """
3472
3573
3674class K8sClientSingleton :
@@ -156,8 +194,10 @@ def _get_cluster_id(cls) -> str:
156194 str: The cluster's `clusterID`.
157195
158196 Raises:
159- ClusterIDUnavailableError: If the cluster ID cannot be obtained due
160- to missing keys, an API error, or any unexpected error.
197+ K8sAPIConnectionError: If the Kubernetes API is unreachable or returns 5xx errors.
198+ ClusterVersionNotFoundError: If the ClusterVersion resource does not exist (404).
199+ ClusterVersionPermissionError: If access to ClusterVersion is denied (403).
200+ InvalidClusterVersionError: If ClusterVersion has invalid structure or missing fields.
161201 """
162202 try :
163203 custom_objects_api = cls .get_custom_objects_api ()
@@ -170,27 +210,64 @@ def _get_cluster_id(cls) -> str:
170210 )
171211 spec = version_data .get ("spec" )
172212 if not isinstance (spec , dict ):
173- raise ClusterIDUnavailableError (
213+ raise InvalidClusterVersionError (
174214 "Missing or invalid 'spec' in ClusterVersion"
175215 )
176216 cluster_id = spec .get ("clusterID" )
177217 if not isinstance (cluster_id , str ) or not cluster_id .strip ():
178- raise ClusterIDUnavailableError (
218+ raise InvalidClusterVersionError (
179219 "Missing or invalid 'clusterID' in ClusterVersion"
180220 )
181221 cls ._cluster_id = cluster_id
182222 return cluster_id
183- except KeyError as e :
223+ except ApiException as e :
224+ # Handle specific HTTP status codes from Kubernetes API
225+ if e .status is None :
226+ # No status code indicates a connection/network issue
227+ logger .error ("Kubernetes API error with no status code: %s" , e .reason )
228+ raise K8sAPIConnectionError (
229+ f"Failed to connect to Kubernetes API: { e .reason } "
230+ ) from e
231+
232+ if e .status == HTTPStatus .NOT_FOUND :
233+ logger .error (
234+ "ClusterVersion resource 'version' not found in cluster: %s" ,
235+ e .reason ,
236+ )
237+ raise ClusterVersionNotFoundError (
238+ "ClusterVersion 'version' resource not found in OpenShift cluster"
239+ ) from e
240+ if e .status == HTTPStatus .FORBIDDEN :
241+ logger .error (
242+ "Permission denied to access ClusterVersion resource: %s" , e .reason
243+ )
244+ raise ClusterVersionPermissionError (
245+ "Insufficient permissions to read ClusterVersion resource"
246+ ) from e
247+ # Classify errors by status code range
248+ # 5xx errors and 429 (rate limit) are transient - map to 503
249+ if (
250+ e .status >= HTTPStatus .INTERNAL_SERVER_ERROR
251+ or e .status == HTTPStatus .TOO_MANY_REQUESTS
252+ ):
253+ logger .error (
254+ "Kubernetes API unavailable while fetching ClusterVersion (status %s): %s" ,
255+ e .status ,
256+ e .reason ,
257+ )
258+ raise K8sAPIConnectionError (
259+ f"Failed to connect to Kubernetes API: { e .reason } (status { e .status } )"
260+ ) from e
261+ # All other errors (4xx client errors) are configuration issues - map to 500
184262 logger .error (
185- "Failed to get cluster_id from cluster, missing keys in version object"
263+ "Kubernetes API returned client error while fetching "
264+ "ClusterVersion (status %s): %s" ,
265+ e .status ,
266+ e .reason ,
186267 )
187- raise ClusterIDUnavailableError ("Failed to get cluster ID" ) from e
188- except ApiException as e :
189- logger .error ("API exception during ClusterInfo: %s" , e )
190- raise ClusterIDUnavailableError ("Failed to get cluster ID" ) from e
191- except Exception as e :
192- logger .error ("Unexpected error during getting cluster ID: %s" , e )
193- raise ClusterIDUnavailableError ("Failed to get cluster ID" ) from e
268+ raise K8sConfigurationError (
269+ f"Kubernetes API request failed: { e .reason } (status { e .status } )"
270+ ) from e
194271
195272 @classmethod
196273 def get_cluster_id (cls ) -> str :
@@ -207,7 +284,10 @@ def get_cluster_id(cls) -> str:
207284 str: The cluster identifier.
208285
209286 Raises:
210- ClusterIDUnavailableError: If running in-cluster and fetching the cluster ID fails.
287+ K8sAPIConnectionError: If the Kubernetes API is unreachable.
288+ ClusterVersionNotFoundError: If the ClusterVersion resource does not exist.
289+ ClusterVersionPermissionError: If access to ClusterVersion is denied.
290+ InvalidClusterVersionError: If ClusterVersion has invalid structure.
211291 """
212292 if cls ._instance is None :
213293 cls ()
@@ -230,7 +310,10 @@ def get_user_info(token: str) -> Optional[kubernetes.client.V1TokenReviewStatus]
230310 The V1TokenReviewStatus if the token is valid, None otherwise.
231311
232312 Raises:
233- HTTPException: If unable to connect to Kubernetes API or unexpected error occurs.
313+ HTTPException:
314+ 503 if Kubernetes API is unavailable (5xx errors, 429 rate limit).
315+ 503 if unable to initialize Kubernetes client.
316+ 500 if Kubernetes API configuration issue (4xx errors).
234317 """
235318 try :
236319 auth_api = K8sClientSingleton .get_authn_api ()
@@ -254,8 +337,47 @@ def get_user_info(token: str) -> Optional[kubernetes.client.V1TokenReviewStatus]
254337 if status is not None and status .authenticated :
255338 return status
256339 return None
340+ except ApiException as e :
341+ if e .status is None :
342+ logger .error (
343+ "Kubernetes API error during TokenReview with no status code: %s" ,
344+ e .reason ,
345+ )
346+ response = ServiceUnavailableResponse (
347+ backend_name = "Kubernetes API" ,
348+ cause = f"Failed to connect to Kubernetes API: { e .reason } " ,
349+ )
350+ raise HTTPException (** response .model_dump ()) from e
351+
352+ # 5xx errors and 429 (rate limit) are transient - map to 503
353+ if (
354+ e .status >= HTTPStatus .INTERNAL_SERVER_ERROR
355+ or e .status == HTTPStatus .TOO_MANY_REQUESTS
356+ ):
357+ logger .error (
358+ "Kubernetes API unavailable during TokenReview (status %s): %s" ,
359+ e .status ,
360+ e .reason ,
361+ )
362+ response = ServiceUnavailableResponse (
363+ backend_name = "Kubernetes API" ,
364+ cause = f"Kubernetes API unavailable: { e .reason } (status { e .status } )" ,
365+ )
366+ raise HTTPException (** response .model_dump ()) from e
367+
368+ # All other errors (4xx client errors) are configuration issues - map to 500
369+ logger .error (
370+ "Kubernetes API returned client error during TokenReview (status %s): %s" ,
371+ e .status ,
372+ e .reason ,
373+ )
374+ response_obj = InternalServerErrorResponse (
375+ response = "Internal server error" ,
376+ cause = f"Kubernetes API request failed: { e .reason } (status { e .status } )" ,
377+ )
378+ raise HTTPException (** response_obj .model_dump ()) from e
257379 except Exception as e : # pylint: disable=broad-exception-caught
258- logger .error ("API exception during TokenReview: %s" , e )
380+ logger .error ("Unexpected error during TokenReview: %s" , e )
259381 return None
260382
261383
@@ -325,11 +447,20 @@ async def __call__(self, request: Request) -> tuple[str, str, bool, str]:
325447 if user .username == "kube:admin" :
326448 try :
327449 user .uid = K8sClientSingleton .get_cluster_id ()
328- except ClusterIDUnavailableError as e :
329- logger .error ("Failed to get cluster ID: %s" , e )
450+ except K8sAPIConnectionError as e :
451+ # Kubernetes API is unreachable - return 503
452+ logger .error ("Cannot connect to Kubernetes API: %s" , e )
453+ response = ServiceUnavailableResponse (
454+ backend_name = "Kubernetes API" ,
455+ cause = str (e ),
456+ )
457+ raise HTTPException (** response .model_dump ()) from e
458+ except K8sConfigurationError as e :
459+ # Cluster misconfiguration or client error - return 500
460+ logger .error ("Cluster configuration error: %s" , e )
330461 response = InternalServerErrorResponse (
331462 response = "Internal server error" ,
332- cause = "Unable to retrieve cluster ID" ,
463+ cause = str ( e ) ,
333464 )
334465 raise HTTPException (** response .model_dump ()) from e
335466
0 commit comments