11"""Manage authentication flow for FastAPI endpoints with K8S/OCP."""
22
33import os
4+ from http import HTTPStatus
45from typing import Optional , Self , cast
56
67import kubernetes .client
2930)
3031
3132
32- class ClusterIDUnavailableError (Exception ):
33- """Cluster ID is not available."""
33+ class K8sAuthenticationError (Exception ):
34+ """Base exception for Kubernetes authentication errors."""
35+
36+
37+ class K8sAPIConnectionError (K8sAuthenticationError ):
38+ """Cannot connect to Kubernetes API server.
39+
40+ Indicates transient failures that may be resolved by retrying.
41+ Maps to HTTP 503 Service Unavailable.
42+ """
43+
44+
45+ class K8sConfigurationError (K8sAuthenticationError ):
46+ """Kubernetes cluster configuration issue.
47+
48+ Indicates persistent configuration problems requiring admin intervention.
49+ Maps to HTTP 500 Internal Server Error.
50+ """
51+
52+
53+ class ClusterVersionNotFoundError (K8sConfigurationError ):
54+ """ClusterVersion resource not found in OpenShift cluster.
55+
56+ Raised when the ClusterVersion custom resource does not exist (HTTP 404).
57+ """
58+
59+
60+ class ClusterVersionPermissionError (K8sConfigurationError ):
61+ """No permission to access ClusterVersion resource.
62+
63+ Raised when RBAC denies access to the ClusterVersion resource (HTTP 403).
64+ """
65+
66+
67+ class InvalidClusterVersionError (K8sConfigurationError ):
68+ """ClusterVersion resource has invalid structure or missing required fields.
69+
70+ Raised when the ClusterVersion exists but is missing spec.clusterID or has wrong type.
71+ """
3472
3573
3674class K8sClientSingleton :
@@ -156,8 +194,10 @@ def _get_cluster_id(cls) -> str:
156194 str: The cluster's `clusterID`.
157195
158196 Raises:
159- ClusterIDUnavailableError: If the cluster ID cannot be obtained due
160- to missing keys, an API error, or any unexpected error.
197+ K8sAPIConnectionError: If the Kubernetes API is unreachable or returns 5xx errors.
198+ ClusterVersionNotFoundError: If the ClusterVersion resource does not exist (404).
199+ ClusterVersionPermissionError: If access to ClusterVersion is denied (403).
200+ InvalidClusterVersionError: If ClusterVersion has invalid structure or missing fields.
161201 """
162202 try :
163203 custom_objects_api = cls .get_custom_objects_api ()
@@ -170,27 +210,61 @@ def _get_cluster_id(cls) -> str:
170210 )
171211 spec = version_data .get ("spec" )
172212 if not isinstance (spec , dict ):
173- raise ClusterIDUnavailableError (
213+ raise InvalidClusterVersionError (
174214 "Missing or invalid 'spec' in ClusterVersion"
175215 )
176216 cluster_id = spec .get ("clusterID" )
177217 if not isinstance (cluster_id , str ) or not cluster_id .strip ():
178- raise ClusterIDUnavailableError (
218+ raise InvalidClusterVersionError (
179219 "Missing or invalid 'clusterID' in ClusterVersion"
180220 )
181221 cls ._cluster_id = cluster_id
182222 return cluster_id
183- except KeyError as e :
223+ except ApiException as e :
224+ # Handle specific HTTP status codes from Kubernetes API
225+ if e .status is None :
226+ # No status code indicates a connection/network issue
227+ logger .error ("Kubernetes API error with no status code: %s" , e .reason )
228+ raise K8sAPIConnectionError (
229+ f"Failed to connect to Kubernetes API: { e .reason } "
230+ ) from e
231+
232+ if e .status == HTTPStatus .NOT_FOUND :
233+ logger .error (
234+ "ClusterVersion resource 'version' not found in cluster: %s" ,
235+ e .reason ,
236+ )
237+ raise ClusterVersionNotFoundError (
238+ "ClusterVersion 'version' resource not found in OpenShift cluster"
239+ ) from e
240+ if e .status == HTTPStatus .FORBIDDEN :
241+ logger .error (
242+ "Permission denied to access ClusterVersion resource: %s" , e .reason
243+ )
244+ raise ClusterVersionPermissionError (
245+ "Insufficient permissions to read ClusterVersion resource"
246+ ) from e
247+ # Classify errors by status code range
248+ # 5xx errors and 429 (rate limit) are transient - map to 503
249+ if e .status >= 500 or e .status == HTTPStatus .TOO_MANY_REQUESTS :
250+ logger .error (
251+ "Kubernetes API unavailable while fetching ClusterVersion (status %s): %s" ,
252+ e .status ,
253+ e .reason ,
254+ )
255+ raise K8sAPIConnectionError (
256+ f"Failed to connect to Kubernetes API: { e .reason } (status { e .status } )"
257+ ) from e
258+ # All other errors (4xx client errors) are configuration issues - map to 500
184259 logger .error (
185- "Failed to get cluster_id from cluster, missing keys in version object"
260+ "Kubernetes API returned client error while fetching "
261+ "ClusterVersion (status %s): %s" ,
262+ e .status ,
263+ e .reason ,
186264 )
187- raise ClusterIDUnavailableError ("Failed to get cluster ID" ) from e
188- except ApiException as e :
189- logger .error ("API exception during ClusterInfo: %s" , e )
190- raise ClusterIDUnavailableError ("Failed to get cluster ID" ) from e
191- except Exception as e :
192- logger .error ("Unexpected error during getting cluster ID: %s" , e )
193- raise ClusterIDUnavailableError ("Failed to get cluster ID" ) from e
265+ raise K8sConfigurationError (
266+ f"Kubernetes API request failed: { e .reason } (status { e .status } )"
267+ ) from e
194268
195269 @classmethod
196270 def get_cluster_id (cls ) -> str :
@@ -207,7 +281,10 @@ def get_cluster_id(cls) -> str:
207281 str: The cluster identifier.
208282
209283 Raises:
210- ClusterIDUnavailableError: If running in-cluster and fetching the cluster ID fails.
284+ K8sAPIConnectionError: If the Kubernetes API is unreachable.
285+ ClusterVersionNotFoundError: If the ClusterVersion resource does not exist.
286+ ClusterVersionPermissionError: If access to ClusterVersion is denied.
287+ InvalidClusterVersionError: If ClusterVersion has invalid structure.
211288 """
212289 if cls ._instance is None :
213290 cls ()
@@ -230,7 +307,9 @@ def get_user_info(token: str) -> Optional[kubernetes.client.V1TokenReviewStatus]
230307 The V1TokenReviewStatus if the token is valid, None otherwise.
231308
232309 Raises:
233- HTTPException: If unable to connect to Kubernetes API or unexpected error occurs.
310+ HTTPException: 503 if Kubernetes API is unavailable (5xx errors, 429 rate limit).
311+ 500 if Kubernetes API configuration issue (4xx errors).
312+ 503 if unable to initialize Kubernetes client.
234313 """
235314 try :
236315 auth_api = K8sClientSingleton .get_authn_api ()
@@ -254,8 +333,44 @@ def get_user_info(token: str) -> Optional[kubernetes.client.V1TokenReviewStatus]
254333 if status is not None and status .authenticated :
255334 return status
256335 return None
336+ except ApiException as e :
337+ if e .status is None :
338+ logger .error (
339+ "Kubernetes API error during TokenReview with no status code: %s" ,
340+ e .reason ,
341+ )
342+ response = ServiceUnavailableResponse (
343+ backend_name = "Kubernetes API" ,
344+ cause = f"Failed to connect to Kubernetes API: { e .reason } " ,
345+ )
346+ raise HTTPException (** response .model_dump ()) from e
347+
348+ # 5xx errors and 429 (rate limit) are transient - map to 503
349+ if e .status >= 500 or e .status == HTTPStatus .TOO_MANY_REQUESTS :
350+ logger .error (
351+ "Kubernetes API unavailable during TokenReview (status %s): %s" ,
352+ e .status ,
353+ e .reason ,
354+ )
355+ response = ServiceUnavailableResponse (
356+ backend_name = "Kubernetes API" ,
357+ cause = f"Kubernetes API unavailable: { e .reason } (status { e .status } )" ,
358+ )
359+ raise HTTPException (** response .model_dump ()) from e
360+
361+ # All other errors (4xx client errors) are configuration issues - map to 500
362+ logger .error (
363+ "Kubernetes API returned client error during TokenReview (status %s): %s" ,
364+ e .status ,
365+ e .reason ,
366+ )
367+ response_obj = InternalServerErrorResponse (
368+ response = "Internal server error" ,
369+ cause = f"Kubernetes API request failed: { e .reason } (status { e .status } )" ,
370+ )
371+ raise HTTPException (** response_obj .model_dump ()) from e
257372 except Exception as e : # pylint: disable=broad-exception-caught
258- logger .error ("API exception during TokenReview: %s" , e )
373+ logger .error ("Unexpected error during TokenReview: %s" , e )
259374 return None
260375
261376
@@ -325,11 +440,20 @@ async def __call__(self, request: Request) -> tuple[str, str, bool, str]:
325440 if user .username == "kube:admin" :
326441 try :
327442 user .uid = K8sClientSingleton .get_cluster_id ()
328- except ClusterIDUnavailableError as e :
329- logger .error ("Failed to get cluster ID: %s" , e )
443+ except K8sAPIConnectionError as e :
444+ # Kubernetes API is unreachable - return 503
445+ logger .error ("Cannot connect to Kubernetes API: %s" , e )
446+ response = ServiceUnavailableResponse (
447+ backend_name = "Kubernetes API" ,
448+ cause = str (e ),
449+ )
450+ raise HTTPException (** response .model_dump ()) from e
451+ except K8sConfigurationError as e :
452+ # Cluster misconfiguration or client error - return 500
453+ logger .error ("Cluster configuration error: %s" , e )
330454 response = InternalServerErrorResponse (
331455 response = "Internal server error" ,
332- cause = "Unable to retrieve cluster ID" ,
456+ cause = str ( e ) ,
333457 )
334458 raise HTTPException (** response .model_dump ()) from e
335459
0 commit comments