@@ -575,6 +575,63 @@ def _resolve_quota_subject(request: Request, auth: AuthTuple) -> Optional[str]:
575575 return system_id
576576
577577
def _check_infer_quota(
    request: Request, auth: AuthTuple, endpoint_path: str
) -> Optional[str]:
    """Verify the caller's infer quota and report the outcome as a metric.

    The quota subject is resolved through ``_resolve_quota_subject``. If no
    subject is resolved, the check is skipped. Otherwise
    ``check_tokens_available`` is run against the configured quota limiters.
    Each outcome handled here (skipped, success, failure, error) is passed to
    ``recording.record_quota_check`` together with the elapsed check time.

    Args:
        request: The incoming FastAPI request used to resolve the quota subject.
        auth: Authentication tuple ``(user_id, username, skip_userid_check, token)``.
        endpoint_path: API endpoint path used as a metric label.

    Returns:
        The resolved quota subject identifier, or ``None`` when no subject
        was resolved and the check was skipped.

    Raises:
        HTTPException: Propagated unchanged from ``check_tokens_available``
            after being recorded as a quota failure.
        Exception: Any other error raised by ``check_tokens_available`` is
            recorded as a quota error and propagated unchanged.
    """
    subject_id = _resolve_quota_subject(request, auth)
    # The metric label falls back to "disabled" when no subject type is configured.
    subject_type = configuration.rlsapi_v1.quota_subject or "disabled"

    if subject_id is None:
        # Nothing to check: report the skip with a zero duration.
        recording.record_quota_check(
            endpoint_path, subject_type, recording.QUOTA_RESULT_SKIPPED, 0.0
        )
        return None

    started_at = time.monotonic()

    def _report(result):
        """Record *result* along with the time elapsed since the check began."""
        recording.record_quota_check(
            endpoint_path,
            subject_type,
            result,
            time.monotonic() - started_at,
        )

    try:
        check_tokens_available(configuration.quota_limiters, subject_id)
    except HTTPException:
        # NOTE(review): every HTTPException is labelled as a quota failure,
        # whatever its status code - confirm check_tokens_available only
        # raises it for exhausted quota.
        _report(recording.QUOTA_RESULT_FAILURE)
        raise
    except Exception:  # pylint: disable=broad-exception-caught
        # Unexpected limiter errors are recorded, never swallowed.
        _report(recording.QUOTA_RESULT_ERROR)
        raise
    else:
        _report(recording.QUOTA_RESULT_SUCCESS)
    return subject_id
633+
634+
578635def _build_infer_response (
579636 response_text : str ,
580637 request_id : str ,
@@ -733,16 +790,17 @@ async def infer_endpoint( # pylint: disable=R0914,R0915
733790
734791 logger .info ("Processing rlsapi v1 /infer request %s" , request_id )
735792
736- # Quota enforcement: resolve subject and check availability before any work.
737- # No-op when quota_subject is not configured or no quota limiters exist .
738- quota_id = _resolve_quota_subject ( request , auth )
739- if quota_id is not None :
793+ # Quota enforcement: check availability before any work and record metrics for
794+ # both enforced and disabled quota paths .
795+ quota_subject = configuration . rlsapi_v1 . quota_subject
796+ if quota_subject is not None :
740797 logger .info (
741798 "Checking quota availability for rlsapi v1 request %s using subject type %s" ,
742799 request_id ,
743- configuration . rlsapi_v1 . quota_subject ,
800+ quota_subject ,
744801 )
745- check_tokens_available (configuration .quota_limiters , quota_id )
802+ quota_id = _check_infer_quota (request , auth , endpoint_path )
803+ if quota_id is not None :
746804 logger .info (
747805 "Quota availability check passed for rlsapi v1 request %s" , request_id
748806 )
0 commit comments