@@ -735,7 +735,14 @@ async def create_chat_completion(
735735 gem_id = gem_id ,
736736 )
737737
738+ # Keep track of whether we are using a reused session for fallback logic
739+ is_reused_session = False
740+ model_input = ""
741+ files = []
742+
738743 if session :
744+ logger .info (f"Reusing existing session with client: { client .id } " )
745+ is_reused_session = True
739746 messages_to_send = _prepare_messages_for_model (
740747 remaining_messages , request .tools , request .tool_choice , extra_instructions
741748 )
@@ -759,7 +766,7 @@ async def create_chat_completion(
759766 # Start a new session and concat messages into a single string
760767 try :
761768 client = await pool .acquire ()
762- logger .info (f"[DEBUG_GEM] Initializing new chat session with model={ model } " )
769+ logger .info (f"[DEBUG_GEM] Initializing new chat session with model={ model } using client: { client . id } " )
763770 session = client .start_chat (model = model )
764771 messages_to_send = _prepare_messages_for_model (
765772 request .messages , request .tools , request .tool_choice , extra_instructions
@@ -776,29 +783,99 @@ async def create_chat_completion(
776783 raise
777784 logger .debug ("New session started." )
778785
779- # Generate response
786+ # Generate response with Fallback/Retry logic
787+ response = None
780788 try :
781789 assert session and client , "Session and client not available"
782790 client_id = client .id
783791 logger .debug (
784792 f"Client ID: { client_id } , Input length: { len (model_input )} , files count: { len (files )} "
785793 )
786794 response = await _send_with_split (session , model_input , files = files )
787- except APIError as exc :
788- client_id = client .id if client else "unknown"
789- logger .warning (f"Gemini API returned invalid response for client { client_id } : { exc } " )
790- raise HTTPException (
791- status_code = status .HTTP_503_SERVICE_UNAVAILABLE ,
792- detail = "Gemini temporarily returned an invalid response. Please retry." ,
793- ) from exc
794- except HTTPException :
795- raise
796- except Exception as e :
797- logger .exception (f"Unexpected error generating content from Gemini API: { e } " )
798- raise HTTPException (
799- status_code = status .HTTP_502_BAD_GATEWAY ,
800- detail = "Gemini returned an unexpected error." ,
801- ) from e
795+ except Exception as first_error :
796+ # If the first attempt fails, we try to fallback to other clients
797+ failed_client_id = client .id if client else "unknown"
798+ logger .warning (f"First attempt failed with client { failed_client_id } : { first_error } " )
799+
800+ # Prepare fallback input (full conversation) if we haven't already
801+ fallback_model_input = model_input
802+ fallback_files = files
803+
804+ if is_reused_session :
805+ logger .info ("Regenerating full conversation input for fallback..." )
806+ try :
807+ full_messages_to_send = _prepare_messages_for_model (
808+ request .messages , request .tools , request .tool_choice , extra_instructions
809+ )
810+ fallback_model_input , fallback_files = await GeminiClientWrapper .process_conversation (
811+ full_messages_to_send , tmp_dir
812+ )
813+ except Exception as prep_e :
814+ logger .error (f"Failed to prepare fallback input: { prep_e } " )
815+ if isinstance (first_error , APIError ):
816+ raise HTTPException (
817+ status_code = status .HTTP_503_SERVICE_UNAVAILABLE ,
818+ detail = "Gemini temporarily returned an invalid response. Please retry." ,
819+ ) from first_error
820+ raise HTTPException (
821+ status_code = status .HTTP_502_BAD_GATEWAY ,
822+ detail = "Gemini returned an unexpected error." ,
823+ ) from first_error
824+
825+ # Retry loop
826+ failed_clients = {failed_client_id }
827+ retry_success = False
828+ last_error = first_error
829+
830+ # Try up to N times (number of clients)
831+ max_retries = len (pool .clients )
832+
833+ for i in range (max_retries ):
834+ try :
835+ # Acquire a new client
836+ retry_client = await pool .acquire ()
837+
838+ # If we have multiple clients, skip the one that just failed if possible
839+ if len (pool .clients ) > 1 and retry_client .id in failed_clients :
840+ # NOTE: no skip is actually performed here; this branch is a no-op, so a client that already failed may be retried.
841+ # The loop relies on acquire() being round-robin so that successive attempts move on to the next client.
842+ pass
843+
844+ logger .info (
845+ f"Fallback attempt { i + 1 } /{ max_retries } : Switching to client { retry_client .id } "
846+ )
847+
848+ retry_session = retry_client .start_chat (model = model )
849+ response = await _send_with_split (
850+ retry_session , fallback_model_input , files = fallback_files
851+ )
852+
853+ # Success! Update references
854+ client = retry_client
855+ session = retry_session
856+ retry_success = True
857+ logger .info (f"Fallback successful with client { client .id } " )
858+ break
859+
860+ except Exception as retry_exc :
861+ logger .warning (
862+ f"Fallback attempt failed with client { retry_client .id if 'retry_client' in locals () else 'unknown' } : { retry_exc } "
863+ )
864+ if "retry_client" in locals () and retry_client :
865+ failed_clients .add (retry_client .id )
866+ last_error = retry_exc
867+
868+ if not retry_success :
869+ logger .error ("All fallback attempts failed." )
870+ if isinstance (last_error , APIError ):
871+ raise HTTPException (
872+ status_code = status .HTTP_503_SERVICE_UNAVAILABLE ,
873+ detail = "Gemini temporarily returned an invalid response. Please retry." ,
874+ ) from last_error
875+ raise HTTPException (
876+ status_code = status .HTTP_502_BAD_GATEWAY ,
877+ detail = "Gemini returned an unexpected error." ,
878+ ) from last_error
802879
803880 # Format the response from API
804881 try :
@@ -1000,6 +1077,7 @@ async def _build_payload(
10001077
10011078 reuse_session = session is not None
10021079 if reuse_session :
1080+ logger .info (f"Reusing existing session with client: { client .id } " )
10031081 messages_to_send = _prepare_messages_for_model (
10041082 remaining_messages ,
10051083 tools = None ,
@@ -1019,6 +1097,7 @@ async def _build_payload(
10191097 else :
10201098 try :
10211099 client = await pool .acquire ()
1100+ logger .info (f"Initializing new session for responses with client: { client .id } " )
10221101 session = client .start_chat (model = model )
10231102 payload_messages = messages
10241103 model_input , files = await _build_payload (payload_messages , _reuse_session = False )
@@ -1031,28 +1110,112 @@ async def _build_payload(
10311110 raise
10321111 logger .debug ("New session started for /v1/responses request." )
10331112
1113+ model_output = None
10341114 try :
10351115 assert session and client , "Session and client not available"
10361116 client_id = client .id
10371117 logger .debug (
10381118 f"Client ID: { client_id } , Input length: { len (model_input )} , files count: { len (files )} "
10391119 )
10401120 model_output = await _send_with_split (session , model_input , files = files )
1041- except APIError as exc :
1042- client_id = client .id if client else "unknown"
1043- logger .warning (f"Gemini API returned invalid response for client { client_id } : { exc } " )
1044- raise HTTPException (
1045- status_code = status .HTTP_503_SERVICE_UNAVAILABLE ,
1046- detail = "Gemini temporarily returned an invalid response. Please retry." ,
1047- ) from exc
1048- except HTTPException :
1049- raise
1050- except Exception as e :
1051- logger .exception (f"Unexpected error generating content from Gemini API for responses: { e } " )
1052- raise HTTPException (
1053- status_code = status .HTTP_502_BAD_GATEWAY ,
1054- detail = "Gemini returned an unexpected error." ,
1055- ) from e
1121+ except Exception as first_error :
1122+ # If the first attempt fails, we try to fallback to other clients
1123+ failed_client_id = client .id if client else "unknown"
1124+ logger .warning (f"First attempt failed with client { failed_client_id } : { first_error } " )
1125+
1126+ # Prepare fallback input (full conversation) if we haven't already
1127+ fallback_model_input = model_input
1128+ fallback_files = files
1129+
1130+ if reuse_session :
1131+ logger .info ("Regenerating full conversation input for fallback..." )
1132+ try :
1133+ # Reconstruct full messages
1134+ # Note: in create_response, 'messages' (from line 1045) is the already-prepared
1135+ # list of Messages, but it was prepared for the *initial* session.
1136+ # When a session is reused, 'messages_to_send' (line 1079) contains only the delta,
1137+ # i.e. the messages that the existing session has not seen yet.
1138+ # A fallback client starts a brand-new chat, so the delta alone is not enough.
1139+ # We therefore re-run _prepare_messages_for_model to get a fresh start
1140+ # with the FULL conversation prepared for the fallback session.
1141+
1142+ # We need to rebuild the full message list exactly as we did before pool acquisition.
1143+ # The variables 'conversation_messages', 'standard_tools', 'model_tool_choice', etc. are available.
1144+
1145+ full_messages_to_send = _prepare_messages_for_model (
1146+ conversation_messages ,
1147+ tools = standard_tools or None ,
1148+ tool_choice = model_tool_choice ,
1149+ extra_instructions = extra_instructions or None ,
1150+ )
1151+
1152+ fallback_model_input , fallback_files = await GeminiClientWrapper .process_conversation (
1153+ full_messages_to_send , tmp_dir
1154+ )
1155+ except Exception as prep_e :
1156+ logger .error (f"Failed to prepare fallback input: { prep_e } " )
1157+ if isinstance (first_error , APIError ):
1158+ raise HTTPException (
1159+ status_code = status .HTTP_503_SERVICE_UNAVAILABLE ,
1160+ detail = "Gemini temporarily returned an invalid response. Please retry." ,
1161+ ) from first_error
1162+ raise HTTPException (
1163+ status_code = status .HTTP_502_BAD_GATEWAY ,
1164+ detail = "Gemini returned an unexpected error." ,
1165+ ) from first_error
1166+
1167+ # Retry loop
1168+ failed_clients = {failed_client_id }
1169+ retry_success = False
1170+ last_error = first_error
1171+
1172+ # Try up to N times (number of clients)
1173+ max_retries = len (pool .clients )
1174+
1175+ for i in range (max_retries ):
1176+ try :
1177+ # Acquire a new client
1178+ retry_client = await pool .acquire ()
1179+
1180+ # Intended to skip a client that already failed when other clients exist; the branch below is currently a no-op, so a failed client may be retried
1181+ if len (pool .clients ) > 1 and retry_client .id in failed_clients :
1182+ pass
1183+
1184+ logger .info (
1185+ f"Fallback attempt { i + 1 } /{ max_retries } : Switching to client { retry_client .id } "
1186+ )
1187+
1188+ retry_session = retry_client .start_chat (model = model )
1189+ model_output = await _send_with_split (
1190+ retry_session , fallback_model_input , files = fallback_files
1191+ )
1192+
1193+ # Success! Update references
1194+ client = retry_client
1195+ session = retry_session
1196+ retry_success = True
1197+ logger .info (f"Fallback successful with client { client .id } " )
1198+ break
1199+
1200+ except Exception as retry_exc :
1201+ logger .warning (
1202+ f"Fallback attempt failed with client { retry_client .id if 'retry_client' in locals () else 'unknown' } : { retry_exc } "
1203+ )
1204+ if "retry_client" in locals () and retry_client :
1205+ failed_clients .add (retry_client .id )
1206+ last_error = retry_exc
1207+
1208+ if not retry_success :
1209+ logger .error ("All fallback attempts failed." )
1210+ if isinstance (last_error , APIError ):
1211+ raise HTTPException (
1212+ status_code = status .HTTP_503_SERVICE_UNAVAILABLE ,
1213+ detail = "Gemini temporarily returned an invalid response. Please retry." ,
1214+ ) from last_error
1215+ raise HTTPException (
1216+ status_code = status .HTTP_502_BAD_GATEWAY ,
1217+ detail = "Gemini returned an unexpected error." ,
1218+ ) from last_error
10561219
10571220 try :
10581221 text_with_think = GeminiClientWrapper .extract_output (model_output , include_thoughts = True )
0 commit comments