Skip to content

Commit ea23b9f

Browse files
author
sagitchu
committed
feat: 多账号和负载均衡重试
1 parent 4285c7f commit ea23b9f

3 files changed

Lines changed: 197 additions & 34 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Web-based Gemini models wrapped into an OpenAI-compatible API. Powered by [Hanao
1616
- **🔍 Google Search Included**: Get up-to-date answers using web-based Gemini's search capabilities.
1717
- **💾 Conversation Persistence**: LMDB-based storage supporting multi-turn conversations.
1818
- **🖼️ Multi-modal Support**: Support for handling text, images, and file uploads.
19-
- **⚖️ Multi-account Load Balancing**: Distribute requests across multiple accounts with per-account proxy settings.
19+
- **⚖️ Multi-account Load Balancing & Fallback**: Distribute requests across multiple accounts. Automatically switches to a healthy account if the current one fails (e.g. due to rate limits or expired cookies), ensuring high availability.
2020

2121
## Quick Start
2222

README.zh.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
- 🔍 **内置 Google 搜索**:API 已内置 Gemini 网页端的搜索能力,模型响应更加准确。
1818
- 💾 **会话持久化**:基于 LMDB 存储,支持多轮对话历史记录。
1919
- 🖼️ **多模态支持**:可处理文本、图片及文件上传。
20-
- ⚖️ **多账户负载均衡**:支持多账户分发请求,可为每个账户单独配置代理
20+
- ⚖️ **多账户负载均衡与自动故障转移**:支持多账户分发请求。若当前账户调用失败(如触发风控、Cookie 过期),将自动切换至其他可用账户重试,确保高可用性。
2121

2222
## 快速开始
2323

app/server/chat.py

Lines changed: 195 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -735,7 +735,14 @@ async def create_chat_completion(
735735
gem_id=gem_id,
736736
)
737737

738+
# Keep track of whether we are using a reused session for fallback logic
739+
is_reused_session = False
740+
model_input = ""
741+
files = []
742+
738743
if session:
744+
logger.info(f"Reusing existing session with client: {client.id}")
745+
is_reused_session = True
739746
messages_to_send = _prepare_messages_for_model(
740747
remaining_messages, request.tools, request.tool_choice, extra_instructions
741748
)
@@ -759,7 +766,7 @@ async def create_chat_completion(
759766
# Start a new session and concat messages into a single string
760767
try:
761768
client = await pool.acquire()
762-
logger.info(f"[DEBUG_GEM] Initializing new chat session with model={model}")
769+
logger.info(f"[DEBUG_GEM] Initializing new chat session with model={model} using client: {client.id}")
763770
session = client.start_chat(model=model)
764771
messages_to_send = _prepare_messages_for_model(
765772
request.messages, request.tools, request.tool_choice, extra_instructions
@@ -776,29 +783,99 @@ async def create_chat_completion(
776783
raise
777784
logger.debug("New session started.")
778785

779-
# Generate response
786+
# Generate response with Fallback/Retry logic
787+
response = None
780788
try:
781789
assert session and client, "Session and client not available"
782790
client_id = client.id
783791
logger.debug(
784792
f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}"
785793
)
786794
response = await _send_with_split(session, model_input, files=files)
787-
except APIError as exc:
788-
client_id = client.id if client else "unknown"
789-
logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}")
790-
raise HTTPException(
791-
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
792-
detail="Gemini temporarily returned an invalid response. Please retry.",
793-
) from exc
794-
except HTTPException:
795-
raise
796-
except Exception as e:
797-
logger.exception(f"Unexpected error generating content from Gemini API: {e}")
798-
raise HTTPException(
799-
status_code=status.HTTP_502_BAD_GATEWAY,
800-
detail="Gemini returned an unexpected error.",
801-
) from e
795+
except Exception as first_error:
796+
# If the first attempt fails, we try to fallback to other clients
797+
failed_client_id = client.id if client else "unknown"
798+
logger.warning(f"First attempt failed with client {failed_client_id}: {first_error}")
799+
800+
# Prepare fallback input (full conversation) if we haven't already
801+
fallback_model_input = model_input
802+
fallback_files = files
803+
804+
if is_reused_session:
805+
logger.info("Regenerating full conversation input for fallback...")
806+
try:
807+
full_messages_to_send = _prepare_messages_for_model(
808+
request.messages, request.tools, request.tool_choice, extra_instructions
809+
)
810+
fallback_model_input, fallback_files = await GeminiClientWrapper.process_conversation(
811+
full_messages_to_send, tmp_dir
812+
)
813+
except Exception as prep_e:
814+
logger.error(f"Failed to prepare fallback input: {prep_e}")
815+
if isinstance(first_error, APIError):
816+
raise HTTPException(
817+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
818+
detail="Gemini temporarily returned an invalid response. Please retry.",
819+
) from first_error
820+
raise HTTPException(
821+
status_code=status.HTTP_502_BAD_GATEWAY,
822+
detail="Gemini returned an unexpected error.",
823+
) from first_error
824+
825+
# Retry loop
826+
failed_clients = {failed_client_id}
827+
retry_success = False
828+
last_error = first_error
829+
830+
# Try up to N times (number of clients)
831+
max_retries = len(pool.clients)
832+
833+
for i in range(max_retries):
834+
try:
835+
# Acquire a new client
836+
retry_client = await pool.acquire()
837+
838+
# NOTE: intended to skip clients that have already failed, but the branch below is currently a no-op (`pass`), so a failed client may be tried again.
839+
if len(pool.clients) > 1 and retry_client.id in failed_clients:
840+
# If we cycled back to a failed client, we might want to skip.
841+
# But since acquire() is round-robin, we should just try the next one.
842+
pass
843+
844+
logger.info(
845+
f"Fallback attempt {i+1}/{max_retries}: Switching to client {retry_client.id}"
846+
)
847+
848+
retry_session = retry_client.start_chat(model=model)
849+
response = await _send_with_split(
850+
retry_session, fallback_model_input, files=fallback_files
851+
)
852+
853+
# Success! Update references
854+
client = retry_client
855+
session = retry_session
856+
retry_success = True
857+
logger.info(f"Fallback successful with client {client.id}")
858+
break
859+
860+
except Exception as retry_exc:
861+
logger.warning(
862+
f"Fallback attempt failed with client {retry_client.id if 'retry_client' in locals() else 'unknown'}: {retry_exc}"
863+
)
864+
if "retry_client" in locals() and retry_client:
865+
failed_clients.add(retry_client.id)
866+
last_error = retry_exc
867+
868+
if not retry_success:
869+
logger.error("All fallback attempts failed.")
870+
if isinstance(last_error, APIError):
871+
raise HTTPException(
872+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
873+
detail="Gemini temporarily returned an invalid response. Please retry.",
874+
) from last_error
875+
raise HTTPException(
876+
status_code=status.HTTP_502_BAD_GATEWAY,
877+
detail="Gemini returned an unexpected error.",
878+
) from last_error
802879

803880
# Format the response from API
804881
try:
@@ -1000,6 +1077,7 @@ async def _build_payload(
10001077

10011078
reuse_session = session is not None
10021079
if reuse_session:
1080+
logger.info(f"Reusing existing session with client: {client.id}")
10031081
messages_to_send = _prepare_messages_for_model(
10041082
remaining_messages,
10051083
tools=None,
@@ -1019,6 +1097,7 @@ async def _build_payload(
10191097
else:
10201098
try:
10211099
client = await pool.acquire()
1100+
logger.info(f"Initializing new session for responses with client: {client.id}")
10221101
session = client.start_chat(model=model)
10231102
payload_messages = messages
10241103
model_input, files = await _build_payload(payload_messages, _reuse_session=False)
@@ -1031,28 +1110,112 @@ async def _build_payload(
10311110
raise
10321111
logger.debug("New session started for /v1/responses request.")
10331112

1113+
model_output = None
10341114
try:
10351115
assert session and client, "Session and client not available"
10361116
client_id = client.id
10371117
logger.debug(
10381118
f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}"
10391119
)
10401120
model_output = await _send_with_split(session, model_input, files=files)
1041-
except APIError as exc:
1042-
client_id = client.id if client else "unknown"
1043-
logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}")
1044-
raise HTTPException(
1045-
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
1046-
detail="Gemini temporarily returned an invalid response. Please retry.",
1047-
) from exc
1048-
except HTTPException:
1049-
raise
1050-
except Exception as e:
1051-
logger.exception(f"Unexpected error generating content from Gemini API for responses: {e}")
1052-
raise HTTPException(
1053-
status_code=status.HTTP_502_BAD_GATEWAY,
1054-
detail="Gemini returned an unexpected error.",
1055-
) from e
1121+
except Exception as first_error:
1122+
# If the first attempt fails, we try to fallback to other clients
1123+
failed_client_id = client.id if client else "unknown"
1124+
logger.warning(f"First attempt failed with client {failed_client_id}: {first_error}")
1125+
1126+
# Prepare fallback input (full conversation) if we haven't already
1127+
fallback_model_input = model_input
1128+
fallback_files = files
1129+
1130+
if reuse_session:
1131+
logger.info("Regenerating full conversation input for fallback...")
1132+
try:
1133+
# Reconstruct full messages
1134+
# Why the input is rebuilt here:
1135+
# when a session is reused, 'messages_to_send' holds only the delta,
1136+
# not the whole conversation.
1137+
# A fallback starts a new session on a different client,
1138+
# so the delta alone is not enough for it.
1139+
# 'messages' was prepared for the initial session and is not reused here.
1140+
# The fallback therefore needs the FULL conversation prepared again.
1141+
1142+
# We need to rebuild the full message list exactly as we did before pool acquisition.
1143+
# The variables 'conversation_messages', 'standard_tools', 'model_tool_choice', etc. are available.
1144+
1145+
full_messages_to_send = _prepare_messages_for_model(
1146+
conversation_messages,
1147+
tools=standard_tools or None,
1148+
tool_choice=model_tool_choice,
1149+
extra_instructions=extra_instructions or None,
1150+
)
1151+
1152+
fallback_model_input, fallback_files = await GeminiClientWrapper.process_conversation(
1153+
full_messages_to_send, tmp_dir
1154+
)
1155+
except Exception as prep_e:
1156+
logger.error(f"Failed to prepare fallback input: {prep_e}")
1157+
if isinstance(first_error, APIError):
1158+
raise HTTPException(
1159+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
1160+
detail="Gemini temporarily returned an invalid response. Please retry.",
1161+
) from first_error
1162+
raise HTTPException(
1163+
status_code=status.HTTP_502_BAD_GATEWAY,
1164+
detail="Gemini returned an unexpected error.",
1165+
) from first_error
1166+
1167+
# Retry loop
1168+
failed_clients = {failed_client_id}
1169+
retry_success = False
1170+
last_error = first_error
1171+
1172+
# Try up to N times (number of clients)
1173+
max_retries = len(pool.clients)
1174+
1175+
for i in range(max_retries):
1176+
try:
1177+
# Acquire a new client
1178+
retry_client = await pool.acquire()
1179+
1180+
# NOTE: intended to skip clients that have already failed, but the branch below is currently a no-op (`pass`), so a failed client may be tried again.
1181+
if len(pool.clients) > 1 and retry_client.id in failed_clients:
1182+
pass
1183+
1184+
logger.info(
1185+
f"Fallback attempt {i+1}/{max_retries}: Switching to client {retry_client.id}"
1186+
)
1187+
1188+
retry_session = retry_client.start_chat(model=model)
1189+
model_output = await _send_with_split(
1190+
retry_session, fallback_model_input, files=fallback_files
1191+
)
1192+
1193+
# Success! Update references
1194+
client = retry_client
1195+
session = retry_session
1196+
retry_success = True
1197+
logger.info(f"Fallback successful with client {client.id}")
1198+
break
1199+
1200+
except Exception as retry_exc:
1201+
logger.warning(
1202+
f"Fallback attempt failed with client {retry_client.id if 'retry_client' in locals() else 'unknown'}: {retry_exc}"
1203+
)
1204+
if "retry_client" in locals() and retry_client:
1205+
failed_clients.add(retry_client.id)
1206+
last_error = retry_exc
1207+
1208+
if not retry_success:
1209+
logger.error("All fallback attempts failed.")
1210+
if isinstance(last_error, APIError):
1211+
raise HTTPException(
1212+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
1213+
detail="Gemini temporarily returned an invalid response. Please retry.",
1214+
) from last_error
1215+
raise HTTPException(
1216+
status_code=status.HTTP_502_BAD_GATEWAY,
1217+
detail="Gemini returned an unexpected error.",
1218+
) from last_error
10561219

10571220
try:
10581221
text_with_think = GeminiClientWrapper.extract_output(model_output, include_thoughts=True)

0 commit comments

Comments
 (0)