@@ -446,15 +446,19 @@ def on_chunk():
446446
447447 def get_valid_llm_response (self , prompt : str , validator , context : str = "" ) -> str :
448448 """
449- SOTA LLM Orchestrator:
450- - Rotates Gemini Keys
451- - Pivots to GitHub Models (Phi-4) in the cloud
452- - Enforces mandatory bucket refills (60s) on empty responses
453- - Prevents high-frequency API spam
449+ Elite LLM Orchestrator (v0.3.2) :
450+ - Prevents high-frequency Machine Gun API spam.
451+ - Immediate Cloud Pivot (Gemini -> GitHub Models).
452+ - Enforces mandatory 60s bucket refills on cloud failure.
453+ - Multi-tier wait logic for rate-limit protection.
454454 """
455455 attempts = 0
456456 is_cloud = os .environ .get ("GITHUB_ACTIONS" ) == "true"
457457
458+ logger .info (
459+ f"📊 Engine check: Found { len (self .key_cooldowns )} Gemini API keys."
460+ )
461+
458462 while True :
459463 key = None
460464 now = time .time ()
@@ -465,53 +469,53 @@ def get_valid_llm_response(self, prompt: str, validator, context: str = "") -> s
465469 logger .info (
466470 f"Attempting Gemini API Key { attempts % len (available_keys ) + 1 } /{ len (available_keys )} "
467471 )
468- response_text = self ._stream_single_llm (
469- prompt , key = key , context = context
470- )
471472 elif is_cloud :
472- logger .warning (
473- "⏳ Gemini keys limited. Pivoting to GitHub Models (Phi-4)..."
474- )
475- response_text = self ._stream_single_llm (
476- prompt , key = None , context = context
477- )
473+ logger .warning ("⏳ Gemini keys limited. Using GitHub Models (Phi-4)..." )
478474 else :
479475 logger .info ("🏠 Using Local Ollama Engine..." )
480- response_text = self ._stream_single_llm (
481- prompt , key = None , context = context
482- )
483476
484- if response_text .startswith ("ERROR_CODE_429" ):
485- if key :
486- self .key_cooldowns [key ] = time .time () + 1200
487- logger .warning (f"⚠️ Key { key [- 4 :]} rate-limited (429). Rotating..." )
488- else :
489- logger .warning (
490- "🚫 GitHub Models rate-limited. Sleeping 2 minutes..."
491- )
492- time .sleep (120 )
493- attempts += 1
494- continue
477+ response_text = self ._stream_single_llm (prompt , key = key , context = context )
495478
496479 if is_cloud and (
497480 not response_text or response_text .startswith ("ERROR_CODE_" )
498481 ):
499482 if key is not None :
483+ if "429" in response_text :
484+ self .key_cooldowns [key ] = time .time () + 1200
485+ logger .warning (f"⚠️ Key { key [- 4 :]} rate-limited. Pivoting..." )
486+
500487 logger .warning (
501- "☁️ Gemini failed /limited. Pivoting to GitHub Models (Phi-4) immediately..."
488+ "☁️ Gemini blipped /limited. Pivoting to GitHub Models (Phi-4) immediately..."
502489 )
503490 response_text = self ._stream_single_llm (
504491 prompt , key = None , context = context
505492 )
493+
506494 if not response_text or response_text .startswith ("ERROR_CODE_" ):
507495 wait_time = 60
508496 logger .warning (
509- f"⚠️ All Cloud Engines failed . Sleeping { wait_time } s to refill tokens..."
497+ f"⚠️ All Cloud Engines exhausted . Sleeping { wait_time } s to refill tokens..."
510498 )
511499 time .sleep (wait_time )
512500 attempts += 1
513501 continue
514502
503+ if response_text .startswith ("ERROR_CODE_429" ):
504+ if key :
505+ self .key_cooldowns [key ] = time .time () + 1200
506+ logger .warning (f"⚠️ Key { key [- 4 :]} rate-limited (429). Rotating..." )
507+ else :
508+ logger .warning (
509+ "🚫 GitHub Models rate-limited. Sleeping 2 minutes..."
510+ )
511+ time .sleep (120 )
512+ attempts += 1
513+ continue
514+
515+ if is_cloud and key :
516+ logger .info ("⏳ Rotating keys... (10s anti-spam breather)" )
517+ time .sleep (10 )
518+
515519 if not response_text or response_text .startswith ("ERROR_CODE_" ):
516520 logger .warning ("⚠️ Generic LLM error. Retrying in 10s..." )
517521 time .sleep (10 )
0 commit comments