@@ -656,8 +656,70 @@ def test_llm_client_accepts_fenced_raw_string_batch_json(self):
656656 [("hello" , "Hola" ), ("goodbye" , "Adiós" )],
657657 )
658658 self .assertEqual (mock_completion .call_args .kwargs ["timeout" ], 60 )
659+ self .assertEqual (mock_completion .call_args .kwargs ["max_tokens" ], 2048 )
659660 self .assertEqual (mock_completion .call_args .kwargs ["num_retries" ], 2 )
660661
def test_llm_client_uses_configured_max_tokens(self):
    """The ``max_tokens`` set on ``LLMConfig`` is forwarded when the call gives no override."""

    config = LLMConfig(
        provider="openrouter",
        model="openrouter/owl-alpha",
        max_tokens=1234,
    )

    # Stub returned by the patched completion call: a single choice whose
    # message carries a JSON payload and no reasoning content.
    stub_message = SimpleNamespace(
        content='{"translations": [{"key": "hello", "translation": "Hola"}]}',
        reasoning_content=None,
    )
    stub_response = SimpleNamespace(choices=[SimpleNamespace(message=stub_message)])

    with patch(
        "llm_provider.litellm.completion", return_value=stub_response
    ) as completion_mock:
        # The client is built inside the patch so nothing runs against the
        # real completion function.
        client = LLMClient(config)
        client.chat_completion(
            messages=[],
            response_model=StringBatchTranslation,
            temperature=0,
        )

    forwarded_kwargs = completion_mock.call_args.kwargs
    self.assertEqual(forwarded_kwargs["max_tokens"], 1234)
691+
def test_llm_client_allows_max_tokens_override(self):
    """A ``max_tokens`` argument passed to ``chat_completion`` wins over the configured value."""

    config = LLMConfig(
        provider="openrouter",
        model="openrouter/owl-alpha",
        max_tokens=1234,
    )

    # Stub returned by the patched completion call: a single choice whose
    # message carries a JSON payload and no reasoning content.
    stub_message = SimpleNamespace(
        content='{"translations": [{"key": "hello", "translation": "Hola"}]}',
        reasoning_content=None,
    )
    stub_response = SimpleNamespace(choices=[SimpleNamespace(message=stub_message)])

    with patch(
        "llm_provider.litellm.completion", return_value=stub_response
    ) as completion_mock:
        # The client is built inside the patch so nothing runs against the
        # real completion function.
        client = LLMClient(config)
        client.chat_completion(
            messages=[],
            response_model=StringBatchTranslation,
            temperature=0,
            max_tokens=512,  # per-call value differs from the configured 1234
        )

    forwarded_kwargs = completion_mock.call_args.kwargs
    self.assertEqual(forwarded_kwargs["max_tokens"], 512)
722+
661723 def test_llm_client_allows_retry_override (self ):
662724 """Callers can override the default LiteLLM retry count per request."""
663725
@@ -697,6 +759,16 @@ def test_llm_config_rejects_negative_retries(self):
697759 num_retries = - 1 ,
698760 )
699761
def test_llm_config_rejects_non_positive_max_tokens(self):
    """Max tokens must be positive because providers reject invalid caps.

    Both boundary cases named by "non-positive" are exercised: zero and a
    negative value. Each runs as its own sub-test so a failure report
    identifies the offending value.
    """

    for invalid_cap in (0, -1):
        with self.subTest(max_tokens=invalid_cap):
            with self.assertRaisesRegex(
                ValueError, "Max tokens must be greater than zero"
            ):
                LLMConfig(
                    provider="openrouter",
                    model="openrouter/owl-alpha",
                    max_tokens=invalid_cap,
                )
771+
700772 def test_llm_client_retries_invalid_structured_output (self ):
701773 """Malformed model JSON should trigger a fresh structured-output attempt."""
702774
0 commit comments