@@ -754,7 +754,7 @@ def test_convert_chat_completion_stream_output_to_streaming_chunk(
754754 def test_live_run_serverless (self ):
755755 generator = HuggingFaceAPIChatGenerator (
756756 api_type = HFGenerationAPIType .SERVERLESS_INFERENCE_API ,
757- api_params = {"model" : "microsoft/Phi-3 .5-mini-instruct " , "provider" : "featherless-ai " },
757+ api_params = {"model" : "Qwen/Qwen2 .5-7B-Instruct " , "provider" : "together " },
758758 generation_kwargs = {"max_tokens" : 20 },
759759 )
760760
@@ -776,7 +776,7 @@ def test_live_run_serverless(self):
776776 assert meta ["usage" ]["prompt_tokens" ] > 0
777777 assert "completion_tokens" in meta ["usage" ]
778778 assert meta ["usage" ]["completion_tokens" ] > 0
779- assert meta ["model" ] == "microsoft/Phi-3 .5-mini-instruct "
779+ assert meta ["model" ] == "Qwen/Qwen2 .5-7B-Instruct "
780780 assert meta ["finish_reason" ] is not None
781781
782782 @pytest .mark .integration
@@ -789,7 +789,7 @@ def test_live_run_serverless(self):
789789 def test_live_run_serverless_streaming (self ):
790790 generator = HuggingFaceAPIChatGenerator (
791791 api_type = HFGenerationAPIType .SERVERLESS_INFERENCE_API ,
792- api_params = {"model" : "microsoft/Phi-3 .5-mini-instruct " , "provider" : "featherless-ai " },
792+ api_params = {"model" : "Qwen/Qwen2 .5-7B-Instruct " , "provider" : "together " },
793793 generation_kwargs = {"max_tokens" : 20 },
794794 streaming_callback = streaming_callback_handler ,
795795 )
@@ -801,6 +801,8 @@ def test_live_run_serverless_streaming(self):
801801 ]
802802 response = generator .run (messages = messages )
803803
804+ print (response )
805+
804806 assert "replies" in response
805807 assert isinstance (response ["replies" ], list )
806808 assert len (response ["replies" ]) > 0
@@ -812,10 +814,11 @@ def test_live_run_serverless_streaming(self):
812814 assert datetime .fromisoformat (response_meta ["completion_start_time" ]) <= datetime .now ()
813815 assert "usage" in response_meta
814816 assert "prompt_tokens" in response_meta ["usage" ]
815- assert response_meta ["usage" ]["prompt_tokens" ] > 0
817+ assert response_meta ["usage" ]["prompt_tokens" ] >= 0
816818 assert "completion_tokens" in response_meta ["usage" ]
817- assert response_meta ["usage" ]["completion_tokens" ] > 0
818- assert response_meta ["model" ] == "microsoft/Phi-3.5-mini-instruct"
819+ assert response_meta ["usage" ]["completion_tokens" ] >= 0
820+ # internally, Together calls this "Qwen/Qwen2.5-7B-Instruct-Turbo"
821+ assert "Qwen/Qwen2.5-7B-Instruct" in response_meta ["model" ]
819822 assert response_meta ["finish_reason" ] is not None
820823
821824 @pytest .mark .integration
@@ -1026,7 +1029,7 @@ async def test_run_async_with_tools(self, tools, mock_check_valid_model):
10261029 async def test_live_run_async_serverless (self ):
10271030 generator = HuggingFaceAPIChatGenerator (
10281031 api_type = HFGenerationAPIType .SERVERLESS_INFERENCE_API ,
1029- api_params = {"model" : "microsoft/Phi-3 .5-mini-instruct " , "provider" : "featherless-ai " },
1032+ api_params = {"model" : "Qwen/Qwen2 .5-7B-Instruct " , "provider" : "together " },
10301033 generation_kwargs = {"max_tokens" : 20 },
10311034 )
10321035
@@ -1048,7 +1051,7 @@ async def test_live_run_async_serverless(self):
10481051 assert meta ["usage" ]["prompt_tokens" ] > 0
10491052 assert "completion_tokens" in meta ["usage" ]
10501053 assert meta ["usage" ]["completion_tokens" ] > 0
1051- assert meta ["model" ] == "microsoft/Phi-3 .5-mini-instruct "
1054+ assert meta ["model" ] == "Qwen/Qwen2 .5-7B-Instruct "
10521055 assert meta ["finish_reason" ] is not None
10531056 finally :
10541057 await generator ._async_client .close ()
0 commit comments