@@ -233,6 +233,36 @@ def test_models(self, monkeypatch):
233233 timeout = 60.0 ,
234234 )
235235
def test_embed_raises_on_http_error(self, monkeypatch):
    """Check that ``embed`` raises ``ValueError`` when the patched POST returns HTTP 500."""
    monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")

    # Hand-built response standing in for a failing server reply.
    failed = requests.Response()
    failed._content = b"server exploded"
    failed.status_code = 500

    with patch("requests.sessions.Session.post", return_value=failed):
        # The backend is created outside ``pytest.raises`` so that only the
        # ``embed`` call itself can satisfy the expected exception.
        nim = NimBackend(
            client="NvidiaTextEmbedder",
            api_url=DEFAULT_API_URL,
            model="nvidia/nv-embedqa-e5-v5",
        )
        with pytest.raises(ValueError, match="Failed to query embedding endpoint"):
            nim.embed(texts=["a"])
245+
def test_generate_raises_on_http_error(self, monkeypatch):
    """Check that ``generate`` raises ``ValueError`` when the patched POST returns HTTP 500."""
    monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")

    # Hand-built response standing in for a failing server reply.
    failed = requests.Response()
    failed._content = b"server exploded"
    failed.status_code = 500

    with patch("requests.sessions.Session.post", return_value=failed):
        # The backend is created outside ``pytest.raises`` so that only the
        # ``generate`` call itself can satisfy the expected exception.
        nim = NimBackend(
            client="NvidiaGenerator",
            api_url=DEFAULT_API_URL,
            model="meta/llama3-8b-instruct",
        )
        with pytest.raises(ValueError, match="Failed to query chat completion endpoint"):
            nim.generate(prompt="hi")
255+
def test_models_raises_when_empty(self, monkeypatch):
    """Check that ``models`` raises ``ValueError`` when the patched GET returns an empty ``data`` list."""
    monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")

    # Successful (HTTP 200) reply whose JSON body lists no models at all.
    no_models_payload = {"data": []}
    no_models = requests.Response()
    no_models._content = json.dumps(no_models_payload).encode()
    no_models.status_code = 200

    with patch("requests.sessions.Session.get", return_value=no_models):
        # The backend is created outside ``pytest.raises`` so that only the
        # ``models`` call itself can satisfy the expected exception.
        nim = NimBackend(api_url="http://localhost:8000", model="custom-model")
        with pytest.raises(ValueError, match="No hosted model were found"):
            nim.models()
265+
236266 def test_rank (self , monkeypatch ):
237267 with patch ("requests.sessions.Session.post" , side_effect = mock_rank_post_response ) as mock_post :
238268 monkeypatch .setenv ("NVIDIA_API_KEY" , "fake-api-key" )
@@ -258,3 +288,27 @@ def test_rank(self, monkeypatch):
258288 },
259289 timeout = 60.0 ,
260290 )
291+
def test_rank_raises_on_http_error(self, monkeypatch):
    """Check that ``rank`` raises ``ValueError`` when the patched POST returns HTTP 500."""
    monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")

    # Hand-built response standing in for a failing server reply.
    failed = requests.Response()
    failed._content = b"server exploded"
    failed.status_code = 500

    with patch("requests.sessions.Session.post", return_value=failed):
        # The backend is created outside ``pytest.raises`` so that only the
        # ``rank`` call itself can satisfy the expected exception.
        nim = NimBackend(
            client="NvidiaRanker",
            api_url=DEFAULT_API_URL,
            model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        )
        with pytest.raises(ValueError, match="Failed to rank endpoint"):
            nim.rank(document_texts=["a"], query_text="q")
303+
def test_rank_raises_when_rankings_missing(self, monkeypatch):
    """Check that ``rank`` raises ``ValueError`` when a 200 reply carries no ``rankings`` key."""
    monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key")

    # Successful (HTTP 200) reply whose JSON body lacks the "rankings" key.
    malformed_payload = {"unexpected": "payload"}
    malformed = requests.Response()
    malformed._content = json.dumps(malformed_payload).encode()
    malformed.status_code = 200

    with patch("requests.sessions.Session.post", return_value=malformed):
        # The backend is created outside ``pytest.raises`` so that only the
        # ``rank`` call itself can satisfy the expected exception.
        nim = NimBackend(
            client="NvidiaRanker",
            api_url=DEFAULT_API_URL,
            model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        )
        with pytest.raises(ValueError, match="Expected 'rankings' in response"):
            nim.rank(document_texts=["a"], query_text="q")
0 commit comments