@@ -315,7 +315,7 @@ def invoke(command, args=None, **kwargs):
315315 ],
316316 # TODO add more locales as we add support for them
317317 )
318- @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:nebius :hfrelay" ])
318+ @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:scaleway :hfrelay" ])
319319 def test_benchmark_basic_run_produces_json (
320320 self ,
321321 runner ,
@@ -393,7 +393,7 @@ def test_benchmark_basic_run_produces_json(
393393 ],
394394 # TODO add more locales as we add support for them
395395 )
396- @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:nebius :hfrelay;mt=500;t=0.3" ])
396+ @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:scaleway :hfrelay;mt=500;t=0.3" ])
397397 def test_benchmark_multiple_suts_produces_json (
398398 self , mock_run_benchmarks , runner , version , locale , prompt_set , sut_uid , run_dir , monkeypatch
399399 ):
@@ -523,7 +523,7 @@ def test_calls_score_benchmark_with_correct_v1_locale(self, runner, mock_run_ben
523523 #
524524 # benchmark_arg = mock_score_benchmarks.call_args.args[0][0]
525525 # assert isinstance(benchmark_arg, GeneralPurposeAiChatBenchmark)
526- @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:nebius :hfrelay" ])
526+ @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:scaleway :hfrelay" ])
527527 def test_v1_en_us_demo_is_default (self , runner , mock_run_benchmarks , sut_uid ):
528528 _ = runner (cli , ["benchmark" , "general" , "--sut" , sut_uid ])
529529
@@ -532,14 +532,14 @@ def test_v1_en_us_demo_is_default(self, runner, mock_run_benchmarks, sut_uid):
532532 assert benchmark_arg .locale == EN_US
533533 assert benchmark_arg .prompt_set == "demo"
534534
535- @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:nebius :hfrelay" ])
535+ @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:scaleway :hfrelay" ])
536536 def test_nonexistent_benchmark_prompt_sets_can_not_be_called (self , runner , sut_uid ):
537537 result = runner (cli , ["benchmark" , "general" , "--prompt-set" , "fake" , "--sut" , sut_uid ])
538538 assert result .exit_code == 2
539539 assert "Invalid value for '--prompt-set'" in result .output
540540
541541 @pytest .mark .parametrize ("prompt_set" , GENERAL_PROMPT_SETS .keys ())
542- @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:nebius :hfrelay" ])
542+ @pytest .mark .parametrize ("sut_uid" , ["fake-sut" , "google/gemma-3-27b-it:scaleway :hfrelay" ])
543543 def test_calls_score_benchmark_with_correct_prompt_set (self , runner , mock_run_benchmarks , prompt_set , sut_uid ):
544544 _ = runner (cli , ["benchmark" , "general" , "--prompt-set" , prompt_set , "--sut" , sut_uid ])
545545
0 commit comments