remove print and replace inference provider (#1466)

rogthefrog · web-flow · commit cf177566a10e · 2026-01-26T13:09:42.000-05:00
* remove leftover debug print("TESTING") from test_dump_json_user

* nebius is no longer offered as an inference provider; use scaleway in the parametrized test SUT UIDs instead
diff --git a/tests/modelbench_tests/test_record.py b/tests/modelbench_tests/test_record.py
@@ -321,7 +321,6 @@ def test_dump_json(benchmark_score, tmp_path, run_uid):
 
 
 def test_dump_json_user(benchmark_score, tmp_path):
-    print("TESTING")
 
     def dump_and_read_record(**kwargs):
         json_path = tmp_path / "foo.json"
diff --git a/tests/modelbench_tests/test_run.py b/tests/modelbench_tests/test_run.py
@@ -315,7 +315,7 @@ def invoke(command, args=None, **kwargs):
         ],
         # TODO add more locales as we add support for them
     )
-    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:nebius:hfrelay"])
+    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:scaleway:hfrelay"])
     def test_benchmark_basic_run_produces_json(
         self,
         runner,
@@ -393,7 +393,7 @@ def test_benchmark_basic_run_produces_json(
         ],
         # TODO add more locales as we add support for them
     )
-    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:nebius:hfrelay;mt=500;t=0.3"])
+    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:scaleway:hfrelay;mt=500;t=0.3"])
     def test_benchmark_multiple_suts_produces_json(
         self, mock_run_benchmarks, runner, version, locale, prompt_set, sut_uid, run_dir, monkeypatch
     ):
@@ -523,7 +523,7 @@ def test_calls_score_benchmark_with_correct_v1_locale(self, runner, mock_run_ben
     #
     #     benchmark_arg = mock_score_benchmarks.call_args.args[0][0]
     #     assert isinstance(benchmark_arg, GeneralPurposeAiChatBenchmark)
-    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:nebius:hfrelay"])
+    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:scaleway:hfrelay"])
     def test_v1_en_us_demo_is_default(self, runner, mock_run_benchmarks, sut_uid):
         _ = runner(cli, ["benchmark", "general", "--sut", sut_uid])
 
@@ -532,14 +532,14 @@ def test_v1_en_us_demo_is_default(self, runner, mock_run_benchmarks, sut_uid):
         assert benchmark_arg.locale == EN_US
         assert benchmark_arg.prompt_set == "demo"
 
-    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:nebius:hfrelay"])
+    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:scaleway:hfrelay"])
     def test_nonexistent_benchmark_prompt_sets_can_not_be_called(self, runner, sut_uid):
         result = runner(cli, ["benchmark", "general", "--prompt-set", "fake", "--sut", sut_uid])
         assert result.exit_code == 2
         assert "Invalid value for '--prompt-set'" in result.output
 
     @pytest.mark.parametrize("prompt_set", GENERAL_PROMPT_SETS.keys())
-    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:nebius:hfrelay"])
+    @pytest.mark.parametrize("sut_uid", ["fake-sut", "google/gemma-3-27b-it:scaleway:hfrelay"])
     def test_calls_score_benchmark_with_correct_prompt_set(self, runner, mock_run_benchmarks, prompt_set, sut_uid):
         _ = runner(cli, ["benchmark", "general", "--prompt-set", prompt_set, "--sut", sut_uid])