1- import functools
21import math
3- import os
42import pathlib
53from datetime import datetime
64from typing import List , Mapping , Sequence
2321from modelbench .scoring import ValueEstimate
2422from modelgauge .base_test import PromptResponseTest
2523from modelgauge .preflight import make_sut
26- from modelgauge .config import SECRETS_PATH
2724from modelgauge .dynamic_sut_factory import ModelNotSupportedError , ProviderNotFoundError , UnknownSUTMakerError
2825from modelgauge .locales import DEFAULT_LOCALE , EN_US , FR_FR , LOCALES
2926from modelgauge .prompt_sets import PROMPT_SETS
3229from modelgauge .sut import PromptResponseSUT
3330from modelgauge_tests .fake_sut import FakeSUT
3431
35- TEST_SECRETS_PATH = os .path .join ("tests" , "config" , "secrets.toml" )
36-
3732
3833class AHazard (HazardDefinition ):
3934 def tests (self , secrets : RawSecrets ) -> List [PromptResponseTest ]:
@@ -111,33 +106,6 @@ def mock_score(
111106 datetime .now (),
112107 )
113108
def manage_test_secrets(func):
    """Decorator that swaps the test secrets file in for the duration of a test.

    1. If a secrets file already exists at ``SECRETS_PATH``, it is moved aside
       to a sibling path with a ``.bak`` suffix.
    2. The test secrets file at ``TEST_SECRETS_PATH`` is moved (not copied)
       to ``SECRETS_PATH``.
    3. After the wrapped callable returns or raises, the test secrets file is
       moved back and any ``.bak`` backup is restored.

    The restore steps also run when installing the test secrets file fails,
    so a pre-existing secrets file is never left stranded as ``.bak``.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        secrets_src = pathlib.Path(TEST_SECRETS_PATH)
        secrets_dst = pathlib.Path(SECRETS_PATH)
        backup_dst = secrets_dst.with_suffix(".bak")

        if secrets_dst.exists():
            secrets_dst.replace(backup_dst)

        # Tracks whether the test secrets file actually reached secrets_dst,
        # so cleanup only moves back a file that this wrapper put there.
        installed = False
        try:
            # Installing inside the try guarantees the backup is restored
            # even when this move raises (e.g. the test secrets file is missing).
            secrets_src.replace(secrets_dst)
            installed = True
            return func(*args, **kwargs)
        finally:
            if installed:
                secrets_dst.replace(secrets_src)
            if backup_dst.exists():
                backup_dst.replace(secrets_dst)

    return wrapper
141109 @pytest .fixture (autouse = False )
142110 def mock_run_benchmarks (self , sut , monkeypatch , tmp_path ):
143111 mock = MagicMock (return_value = fake_benchmark_run (AHazard (), sut , tmp_path ))
@@ -168,7 +136,6 @@ def runner(self):
168136 ],
169137 # TODO add more locales as we add support for them
170138 )
171- @manage_test_secrets
172139 def test_benchmark_basic_run_produces_json (
173140 self , runner , mock_run_benchmarks , mock_score_benchmarks , sut_uid , version , locale , prompt_set , tmp_path
174141 ):
@@ -232,7 +199,6 @@ def test_security_benchmark_basic_run_produces_json(
232199 ],
233200 # TODO add more locales as we add support for them
234201 )
235- @manage_test_secrets
236202 def test_benchmark_multiple_suts_produces_json (
237203 self , mock_run_benchmarks , runner , version , locale , prompt_set , sut_uid , tmp_path , monkeypatch
238204 ):
@@ -368,7 +334,6 @@ def test_calls_score_benchmark_with_correct_v1_locale(self, runner, mock_run_ben
368334 # benchmark_arg = mock_score_benchmarks.call_args.args[0][0]
369335 # assert isinstance(benchmark_arg, GeneralPurposeAiChatBenchmark)
370336
371- @manage_test_secrets
372337 def test_v1_en_us_demo_is_default (self , runner , mock_run_benchmarks , sut_uid ):
373338 result = runner .invoke (cli , ["benchmark" , "--sut" , sut_uid ])
374339
@@ -383,7 +348,6 @@ def test_nonexistent_benchmark_prompt_sets_can_not_be_called(self, runner, sut_u
383348 assert "Invalid value for '--prompt-set'" in result .output
384349
385350 @pytest .mark .parametrize ("prompt_set" , PROMPT_SETS .keys ())
386- @manage_test_secrets
387351 def test_calls_score_benchmark_with_correct_prompt_set (self , runner , mock_run_benchmarks , prompt_set , sut_uid ):
388352 result = runner .invoke (cli , ["benchmark" , "--prompt-set" , prompt_set , "--sut" , sut_uid ])
389353
0 commit comments