File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -103,7 +103,8 @@ def default(self, o):
103103 del result ["_scoring_log" ]
104104 return result
105105 elif isinstance (o , BenchmarkDefinition ):
106- return {"uid" : o .uid , "hazards" : o .hazards ()}
106+ benchmark_version = o .get_uid_part ("version" )
107+ return {"uid" : o .uid , "hazards" : o .hazards (), "version" : benchmark_version }
107108 elif isinstance (o , HazardDefinition ):
108109 result = {"uid" : o .uid , "reference_standard" : o .reference_standard ()}
109110 if o ._tests :
Original file line number Diff line number Diff line change @@ -78,5 +78,16 @@ def uid(self):
7878
7979 return HasUid ._render_uid (self , self .__class__ ._uid_definition )
8080
def get_uid_part(self, part_name: str) -> str:
    """Render a single named component of this object's UID as a string.

    The component is looked up by ``part_name`` in the class-level
    ``_uid_definition`` and rendered on its own through
    ``HasUid._render_uid``.

    Raises:
        AttributeError: if the class does not define ``_uid_definition``.
        KeyError: if ``part_name`` is not present in ``_uid_definition``.
    """
    owner = self.__class__
    if hasattr(owner, "_uid_definition"):
        definition = owner._uid_definition
        if part_name in definition:
            # Render a one-entry definition so only the requested part is produced.
            single_part = {part_name: definition[part_name]}
            return HasUid._render_uid(self, single_part)
        raise KeyError(f"part name {part_name} not found in _uid_definition")
    raise AttributeError("classes with HasUid must define _uid_definition")
91+
8192 def __str__ (self ):
8293 return f"{ self .__class__ .__name__ } ({ self .uid } )"
Original file line number Diff line number Diff line change @@ -186,12 +186,21 @@ def test_v1_hazard_definition_with_tests_loaded(secrets):
186186 assert j ["reference_standard" ] == hazard .reference_standard ()
187187
188188
189- def test_benchmark_definition ():
def test_general_benchmark_definition():
    """Check the encoded general-purpose benchmark carries its uid, version and hazards."""
    benchmark = GeneralPurposeAiChatBenchmarkV1(locale=EN_US, prompt_set="practice")
    encoded = encode_and_parse(benchmark)
    assert encoded["uid"] == "general_purpose_ai_chat_benchmark-1.1-en_us-practice-default"
    assert encoded["version"] == "1.1"
    hazard_uids = [hazard["uid"] for hazard in encoded["hazards"]]
    assert "safe_hazard-1.1-cse-en_us-practice" in hazard_uids
193194
194195
def test_security_benchmark_definition():
    """Check the encoded security benchmark carries its uid, version and hazards."""
    benchmark = SecurityBenchmark(locale=EN_US, prompt_set="official")
    encoded = encode_and_parse(benchmark)
    assert encoded["uid"] == "security_benchmark-0.5-en_us-official-default"
    assert encoded["version"] == "0.5"
    assert "security_jailbreak_hazard-0.5-en_us-official" in [
        entry["uid"] for entry in encoded["hazards"]
    ]
202+
203+
195204def test_hazard_score ():
196205 hazard = SafeHazardV1 ("cse" , EN_US , "practice" )
197206 hazard .set_standard (GeneralPurposeAiChatBenchmarkV1 (locale = EN_US , prompt_set = "practice" ).standards )
You can’t perform that action at this time.
0 commit comments