@@ -176,7 +176,9 @@ async def run_benchmark_command(args: argparse.Namespace) -> None:
176176 # ===== YAML MODE - Load from config file =====
177177 config_path = args .config # Required by argparse
178178 try :
179- effective_config = ConfigLoader .load_yaml (Path (config_path ))
179+ effective_config : BenchmarkConfig = ConfigLoader .load_yaml (
180+ Path (config_path )
181+ )
180182
181183 # Only auxiliary params allowed (output)
182184 mode_str = getattr (args , "mode" , None )
@@ -203,7 +205,9 @@ async def run_benchmark_command(args: argparse.Namespace) -> None:
203205 elif benchmark_mode_str in ("offline" , "online" ):
204206 # ===== CLI MODE - Build config from CLI params =====
205207 benchmark_mode = TestType (benchmark_mode_str ) # TestType values are lowercase
206- effective_config = _build_config_from_cli (args , benchmark_mode_str )
208+ effective_config : BenchmarkConfig = _build_config_from_cli (
209+ args , benchmark_mode_str
210+ )
207211 test_mode = (
208212 TestMode (args .mode ) if getattr (args , "mode" , None ) else TestMode .PERF
209213 )
@@ -264,7 +268,7 @@ def _build_config_from_cli(
264268 name = args .dataset .stem ,
265269 type = DatasetType .PERFORMANCE ,
266270 path = str (args .dataset ),
267- format = "pkl" , # Will be inferred by DataLoaderFactory
271+ format = None , # Will be inferred by DataLoaderFactory
268272 )
269273 ],
270274 settings = Settings (
@@ -289,6 +293,7 @@ def _build_config_from_cli(
289293 ),
290294 ),
291295 model_params = ModelParams (
296+ name = args .model ,
292297 temperature = 0.7 ,
293298 max_new_tokens = args .max_output_tokens if args .max_output_tokens else 1024 ,
294299 osl_distribution = OSLDistribution (
@@ -327,8 +332,7 @@ def _get_dataset_path(args: argparse.Namespace, config: BenchmarkConfig) -> Path
327332 2. Validate all dataset paths exist
328333 3. Support dataset interleaving strategies
329334 """
330- # Priority: CLI args > config
331- if args .dataset :
335+ if hasattr (args , "dataset" ) and args .dataset :
332336 dataset_path = Path (args .dataset )
333337 else :
334338 # TODO: Multi-dataset - currently just picks single dataset
@@ -431,6 +435,8 @@ def _run_benchmark(
431435 model_name = getattr (args , "model" , None )
432436 if not model_name and config .submission_ref :
433437 model_name = config .submission_ref .model
438+ if not model_name and config .model_params .name :
439+ model_name = config .model_params .name
434440
435441 if model_name :
436442 try :
@@ -476,17 +482,17 @@ def _run_benchmark(
476482 logger .info ("Streaming: disabled (auto, offline mode)" )
477483
478484 try :
479- # Create loader using factory
480- def parser (x ):
481- return {
482- "prompt" : x .text_input ,
483- "output" : x .ref_output ,
484- "model" : model_name ,
485- "stream" : enable_streaming , # Enable streaming only for online mode
486- }
485+ if any (d .parser for d in config .datasets ):
486+ key_maps = [d .parser for d in config .datasets ]
487+ else :
488+ key_maps = None
489+ logger .info (f"Parser key maps: { key_maps } " )
487490
488491 dataloader = DataLoaderFactory .create_loader (
489- dataset_path , format = dataset_format , parser = parser
492+ dataset_path ,
493+ format = dataset_format ,
494+ key_maps = key_maps ,
495+ metadata = {"model" : model_name , "stream" : enable_streaming },
490496 )
491497 dataloader .load ()
492498 logger .info (f"Loaded { dataloader .num_samples ()} samples" )
0 commit comments