@@ -235,7 +235,7 @@ async def run_benchmark_command(args: argparse.Namespace) -> None:
235235 collect_responses = test_mode in [TestMode .ACC , TestMode .BOTH ]
236236
237237 # Run benchmark
238- _run_benchmark (args , effective_config , collect_responses , test_mode , benchmark_mode )
238+ _run_benchmark (effective_config , collect_responses , test_mode , benchmark_mode )
239239
240240
241241def _build_config_from_cli (
@@ -264,7 +264,10 @@ def _build_config_from_cli(
264264 load_pattern_type = LoadPatternType .CONCURRENCY
265265 case "online" :
266266 load_pattern_type = LoadPatternType .POISSON
267-
267+ report_dir = getattr (args , "report_dir" , None )
268+ timeout = getattr (args , "timeout" , None )
269+ verbose = getattr (args , "verbose" , False )
270+ output = getattr (args , "output" , None )
268271 # Build BenchmarkConfig from CLI params
269272 return BenchmarkConfig (
270273 name = f"cli_{ benchmark_mode } " ,
@@ -315,6 +318,10 @@ def _build_config_from_cli(
315318 endpoint_config = EndpointConfig (endpoint = args .endpoint , api_key = args .api_key ),
316319 metrics = Metrics (),
317320 baseline = None , # CLI mode doesn't use baseline
321+ report_dir = report_dir ,
322+ output = output ,
323+ timeout = timeout ,
324+ verbose = verbose ,
318325 )
319326
320327
@@ -391,7 +398,6 @@ def _get_dataset_format(config: BenchmarkConfig, dataset_path: Path) -> str:
391398
392399
393400def _run_benchmark (
394- args : argparse .Namespace ,
395401 config : BenchmarkConfig ,
396402 collect_responses : bool ,
397403 test_mode : TestMode ,
@@ -440,12 +446,19 @@ def _run_benchmark(
440446 # Load tokenizer if model name is provided
441447 # Priority: config.model_params.name > config.submission_ref.model (fallback when no model name is set)
442448 tokenizer = None
443- model_name = getattr ( args , "model" , None )
449+ model_name = config . model_params . name
444450 if not model_name and config .submission_ref :
445451 model_name = config .submission_ref .model
446452 if not model_name and config .model_params .name :
447453 model_name = config .model_params .name
448454
455+ if config .report_dir :
456+ report_dir = Path (config .report_dir )
457+ report_dir .mkdir (parents = True , exist_ok = True )
458+ config .to_yaml_file (report_dir / "config.yaml" )
459+
460+ max_tokens = config .model_params .max_new_tokens
461+
449462 if model_name :
450463 try :
451464 logger .info (f"Loading tokenizer for model: { model_name } " )
@@ -460,18 +473,14 @@ def _run_benchmark(
460473 # Throw exception if no model name is provided
461474 raise InputValidationError ("No model name provided" )
462475
463- # Get report path if specified
464- report_path = getattr (args , "report_path" , None )
465- if report_path :
466- logger .info (f"Report will be saved to: { report_path } " )
467-
468476 # Get dataset - from CLI or from config
469477 # TODO: Dataset Logic is not yet fully implemented
470- dataset_path = _get_dataset_path (args , config )
478+ # dataset_path = _get_dataset_path(args, config)
479+ dataset_path = config .datasets [0 ].path
471480
472481 # Load dataset using factory
473482 dataset_format = _get_dataset_format (config , dataset_path )
474- logger .info (f"Loading: { dataset_path . name } (format: { dataset_format } )" )
483+ logger .info (f"Loading: { dataset_path } (format: { dataset_format } )" )
475484
476485 # Determine if streaming should be enabled based on config
477486 streaming_mode = config .model_params .streaming
@@ -500,10 +509,17 @@ def _run_benchmark(
500509 dataset_path ,
501510 format = dataset_format ,
502511 key_maps = key_maps ,
503- metadata = {"model" : model_name , "stream" : enable_streaming },
512+ metadata = {
513+ "model" : model_name ,
514+ "stream" : enable_streaming ,
515+ "max_completion_tokens" : max_tokens ,
516+ },
504517 )
505518 dataloader .load ()
506519 logger .info (f"Loaded { dataloader .num_samples ()} samples" )
520+ except FileNotFoundError as e :
521+ logger .error (f"Dataset file not found: { dataset_path } " )
522+ raise InputValidationError (f"Dataset file not found: { dataset_path } " ) from e
507523 except NotImplementedError as e :
508524 logger .error (f"Dataset format not supported: { dataset_format } " )
509525 raise SetupError (str (e )) from e
@@ -550,20 +566,17 @@ def _run_benchmark(
550566 # Create endpoint client
551567 endpoint = config .endpoint_config .endpoint
552568 num_workers = config .settings .client .workers
553- max_concurrency = config .settings .client .max_concurrency
554569
555570 logger .info (f"Connecting: { endpoint } " )
556- logger .info (
557- f"Client config: workers={ num_workers } , max_concurrency={ max_concurrency if max_concurrency > 0 else 'unlimited' } "
558- )
571+ logger .info (f"Client config: workers={ num_workers } " )
559572
560573 tmp_dir = tempfile .mkdtemp (prefix = "inference_endpoint_" )
561574
562575 try :
563576 http_config = HTTPClientConfig (
564577 endpoint_url = urljoin (endpoint , "/v1/chat/completions" ),
565578 num_workers = num_workers ,
566- max_concurrency = max_concurrency ,
579+ max_concurrency = - 1 , # unlimited
567580 )
568581 aiohttp_config = AioHttpConfig ()
569582 zmq_config = ZMQConfig (
@@ -595,9 +608,9 @@ def _run_benchmark(
595608 scheduler ,
596609 name = "cli_benchmark" ,
597610 stop_sample_issuer_on_test_end = False ,
598- report_path = report_path ,
611+ report_dir = config . report_dir ,
599612 tokenizer_override = tokenizer ,
600- max_shutdown_timeout_s = args .timeout if args .timeout else None ,
613+ max_shutdown_timeout_s = config .timeout if config .timeout else None ,
601614 )
602615
603616 # Wait for test end with ability to interrupt
@@ -629,14 +642,14 @@ def signal_handler(signum, frame):
629642
630643 if response_collector .errors :
631644 logger .warning (f"Errors: { len (response_collector .errors )} " )
632- if args .verbose :
645+ if config .verbose :
633646 for error in response_collector .errors [:3 ]:
634647 logger .warning (f" { error } " )
635648 if len (response_collector .errors ) > 3 :
636649 logger .warning (f" ... +{ len (response_collector .errors ) - 3 } more" )
637650
638651 # Save results if requested
639- if hasattr ( args , "output" ) and args .output :
652+ if config .output :
640653 try :
641654 results = {
642655 "config" : {
@@ -660,9 +673,9 @@ def signal_handler(signum, frame):
660673 if response_collector .errors :
661674 results ["errors" ] = response_collector .errors
662675
663- with open (args .output , "w" ) as f :
676+ with open (config .output , "w" ) as f :
664677 json .dump (results , f , indent = 2 )
665- logger .info (f"Saved: { args .output } " )
678+ logger .info (f"Saved: { config .output } " )
666679 except Exception as e :
667680 logger .error (f"Save failed: { e } " )
668681
@@ -685,5 +698,5 @@ def signal_handler(signum, frame):
685698 http_client .shutdown ()
686699 shutil .rmtree (tmp_dir , ignore_errors = True )
687700 except Exception as e :
688- if args .verbose :
701+ if config .verbose :
689702 logger .warning (f"Cleanup error: { e } " )
0 commit comments