Skip to content

Commit 9a7cdbe

Browse files
authored
feat: Add worker event recording (#40)
* Add worker event recording
* Use tmp dir as default report path

---------

Signed-off-by: Rashid Kaleem <230885705+arekay-nv@users.noreply.github.com>
1 parent f9f99f8 commit 9a7cdbe

10 files changed

Lines changed: 321 additions & 195 deletions

File tree

src/inference_endpoint/commands/benchmark.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
from transformers import AutoTokenizer
3434
from transformers.utils import logging as transformers_logging
3535

36+
from inference_endpoint.commands.utils import get_default_report_path
3637
from inference_endpoint.config.runtime_settings import RuntimeSettings
3738
from inference_endpoint.config.schema import (
3839
BenchmarkConfig,
@@ -267,7 +268,11 @@ def _build_config_from_cli(
267268
load_pattern_type = LoadPatternType.CONCURRENCY
268269
case "online":
269270
load_pattern_type = LoadPatternType.POISSON
270-
report_dir = getattr(args, "report_dir", None)
271+
report_dir = getattr(
272+
args,
273+
"report_dir",
274+
get_default_report_path(),
275+
)
271276
timeout = getattr(args, "timeout", None)
272277
verbose = getattr(args, "verbose", False)
273278
output = getattr(args, "output", None)
@@ -457,8 +462,11 @@ def _run_benchmark(
457462

458463
if config.report_dir:
459464
report_dir = Path(config.report_dir)
460-
report_dir.mkdir(parents=True, exist_ok=True)
461-
config.to_yaml_file(report_dir / "config.yaml")
465+
else:
466+
report_dir = get_default_report_path()
467+
468+
report_dir.mkdir(parents=True, exist_ok=True)
469+
config.to_yaml_file(report_dir / "config.yaml")
462470

463471
max_tokens = config.model_params.max_new_tokens
464472

@@ -584,6 +592,8 @@ def _run_benchmark(
584592
endpoint_url=urljoin(endpoint, "/v1/chat/completions"),
585593
num_workers=num_workers,
586594
max_concurrency=-1, # unlimited
595+
record_worker_events=config.settings.client.record_worker_events,
596+
event_logs_dir=report_dir,
587597
)
588598
aiohttp_config = AioHttpConfig()
589599
zmq_config = ZMQConfig(
@@ -615,7 +625,7 @@ def _run_benchmark(
615625
scheduler,
616626
name=f"cli_benchmark_{uuid.uuid4().hex[0:8]}",
617627
stop_sample_issuer_on_test_end=False,
618-
report_dir=config.report_dir,
628+
report_dir=report_dir,
619629
tokenizer_override=tokenizer,
620630
max_shutdown_timeout_s=config.timeout if config.timeout else None,
621631
)
@@ -636,9 +646,7 @@ def signal_handler(signum, frame):
636646

637647
elapsed_time = time.time() - start_time
638648
success_count = response_collector.count - len(response_collector.errors)
639-
estimated_qps = (
640-
response_collector.count / elapsed_time if elapsed_time > 0 else 0
641-
)
649+
estimated_qps = success_count / elapsed_time if elapsed_time > 0 else 0
642650

643651
# Report results
644652
logger.info(f"Completed in {elapsed_time:.1f}s")

0 commit comments

Comments
 (0)