Commit 4c8f309

Merge pull request #2 from aiwantaozi/fix/1

fix: benchmark run hangs indefinitely on macOS with default multiprocessing/data loader settings

2 parents 0b055ae + 102b735 commit 4c8f309

2 files changed

Lines changed: 82 additions & 2 deletions

README.md

Lines changed: 45 additions & 0 deletions
@@ -12,6 +12,7 @@ What it adds
 - ShareGPT dataset conversion to GuideLLM-compatible JSONL.
 - A JSON summary output format for benchmark reports.
 - Custom response handler for accurate TTFT/ITL metrics with reasoning tokens (e.g., DeepSeek-R1).
+- Optional backend mode to preserve HTTP error details (`message/type/code`) in failed request records.
 
 Install
 -------
@@ -57,6 +58,32 @@ benchmark-runner benchmark \
   --progress-auth YOUR_TOKEN
 ```
 
+HTTP Error Details for Failed Requests
+--------------------------------------
+GuideLLM's default `openai_http` backend does not always preserve response-body
+error payloads in request-level benchmark errors. Benchmark Runner provides an
+opt-in backend type that enriches failed request errors using OpenAI-style error
+fields (`error.message`, `error.type`, `error.code`):
+
+```bash
+benchmark-runner benchmark run \
+  --target http://localhost:8000/v1 \
+  --backend openai_http_error_detail \
+  --profile constant \
+  --rate 10 \
+  --max-requests 100 \
+  --sample-requests 20 \
+  --data "prompt_tokens=128,output_tokens=256" \
+  --processor PROCESSOR_PATH
+```
+
+When a request fails, `requests.errored[*].info.error` in benchmark outputs will
+contain text similar to:
+`HTTP 400: ... (type=BadRequestError, code=400)`.
+
+Note: if `--sample-requests 0` is used, request-level samples are omitted by design,
+including failed request details.
+
 ShareGPT dataset support
 ------------------------
 If a dataset filename contains "sharegpt" and ends with `.json` or `.jsonl`,
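The `requests.errored[*].info.error` path described in the hunk above can be read back out of a saved report with a few lines of Python. This is a sketch only: the surrounding JSON shape is assumed from that path, and the error text is a made-up example matching the documented format.

```python
import json

# Minimal sketch: pull error strings out of a benchmark report, assuming a
# JSON structure that matches the `requests.errored[*].info.error` path
# described above (the enclosing schema here is illustrative, not official).
report = json.loads("""
{
  "requests": {
    "errored": [
      {"info": {"error": "HTTP 400: invalid prompt (type=BadRequestError, code=400)"}}
    ]
  }
}
""")

errors = [entry["info"]["error"] for entry in report["requests"]["errored"]]
print(errors[0])  # → HTTP 400: invalid prompt (type=BadRequestError, code=400)
```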
@@ -123,6 +150,24 @@ Install development dependencies:
 pip install -e ".[dev]"
 ```
 
+macOS Notes
+-----------
+Benchmark Runner applies two macOS-only runtime defaults to avoid known
+multiprocessing hangs:
+- switch GuideLLM multiprocessing context from `fork` to `spawn` (unless
+  `GUIDELLM__MP_CONTEXT_TYPE` is explicitly set)
+- default `--data-num-workers` to `0` unless provided on the CLI
+
+References:
+- https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
+- https://bugs.python.org/issue33725
+
+To disable these defaults for debugging/experiments:
+
+```bash
+BENCHMARK_RUNNER_DISABLE_MACOS_WORKAROUNDS=1 benchmark-runner benchmark run ...
+```
+
 License
 -------
 See repository license information.
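The fork/spawn distinction behind the README's macOS notes comes from Python's standard multiprocessing start methods; a quick stdlib check (no Benchmark Runner code involved) shows the context the workaround selects:

```python
import multiprocessing as mp

# "fork" is only offered on POSIX platforms; "spawn" is available everywhere
# and is the start method the macOS workaround switches GuideLLM to.
print(mp.get_all_start_methods())

spawn_ctx = mp.get_context("spawn")
print(spawn_ctx.get_start_method())  # → spawn
```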

benchmark_runner/main.py

Lines changed: 37 additions & 2 deletions
@@ -18,6 +18,8 @@
 from __future__ import annotations
 
 import asyncio
+import os
+import platform
 from pathlib import Path
 
 import click
@@ -42,12 +44,44 @@
 )
 from guidellm.scheduler import StrategyType
 from guidellm.schemas import GenerativeRequestType
-from guidellm.settings import print_config
+from guidellm.settings import print_config, settings as guidellm_settings
 from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
 from guidellm.utils import cli as cli_tools
 
-STRATEGY_PROFILE_CHOICES: list[str] = list(get_literal_vals(ProfileType | StrategyType))
 """Available strategy and profile type choices for benchmark execution."""
+STRATEGY_PROFILE_CHOICES: list[str] = list(get_literal_vals(ProfileType | StrategyType))
+
+
+DISABLE_MACOS_WORKAROUNDS_ENV = "BENCHMARK_RUNNER_DISABLE_MACOS_WORKAROUNDS"
+"""Set to 1/true/yes to disable runtime macOS defaults for process/data workers."""
+
+
+def apply_macos_runtime_workarounds(kwargs: dict) -> None:
+    if platform.system() != "Darwin":
+        return
+
+    if os.environ.get(DISABLE_MACOS_WORKAROUNDS_ENV, "").lower() in {
+        "1",
+        "true",
+        "yes",
+    }:
+        return
+
+    # Why this exists:
+    # - GuideLLM defaults to multiprocessing "fork", which can hang on macOS in
+    #   mixed runtime stacks (tokenizers/torch/http clients).
+    # - See Python multiprocessing docs and macOS fork notes:
+    #   https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
+    #   https://bugs.python.org/issue33725
+    if (
+        "GUIDELLM__MP_CONTEXT_TYPE" not in os.environ
+        and guidellm_settings.mp_context_type == "fork"
+    ):
+        guidellm_settings.mp_context_type = "spawn"
+
+    # Keep DataLoader single-process by default on macOS unless user overrides it.
+    if "data_num_workers" not in kwargs:
+        kwargs["data_num_workers"] = 0
 
 
 @click.group()
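The decision logic in `apply_macos_runtime_workarounds` can be exercised off-macOS by factoring it into a pure function. This is a hypothetical refactor for illustration only: `choose_defaults`, its parameters, and its return shape are not part of the patch, which mutates GuideLLM settings and `kwargs` in place instead.

```python
def choose_defaults(system: str, env: dict, current_mp_context: str, kwargs: dict):
    """Mirror of the patch's macOS decision logic as a pure function.

    Returns (mp_context, kwargs) after applying the macOS-only defaults.
    """
    if system != "Darwin":
        return current_mp_context, kwargs
    if env.get("BENCHMARK_RUNNER_DISABLE_MACOS_WORKAROUNDS", "").lower() in {"1", "true", "yes"}:
        return current_mp_context, kwargs
    # Prefer "spawn" unless the user pinned a context via GUIDELLM__MP_CONTEXT_TYPE.
    if "GUIDELLM__MP_CONTEXT_TYPE" not in env and current_mp_context == "fork":
        current_mp_context = "spawn"
    # Keep the DataLoader single-process unless --data-num-workers was given.
    kwargs = dict(kwargs)
    kwargs.setdefault("data_num_workers", 0)
    return current_mp_context, kwargs


# Off-macOS, the inputs pass through untouched; on macOS, fork becomes spawn
# and the DataLoader worker count defaults to 0.
print(choose_defaults("Linux", {}, "fork", {}))
print(choose_defaults("Darwin", {}, "fork", {}))
```

Testing the branches this way (disabled via env var, context pinned by `GUIDELLM__MP_CONTEXT_TYPE`, `data_num_workers` already set) does not require a macOS machine.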
@@ -393,6 +427,7 @@ def benchmark():
 def run(**kwargs):  # noqa: C901
     # Only set CLI args that differ from click defaults
     kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
+    apply_macos_runtime_workarounds(kwargs)
 
     # Handle remapping for request params
     request_type = kwargs.pop("request_type", None)
