|
22 | 22 | _VLLM_PATTERNS: list[tuple[str, str]] = [ |
23 | 23 | ("tensor_parallel", r"tensor_parallel_size\s*[=:]\s*'?(\d+)"), |
24 | 24 | ("pipeline_parallel", r"pipeline_parallel_size\s*[=:]\s*'?(\d+)"), |
25 | | - ("expert_parallel", r"expert_parallel_size\s*[=:]\s*'?(\d+)"), |
26 | | - ("batch", r"max_num_seqs\s*[=:]\s*'?(\d+)"), |
| 25 | + ("expert_parallel", r"expert_parallel_size\s*[=:]\s*'?(\d+)"), |
| 26 | + ("batch", r"max_num_seqs\s*[=:]\s*'?(\d+)"), |
27 | 27 | ] |
28 | 28 |
|
29 | 29 | # SGLang patterns match the server_args=ServerArgs(...) startup line. |
|
36 | 36 | ] |
37 | 37 |
|
38 | 38 |
|
| 39 | +def _choose_patterns(text: str, serving_framework: str) -> list[tuple[str, str]]: |
| 40 | + if serving_framework == "vllm": |
| 41 | + return _VLLM_PATTERNS |
| 42 | + if serving_framework == "sglang": |
| 43 | + return _SGLANG_PATTERNS |
| 44 | + # auto: detect from log keywords |
| 45 | + if re.search(r"(?i)sglang", text): |
| 46 | + return _SGLANG_PATTERNS |
| 47 | + return _VLLM_PATTERNS |
| 48 | + |
| 49 | +# SGLang patterns match the server_args=ServerArgs(...) startup line. |
| 50 | +# max_running_requests=None does not match \d+, so batch stays null when unlimited. |
| 51 | +_SGLANG_PATTERNS: list[tuple[str, str]] = [ |
| 52 | + ("tensor_parallel", r"tp_size=(\d+)"), |
| 53 | + ("pipeline_parallel", r"pp_size=(\d+)"), |
| 54 | + ("expert_parallel", r"ep_size=(\d+)"), |
| 55 | + ("batch", r"max_running_requests=(\d+)"), |
| 56 | +] |
| 57 | + |
| 58 | + |
39 | 59 | def _choose_patterns(text: str, serving_framework: str) -> list[tuple[str, str]]: |
40 | 60 | if serving_framework == "vllm": |
41 | 61 | return _VLLM_PATTERNS |
|
0 commit comments