Skip to content

Commit ef6e6d7

Browse files
committed
remove temp scripts
1 parent 9572820 commit ef6e6d7

6 files changed

Lines changed: 6 additions & 324 deletions

File tree

benchmarks/format_benchmark_metrics.py

Lines changed: 0 additions & 115 deletions
This file was deleted.

benchmarks/plot_benchmark_metrics.py

Lines changed: 0 additions & 203 deletions
This file was deleted.

docs/benchmark.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Metrics are aligned with `benchmark_serving.py --percentile-metrics`:
7474
| `ttft` | Time to First Token (client arrival → first token) | ms |
7575
| `s_ttft` | Server TTFT (inference start → first token) | ms |
7676
| `tpot` | Time per Output Token (excluding first token) | ms |
77-
| `itl` | Inter-token Latency | ms |
77+
| `s_itl` | Inference Inter-token Latency | ms |
7878
| `e2el` | End-to-end Latency (client arrival → last token) | ms |
7979
| `s_e2el` | Server E2EL (inference start → last token) | ms |
8080
| `s_decode` | Decode speed (excluding first token) | tok/s |

docs/zh/benchmark.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
7474
| `ttft` | 首 Token 时延(客户端到达 → 首 Token) | ms |
7575
| `s_ttft` | 服务端首 Token 时延(推理开始 → 首 Token) | ms |
7676
| `tpot` | 每 Token 输出时延(不含首 Token) | ms |
77-
| `itl` | Token 间时延 | ms |
77+
| `s_itl` | 推理 Token 间时延 | ms |
7878
| `e2el` | 端到端时延(客户端到达 → 最后一个 Token) | ms |
7979
| `s_e2el` | 服务端端到端时延(推理开始 → 最后一个 Token) | ms |
8080
| `s_decode` | 解码速度(不含首 Token) | tok/s |

fastdeploy/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1911,7 +1911,7 @@ class BenchmarkMetricsConfig:
19111911
ttft - Time to First Token (client arrival → first token)
19121912
s_ttft - Server TTFT (inference start → first token)
19131913
tpot - Time per Output Token (excluding first token)
1914-
itl - Inter-token Latency
1914+
s_itl - Inference Inter-token Latency
19151915
e2el - End-to-end Latency (client arrival → last token)
19161916
s_e2el - Server E2EL (inference start → last token)
19171917
s_decode - Decode speed (tokens/s, excluding first token)
@@ -1932,7 +1932,7 @@ class BenchmarkMetricsConfig:
19321932
"ttft", # Time to First Token
19331933
"s_ttft", # Server TTFT
19341934
"tpot", # Time per Output Token
1935-
"itl", # Inter-token Latency
1935+
"s_itl", # Infer Inter-token Latency
19361936
"e2el", # End-to-end Latency
19371937
"s_e2el", # Server E2EL
19381938
"s_decode", # Decode speed (tok/s)

fastdeploy/metrics/benchmark_metrics_logger.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,8 @@ def _compute_rolling_stats(self) -> dict:
180180
result["s_ttft_ms"] = self._stats(s_ttfts, percentile_values)
181181
if "tpot" in selected:
182182
result["tpot_ms"] = self._stats(tpots, percentile_values)
183-
if "itl" in selected:
184-
result["itl_ms"] = self._stats(all_itls, percentile_values)
183+
if "s_itl" in selected:
184+
result["s_itl_ms"] = self._stats(all_itls, percentile_values)
185185
if "e2el" in selected:
186186
result["e2el_ms"] = self._stats(e2els, percentile_values)
187187
if "s_e2el" in selected:

0 commit comments

Comments
 (0)