Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions examples/02_ServerBenchmarking/offline_llama3_8b_cnn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@ settings:
client:
num_workers: 4

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
7 changes: 0 additions & 7 deletions examples/02_ServerBenchmarking/online_llama2_70b_cnn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,6 @@ settings:
client:
num_workers: 4

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
7 changes: 0 additions & 7 deletions examples/04_GPTOSS120B_Example/gptoss_120b_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@ settings:
num_workers: 4
record_worker_events: false

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:3000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,6 @@ settings:
num_workers: 8
record_worker_events: false

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:30000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,6 @@ settings:
num_workers: 8
record_worker_events: false

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
7 changes: 0 additions & 7 deletions examples/05_Llama3.1-8B_Example/offline_llama3_8b_cnn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,6 @@ settings:
client:
num_workers: 4 # Number of client workers

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
7 changes: 0 additions & 7 deletions examples/05_Llama3.1-8B_Example/online_llama3_8b_cnn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,6 @@ settings:
client:
num_workers: 4 # Number of client workers

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
7 changes: 0 additions & 7 deletions examples/06_Llama2-70B_Example/online_llama2_70b_orca.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,6 @@ settings:
client:
num_workers: 4

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ settings:
# Increase timeout for slow worker startup (spawn, imports). Default 40s may be too short.
worker_initialization_timeout: 120

metrics:
collect:
- "throughput"
- "latency"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ settings:
# Increase timeout for slow worker startup (spawn, imports). Default 40s may be too short.
worker_initialization_timeout: 120

metrics:
collect:
- "latency"
- "ttft"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
48 changes: 0 additions & 48 deletions src/inference_endpoint/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,51 +417,6 @@ class OnlineSettings(Settings):
pass


def _default_metrics() -> list[str]:
"""
TODO: PoC only, subject to change!
Default metrics to collect."""
return ["throughput", "latency", "ttft", "tpot"]


class Metrics(BaseModel):
    """Metrics collection configuration.

    Note: Currently uses string-based metric names for YAML simplicity.
    Use get_metric_types() to convert to actual Metric type classes.
    """

    # extra="forbid" rejects unknown keys; frozen=True makes instances
    # immutable after construction.
    model_config = ConfigDict(extra="forbid", frozen=True)

    # Metric names to collect. The names themselves are not checked here;
    # an unrecognized name is only reported by get_metric_types().
    collect: list[str] = Field(default_factory=_default_metrics)

    def get_metric_types(self) -> list[type[metrics.Metric]]:
        """Convert string metric names to Metric type classes.

        Returns:
            List of Metric type classes corresponding to the ``collect``
            list, in the same order (duplicates are preserved).

        Raises:
            ValueError: If a metric name is not recognized. The first
                unknown name encountered is the one reported.
        """
        metric_map = {
            "throughput": metrics.Throughput,
            "latency": metrics.QueryLatency,
            "ttft": metrics.TTFT,
            "tpot": metrics.TPOT,
        }

        result = []
        for name in self.collect:
            if name not in metric_map:
                raise ValueError(
                    f"Unknown metric name: {name}. Available: {list(metric_map.keys())}"
                )
            result.append(metric_map[name])

        return result


class EndpointConfig(BaseModel):
"""Endpoint connection configuration.

Expand Down Expand Up @@ -516,9 +471,6 @@ class BenchmarkConfig(WithUpdatesMixin, BaseModel):
default_factory=list, description="Dataset configs"
)
settings: Settings = Field(default_factory=Settings)
metrics: Annotated[Metrics, cyclopts.Parameter(show=False)] = Field(
default_factory=Metrics
)
endpoint_config: EndpointConfig
report_dir: Annotated[
Path | None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,6 @@ settings:
max_idle_time: 4.0 # Discard connections idle longer than this (seconds)
min_required_connections: -1 # Min connections to initialize (-1=auto, 0=disabled)
worker_gc_mode: relaxed # Worker GC strategy | options: disabled, relaxed, system
metrics:
collect:
- throughput
- latency
- ttft
- tpot
endpoint_config:
endpoints: # Endpoint URL(s)
- '<ENDPOINT_URL eg: http://localhost:8000>'
Expand Down
4 changes: 0 additions & 4 deletions src/inference_endpoint/config/templates/eval_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ settings:
client:
num_workers: 4

metrics:
collect:
- "accuracy"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,6 @@ settings:
max_idle_time: 4.0 # Discard connections idle longer than this (seconds)
min_required_connections: -1 # Min connections to initialize (-1=auto, 0=disabled)
worker_gc_mode: relaxed # Worker GC strategy | options: disabled, relaxed, system
metrics:
collect:
- throughput
- latency
- ttft
- tpot
endpoint_config:
endpoints: # Endpoint URL(s)
- '<ENDPOINT_URL eg: http://localhost:8000>'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,6 @@ settings:
max_idle_time: 4.0 # Discard connections idle longer than this (seconds)
min_required_connections: -1 # Min connections to initialize (-1=auto, 0=disabled)
worker_gc_mode: relaxed # Worker GC strategy | options: disabled, relaxed, system
metrics:
collect:
- throughput
- latency
- ttft
- tpot
endpoint_config:
endpoints: # Endpoint URL(s)
- '<ENDPOINT_URL eg: http://localhost:8000>'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,6 @@ settings:
client:
num_workers: 4

metrics:
collect:
- "throughput"
- "latency"
- "ttft"
- "tpot"
- "accuracy"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
15 changes: 0 additions & 15 deletions tests/unit/config/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
Dataset,
DatasetType,
EvalMethod,
Metrics,
ModelParams,
OSLDistribution,
OSLDistributionType,
Expand Down Expand Up @@ -109,20 +108,6 @@ def test_auto_derive_name(self):
assert ds.name == "my_data"


class TestMetrics:
    """Unit tests for ``Metrics.get_metric_types``."""

    @pytest.mark.unit
    def test_get_metric_types(self):
        """Four known metric names yield four resolved entries."""
        m = Metrics(collect=["throughput", "latency", "ttft", "tpot"])
        types = m.get_metric_types()
        assert len(types) == 4

    @pytest.mark.unit
    def test_unknown_metric_raises(self):
        """An unknown name is accepted at construction but fails on lookup."""
        m = Metrics(collect=["nonexistent"])
        with pytest.raises(ValueError, match="Unknown metric"):
            m.get_metric_types()


class TestBenchmarkConfig:
@pytest.mark.unit
def test_minimal_offline(self):
Expand Down
4 changes: 0 additions & 4 deletions tests/unit/config/test_yaml_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ def test_load_valid_yaml(self, tmp_path):
recv_buffer_size: 16777216
send_buffer_size: 8388608

metrics:
collect:
- "throughput"

endpoint_config:
endpoints:
- "http://localhost:8000"
Expand Down
Loading