diff --git a/examples/02_ServerBenchmarking/offline_llama3_8b_cnn.yaml b/examples/02_ServerBenchmarking/offline_llama3_8b_cnn.yaml
index fab814e0..bc5b92f1 100644
--- a/examples/02_ServerBenchmarking/offline_llama3_8b_cnn.yaml
+++ b/examples/02_ServerBenchmarking/offline_llama3_8b_cnn.yaml
@@ -30,13 +30,6 @@ settings:
   client:
     num_workers: 4
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/examples/02_ServerBenchmarking/online_llama2_70b_cnn.yaml b/examples/02_ServerBenchmarking/online_llama2_70b_cnn.yaml
index bda7f884..d1603544 100644
--- a/examples/02_ServerBenchmarking/online_llama2_70b_cnn.yaml
+++ b/examples/02_ServerBenchmarking/online_llama2_70b_cnn.yaml
@@ -31,13 +31,6 @@ settings:
   client:
     num_workers: 4
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/examples/04_GPTOSS120B_Example/gptoss_120b_example.yaml b/examples/04_GPTOSS120B_Example/gptoss_120b_example.yaml
index 8e0a7820..fb5edd33 100644
--- a/examples/04_GPTOSS120B_Example/gptoss_120b_example.yaml
+++ b/examples/04_GPTOSS120B_Example/gptoss_120b_example.yaml
@@ -30,13 +30,6 @@ settings:
     num_workers: 4
     record_worker_events: false
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:3000"
diff --git a/examples/04_GPTOSS120B_Example/sglang_gptoss_120b_example.yaml b/examples/04_GPTOSS120B_Example/sglang_gptoss_120b_example.yaml
index 5a6ad852..761d5c3b 100644
--- a/examples/04_GPTOSS120B_Example/sglang_gptoss_120b_example.yaml
+++ b/examples/04_GPTOSS120B_Example/sglang_gptoss_120b_example.yaml
@@ -52,13 +52,6 @@ settings:
     num_workers: 8
     record_worker_events: false
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:30000"
diff --git a/examples/04_GPTOSS120B_Example/vllm_gptoss_120b_example.yaml b/examples/04_GPTOSS120B_Example/vllm_gptoss_120b_example.yaml
index fe6e3e52..7cf215f9 100644
--- a/examples/04_GPTOSS120B_Example/vllm_gptoss_120b_example.yaml
+++ b/examples/04_GPTOSS120B_Example/vllm_gptoss_120b_example.yaml
@@ -55,13 +55,6 @@ settings:
     num_workers: 8
     record_worker_events: false
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/examples/05_Llama3.1-8B_Example/offline_llama3_8b_cnn.yaml b/examples/05_Llama3.1-8B_Example/offline_llama3_8b_cnn.yaml
index 2f85210d..57e105c7 100644
--- a/examples/05_Llama3.1-8B_Example/offline_llama3_8b_cnn.yaml
+++ b/examples/05_Llama3.1-8B_Example/offline_llama3_8b_cnn.yaml
@@ -40,13 +40,6 @@ settings:
   client:
     num_workers: 4 # Number of client workers
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/examples/05_Llama3.1-8B_Example/online_llama3_8b_cnn.yaml b/examples/05_Llama3.1-8B_Example/online_llama3_8b_cnn.yaml
index 0190d620..66861f2f 100644
--- a/examples/05_Llama3.1-8B_Example/online_llama3_8b_cnn.yaml
+++ b/examples/05_Llama3.1-8B_Example/online_llama3_8b_cnn.yaml
@@ -41,13 +41,6 @@ settings:
   client:
     num_workers: 4 # Number of client workers
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/examples/06_Llama2-70B_Example/online_llama2_70b_orca.yaml b/examples/06_Llama2-70B_Example/online_llama2_70b_orca.yaml
index f54d6dc2..5a7f6ce5 100644
--- a/examples/06_Llama2-70B_Example/online_llama2_70b_orca.yaml
+++ b/examples/06_Llama2-70B_Example/online_llama2_70b_orca.yaml
@@ -34,13 +34,6 @@ settings:
   client:
     num_workers: 4
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
index 394209be..95445781 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/offline_qwen3_vl_235b_a22b_shopify.yaml
@@ -42,11 +42,6 @@ settings:
     # Increase timeout for slow worker startup (spawn, imports). Default 40s may be too short.
     worker_initialization_timeout: 120
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
index 44ddb2b9..db23f163 100644
--- a/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
+++ b/examples/08_Qwen3-VL-235B-A22B_Example/online_qwen3_vl_235b_a22b_shopify.yaml
@@ -36,11 +36,6 @@ settings:
     # Increase timeout for slow worker startup (spawn, imports). Default 40s may be too short.
     worker_initialization_timeout: 120
 
-metrics:
-  collect:
-    - "latency"
-    - "ttft"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/src/inference_endpoint/config/schema.py b/src/inference_endpoint/config/schema.py
index a8fb87ac..e7bfe19e 100644
--- a/src/inference_endpoint/config/schema.py
+++ b/src/inference_endpoint/config/schema.py
@@ -417,51 +417,6 @@ class OnlineSettings(Settings):
     pass
 
 
-def _default_metrics() -> list[str]:
-    """
-    TODO: PoC only, subject to change!
-    Default metrics to collect."""
-    return ["throughput", "latency", "ttft", "tpot"]
-
-
-class Metrics(BaseModel):
-    """Metrics collection configuration.
-
-    Note: Currently uses string-based metric names for YAML simplicity.
-    Use get_metric_types() to convert to actual Metric type classes.
-    """
-
-    model_config = ConfigDict(extra="forbid", frozen=True)
-
-    collect: list[str] = Field(default_factory=_default_metrics)
-
-    def get_metric_types(self) -> list[type[metrics.Metric]]:
-        """Convert string metric names to Metric type classes.
-
-        Returns:
-            List of Metric type classes corresponding to collect list
-
-        Raises:
-            ValueError: If metric name is not recognized
-        """
-        metric_map = {
-            "throughput": metrics.Throughput,
-            "latency": metrics.QueryLatency,
-            "ttft": metrics.TTFT,
-            "tpot": metrics.TPOT,
-        }
-
-        result = []
-        for name in self.collect:
-            if name not in metric_map:
-                raise ValueError(
-                    f"Unknown metric name: {name}. Available: {list(metric_map.keys())}"
-                )
-            result.append(metric_map[name])
-
-        return result
-
-
 class EndpointConfig(BaseModel):
     """Endpoint connection configuration.
 
@@ -516,9 +471,6 @@ class BenchmarkConfig(WithUpdatesMixin, BaseModel):
         default_factory=list, description="Dataset configs"
     )
     settings: Settings = Field(default_factory=Settings)
-    metrics: Annotated[Metrics, cyclopts.Parameter(show=False)] = Field(
-        default_factory=Metrics
-    )
     endpoint_config: EndpointConfig
     report_dir: Annotated[
         Path | None,
diff --git a/src/inference_endpoint/config/templates/concurrency_template_full.yaml b/src/inference_endpoint/config/templates/concurrency_template_full.yaml
index 1e18b3bf..48f1b34f 100644
--- a/src/inference_endpoint/config/templates/concurrency_template_full.yaml
+++ b/src/inference_endpoint/config/templates/concurrency_template_full.yaml
@@ -69,12 +69,6 @@ settings:
     max_idle_time: 4.0  # Discard connections idle longer than this (seconds)
     min_required_connections: -1  # Min connections to initialize (-1=auto, 0=disabled)
     worker_gc_mode: relaxed  # Worker GC strategy | options: disabled, relaxed, system
-metrics:
-  collect:
-  - throughput
-  - latency
-  - ttft
-  - tpot
 endpoint_config:
   endpoints:  # Endpoint URL(s)
   - '<ENDPOINT_URL eg: http://localhost:8000>'
diff --git a/src/inference_endpoint/config/templates/eval_template.yaml b/src/inference_endpoint/config/templates/eval_template.yaml
index 947c3447..3213efd1 100644
--- a/src/inference_endpoint/config/templates/eval_template.yaml
+++ b/src/inference_endpoint/config/templates/eval_template.yaml
@@ -26,10 +26,6 @@ settings:
   client:
     num_workers: 4
 
-metrics:
-  collect:
-    - "accuracy"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/src/inference_endpoint/config/templates/offline_template_full.yaml b/src/inference_endpoint/config/templates/offline_template_full.yaml
index 29a661ed..7c5f43c6 100644
--- a/src/inference_endpoint/config/templates/offline_template_full.yaml
+++ b/src/inference_endpoint/config/templates/offline_template_full.yaml
@@ -69,12 +69,6 @@ settings:
     max_idle_time: 4.0  # Discard connections idle longer than this (seconds)
     min_required_connections: -1  # Min connections to initialize (-1=auto, 0=disabled)
     worker_gc_mode: relaxed  # Worker GC strategy | options: disabled, relaxed, system
-metrics:
-  collect:
-  - throughput
-  - latency
-  - ttft
-  - tpot
 endpoint_config:
   endpoints:  # Endpoint URL(s)
   - '<ENDPOINT_URL eg: http://localhost:8000>'
diff --git a/src/inference_endpoint/config/templates/online_template_full.yaml b/src/inference_endpoint/config/templates/online_template_full.yaml
index ad1a2423..6e274f8e 100644
--- a/src/inference_endpoint/config/templates/online_template_full.yaml
+++ b/src/inference_endpoint/config/templates/online_template_full.yaml
@@ -69,12 +69,6 @@ settings:
     max_idle_time: 4.0  # Discard connections idle longer than this (seconds)
     min_required_connections: -1  # Min connections to initialize (-1=auto, 0=disabled)
     worker_gc_mode: relaxed  # Worker GC strategy | options: disabled, relaxed, system
-metrics:
-  collect:
-  - throughput
-  - latency
-  - ttft
-  - tpot
 endpoint_config:
   endpoints:  # Endpoint URL(s)
   - '<ENDPOINT_URL eg: http://localhost:8000>'
diff --git a/src/inference_endpoint/config/templates/submission_template.yaml b/src/inference_endpoint/config/templates/submission_template.yaml
index df760a5f..793c047d 100644
--- a/src/inference_endpoint/config/templates/submission_template.yaml
+++ b/src/inference_endpoint/config/templates/submission_template.yaml
@@ -58,14 +58,6 @@ settings:
   client:
     num_workers: 4
 
-metrics:
-  collect:
-    - "throughput"
-    - "latency"
-    - "ttft"
-    - "tpot"
-    - "accuracy"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"
diff --git a/tests/unit/config/test_schema.py b/tests/unit/config/test_schema.py
index f6f33afa..1792e52c 100644
--- a/tests/unit/config/test_schema.py
+++ b/tests/unit/config/test_schema.py
@@ -22,7 +22,6 @@
     Dataset,
     DatasetType,
     EvalMethod,
-    Metrics,
     ModelParams,
     OSLDistribution,
     OSLDistributionType,
@@ -109,20 +108,6 @@ def test_auto_derive_name(self):
         assert ds.name == "my_data"
 
 
-class TestMetrics:
-    @pytest.mark.unit
-    def test_get_metric_types(self):
-        m = Metrics(collect=["throughput", "latency", "ttft", "tpot"])
-        types = m.get_metric_types()
-        assert len(types) == 4
-
-    @pytest.mark.unit
-    def test_unknown_metric_raises(self):
-        m = Metrics(collect=["nonexistent"])
-        with pytest.raises(ValueError, match="Unknown metric"):
-            m.get_metric_types()
-
-
 class TestBenchmarkConfig:
     @pytest.mark.unit
     def test_minimal_offline(self):
diff --git a/tests/unit/config/test_yaml_loader.py b/tests/unit/config/test_yaml_loader.py
index 37190931..716055dd 100644
--- a/tests/unit/config/test_yaml_loader.py
+++ b/tests/unit/config/test_yaml_loader.py
@@ -57,10 +57,6 @@ def test_load_valid_yaml(self, tmp_path):
       recv_buffer_size: 16777216
       send_buffer_size: 8388608
 
-metrics:
-  collect:
-    - "throughput"
-
 endpoint_config:
   endpoints:
     - "http://localhost:8000"