Skip to content

Commit d44be10

Browse files
committed
Regenerate template files
1 parent 936cb5a commit d44be10

6 files changed

Lines changed: 12 additions & 12 deletions

File tree

src/inference_endpoint/config/templates/concurrency_template.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ settings:
1717
type: concurrency # Load pattern type | options: max_throughput, poisson, concurrency, burst, step
1818
target_concurrency: 32 # Concurrent requests
1919
endpoint_config:
20-
endpoints: # Endpoint URL(s)
21-
- 'http://localhost:8000'
20+
endpoints: # Endpoint URL(s). Must include scheme, e.g. 'http://host:port'.
21+
- '<ENDPOINT_URL e.g. http://localhost:8000>'

src/inference_endpoint/config/templates/concurrency_template_full.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ settings:
6969
min_required_connections: -1 # Min connections to initialize (-1=auto, 0=disabled)
7070
worker_gc_mode: relaxed # Worker GC strategy | options: disabled, relaxed, system
7171
endpoint_config:
72-
endpoints: # Endpoint URL(s)
73-
- 'http://localhost:8000'
72+
endpoints: # Endpoint URL(s). Must include scheme, e.g. 'http://host:port'.
73+
- '<ENDPOINT_URL e.g. http://localhost:8000>'
7474
api_key: null # API key
7575
api_type: openai # API type: openai or sglang | options: openai, sglang
7676
report_dir: null # Report output directory

src/inference_endpoint/config/templates/offline_template.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ settings:
1414
max_duration_ms: 0 # Maximum test duration in ms (0 for no limit)
1515
n_samples_to_issue: null # Sample count override
1616
endpoint_config:
17-
endpoints: # Endpoint URL(s)
18-
- 'http://localhost:8000'
17+
endpoints: # Endpoint URL(s). Must include scheme, e.g. 'http://host:port'.
18+
- '<ENDPOINT_URL e.g. http://localhost:8000>'

src/inference_endpoint/config/templates/offline_template_full.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ settings:
6969
min_required_connections: -1 # Min connections to initialize (-1=auto, 0=disabled)
7070
worker_gc_mode: relaxed # Worker GC strategy | options: disabled, relaxed, system
7171
endpoint_config:
72-
endpoints: # Endpoint URL(s)
73-
- 'http://localhost:8000'
72+
endpoints: # Endpoint URL(s). Must include scheme, e.g. 'http://host:port'.
73+
- '<ENDPOINT_URL e.g. http://localhost:8000>'
7474
api_key: null # API key
7575
api_type: openai # API type: openai or sglang | options: openai, sglang
7676
report_dir: null # Report output directory

src/inference_endpoint/config/templates/online_template.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ settings:
1717
type: poisson # Load pattern type | options: max_throughput, poisson, concurrency, burst, step
1818
target_qps: 10.0 # Target QPS
1919
endpoint_config:
20-
endpoints: # Endpoint URL(s)
21-
- 'http://localhost:8000'
20+
endpoints: # Endpoint URL(s). Must include scheme, e.g. 'http://host:port'.
21+
- '<ENDPOINT_URL e.g. http://localhost:8000>'

src/inference_endpoint/config/templates/online_template_full.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ settings:
6969
min_required_connections: -1 # Min connections to initialize (-1=auto, 0=disabled)
7070
worker_gc_mode: relaxed # Worker GC strategy | options: disabled, relaxed, system
7171
endpoint_config:
72-
endpoints: # Endpoint URL(s)
73-
- 'http://localhost:8000'
72+
endpoints: # Endpoint URL(s). Must include scheme, e.g. 'http://host:port'.
73+
- '<ENDPOINT_URL e.g. http://localhost:8000>'
7474
api_key: null # API key
7575
api_type: openai # API type: openai or sglang | options: openai, sglang
7676
report_dir: null # Report output directory

0 commit comments

Comments (0)