Fix/benchmarking#1459
Open
nayana-kumari wants to merge 2 commits into
Open
Conversation
Contributor
Unsigned commits detected! Please sign your commits. For instructions on how to set up GPG/SSH signing and verify your commits, please see the GitHub documentation.
Signed-off-by: modassar rana <modassar.rana@ibm.com>
Signed-off-by: modassar rana <modassar.rana@ibm.com>
a49e9b7 to
1388b15
Compare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
(.venv) [root@ocpai5 llm-d-benchmark]# oc logs -f guidellm-7h42882i -n llmdbench
===== FIX START =====
===== FIX YQ =====
===== VERIFY ENV =====
/tmp/fixbin/yq
/usr/bin/bc
===== RUN HARNESS =====
++ date -u +%Y-%m-%dT%H:%M:%SZ
++ grep '^guidellm:' /workspace/repos.txt
++ cut '-d ' -f3
LLMDBENCH_CONTROL_WORK_DIR=/requests/guidellm-1780565700-yz3g6c_1
LLMDBENCH_DEPLOY_CURRENT_MODEL=ibm-granite/granite-3.3-8b-instruct
LLMDBENCH_DEPLOY_CURRENT_TOKENIZER=ibm-granite/granite-3.3-8b-instruct
LLMDBENCH_DEPLOY_METHODS=modelservice
LLMDBENCH_HARNESS_ARGS=--workload sanity_random.yaml
LLMDBENCH_HARNESS_GIT_BRANCH=v0.5.3
LLMDBENCH_HARNESS_GIT_REPO=https://github.com/vllm-project/guidellm.git
LLMDBENCH_HARNESS_NAME=guidellm
LLMDBENCH_HARNESS_STACK_ENDPOINT_URL=http://infra-llmdbench-inference-gateway-istio.llmdbench.svc.cluster.local:80
LLMDBENCH_HARNESS_STACK_NAME=ibm-gran-f422a27c-instruct
LLMDBENCH_HARNESS_STACK_TYPE=llm-d
LLMDBENCH_HARNESS_START=2026-06-04T09:35:04Z
LLMDBENCH_HARNESS_VERSION=v0.5.3
LLMDBENCH_MAGIC_ENVAR=harness_pod
LLMDBENCH_RUN_EXPERIMENT_ANALYZER=guidellm-analyze_results.sh
LLMDBENCH_RUN_EXPERIMENT_HARNESS=guidellm-llm-d-benchmark.sh
LLMDBENCH_RUN_EXPERIMENT_HARNESS_DIR=guidellm
LLMDBENCH_RUN_EXPERIMENT_HARNESS_LOADGEN_EC=1
LLMDBENCH_RUN_EXPERIMENT_HARNESS_MAX_TRIES=3
LLMDBENCH_RUN_EXPERIMENT_HARNESS_NAME_AUTO=1
LLMDBENCH_RUN_EXPERIMENT_HARNESS_REPORT_EC=1
LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_AUTO=1
LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME=sanity_random.yaml
LLMDBENCH_RUN_EXPERIMENT_ID=guidellm-1780565700-yz3g6c
LLMDBENCH_RUN_EXPERIMENT_LAUNCHER=1
LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR=/requests/guidellm-1780565700-yz3g6c_1
LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR_PREFIX=/requests
LLMDBENCH_RUN_WORKSPACE_DIR=/workspace
LLMDBENCH_VLLM_COMMON_INFERENCE_PORT=8000
LLMDBENCH_VLLM_COMMON_METRICS_PORT=8200
LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED=true
LLMDBENCH_VLLM_COMMON_NAMESPACE=llmdbench
LLMDBENCH_VLLM_MONITORING_METRICS_PATH=/metrics
Running harness: /usr/local/bin/guidellm-llm-d-benchmark.sh
Using experiment result dir: /requests/guidellm-1780565700-yz3g6c_1
Starting metrics collection...
Metrics collector started with PID: 93
Metrics collection logs: /requests/guidellm-1780565700-yz3g6c_1/metrics_collection.log
✔ OpenAIHTTPBackend backend validated with model
ibm-granite/granite-3.3-8b-instruct
{'target':
'http://infra-llmdbench-inference-gateway-istio.llmdbench.svc.cluster.local:80
', 'model': 'ibm-granite/granite-3.3-8b-instruct', 'timeout': 60.0, 'http2':
True, 'follow_redirects': True, 'verify': False, 'openai_paths': {'health':
'health', 'models': 'v1/models', 'text_completions': 'v1/completions',
'chat_completions': 'v1/chat/completions', 'audio_transcriptions':
'v1/audio/transcriptions', 'audio_translations': 'v1/audio/translations'},
'validate_backend': {'method': 'GET', 'url':
'http://infra-llmdbench-inference-gateway-istio.llmdbench.svc.cluster.local:80
/health'}}
✔ Processor resolved
Using model 'ibm-granite/granite-3.3-8b-instruct' as processor
✔ Request loader initialized with inf unique requests
{'data': "[{'prompt_tokens': 50, 'prompt_tokens_stdev': 10,
'prompt_tokens_min': 10, 'prompt_tokens_max': 100, 'output_tokens': 50,
'output_tokens_stdev': 10, 'output_tokens_min': 10, 'output_tokens_max':
100}]", 'data_args': '[]', 'data_samples': -1, 'preprocessors':
['GenerativeColumnMapper', 'GenerativeTextCompletionsRequestFormatter'],
'collator': 'GenerativeRequestCollator', 'sampler': 'None', 'num_workers': 1,
'random_seed': 42}
✔ Resolved transient phase configurations
Warmup: percent=None value=None mode='prefer_duration'
Cooldown: percent=None value=None mode='prefer_duration'
Rampup (Throughput/Concurrent): 0.0
✔ AsyncProfile profile resolved
{'str': "type_='constant' completed_strategies=[] constraints={'max_seconds':
30} rampup_duration=0.0 strategy_type='constant' rate=[1.0]
max_concurrency=None random_seed=42 strategy_types=['constant']", 'type':
'AsyncProfile', 'class': 'AsyncProfile', 'module':
'guidellm.benchmark.profiles', 'attributes': {'type_': 'constant',
'completed_strategies': [], 'constraints': {'max_seconds': 30},
'rampup_duration': 0.0, 'strategy_type': 'constant', 'rate': [1.0],
'max_concurrency': 'None', 'random_seed': 42}}
✔ Output formats resolved
{'json':
"output_path=PosixPath('/requests/guidellm-1780565700-yz3g6c_1/results.json')"
}
✔ Setup complete, starting benchmarks...
ℹ Run Summary Info
|===========|==========|==========|======|======|======|=======|=======|=====|=======|======|=====|
| Benchmark | Timings ||||| Input Tokens ||| Output Tokens |||
| Strategy | Start | End | Dur | Warm | Cool | Comp | Inc | Err | Comp | Inc | Err |
ℹ Text Metrics Statistics (Completed Requests)
|===========|=======|======|======|======|=======|======|======|======|=======|=======|=======|=======|
| Benchmark | Input Tokens |||| Input Words |||| Input Characters ||||
| Strategy | Per Request || Per Second || Per Request || Per Second || Per Request || Per Second ||
ℹ Request Token Statistics (Completed Requests)
|===========|======|======|======|======|======|=======|=======|=======|=========|========|
| Benchmark | Input Tok || Output Tok || Total Tok || Stream Iter || Output Tok ||
| Strategy | Per Req || Per Req || Per Req || Per Req || Per Stream Iter ||
ℹ Request Latency Statistics (Completed Requests)
|===========|========|=========|========|========|=======|=======|=======|=======|
| Benchmark | Request Latency || TTFT || ITL || TPOT ||
| Strategy | Sec || ms || ms || ms ||
ℹ Server Throughput Statistics (All Requests)
|===========|=====|======|=======|======|=======|=======|========|=======|=======|=======|
| Benchmark | Requests |||| Input Tokens || Output Tokens || Total Tokens ||
| Strategy | Per Sec || Concurrency || Per Sec || Per Sec || Per Sec ||
✔ Benchmarking complete, generated 1 benchmark(s)
… json : /requests/guidellm-1780565700-yz3g6c_1/results.json
Stopping metrics collection...
Processing collected metrics...
Metrics collection complete. Check metrics_collection.log for details.
Run metadata written to /requests/guidellm-1780565700-yz3g6c_1/run_metadata.yaml
Harness completed successfully.
Harness completed: /usr/local/bin/guidellm-llm-d-benchmark.sh
Running analysis: /usr/local/bin/guidellm-analyze_results.sh
Converting results.json to Benchmark Report v0.1
Warning: LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_CONFIG empty.
Converting results.json to Benchmark Report v0.2
Environment variable empty: LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_CONFIG
Results data conversion completed successfully.
Integrating metrics summary into benchmark report(s) v0.2...
Metrics integrated into: /requests/guidellm-1780565700-yz3g6c_1/benchmark_report_v0.2,_results.json_0.yaml
Generating metric plots...
Collecting time series data...
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_prefix_cache_hit_rate.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_external_prefix_cache_hit_rate.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_kv_cache_usage_perc.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_num_requests_running.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_num_requests_waiting.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_prefix_cache_hits_total.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_prefix_cache_queries_total.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_external_prefix_cache_hits_total.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_external_prefix_cache_queries_total.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/vllm_num_preemptions_total.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/inference_pool_average_kv_cache_utilization.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/inference_pool_average_queue_size.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/inference_pool_ready_pods.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/pod_startup_times.png
Saved plot: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs/replica_status.png
All visualizations saved to: /requests/guidellm-1780565700-yz3g6c_1/metrics/graphs