From a484baa0e0b7cbb8c702cbcdd6221472c4645721 Mon Sep 17 00:00:00 2001 From: tensorrt-cicd <90828364+tensorrt-cicd@users.noreply.github.com> Date: Fri, 29 May 2026 01:21:09 -0700 Subject: [PATCH 1/2] [nvbugs/6215678][fix] Write aiperf artifacts to per-run subdir for stress test aiperf was given a single shared --output-artifact-dir, so each concurrency run overwrote the previous export and no -openai-completions-<...> subdir existed for extract_stress_test_metrics to parse, raising "No model directories found ...". Point --output-artifact-dir at a unique per-run subdir so every run is preserved and the directory-name parser matches. Gitignore the test-generated artifacts dir; the obsolete waivers are removed in the follow-up patch (2/2). Signed-off-by: tensorrt-cicd <90828364+tensorrt-cicd@users.noreply.github.com> Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> --- .gitignore | 3 +++ tests/integration/defs/stress_test/stress_test.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8d39054480f5..47e39a1b2d71 100644 --- a/.gitignore +++ b/.gitignore @@ -114,6 +114,9 @@ enroot/tensorrt_llm.devel.sqsh # MacOSX Files .DS_Store +# stress test aiperf output artifacts +tests/integration/defs/stress_test/artifacts/ + # Agent related files .claude/agent-memory/ .claude/agent-tests/perf-test-sync/report.html diff --git a/tests/integration/defs/stress_test/stress_test.py b/tests/integration/defs/stress_test/stress_test.py index 04705260ddcb..c84d288336c4 100644 --- a/tests/integration/defs/stress_test/stress_test.py +++ b/tests/integration/defs/stress_test/stress_test.py @@ -964,7 +964,9 @@ def create_aiperf_command(model_name, "--concurrency", str(concurrency), "--output-artifact-dir", - ARTIFACTS_DIR, + os.path.join( + ARTIFACTS_DIR, + f"{model_name}-openai-completions-concurrency{concurrency}"), # "--verbose", ] From 0eb3bb704accc3a622dea312e32126d71d14e2ae Mon Sep 17 00:00:00 2001 From: tensorrt-cicd 
<90828364+tensorrt-cicd@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:22:44 -0700 Subject: [PATCH 2/2] [nvbugs/6215678][chore] Remove stale waiver after fix Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> --- tests/integration/test_lists/waives.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 7d4d45cb8022..ca860df96d87 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -358,8 +358,6 @@ perf/test_perf_sanity.py::test_e2e[disagg_upload-gen_only-gb300_kimi-k25-thinkin perf/test_visual_gen_perf_sanity.py::test_visual_gen_e2e[vg_upload-ltx2_blackwell-ltx2_2stage_bf16_i2v_cfg2_ulysses4_compile_on] SKIP (https://nvbugs/6294413) perf/test_visual_gen_perf_sanity.py::test_visual_gen_e2e[vg_upload-ltx2_blackwell-ltx2_2stage_bf16_t2v_cfg2_ulysses4_compile_on] SKIP (https://nvbugs/6294413) perf/test_visual_gen_perf_sanity.py::test_visual_gen_e2e[vg_upload-ltx2_blackwell-ltx2_nvfp4_i2v_cfg2_ulysses4_compile_on] SKIP (https://nvbugs/6294413) -stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-pytorch-stress-test] SKIP (https://nvbugs/6215678) -stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test] SKIP (https://nvbugs/6215678) test_doc.py::test_url_validity SKIP (https://nvbugs/6215684) test_e2e.py::test_draft_token_tree_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B] SKIP (https://nvbugs/5989907) test_e2e.py::test_draft_token_tree_quickstart_advanced_eagle3_depth_1_tree[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B] SKIP (https://nvbugs/5989907)