[TRTLLM-13028][doc] Add VisualGen API walkthrough example and docs page (#14685)

zhenhuaw-me · web-flow · commit c3f6d981b133 · 2026-06-02T11:19:06.000+08:00
Signed-off-by: Zhenhua Wang <zhenhuaw@nvidia.com>
diff --git a/docs/source/helper.py b/docs/source/helper.py
@@ -64,8 +64,18 @@ def extract_meta_info(filename: str) -> Optional[DocMeta]:
 def generate_examples():
     root_dir = Path(__file__).parent.parent.parent.resolve()
     ignore_list = {
-        '__init__.py', 'quickstart_example.py', 'quickstart_advanced.py',
-        'quickstart_multimodal.py', 'star_attention.py'
+        '__init__.py',
+        'quickstart_example.py',
+        'quickstart_advanced.py',
+        'quickstart_multimodal.py',
+        'star_attention.py',
+        # Older VisualGen example scripts without ### :title metadata; opt
+        # in by adding the metadata block and removing the entry below.
+        'visual_gen_flux.py',
+        'visual_gen_ltx2.py',
+        'visual_gen_wan_i2v.py',
+        'visual_gen_wan_t2v.py',
+        'visual_gen_mgmn_distributed.sh'
     }
     doc_dir = root_dir / "docs/source/examples"
 
@@ -95,6 +105,13 @@ def collect_script_paths(examples_subdir: str) -> list[Path]:
     ]
     serve_script_base_url = f"https://github.com/NVIDIA/TensorRT-LLM/blob/{commit_hash}/examples/serve"
 
+    # Collect source paths for VisualGen examples
+    visual_gen_script_paths = collect_script_paths("visual_gen")
+    visual_gen_doc_paths = [
+        doc_dir / f"{path.stem}.rst" for path in visual_gen_script_paths
+    ]
+    visual_gen_script_base_url = f"https://github.com/NVIDIA/TensorRT-LLM/blob/{commit_hash}/examples/visual_gen"
+
     def _get_lines_without_metadata(filename: str) -> str:
         """Get line ranges that exclude metadata lines.
         Returns a string like "5-10,15-20" for use in :lines: directive.
@@ -267,6 +284,18 @@ def write_index(metas: list[DocMeta], doc_template_path: Path,
                 example_name="Online Serving Examples",
                 section_order=[])
 
+    # Generate the toctree for VisualGen example scripts. No section_order
+    # while the example set is small; add one alongside ### :section
+    # metadata on the scripts once we have enough examples to group.
+    visual_gen_metas = write_scripts(visual_gen_script_base_url,
+                                     visual_gen_script_paths,
+                                     visual_gen_doc_paths)
+    write_index(metas=visual_gen_metas,
+                doc_template_path=doc_dir / "llm_examples_index.template.rst_",
+                doc_path=doc_dir / "visual_gen_examples.rst",
+                example_name="VisualGen Examples",
+                section_order=[])
+
 
 def extract_all_and_eval(file_path):
     ''' Extract the __all__ variable from a Python file.
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -23,6 +23,7 @@ Welcome to TensorRT LLM's Documentation!
    :name: Deployment Guide
 
    examples/llm_api_examples.rst
+   examples/visual_gen_examples.rst
    examples/trtllm_serve_examples
    examples/dynamo_k8s_example.rst
    deployment-guide/index.rst
diff --git a/examples/visual_gen/api_walkthrough.py b/examples/visual_gen/api_walkthrough.py
@@ -0,0 +1,71 @@
+### :title API walkthrough
+### :order 0
+from tensorrt_llm import VisualGen, VisualGenArgs
+from tensorrt_llm.visual_gen.args import CompilationConfig
+
+
+def main():
+    # End-to-end tour of the VisualGen API: list the supported models, inspect
+    # the pipeline config defaults, construct a VisualGen instance, print its
+    # extra-param specs, adjust the default request params, generate from a
+    # text prompt, and save the result to disk.
+    # 1. List supported models registered with the pipeline registry.
+    print("\n=== Supported models ===")
+    for hf_id in VisualGen.supported_models():
+        print(f"  - {hf_id}")
+
+    # 2. Inspect default pipeline_config knobs for the chosen model. These
+    #    are per-architecture runtime knobs (e.g. Lightricks/LTX-2's
+    #    ``text_encoder_path``); Wan-AI/Wan2.1-T2V-1.3B-Diffusers registers
+    #    none, so the dict is empty.
+    pipeline_defaults = VisualGen.pipeline_config("Wan-AI/Wan2.1-T2V-1.3B-Diffusers")
+    print("\n=== Pipeline config defaults for Wan-AI/Wan2.1-T2V-1.3B-Diffusers ===")
+    print(f"  {pipeline_defaults or '(none)'}")
+
+    # 3. Build VisualGenArgs. ``pipeline_config`` carries the per-architecture
+    #    knobs from step 2 (here we just forward the registered defaults;
+    #    real callers would override entries like ``text_encoder_path``).
+    #    ``compilation_config.skip_warmup`` skips the post-load warmup pass.
+    visual_gen = VisualGen(
+        model="Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
+        args=VisualGenArgs(
+            pipeline_config=pipeline_defaults,
+            compilation_config=CompilationConfig(skip_warmup=True),
+        ),
+    )
+
+    # 4. Discover model-specific ``extra_params`` accepted by the loaded
+    #    pipeline. Wan-AI/Wan2.1-T2V-1.3B-Diffusers declares none;
+    #    Wan-AI/Wan2.2-T2V-A14B-Diffusers surfaces ``guidance_scale_2`` and
+    #    ``boundary_ratio`` here.
+    specs = visual_gen.extra_param_specs
+    print("\n=== Extra param specs (extra_params keys) ===")
+    for name, spec in specs.items():
+        print(f"  - {name}: {spec}")
+    if not specs:
+        print("  (none for this model)")
+
+    # 5. Take the pipeline's resolved defaults (height/width/steps/etc.)
+    #    and override fields. ``default_params`` already pre-populates
+    #    ``params.extra_params`` with each declared spec's default, so the
+    #    override below shows how a caller would set a model-specific knob
+    #    -- no-op on Wan-AI/Wan2.1-T2V-1.3B-Diffusers, but the wiring is
+    #    the same on Wan-AI/Wan2.2-T2V-A14B-Diffusers where
+    #    ``extra_params["guidance_scale_2"]`` is honored.
+    params = visual_gen.default_params
+    # Wan requires num_frames of the form 4k+1; 1.25x the model default (81)
+    # is 101.25, so we round to the nearest valid value, 101 (= 4*25 + 1).
+    params.num_frames = 101
+    for name, spec in specs.items():
+        params.extra_params[name] = spec.default
+
+    print("\n=== Request params ===")
+    print(params.model_dump_json(indent=2))
+
+    # Run generation for a single text prompt using the params assembled in
+    # step 5.
+    output = visual_gen.generate(inputs="A cute cat playing piano in a sunny room", params=params)
+
+    # 6. Persist to disk. ``save`` infers the container from the file
+    #    extension (.avi/.mp4) and uses the frame_rate carried on the
+    #    output.
+    saved = output.save("api_walkthrough_output.avi")
+    print(f"\nSaved: {saved}")
+
+
+# Run the walkthrough only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration/defs/examples/test_visual_gen.py b/tests/integration/defs/examples/test_visual_gen.py
@@ -1188,6 +1188,28 @@ def test_visual_gen_quickstart(_visual_gen_deps, llm_root, llm_venv):
     assert os.path.isfile(output_path), f"Quickstart did not produce output.avi at {output_path}"
 
 
+def test_visual_gen_api_walkthrough(_visual_gen_deps, llm_root, llm_venv):
+    """Run examples/visual_gen/api_walkthrough.py end-to-end."""
+    # Locate the checkpoint under the shared models root; skip (rather than
+    # fail) when it is not present on this machine.
+    scratch_space = conftest.llm_models_root()
+    model_src = os.path.join(scratch_space, WAN_T2V_MODEL_SUBPATH)
+    if not os.path.isdir(model_src):
+        pytest.skip(
+            f"Model not found: {model_src} "
+            f"(set LLM_MODELS_ROOT or place {WAN_T2V_MODEL_SUBPATH} under scratch)"
+        )
+
+    # Symlink the checkpoint to <working dir>/Wan-AI/<subpath>. Presumably this
+    # lets the script's hard-coded "Wan-AI/..." model id resolve to a local
+    # directory instead of a hub download -- TODO confirm against VisualGen.
+    # An existing symlink is reused as-is.
+    model_dst = os.path.join(llm_venv.get_working_directory(), "Wan-AI", WAN_T2V_MODEL_SUBPATH)
+    if not os.path.islink(model_dst):
+        os.makedirs(os.path.dirname(model_dst), exist_ok=True)
+        os.symlink(model_src, model_dst, target_is_directory=True)
+
+    script_path = os.path.join(llm_root, "examples", "visual_gen", "api_walkthrough.py")
+    venv_check_call(llm_venv, [script_path])
+
+    # The script saves "api_walkthrough_output.avi" to a relative path, so this
+    # check assumes venv_check_call runs it with the venv working directory as
+    # the current directory -- NOTE(review): confirm.
+    output_path = os.path.join(llm_venv.get_working_directory(), "api_walkthrough_output.avi")
+    assert os.path.isfile(output_path), f"API walkthrough did not produce {output_path}"
+
+
 # =============================================================================
 # Core example tests — run per-model scripts from examples/visual_gen/models/
 # with shared YAML configs from examples/visual_gen/configs/.
diff --git a/tests/integration/test_lists/test-db/l0_dgx_b200.yml b/tests/integration/test_lists/test-db/l0_dgx_b200.yml
@@ -304,6 +304,7 @@ l0_dgx_b200:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTEDSL-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-low_precision_combine=True-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp4_tp2pp2[torch_compile=False-enable_gemm_allreduce_fusion=False]
   - examples/test_visual_gen.py::test_visual_gen_quickstart
+  - examples/test_visual_gen.py::test_visual_gen_api_walkthrough
   - examples/test_visual_gen.py::test_wan_t2v_example
   - examples/test_visual_gen.py::test_flux1_lpips_against_golden
   - examples/test_visual_gen.py::test_flux2_lpips_against_golden
diff --git a/tests/integration/test_lists/test-db/l0_gh200.yml b/tests/integration/test_lists/test-db/l0_gh200.yml
@@ -24,6 +24,7 @@ l0_gh200:
   - unittest/llmapi/test_llm_quant.py
   - llmapi/test_llm_examples.py::test_llmapi_quickstart_atexit
   - examples/test_visual_gen.py::test_visual_gen_quickstart
+  - examples/test_visual_gen.py::test_visual_gen_api_walkthrough
   - unittest/test_model_runner_cpp.py
   - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype
   - examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] TIMEOUT (90)
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -272,6 +272,7 @@ l0_h100:
   - test_e2e.py::test_mistral_large_hidden_vocab_size
   - llmapi/test_llm_examples.py::test_llmapi_quickstart_atexit
   - examples/test_visual_gen.py::test_visual_gen_quickstart
+  - examples/test_visual_gen.py::test_visual_gen_api_walkthrough
   - unittest/trt/attention/test_gpt_attention_IFB.py
   - accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_fp8_prequantized
   - accuracy/test_cli_flow.py::TestLlama2_7B::test_fp8
diff --git a/tests/integration/test_lists/test-db/l0_l40s.yml b/tests/integration/test_lists/test-db/l0_l40s.yml
@@ -64,6 +64,7 @@ l0_l40s:
   - examples/test_nemotron_nas.py::test_nemotron_nas_summary_1gpu[DeciLM-7B]
   - llmapi/test_llm_examples.py::test_llmapi_quickstart
   - examples/test_visual_gen.py::test_visual_gen_quickstart
+  - examples/test_visual_gen.py::test_visual_gen_api_walkthrough
   - llmapi/test_llm_examples.py::test_llmapi_example_inference
   - llmapi/test_llm_examples.py::test_llmapi_example_inference_async
   - llmapi/test_llm_examples.py::test_llmapi_example_inference_async_streaming