Skip to content

Commit fb3b06b

Browse files
[None][test] Revert redundant AD accuracy helper
Signed-off-by: Govind Ramnarayan <105831528+govind-ramnarayan@users.noreply.github.com>
1 parent e419a17 commit fb3b06b

1 file changed

Lines changed: 3 additions & 9 deletions

File tree

tests/integration/defs/accuracy/test_llm_api_autodeploy.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -168,14 +168,6 @@ def low_memory_overrides(config,
168 168
return config
169 169

170 170

171-
def disable_piecewise_cuda_graph_for_speculation(config: dict) -> dict:
172-
"""Disable piecewise CUDA graph capture for speculative AutoDeploy tests."""
173-
config.setdefault("transforms",
174-
{}).setdefault("compile_model",
175-
{})["piecewise_enabled"] = False
176-
return config
177-
178-
179 171
def reduced_model_kwargs(num_hidden_layers: int,
180 172
model_path: str | None = None) -> dict:
181 173
"""Return model_kwargs to cap a model at ``num_hidden_layers`` layers.
@@ -378,7 +370,9 @@ def get_default_kwargs(self, attn_backend="flashinfer"):
378 370
"torch_dtype": "bfloat16"
379 371
},
380 372
}
381-
disable_piecewise_cuda_graph_for_speculation(kwargs)
373+
kwargs.setdefault("transforms",
374+
{}).setdefault("compile_model",
375+
{})["piecewise_enabled"] = False
382 376

383 377
return kwargs
384 378

0 commit comments

Comments
 (0)