[None][test] Revert redundant AD accuracy helper

govind-ramnarayan · govind-ramnarayan · commit fb3b06bfe007 · 2026-06-17T16:14:14.000-07:00
Signed-off-by: Govind Ramnarayan <105831528+govind-ramnarayan@users.noreply.github.com>
diff --git a/tests/integration/defs/accuracy/test_llm_api_autodeploy.py b/tests/integration/defs/accuracy/test_llm_api_autodeploy.py
@@ -168,14 +168,6 @@ def low_memory_overrides(config,
     return config
 
 
-def disable_piecewise_cuda_graph_for_speculation(config: dict) -> dict:
-    """Disable piecewise CUDA graph capture for speculative AutoDeploy tests."""
-    config.setdefault("transforms",
-                      {}).setdefault("compile_model",
-                                     {})["piecewise_enabled"] = False
-    return config
-
-
 def reduced_model_kwargs(num_hidden_layers: int,
                          model_path: str | None = None) -> dict:
     """Return model_kwargs to cap a model at ``num_hidden_layers`` layers.
@@ -378,7 +370,9 @@ def get_default_kwargs(self, attn_backend="flashinfer"):
                 "torch_dtype": "bfloat16"
             },
         }
-        disable_piecewise_cuda_graph_for_speculation(kwargs)
+        kwargs.setdefault("transforms",
+                          {}).setdefault("compile_model",
+                                         {})["piecewise_enabled"] = False
 
         return kwargs