diff --git a/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py b/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py index 6cee4311beec..b272740deab4 100644 --- a/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py +++ b/tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py @@ -494,6 +494,11 @@ def __init__( else: self.max_total_draft_tokens = 0 + # ADEngine skips PyTorchModelEngine.__init__, so set the spec-decode + # flags that shared PyExecutor code expects on a ModelEngine. + self.is_spec_decode = self.spec_config is not None + self.enable_spec_decode = self.is_spec_decode + # For compatibility with PyTorchModelEngine utilities self.batch_size = cache_seq_interface.info.max_batch_size