[None][perf] executor: avoid deepcopy of prompt_token_ids on enqueue (NVIDIA#14895)

lancelly · GitLab CI Bot · commit efeac271282e · 2026-06-24T03:01:07.000Z
Signed-off-by: Lanyu Liao <lancelly@users.noreply.github.com>
Co-authored-by: Lanyu Liao <lancelly@users.noreply.github.com>
Signed-off-by: GitLab CI Bot <gitlab-ci@nvidia.com>
diff --git a/tensorrt_llm/executor/base_worker.py b/tensorrt_llm/executor/base_worker.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import copy
 import datetime
 import enum
 import gc
@@ -449,7 +448,7 @@ def _enqueue_request(self,
         else:
             lora_config = None
 
-        prompt_token_ids = copy.deepcopy(request.prompt_token_ids)
+        prompt_token_ids = list(request.prompt_token_ids)
         prompt_tuning_config = None
         if request.prompt_adapter_request is not None:
             self._load_prompt_adapter(request.prompt_adapter_request)