fix: use Qwen2_5_VLProcessor directly instead of AutoProcessor to avoid AutoConfig lookup

Copilot · lstein · Copilot · commit 8bbb06d1b160 · 2026-03-25T15:01:48.000Z
Co-authored-by: lstein <111189+lstein@users.noreply.github.com> Agent-Logs-Url: https://github.com/lstein/InvokeAI/sessions/1a071545-ea9e-42c2-8296-d0582d851478
diff --git a/invokeai/app/invocations/qwen_image_edit_text_encoder.py b/invokeai/app/invocations/qwen_image_edit_text_encoder.py
@@ -1,6 +1,6 @@
 import torch
 from PIL import Image as PILImage
-from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
+from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2_5_VLProcessor
 
 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
 from invokeai.app.invocations.fields import (
@@ -71,14 +71,14 @@ def _encode(self, context: InvocationContext, images: list[PILImage.Image]) -> t
 
         messages = [{"role": "user", "content": content}]
 
-        # Load the full processor (image_processor + tokenizer) from the tokenizer submodel path.
-        # Using AutoProcessor.from_pretrained ensures all components are loaded correctly
-        # regardless of whether the model uses Qwen2VLProcessor or Qwen2_5_VLProcessor.
-        # For diffusers models the processor config lives in the `tokenizer` subdirectory,
-        # so we append the submodel directory name to the root model path.
+        # Load the processor from the `tokenizer` subdirectory of the diffusers model root.
+        # Qwen2_5_VLProcessor.from_pretrained is used directly instead of AutoProcessor:
+        # when no processor class is recorded in the processor/tokenizer config files,
+        # AutoProcessor falls back to AutoConfig.from_pretrained to find the model type, and
+        # the `tokenizer` subdirectory has no config.json (only tokenizer/preprocessor files).
         tokenizer_config = context.models.get_config(self.qwen_vl_encoder.tokenizer)
         tokenizer_abs_path = context.models.get_absolute_path(tokenizer_config) / "tokenizer"
-        processor = AutoProcessor.from_pretrained(str(tokenizer_abs_path), local_files_only=True)
+        processor = Qwen2_5_VLProcessor.from_pretrained(str(tokenizer_abs_path), local_files_only=True)
 
         text_encoder_info = context.models.load(self.qwen_vl_encoder.text_encoder)
         with text_encoder_info.model_on_device() as (_, text_encoder):