Skip to content

Commit 666fdfa

Browse files
Copilot and lstein committed
fix: use AutoProcessor.from_pretrained to load Qwen VL processor correctly
Co-authored-by: lstein <111189+lstein@users.noreply.github.com> Agent-Logs-Url: https://github.com/lstein/InvokeAI/sessions/4d4417be-0f61-4faa-a21c-16e9ce81fec7
1 parent 2be0d8a commit 666fdfa

1 file changed

Lines changed: 9 additions & 14 deletions

File tree

invokeai/app/invocations/qwen_image_edit_text_encoder.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import torch
22
from PIL import Image as PILImage
3-
from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2VLProcessor
3+
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
44

55
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
66
from invokeai.app.invocations.fields import (
@@ -71,25 +71,20 @@ def _encode(self, context: InvocationContext, images: list[PILImage.Image]) -> t
7171

7272
messages = [{"role": "user", "content": content}]
7373

74+
# Load the full processor (image_processor + tokenizer) from the tokenizer submodel path.
75+
# Using AutoProcessor.from_pretrained ensures all components are loaded correctly
76+
# regardless of whether the model uses Qwen2VLProcessor or Qwen2_5_VLProcessor.
77+
tokenizer_config = context.models.get_config(self.qwen_vl_encoder.tokenizer)
78+
tokenizer_abs_path = context.models.get_absolute_path(tokenizer_config)
79+
processor = AutoProcessor.from_pretrained(str(tokenizer_abs_path), local_files_only=True)
80+
7481
text_encoder_info = context.models.load(self.qwen_vl_encoder.text_encoder)
75-
with (
76-
text_encoder_info.model_on_device() as (_, text_encoder),
77-
context.models.load(self.qwen_vl_encoder.tokenizer).model_on_device() as (_, tokenizer),
78-
):
82+
with text_encoder_info.model_on_device() as (_, text_encoder):
7983
device = get_effective_device(text_encoder)
8084

8185
context.util.signal_progress("Running Qwen2.5-VL text/vision encoder")
8286
assert isinstance(text_encoder, Qwen2_5_VLForConditionalGeneration)
8387

84-
# The tokenizer for Qwen2.5-VL is actually a processor that handles images+text
85-
# In the diffusers pipeline, the processor is used for this
86-
# We need to use the processor to format inputs properly
87-
if isinstance(tokenizer, Qwen2VLProcessor):
88-
processor = tokenizer
89-
else:
90-
# Fall back to creating processor from tokenizer
91-
processor = Qwen2VLProcessor(tokenizer=tokenizer, image_processor=None)
92-
9388
# Apply chat template and process inputs
9489
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
9590
inputs = processor(

0 commit comments

Comments (0)