Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions src/diffusers/modular_pipelines/ernie_image/encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,23 @@
import json

import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
from transformers import AutoTokenizer, Mistral3Model

from ...configuration_utils import FrozenDict
from ...guiders import ClassifierFreeGuidance
from ...utils import logging
from ...utils.import_utils import is_transformers_version
from ..modular_pipeline import ModularPipelineBlocks, PipelineState
from ..modular_pipeline_utils import ComponentSpec, InputParam, OutputParam
from .modular_pipeline import ErnieImageModularPipeline


if is_transformers_version("<", "5.0.0"):
raise ImportError("`ErnieImageModularPipeline` requires `transformers>=5.0.0` for `Ministral3ForCausalLM`.")

from transformers import Ministral3ForCausalLM # noqa: E402


logger = logging.get_logger(__name__) # pylint: disable=invalid-name


Expand All @@ -38,7 +45,7 @@ def description(self) -> str:
@property
def expected_components(self) -> list[ComponentSpec]:
return [
ComponentSpec("pe", AutoModelForCausalLM),
ComponentSpec("pe", Ministral3ForCausalLM),
ComponentSpec("pe_tokenizer", AutoTokenizer),
]

Expand Down Expand Up @@ -83,7 +90,7 @@ def intermediate_outputs(self) -> list[OutputParam]:

@staticmethod
def _enhance_prompt(
pe: AutoModelForCausalLM,
pe: Ministral3ForCausalLM,
pe_tokenizer: AutoTokenizer,
prompt: str,
device: torch.device,
Expand Down Expand Up @@ -160,7 +167,7 @@ def description(self) -> str:
@property
def expected_components(self) -> list[ComponentSpec]:
return [
ComponentSpec("text_encoder", AutoModel),
ComponentSpec("text_encoder", Mistral3Model),
ComponentSpec("tokenizer", AutoTokenizer),
ComponentSpec(
"guider",
Expand Down Expand Up @@ -200,7 +207,7 @@ def intermediate_outputs(self) -> list[OutputParam]:

@staticmethod
def _encode(
text_encoder: AutoModel,
text_encoder: Mistral3Model,
tokenizer: AutoTokenizer,
prompt: list[str],
device: torch.device,
Expand Down
13 changes: 10 additions & 3 deletions src/diffusers/pipelines/ernie_image/pipeline_ernie_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,25 @@
from typing import Callable, List, Optional, Union

import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
from transformers import AutoTokenizer, Mistral3Model

from ...image_processor import VaeImageProcessor
from ...loaders import ErnieImageLoraLoaderMixin
from ...models import AutoencoderKLFlux2
from ...models.transformers import ErnieImageTransformer2DModel
from ...pipelines.pipeline_utils import DiffusionPipeline
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils.import_utils import is_transformers_version
from ...utils.torch_utils import randn_tensor
from .pipeline_output import ErnieImagePipelineOutput


if is_transformers_version("<", "5.0.0"):
raise ImportError("`ErnieImagePipeline` requires `transformers>=5.0.0` for `Ministral3ForCausalLM`.")

from transformers import Ministral3ForCausalLM # noqa: E402


class ErnieImagePipeline(DiffusionPipeline, ErnieImageLoraLoaderMixin):
"""
Pipeline for text-to-image generation using ErnieImageTransformer2DModel.
Expand All @@ -52,10 +59,10 @@ def __init__(
self,
transformer: ErnieImageTransformer2DModel,
vae: AutoencoderKLFlux2,
text_encoder: AutoModel,
text_encoder: Mistral3Model,
tokenizer: AutoTokenizer,
scheduler: FlowMatchEulerDiscreteScheduler,
pe: Optional[AutoModelForCausalLM] = None,
pe: Optional[Ministral3ForCausalLM] = None,
pe_tokenizer: Optional[AutoTokenizer] = None,
):
super().__init__()
Expand Down
Loading