diff --git a/angelslim/compressor/cache/teacache.py b/angelslim/compressor/cache/teacache.py index 0ac13640..2bc78d6a 100644 --- a/angelslim/compressor/cache/teacache.py +++ b/angelslim/compressor/cache/teacache.py @@ -18,15 +18,9 @@ import numpy as np import torch -from diffusers.models.modeling_outputs import Transformer2DModelOutput -from diffusers.utils import ( - USE_PEFT_BACKEND, - is_torch_version, - scale_lora_layers, - unscale_lora_layers, -) from ...utils import print_info +from ...utils.lazy_imports import Transformer2DModelOutput, diffusers class TeaCache: @@ -130,9 +124,9 @@ def flux_teacache_forward( else: lora_scale = 1.0 - if USE_PEFT_BACKEND: + if diffusers.utils.USE_PEFT_BACKEND: # weight the lora layers by setting `lora_scale` for each PEFT layer - scale_lora_layers(self, lora_scale) + diffusers.utils.scale_lora_layers(self, lora_scale) else: if ( joint_attention_kwargs is not None @@ -236,7 +230,7 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = ( {"use_reentrant": False} - if is_torch_version(">=", "1.11.0") + if diffusers.utils.is_torch_version(">=", "1.11.0") else {} ) encoder_hidden_states, hidden_states = ( @@ -294,7 +288,7 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = ( {"use_reentrant": False} - if is_torch_version(">=", "1.11.0") + if diffusers.utils.is_torch_version(">=", "1.11.0") else {} ) hidden_states = torch.utils.checkpoint.checkpoint( @@ -342,7 +336,9 @@ def custom_forward(*inputs): return custom_forward ckpt_kwargs: Dict[str, Any] = ( - {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} + {"use_reentrant": False} + if diffusers.utils.is_torch_version(">=", "1.11.0") + else {} ) encoder_hidden_states, hidden_states = ( torch.utils.checkpoint.checkpoint( @@ -398,7 +394,9 @@ def custom_forward(*inputs): return custom_forward ckpt_kwargs: Dict[str, Any] = ( - {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} + {"use_reentrant": False} + if 
diffusers.utils.is_torch_version(">=", "1.11.0") + else {} ) hidden_states = torch.utils.checkpoint.checkpoint( create_custom_forward(block), @@ -432,9 +430,9 @@ def custom_forward(*inputs): hidden_states = self.norm_out(hidden_states, temb) output = self.proj_out(hidden_states) - if USE_PEFT_BACKEND: + if diffusers.utils.USE_PEFT_BACKEND: # remove `lora_scale` from each PEFT layer - unscale_lora_layers(self, lora_scale) + diffusers.utils.unscale_lora_layers(self, lora_scale) if not return_dict: return (output,) diff --git a/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py b/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py index dc1fe93c..ec10c50e 100644 --- a/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py +++ b/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py @@ -20,10 +20,10 @@ from typing import Any, Dict, Optional import numpy as np -import ray -from fastchat.llm_judge.common import load_questions from transformers import AutoTokenizer +from angelslim.utils.lazy_imports import fastchat, ray + from .generate_baseline_answer import get_model_answers as get_baseline_answers from .generate_eagle_answer import get_model_answers as get_eagle_answers @@ -146,7 +146,7 @@ def _run_eagle_benchmark(self): """Run Eagle speculative decoding benchmark""" args = self._create_args_namespace("eagle") - questions = load_questions( + questions = fastchat.llm_judge.common.load_questions( self._get_question_file_path(), self.config.question_begin, self.config.question_end, @@ -186,7 +186,7 @@ def _run_baseline_benchmark(self): """Run baseline benchmark""" args = self._create_args_namespace("baseline") - questions = load_questions( + questions = fastchat.llm_judge.common.load_questions( self._get_question_file_path(), self.config.question_begin, self.config.question_end, diff --git a/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py 
b/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py index dc4a6d70..6b2210ea 100644 --- a/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py +++ b/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py @@ -20,13 +20,12 @@ from typing import Any, Dict, List import numpy as np -import ray import shortuuid import torch -from fastchat.llm_judge.common import load_questions from tqdm import tqdm from angelslim.compressor.speculative.inference.models import Eagle3Model +from angelslim.utils.lazy_imports import fastchat, ray SYSTEM_PROMPT = { "role": "system", @@ -231,7 +230,7 @@ def get_model_answers( def run_evaluation(config: EvaluationConfig, args: argparse.Namespace) -> None: """Run the evaluation with optional distributed processing""" - questions = load_questions( + questions = fastchat.llm_judge.common.load_questions( config.question_file, args.question_begin, args.question_end ) diff --git a/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py b/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py index 736e35cb..9451b742 100644 --- a/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py +++ b/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py @@ -20,13 +20,12 @@ from typing import Any, Dict, List import numpy as np -import ray import shortuuid import torch -from fastchat.llm_judge.common import load_questions from tqdm import tqdm from angelslim.compressor.speculative.inference.models import Eagle3Model +from angelslim.utils.lazy_imports import fastchat, ray SYSTEM_PROMPT = { "role": "system", @@ -237,7 +236,7 @@ def get_model_answers( def run_evaluation(config: EvaluationConfig, args: argparse.Namespace) -> None: """Run the evaluation with optional distributed processing""" - questions = load_questions( + questions = fastchat.llm_judge.common.load_questions( config.question_file, 
args.question_begin, args.question_end ) diff --git a/angelslim/data/multimodal_dataset.py b/angelslim/data/multimodal_dataset.py index c843c0bb..bf05c5e0 100644 --- a/angelslim/data/multimodal_dataset.py +++ b/angelslim/data/multimodal_dataset.py @@ -18,10 +18,10 @@ from datasets import load_dataset from PIL import Image -from qwen_vl_utils import process_vision_info from tqdm import tqdm from transformers import ProcessorMixin +from ..utils.lazy_imports import qwen_vl_utils from .base_dataset import BaseDataset @@ -108,7 +108,7 @@ def _process_and_append(self, messages: List[Dict]): ) # Extract vision info - image_inputs, video_inputs = process_vision_info(messages) + image_inputs, video_inputs = qwen_vl_utils.process_vision_info(messages) # Process inputs inputs = self.processor( diff --git a/angelslim/models/diffusion/flux.py b/angelslim/models/diffusion/flux.py index 8e63007c..aaa2c2fa 100644 --- a/angelslim/models/diffusion/flux.py +++ b/angelslim/models/diffusion/flux.py @@ -18,15 +18,18 @@ import numpy as np import torch import torch.nn as nn -from diffusers import FluxPipeline -from diffusers.pipelines.flux.pipeline_flux import calculate_shift, retrieve_timesteps -from diffusers.pipelines.flux.pipeline_output import FluxPipelineOutput from safetensors.torch import load_file from tqdm import tqdm from ...compressor import CompressorFactory from ...compressor.quant.core import PTQDiffusionSave, PTQOnlyScaleSave, QuantConfig from ...compressor.quant.modules import QLinear +from ...utils.lazy_imports import ( + FluxPipelineOutput, + calculate_shift, + diffusers, + retrieve_timesteps, +) from ...utils.utils import find_layers, find_parent_layer_and_sub_name from ..base_model import BaseDiffusionModel from ..model_factory import SlimModelFactory @@ -82,7 +85,7 @@ def from_pretrained( [comp_name], self, slim_config=slim_config ) else: - self.model = FluxPipeline.from_pretrained( + self.model = diffusers.FluxPipeline.from_pretrained( model_path, 
# Copyright 2025 Tencent Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Central lazy import module for AngelSlim toolkit.

This module provides lazy loading functionality for optional dependencies,
delaying actual imports until the packages are first used.
"""

import importlib
from types import ModuleType
from typing import Any, Optional

# Names the proxies store on themselves. ``__getattr__`` is only consulted
# for one of these when ``__init__`` has not run (for example while ``copy``
# or ``pickle`` probes a blank instance), so the lookup must fail fast
# instead of recursing through ``self._module`` / ``self._attribute``.
_LAZY_MODULE_FIELDS = frozenset({"_module_name", "_extra_group", "_module"})
_LAZY_ATTRIBUTE_FIELDS = frozenset(
    {"_module_name", "_attribute_name", "_extra_group", "_attribute"}
)


def _import_optional(
    module_name: str, extra_group: Optional[str], subject: str
) -> ModuleType:
    """
    Import ``module_name``, adding an install hint when it is unavailable.

    Args:
        module_name: Full dotted name of the module to import
        extra_group: Name of the ``angelslim`` extra providing the module,
            or None when there is no hint to give
        subject: Human readable subject that starts the error message
            (e.g. ``"Module 'ray'"``)

    Returns:
        The imported module

    Raises:
        ImportError: If the import fails. When ``extra_group`` is set the
            message contains the matching ``pip install`` command and the
            original error is chained as the cause.
    """
    try:
        return importlib.import_module(module_name)
    except ImportError as e:
        if extra_group:
            raise ImportError(
                f"{subject} requires "
                f"additional dependencies. Please install: "
                f"pip install 'angelslim[{extra_group}]'"
            ) from e
        raise


def _import_submodule(qualified_name: str) -> Optional[ModuleType]:
    """
    Import the sub-module ``qualified_name`` and return it, or None if absent.

    Only "this exact sub-module does not exist" is turned into None. A
    sub-module that exists but fails on one of its own imports still raises,
    so the real cause is not hidden.
    """
    try:
        return importlib.import_module(qualified_name)
    except ModuleNotFoundError as e:
        if e.name != qualified_name:
            raise
        return None


def _resolve_lazy_attribute(proxy: "LazyAttribute") -> Any:
    """Return the object behind ``proxy``, importing it on first use."""
    if proxy._attribute is None:
        module = _import_optional(
            proxy._module_name,
            proxy._extra_group,
            f"Attribute '{proxy._attribute_name}'",
        )
        proxy._attribute = getattr(module, proxy._attribute_name)
    return proxy._attribute


class LazyModule:
    """
    A proxy class for lazy module loading.

    This class delays the actual import of a module until its attributes are
    first accessed, which helps reduce startup time and memory usage when
    dealing with optional dependencies that may not be used in every execution.

    Dotted access to sub-modules (``pkg.sub.mod.func``) works even when the
    package's ``__init__`` does not import them: a missing attribute is
    retried as the sub-module ``pkg.<name>``, and sub-modules are handed back
    wrapped in a ``LazyModule`` so the next level resolves the same way.

    Attributes:
        _module_name (str): The full name of the module to import
        _extra_group (str): The extra dependency group required for this module
        _module (ModuleType): The actual imported module (None until first access)

    Example:
        >>> ray = LazyModule('ray', 'speculative')
        >>> # The actual import happens here on first attribute access
        >>> ray.init()
    """

    def __init__(self, module_name: str, extra_group: Optional[str] = None):
        """
        Initialize a lazy module wrapper.

        Args:
            module_name: Full name of the module to import (e.g., 'diffusers')
            extra_group: Name of the extra dependency group required for this module
        """
        self._module_name = module_name
        self._extra_group = extra_group
        self._module = None

    def __getattr__(self, name: str) -> Any:
        """
        Delegate attribute access to the actual module.

        On first access, this method imports the target module and then
        delegates the attribute lookup to the actual module. If the module
        has no such attribute, the sub-module ``<module_name>.<name>`` is
        imported instead.

        Args:
            name: Name of the attribute to access

        Returns:
            The requested attribute from the target module. A sub-module of
            the target is returned wrapped in a ``LazyModule``.

        Raises:
            ImportError: If the module cannot be imported and an
                extra_group is specified, provides installation instructions
            AttributeError: If the module has neither an attribute nor a
                sub-module called ``name``
        """
        if name in _LAZY_MODULE_FIELDS:
            raise AttributeError(name)
        if self._module is None:
            self._module = _import_optional(
                self._module_name,
                self._extra_group,
                f"Module '{self._module_name}'",
            )

        qualified_name = f"{self._module_name}.{name}"
        try:
            value = getattr(self._module, name)
        except AttributeError:
            # ``import pkg`` does not load ``pkg.sub``; try it as a sub-module.
            # Underscore names are probes (``__wrapped__``, ``__test__``, ...)
            # and are never worth an import attempt.
            value = None if name.startswith("_") else _import_submodule(qualified_name)
            if value is None:
                raise

        if isinstance(value, ModuleType) and value.__name__ == qualified_name:
            # Wrap sub-modules so that deeper dotted access does not depend on
            # which sub-modules happen to have been imported already.
            child = LazyModule(qualified_name, self._extra_group)
            child._module = value
            # Cache on the instance: later lookups bypass __getattr__.
            self.__dict__[name] = child
            return child
        return value


class LazyAttribute:
    """
    A proxy class for lazy loading of specific module attributes.

    This class delays the import of a module and retrieval of a specific attribute
    until the attribute is first accessed. Useful for optimizing imports of
    large modules when only specific components are needed.

    The proxy is callable: calling it calls the target, so lazily imported
    functions and classes can be used as ``proxy(...)``.

    Attributes:
        _module_name (str): The name of the module containing the target attribute
        _attribute_name (str): The name of the specific attribute to load
        _extra_group (str): The extra dependency group required for this attribute
        _attribute (Any): The actual attribute value (None until first access)
    """

    def __init__(
        self,
        module_name: str,
        attribute_name: str,
        extra_group: Optional[str] = None,
    ):
        """
        Initialize a lazy attribute wrapper.

        Args:
            module_name: Name of the module containing the target attribute
            attribute_name: Name of the specific attribute to load lazily
            extra_group: Name of the extra dependency group required
        """
        self._module_name = module_name
        self._attribute_name = attribute_name
        self._extra_group = extra_group
        self._attribute = None

    def __getattr__(self, name: str) -> Any:
        """
        Delegate attribute access to the target attribute.

        On first access, this method imports the module and retrieves the
        target attribute, then delegates subsequent attribute access to it.

        Args:
            name: Name of the attribute to access

        Returns:
            The requested attribute from the target attribute

        Raises:
            ImportError: If the module cannot be imported and an extra_group
                is specified, provides installation instructions
        """
        if name in _LAZY_ATTRIBUTE_FIELDS:
            raise AttributeError(name)
        return getattr(_resolve_lazy_attribute(self), name)

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """
        Call the target attribute with the given arguments.

        Special methods are looked up on the type, not through
        ``__getattr__``, so the call has to be forwarded explicitly.

        Returns:
            Whatever the target returns

        Raises:
            ImportError: If the module cannot be imported and an extra_group
                is specified, provides installation instructions
        """
        return _resolve_lazy_attribute(self)(*args, **kwargs)


# Create global lazy loading objects for optional dependencies

# --- Speculative decoding related lazy imports ---
ray = LazyModule("ray", "speculative")
fastchat = LazyModule("fastchat", "speculative")
openai = LazyModule("openai", "speculative")
anthropic = LazyModule("anthropic", "speculative")
jsonschema_specifications = LazyModule("jsonschema_specifications", "speculative")
referencing = LazyModule("referencing", "speculative")

# --- Diffusion related lazy imports ---
diffusers = LazyModule("diffusers", "diffusion")
Transformer2DModelOutput = LazyAttribute(
    "diffusers.models.modeling_outputs", "Transformer2DModelOutput", "diffusion"
)
retrieve_timesteps = LazyAttribute(
    "diffusers.pipelines.flux.pipeline_flux", "retrieve_timesteps", "diffusion"
)
calculate_shift = LazyAttribute(
    "diffusers.pipelines.flux.pipeline_flux", "calculate_shift", "diffusion"
)
FluxPipelineOutput = LazyAttribute(
    "diffusers.pipelines.flux.pipeline_output", "FluxPipelineOutput", "diffusion"
)

# --- VLM related lazy imports ---
qwen_vl_utils = LazyModule("qwen_vl_utils", "vlm")
+threadpoolctl \ No newline at end of file diff --git a/requirements/requirements_diffusion.txt b/requirements/requirements_diffusion.txt new file mode 100644 index 00000000..c0951486 --- /dev/null +++ b/requirements/requirements_diffusion.txt @@ -0,0 +1 @@ +diffusers>=0.34.0 \ No newline at end of file diff --git a/requirements/requirements_speculative.txt b/requirements/requirements_speculative.txt new file mode 100644 index 00000000..f3c75853 --- /dev/null +++ b/requirements/requirements_speculative.txt @@ -0,0 +1,6 @@ +fschat +openai +anthropic +ray +referencing +jsonschema_specifications \ No newline at end of file diff --git a/requirements/requirements_vlm.txt b/requirements/requirements_vlm.txt new file mode 100644 index 00000000..87d1012c --- /dev/null +++ b/requirements/requirements_vlm.txt @@ -0,0 +1 @@ +qwen_vl_utils==0.0.11 \ No newline at end of file diff --git a/setup.py b/setup.py index 059b370b..30a97710 100644 --- a/setup.py +++ b/setup.py @@ -26,9 +26,9 @@ TOOLS_VERSION = tag_list[-1] -def get_requirements(): - """from requirements.txt load dependency package""" - with open("requirements.txt") as f: +def get_requirements(filename): + """Load dependency packages from specified requirements file""" + with open(filename) as f: return [ line.strip() for line in f.readlines() @@ -43,7 +43,23 @@ def get_requirements(): long_description="Tools for llm model compression", url="https://github.com/Tencent/AngelSlim", author="Tencent Author", - install_requires=get_requirements(), + # Core dependencies: installed by default + install_requires=get_requirements("requirements/requirements.txt"), + # Define optional dependency groups + extras_require={ + # Install all optional features: pip install angelslim[all] + "all": ( + get_requirements("requirements/requirements_speculative.txt") + + get_requirements("requirements/requirements_diffusion.txt") + + get_requirements("requirements/requirements_vlm.txt") + ), + # Install speculative sampling functionality: pip 
install angelslim[speculative] + "speculative": get_requirements("requirements/requirements_speculative.txt"), + # Install Diffusion functionality: pip install angelslim[diffusion] + "diffusion": get_requirements("requirements/requirements_diffusion.txt"), + # Install VLM functionality: pip install angelslim[vlm] + "vlm": get_requirements("requirements/requirements_vlm.txt"), + }, packages=find_packages(), python_requires=">=3.0", # PyPI package information.