diff --git a/angelslim/compressor/cache/teacache.py b/angelslim/compressor/cache/teacache.py
index 0ac13640..2bc78d6a 100644
--- a/angelslim/compressor/cache/teacache.py
+++ b/angelslim/compressor/cache/teacache.py
@@ -18,15 +18,9 @@
 
 import numpy as np
 import torch
-from diffusers.models.modeling_outputs import Transformer2DModelOutput
-from diffusers.utils import (
-    USE_PEFT_BACKEND,
-    is_torch_version,
-    scale_lora_layers,
-    unscale_lora_layers,
-)
 
 from ...utils import print_info
+from ...utils.lazy_imports import Transformer2DModelOutput, diffusers
 
 
 class TeaCache:
@@ -130,9 +124,9 @@ def flux_teacache_forward(
     else:
         lora_scale = 1.0
 
-    if USE_PEFT_BACKEND:
+    if diffusers.utils.USE_PEFT_BACKEND:
         # weight the lora layers by setting `lora_scale` for each PEFT layer
-        scale_lora_layers(self, lora_scale)
+        diffusers.utils.scale_lora_layers(self, lora_scale)
     else:
         if (
             joint_attention_kwargs is not None
@@ -236,7 +230,7 @@ def custom_forward(*inputs):
 
                     ckpt_kwargs: Dict[str, Any] = (
                         {"use_reentrant": False}
-                        if is_torch_version(">=", "1.11.0")
+                        if diffusers.utils.is_torch_version(">=", "1.11.0")
                         else {}
                     )
                     encoder_hidden_states, hidden_states = (
@@ -294,7 +288,7 @@ def custom_forward(*inputs):
 
                     ckpt_kwargs: Dict[str, Any] = (
                         {"use_reentrant": False}
-                        if is_torch_version(">=", "1.11.0")
+                        if diffusers.utils.is_torch_version(">=", "1.11.0")
                         else {}
                     )
                     hidden_states = torch.utils.checkpoint.checkpoint(
@@ -342,7 +336,9 @@ def custom_forward(*inputs):
                     return custom_forward
 
                 ckpt_kwargs: Dict[str, Any] = (
-                    {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
+                    {"use_reentrant": False}
+                    if diffusers.utils.is_torch_version(">=", "1.11.0")
+                    else {}
                 )
                 encoder_hidden_states, hidden_states = (
                     torch.utils.checkpoint.checkpoint(
@@ -398,7 +394,9 @@ def custom_forward(*inputs):
                     return custom_forward
 
                 ckpt_kwargs: Dict[str, Any] = (
-                    {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
+                    {"use_reentrant": False}
+                    if diffusers.utils.is_torch_version(">=", "1.11.0")
+                    else {}
                 )
                 hidden_states = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(block),
@@ -432,9 +430,9 @@ def custom_forward(*inputs):
     hidden_states = self.norm_out(hidden_states, temb)
     output = self.proj_out(hidden_states)
 
-    if USE_PEFT_BACKEND:
+    if diffusers.utils.USE_PEFT_BACKEND:
         # remove `lora_scale` from each PEFT layer
-        unscale_lora_layers(self, lora_scale)
+        diffusers.utils.unscale_lora_layers(self, lora_scale)
 
     if not return_dict:
         return (output,)
diff --git a/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py b/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py
index dc1fe93c..ec10c50e 100644
--- a/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py
+++ b/angelslim/compressor/speculative/benchmark/pytorch/benchmark_engine.py
@@ -20,10 +20,10 @@
 from typing import Any, Dict, Optional
 
 import numpy as np
-import ray
-from fastchat.llm_judge.common import load_questions
 from transformers import AutoTokenizer
 
+from angelslim.utils.lazy_imports import fastchat, ray
+
 from .generate_baseline_answer import get_model_answers as get_baseline_answers
 from .generate_eagle_answer import get_model_answers as get_eagle_answers
 
@@ -146,7 +146,7 @@ def _run_eagle_benchmark(self):
         """Run Eagle speculative decoding benchmark"""
         args = self._create_args_namespace("eagle")
 
-        questions = load_questions(
+        questions = fastchat.llm_judge.common.load_questions(
             self._get_question_file_path(),
             self.config.question_begin,
             self.config.question_end,
@@ -186,7 +186,7 @@ def _run_baseline_benchmark(self):
         """Run baseline benchmark"""
         args = self._create_args_namespace("baseline")
 
-        questions = load_questions(
+        questions = fastchat.llm_judge.common.load_questions(
             self._get_question_file_path(),
             self.config.question_begin,
             self.config.question_end,
diff --git a/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py b/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py
index dc4a6d70..6b2210ea 100644
--- a/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py
+++ b/angelslim/compressor/speculative/benchmark/pytorch/generate_baseline_answer.py
@@ -20,13 +20,12 @@
 from typing import Any, Dict, List
 
 import numpy as np
-import ray
 import shortuuid
 import torch
-from fastchat.llm_judge.common import load_questions
 from tqdm import tqdm
 
 from angelslim.compressor.speculative.inference.models import Eagle3Model
+from angelslim.utils.lazy_imports import fastchat, ray
 
 SYSTEM_PROMPT = {
     "role": "system",
@@ -231,7 +230,7 @@ def get_model_answers(
 
 def run_evaluation(config: EvaluationConfig, args: argparse.Namespace) -> None:
     """Run the evaluation with optional distributed processing"""
-    questions = load_questions(
+    questions = fastchat.llm_judge.common.load_questions(
         config.question_file, args.question_begin, args.question_end
     )
 
diff --git a/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py b/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py
index 736e35cb..9451b742 100644
--- a/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py
+++ b/angelslim/compressor/speculative/benchmark/pytorch/generate_eagle_answer.py
@@ -20,13 +20,12 @@
 from typing import Any, Dict, List
 
 import numpy as np
-import ray
 import shortuuid
 import torch
-from fastchat.llm_judge.common import load_questions
 from tqdm import tqdm
 
 from angelslim.compressor.speculative.inference.models import Eagle3Model
+from angelslim.utils.lazy_imports import fastchat, ray
 
 SYSTEM_PROMPT = {
     "role": "system",
@@ -237,7 +236,7 @@ def get_model_answers(
 
 def run_evaluation(config: EvaluationConfig, args: argparse.Namespace) -> None:
     """Run the evaluation with optional distributed processing"""
-    questions = load_questions(
+    questions = fastchat.llm_judge.common.load_questions(
         config.question_file, args.question_begin, args.question_end
     )
 
diff --git a/angelslim/data/multimodal_dataset.py b/angelslim/data/multimodal_dataset.py
index c843c0bb..bf05c5e0 100644
--- a/angelslim/data/multimodal_dataset.py
+++ b/angelslim/data/multimodal_dataset.py
@@ -18,10 +18,10 @@
 
 from datasets import load_dataset
 from PIL import Image
-from qwen_vl_utils import process_vision_info
 from tqdm import tqdm
 from transformers import ProcessorMixin
 
+from ..utils.lazy_imports import qwen_vl_utils
 from .base_dataset import BaseDataset
 
 
@@ -108,7 +108,7 @@ def _process_and_append(self, messages: List[Dict]):
         )
 
         # Extract vision info
-        image_inputs, video_inputs = process_vision_info(messages)
+        image_inputs, video_inputs = qwen_vl_utils.process_vision_info(messages)
 
         # Process inputs
         inputs = self.processor(
diff --git a/angelslim/models/diffusion/flux.py b/angelslim/models/diffusion/flux.py
index 8e63007c..aaa2c2fa 100644
--- a/angelslim/models/diffusion/flux.py
+++ b/angelslim/models/diffusion/flux.py
@@ -18,15 +18,18 @@
 import numpy as np
 import torch
 import torch.nn as nn
-from diffusers import FluxPipeline
-from diffusers.pipelines.flux.pipeline_flux import calculate_shift, retrieve_timesteps
-from diffusers.pipelines.flux.pipeline_output import FluxPipelineOutput
 from safetensors.torch import load_file
 from tqdm import tqdm
 
 from ...compressor import CompressorFactory
 from ...compressor.quant.core import PTQDiffusionSave, PTQOnlyScaleSave, QuantConfig
 from ...compressor.quant.modules import QLinear
+from ...utils.lazy_imports import (
+    FluxPipelineOutput,
+    calculate_shift,
+    diffusers,
+    retrieve_timesteps,
+)
 from ...utils.utils import find_layers, find_parent_layer_and_sub_name
 from ..base_model import BaseDiffusionModel
 from ..model_factory import SlimModelFactory
@@ -82,7 +85,7 @@ def from_pretrained(
                         [comp_name], self, slim_config=slim_config
                     )
         else:
-            self.model = FluxPipeline.from_pretrained(
+            self.model = diffusers.FluxPipeline.from_pretrained(
                 model_path,
                 torch_dtype=torch_dtype,
                 cache_dir=cache_dir,
@@ -199,7 +202,7 @@ def model_forward(self, dataloader, **kwargs):
                 ).images[0]
 
 
-class FluxSlimPipeline(FluxPipeline):
+class FluxSlimPipeline(diffusers.FluxPipeline):
     def __init__(
         self,
         scheduler,
diff --git a/angelslim/utils/__init__.py b/angelslim/utils/__init__.py
index e8f87898..d9cb0ff7 100644
--- a/angelslim/utils/__init__.py
+++ b/angelslim/utils/__init__.py
@@ -14,6 +14,7 @@
 
 from .config_parser import SlimConfigParser, parse_json_full_config  # noqa: F401
 from .default_compress_config import *  # noqa: F401 F403
+from .lazy_imports import *  # noqa: F401 F403
 from .utils import common_prefix  # noqa: F401
 from .utils import find_layers  # noqa: F401
 from .utils import find_parent_layer_and_sub_name  # noqa: F401
diff --git a/angelslim/utils/lazy_imports.py b/angelslim/utils/lazy_imports.py
new file mode 100644
index 00000000..ee2d6bdf
--- /dev/null
+++ b/angelslim/utils/lazy_imports.py
@@ -0,0 +1,174 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib
+from typing import Any
+
+"""
+Central lazy import module for AngelSlim toolkit.
+This module provides lazy loading functionality for optional dependencies,
+delaying actual imports until the packages are first used.
+"""
+
+
+class LazyModule:
+    """
+    Proxy that imports an optional module on first attribute access.
+
+    Deferring the import lets the toolkit load without optional dependencies.
+    Submodules a package does not import eagerly are resolved on demand.
+
+    Attributes:
+        _module_name (str): The full name of the module to import
+        _extra_group (str): The extra dependency group required for this module
+        _module (ModuleType): The actual imported module (None until first access)
+
+    Example:
+        >>> ray = LazyModule('ray', 'speculative')
+        >>> # The actual import happens here on first attribute access
+        >>> ray.init()
+    """
+
+    def __init__(self, module_name: str, extra_group: str = None):
+        """Store the target module name and pip extra; nothing is imported here."""
+        self._module_name = module_name
+        self._extra_group = extra_group
+        self._module = None
+
+    def __getattr__(self, name: str) -> Any:
+        """
+        Return ``name`` from the target module, importing the module if needed.
+
+        Returns:
+            The attribute, or a cached LazyModule proxy for an on-demand submodule
+
+        Raises:
+            ImportError: If the module cannot be imported; the message names the
+                pip extra when an extra_group is specified
+            AttributeError: If ``name`` is neither an attribute nor a submodule
+        """
+        if self._module is None:
+            try:
+                self._module = importlib.import_module(self._module_name)
+            except ImportError as e:
+                if self._extra_group:
+                    raise ImportError(
+                        f"Module '{self._module_name}' requires "
+                        f"additional dependencies. Please install: "
+                        f"pip install 'angelslim[{self._extra_group}]'"
+                    ) from e
+                raise
+        try:
+            return getattr(self._module, name)
+        except AttributeError as missing:
+            # A package need not import its submodules eagerly; try one by name.
+            submodule = f"{self._module_name}.{name}"
+            try:
+                importlib.import_module(submodule)
+            except ImportError as cause:
+                raise missing from cause
+            # Cache the proxy so later lookups keep resolving nested submodules.
+            proxy = LazyModule(submodule, self._extra_group)
+            setattr(self, name, proxy)
+            return proxy
+
+
+class LazyAttribute:
+    """
+    Proxy that resolves one attribute of an optional module on first use.
+
+    The module import is deferred until the proxy is called or one of its
+    attributes is read, so lazily bound functions and classes stay usable.
+
+    Attributes:
+        _module_name (str): The name of the module containing the target attribute
+        _attribute_name (str): The name of the specific attribute to load
+        _extra_group (str): The extra dependency group required for this attribute
+        _attribute (Any): The actual attribute value (None until first access)
+    """
+
+    def __init__(self, module_name: str, attribute_name: str, extra_group: str = None):
+        """
+        Record what to load and which pip extra provides it; import nothing yet.
+        """
+        self._module_name = module_name
+        self._attribute_name = attribute_name
+        self._extra_group = extra_group
+        self._attribute = None
+
+    def _resolve(self) -> Any:
+        """
+        Import the module once and return the cached target attribute.
+
+        Raises:
+            ImportError: If the module cannot be imported; the message names the
+                pip extra when an extra_group is specified
+        """
+        if self._attribute is None:
+            try:
+                module = importlib.import_module(self._module_name)
+                self._attribute = getattr(module, self._attribute_name)
+            except ImportError as e:
+                if self._extra_group:
+                    raise ImportError(
+                        f"Attribute '{self._attribute_name}' requires "
+                        f"additional dependencies. Please install: "
+                        f"pip install 'angelslim[{self._extra_group}]'"
+                    ) from e
+                raise
+        return self._attribute
+
+    def __getattr__(self, name: str) -> Any:
+        """
+        Return ``name`` from the target attribute, resolving it on first use.
+        """
+        return getattr(self._resolve(), name)
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        """
+        Call the target attribute with the given arguments.
+
+        Python looks up ``__call__`` on the type, bypassing ``__getattr__``, so
+        without this method a lazily bound function or class cannot be invoked.
+        """
+        return self._resolve()(*args, **kwargs)
+
+
+# Global lazy proxies for optional dependencies; last argument = pip extra group.
+
+# --- Speculative decoding related lazy imports ---
+ray = LazyModule("ray", "speculative")
+fastchat = LazyModule("fastchat", "speculative")
+openai = LazyModule("openai", "speculative")
+anthropic = LazyModule("anthropic", "speculative")
+jsonschema_specifications = LazyModule("jsonschema_specifications", "speculative")
+referencing = LazyModule("referencing", "speculative")
+
+# --- Diffusion related lazy imports ---
+diffusers = LazyModule("diffusers", "diffusion")
+Transformer2DModelOutput = LazyAttribute(
+    "diffusers.models.modeling_outputs", "Transformer2DModelOutput", "diffusion"
+)
+retrieve_timesteps = LazyAttribute(
+    "diffusers.pipelines.flux.pipeline_flux", "retrieve_timesteps", "diffusion"
+)
+calculate_shift = LazyAttribute(
+    "diffusers.pipelines.flux.pipeline_flux", "calculate_shift", "diffusion"
+)
+FluxPipelineOutput = LazyAttribute(
+    "diffusers.pipelines.flux.pipeline_output", "FluxPipelineOutput", "diffusion"
+)
+
+# --- VLM related lazy imports ---
+qwen_vl_utils = LazyModule("qwen_vl_utils", "vlm")
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index f9d13e18..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-torch>=2.6.0
-torchvision>=0.21.0
-transformers>=4.52.0
-safetensors>=0.5.3
-diffusers>=0.34.0
-numpy
-tqdm
-pyarrow
-threadpoolctl
-qwen_vl_utils==0.0.11
-tiktoken
-triton
-datasets
-fschat
-openai
-anthropic
-ray
-referencing
-jsonschema_specifications
\ No newline at end of file
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
new file mode 100644
index 00000000..42b2b8d2
--- /dev/null
+++ b/requirements/requirements.txt
@@ -0,0 +1,11 @@
+torch>=2.6.0
+torchvision>=0.21.0
+transformers>=4.56.1
+safetensors>=0.5.3
+numpy
+tqdm
+triton
+pyarrow
+tiktoken
+datasets
+threadpoolctl
\ No newline at end of file
diff --git a/requirements/requirements_diffusion.txt b/requirements/requirements_diffusion.txt
new file mode 100644
index 00000000..c0951486
--- /dev/null
+++ b/requirements/requirements_diffusion.txt
@@ -0,0 +1 @@
+diffusers>=0.34.0
\ No newline at end of file
diff --git a/requirements/requirements_speculative.txt b/requirements/requirements_speculative.txt
new file mode 100644
index 00000000..f3c75853
--- /dev/null
+++ b/requirements/requirements_speculative.txt
@@ -0,0 +1,6 @@
+fschat
+openai
+anthropic
+ray
+referencing
+jsonschema_specifications
\ No newline at end of file
diff --git a/requirements/requirements_vlm.txt b/requirements/requirements_vlm.txt
new file mode 100644
index 00000000..87d1012c
--- /dev/null
+++ b/requirements/requirements_vlm.txt
@@ -0,0 +1 @@
+qwen_vl_utils==0.0.11
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 059b370b..30a97710 100644
--- a/setup.py
+++ b/setup.py
@@ -26,9 +26,9 @@
     TOOLS_VERSION = tag_list[-1]
 
 
-def get_requirements():
-    """from requirements.txt load dependency package"""
-    with open("requirements.txt") as f:
+def get_requirements(filename):
+    """Load dependency packages from specified requirements file"""
+    with open(filename) as f:
         return [
             line.strip()
             for line in f.readlines()
@@ -43,7 +43,23 @@ def get_requirements():
     long_description="Tools for llm model compression",
     url="https://github.com/Tencent/AngelSlim",
     author="Tencent Author",
-    install_requires=get_requirements(),
+    # Core dependencies: installed by default
+    install_requires=get_requirements("requirements/requirements.txt"),
+    # Define optional dependency groups
+    extras_require={
+        # Install all optional features: pip install angelslim[all]
+        "all": (
+            get_requirements("requirements/requirements_speculative.txt")
+            + get_requirements("requirements/requirements_diffusion.txt")
+            + get_requirements("requirements/requirements_vlm.txt")
+        ),
+        # Install speculative sampling functionality: pip install angelslim[speculative]
+        "speculative": get_requirements("requirements/requirements_speculative.txt"),
+        # Install Diffusion functionality: pip install angelslim[diffusion]
+        "diffusion": get_requirements("requirements/requirements_diffusion.txt"),
+        # Install VLM functionality: pip install angelslim[vlm]
+        "vlm": get_requirements("requirements/requirements_vlm.txt"),
+    },
     packages=find_packages(),
     python_requires=">=3.0",
     # PyPI package information.