# Source: QuantLLM/quantllm/core/__init__.py at commit c752dfa35494b537f6c18de7782203b9acba7df3 (codewithdark-git/QuantLLM on GitHub)
"""
QuantLLM Core Module

Contains the high-performance turbo API for easy model loading,
quantization, fine-tuning, and export.
"""

from .hardware import HardwareProfiler
from .smart_config import SmartConfig
from .model_analyzer import ModelAnalyzer
from .turbo_model import TurboModel, turbo, register_architecture
from .compilation import (
    compile_model,
    compile_for_inference,
    compile_for_training,
    compile_for_max_speed,
    is_compile_supported,
    CompiledModelWrapper,
)
from .flash_attention import (
    flash_attention,
    is_flash_attention_available,
    enable_flash_attention_for_model,
    FlashAttentionWrapper,
)
from .memory import (
    MemoryManager,
    DynamicOffloader,
    GradientCheckpointManager,
    CPUOffloadOptimizer,
    setup_memory_efficient_training,
)
from .training import (
    AutoBatchSizeFinder,
    LoRAAutoConfig,
    TrainingConfig,
    TrainingCallbacks,
    auto_configure_training,
    load_training_data,
)
from .export import (
    UniversalExporter,
    ExportFormat,
    export_model,
)

# Names exported by a star-import of this package. Kept as a plain list
# literal so static tooling can read it; entries follow the order of the
# import groups above, one group per originating submodule.
__all__ = [
    # Core entry points (.hardware, .smart_config, .model_analyzer, .turbo_model)
    "HardwareProfiler", "SmartConfig", "ModelAnalyzer",
    "TurboModel", "turbo", "register_architecture",

    # Model compilation helpers (.compilation)
    "compile_model", "compile_for_inference", "compile_for_training",
    "compile_for_max_speed", "is_compile_supported", "CompiledModelWrapper",

    # Flash attention helpers (.flash_attention)
    "flash_attention", "is_flash_attention_available",
    "enable_flash_attention_for_model", "FlashAttentionWrapper",

    # Memory optimization utilities (.memory)
    "MemoryManager", "DynamicOffloader", "GradientCheckpointManager",
    "CPUOffloadOptimizer", "setup_memory_efficient_training",

    # Training utilities (.training)
    "AutoBatchSizeFinder", "LoRAAutoConfig", "TrainingConfig",
    "TrainingCallbacks", "auto_configure_training", "load_training_data",

    # Model export (.export)
    "UniversalExporter", "ExportFormat", "export_model",
]