.. automodule:: quantllm.model.model
   :members:
   :undoc-members:
   :show-inheritance:

.. automodule:: quantllm.model.lora_config
   :members:
   :undoc-members:
   :show-inheritance:

from quantllm import Model, ModelConfig
# Configure model: facebook/opt-125m with load_in_4bit turned on
config = ModelConfig(
    model_name="facebook/opt-125m",
    load_in_4bit=True,
)

# Load model: build the wrapper from the config, then call get_model() on it
model = Model(config)
model_instance = model.get_model()
# Same checkpoint, with both load_in_4bit and use_lora set
config = ModelConfig(
    model_name="facebook/opt-125m",
    load_in_4bit=True,
    use_lora=True,
)

model = Model(config)
# Same checkpoint, with cpu_offload set
config = ModelConfig(
    model_name="facebook/opt-125m",
    cpu_offload=True,
)

model = Model(config)
# Several options combined in one configuration
config = ModelConfig(
    model_name="facebook/opt-125m",
    load_in_4bit=True,
    use_lora=True,
    gradient_checkpointing=True,
    bf16=True,
    trust_remote_code=True,
)

model = Model(config)