Skip to content

Commit ac64988

Browse files
Update example files and README for QuantLLM v2.1
1 parent: 9b6e6cc · commit: ac64988

5 files changed

Lines changed: 45 additions & 74 deletions

File tree

examples/01_quickstart.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
"""
2-
QuantLLM v2.0 - Quick Start Example
2+
QuantLLM v2.1 - Quick Start Example
33
44
The simplest way to use QuantLLM.
55
"""

examples/02_gguf_export.py

Lines changed: 18 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
11
"""
2-
QuantLLM v2.0 - GGUF Export Example
2+
QuantLLM v2.1 - GGUF Export Example
33
44
Export models to GGUF format for use with llama.cpp, Ollama, LM Studio.
55
No external dependencies required!
66
"""
77

8-
from quantllm import turbo, list_quant_types
8+
from quantllm import turbo, GGUF_QUANT_TYPES, QUANT_RECOMMENDATIONS
99

1010
# ============================================
1111
# Show Available Quantization Types
1212
# ============================================
1313
print("📦 Available quantization types:\n")
14-
for name, desc in list_quant_types().items():
15-
print(f" {name:12} - {desc}")
14+
for qt in GGUF_QUANT_TYPES:
15+
print(f" {qt}")
16+
17+
print("\n📦 Recommended quantization types:\n")
18+
for use_case, qt in QUANT_RECOMMENDATIONS.items():
19+
print(f" {use_case:12}{qt}")
1620

1721
# ============================================
1822
# Load Model
@@ -24,44 +28,20 @@
2428
# Export to GGUF
2529
# ============================================
2630

27-
# Option 1: Quick export (default q4_0)
28-
print("\n🚀 Exporting to GGUF (q4_0)...")
29-
model.export("gguf", "tinyllama-q4.gguf")
30-
31-
# Option 2: High quality (q8_0)
32-
print("\n🚀 Exporting to GGUF (q8_0)...")
33-
model.export("gguf", "tinyllama-q8.gguf", quantization="q8_0")
34-
35-
# Option 3: Half precision (f16)
36-
print("\n🚀 Exporting to GGUF (f16)...")
37-
model.export("gguf", "tinyllama-f16.gguf", quantization="f16")
38-
39-
# ============================================
40-
# Using convert_to_gguf Directly
41-
# ============================================
42-
from quantllm import convert_to_gguf
43-
from transformers import AutoModelForCausalLM, AutoTokenizer
44-
45-
print("\n🔧 Using convert_to_gguf directly...")
31+
# Option 1: Quick export (default Q4_K_M)
32+
print("\n🚀 Exporting to GGUF (Q4_K_M)...")
33+
model.export("gguf", "tinyllama-q4.gguf", quantization="Q4_K_M")
4634

47-
# Load with transformers
48-
hf_model = AutoModelForCausalLM.from_pretrained(
49-
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
50-
torch_dtype="auto"
51-
)
52-
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
35+
# Option 2: High quality (Q8_0)
36+
print("\n🚀 Exporting to GGUF (Q8_0)...")
37+
model.export("gguf", "tinyllama-q8.gguf", quantization="Q8_0")
5338

54-
# Convert
55-
convert_to_gguf(
56-
model=hf_model,
57-
tokenizer=tokenizer,
58-
output_path="tinyllama-direct.gguf",
59-
quant_type="q4_0",
60-
verbose=True
61-
)
39+
# Option 3: Half precision (F16)
40+
print("\n🚀 Exporting to GGUF (F16)...")
41+
model.export("gguf", "tinyllama-f16.gguf", quantization="F16")
6242

6343
print("\n✅ All exports complete!")
6444
print("\nUse these files with:")
65-
print(" - llama.cpp: ./main -m tinyllama-q4.gguf")
45+
print(" - llama.cpp: ./llama-cli -m tinyllama-q4.gguf -p 'Hello!'")
6646
print(" - Ollama: ollama create mymodel -f Modelfile")
6747
print(" - LM Studio: Import the .gguf file")

examples/03_finetuning.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
QuantLLM v2.0 - Fine-tuning Example
2+
QuantLLM v2.1 - Fine-tuning Example
33
44
Fine-tune a quantized model using LoRA.
55
"""

examples/04_hub_push.py

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,45 @@
11
"""
2-
QuantLLM v2.0 - Push to HuggingFace Hub
2+
QuantLLM v2.1 - Push to HuggingFace Hub
33
4-
Push your models to HuggingFace Hub.
4+
Push your models to HuggingFace Hub with auto-generated model cards.
55
"""
66

7-
from quantllm import turbo, HubManager
7+
from quantllm import turbo
88

99
# ============================================
1010
# 1. Load and Prepare Model
1111
# ============================================
1212
print("📦 Loading model...")
13-
model = turbo("TinyLlama/TinyLlama-1.1B-Chat-v1.0", bits=4)
14-
15-
# ============================================
16-
# 2. Save Locally First
17-
# ============================================
18-
print("\n💾 Saving model locally...")
19-
model.export("safetensors", "./my_quantized_model/")
13+
model = turbo(
14+
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
15+
bits=4,
16+
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
17+
)
2018

2119
# ============================================
22-
# 3. Push to Hub
20+
# 2. Push to Hub (GGUF format)
2321
# ============================================
24-
print("\n🚀 Pushing to HuggingFace Hub...")
25-
26-
hub = HubManager(token="YOUR_HF_TOKEN") # Or set HF_TOKEN env var
22+
print("\n🚀 Pushing GGUF to HuggingFace Hub...")
2723

28-
# Push the saved model
29-
hub.push_model(
30-
model_path="./my_quantized_model/",
31-
repo_name="my-quantized-tinyllama",
32-
private=False,
33-
commit_message="Upload quantized model via QuantLLM"
24+
# Uses shared config — format and quantization from config={}
25+
model.push(
26+
"YOUR_USERNAME/my-quantized-tinyllama-gguf",
27+
license="apache-2.0",
28+
# token="hf_..." # Or set HF_TOKEN env var
3429
)
3530

36-
print("\n✅ Model pushed to Hub!")
37-
print(" Visit: https://huggingface.co/YOUR_USERNAME/my-quantized-tinyllama")
31+
print("\n✅ GGUF pushed to Hub!")
3832

3933
# ============================================
40-
# Alternative: Push GGUF File
34+
# 3. Push SafeTensors format
4135
# ============================================
42-
print("\n📦 Creating and pushing GGUF...")
43-
44-
# Export to GGUF
45-
model.export("gguf", "tinyllama-q4.gguf")
36+
print("\n📦 Pushing SafeTensors to HuggingFace Hub...")
4637

47-
# Push GGUF file
48-
hub.push_model(
49-
model_path="tinyllama-q4.gguf",
50-
repo_name="my-gguf-model",
51-
private=False
38+
model.push(
39+
"YOUR_USERNAME/my-quantized-tinyllama",
40+
format="safetensors",
41+
license="apache-2.0",
5242
)
5343

54-
print("\n✅ GGUF pushed to Hub!")
44+
print("\n✅ SafeTensors pushed to Hub!")
45+
print(" Visit: https://huggingface.co/YOUR_USERNAME/my-quantized-tinyllama")

examples/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# QuantLLM v2.0 Examples
1+
# QuantLLM v2.1 Examples
22

33
Simple examples for the new TurboModel API.
44

0 commit comments

Comments (0)