Commit 1aa2f52

fix: address review feedback and refresh docs for v2.1 prerelease
Agent-Logs-Url: https://github.com/codewithdark-git/QuantLLM/sessions/4f6f0bc8-c954-444c-afa4-6244c9cf4865
Co-authored-by: codewithdark-git <144595403+codewithdark-git@users.noreply.github.com>
1 parent 037d537 commit 1aa2f52

26 files changed

Lines changed: 151 additions & 74 deletions

README.md

Lines changed: 17 additions & 10 deletions
@@ -1,7 +1,7 @@
 <div align="center">
 <img src="docs/images/1.png" alt="QuantLLM Logo" />

-# 🚀 QuantLLM v2.0
+# 🚀 QuantLLM v2.1 (pre-release)

 **The Ultra-Fast LLM Quantization & Export Library**

@@ -52,9 +52,12 @@ model = AutoModelForCausalLM.from_pretrained(
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3-8B")  # Auto-quantizes
+model = turbo(
+    "meta-llama/Llama-3-8B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)  # Auto-quantizes
 model.generate("Hello!")  # Generate text
-model.export("gguf", quantization="Q4_K_M")  # Export to GGUF
+model.export()  # Export to GGUF with shared config
 ```

 ---
@@ -77,14 +80,17 @@ pip install "quantllm[full] @ git+https://github.com/codewithdark-git/QuantLLM.g
 from quantllm import turbo

 # Load with automatic optimization
-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Generate text
 response = model.generate("Explain quantum computing simply")
 print(response)

 # Export to GGUF
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model.export("gguf", "model.Q4_K_M.gguf")
 ```

 **QuantLLM automatically:**
@@ -136,7 +142,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S

 ```
 ╔════════════════════════════════════════════════════════════╗
-║ 🚀 QuantLLM v2.0.0
+║ 🚀 QuantLLM v2.1.0rc1
 ║ Ultra-fast LLM Quantization & Export ║
 ║ ✓ GGUF ✓ ONNX ✓ MLX ✓ SafeTensors ║
 ╚════════════════════════════════════════════════════════════╝
@@ -151,7 +157,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S
 Auto-generates model cards with YAML frontmatter, usage examples, and "Use this model" button:

 ```python
-model.push("user/my-model", format="gguf", quantization="Q4_K_M")
+model.push("user/my-model")
 ```

 ---
@@ -198,7 +204,10 @@ model.export("safetensors", "./model-hf/")
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Simple generation
 response = model.generate(
@@ -270,8 +279,6 @@ model = turbo("meta-llama/Llama-3.2-3B")
 # Push with auto-generated model card
 model.push(
     "your-username/my-model",
-    format="gguf",
-    quantization="Q4_K_M",
     license="apache-2.0"
 )
 ```
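
Every hunk above makes the same move: quantization and export settings are declared once in a `config` dict passed to `turbo()`, and later `export()`/`push()` calls omit them. A minimal sketch of the resulting workflow, using only the three config keys that appear in this commit (`format`, `quantization`, `push_format`); any other keys the dict may accept are not shown here:

```python
from quantllm import turbo

# Shared defaults, declared once at load time.
config = {
    "format": "gguf",          # default format for model.export()
    "quantization": "Q4_K_M",  # default quantization level
    "push_format": "gguf",     # default format for model.push()
}

model = turbo("meta-llama/Llama-3.2-3B", config=config)

model.export()               # exports GGUF at Q4_K_M, per the shared config
model.push("user/my-model")  # pushes in the config's push_format
```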

docs/api/gguf.md

Lines changed: 5 additions & 2 deletions
@@ -10,8 +10,11 @@ Export models to GGUF format for llama.cpp, Ollama, and LM Studio.
 from quantllm import turbo, convert_to_gguf, quantize_gguf

 # Method 1: Via TurboModel
-model = turbo("meta-llama/Llama-3.2-3B")
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.export("gguf", "model.Q4_K_M.gguf")

 # Method 2: Direct conversion
 convert_to_gguf("meta-llama/Llama-3.2-3B", "model.Q4_K_M.gguf", quant_type="Q4_K_M")

docs/api/hub.md

Lines changed: 12 additions & 8 deletions
@@ -10,8 +10,11 @@ Push models to HuggingFace Hub with auto-generated model cards.
 from quantllm import turbo, QuantLLMHubManager

 # Method 1: TurboModel.push() (Recommended)
-model = turbo("meta-llama/Llama-3.2-3B")
-model.push("user/my-model", format="gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.push("user/my-model")

 # Method 2: QuantLLMHubManager (Advanced)
 manager = QuantLLMHubManager("user/my-model", hf_token="hf_...")
@@ -30,7 +33,7 @@ def push(
     self,
     repo_id: str,
     token: Optional[str] = None,
-    format: str = "safetensors",
+    format: Optional[str] = None,
     quantization: Optional[str] = None,
     license: str = "apache-2.0",
     commit_message: str = "Upload model via QuantLLM",
@@ -44,7 +47,7 @@ def push(
 |-----------|------|---------|-------------|
 | `repo_id` | str | required | HuggingFace repo ID (user/model) |
 | `token` | str | None | HF token (or use HF_TOKEN env) |
-| `format` | str | "safetensors" | Export format |
+| `format` | str | None | Export format (uses `config["push_format"]` when omitted) |
 | `quantization` | str | None | Quantization type |
 | `license` | str | "apache-2.0" | License type |

@@ -62,13 +65,14 @@ def push(
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Push as GGUF
 model.push(
-    "your-username/llama-3.2-3b-gguf",
-    format="gguf",
-    quantization="Q4_K_M"
+    "your-username/llama-3.2-3b-gguf"
 )

 # Push as ONNX
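
With the default changed from `format: str = "safetensors"` to `format: Optional[str] = None`, `push()` now has to resolve the effective format at call time. The resolution code itself isn't part of this commit; a hypothetical sketch of the fallback order the documented behavior implies (explicit argument, then `config["push_format"]`, then the old `"safetensors"` default):

```python
from typing import Optional

def resolve_push_format(format: Optional[str], config: dict) -> str:
    """Hypothetical helper: mirrors the documented fallback for push()."""
    if format is not None:
        return format  # an explicit argument always wins
    # fall back to the shared config, then the pre-2.1 default
    return config.get("push_format", "safetensors")

assert resolve_push_format("onnx", {"push_format": "gguf"}) == "onnx"
assert resolve_push_format(None, {"push_format": "gguf"}) == "gguf"
assert resolve_push_format(None, {}) == "safetensors"
```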

docs/api/turbo.md

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,7 @@ When `verbose=True` (default), you'll see:

 ```
 ╔════════════════════════════════════════════════════════════╗
-║ 🚀 QuantLLM v2.0.0
+║ 🚀 QuantLLM v2.1.0rc1
 ╚════════════════════════════════════════════════════════════╝

 📊 Loading: meta-llama/Llama-3.2-3B

docs/api/turbomodel.md

Lines changed: 11 additions & 9 deletions
@@ -232,23 +232,27 @@ Export the model to various formats.
 ```python
 def export(
     self,
-    format: str,
-    output_path: str,
+    format: Optional[str] = None,
+    output_path: Optional[str] = None,
     quantization: Optional[str] = None,
     **kwargs
 ) -> str
 ```

 | Parameter | Type | Description |
 |-----------|------|-------------|
-| `format` | str | "gguf", "onnx", "mlx", "safetensors" |
-| `output_path` | str | Output file or directory |
+| `format` | str | "gguf", "onnx", "mlx", "safetensors" (optional, uses shared config) |
+| `output_path` | str | Output file or directory (optional) |
 | `quantization` | str | Quantization type (format-specific) |

 **Examples:**
 ```python
 # GGUF
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.export()

 # ONNX
 model.export("onnx", "./model-onnx/")
@@ -269,7 +273,7 @@ def push(
     self,
     repo_id: str,
     token: Optional[str] = None,
-    format: str = "safetensors",
+    format: Optional[str] = None,
     quantization: Optional[str] = None,
     license: str = "apache-2.0",
     commit_message: str = "Upload model via QuantLLM",
@@ -281,9 +285,7 @@ def push(
 ```python
 # Push as GGUF
 model.push(
-    "your-username/my-model",
-    format="gguf",
-    quantization="Q4_K_M"
+    "your-username/my-model"
 )

 # Push as MLX
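
Note that the updated examples still pass explicit arguments for ONNX and MLX after a GGUF-oriented config, so per-call arguments apparently override the shared defaults rather than conflict with them. A short usage sketch under that assumption:

```python
model = turbo(
    "meta-llama/Llama-3.2-3B",
    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)

model.export()                         # GGUF at Q4_K_M, from the shared config
model.export("onnx", "./model-onnx/")  # explicit format/path override the config
```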

docs/conf.py

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@
 project = 'QuantLLM'
 copyright = '2024, Dark Coder'
 author = 'Dark Coder'
-release = '2.0.0'
+release = '2.1.0rc1'

 # Extensions
 extensions = [
@@ -21,7 +21,7 @@
 # HTML output
 html_theme = 'sphinx_rtd_theme'
 html_static_path = ['_static']
-html_title = 'QuantLLM v2.0'
+html_title = 'QuantLLM v2.1'
 html_logo = 'images/logo.png'
 html_favicon = 'images/favicon.ico'

docs/guide/finetuning.md

Lines changed: 2 additions & 2 deletions
@@ -193,13 +193,13 @@ print("Fine-tuned:", model.generate("prompt"))

 ```python
 # Export to GGUF
-model.export("gguf", "finetuned.Q4_K_M.gguf", quantization="Q4_K_M")
+model.export("gguf", "finetuned.Q4_K_M.gguf")

 # Export to SafeTensors
 model.export("safetensors", "./finetuned-model/")

 # Push to HuggingFace
-model.push("your-username/finetuned-model", format="gguf")
+model.push("your-username/finetuned-model")
 ```

 ### Save and Load

docs/guide/gguf-export.md

Lines changed: 4 additions & 2 deletions
@@ -130,10 +130,12 @@ print(output["choices"][0]["text"])
 Export and push in one step:

 ```python
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
 model.push(
     "your-username/my-model-gguf",
-    format="gguf",
-    quantization="Q4_K_M",
     license="apache-2.0"
 )
 ```

docs/guide/hub-integration.md

Lines changed: 5 additions & 4 deletions
@@ -11,14 +11,15 @@ The easiest way to share your model:
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Push with auto-generated model card
 model.push(
     "your-username/my-model",
-    token="hf_...",
-    format="gguf",
-    quantization="Q4_K_M"
+    token="hf_..."
 )
 ```

docs/index.md

Lines changed: 14 additions & 7 deletions
@@ -8,24 +8,27 @@

 ---

-## Welcome to QuantLLM v2.0
+## Welcome to QuantLLM v2.1 (pre-release)

 QuantLLM makes working with large language models simple. Load any model, quantize it automatically, fine-tune with your data, and export to any format — all with just a few lines of code.

 ```python
 from quantllm import turbo

-# Load with automatic 4-bit quantization
-model = turbo("meta-llama/Llama-3.2-3B")
+# Load with shared export/push defaults
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Generate text
 print(model.generate("Explain quantum computing"))

 # Export to GGUF for Ollama/llama.cpp
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model.export()

 # Push to HuggingFace with auto-generated model card
-model.push("username/my-model", format="gguf", quantization="Q4_K_M")
+model.push("username/my-model")
 ```

 ---
@@ -89,7 +92,11 @@ model = turbo("microsoft/phi-3-mini")

 ### Export to Any Format
 ```python
-model.export("gguf", "model.gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.export()
 model.export("onnx", "./model-onnx/")
 model.export("mlx", "./model-mlx/", quantization="4bit")
 ```
@@ -101,7 +108,7 @@ model.finetune("training_data.json", epochs=3)

 ### Push to HuggingFace
 ```python
-model.push("username/my-model", format="gguf")
+model.push("username/my-model")
 ```

 ---
