Update example files and README for QuantLLM v2.1

codewithdark-git · codewithdark-git · commit ac649886c975 · 2026-04-29T18:11:47.000+05:00
diff --git a/examples/01_quickstart.py b/examples/01_quickstart.py
@@ -1,5 +1,5 @@
 """
-QuantLLM v2.0 - Quick Start Example
+QuantLLM v2.1 - Quick Start Example
 
 The simplest way to use QuantLLM.
 """
diff --git a/examples/02_gguf_export.py b/examples/02_gguf_export.py
@@ -1,18 +1,22 @@
 """
-QuantLLM v2.0 - GGUF Export Example
+QuantLLM v2.1 - GGUF Export Example
 
 Export models to GGUF format for use with llama.cpp, Ollama, LM Studio.
 No external dependencies required!
 """
 
-from quantllm import turbo, list_quant_types
+from quantllm import turbo, GGUF_QUANT_TYPES, QUANT_RECOMMENDATIONS
 
 # ============================================
 # Show Available Quantization Types
 # ============================================
 print("📦 Available quantization types:\n")
-for name, desc in list_quant_types().items():
-    print(f"  {name:12} - {desc}")
+for qt in GGUF_QUANT_TYPES:
+    print(f"  {qt}")
+
+print("\n📦 Recommended quantization types:\n")
+for use_case, qt in QUANT_RECOMMENDATIONS.items():
+    print(f"  {use_case:12} → {qt}")
 
 # ============================================
 # Load Model
@@ -24,44 +28,20 @@
 # Export to GGUF
 # ============================================
 
-# Option 1: Quick export (default q4_0)
-print("\n🚀 Exporting to GGUF (q4_0)...")
-model.export("gguf", "tinyllama-q4.gguf")
-
-# Option 2: High quality (q8_0)
-print("\n🚀 Exporting to GGUF (q8_0)...")
-model.export("gguf", "tinyllama-q8.gguf", quantization="q8_0")
-
-# Option 3: Half precision (f16)
-print("\n🚀 Exporting to GGUF (f16)...")
-model.export("gguf", "tinyllama-f16.gguf", quantization="f16")
-
-# ============================================
-# Using convert_to_gguf Directly
-# ============================================
-from quantllm import convert_to_gguf
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-print("\n🔧 Using convert_to_gguf directly...")
+# Option 1: Quick export (default Q4_K_M)
+print("\n🚀 Exporting to GGUF (Q4_K_M)...")
+model.export("gguf", "tinyllama-q4.gguf", quantization="Q4_K_M")
 
-# Load with transformers
-hf_model = AutoModelForCausalLM.from_pretrained(
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    torch_dtype="auto"
-)
-tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+# Option 2: High quality (Q8_0)
+print("\n🚀 Exporting to GGUF (Q8_0)...")
+model.export("gguf", "tinyllama-q8.gguf", quantization="Q8_0")
 
-# Convert
-convert_to_gguf(
-    model=hf_model,
-    tokenizer=tokenizer,
-    output_path="tinyllama-direct.gguf",
-    quant_type="q4_0",
-    verbose=True
-)
+# Option 3: Half precision (F16)
+print("\n🚀 Exporting to GGUF (F16)...")
+model.export("gguf", "tinyllama-f16.gguf", quantization="F16")
 
 print("\n✅ All exports complete!")
 print("\nUse these files with:")
-print("  - llama.cpp: ./main -m tinyllama-q4.gguf")
+print("  - llama.cpp: ./llama-cli -m tinyllama-q4.gguf -p 'Hello!'")
 print("  - Ollama: ollama create mymodel -f Modelfile")
 print("  - LM Studio: Import the .gguf file")
diff --git a/examples/03_finetuning.py b/examples/03_finetuning.py
@@ -1,5 +1,5 @@
 """
-QuantLLM v2.0 - Fine-tuning Example
+QuantLLM v2.1 - Fine-tuning Example
 
 Fine-tune a quantized model using LoRA.
 """
diff --git a/examples/04_hub_push.py b/examples/04_hub_push.py
@@ -1,54 +1,45 @@
 """
-QuantLLM v2.0 - Push to HuggingFace Hub
+QuantLLM v2.1 - Push to HuggingFace Hub
 
-Push your models to HuggingFace Hub.
+Push your models to HuggingFace Hub with auto-generated model cards.
 """
 
-from quantllm import turbo, HubManager
+from quantllm import turbo
 
 # ============================================
 # 1. Load and Prepare Model
 # ============================================
 print("📦 Loading model...")
-model = turbo("TinyLlama/TinyLlama-1.1B-Chat-v1.0", bits=4)
-
-# ============================================
-# 2. Save Locally First
-# ============================================
-print("\n💾 Saving model locally...")
-model.export("safetensors", "./my_quantized_model/")
+model = turbo(
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    bits=4,
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
 
 # ============================================
-# 3. Push to Hub
+# 2. Push to Hub (GGUF format)
 # ============================================
-print("\n🚀 Pushing to HuggingFace Hub...")
-
-hub = HubManager(token="YOUR_HF_TOKEN")  # Or set HF_TOKEN env var
+print("\n🚀 Pushing GGUF to HuggingFace Hub...")
 
-# Push the saved model
-hub.push_model(
-    model_path="./my_quantized_model/",
-    repo_name="my-quantized-tinyllama",
-    private=False,
-    commit_message="Upload quantized model via QuantLLM"
+# Uses the shared config passed to turbo() above — format and quantization come from it
+model.push(
+    "YOUR_USERNAME/my-quantized-tinyllama-gguf",
+    license="apache-2.0",
+    # token="hf_..."  # Or set HF_TOKEN env var
 )
 
-print("\n✅ Model pushed to Hub!")
-print("   Visit: https://huggingface.co/YOUR_USERNAME/my-quantized-tinyllama")
+print("\n✅ GGUF pushed to Hub!")
 
 # ============================================
-# Alternative: Push GGUF File
+# 3. Push SafeTensors format
 # ============================================
-print("\n📦 Creating and pushing GGUF...")
-
-# Export to GGUF
-model.export("gguf", "tinyllama-q4.gguf")
+print("\n📦 Pushing SafeTensors to HuggingFace Hub...")
 
-# Push GGUF file
-hub.push_model(
-    model_path="tinyllama-q4.gguf",
-    repo_name="my-gguf-model",
-    private=False
+model.push(
+    "YOUR_USERNAME/my-quantized-tinyllama",
+    format="safetensors",
+    license="apache-2.0",
 )
 
-print("\n✅ GGUF pushed to Hub!")
+print("\n✅ SafeTensors pushed to Hub!")
+print("   Visit: https://huggingface.co/YOUR_USERNAME/my-quantized-tinyllama")
diff --git a/examples/README.md b/examples/README.md
@@ -1,4 +1,4 @@
-# QuantLLM v2.0 Examples
+# QuantLLM v2.1 Examples
 
 Simple examples for the new TurboModel API.
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,4 +1,4 @@` |
| `1` | | `-# QuantLLM v2.0 Examples` |
| | `1` | `+# QuantLLM v2.1 Examples` |
| `2` | `2` | |
| `3` | `3` | `Simple examples for the new TurboModel API.` |
| `4` | `4` | |