Commit 1aa2f52

fix: address review feedback and refresh docs for v2.1 prerelease
Agent-Logs-Url: https://github.com/codewithdark-git/QuantLLM/sessions/4f6f0bc8-c954-444c-afa4-6244c9cf4865
Co-authored-by: codewithdark-git <144595403+codewithdark-git@users.noreply.github.com>
1 parent 037d537 commit 1aa2f52

26 files changed

Lines changed: 151 additions & 74 deletions

README.md

Lines changed: 17 additions & 10 deletions
@@ -1,7 +1,7 @@
 <div align="center">
 <img src="docs/images/1.png" alt="QuantLLM Logo" />

-# 🚀 QuantLLM v2.0
+# 🚀 QuantLLM v2.1 (pre-release)

 **The Ultra-Fast LLM Quantization & Export Library**

@@ -52,9 +52,12 @@ model = AutoModelForCausalLM.from_pretrained(
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3-8B")  # Auto-quantizes
+model = turbo(
+    "meta-llama/Llama-3-8B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)  # Auto-quantizes
 model.generate("Hello!")  # Generate text
-model.export("gguf", quantization="Q4_K_M")  # Export to GGUF
+model.export()  # Export to GGUF with shared config
 ```

 ---
@@ -77,14 +80,17 @@ pip install "quantllm[full] @ git+https://github.com/codewithdark-git/QuantLLM.g
 from quantllm import turbo

 # Load with automatic optimization
-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Generate text
 response = model.generate("Explain quantum computing simply")
 print(response)

 # Export to GGUF
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model.export("gguf", "model.Q4_K_M.gguf")
 ```

 **QuantLLM automatically:**
@@ -136,7 +142,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S

 ```
 ╔════════════════════════════════════════════════════════════╗
-║ 🚀 QuantLLM v2.0.0
+║ 🚀 QuantLLM v2.1.0rc1
 ║ Ultra-fast LLM Quantization & Export ║
 ║ ✓ GGUF ✓ ONNX ✓ MLX ✓ SafeTensors ║
 ╚════════════════════════════════════════════════════════════╝
@@ -151,7 +157,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S
 Auto-generates model cards with YAML frontmatter, usage examples, and "Use this model" button:

 ```python
-model.push("user/my-model", format="gguf", quantization="Q4_K_M")
+model.push("user/my-model")
 ```

 ---
@@ -198,7 +204,10 @@ model.export("safetensors", "./model-hf/")
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Simple generation
 response = model.generate(
@@ -270,8 +279,6 @@ model = turbo("meta-llama/Llama-3.2-3B")
 # Push with auto-generated model card
 model.push(
     "your-username/my-model",
-    format="gguf",
-    quantization="Q4_K_M",
     license="apache-2.0"
 )
 ```
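
Every hunk above makes the same move: quantization and export settings are declared once in a `config` dict passed to `turbo()`, and later `export()`/`push()` calls omit them. A minimal sketch of the resulting workflow, using only the three config keys that appear in this commit (`format`, `quantization`, `push_format`); any other keys the dict may accept are not shown here:

```python
from quantllm import turbo

# Shared defaults, declared once at load time.
config = {
    "format": "gguf",          # default format for model.export()
    "quantization": "Q4_K_M",  # default quantization level
    "push_format": "gguf",     # default format for model.push()
}

model = turbo("meta-llama/Llama-3.2-3B", config=config)

model.export()               # exports GGUF at Q4_K_M, per the shared config
model.push("user/my-model")  # pushes in the config's push_format
```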

docs/api/gguf.md

Lines changed: 5 additions & 2 deletions
@@ -10,8 +10,11 @@ Export models to GGUF format for llama.cpp, Ollama, and LM Studio.
 from quantllm import turbo, convert_to_gguf, quantize_gguf

 # Method 1: Via TurboModel
-model = turbo("meta-llama/Llama-3.2-3B")
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.export("gguf", "model.Q4_K_M.gguf")

 # Method 2: Direct conversion
 convert_to_gguf("meta-llama/Llama-3.2-3B", "model.Q4_K_M.gguf", quant_type="Q4_K_M")

docs/api/hub.md

Lines changed: 12 additions & 8 deletions
@@ -10,8 +10,11 @@ Push models to HuggingFace Hub with auto-generated model cards.
 from quantllm import turbo, QuantLLMHubManager

 # Method 1: TurboModel.push() (Recommended)
-model = turbo("meta-llama/Llama-3.2-3B")
-model.push("user/my-model", format="gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.push("user/my-model")

 # Method 2: QuantLLMHubManager (Advanced)
 manager = QuantLLMHubManager("user/my-model", hf_token="hf_...")
@@ -30,7 +33,7 @@ def push(
     self,
     repo_id: str,
     token: Optional[str] = None,
-    format: str = "safetensors",
+    format: Optional[str] = None,
     quantization: Optional[str] = None,
     license: str = "apache-2.0",
     commit_message: str = "Upload model via QuantLLM",
@@ -44,7 +47,7 @@ def push(
 |-----------|------|---------|-------------|
 | `repo_id` | str | required | HuggingFace repo ID (user/model) |
 | `token` | str | None | HF token (or use HF_TOKEN env) |
-| `format` | str | "safetensors" | Export format |
+| `format` | str | None | Export format (uses `config["push_format"]` when omitted) |
 | `quantization` | str | None | Quantization type |
 | `license` | str | "apache-2.0" | License type |

@@ -62,13 +65,14 @@ def push(
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Push as GGUF
 model.push(
-    "your-username/llama-3.2-3b-gguf",
-    format="gguf",
-    quantization="Q4_K_M"
+    "your-username/llama-3.2-3b-gguf"
 )

 # Push as ONNX
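
With the default changed from `format: str = "safetensors"` to `format: Optional[str] = None`, `push()` now has to resolve the effective format at call time. The resolution code itself isn't part of this commit; a hypothetical sketch of the fallback order the documented behavior implies (explicit argument, then `config["push_format"]`, then the old `"safetensors"` default):

```python
from typing import Optional

def resolve_push_format(format: Optional[str], config: dict) -> str:
    """Hypothetical helper: mirrors the documented fallback for push()."""
    if format is not None:
        return format  # an explicit argument always wins
    # fall back to the shared config, then the pre-2.1 default
    return config.get("push_format", "safetensors")

assert resolve_push_format("onnx", {"push_format": "gguf"}) == "onnx"
assert resolve_push_format(None, {"push_format": "gguf"}) == "gguf"
assert resolve_push_format(None, {}) == "safetensors"
```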

docs/api/turbo.md

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,7 @@ When `verbose=True` (default), you'll see:

 ```
 ╔════════════════════════════════════════════════════════════╗
-║ 🚀 QuantLLM v2.0.0
+║ 🚀 QuantLLM v2.1.0rc1
 ╚════════════════════════════════════════════════════════════╝

 📊 Loading: meta-llama/Llama-3.2-3B

docs/api/turbomodel.md

Lines changed: 11 additions & 9 deletions
@@ -232,23 +232,27 @@ Export the model to various formats.
 ```python
 def export(
     self,
-    format: str,
-    output_path: str,
+    format: Optional[str] = None,
+    output_path: Optional[str] = None,
     quantization: Optional[str] = None,
     **kwargs
 ) -> str
 ```

 | Parameter | Type | Description |
 |-----------|------|-------------|
-| `format` | str | "gguf", "onnx", "mlx", "safetensors" |
-| `output_path` | str | Output file or directory |
+| `format` | str | "gguf", "onnx", "mlx", "safetensors" (optional, uses shared config) |
+| `output_path` | str | Output file or directory (optional) |
 | `quantization` | str | Quantization type (format-specific) |

 **Examples:**
 ```python
 # GGUF
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.export()

 # ONNX
 model.export("onnx", "./model-onnx/")
@@ -269,7 +273,7 @@ def push(
     self,
     repo_id: str,
     token: Optional[str] = None,
-    format: str = "safetensors",
+    format: Optional[str] = None,
     quantization: Optional[str] = None,
     license: str = "apache-2.0",
     commit_message: str = "Upload model via QuantLLM",
@@ -281,9 +285,7 @@ def push(
 ```python
 # Push as GGUF
 model.push(
-    "your-username/my-model",
-    format="gguf",
-    quantization="Q4_K_M"
+    "your-username/my-model"
 )

 # Push as MLX
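
Note that the updated examples still pass explicit arguments for ONNX and MLX after a GGUF-oriented config, so per-call arguments apparently override the shared defaults rather than conflict with them. A short usage sketch under that assumption:

```python
model = turbo(
    "meta-llama/Llama-3.2-3B",
    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)

model.export()                         # GGUF at Q4_K_M, from the shared config
model.export("onnx", "./model-onnx/")  # explicit format/path override the config
```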

docs/conf.py

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@
 project = 'QuantLLM'
 copyright = '2024, Dark Coder'
 author = 'Dark Coder'
-release = '2.0.0'
+release = '2.1.0rc1'

 # Extensions
 extensions = [
@@ -21,7 +21,7 @@
 # HTML output
 html_theme = 'sphinx_rtd_theme'
 html_static_path = ['_static']
-html_title = 'QuantLLM v2.0'
+html_title = 'QuantLLM v2.1'
 html_logo = 'images/logo.png'
 html_favicon = 'images/favicon.ico'

docs/guide/finetuning.md

Lines changed: 2 additions & 2 deletions
@@ -193,13 +193,13 @@ print("Fine-tuned:", model.generate("prompt"))

 ```python
 # Export to GGUF
-model.export("gguf", "finetuned.Q4_K_M.gguf", quantization="Q4_K_M")
+model.export("gguf", "finetuned.Q4_K_M.gguf")

 # Export to SafeTensors
 model.export("safetensors", "./finetuned-model/")

 # Push to HuggingFace
-model.push("your-username/finetuned-model", format="gguf")
+model.push("your-username/finetuned-model")
 ```

 ### Save and Load

docs/guide/gguf-export.md

Lines changed: 4 additions & 2 deletions
@@ -130,10 +130,12 @@ print(output["choices"][0]["text"])
 Export and push in one step:

 ```python
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
 model.push(
     "your-username/my-model-gguf",
-    format="gguf",
-    quantization="Q4_K_M",
     license="apache-2.0"
 )
 ```

docs/guide/hub-integration.md

Lines changed: 5 additions & 4 deletions
@@ -11,14 +11,15 @@ The easiest way to share your model:
 ```python
 from quantllm import turbo

-model = turbo("meta-llama/Llama-3.2-3B")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Push with auto-generated model card
 model.push(
     "your-username/my-model",
-    token="hf_...",
-    format="gguf",
-    quantization="Q4_K_M"
+    token="hf_..."
 )
 ```

docs/index.md

Lines changed: 14 additions & 7 deletions
@@ -8,24 +8,27 @@

 ---

-## Welcome to QuantLLM v2.0
+## Welcome to QuantLLM v2.1 (pre-release)

 QuantLLM makes working with large language models simple. Load any model, quantize it automatically, fine-tune with your data, and export to any format — all with just a few lines of code.

 ```python
 from quantllm import turbo

-# Load with automatic 4-bit quantization
-model = turbo("meta-llama/Llama-3.2-3B")
+# Load with shared export/push defaults
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)

 # Generate text
 print(model.generate("Explain quantum computing"))

 # Export to GGUF for Ollama/llama.cpp
-model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
+model.export()

 # Push to HuggingFace with auto-generated model card
-model.push("username/my-model", format="gguf", quantization="Q4_K_M")
+model.push("username/my-model")
 ```

 ---
@@ -89,7 +92,11 @@ model = turbo("microsoft/phi-3-mini")

 ### Export to Any Format
 ```python
-model.export("gguf", "model.gguf", quantization="Q4_K_M")
+model = turbo(
+    "meta-llama/Llama-3.2-3B",
+    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
+)
+model.export()
 model.export("onnx", "./model-onnx/")
 model.export("mlx", "./model-mlx/", quantization="4bit")
 ```
@@ -101,7 +108,7 @@ model.finetune("training_data.json", epochs=3)

 ### Push to HuggingFace
 ```python
-model.push("username/my-model", format="gguf")
+model.push("username/my-model")
 ```

 ---
