refactor: use shared Turbo config defaults for export and push

Copilot · codewithdark-git · web-flow · commit 10ff17bf2525 · 2026-04-24T20:28:27.000Z
Agent-Logs-Url: https://github.com/codewithdark-git/QuantLLM/sessions/aa78d528-be1d-4467-813d-711a55ade22a
Co-authored-by: codewithdark-git <144595403+codewithdark-git@users.noreply.github.com>
diff --git a/quantllm/core/turbo_model.py b/quantllm/core/turbo_model.py
@@ -122,8 +122,6 @@ def from_pretrained(
             quantize: Whether to quantize the model
             config_override: Dict to override any auto-detected settings
             config: Shared export/push config (format, quantization, push_format, etc.)
-            quantize: Whether to quantize the model
-            config_override: Dict to override any auto-detected settings
             verbose: Print loading progress
             
         Returns:
@@ -1009,10 +1007,16 @@ def export(
             >>> model.export("onnx", "./my_model_onnx/")
             >>> model.export("mlx", "./my_model_mlx/", quantization="4bit")
         """
-        format = (format or self.export_push_config["format"]).lower()
+        format = (
+            format
+            if format is not None
+            else self.export_push_config.get("format", DEFAULT_EXPORT_PUSH_CONFIG["format"])
+        ).lower()
         effective_quantization = quantization
         if effective_quantization is None and format == "gguf":
-            effective_quantization = self.export_push_config["quantization"]
+            effective_quantization = self.export_push_config.get(
+                "quantization", DEFAULT_EXPORT_PUSH_CONFIG["quantization"]
+            )
         
         # Merge LoRA if applied
         if self._lora_applied:
@@ -1025,7 +1029,7 @@ def export(
         if output_path is None:
             model_name = self.model.config._name_or_path.split('/')[-1]
             if format == "gguf":
-                quant = effective_quantization or "Q4_K_M"
+                quant = effective_quantization
                 output_path = f"{model_name}.{quant.upper()}.gguf"
             elif format == "safetensors":
                 output_path = f"./{model_name}-quantllm/"
@@ -1086,8 +1090,14 @@ def push_to_hub(
         """
         from ..hub import QuantLLMHubManager
         
-        format_lower = (format or self.export_push_config["push_format"]).lower()
-        push_quantization = quantization or self.export_push_config["push_quantization"]
+        format_lower = (
+            format
+            if format is not None
+            else self.export_push_config.get("push_format", DEFAULT_EXPORT_PUSH_CONFIG["push_format"])
+        ).lower()
+        push_quantization = quantization or self.export_push_config.get(
+            "push_quantization", DEFAULT_EXPORT_PUSH_CONFIG["push_quantization"]
+        )
         
         # Get the original base model name (full path for HuggingFace link)
         base_model_full = self.model.config._name_or_path
@@ -1101,7 +1111,7 @@ def push_to_hub(
         
         if format_lower == "gguf":
             # Export GGUF directly to staging
-            quant_label = push_quantization or "Q4_K_M"
+            quant_label = push_quantization
             filename = f"{model_name}.{quant_label.upper()}.gguf"
             save_path = os.path.join(manager.staging_dir, filename)
             
diff --git a/tests/test_export_push_config.py b/tests/test_export_push_config.py
@@ -11,7 +11,8 @@ def _stub_turbo(export_push_config):
     model = TurboModel.__new__(TurboModel)
     model.model = _stub_model()
     model.tokenizer = None
-    model.config = SimpleNamespace(quant_type="Q8_0")
+    smart_config = SimpleNamespace(quant_type="Q8_0")
+    model.config = smart_config
     model._lora_applied = False
     model.verbose = False
     model.export_push_config = export_push_config
@@ -36,7 +37,7 @@ def test_build_export_push_config_aligns_push_values_with_export_values():
     assert resolved["push_quantization"] == "Q5_K_M"
 
 
-def test_export_uses_shared_config_when_format_and_quantization_are_omitted():
+def test_export_prefers_shared_quantization_over_smart_config_quant_type():
     model = _stub_turbo(
         {
             "format": "gguf",
@@ -60,11 +61,12 @@ def fake_export_gguf(output_path, quantization=None, **kwargs):
 
     output = model.export()
 
+    assert model.config.quant_type == "Q8_0"
     assert output.endswith(".Q4_K_M.gguf")
     assert captured["quantization"] == "Q4_K_M"
 
 
-def test_push_uses_shared_config_when_omitted(monkeypatch):
+def test_gguf_push_uses_shared_config_when_omitted(monkeypatch):
     model = _stub_turbo({
         "format": "gguf",
         "push_format": "gguf",