2626from .memory import memory_optimized_tensor_order
2727
DEFAULT_CHUNKED_SHARD_SIZE = "2GB"

# Baseline export/push settings. `_build_export_push_config` copies this
# mapping and layers caller overrides on top, so it is never mutated in place.
# "format" / "quantization" are the fallbacks read by `export()`;
# "push_format" / "push_quantization" are the fallbacks read by `push_to_hub()`.
DEFAULT_EXPORT_PUSH_CONFIG = dict(
    format="safetensors",
    push_format="safetensors",
    quantization="Q4_K_M",
    push_quantization="Q4_K_M",
)
2935
3036
3137class TurboModel :
@@ -57,6 +63,7 @@ def __init__(
5763 model : PreTrainedModel ,
5864 tokenizer : PreTrainedTokenizer ,
5965 config : SmartConfig ,
66+ export_push_config : Optional [Dict [str , Any ]] = None ,
6067 verbose : bool = False ,
6168 ):
6269 """
@@ -76,6 +83,7 @@ def __init__(
7683 self ._is_quantized = False
7784 self ._is_finetuned = False
7885 self ._lora_applied = False
86+ self .export_push_config = self ._build_export_push_config (export_push_config )
7987 self .verbose = verbose
8088
8189 @classmethod
@@ -92,6 +100,7 @@ def from_pretrained(
92100 trust_remote_code : bool = True ,
93101 quantize : bool = True ,
94102 config_override : Optional [Dict [str , Any ]] = None ,
103+ config : Optional [Dict [str , Any ]] = None ,
95104 verbose : bool = True ,
96105 ) -> "TurboModel" :
97106 """
@@ -112,6 +121,7 @@ def from_pretrained(
112121 trust_remote_code: Trust remote code in model
113122 quantize: Whether to quantize the model
114123 config_override: Dict to override any auto-detected settings
124+ config: Shared export/push config (format, quantization, push_format, etc.)
115125 quantize: Whether to quantize the model
116126 config_override: Dict to override any auto-detected settings
117127 verbose: Print loading progress
@@ -268,7 +278,7 @@ def from_pretrained(
268278 print_success ("Model loaded successfully!" )
269279 logger .info ("" )
270280
271- instance = cls (model , tokenizer , smart_config )
281+ instance = cls (model , tokenizer , smart_config , export_push_config = config )
272282 instance ._is_quantized = quantize and smart_config .bits < 16
273283
274284 return instance
@@ -494,6 +504,27 @@ def _get_quantization_kwargs(config: SmartConfig) -> Dict[str, Any]:
494504 except ImportError :
495505 logger .warning ("⚠ bitsandbytes not installed, loading without quantization" )
496506 return {}
507+
@staticmethod
def _build_export_push_config(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Build the shared export/push config with deterministic defaults.

    Starts from a copy of ``DEFAULT_EXPORT_PUSH_CONFIG`` and applies the
    caller's overrides on top of it.

    Args:
        config: Optional overrides. Recognised keys are the keys of
            ``DEFAULT_EXPORT_PUSH_CONFIG`` plus the aliases
            ``export_format`` (for ``format``) and ``export_quantization``
            (for ``quantization``). Unrecognised keys and ``None`` values
            are ignored.

    Returns:
        A new dict containing every key of ``DEFAULT_EXPORT_PUSH_CONFIG``.
        When the caller supplies ``format`` (or its alias) without
        ``push_format``, ``push_format`` follows the resolved ``format``;
        the same rule links ``quantization`` and ``push_quantization``.
    """
    resolved = dict(DEFAULT_EXPORT_PUSH_CONFIG)
    if not config:
        return resolved

    aliases = {
        "export_format": "format",
        "export_quantization": "quantization",
    }

    # Remember which canonical keys the caller mentioned, so the mirroring
    # step below treats an alias exactly like the key it stands for.
    provided = set()
    for key, value in config.items():
        canonical = aliases.get(key, key)
        provided.add(canonical)
        if canonical in resolved and value is not None:
            resolved[canonical] = value

    # A push setting the caller did not mention follows its export
    # counterpart instead of silently staying at the module default.
    mirrored_pairs = (
        ("format", "push_format"),
        ("quantization", "push_quantization"),
    )
    for source_key, push_key in mirrored_pairs:
        if source_key in provided and push_key not in provided:
            resolved[push_key] = resolved[source_key]

    return resolved
497528
498529 @staticmethod
499530 def _enable_flash_attention (model : PreTrainedModel , verbose : bool = True ) -> None :
@@ -945,7 +976,7 @@ def tokenize_function(examples):
945976
946977 def export (
947978 self ,
948- format : str ,
979+ format : Optional [ str ] = None ,
949980 output_path : Optional [str ] = None ,
950981 * ,
951982 quantization : Optional [str ] = None ,
@@ -961,7 +992,7 @@ def export(
961992 - "mlx": For Apple Silicon Macs
962993
963994 Args:
964- format: Target format (gguf, safetensors, onnx, mlx)
995+ format: Target format (gguf, safetensors, onnx, mlx). Uses shared config when omitted.
965996 output_path: Output file/directory path
966997 quantization: Format-specific quantization:
967998 - GGUF: Q4_K_M, Q5_K_M, Q8_0, etc.
@@ -978,7 +1009,10 @@ def export(
9781009 >>> model.export("onnx", "./my_model_onnx/")
9791010 >>> model.export("mlx", "./my_model_mlx/", quantization="4bit")
9801011 """
981- format = format .lower ()
1012+ format = (format or self .export_push_config ["format" ]).lower ()
1013+ effective_quantization = quantization
1014+ if effective_quantization is None and format == "gguf" :
1015+ effective_quantization = self .export_push_config ["quantization" ]
9821016
9831017 # Merge LoRA if applied
9841018 if self ._lora_applied :
@@ -991,7 +1025,7 @@ def export(
9911025 if output_path is None :
9921026 model_name = self .model .config ._name_or_path .split ('/' )[- 1 ]
9931027 if format == "gguf" :
994- quant = quantization or self . config . quant_type or "q4_k_m "
1028+ quant = effective_quantization or "Q4_K_M "
9951029 output_path = f"{ model_name } .{ quant .upper ()} .gguf"
9961030 elif format == "safetensors" :
9971031 output_path = f"./{ model_name } -quantllm/"
@@ -1012,7 +1046,7 @@ def export(
10121046 raise ValueError (f"Unknown format: { format } . Supported: { list (exporters .keys ())} " )
10131047
10141048 print_header (f"Exporting to { format .upper ()} " )
1015- result = exporters [format ](output_path , quantization = quantization , ** kwargs )
1049+ result = exporters [format ](output_path , quantization = effective_quantization , ** kwargs )
10161050 print_success (f"Exported to: { result } " )
10171051
10181052 return result
@@ -1021,7 +1055,7 @@ def push_to_hub(
10211055 self ,
10221056 repo_id : str ,
10231057 token : Optional [str ] = None ,
1024- format : str = "safetensors" ,
1058+ format : Optional [ str ] = None ,
10251059 quantization : Optional [str ] = None ,
10261060 commit_message : str = "Upload model via QuantLLM" ,
10271061 license : str = "apache-2.0" ,
@@ -1052,7 +1086,8 @@ def push_to_hub(
10521086 """
10531087 from ..hub import QuantLLMHubManager
10541088
1055- format_lower = format .lower ()
1089+ format_lower = (format or self .export_push_config ["push_format" ]).lower ()
1090+ push_quantization = quantization or self .export_push_config ["push_quantization" ]
10561091
10571092 # Get the original base model name (full path for HuggingFace link)
10581093 base_model_full = self .model .config ._name_or_path
@@ -1066,7 +1101,7 @@ def push_to_hub(
10661101
10671102 if format_lower == "gguf" :
10681103 # Export GGUF directly to staging
1069- quant_label = quantization or ( self . config . quant_type if self . config . quant_type != "GGUF" else "q4_k_m" ) or "q4_k_m "
1104+ quant_label = push_quantization or "Q4_K_M "
10701105 filename = f"{ model_name } .{ quant_label .upper ()} .gguf"
10711106 save_path = os .path .join (manager .staging_dir , filename )
10721107
@@ -1085,11 +1120,11 @@ def push_to_hub(
10851120 print_info ("Exporting to ONNX format..." )
10861121 save_path = manager .staging_dir
10871122
1088- self ._export_onnx (save_path , quantization = quantization , ** kwargs )
1123+ self ._export_onnx (save_path , quantization = push_quantization , ** kwargs )
10891124
10901125 manager .track_hyperparameters ({
10911126 "format" : "onnx" ,
1092- "quantization" : quantization ,
1127+ "quantization" : push_quantization ,
10931128 "base_model" : base_model_full ,
10941129 "license" : license ,
10951130 })
@@ -1100,11 +1135,11 @@ def push_to_hub(
11001135 print_info ("Exporting to MLX format..." )
11011136 save_path = manager .staging_dir
11021137
1103- self ._export_mlx (save_path , quantization = quantization , ** kwargs )
1138+ self ._export_mlx (save_path , quantization = push_quantization , ** kwargs )
11041139
11051140 manager .track_hyperparameters ({
11061141 "format" : "mlx" ,
1107- "quantization" : quantization ,
1142+ "quantization" : push_quantization ,
11081143 "base_model" : base_model_full ,
11091144 "license" : license ,
11101145 })
@@ -1117,7 +1152,7 @@ def push_to_hub(
11171152 "base_model" : base_model_full ,
11181153 "license" : license ,
11191154 })
1120- manager .save_final_model (self , format = format )
1155+ manager .save_final_model (self , format = format_lower )
11211156 manager ._generate_model_card (format = format_lower )
11221157
11231158 manager .push (commit_message = commit_message )
@@ -1852,6 +1887,7 @@ def turbo(
18521887 max_length : Optional [int ] = None ,
18531888 device : Optional [str ] = None ,
18541889 dtype : Optional [str ] = None ,
1890+ config : Optional [Dict [str , Any ]] = None ,
18551891 ** kwargs ,
18561892) -> TurboModel :
18571893 """
@@ -1866,6 +1902,7 @@ def turbo(
18661902 max_length: Override max sequence length (default: auto)
18671903 device: Override device (default: best GPU)
18681904 dtype: Override dtype (default: bf16/fp16)
1905+ config: Shared export/push config (format, quantization, push_format, etc.)
18691906 **kwargs: Additional options passed to from_pretrained
18701907
18711908 Returns:
@@ -1896,5 +1933,6 @@ def turbo(
18961933 max_length = max_length ,
18971934 device = device ,
18981935 dtype = dtype ,
1936+ config = config ,
18991937 ** kwargs ,
19001938 )
0 commit comments