@@ -37,10 +37,12 @@ def __init__(
3737 data_source : Union [str , Dict ] = None ,
3838 is_hf_dataset : bool = False ,
3939 model_name : str = None ,
40+ quantization_config : str = None ,
4041 ):
4142 super ().__init__ (processor , device , max_length )
4243 self .is_hf_dataset = is_hf_dataset
4344 self .model_name = model_name
45+ self .quant_algo = quantization_config .name if quantization_config else None
4446
4547 if is_hf_dataset :
4648 self ._load_hf_dataset (data_source , num_samples )
@@ -174,13 +176,21 @@ def _load_hf_dataset(self, dataset: str, num_samples: int):
174176
175177 def _process_and_append (self , messages : List [Dict ], tools = None ):
176178 """Process messages and append to dataset"""
179+
180+ # Pad to max_length when the quantization algorithm is GPTQ or AWQ; otherwise use padding=True.
181+ if "gptq" in self .quant_algo or "awq" in self .quant_algo :
182+ padding = "max_length"
183+ else :
184+ padding = True
185+
177186 if self .model_name in ["Qwen3VL" , "Qwen3VLMoE" ]:
178187 inputs = self .processor .apply_chat_template (
179188 messages ,
180189 tools = tools ,
181190 tokenize = True ,
182191 add_generation_prompt = True ,
183192 return_dict = True ,
193+ padding = padding ,
184194 truncation = True ,
185195 return_tensors = "pt" ,
186196 max_length = self .max_length ,
@@ -195,6 +205,7 @@ def _process_and_append(self, messages: List[Dict], tools=None):
195205 inputs = self .processor (
196206 text = [text ],
197207 images = image_inputs ,
208+ padding = padding ,
198209 truncation = True ,
199210 return_tensors = "pt" ,
200211 max_length = self .max_length ,
@@ -212,6 +223,7 @@ def _process_and_append(self, messages: List[Dict], tools=None):
212223 text = [text ],
213224 images = image_inputs ,
214225 videos = video_inputs ,
226+ padding = padding ,
215227 truncation = True ,
216228 return_tensors = "pt" ,
217229 max_length = self .max_length ,
0 commit comments