@@ -62,11 +62,14 @@ def _load_file_based_dataset(self, data_path: str, num_samples: int):
6262 # Validate format
6363 assert "messages" in data or "question" in data , "JSON format error"
6464
65- # Prepare messages
66- messages = self ._prepare_messages (data )
65+ try :
66+ # Prepare messages
67+ messages , tools = self ._prepare_messages (data )
6768
68- self ._process_and_append (messages )
69- line_count += 1
69+ self ._process_and_append (messages , tools )
70+ line_count += 1
71+ except Exception as e :
72+ print (f"Warning: processing data: { e } , continue to next data" )
7073
7174 def _prepare_messages (self , data : Dict ) -> List [Dict ]:
7275 image_dir = os .path .join (os .path .dirname (self .data_path ), "images" )
@@ -134,7 +137,15 @@ def _prepare_messages(self, data: Dict) -> List[Dict]:
134137 for message in messages :
135138 if message ["role" ] == "assistant" or message ["role" ] == "system" :
136139 message ["content" ] = message ["content" ][0 ]["text" ]
137- return messages
140+
141+ # extract tools if they exist
142+ try :
143+ tools = data .get ("tools" , None )
144+ if tools is not None :
145+ tools = json .loads (tools )
146+ except Exception as e :
147+ print (f"Error extracting tools: { e } " )
148+ return messages , tools
138149
139150 def _load_hf_dataset (self , dataset : str , num_samples : int ):
140151 """Load dataset from Hugging Face format"""
@@ -161,11 +172,12 @@ def _load_hf_dataset(self, dataset: str, num_samples: int):
161172 ]
162173 self ._process_and_append (messages )
163174
164- def _process_and_append (self , messages : List [Dict ]):
175+ def _process_and_append (self , messages : List [Dict ], tools = None ):
165176 """Process messages and append to dataset"""
166177 if self .model_name in ["Qwen3VL" , "Qwen3VLMoE" ]:
167178 inputs = self .processor .apply_chat_template (
168179 messages ,
180+ tools = tools ,
169181 tokenize = True ,
170182 add_generation_prompt = True ,
171183 return_dict = True ,
0 commit comments