Skip to content

Commit d22ac4b

Browse files
authored
support tools_call in calibrate messages for qwen3_vl (#213) (#215)
1 parent 20119fd commit d22ac4b

1 file changed

Lines changed: 18 additions & 6 deletions

File tree

angelslim/data/multimodal_dataset.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,14 @@ def _load_file_based_dataset(self, data_path: str, num_samples: int):
6262
# Validate format
6363
assert "messages" in data or "question" in data, "JSON format error"
6464

65-
# Prepare messages
66-
messages = self._prepare_messages(data)
65+
try:
66+
# Prepare messages
67+
messages, tools = self._prepare_messages(data)
6768

68-
self._process_and_append(messages)
69-
line_count += 1
69+
self._process_and_append(messages, tools)
70+
line_count += 1
71+
except Exception as e:
72+
print(f"Warning: processing data: {e}, continue to next data")
7073

7174
def _prepare_messages(self, data: Dict) -> List[Dict]:
7275
image_dir = os.path.join(os.path.dirname(self.data_path), "images")
@@ -134,7 +137,15 @@ def _prepare_messages(self, data: Dict) -> List[Dict]:
134137
for message in messages:
135138
if message["role"] == "assistant" or message["role"] == "system":
136139
message["content"] = message["content"][0]["text"]
137-
return messages
140+
141+
# extract tools if exist
142+
try:
143+
tools = data.get("tools", None)
144+
if tools is not None:
145+
tools = json.loads(tools)
146+
except Exception as e:
147+
print(f"Error extracting tools: {e}")
148+
return messages, tools
138149

139150
def _load_hf_dataset(self, dataset: str, num_samples: int):
140151
"""Load dataset from Hugging Face format"""
@@ -161,11 +172,12 @@ def _load_hf_dataset(self, dataset: str, num_samples: int):
161172
]
162173
self._process_and_append(messages)
163174

164-
def _process_and_append(self, messages: List[Dict]):
175+
def _process_and_append(self, messages: List[Dict], tools=None):
165176
"""Process messages and append to dataset"""
166177
if self.model_name in ["Qwen3VL", "Qwen3VLMoE"]:
167178
inputs = self.processor.apply_chat_template(
168179
messages,
180+
tools=tools,
169181
tokenize=True,
170182
add_generation_prompt=True,
171183
return_dict=True,

0 commit comments

Comments
 (0)