@@ -43,29 +43,37 @@ class TemplateWorkarounds {
4343 List <LlamaChatMessage > messages,
4444 ChatFormat format,
4545 ) {
46+ final needsFuncArgsNormalization = _formatsNeedFuncArgsNormalization
47+ .contains (format);
48+ final needsGenericSchema = _formatsNeedGenericSchema.contains (format);
49+ final needsMoveToolCallsToContent = _formatsNeedMoveToolCallsToContent
50+ .contains (format);
51+
52+ if (! needsFuncArgsNormalization &&
53+ ! needsGenericSchema &&
54+ ! needsMoveToolCallsToContent) {
55+ return messages;
56+ }
57+
58+ if (! _hasTypedToolCalls (messages)) {
59+ return messages;
60+ }
61+
4662 final jsonMessages = messages.map ((m) => m.toJson ()).toList ();
47- var changed = false ;
4863
49- if (_formatsNeedFuncArgsNormalization. contains (format) ) {
64+ if (needsFuncArgsNormalization ) {
5065 normalizeToolCallArgs (jsonMessages);
51- changed = true ;
5266 }
5367
54- if (_formatsNeedGenericSchema. contains (format) ) {
68+ if (needsGenericSchema ) {
5569 useGenericSchema (jsonMessages);
56- changed = true ;
5770 }
5871
59- if (_formatsNeedMoveToolCallsToContent. contains (format) ) {
72+ if (needsMoveToolCallsToContent ) {
6073 moveToolCallsToContent (jsonMessages);
61- changed = true ;
62- }
63-
64- if (! changed) {
65- return messages;
6674 }
6775
68- return _messagesFromJson (jsonMessages);
76+ return _messagesFromJson (jsonMessages, messages );
6977 }
7078
7179 /// Ensures tool call arguments are JSON objects, not strings.
@@ -172,6 +180,12 @@ class TemplateWorkarounds {
172180 }
173181 }
174182
/// Reports whether at least one message in [messages] has a part that is a
/// [LlamaToolCallContent].
///
/// Returns `false` for an empty list or when no message carries such a part.
183+ static bool _hasTypedToolCalls (List <LlamaChatMessage > messages) {
184+ return messages.any (
185+ (message) => message.parts.any ((part) => part is LlamaToolCallContent ),
186+ );
187+ }
188+
175189 static Map <String , dynamic > _argumentsToObject (Object ? args) {
176190 final map = ToolCallParsingUtils .decodeJsonMapValue (args);
177191 if (map != null ) {
@@ -193,11 +207,18 @@ class TemplateWorkarounds {
193207
/// Rebuilds typed chat messages from their JSON map form.
///
/// The JSON map at index `i` of [messages] is converted by [_messageFromJson]
/// together with the entry at the same index `i` of [originals].
///
/// NOTE(review): `originals[i]` is read for every index of [messages], so this
/// assumes [originals] has at least as many entries as [messages] and that
/// both lists are in the same order — confirm that no workaround applied to
/// the JSON list adds, removes or reorders messages.
194208 static List <LlamaChatMessage > _messagesFromJson (
195209 List <Map <String , dynamic >> messages,
210+ List <LlamaChatMessage > originals,
196211 ) {
197- return messages.map (_messageFromJson).toList ();
212+ return [
213+ for (var i = 0 ; i < messages.length; i++ )
214+ _messageFromJson (messages[i], original: originals[i]),
215+ ];
198216 }
199217
200- static LlamaChatMessage _messageFromJson (Map <String , dynamic > message) {
218+ static LlamaChatMessage _messageFromJson (
219+ Map <String , dynamic > message, {
220+ required LlamaChatMessage original,
221+ }) {
201222 final role = _parseRole (message['role' ] as String ? ?? 'user' );
202223 final parts = < LlamaContentPart > [];
203224
@@ -244,10 +265,7 @@ class TemplateWorkarounds {
244265 ),
245266 );
246267 } else {
247- final text = _extractTextContent (content);
248- if (text.isNotEmpty) {
249- parts.add (LlamaTextContent (text));
250- }
268+ parts.addAll (_extractContentParts (content, original: original));
251269 }
252270
253271 if (parts.isEmpty) {
@@ -257,21 +275,68 @@ class TemplateWorkarounds {
257275 return LlamaChatMessage .withContent (role: role, content: parts);
258276 }
259277
260- static String _extractTextContent (Object ? content) {
261- if (content == null ) return '' ;
262- if (content is String ) return content;
263- if (content is ! List ) return content.toString ();
/// Converts a JSON `content` value into a list of typed content parts.
///
/// This comment describes the added (`+`) lines; the `-` lines interleaved
/// below belong to the replaced `_extractTextContent`.
///
/// Handling by shape of [content]:
/// * `null` yields an empty list.
/// * A `String` yields one [LlamaTextContent], or an empty list when the
///   string is empty.
/// * Any other non-`List` value is converted with `toString()` and treated
///   like a string.
/// * A `List` is walked item by item; items that are not
///   `Map<String, dynamic>` and items whose `'type'` matches no case are
///   skipped:
///   * `'text'`: adds a [LlamaTextContent] when `'text'` is a non-empty
///     string.
///   * `'image'` / `'image_url'`: adds the next not-yet-used
///     [LlamaImageContent] from `original.parts`; once those run out, the
///     part is built from the JSON item via [_imageContentFromJson].
///   * `'input_audio'` / `'audio'`: adds the next not-yet-used
///     [LlamaAudioContent] from `original.parts`; once those run out, the
///     item contributes nothing.
///
/// NOTE(review): media items are matched to the original parts purely by
/// order of appearance — presumably the JSON list preserves the original
/// part order; verify against `toJson`.
278+ static List <LlamaContentPart > _extractContentParts (
279+ Object ? content, {
280+ required LlamaChatMessage original,
281+ }) {
282+ if (content == null ) return const [];
283+ if (content is String ) {
284+ return content.isEmpty ? const [] : [LlamaTextContent (content)];
285+ }
286+ if (content is ! List ) {
287+ final text = content.toString ();
288+ return text.isEmpty ? const [] : [LlamaTextContent (text)];
289+ }
290+
// Typed media parts of the original message, consumed in order as media
// items are encountered in the JSON list.
291+ final originalImages = original.parts
292+ .whereType <LlamaImageContent >()
293+ .toList ();
294+ final originalAudio = original.parts
295+ .whereType <LlamaAudioContent >()
296+ .toList ();
297+ var imageIndex = 0 ;
298+ var audioIndex = 0 ;
299+ final parts = < LlamaContentPart > [];
264300
265- final buffer = StringBuffer ();
266301 for (final item in content) {
267- if (item is Map <String , dynamic > && item['type' ] == 'text' ) {
268- final text = item['text' ];
269- if (text is String ) {
270- buffer.write (text);
271- }
302+ if (item is ! Map <String , dynamic >) continue ;
303+ switch (item['type' ]) {
304+ case 'text' :
305+ final text = item['text' ];
306+ if (text is String && text.isNotEmpty) {
307+ parts.add (LlamaTextContent (text));
308+ }
309+ break ;
310+ case 'image' :
311+ case 'image_url' :
// Reuse the original typed image part when one is left; otherwise fall back
// to what can be rebuilt from the JSON item alone.
312+ if (imageIndex < originalImages.length) {
313+ parts.add (originalImages[imageIndex++ ]);
314+ } else {
315+ parts.add (_imageContentFromJson (item));
316+ }
317+ break ;
318+ case 'input_audio' :
319+ case 'audio' :
// There is no JSON fallback for audio: an item without a remaining original
// audio part adds nothing to the result.
320+ if (audioIndex < originalAudio.length) {
321+ parts.add (originalAudio[audioIndex++ ]);
322+ }
323+ break ;
272324 }
273325 }
274- return buffer.toString ();
326+
327+ return parts;
328+ }
329+
/// Builds a [LlamaImageContent] from a JSON content item.
///
/// The URL is read from `item['image_url']['url']`, and only when
/// `item['image_url']` is a `Map<String, dynamic>`:
/// * a string starting with `file://` becomes a `path:` image with that
///   prefix removed;
/// * any other non-empty string becomes a `url:` image;
/// * otherwise (no map, non-string or empty URL) an argument-less
///   [LlamaImageContent] is returned.
///
/// NOTE(review): whether an argument-less `LlamaImageContent` is a valid
/// value is not visible here — confirm against its constructor.
330+ static LlamaImageContent _imageContentFromJson (Map <String , dynamic > item) {
331+ final imageUrl = item['image_url' ];
332+ final url = imageUrl is Map <String , dynamic > ? imageUrl['url' ] : null ;
333+ if (url is String && url.startsWith ('file://' )) {
334+ return LlamaImageContent (path: url.substring ('file://' .length));
335+ }
336+ if (url is String && url.isNotEmpty) {
337+ return LlamaImageContent (url: url);
338+ }
339+ return const LlamaImageContent ();
340+ }
276341
277342 static LlamaChatRole _parseRole (String role) {
0 commit comments