@@ -11,10 +11,12 @@ import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctio
1111// source: https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M/blob/main/tokenizer_config.json#L197
1212export class QwenChatWrapper extends ChatWrapper {
1313 public readonly wrapperName : string = "Qwen" ;
14+ public readonly variation : "3" | "3.5" ;
1415
1516 public readonly keepOnlyLastThought : boolean ;
1617 public readonly thoughts : "auto" | "discourage" ;
1718 /** @internal */ private readonly _flatFunctionResultString : boolean ;
19+ /** @internal */ private readonly _ensureModelThoughtBeforeTextOnLastResponse : boolean ;
1820
1921 public override readonly settings : ChatWrapperSettings ;
2022
@@ -35,38 +37,90 @@ export class QwenChatWrapper extends ChatWrapper {
3537 */
3638 thoughts ?: "auto" | "discourage" ,
3739
40+ /**
41+ * Chat template variation to use.
42+ *
43+ * Defaults to `"3"`.
44+ */
45+ variation ?: "3" | "3.5" ,
46+
3847 /** @internal */
3948 _lineBreakBeforeFunctionCallPrefix ?: boolean ,
4049
4150 /** @internal */
42- _flatFunctionResultString ?: boolean
51+ _flatFunctionResultString ?: boolean ,
52+
53+ /** @internal */
54+ _ensureModelThoughtBeforeTextOnLastResponse ?: boolean
4355 } = { } ) {
4456 super ( ) ;
4557
4658 const {
4759 keepOnlyLastThought = true ,
4860 thoughts = "auto" ,
61+ variation = "3" ,
4962 _lineBreakBeforeFunctionCallPrefix = false ,
50- _flatFunctionResultString = false
63+ _flatFunctionResultString = false ,
64+ _ensureModelThoughtBeforeTextOnLastResponse = false
5165 } = options ;
5266
5367 this . keepOnlyLastThought = keepOnlyLastThought ;
5468 this . thoughts = thoughts ;
69+ this . variation = variation ;
5570 this . _flatFunctionResultString = _flatFunctionResultString ;
71+ this . _ensureModelThoughtBeforeTextOnLastResponse = _ensureModelThoughtBeforeTextOnLastResponse ;
72+
73+ if ( variation === "3" )
74+ this . settings = {
75+ supportsSystemMessages : true ,
76+ functions : {
77+ call : {
78+ optionalPrefixSpace : true ,
79+ prefix : LlamaText ( [
80+ _lineBreakBeforeFunctionCallPrefix
81+ ? "\n"
82+ : "" ,
83+ new SpecialTokensText ( "<tool_call>" ) , '\n{"name": "'
84+ ] ) ,
85+ paramsPrefix : '", "arguments": ' ,
86+ suffix : LlamaText ( "}\n" , new SpecialTokensText ( "</tool_call>" ) ) ,
87+ emptyCallParamsPlaceholder : { }
88+ } ,
89+ result : {
90+ prefix : LlamaText ( new SpecialTokensText ( "\n<tool_response>\n" ) ) ,
91+ suffix : LlamaText ( new SpecialTokensText ( "\n</tool_response>" ) )
92+ } ,
93+ parallelism : {
94+ call : {
95+ sectionPrefix : "" ,
96+ betweenCalls : _lineBreakBeforeFunctionCallPrefix
97+ ? ""
98+ : "\n" ,
99+ sectionSuffix : LlamaText ( new SpecialTokensText ( "<|im_end|>\n" ) )
100+ } ,
101+ result : {
102+ sectionPrefix : LlamaText ( new SpecialTokensText ( "<|im_start|>user" ) ) ,
103+ sectionSuffix : LlamaText ( new SpecialTokensText ( "<|im_end|>\n<|im_start|>assistant\n" ) )
104+ }
105+ }
106+ } ,
107+ segments : {
108+ reiterateStackAfterFunctionCalls : true ,
109+ thought : {
110+ prefix : LlamaText ( new SpecialTokensText ( "<think>\n" ) ) ,
111+ suffix : LlamaText ( new SpecialTokensText ( "\n</think>" ) )
112+ }
113+ }
114+ } ;
56115
57116 this . settings = {
58117 supportsSystemMessages : true ,
59118 functions : {
60119 call : {
61120 optionalPrefixSpace : true ,
62- prefix : LlamaText ( [
63- _lineBreakBeforeFunctionCallPrefix
64- ? "\n"
65- : "" ,
66- new SpecialTokensText ( "<tool_call>" ) , '\n{"name": "'
67- ] ) ,
68- paramsPrefix : '", "arguments": ' ,
69- suffix : LlamaText ( "}\n" , new SpecialTokensText ( "</tool_call>" ) ) ,
121+ prefix : LlamaText ( new SpecialTokensText ( "<tool_call>\n<function=" ) ) ,
122+ paramsPrefix : ">\n<parameter=params>\n" ,
123+ suffix : LlamaText ( new SpecialTokensText ( "\n</parameter>\n</function>\n</tool_call>" ) ) ,
70124 emptyCallParamsPlaceholder : { }
71125 } ,
72126 result : {
@@ -75,10 +129,10 @@ export class QwenChatWrapper extends ChatWrapper {
75129 } ,
76130 parallelism : {
77131 call : {
78- sectionPrefix : "" ,
79- betweenCalls : _lineBreakBeforeFunctionCallPrefix
80- ? ""
81- : "\n" ,
132+ sectionPrefix : _lineBreakBeforeFunctionCallPrefix
133+ ? "\n\n"
134+ : "" ,
135+ betweenCalls : "\n" ,
82136 sectionSuffix : LlamaText ( new SpecialTokensText ( "<|im_end|>\n" ) )
83137 } ,
84138 result : {
@@ -146,20 +200,35 @@ export class QwenChatWrapper extends ChatWrapper {
146200 } else if ( item . type === "model" ) {
147201 flush ( ) ;
148202
149- const transformedModelResponse = ( this . thoughts === "discourage" && isLastItem )
203+ let transformedModelResponse = ( this . thoughts === "discourage" && isLastItem )
150204 ? discourageThoughtsInModelResponse ( item . response )
151205 : item . response ;
152206
207+ if ( this . keepOnlyLastThought && ! isLastItem )
208+ transformedModelResponse = transformedModelResponse . filter ( ( response ) => (
209+ ! isChatModelResponseSegment ( response ) || response . segmentType !== "thought"
210+ ) ) ;
211+ else if ( isLastItem && this . _ensureModelThoughtBeforeTextOnLastResponse ) {
212+ transformedModelResponse = transformedModelResponse . flatMap ( ( response , index ) : ChatModelResponse [ "response" ] => {
213+ if ( typeof response !== "string" )
214+ return [ response ] ;
215+
216+ const previousResponse = transformedModelResponse [ index - 1 ] ;
217+ if ( previousResponse != null && isChatModelResponseSegment ( previousResponse ) && previousResponse . segmentType === "thought" )
218+ return [ response ] ;
219+
220+ return [ {
221+ type : "segment" ,
222+ segmentType : "thought" ,
223+ text : "" ,
224+ ended : true ,
225+ raw : LlamaText ( new SpecialTokensText ( "<think>\n\n</think>\n\n" ) ) . toJSON ( )
226+ } , response ] ;
227+ } ) ;
228+ }
229+
153230 currentAggregateFocus = null ;
154- modelTexts . push (
155- this . generateModelResponseText (
156- ( this . keepOnlyLastThought && ! isLastItem )
157- ? transformedModelResponse . filter ( ( response ) => (
158- ! isChatModelResponseSegment ( response ) || response . segmentType !== "thought"
159- ) )
160- : transformedModelResponse
161- )
162- ) ;
231+ modelTexts . push ( this . generateModelResponseText ( transformedModelResponse ) ) ;
163232 } else
164233 void ( item satisfies never ) ;
165234 }
@@ -226,20 +295,60 @@ export class QwenChatWrapper extends ChatWrapper {
226295 if ( ! functionsDocumentationGenerator . hasAnyFunctions )
227296 return LlamaText ( [ ] ) ;
228297
298+ if ( this . variation === "3" )
299+ return LlamaText . joinValues ( "\n" , [
300+ "# Tools" ,
301+ "" ,
302+ "You may call one or more functions to assist with the user query." ,
303+ "" ,
304+ LlamaText ( "You are provided with function signatures within " , new SpecialTokensText ( "<tools></tools>" ) , " XML tags:" ) ,
305+ LlamaText ( new SpecialTokensText ( "<tools>" ) ) ,
306+ functionsDocumentationGenerator . getQwenFunctionSignatures ( { documentParams} ) ,
307+ LlamaText ( new SpecialTokensText ( "</tools>" ) ) ,
308+ "" ,
309+ LlamaText ( "For each function call, return a json object with function name and arguments within " , new SpecialTokensText ( "<tool_call></tool_call>" ) , " XML tags:" ) ,
310+ LlamaText ( new SpecialTokensText ( "<tool_call>" ) ) ,
311+ '{"name": <function-name>, "arguments": <args-json-object>}' ,
312+ LlamaText ( new SpecialTokensText ( "</tool_call>" ) )
313+ ] ) ;
314+
229315 return LlamaText . joinValues ( "\n" , [
230316 "# Tools" ,
231317 "" ,
232- "You may call one or more functions to assist with the user query. " ,
318+ "You have access to the following functions: " ,
233319 "" ,
234- LlamaText ( "You are provided with function signatures within " , new SpecialTokensText ( "<tools></tools>" ) , " XML tags:" ) ,
235320 LlamaText ( new SpecialTokensText ( "<tools>" ) ) ,
236321 functionsDocumentationGenerator . getQwenFunctionSignatures ( { documentParams} ) ,
237322 LlamaText ( new SpecialTokensText ( "</tools>" ) ) ,
238323 "" ,
239- LlamaText ( "For each function call, return a json object with function name and arguments within " , new SpecialTokensText ( "<tool_call></tool_call>" ) , " XML tags:" ) ,
324+ LlamaText ( "If you choose to call a function ONLY reply in the following format with NO suffix:" ) ,
325+ "" ,
240326 LlamaText ( new SpecialTokensText ( "<tool_call>" ) ) ,
241- '{"name": <function-name>, "arguments": <args-json-object>}' ,
242- LlamaText ( new SpecialTokensText ( "</tool_call>" ) )
327+ LlamaText ( new SpecialTokensText ( "<function=" ) , "example_function_name" , new SpecialTokensText ( ">" ) ) ,
328+ LlamaText ( new SpecialTokensText ( "<parameter=" ) , "example_parameter_1" , new SpecialTokensText ( ">" ) ) ,
329+ "value_1" ,
330+ LlamaText ( new SpecialTokensText ( "</parameter>" ) ) ,
331+ LlamaText ( new SpecialTokensText ( "<parameter=" ) , "example_parameter_2" , new SpecialTokensText ( ">" ) ) ,
332+ "This is the value for the second parameter" ,
333+ "that can span" ,
334+ "multiple lines" ,
335+ LlamaText ( new SpecialTokensText ( "</parameter>" ) ) ,
336+ LlamaText ( new SpecialTokensText ( "</function>" ) ) ,
337+ LlamaText ( new SpecialTokensText ( "</tool_call>" ) ) ,
338+ "" ,
339+ LlamaText ( new SpecialTokensText ( "<IMPORTANT>" ) ) ,
340+ "Reminder:" ,
341+ LlamaText ( [
342+ "- Function calls MUST follow the specified format: an inner " ,
343+ new SpecialTokensText ( "<function=...></function>" ) ,
344+ " block must be nested within " ,
345+ new SpecialTokensText ( "<tool_call></tool_call>" ) ,
346+ " XML tags"
347+ ] ) ,
348+ "- Required parameters MUST be specified" ,
349+ "- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after" ,
350+ "- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls" ,
351+ LlamaText ( new SpecialTokensText ( "</IMPORTANT>" ) )
243352 ] ) ;
244353 }
245354
@@ -254,7 +363,9 @@ export class QwenChatWrapper extends ChatWrapper {
254363 architecture === GgufArchitectureType . qwen3 ||
255364 architecture === GgufArchitectureType . qwen3moe ||
256365 architecture === GgufArchitectureType . qwen3vl ||
257- architecture === GgufArchitectureType . qwen3vlmoe
366+ architecture === GgufArchitectureType . qwen3vlmoe ||
367+ architecture === GgufArchitectureType . qwen35 ||
368+ architecture === GgufArchitectureType . qwen35moe
258369 ) ;
259370 }
260371
@@ -277,6 +388,27 @@ export class QwenChatWrapper extends ChatWrapper {
277388 { _flatFunctionResultString : true , thoughts : "discourage" , _lineBreakBeforeFunctionCallPrefix : true } ,
278389 { } ,
279390 { _requireFunctionCallSettingsExtraction : true }
391+ ] ,
392+
393+ [
394+ { variation : "3.5" } ,
395+ { variation : "3.5" } ,
396+ { _requireFunctionCallSettingsExtraction : true , _functionCallExtractionExamineNonFirst : true }
397+ ] ,
398+ [
399+ { variation : "3.5" , _lineBreakBeforeFunctionCallPrefix : true } ,
400+ { variation : "3.5" } ,
401+ { _requireFunctionCallSettingsExtraction : true , _functionCallExtractionExamineNonFirst : true }
402+ ] ,
403+ [
404+ { variation : "3.5" , _ensureModelThoughtBeforeTextOnLastResponse : true , _lineBreakBeforeFunctionCallPrefix : true } ,
405+ { variation : "3.5" } ,
406+ { _requireFunctionCallSettingsExtraction : true , _functionCallExtractionExamineNonFirst : true }
407+ ] ,
408+ [
409+ { variation : "3.5" , _ensureModelThoughtBeforeTextOnLastResponse : true } ,
410+ { variation : "3.5" } ,
411+ { _requireFunctionCallSettingsExtraction : true , _functionCallExtractionExamineNonFirst : true }
280412 ]
281413 ] ;
282414 }
0 commit comments