feat(QwenChatWrapper): support Qwen 3.5

giladgd · giladgd · commit 8b284f426bcf · 2026-03-10T22:00:47.000+02:00
diff --git a/src/chatWrappers/QwenChatWrapper.ts b/src/chatWrappers/QwenChatWrapper.ts
@@ -11,10 +11,12 @@ import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctio
 // source: https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M/blob/main/tokenizer_config.json#L197
 export class QwenChatWrapper extends ChatWrapper {
     public readonly wrapperName: string = "Qwen";
+    public readonly variation: "3" | "3.5";
 
     public readonly keepOnlyLastThought: boolean;
     public readonly thoughts: "auto" | "discourage";
     /** @internal */ private readonly _flatFunctionResultString: boolean;
+    /** @internal */ private readonly _ensureModelThoughtBeforeTextOnLastResponse: boolean;
 
     public override readonly settings: ChatWrapperSettings;
 
@@ -35,38 +37,90 @@ export class QwenChatWrapper extends ChatWrapper {
          */
         thoughts?: "auto" | "discourage",
 
+        /**
+         * Chat template variation to use.
+         *
+         * Defaults to `"3"`.
+         */
+        variation?: "3" | "3.5",
+
         /** @internal */
         _lineBreakBeforeFunctionCallPrefix?: boolean,
 
         /** @internal */
-        _flatFunctionResultString?: boolean
+        _flatFunctionResultString?: boolean,
+
+        /** @internal */
+        _ensureModelThoughtBeforeTextOnLastResponse?: boolean
     } = {}) {
         super();
 
         const {
             keepOnlyLastThought = true,
             thoughts = "auto",
+            variation = "3",
             _lineBreakBeforeFunctionCallPrefix = false,
-            _flatFunctionResultString = false
+            _flatFunctionResultString = false,
+            _ensureModelThoughtBeforeTextOnLastResponse = false
         } = options;
 
         this.keepOnlyLastThought = keepOnlyLastThought;
         this.thoughts = thoughts;
+        this.variation = variation;
         this._flatFunctionResultString = _flatFunctionResultString;
+        this._ensureModelThoughtBeforeTextOnLastResponse = _ensureModelThoughtBeforeTextOnLastResponse;
+
+        if (variation === "3")
+            this.settings = {
+                supportsSystemMessages: true,
+                functions: {
+                    call: {
+                        optionalPrefixSpace: true,
+                        prefix: LlamaText([
+                            _lineBreakBeforeFunctionCallPrefix
+                                ? "\n"
+                                : "",
+                            new SpecialTokensText("<tool_call>"), '\n{"name": "'
+                        ]),
+                        paramsPrefix: '", "arguments": ',
+                        suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
+                        emptyCallParamsPlaceholder: {}
+                    },
+                    result: {
+                        prefix: LlamaText(new SpecialTokensText("\n<tool_response>\n")),
+                        suffix: LlamaText(new SpecialTokensText("\n</tool_response>"))
+                    },
+                    parallelism: {
+                        call: {
+                            sectionPrefix: "",
+                            betweenCalls: _lineBreakBeforeFunctionCallPrefix
+                                ? ""
+                                : "\n",
+                            sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
+                        },
+                        result: {
+                            sectionPrefix: LlamaText(new SpecialTokensText("<|im_start|>user")),
+                            sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n<|im_start|>assistant\n"))
+                        }
+                    }
+                },
+                segments: {
+                    reiterateStackAfterFunctionCalls: true,
+                    thought: {
+                        prefix: LlamaText(new SpecialTokensText("<think>\n")),
+                        suffix: LlamaText(new SpecialTokensText("\n</think>"))
+                    }
+                }
+            };
 
         this.settings = {
             supportsSystemMessages: true,
             functions: {
                 call: {
                     optionalPrefixSpace: true,
-                    prefix: LlamaText([
-                        _lineBreakBeforeFunctionCallPrefix
-                            ? "\n"
-                            : "",
-                        new SpecialTokensText("<tool_call>"), '\n{"name": "'
-                    ]),
-                    paramsPrefix: '", "arguments": ',
-                    suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
+                    prefix: LlamaText(new SpecialTokensText("<tool_call>\n<function=")),
+                    paramsPrefix: ">\n<parameter=params>\n",
+                    suffix: LlamaText(new SpecialTokensText("\n</parameter>\n</function>\n</tool_call>")),
                     emptyCallParamsPlaceholder: {}
                 },
                 result: {
@@ -75,10 +129,10 @@ export class QwenChatWrapper extends ChatWrapper {
                 },
                 parallelism: {
                     call: {
-                        sectionPrefix: "",
-                        betweenCalls: _lineBreakBeforeFunctionCallPrefix
-                            ? ""
-                            : "\n",
+                        sectionPrefix: _lineBreakBeforeFunctionCallPrefix
+                            ? "\n\n"
+                            : "",
+                        betweenCalls: "\n",
                         sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
                     },
                     result: {
@@ -146,20 +200,35 @@ export class QwenChatWrapper extends ChatWrapper {
             } else if (item.type === "model") {
                 flush();
 
-                const transformedModelResponse = (this.thoughts === "discourage" && isLastItem)
+                let transformedModelResponse = (this.thoughts === "discourage" && isLastItem)
                     ? discourageThoughtsInModelResponse(item.response)
                     : item.response;
 
+                if (this.keepOnlyLastThought && !isLastItem)
+                    transformedModelResponse = transformedModelResponse.filter((response) => (
+                        !isChatModelResponseSegment(response) || response.segmentType !== "thought"
+                    ));
+                else if (isLastItem && this._ensureModelThoughtBeforeTextOnLastResponse) {
+                    transformedModelResponse = transformedModelResponse.flatMap((response, index): ChatModelResponse["response"] => {
+                        if (typeof response !== "string")
+                            return [response];
+
+                        const previousResponse = transformedModelResponse[index - 1];
+                        if (previousResponse != null && isChatModelResponseSegment(previousResponse) && previousResponse.segmentType === "thought")
+                            return [response];
+
+                        return [{
+                            type: "segment",
+                            segmentType: "thought",
+                            text: "",
+                            ended: true,
+                            raw: LlamaText(new SpecialTokensText("<think>\n\n</think>\n\n")).toJSON()
+                        }, response];
+                    });
+                }
+
                 currentAggregateFocus = null;
-                modelTexts.push(
-                    this.generateModelResponseText(
-                        (this.keepOnlyLastThought && !isLastItem)
-                            ? transformedModelResponse.filter((response) => (
-                                !isChatModelResponseSegment(response) || response.segmentType !== "thought"
-                            ))
-                            : transformedModelResponse
-                    )
-                );
+                modelTexts.push(this.generateModelResponseText(transformedModelResponse));
             } else
                 void (item satisfies never);
         }
@@ -226,20 +295,60 @@ export class QwenChatWrapper extends ChatWrapper {
         if (!functionsDocumentationGenerator.hasAnyFunctions)
             return LlamaText([]);
 
+        if (this.variation === "3")
+            return LlamaText.joinValues("\n", [
+                "# Tools",
+                "",
+                "You may call one or more functions to assist with the user query.",
+                "",
+                LlamaText("You are provided with function signatures within ", new SpecialTokensText("<tools></tools>"), " XML tags:"),
+                LlamaText(new SpecialTokensText("<tools>")),
+                functionsDocumentationGenerator.getQwenFunctionSignatures({documentParams}),
+                LlamaText(new SpecialTokensText("</tools>")),
+                "",
+                LlamaText("For each function call, return a json object with function name and arguments within ", new SpecialTokensText("<tool_call></tool_call>"), " XML tags:"),
+                LlamaText(new SpecialTokensText("<tool_call>")),
+                '{"name": <function-name>, "arguments": <args-json-object>}',
+                LlamaText(new SpecialTokensText("</tool_call>"))
+            ]);
+
         return LlamaText.joinValues("\n", [
             "# Tools",
             "",
-            "You may call one or more functions to assist with the user query.",
+            "You have access to the following functions:",
             "",
-            LlamaText("You are provided with function signatures within ", new SpecialTokensText("<tools></tools>"), " XML tags:"),
             LlamaText(new SpecialTokensText("<tools>")),
             functionsDocumentationGenerator.getQwenFunctionSignatures({documentParams}),
             LlamaText(new SpecialTokensText("</tools>")),
             "",
-            LlamaText("For each function call, return a json object with function name and arguments within ", new SpecialTokensText("<tool_call></tool_call>"), " XML tags:"),
+            LlamaText("If you choose to call a function ONLY reply in the following format with NO suffix:"),
+            "",
             LlamaText(new SpecialTokensText("<tool_call>")),
-            '{"name": <function-name>, "arguments": <args-json-object>}',
-            LlamaText(new SpecialTokensText("</tool_call>"))
+            LlamaText(new SpecialTokensText("<function="), "example_function_name", new SpecialTokensText(">")),
+            LlamaText(new SpecialTokensText("<parameter="), "example_parameter_1", new SpecialTokensText(">")),
+            "value_1",
+            LlamaText(new SpecialTokensText("</parameter>")),
+            LlamaText(new SpecialTokensText("<parameter="), "example_parameter_2", new SpecialTokensText(">")),
+            "This is the value for the second parameter",
+            "that can span",
+            "multiple lines",
+            LlamaText(new SpecialTokensText("</parameter>")),
+            LlamaText(new SpecialTokensText("</function>")),
+            LlamaText(new SpecialTokensText("</tool_call>")),
+            "",
+            LlamaText(new SpecialTokensText("<IMPORTANT>")),
+            "Reminder:",
+            LlamaText([
+                "- Function calls MUST follow the specified format: an inner ",
+                new SpecialTokensText("<function=...></function>"),
+                " block must be nested within ",
+                new SpecialTokensText("<tool_call></tool_call>"),
+                " XML tags"
+            ]),
+            "- Required parameters MUST be specified",
+            "- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after",
+            "- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls",
+            LlamaText(new SpecialTokensText("</IMPORTANT>"))
         ]);
     }
 
@@ -254,7 +363,9 @@ export class QwenChatWrapper extends ChatWrapper {
             architecture === GgufArchitectureType.qwen3 ||
             architecture === GgufArchitectureType.qwen3moe ||
             architecture === GgufArchitectureType.qwen3vl ||
-            architecture === GgufArchitectureType.qwen3vlmoe
+            architecture === GgufArchitectureType.qwen3vlmoe ||
+            architecture === GgufArchitectureType.qwen35 ||
+            architecture === GgufArchitectureType.qwen35moe
         );
     }
 
@@ -277,6 +388,27 @@ export class QwenChatWrapper extends ChatWrapper {
                 {_flatFunctionResultString: true, thoughts: "discourage", _lineBreakBeforeFunctionCallPrefix: true},
                 {},
                 {_requireFunctionCallSettingsExtraction: true}
+            ],
+
+            [
+                {variation: "3.5"},
+                {variation: "3.5"},
+                {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
+            ],
+            [
+                {variation: "3.5", _lineBreakBeforeFunctionCallPrefix: true},
+                {variation: "3.5"},
+                {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
+            ],
+            [
+                {variation: "3.5", _ensureModelThoughtBeforeTextOnLastResponse: true, _lineBreakBeforeFunctionCallPrefix: true},
+                {variation: "3.5"},
+                {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
+            ],
+            [
+                {variation: "3.5", _ensureModelThoughtBeforeTextOnLastResponse: true},
+                {variation: "3.5"},
+                {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
             ]
         ];
     }
diff --git a/src/chatWrappers/generic/JinjaTemplateChatWrapper.ts b/src/chatWrappers/generic/JinjaTemplateChatWrapper.ts
@@ -105,7 +105,10 @@ export type JinjaTemplateChatWrapperOptions = {
     tokenizer?: Tokenizer,
 
     /** @internal */
-    _requireFunctionCallSettingsExtraction?: boolean
+    _requireFunctionCallSettingsExtraction?: boolean,
+
+    /** @internal */
+    _functionCallExtractionExamineNonFirst?: boolean
 };
 
 export type JinjaTemplateChatWrapperOptionsConvertMessageFormat = {
@@ -185,7 +188,8 @@ export class JinjaTemplateChatWrapper extends ChatWrapper {
             additionalRenderParameters,
             segments,
             tokenizer,
-            _requireFunctionCallSettingsExtraction = false
+            _requireFunctionCallSettingsExtraction = false,
+            _functionCallExtractionExamineNonFirst = false
         } = options;
 
         if (template == null)
@@ -359,7 +363,8 @@ export class JinjaTemplateChatWrapper extends ChatWrapper {
                 this._wrapFunctionParamsInsideMapKey = detectNeedToWrapFunctionArgumentsWithMap({idsGenerator, renderTemplate});
                 const extractedSettings = extractFunctionCallSettingsFromJinjaTemplate({
                     idsGenerator,
-                    renderTemplate
+                    renderTemplate,
+                    examineNonFirstFunctionCall: _functionCallExtractionExamineNonFirst
                 });
                 functionCallSettings = extractedSettings.settings;
 
diff --git a/src/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.ts b/src/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.ts
@@ -12,10 +12,12 @@ export type ExtractFunctionCallSettingsRenderTemplate = (options: {
 
 export function extractFunctionCallSettingsFromJinjaTemplate({
     idsGenerator,
-    renderTemplate
+    renderTemplate,
+    examineNonFirstFunctionCall = false
 }: {
     idsGenerator: UniqueIdGenerator,
-    renderTemplate: ExtractFunctionCallSettingsRenderTemplate
+    renderTemplate: ExtractFunctionCallSettingsRenderTemplate,
+    examineNonFirstFunctionCall?: boolean
 }): {
     settings: ChatWrapperSettings["functions"] | null,
     stringifyParams: boolean,
@@ -79,6 +81,13 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
         type: "user",
         text: userMessage1
     }];
+    const dummyChatTurn: ChatHistoryItem[] = [{
+        type: "user",
+        text: "Hi"
+    }, {
+        type: "model",
+        response: ["Hey"]
+    }];
     const chatHistory1Call: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -95,7 +104,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
     const chatHistoryOnlyCall: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -111,7 +120,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
     const chatHistory2Calls: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -135,7 +144,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
     const chatHistory2CallsNewChunk: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -159,7 +168,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
 
     const additionalParams = {
         "bos_token": bosTokenId,
diff --git a/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts b/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts