Commit 8b284f4

feat(QwenChatWrapper): support Qwen 3.5

1 parent 48e5ed4

4 files changed
Lines changed: 357 additions & 41 deletions

src/chatWrappers/QwenChatWrapper.ts

Lines changed: 162 additions & 30 deletions
@@ -11,10 +11,12 @@ import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctio
 // source: https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M/blob/main/tokenizer_config.json#L197
 export class QwenChatWrapper extends ChatWrapper {
     public readonly wrapperName: string = "Qwen";
+    public readonly variation: "3" | "3.5";
 
     public readonly keepOnlyLastThought: boolean;
     public readonly thoughts: "auto" | "discourage";
     /** @internal */ private readonly _flatFunctionResultString: boolean;
+    /** @internal */ private readonly _ensureModelThoughtBeforeTextOnLastResponse: boolean;
 
     public override readonly settings: ChatWrapperSettings;
 
@@ -35,38 +37,90 @@ export class QwenChatWrapper extends ChatWrapper {
          */
         thoughts?: "auto" | "discourage",
 
+        /**
+         * Chat template variation to use.
+         *
+         * Defaults to `"3"`.
+         */
+        variation?: "3" | "3.5",
+
         /** @internal */
         _lineBreakBeforeFunctionCallPrefix?: boolean,
 
         /** @internal */
-        _flatFunctionResultString?: boolean
+        _flatFunctionResultString?: boolean,
+
+        /** @internal */
+        _ensureModelThoughtBeforeTextOnLastResponse?: boolean
     } = {}) {
         super();
 
         const {
             keepOnlyLastThought = true,
             thoughts = "auto",
+            variation = "3",
             _lineBreakBeforeFunctionCallPrefix = false,
-            _flatFunctionResultString = false
+            _flatFunctionResultString = false,
+            _ensureModelThoughtBeforeTextOnLastResponse = false
         } = options;
 
         this.keepOnlyLastThought = keepOnlyLastThought;
         this.thoughts = thoughts;
+        this.variation = variation;
         this._flatFunctionResultString = _flatFunctionResultString;
+        this._ensureModelThoughtBeforeTextOnLastResponse = _ensureModelThoughtBeforeTextOnLastResponse;
+
+        if (variation === "3")
+            this.settings = {
+                supportsSystemMessages: true,
+                functions: {
+                    call: {
+                        optionalPrefixSpace: true,
+                        prefix: LlamaText([
+                            _lineBreakBeforeFunctionCallPrefix
+                                ? "\n"
+                                : "",
+                            new SpecialTokensText("<tool_call>"), '\n{"name": "'
+                        ]),
+                        paramsPrefix: '", "arguments": ',
+                        suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
+                        emptyCallParamsPlaceholder: {}
+                    },
+                    result: {
+                        prefix: LlamaText(new SpecialTokensText("\n<tool_response>\n")),
+                        suffix: LlamaText(new SpecialTokensText("\n</tool_response>"))
+                    },
+                    parallelism: {
+                        call: {
+                            sectionPrefix: "",
+                            betweenCalls: _lineBreakBeforeFunctionCallPrefix
+                                ? ""
+                                : "\n",
+                            sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
+                        },
+                        result: {
+                            sectionPrefix: LlamaText(new SpecialTokensText("<|im_start|>user")),
+                            sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n<|im_start|>assistant\n"))
+                        }
+                    }
+                },
+                segments: {
+                    reiterateStackAfterFunctionCalls: true,
+                    thought: {
+                        prefix: LlamaText(new SpecialTokensText("<think>\n")),
+                        suffix: LlamaText(new SpecialTokensText("\n</think>"))
+                    }
+                }
+            };
 
         this.settings = {
             supportsSystemMessages: true,
             functions: {
                 call: {
                     optionalPrefixSpace: true,
-                    prefix: LlamaText([
-                        _lineBreakBeforeFunctionCallPrefix
-                            ? "\n"
-                            : "",
-                        new SpecialTokensText("<tool_call>"), '\n{"name": "'
-                    ]),
-                    paramsPrefix: '", "arguments": ',
-                    suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
+                    prefix: LlamaText(new SpecialTokensText("<tool_call>\n<function=")),
+                    paramsPrefix: ">\n<parameter=params>\n",
+                    suffix: LlamaText(new SpecialTokensText("\n</parameter>\n</function>\n</tool_call>")),
                     emptyCallParamsPlaceholder: {}
                 },
                 result: {
@@ -75,10 +129,10 @@ export class QwenChatWrapper extends ChatWrapper {
                 },
                 parallelism: {
                     call: {
-                        sectionPrefix: "",
-                        betweenCalls: _lineBreakBeforeFunctionCallPrefix
-                            ? ""
-                            : "\n",
+                        sectionPrefix: _lineBreakBeforeFunctionCallPrefix
+                            ? "\n\n"
+                            : "",
+                        betweenCalls: "\n",
                         sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
                     },
                     result: {
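
With the "3.5" call settings above, function calls are serialized in an XML-style format instead of the Qwen3 JSON format. Composing the new prefix, paramsPrefix, and suffix values, a call to a hypothetical getWeather function with params {"city": "Paris"} (both name and params invented for illustration) would render roughly as:

    <tool_call>
    <function=getWeather>
    <parameter=params>
    {"city": "Paris"}
    </parameter>
    </function>
    </tool_call>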
@@ -146,20 +200,35 @@ export class QwenChatWrapper extends ChatWrapper {
             } else if (item.type === "model") {
                 flush();
 
-                const transformedModelResponse = (this.thoughts === "discourage" && isLastItem)
+                let transformedModelResponse = (this.thoughts === "discourage" && isLastItem)
                     ? discourageThoughtsInModelResponse(item.response)
                     : item.response;
 
+                if (this.keepOnlyLastThought && !isLastItem)
+                    transformedModelResponse = transformedModelResponse.filter((response) => (
+                        !isChatModelResponseSegment(response) || response.segmentType !== "thought"
+                    ));
+                else if (isLastItem && this._ensureModelThoughtBeforeTextOnLastResponse) {
+                    transformedModelResponse = transformedModelResponse.flatMap((response, index): ChatModelResponse["response"] => {
+                        if (typeof response !== "string")
+                            return [response];
+
+                        const previousResponse = transformedModelResponse[index - 1];
+                        if (previousResponse != null && isChatModelResponseSegment(previousResponse) && previousResponse.segmentType === "thought")
+                            return [response];
+
+                        return [{
+                            type: "segment",
+                            segmentType: "thought",
+                            text: "",
+                            ended: true,
+                            raw: LlamaText(new SpecialTokensText("<think>\n\n</think>\n\n")).toJSON()
+                        }, response];
+                    });
+                }
+
                 currentAggregateFocus = null;
-                modelTexts.push(
-                    this.generateModelResponseText(
-                        (this.keepOnlyLastThought && !isLastItem)
-                            ? transformedModelResponse.filter((response) => (
-                                !isChatModelResponseSegment(response) || response.segmentType !== "thought"
-                            ))
-                            : transformedModelResponse
-                    )
-                );
+                modelTexts.push(this.generateModelResponseText(transformedModelResponse));
             } else
                 void (item satisfies never);
         }
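
The effect of the new _ensureModelThoughtBeforeTextOnLastResponse branch, sketched with an invented last response (not taken from the diff): a last-turn model message that starts with plain text, such as ["Hello!"], gains an empty, already-ended thought segment in front of the text, so the rendered assistant turn begins with:

    <think>

    </think>

    Hello!

Text that is already preceded by a thought segment is left untouched, since the previousResponse check returns early.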
@@ -226,20 +295,60 @@ export class QwenChatWrapper extends ChatWrapper {
         if (!functionsDocumentationGenerator.hasAnyFunctions)
             return LlamaText([]);
 
+        if (this.variation === "3")
+            return LlamaText.joinValues("\n", [
+                "# Tools",
+                "",
+                "You may call one or more functions to assist with the user query.",
+                "",
+                LlamaText("You are provided with function signatures within ", new SpecialTokensText("<tools></tools>"), " XML tags:"),
+                LlamaText(new SpecialTokensText("<tools>")),
+                functionsDocumentationGenerator.getQwenFunctionSignatures({documentParams}),
+                LlamaText(new SpecialTokensText("</tools>")),
+                "",
+                LlamaText("For each function call, return a json object with function name and arguments within ", new SpecialTokensText("<tool_call></tool_call>"), " XML tags:"),
+                LlamaText(new SpecialTokensText("<tool_call>")),
+                '{"name": <function-name>, "arguments": <args-json-object>}',
+                LlamaText(new SpecialTokensText("</tool_call>"))
+            ]);
+
         return LlamaText.joinValues("\n", [
             "# Tools",
             "",
-            "You may call one or more functions to assist with the user query.",
+            "You have access to the following functions:",
             "",
-            LlamaText("You are provided with function signatures within ", new SpecialTokensText("<tools></tools>"), " XML tags:"),
             LlamaText(new SpecialTokensText("<tools>")),
             functionsDocumentationGenerator.getQwenFunctionSignatures({documentParams}),
             LlamaText(new SpecialTokensText("</tools>")),
             "",
-            LlamaText("For each function call, return a json object with function name and arguments within ", new SpecialTokensText("<tool_call></tool_call>"), " XML tags:"),
+            LlamaText("If you choose to call a function ONLY reply in the following format with NO suffix:"),
+            "",
             LlamaText(new SpecialTokensText("<tool_call>")),
-            '{"name": <function-name>, "arguments": <args-json-object>}',
-            LlamaText(new SpecialTokensText("</tool_call>"))
+            LlamaText(new SpecialTokensText("<function="), "example_function_name", new SpecialTokensText(">")),
+            LlamaText(new SpecialTokensText("<parameter="), "example_parameter_1", new SpecialTokensText(">")),
+            "value_1",
+            LlamaText(new SpecialTokensText("</parameter>")),
+            LlamaText(new SpecialTokensText("<parameter="), "example_parameter_2", new SpecialTokensText(">")),
+            "This is the value for the second parameter",
+            "that can span",
+            "multiple lines",
+            LlamaText(new SpecialTokensText("</parameter>")),
+            LlamaText(new SpecialTokensText("</function>")),
+            LlamaText(new SpecialTokensText("</tool_call>")),
+            "",
+            LlamaText(new SpecialTokensText("<IMPORTANT>")),
+            "Reminder:",
+            LlamaText([
+                "- Function calls MUST follow the specified format: an inner ",
+                new SpecialTokensText("<function=...></function>"),
+                " block must be nested within ",
+                new SpecialTokensText("<tool_call></tool_call>"),
+                " XML tags"
+            ]),
+            "- Required parameters MUST be specified",
+            "- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after",
+            "- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls",
+            LlamaText(new SpecialTokensText("</IMPORTANT>"))
         ]);
     }
 
@@ -254,7 +363,9 @@ export class QwenChatWrapper extends ChatWrapper {
             architecture === GgufArchitectureType.qwen3 ||
             architecture === GgufArchitectureType.qwen3moe ||
             architecture === GgufArchitectureType.qwen3vl ||
-            architecture === GgufArchitectureType.qwen3vlmoe
+            architecture === GgufArchitectureType.qwen3vlmoe ||
+            architecture === GgufArchitectureType.qwen35 ||
+            architecture === GgufArchitectureType.qwen35moe
         );
     }
 
@@ -277,6 +388,27 @@ export class QwenChatWrapper extends ChatWrapper {
             {_flatFunctionResultString: true, thoughts: "discourage", _lineBreakBeforeFunctionCallPrefix: true},
             {},
             {_requireFunctionCallSettingsExtraction: true}
+        ],
+
+        [
+            {variation: "3.5"},
+            {variation: "3.5"},
+            {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
+        ],
+        [
+            {variation: "3.5", _lineBreakBeforeFunctionCallPrefix: true},
+            {variation: "3.5"},
+            {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
+        ],
+        [
+            {variation: "3.5", _ensureModelThoughtBeforeTextOnLastResponse: true, _lineBreakBeforeFunctionCallPrefix: true},
+            {variation: "3.5"},
+            {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
+        ],
+        [
+            {variation: "3.5", _ensureModelThoughtBeforeTextOnLastResponse: true},
+            {variation: "3.5"},
+            {_requireFunctionCallSettingsExtraction: true, _functionCallExtractionExamineNonFirst: true}
         ]
     ];
 }
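
Taken together, a minimal usage sketch (assuming QwenChatWrapper is imported from node-llama-cpp, which exports its chat wrappers):

    import {QwenChatWrapper} from "node-llama-cpp";

    // "3" is still the default variation; opt into the Qwen 3.5 template explicitly
    const wrapper = new QwenChatWrapper({variation: "3.5"});
    console.log(wrapper.variation); // "3.5"

For GGUF models whose architecture is qwen35 or qwen35moe, the extended architecture check above also lets this wrapper be picked automatically.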

src/chatWrappers/generic/JinjaTemplateChatWrapper.ts

Lines changed: 8 additions & 3 deletions
@@ -105,7 +108,10 @@ export type JinjaTemplateChatWrapperOptions = {
     tokenizer?: Tokenizer,
 
     /** @internal */
-    _requireFunctionCallSettingsExtraction?: boolean
+    _requireFunctionCallSettingsExtraction?: boolean,
+
+    /** @internal */
+    _functionCallExtractionExamineNonFirst?: boolean
 };
 
 export type JinjaTemplateChatWrapperOptionsConvertMessageFormat = {
@@ -185,7 +188,8 @@ export class JinjaTemplateChatWrapper extends ChatWrapper {
             additionalRenderParameters,
             segments,
             tokenizer,
-            _requireFunctionCallSettingsExtraction = false
+            _requireFunctionCallSettingsExtraction = false,
+            _functionCallExtractionExamineNonFirst = false
         } = options;
 
         if (template == null)
@@ -359,7 +363,8 @@ export class JinjaTemplateChatWrapper extends ChatWrapper {
         this._wrapFunctionParamsInsideMapKey = detectNeedToWrapFunctionArgumentsWithMap({idsGenerator, renderTemplate});
         const extractedSettings = extractFunctionCallSettingsFromJinjaTemplate({
             idsGenerator,
-            renderTemplate
+            renderTemplate,
+            examineNonFirstFunctionCall: _functionCallExtractionExamineNonFirst
         });
         functionCallSettings = extractedSettings.settings;
 
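Why the new flag matters: the Qwen 3.5 resolution entries in QwenChatWrapper.ts pass _functionCallExtractionExamineNonFirst: true, so settings extraction probes a function call that is not in the final assistant turn, which is relevant for templates that render the last turn differently (for example, around thought segments). A sketch of how the internal option threads through (the template value is hypothetical, and both underscore-prefixed options are marked @internal):

    const wrapper = new JinjaTemplateChatWrapper({
        template: qwen35JinjaTemplate, // hypothetical: the model's Jinja chat template text
        _requireFunctionCallSettingsExtraction: true,
        _functionCallExtractionExamineNonFirst: true
    });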
src/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.ts

Lines changed: 15 additions & 6 deletions
@@ -12,10 +12,12 @@ export type ExtractFunctionCallSettingsRenderTemplate = (options: {
 
 export function extractFunctionCallSettingsFromJinjaTemplate({
     idsGenerator,
-    renderTemplate
+    renderTemplate,
+    examineNonFirstFunctionCall = false
 }: {
     idsGenerator: UniqueIdGenerator,
-    renderTemplate: ExtractFunctionCallSettingsRenderTemplate
+    renderTemplate: ExtractFunctionCallSettingsRenderTemplate,
+    examineNonFirstFunctionCall?: boolean
 }): {
     settings: ChatWrapperSettings["functions"] | null,
     stringifyParams: boolean,
@@ -79,6 +81,13 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
         type: "user",
         text: userMessage1
     }];
+    const dummyChatTurn: ChatHistoryItem[] = [{
+        type: "user",
+        text: "Hi"
+    }, {
+        type: "model",
+        response: ["Hey"]
+    }];
     const chatHistory1Call: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -95,7 +104,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
     const chatHistoryOnlyCall: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -111,7 +120,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
     const chatHistory2Calls: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -135,7 +144,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
     const chatHistory2CallsNewChunk: ChatHistoryItem[] = [...baseChatHistory, {
         type: "model",
         response: [
@@ -159,7 +168,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             modelMessage2
         ]
-    }];
+    }, ...(examineNonFirstFunctionCall ? dummyChatTurn : [])];
 
     const additionalParams = {
         "bos_token": bosTokenId,
