fix: support tool call syntax with optional whitespace prefix

giladgd · giladgd · commit ea838e66cc82 · 2026-02-15T05:08:48.000+02:00
diff --git a/src/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.ts b/src/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.ts
@@ -398,7 +398,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
     const callPrefixText = func1ParamsToFunc2Name.text.slice(-callPrefixLength);
     const parallelismCallPrefix = modelMessage1ToFunc1Name.text.slice(0, -callPrefixLength);
 
-    const callSuffixLength = findCommandStartLength(func1ParamsToFunc2Name.text, func2ParamsToFunc1Result.text);
+    const callSuffixLength = findCommonStartLength(func1ParamsToFunc2Name.text, func2ParamsToFunc1Result.text);
     const callSuffixText = func1ParamsToFunc2Name.text.slice(0, callSuffixLength);
 
     const parallelismBetweenCallsText = func1ParamsToFunc2Name.text.slice(callSuffixLength, -callPrefixLength);
@@ -407,7 +407,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
     const resultPrefixLength = findCommonEndLength(func2ParamsToFunc1Result.text, func1ResultToFunc2Result.text);
     const resultPrefixText = func2ParamsToFunc1Result.text.slice(-resultPrefixLength);
 
-    const resultSuffixLength = findCommandStartLength(func1ResultToFunc2Result.text, func2ResultToModelMessage2.text);
+    const resultSuffixLength = findCommonStartLength(func1ResultToFunc2Result.text, func2ResultToModelMessage2.text);
     const resultSuffixText = func1ResultToFunc2Result.text.slice(0, resultSuffixLength);
     const parallelismResultBetweenResultsText = func1ResultToFunc2Result.text.slice(resultSuffixLength, -resultPrefixLength);
     const parallelismResultSuffixText = func2ResultToModelMessage2.text.slice(resultSuffixLength);
@@ -452,7 +452,7 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
         const onlyCallUserMessage1ToFunc1Name = getTextBetweenIds(renderedOnlyCall, userMessage1, func1name);
 
         if (userMessage1ToModelMessage1Start.text != null && onlyCallUserMessage1ToFunc1Name.text != null) {
-            const onlyCallModelMessagePrefixLength = findCommandStartLength(
+            const onlyCallModelMessagePrefixLength = findCommonStartLength(
                 userMessage1ToModelMessage1Start.text,
                 onlyCallUserMessage1ToFunc1Name.text
             );
@@ -470,14 +470,29 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
         }
     }
 
+    const {
+        whitespacePrefix: revivedCallWhitespacePrefix,
+        newTarget: cleanRevivedCallPrefix
+    } = extractWhitespacePrefixFromRevivedText(revivedCallPrefix);
+
+    const {
+        whitespacePrefix: revivedParallelismWhitespacePrefix,
+        newTarget: cleanRevivedParallelismCallSectionPrefix
+    } = extractWhitespacePrefixFromRevivedText(
+        LlamaText([
+            revivedParallelismCallSectionPrefix,
+            revivedCallWhitespacePrefix
+        ])
+    );
+
     return {
         stringifyParams,
         stringifyResult,
         combineModelMessageAndToolCalls,
         settings: {
             call: {
                 optionalPrefixSpace: true,
-                prefix: revivedCallPrefix,
+                prefix: cleanRevivedCallPrefix,
                 paramsPrefix: reviveSeparatorText(callParamsPrefixText, idToStaticContent, contentIds),
                 suffix: reviveSeparatorText(callSuffixText, idToStaticContent, contentIds),
                 emptyCallParamsPlaceholder: {}
@@ -504,8 +519,17 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
             },
             parallelism: {
                 call: {
-                    sectionPrefix: revivedParallelismCallSectionPrefix,
-                    betweenCalls: revivedParallelismCallBetweenCalls,
+                    sectionPrefix: LlamaText([
+                        revivedParallelismWhitespacePrefix,
+                        cleanRevivedParallelismCallSectionPrefix
+                    ]),
+                    sectionPrefixAlternateMatches: revivedParallelismWhitespacePrefix.values.length === 0
+                        ? undefined
+                        : [cleanRevivedParallelismCallSectionPrefix],
+                    betweenCalls: LlamaText([
+                        revivedParallelismCallBetweenCalls,
+                        revivedCallWhitespacePrefix
+                    ]),
                     sectionSuffix: reviveSeparatorText(parallelismCallSuffixText, idToStaticContent, contentIds)
                 },
                 result: {
@@ -567,7 +591,7 @@ function removeCommonRevivedPrefix(target: LlamaText, matchStart: LlamaText) {
             if (targetValue === matchStartValue)
                 continue;
         } else if (targetValue instanceof SpecialTokensText && matchStartValue instanceof SpecialTokensText) {
-            const commonLength = findCommandStartLength(targetValue.value, matchStartValue.value);
+            const commonLength = findCommonStartLength(targetValue.value, matchStartValue.value);
             if (commonLength === targetValue.value.length && commonLength === matchStartValue.value.length)
                 continue;
 
@@ -620,7 +644,56 @@ function removeCommonRevivedSuffix(target: LlamaText, matchEnd: LlamaText) {
     return LlamaText(target.values.slice(0, target.values.length - matchEnd.values.length));
 }
 
-function findCommandStartLength(text1: string, text2: string) {
+function extractWhitespacePrefixFromRevivedText(target: LlamaText) { // splits `target` into its leading whitespace-only part and the remainder
+    for (let i = 0; i < target.values.length; i++) { // scan values in order until the first non-whitespace content
+        const value = target.values[i];
+        if (typeof value === "string") {
+            const trimmedValueLength = value.trimStart().length;
+            if (trimmedValueLength === 0) // empty or whitespace-only string: it belongs to the prefix, keep scanning
+                continue;
+
+            const whitespaceLength = value.length - trimmedValueLength; // count of leading whitespace characters in this string
+            return {
+                whitespacePrefix: LlamaText([ // all earlier values plus this string's leading whitespace
+                    ...target.values.slice(0, i),
+                    value.slice(0, whitespaceLength)
+                ]),
+                newTarget: LlamaText([ // the rest of this string followed by the remaining values
+                    value.slice(whitespaceLength),
+                    ...target.values.slice(i + 1)
+                ])
+            };
+        } else if (value instanceof SpecialTokensText) { // same split as for strings, but both halves stay wrapped in SpecialTokensText
+            const trimmedValue = value.value.trimStart();
+            if (trimmedValue.length === 0) // empty or whitespace-only special tokens text: keep scanning
+                continue;
+
+            const whitespaceLength = value.value.length - trimmedValue.length; // count of leading whitespace characters in the wrapped text
+            return {
+                whitespacePrefix: LlamaText([
+                    ...target.values.slice(0, i),
+                    new SpecialTokensText(value.value.slice(0, whitespaceLength))
+                ]),
+                newTarget: LlamaText([
+                    new SpecialTokensText(value.value.slice(whitespaceLength)),
+                    ...target.values.slice(i + 1)
+                ])
+            };
+        }
+
+        return { // only reached when the value is neither a string nor SpecialTokensText: split right before it
+            whitespacePrefix: LlamaText(target.values.slice(0, i)),
+            newTarget: LlamaText(target.values.slice(i))
+        };
+    }
+
+    return { // no values, or every value was whitespace-only: the whole target is the prefix and nothing remains
+        whitespacePrefix: target,
+        newTarget: LlamaText([])
+    };
+}
+
+function findCommonStartLength(text1: string, text2: string) {
     let commonStartLength = 0;
     while (commonStartLength < text1.length && commonStartLength < text2.length) {
         if (text1[commonStartLength] !== text2[commonStartLength])
diff --git a/src/evaluator/LlamaChat/LlamaChat.ts b/src/evaluator/LlamaChat/LlamaChat.ts
@@ -1861,16 +1861,21 @@ class GenerateResponseState<const Functions extends ChatModelFunctions | undefin
             StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
                 .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
 
-        if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText)
-            this.functionSyntaxStartDetector.addStopTrigger(
-                StopGenerationDetector.resolveLlamaTextTrigger(
-                    LlamaText([
-                        this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
-                        this.chatWrapper.settings.functions.call.prefix
-                    ]),
-                    this.llamaChat.model.tokenizer
-                )
-            );
+        if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText) {
+            for (const sectionPrefix of [
+                this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
+                ...(this.chatWrapper.settings.functions?.parallelism?.call.sectionPrefixAlternateMatches ?? [])
+            ])
+                this.functionSyntaxStartDetector.addStopTrigger(
+                    StopGenerationDetector.resolveLlamaTextTrigger(
+                        LlamaText([
+                            sectionPrefix,
+                            this.chatWrapper.settings.functions.call.prefix
+                        ]),
+                        this.llamaChat.model.tokenizer
+                    )
+                );
+        }
 
         const segmentDefinitions: ConstructorParameters<typeof SegmentHandler>[0]["segmentDefinitions"] = new Map();
         for (const segmentType of allSegmentTypes) {
@@ -1895,15 +1900,19 @@ class GenerateResponseState<const Functions extends ChatModelFunctions | undefin
         });
 
         if (this.abortOnNonText) {
-            this.stopGenerationDetector.addStopTrigger(
-                StopGenerationDetector.resolveLlamaTextTrigger(
-                    LlamaText([
-                        this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
-                        this.chatWrapper.settings.functions.call.prefix
-                    ]),
-                    this.llamaChat.model.tokenizer
-                )
-            );
+            for (const sectionPrefix of [
+                this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
+                ...(this.chatWrapper.settings.functions?.parallelism?.call.sectionPrefixAlternateMatches ?? [])
+            ])
+                this.stopGenerationDetector.addStopTrigger(
+                    StopGenerationDetector.resolveLlamaTextTrigger(
+                        LlamaText([
+                            sectionPrefix,
+                            this.chatWrapper.settings.functions.call.prefix
+                        ]),
+                        this.llamaChat.model.tokenizer
+                    )
+                );
 
             for (const segmentType of allSegmentTypes) {
                 const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
diff --git a/src/types.ts b/src/types.ts
@@ -80,6 +80,13 @@ export type ChatWrapperSettings = {
         readonly parallelism?: {
             readonly call: {
                 readonly sectionPrefix: string | LlamaText,
+
+                /**
+                 * Alternate section prefixes that can be used to detect a function call section,
+                 * but won't be used to construct the context when building it from scratch.
+                 */
+                readonly sectionPrefixAlternateMatches?: Array<string | LlamaText>,
+
                 readonly betweenCalls?: string | LlamaText,
                 readonly sectionSuffix?: string | LlamaText
             },
diff --git a/test/modelDependent/qwen3-0.6b/functions.test.ts b/test/modelDependent/qwen3-0.6b/functions.test.ts
@@ -99,7 +99,7 @@ describe("qwen3 0.6b", () => {
                 }
             } as const;
 
-            const res = await chatSession.prompt("What is the second word? No yapping, no formatting", {
+            const res = await chatSession.prompt("What is the second word? No yapping, no formatting, use the function", {
                 ...promptOptions,
                 maxTokens: 250,
                 budgets: {
diff --git a/vitest.config.ts b/vitest.config.ts
@@ -8,28 +8,20 @@ export default defineConfig({
         ],
         pool: "forks",
         maxWorkers: 1,
-        minWorkers: 1,
         maxConcurrency: 1,
-        poolOptions: {
-            forks: {
-                minForks: 1,
-                maxForks: 1,
-                singleFork: true
-
-                // uncomment for profiling
-                // execArgv: [
-                //     "--cpu-prof",
-                //     "--cpu-prof-dir=test-runner-profile",
-                //     "--heap-prof",
-                //     "--heap-prof-dir=test-runner-profile"
-                // ]
-            }
-        },
         snapshotSerializers: [
             "./test/utils/helpers/llamaTextSerializer.ts",
             "./test/utils/helpers/SpecialTokensTextSerializer.ts",
             "./test/utils/helpers/SpecialTokenSerializer.ts"
         ],
         setupFiles: ["./test/utils/helpers/testSetup.ts"]
+
+        // uncomment for profiling
+        // execArgv: [
+        //     "--cpu-prof",
+        //     "--cpu-prof-dir=test-runner-profile",
+        //     "--heap-prof",
+        //     "--heap-prof-dir=test-runner-profile"
+        // ]
     }
 });

Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,7 @@ describe("qwen3 0.6b", () => {`
`99`	`99`	`}`
`100`	`100`	`} as const;`
`101`	`101`
`102`		`- const res = await chatSession.prompt("What is the second word? No yapping, no formatting", {`
	`102`	`+ const res = await chatSession.prompt("What is the second word? No yapping, no formatting, use the function", {`
`103`	`103`	`...promptOptions,`
`104`	`104`	`maxTokens: 250,`
`105`	`105`	`budgets: {`