Skip to content

Commit 28a236b

Browse files
committed
fix: default thinking budget to 75% of the context size to avoid low-quality responses due to thinking segment context shifts
1 parent 89b293f commit 28a236b

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

src/evaluator/LlamaChat/LlamaChat.ts

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -357,14 +357,20 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
357357
/**
358358
* Budget for thought tokens.
359359
*
360-
* Defaults to `Infinity`.
360+
* Set to `Infinity` for unlimited budget.
361+
*
362+
* Defaults to 75% of the context size.
363+
* When the context size is smaller than `8192`, defaults to 50% of the context size.
361364
*/
362365
thoughtTokens?: number,
363366

364367
/**
365368
* Budget for comment tokens.
366369
*
367-
* Defaults to `Infinity`.
370+
* Set to `Infinity` for unlimited budget.
371+
*
372+
* Defaults to 75% of the context size.
373+
* When the context size is smaller than `8192`, defaults to 50% of the context size.
368374
*/
369375
commentTokens?: number
370376
},
@@ -511,6 +517,11 @@ const defaultContextShiftOptions: Required<LLamaChatContextShiftOptions> = {
511517
const defaultRepeatPenaltyLastTokens = 64;
512518
const defaultTrimWhitespaceSuffix = false;
513519
const defaultEvaluationPriority: EvaluationPriority = 5;
520+
const defaultSegmentBudgetSize = (contextSize: number) => (
521+
contextSize < 8192
522+
? contextSize * 0.5
523+
: contextSize * 0.75
524+
);
514525

515526

516527
export class LlamaChat {
@@ -3549,9 +3560,11 @@ class GenerateResponseState<const Functions extends ChatModelFunctions | undefin
35493560

35503561
public getSegmentBudget(segmentType: ChatModelSegmentType) {
35513562
const getBudget = (budget: number | undefined) => (
3552-
(budget == null || budget === Infinity)
3553-
? null
3554-
: budget
3563+
budget == null
3564+
? Math.ceil(defaultSegmentBudgetSize(this.llamaChat.sequence.contextSize))
3565+
: budget === Infinity
3566+
? null
3567+
: budget
35553568
);
35563569

35573570
if (this.budgets == null)

src/evaluator/LlamaChatSession/LlamaChatSession.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,14 +263,20 @@ export type LLamaChatPromptOptions<Functions extends ChatSessionModelFunctions |
263263
/**
264264
* Budget for thought tokens.
265265
*
266-
* Defaults to `Infinity`.
266+
* Set to `Infinity` for unlimited budget.
267+
*
268+
* Defaults to 75% of the context size.
269+
* When the context size is smaller than `8192`, defaults to 50% of the context size.
267270
*/
268271
thoughtTokens?: number,
269272

270273
/**
271274
* Budget for comment tokens.
272275
*
273-
* Defaults to `Infinity`.
276+
* Set to `Infinity` for unlimited budget.
277+
*
278+
* Defaults to 75% of the context size.
279+
* When the context size is smaller than `8192`, defaults to 50% of the context size.
274280
*/
275281
commentTokens?: number
276282
}

0 commit comments

Comments
 (0)