fix: raise output token budget to 65536, drop undersized models

codebam · claude · codebam · commit 3a68b787b2f8 · 2026-05-19T11:26:09.000-04:00
The 8K output token budget still truncated kimi-k2.6 mid-reasoning. Bump it to
65536 (Gemini's hard cap, which all retained CF native models accept since
they have ≥128K context). Drop hermes (8K context) and llama-3.3-70b-fp8-fast
(24K context) from AVAILABLE_MODELS since they would return HTTP 400 with
this budget.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/bot b/bot
@@ -1 +1 @@
-Subproject commit 4f556a0d04955ce0ea6873bf4ff2e2eabf892b9a
+Subproject commit 66d9d2f51fcc0c93cbc5096d28da6a49ecde91ce
diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts
@@ -289,7 +289,6 @@ export const AVAILABLE_MODELS: Record<
 	string,
 	{ id: string; cost: number; supportsTools?: boolean; supportsVision?: boolean }
 > = {
-	hermes: { id: '@hf/nousresearch/hermes-2-pro-mistral-7b', cost: 5, supportsTools: true },
 	gemma4: { id: '@cf/google/gemma-4-26b-a4b-it', cost: 10, supportsTools: true, supportsVision: true },
 	'google/gemini-3-flash': { id: 'google/gemini-3-flash', cost: 15, supportsTools: true, supportsVision: true },
 	'google/gemini-3.1-flash-lite': {
@@ -307,11 +306,6 @@ export const AVAILABLE_MODELS: Record<
 	},
 	'kimi-k2.6': { id: '@cf/moonshotai/kimi-k2.6', cost: 40, supportsTools: true, supportsVision: true },
 	'glm-4.7-flash': { id: '@cf/zai-org/glm-4.7-flash', cost: 10, supportsTools: true, supportsVision: true },
-	'llama-3.3-70b': {
-		id: '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
-		cost: 40,
-		supportsTools: true
-	},
 	'deepseek-r1-32b': {
 		id: '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b',
 		cost: 60,