fix(finance): raise AI report token budget to avoid gpt-5.5 truncation

ralyodio · claude · ralyodio · commit 76f552cd54df · 2026-06-18T11:12:40.000Z
The AI research report intermittently failed with "Could not generate
the report" (ledger: "Model returned an empty or unusable report").
gpt-5.5 spends hidden reasoning tokens out of the same
max_completion_tokens budget before emitting output; the report JSON is
~1500-1900 tokens, and successful runs sat at 1903/1912 against the 2000
cap. When reasoning ran long the JSON truncated, JSON.parse failed, and
the route surfaced a 502.

Raise MAX_COMPLETION_TOKENS 2000 -> 4000 (a ceiling, not added cost) and
have the pipeline read finish_reason so a 'length' truncation throws a
distinct, diagnosable error instead of a vague "empty report".

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
diff --git a/src/lib/finance/analysis/pipeline.ts b/src/lib/finance/analysis/pipeline.ts
@@ -17,6 +17,8 @@ export interface LLMCompletion {
   promptTokens: number;
   completionTokens: number;
   totalTokens: number;
+  /** OpenAI finish_reason — 'length' means the budget was exhausted (truncated). */
+  finishReason?: string;
 }
 
 export interface ReportLLM {
@@ -57,6 +59,7 @@ export function createOpenAIReportLLM(apiKey: string): ReportLLM {
         promptTokens: completion.usage?.prompt_tokens ?? 0,
         completionTokens: completion.usage?.completion_tokens ?? 0,
         totalTokens: completion.usage?.total_tokens ?? 0,
+        finishReason: completion.choices[0]?.finish_reason,
       };
     },
   };
@@ -87,6 +90,14 @@ export async function generateReport({
 
   const { sections, sources: parsedSources } = parseReportJson(completion.content);
   if (!isReportUsable(sections)) {
+    // 'length' means the model hit max_completion_tokens before finishing the
+    // JSON (often reasoning tokens eating the budget) — distinguish it so the
+    // failure is diagnosable rather than a vague "empty report".
+    if (completion.finishReason === 'length') {
+      throw new ReportGenerationError(
+        `Report truncated at the token budget (${MAX_COMPLETION_TOKENS}); raise MAX_COMPLETION_TOKENS`,
+      );
+    }
     throw new ReportGenerationError('Model returned an empty or unusable report');
   }
 
diff --git a/src/lib/finance/analysis/prompt.ts b/src/lib/finance/analysis/prompt.ts
@@ -9,8 +9,17 @@ import type { ReportInputs } from './types';
 
 export const PROMPT_VERSION = 1;
 
-/** Hard token budget per report (cost control, PRD §3.3). */
-export const MAX_COMPLETION_TOKENS = 2000;
+/**
+ * Hard token budget per report (cost control, PRD §3.3).
+ *
+ * NB: gpt-5.x reasoning models spend *hidden reasoning tokens* out of this same
+ * `max_completion_tokens` budget before emitting any output. The JSON report
+ * itself runs ~1500-1900 tokens, so a 2000 cap left almost no headroom — when
+ * reasoning ran long the output got truncated, JSON.parse failed, and the route
+ * surfaced a misleading "empty or unusable report". 4000 gives reasoning room;
+ * it is a ceiling, not a fixed cost — you pay only for tokens actually produced.
+ */
+export const MAX_COMPLETION_TOKENS = 4000;
 
 export const SYSTEM_PROMPT = `You are a financial research analyst writing an informational, long-form narrative thesis about a publicly traded company or ETF, in the spirit of a community "narrative" — covering what the business does, recent catalysts, a bull case, a bear case, valuation framing, and key risks.