fix(compiler): don't cap concepts-plan output at max_tokens=2048 (#90)

cnndabbler · web-flow · commit 01e9a5a6f00c · 2026-06-12T17:39:47.000+08:00
Reasoning/thinking models can exhaust a 2048-token budget before emitting
the JSON plan, yielding empty output -> unparseable plan -> zero concept
pages, with no error surfaced (silent failure). Remove the cap so the
concepts-plan call matches the uncapped summary call.
diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py
@@ -1414,7 +1414,7 @@ async def _compile_concepts(
             concept_briefs=concept_briefs,
             entity_briefs=entity_briefs,
         ).replace("__ENTITY_TYPES__", types_str)},
-    ], "concepts-plan", max_tokens=2048, response_format=_JSON_RESPONSE_FORMAT)
+    ], "concepts-plan", response_format=_JSON_RESPONSE_FORMAT)
 
     def _write_v1_summary_stripped() -> None:
         """Fallback writer for the v1 summary on early-return paths.