Skip to content

Commit 51c5e78

Browse files
committed
feat(ai): configurable max_tokens for extraction via init and database
- Add --max-tokens flag to configure.py ai-provider
- Store max_tokens in ai_extraction_config table (column already existed)
- OpenRouterProvider reads max_tokens from config (default 16384)
- Init flow prompts for max tokens with press-enter-to-accept default
- Factory passes max_tokens from database config to provider
1 parent d9c6e43 commit 51c5e78

3 files changed

Lines changed: 25 additions & 8 deletions

File tree

api/app/lib/ai_providers.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -973,6 +973,7 @@ def __init__(
973973
api_key: Optional[str] = None,
974974
extraction_model: Optional[str] = None,
975975
embedding_provider: Optional[AIProvider] = None,
976+
max_tokens: Optional[int] = None,
976977
):
977978
"""
978979
Initialize OpenRouter provider.
@@ -982,6 +983,7 @@ def __init__(
982983
extraction_model: Model ID (e.g., 'openai/gpt-4o', 'anthropic/claude-sonnet-4')
983984
embedding_provider: Separate provider for embeddings (required — OpenRouter
984985
doesn't serve embeddings)
986+
max_tokens: Max completion tokens for extraction (default: 16384)
985987
"""
986988
from openai import OpenAI
987989

@@ -1012,6 +1014,7 @@ def __init__(
10121014
self.extraction_model = extraction_model or os.getenv(
10131015
"OPENROUTER_EXTRACTION_MODEL", "openai/gpt-4o"
10141016
)
1017+
self.max_tokens = max_tokens or 16384
10151018
self.embedding_provider = embedding_provider
10161019

10171020
def extract_concepts(
@@ -1028,7 +1031,7 @@ def extract_concepts(
10281031
{"role": "system", "content": system_prompt},
10291032
{"role": "user", "content": f"Text to analyze:\n\n{text}"},
10301033
],
1031-
max_tokens=16384,
1034+
max_tokens=self.max_tokens,
10321035
temperature=0.3,
10331036
response_format={"type": "json_object"},
10341037
)
@@ -1836,11 +1839,14 @@ def get_provider(provider_name: Optional[str] = None) -> AIProvider:
18361839

18371840
# Determine provider and model based on DEVELOPMENT_MODE
18381841
extraction_model = None
1842+
max_tokens = None
18391843

18401844
if is_development_mode():
18411845
# Development mode: Use environment variables
18421846
provider_name = provider_name or os.getenv("AI_PROVIDER", "openai").lower()
1843-
# extraction_model will be set by provider constructor from env vars
1847+
max_tokens_env = os.getenv("MAX_EXTRACTION_TOKENS")
1848+
if max_tokens_env:
1849+
max_tokens = int(max_tokens_env)
18441850
logger.debug(f"[DEV MODE] Using .env configuration: provider={provider_name}")
18451851
else:
18461852
# Production mode: Load from database
@@ -1859,7 +1865,8 @@ def get_provider(provider_name: Optional[str] = None) -> AIProvider:
18591865

18601866
provider_name = provider_name or config['provider']
18611867
extraction_model = config['model_name']
1862-
logger.debug(f"[PROD MODE] Using database configuration: provider={provider_name}, model={extraction_model}")
1868+
max_tokens = config.get('max_tokens')
1869+
logger.debug(f"[PROD MODE] Using database configuration: provider={provider_name}, model={extraction_model}, max_tokens={max_tokens}")
18631870

18641871
# Check for separate embedding provider configuration
18651872
embedding_provider = get_embedding_provider()
@@ -1902,6 +1909,7 @@ def get_provider(provider_name: Optional[str] = None) -> AIProvider:
19021909
return OpenRouterProvider(
19031910
extraction_model=extraction_model,
19041911
embedding_provider=embedding_provider,
1912+
max_tokens=max_tokens,
19051913
)
19061914
elif provider_name == "mock":
19071915
from .mock_ai_provider import MockAIProvider

operator/configure.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ def cmd_ai_provider(self, args):
114114
"""Configure AI extraction provider"""
115115
provider = args.provider
116116
model = args.model
117+
max_tokens = getattr(args, 'max_tokens', None)
117118

118119
if not provider:
119120
print("❌ Provider required (openai, anthropic, ollama, or openrouter)")
@@ -161,14 +162,15 @@ def cmd_ai_provider(self, args):
161162
# Insert/update configuration
162163
cur.execute(
163164
"""INSERT INTO kg_api.ai_extraction_config
164-
(provider, model_name, supports_vision, supports_json_mode, active)
165-
VALUES (%s, %s, true, true, true)
165+
(provider, model_name, supports_vision, supports_json_mode, max_tokens, active)
166+
VALUES (%s, %s, true, true, %s, true)
166167
ON CONFLICT (active) WHERE active = true
167168
DO UPDATE SET
168169
provider = EXCLUDED.provider,
169170
model_name = EXCLUDED.model_name,
171+
max_tokens = COALESCE(EXCLUDED.max_tokens, kg_api.ai_extraction_config.max_tokens),
170172
updated_at = NOW()""",
171-
(provider, model)
173+
(provider, model, max_tokens)
172174
)
173175
conn.commit()
174176
print(f"✅ Configured AI extraction: {provider} / {model}")
@@ -653,6 +655,7 @@ def main():
653655
ai_parser = subparsers.add_parser('ai-provider', help='Configure AI extraction provider')
654656
ai_parser.add_argument('provider', nargs='?', help='Provider: openai, anthropic, ollama, openrouter')
655657
ai_parser.add_argument('--model', help='Model name (optional, uses default)')
658+
ai_parser.add_argument('--max-tokens', type=int, help='Max completion tokens for extraction (default: 16384)')
656659

657660
# embedding
658661
embed_parser = subparsers.add_parser('embedding', help='List or activate embedding profile')

operator/lib/guided-init.sh

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -547,12 +547,18 @@ else
547547
echo ""
548548
echo -e "${GREEN}${NC} Selected: ${BOLD}${CHOSEN_NAME}${NC} (${CHOSEN_MODEL_ID})"
549549

550+
# Prompt for max completion tokens with sensible default
551+
echo ""
552+
read -p "Max completion tokens [16384]: " -r MAX_TOKENS_INPUT
553+
MAX_TOKENS="${MAX_TOKENS_INPUT:-16384}"
554+
echo -e "${GREEN}${NC} Max tokens: ${MAX_TOKENS}"
555+
550556
# Enable and set as default in catalog
551557
docker exec kg-operator python /workspace/operator/configure.py models enable "$CHOSEN_CATALOG_ID" 2>/dev/null
552558
docker exec kg-operator python /workspace/operator/configure.py models default "$CHOSEN_CATALOG_ID" 2>/dev/null
553559

554-
# Update active extraction config with chosen model
555-
docker exec kg-operator python /workspace/operator/configure.py ai-provider "$AI_PROVIDER" --model "$CHOSEN_MODEL_ID"
560+
# Update active extraction config with chosen model and max tokens
561+
docker exec kg-operator python /workspace/operator/configure.py ai-provider "$AI_PROVIDER" --model "$CHOSEN_MODEL_ID" --max-tokens "$MAX_TOKENS"
556562
SELECTING=false
557563
else
558564
echo -e "${YELLOW}${NC} Invalid choice, please try again."

0 commit comments

Comments (0)