Skip to content

Commit 02f96e0

Browse files
committed
feat(operator): interactive AI provider and model selection during init
Replace hardcoded OpenAI setup in guided-init.sh with an interactive flow:

- Step 4: Choose provider (OpenAI, Anthropic, OpenRouter)
- Step 5: Enter and validate API key
- Step 6: Refresh model catalog, present filtered menu, user picks model

OpenRouter shows a curated subset (GPT-4o, Claude, Gemini, Llama, etc.) with
option [0] to show all 200+ models. Ollama is noted as post-init config.

Also adds --tsv, --category, and --limit flags to `configure.py models list`
for the machine-parseable output used by the init script.
1 parent 1a27e01 commit 02f96e0

2 files changed

Lines changed: 200 additions & 29 deletions

File tree

operator/configure.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,28 +361,46 @@ def cmd_models(self, args):
361361
try:
362362
if action == 'list':
363363
provider = getattr(args, 'provider_name', None)
364+
use_tsv = getattr(args, 'tsv', False)
365+
category_filter = getattr(args, 'category', None)
366+
limit = getattr(args, 'limit', 0) or 0
367+
364368
with conn.cursor() as cur:
365369
conditions = []
366370
params = []
367371
if provider:
368372
conditions.append("provider = %s")
369373
params.append(provider)
374+
if category_filter:
375+
conditions.append("category = %s")
376+
params.append(category_filter)
370377

371378
where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
379+
limit_clause = f"LIMIT {int(limit)}" if limit > 0 else ""
372380
cur.execute(
373381
f"""SELECT id, provider, model_id, display_name, category,
374382
enabled, is_default,
375383
price_prompt_per_m, price_completion_per_m,
376384
fetched_at
377385
FROM kg_api.provider_model_catalog
378386
{where}
379-
ORDER BY provider, sort_order, model_id""",
387+
ORDER BY provider, sort_order, model_id
388+
{limit_clause}""",
380389
params,
381390
)
382391
rows = cur.fetchall()
383392

384393
if not rows:
385-
print("📭 No models in catalog." + (" Try: models refresh <provider>" if provider else ""))
394+
if not use_tsv:
395+
print("📭 No models in catalog." + (" Try: models refresh <provider>" if provider else ""))
396+
return True
397+
398+
if use_tsv:
399+
# Machine-parseable: ID\tmodel_id\tdisplay_name\tprice_prompt\tprice_completion
400+
for row in rows:
401+
prompt_p = f"{float(row['price_prompt_per_m']):.4f}" if row['price_prompt_per_m'] is not None else ""
402+
comp_p = f"{float(row['price_completion_per_m']):.4f}" if row['price_completion_per_m'] is not None else ""
403+
print(f"{row['id']}\t{row['model_id']}\t{row['display_name'] or row['model_id']}\t{prompt_p}\t{comp_p}")
386404
return True
387405

388406
current_provider = None
@@ -648,6 +666,9 @@ def main():
648666
models_parser.add_argument('model_id', nargs='?', help='Catalog ID (for enable/disable/default/price)')
649667
models_parser.add_argument('--prompt', type=float, help='Prompt price per 1M tokens (for price)')
650668
models_parser.add_argument('--completion', type=float, help='Completion price per 1M tokens (for price)')
669+
models_parser.add_argument('--tsv', action='store_true', help='Output in TSV format (for scripting)')
670+
models_parser.add_argument('--category', default='extraction', help='Filter by category (default: extraction)')
671+
models_parser.add_argument('--limit', type=int, default=0, help='Limit number of results (0=unlimited)')
651672

652673
# status
653674
subparsers.add_parser('status', help='Show configuration status')

operator/lib/guided-init.sh

Lines changed: 177 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,12 @@ echo ""
7171
echo -e "${YELLOW}Development defaults (for quick evaluation):${NC}"
7272
echo -e " • Admin password: ${RED}Password1!${NC}"
7373
echo -e " • Database password: ${RED}password${NC}"
74-
echo " • AI extraction: OpenAI GPT-4o"
74+
echo " • AI extraction: Choose from OpenAI, Anthropic, or OpenRouter"
7575
echo " • Embeddings: Local (nomic-ai/nomic-embed-text-v1.5)"
7676
echo ""
7777
echo -e "${YELLOW}Prerequisites:${NC}"
7878
echo " • Docker with permissions (docker ps should work)"
79-
echo "OpenAI API key (will prompt during setup)"
79+
echo " • API key for your AI provider (will prompt during setup)"
8080
echo " • Node.js + npm (for kg CLI installation)"
8181
echo ""
8282
echo -e "${YELLOW}Supported Platforms:${NC}"
@@ -200,7 +200,7 @@ echo ""
200200
echo -e "${YELLOW}ℹ️ What this affects:${NC}"
201201
echo " • WHERE local embeddings are computed (MPS/CUDA/ROCm/CPU)"
202202
echo " • Does NOT affect WHICH models are used (local vs API)"
203-
echo " • AI extraction always uses remote API (OpenAI/Anthropic)"
203+
echo " • AI extraction uses remote API (OpenAI/Anthropic/OpenRouter)"
204204
echo ""
205205
read -p "Choose option (1-5): " -r
206206
echo ""
@@ -246,7 +246,7 @@ echo ""
246246

247247
# Step 1: Generate secrets
248248
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
249-
echo -e "${BOLD}Step 1/7: Generating infrastructure secrets${NC}"
249+
echo -e "${BOLD}Step 1/9: Generating infrastructure secrets${NC}"
250250
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
251251
echo ""
252252

@@ -286,7 +286,7 @@ echo ""
286286

287287
# Step 2: Start infrastructure
288288
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
289-
echo -e "${BOLD}Step 2/7: Starting infrastructure (Postgres + Garage + Operator)${NC}"
289+
echo -e "${BOLD}Step 2/9: Starting infrastructure (Postgres + Garage + Operator)${NC}"
290290
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
291291
echo ""
292292
./operator/lib/start-infra.sh
@@ -295,7 +295,7 @@ echo ""
295295

296296
# Step 3: Configure admin
297297
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
298-
echo -e "${BOLD}Step 3/7: Creating admin user${NC}"
298+
echo -e "${BOLD}Step 3/9: Creating admin user${NC}"
299299
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
300300
echo ""
301301

@@ -308,50 +308,81 @@ fi
308308
docker exec kg-operator python /workspace/operator/configure.py admin --password "$ADMIN_PASSWORD"
309309
echo ""
310310

311-
# Step 4: Configure AI provider
311+
# Step 4: Configure AI provider (interactive selection)
312312
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
313-
echo -e "${BOLD}Step 4/7: Configuring AI extraction provider${NC}"
313+
echo -e "${BOLD}Step 4/9: Choosing AI extraction provider${NC}"
314314
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
315315
echo ""
316-
echo "Setting OpenAI GPT-4o as extraction provider..."
317-
docker exec kg-operator python /workspace/operator/configure.py ai-provider openai --model gpt-4o
316+
echo "Choose your AI extraction provider:"
318317
echo ""
319-
320-
# Step 5: Configure embeddings
321-
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
322-
echo -e "${BOLD}Step 5/7: Configuring embedding provider${NC}"
323-
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
318+
echo -e " ${GREEN}[1] OpenAI${NC} (GPT-4o, GPT-4o-mini)"
319+
echo " Direct access to OpenAI models"
324320
echo ""
325-
echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5)..."
326-
docker exec kg-operator python /workspace/operator/configure.py embedding --provider local
321+
echo -e " ${GREEN}[2] Anthropic${NC} (Claude Sonnet 4, Claude 3.5 Sonnet)"
322+
echo " Direct access to Anthropic Claude models"
323+
echo ""
324+
echo -e " ${GREEN}[3] OpenRouter${NC} (200+ models from all providers)"
325+
echo " Unified API — access OpenAI, Anthropic, Google, Meta, Mistral, etc."
326+
echo " Single API key for all models"
327+
echo ""
328+
# Ollama requires separate setup (local inference, no API key)
329+
# Configure via: ./operator.sh shell → configure ai-provider ollama
330+
echo -e " ${YELLOW}Note:${NC} Ollama (local inference) can be configured after setup"
331+
echo " via: ./operator.sh shell → configure ai-provider ollama"
332+
echo ""
333+
read -p "Choose option (1-3): " -r
334+
echo ""
335+
336+
case "$REPLY" in
337+
1)
338+
AI_PROVIDER="openai"
339+
AI_KEY_PROMPT="OpenAI API key (sk-...)"
340+
echo -e "${GREEN}${NC} Selected OpenAI"
341+
;;
342+
2)
343+
AI_PROVIDER="anthropic"
344+
AI_KEY_PROMPT="Anthropic API key (sk-ant-...)"
345+
echo -e "${GREEN}${NC} Selected Anthropic"
346+
;;
347+
3)
348+
AI_PROVIDER="openrouter"
349+
AI_KEY_PROMPT="OpenRouter API key (sk-or-...)"
350+
echo -e "${GREEN}${NC} Selected OpenRouter"
351+
;;
352+
*)
353+
AI_PROVIDER="openai"
354+
AI_KEY_PROMPT="OpenAI API key (sk-...)"
355+
echo -e "${YELLOW}${NC} Invalid option, defaulting to OpenAI"
356+
;;
357+
esac
327358
echo ""
328359

329-
# Step 6: Store OpenAI API key with validation loop
360+
# Step 5: Store API key (skip for Ollama)
330361
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
331-
echo -e "${BOLD}Step 6/8: Storing OpenAI API key${NC}"
362+
echo -e "${BOLD}Step 5/9: Validating API key${NC}"
332363
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
333364
echo ""
334-
echo "Please enter your OpenAI API key."
365+
366+
echo "Please enter your ${AI_PROVIDER} API key."
335367
echo "The key will be validated and stored encrypted in the database."
336368
echo ""
337369
echo -e "${YELLOW}Press Ctrl+C to cancel${NC}"
338370
echo ""
339371

340372
API_KEY_STORED=false
341373
while [ "$API_KEY_STORED" = false ]; do
342-
read -sp "OpenAI API key (sk-...): " OPENAI_KEY
374+
read -sp "${AI_KEY_PROMPT}: " AI_KEY
343375
echo ""
344376

345-
if [ -z "$OPENAI_KEY" ]; then
377+
if [ -z "$AI_KEY" ]; then
346378
echo -e "${RED}${NC} API key cannot be empty. Please try again."
347379
echo ""
348380
continue
349381
fi
350382

351383
echo -e "${BLUE}${NC} Validating and storing API key..."
352384

353-
# Try to store the key (will validate automatically)
354-
if docker exec kg-operator python /workspace/operator/configure.py api-key openai --key "$OPENAI_KEY" 2>&1; then
385+
if docker exec kg-operator python /workspace/operator/configure.py api-key "$AI_PROVIDER" --key "$AI_KEY" 2>&1; then
355386
API_KEY_STORED=true
356387
echo ""
357388
else
@@ -361,9 +392,128 @@ while [ "$API_KEY_STORED" = false ]; do
361392
fi
362393
done
363394

364-
# Step 7: Configure Garage credentials
395+
# Step 6: Refresh model catalog and select model
396+
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
397+
echo -e "${BOLD}Step 6/9: Selecting extraction model${NC}"
398+
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
399+
echo ""
400+
401+
# Set initial provider config with default model so catalog refresh can work
402+
docker exec kg-operator python /workspace/operator/configure.py ai-provider "$AI_PROVIDER" 2>/dev/null
403+
404+
# Refresh model catalog from provider API
405+
echo -e "${BLUE}${NC} Fetching available models from ${AI_PROVIDER}..."
406+
docker exec kg-operator python /workspace/operator/configure.py models refresh "$AI_PROVIDER" 2>&1
407+
echo ""
408+
409+
# Get model list in TSV format for parsing
410+
# Get full model list in TSV format
411+
FULL_MODEL_LIST=$(docker exec kg-operator python /workspace/operator/configure.py models list "$AI_PROVIDER" --tsv --category extraction 2>/dev/null)
412+
413+
if [ -z "$FULL_MODEL_LIST" ]; then
414+
echo -e "${YELLOW}${NC} Could not fetch models from catalog. Using provider default."
415+
echo ""
416+
else
417+
# For OpenRouter (200+ models), filter to well-known reasoning models first.
418+
# For OpenAI/Anthropic, the seed data is already a curated list.
419+
if [ "$AI_PROVIDER" = "openrouter" ]; then
420+
# Pattern match popular/capable reasoning models
421+
MODEL_LIST=$(echo "$FULL_MODEL_LIST" | grep -iE '(gpt-4o|gpt-4\.5|gpt-5|claude.*sonnet|claude.*opus|claude.*haiku|gemini.*pro|gemini.*flash|llama.*70|llama.*405|qwen.*72|mistral.*large|deepseek.*chat|deepseek.*r1|command-r)')
422+
else
423+
MODEL_LIST="$FULL_MODEL_LIST"
424+
fi
425+
426+
# Build numbered menu from filtered list
427+
display_model_menu() {
428+
local model_list="$1"
429+
MENU_INDEX=0
430+
declare -g -a MODEL_IDS MODEL_NAMES MODEL_CATALOG_IDS MODEL_PRICES
431+
MODEL_IDS=()
432+
MODEL_NAMES=()
433+
MODEL_CATALOG_IDS=()
434+
MODEL_PRICES=()
435+
436+
while IFS=$'\t' read -r cat_id model_id display_name prompt_price comp_price; do
437+
MENU_INDEX=$((MENU_INDEX + 1))
438+
MODEL_CATALOG_IDS[$MENU_INDEX]="$cat_id"
439+
MODEL_IDS[$MENU_INDEX]="$model_id"
440+
MODEL_NAMES[$MENU_INDEX]="$display_name"
441+
442+
if [ -n "$prompt_price" ] && [ "$prompt_price" != "0.0000" ]; then
443+
MODEL_PRICES[$MENU_INDEX]="\$${prompt_price}/\$${comp_price} per 1M tokens"
444+
else
445+
MODEL_PRICES[$MENU_INDEX]="free (local)"
446+
fi
447+
448+
printf " ${GREEN}[%2d]${NC} %-45s %s\n" "$MENU_INDEX" "$display_name" "${MODEL_PRICES[$MENU_INDEX]}"
449+
done <<< "$model_list"
450+
}
451+
452+
echo "Available extraction models:"
453+
echo ""
454+
display_model_menu "$MODEL_LIST"
455+
456+
# Offer "show all" option for OpenRouter
457+
if [ "$AI_PROVIDER" = "openrouter" ]; then
458+
TOTAL_COUNT=$(echo "$FULL_MODEL_LIST" | wc -l)
459+
echo ""
460+
echo -e " ${YELLOW}[ 0]${NC} Show all ${TOTAL_COUNT} available models"
461+
fi
462+
463+
echo ""
464+
465+
SELECTING=true
466+
while [ "$SELECTING" = true ]; do
467+
read -p "Choose model (1-${MENU_INDEX}) [1]: " -r MODEL_CHOICE
468+
if [ -z "$MODEL_CHOICE" ]; then
469+
MODEL_CHOICE=1
470+
fi
471+
472+
# Handle "show all" for OpenRouter
473+
if [ "$MODEL_CHOICE" = "0" ] && [ "$AI_PROVIDER" = "openrouter" ]; then
474+
echo ""
475+
echo "All available models:"
476+
echo ""
477+
display_model_menu "$FULL_MODEL_LIST"
478+
echo ""
479+
continue
480+
fi
481+
482+
# Validate and apply choice
483+
if [ "$MODEL_CHOICE" -ge 1 ] 2>/dev/null && [ "$MODEL_CHOICE" -le "$MENU_INDEX" ] 2>/dev/null; then
484+
CHOSEN_MODEL_ID="${MODEL_IDS[$MODEL_CHOICE]}"
485+
CHOSEN_CATALOG_ID="${MODEL_CATALOG_IDS[$MODEL_CHOICE]}"
486+
CHOSEN_NAME="${MODEL_NAMES[$MODEL_CHOICE]}"
487+
488+
echo ""
489+
echo -e "${GREEN}${NC} Selected: ${BOLD}${CHOSEN_NAME}${NC} (${CHOSEN_MODEL_ID})"
490+
491+
# Enable and set as default in catalog
492+
docker exec kg-operator python /workspace/operator/configure.py models enable "$CHOSEN_CATALOG_ID" 2>/dev/null
493+
docker exec kg-operator python /workspace/operator/configure.py models default "$CHOSEN_CATALOG_ID" 2>/dev/null
494+
495+
# Update active extraction config with chosen model
496+
docker exec kg-operator python /workspace/operator/configure.py ai-provider "$AI_PROVIDER" --model "$CHOSEN_MODEL_ID"
497+
SELECTING=false
498+
else
499+
echo -e "${YELLOW}${NC} Invalid choice, please try again."
500+
fi
501+
done
502+
fi
503+
echo ""
504+
505+
# Step 7: Configure embeddings
506+
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
507+
echo -e "${BOLD}Step 7/9: Configuring embedding provider${NC}"
508+
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
509+
echo ""
510+
echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5)..."
511+
docker exec kg-operator python /workspace/operator/configure.py embedding --provider local
512+
echo ""
513+
514+
# Step 8: Configure Garage credentials
365515
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
366-
echo -e "${BOLD}Step 7/8: Configuring Garage object storage${NC}"
516+
echo -e "${BOLD}Step 8/9: Configuring Garage object storage${NC}"
367517
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
368518
echo ""
369519
echo "Configuring S3-compatible object storage for images..."
@@ -414,7 +564,7 @@ fi
414564

415565
# Step 8: Save configuration and start application
416566
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
417-
echo -e "${BOLD}Step 8/8: Starting application (API + Web)${NC}"
567+
echo -e "${BOLD}Step 9/9: Starting application (API + Web)${NC}"
418568
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
419569
echo ""
420570

0 commit comments

Comments
 (0)