
Commit 5daafa0

CreatmanCEO and claude committed
fix: OpenRouter + DeepSeek V3.2 as primary LLM, CSV/Metrics layout overlay
LLM Provider Migration:
- Primary (Pool A+B): DeepSeek V3.2 via OpenRouter ($0.26/$0.42 per 1M tokens)
- Pool A fallback: Gemini Flash direct (free)
- Pool B fallback: NVIDIA Nemotron 3 Super via OpenRouter (free)
- Pool B upgrade: Claude Haiku 4.5 via OpenRouter
- Added openrouter_api_key to config, made cerebras/anthropic keys optional

UI Layout:
- CSV and Metrics panels now overlay above chat messages (not replace them)
- Messages always visible below panel

Updated: model_adaptors (DeepSeek style), eval pricing, sample metrics

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5fe035b commit 5daafa0

7 files changed

Lines changed: 104 additions & 130 deletions

.env.example

Lines changed: 8 additions & 9 deletions

@@ -1,15 +1,14 @@
-# LLM Providers (get keys from respective consoles)
+# LLM Providers
 GEMINI_API_KEY=your-gemini-key-from-aistudio.google.com
-CEREBRAS_API_KEY=your-cerebras-key-from-cloud.cerebras.ai
-ANTHROPIC_API_KEY=your-anthropic-key-from-console.anthropic.com
+OPENROUTER_API_KEY=your-openrouter-key-from-openrouter.ai
 
-# Model routing — Pool A (simple/medium tasks, mutual fallback)
-MODEL_POOL_A_PRIMARY=gemini/gemini-2.5-flash
-MODEL_POOL_A_FALLBACK=cerebras/llama-3.3-70b
+# Model routing — Pool A (DeepSeek primary, Gemini fallback)
+MODEL_POOL_A_PRIMARY=openrouter/deepseek/deepseek-chat-v3-0324
+MODEL_POOL_A_FALLBACK=gemini/gemini-2.5-flash
 
-# Model routing — Pool B (complex tasks)
-MODEL_POOL_B_DEFAULT=anthropic/claude-haiku-4-5-20251001
-MODEL_POOL_B_COMPLEX=anthropic/claude-sonnet-4-5-20250514
+# Model routing — Pool B (DeepSeek primary, Haiku upgrade)
+MODEL_POOL_B_DEFAULT=openrouter/deepseek/deepseek-chat-v3-0324
+MODEL_POOL_B_COMPLEX=openrouter/anthropic/claude-haiku-4-5-20251001
 
 # LLM Settings
 LLM_TEMPERATURE=0.1
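
The `openrouter/<org>/<model>` values above follow LiteLLM's provider-prefix convention, so the same strings drive both the router pools and one-off calls. A minimal sketch, not part of this commit, assuming `litellm` is installed and the variables above are exported:

```python
# Sketch: call a Pool A model directly with LiteLLM, using the same
# provider-prefixed model string as the .env file.
import os

from litellm import completion

response = completion(
    model=os.environ["MODEL_POOL_A_PRIMARY"],   # e.g. openrouter/deepseek/deepseek-chat-v3-0324
    api_key=os.environ["OPENROUTER_API_KEY"],   # key from openrouter.ai
    messages=[{"role": "user", "content": "Summarize the status of well AUH-01-003."}],
    temperature=float(os.environ.get("LLM_TEMPERATURE", "0.1")),
)
print(response.choices[0].message.content)
```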

backend/config.py

Lines changed: 8 additions & 7 deletions

@@ -7,16 +7,17 @@
 class Settings(BaseSettings):
     # LLM Provider Keys
     gemini_api_key: SecretStr
-    cerebras_api_key: SecretStr
-    anthropic_api_key: SecretStr
+    cerebras_api_key: SecretStr = SecretStr("")
+    anthropic_api_key: SecretStr = SecretStr("")
+    openrouter_api_key: SecretStr = SecretStr("")
 
-    # Model routing — Pool A (simple/medium, mutual fallback)
-    model_pool_a_primary: str = "gemini/gemini-2.5-flash"
-    model_pool_a_fallback: str = "cerebras/llama-3.3-70b"
+    # Model routing — Pool A (simple/medium)
+    model_pool_a_primary: str = "openrouter/deepseek/deepseek-chat-v3-0324"
+    model_pool_a_fallback: str = "gemini/gemini-2.5-flash"
 
     # Model routing — Pool B (complex tasks)
-    model_pool_b_default: str = "anthropic/claude-haiku-4-5-20251001"
-    model_pool_b_complex: str = "anthropic/claude-sonnet-4-5-20250514"
+    model_pool_b_default: str = "openrouter/deepseek/deepseek-chat-v3-0324"
+    model_pool_b_complex: str = "openrouter/anthropic/claude-haiku-4-5-20251001"
 
     llm_temperature: float = 0.1
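
The practical effect of the new defaults: `Settings()` now validates with only the Gemini and OpenRouter keys in the environment, since the Cerebras and Anthropic fields fall back to empty secrets. A simplified sketch (the real class lives in backend/config.py and has more fields):

```python
# Sketch of the optional-key behavior introduced by this commit.
import os

from pydantic import SecretStr
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    gemini_api_key: SecretStr
    cerebras_api_key: SecretStr = SecretStr("")    # optional after this commit
    anthropic_api_key: SecretStr = SecretStr("")   # optional after this commit
    openrouter_api_key: SecretStr = SecretStr("")


os.environ["GEMINI_API_KEY"] = "dummy"
os.environ["OPENROUTER_API_KEY"] = "dummy"

settings = Settings()  # no longer raises a validation error without Cerebras/Anthropic keys
print(bool(settings.cerebras_api_key.get_secret_value()))  # False: empty default
```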

backend/eval/batch_runner.py

Lines changed: 3 additions & 4 deletions

@@ -177,11 +177,10 @@ def estimate_cost(model: str, tokens_in: int, tokens_out: int, response=None) ->
 
     # Fallback: manual approximate pricing (USD per 1M tokens)
     pricing = {
+        "openrouter/deepseek/deepseek-chat-v3-0324": {"input": 0.26, "output": 0.42},
+        "openrouter/nvidia/nemotron-3-super": {"input": 0.0, "output": 0.0},
+        "openrouter/anthropic/claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.00},
         "gemini/gemini-2.5-flash": {"input": 0.15, "output": 0.60},
-        "gemini/gemini-2.5-pro": {"input": 1.25, "output": 5.00},
-        "cerebras/llama-3.3-70b": {"input": 0.60, "output": 0.60},
-        "anthropic/claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.00},
-        "anthropic/claude-sonnet-4-5-20250514": {"input": 3.00, "output": 15.00},
     }
 
     rates = pricing.get(model, {"input": 1.0, "output": 3.0})
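
For reference, the per-1M-token arithmetic behind these rates. The multiplication itself sits outside this hunk, so the formula below is the conventional one rather than a quote of estimate_cost:

```python
# Worked example of the fallback pricing for a hypothetical request.
tokens_in, tokens_out = 600, 300
rates = {"input": 0.26, "output": 0.42}  # openrouter/deepseek/deepseek-chat-v3-0324, USD per 1M tokens

cost = tokens_in / 1_000_000 * rates["input"] + tokens_out / 1_000_000 * rates["output"]
print(f"${cost:.6f}")  # $0.000282; the free Nemotron entry evaluates to $0 by the same formula
```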

backend/eval/metrics_api.py

Lines changed: 29 additions & 29 deletions

@@ -10,9 +10,21 @@
 
 # Sample metrics for demo (used when no eval has been run yet)
 SAMPLE_METRICS = {
+    "openrouter/deepseek/deepseek-chat-v3-0324": {
+        "model": "openrouter/deepseek/deepseek-chat-v3-0324",
+        "pool": "pool-a + pool-b",
+        "total_cases": 48,
+        "accuracy": 0.896,
+        "schema_compliance": 0.875,
+        "latency_p50": 380,
+        "latency_p95": 950,
+        "cost_per_request": 0.000052,
+        "avg_tokens_per_request": 900,
+        "error_rate": 0.0,
+    },
     "gemini/gemini-2.5-flash": {
         "model": "gemini/gemini-2.5-flash",
-        "pool": "pool-a",
+        "pool": "pool-a (fallback)",
         "total_cases": 48,
         "accuracy": 0.875,
         "schema_compliance": 0.812,
@@ -22,21 +34,21 @@
         "avg_tokens_per_request": 850,
         "error_rate": 0.021,
     },
-    "cerebras/llama-3.3-70b": {
-        "model": "cerebras/llama-3.3-70b",
-        "pool": "pool-a (fallback)",
+    "openrouter/nvidia/nemotron-3-super": {
+        "model": "openrouter/nvidia/nemotron-3-super",
+        "pool": "pool-b (fallback)",
         "total_cases": 48,
-        "accuracy": 0.833,
-        "schema_compliance": 0.854,
-        "latency_p50": 280,
-        "latency_p95": 680,
-        "cost_per_request": 0.000062,
-        "avg_tokens_per_request": 920,
+        "accuracy": 0.812,
+        "schema_compliance": 0.792,
+        "latency_p50": 520,
+        "latency_p95": 1400,
+        "cost_per_request": 0.0,
+        "avg_tokens_per_request": 1050,
         "error_rate": 0.042,
     },
-    "anthropic/claude-haiku-4-5-20251001": {
-        "model": "anthropic/claude-haiku-4-5-20251001",
-        "pool": "pool-b",
+    "openrouter/anthropic/claude-haiku-4-5-20251001": {
+        "model": "openrouter/anthropic/claude-haiku-4-5-20251001",
+        "pool": "pool-b-upgrade",
         "total_cases": 48,
         "accuracy": 0.938,
         "schema_compliance": 0.917,
@@ -46,26 +58,14 @@
         "avg_tokens_per_request": 1100,
         "error_rate": 0.0,
     },
-    "anthropic/claude-sonnet-4-5-20250514": {
-        "model": "anthropic/claude-sonnet-4-5-20250514",
-        "pool": "pool-b-upgrade",
-        "total_cases": 48,
-        "accuracy": 0.958,
-        "schema_compliance": 0.938,
-        "latency_p50": 1200,
-        "latency_p95": 3500,
-        "cost_per_request": 0.00145,
-        "avg_tokens_per_request": 1450,
-        "error_rate": 0.0,
-    },
 }
 
 
 POOL_MAP = {
-    "gemini/gemini-2.5-flash": "pool-a",
-    "cerebras/llama-3.3-70b": "pool-a (fallback)",
-    "anthropic/claude-haiku-4-5-20251001": "pool-b",
-    "anthropic/claude-sonnet-4-5-20250514": "pool-b-upgrade",
+    "openrouter/deepseek/deepseek-chat-v3-0324": "pool-a + pool-b",
+    "gemini/gemini-2.5-flash": "pool-a (fallback)",
+    "openrouter/nvidia/nemotron-3-super": "pool-b (fallback)",
+    "openrouter/anthropic/claude-haiku-4-5-20251001": "pool-b-upgrade",
 }
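
A small sketch of how the demo data can be consumed; `demo_row` is a hypothetical helper, not a function from this file:

```python
# Hypothetical helper: join the demo metrics with the pool label.
def demo_row(model: str) -> dict:
    return {"pool": POOL_MAP.get(model, "unknown"), **SAMPLE_METRICS.get(model, {})}

print(demo_row("openrouter/deepseek/deepseek-chat-v3-0324")["accuracy"])  # 0.896
```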

backend/prompts/model_adaptors.py

Lines changed: 15 additions & 20 deletions

@@ -1,35 +1,30 @@
 """Model-specific adaptors — tailored instructions per LLM provider.
 
 Each provider has different strengths. We optimize prompt style accordingly:
-- Gemini Flash: fast, good at structured output with concise prompts
-- Cerebras Llama: needs explicit format examples, simpler vocabulary
-- Anthropic Haiku: benefits from chain-of-thought permission
-- Anthropic Sonnet: full reasoning freedom, comprehensive analysis
+- DeepSeek V3: strong structured output, good reasoning, cost-effective
+- Gemini Flash: fast, good at concise responses
+- NVIDIA Nemotron: free tier, needs explicit format guidance
+- Anthropic Haiku: excellent tool calling, chain-of-thought
 """
 
 MODEL_ADAPTORS = {
     "pool-a": {
-        "gemini_flash": """## Response Style (Gemini Flash)
+        "deepseek": """## Response Style (DeepSeek — Efficient)
 - Be concise: respond in under 150 words unless detailed analysis is explicitly requested
 - When using tools, summarize results in 2-3 sentences
 - For structured output, use exact JSON format — no markdown wrapping
 - Prefer bullet points over paragraphs
 - Include well IDs and numeric values, skip verbose explanations
 - Example anomaly summary: "AUH-01-003: debit declined 32% (12.1→8.2 L/s). Recommend pump inspection."
 """,
-        "cerebras_llama": """## Response Style (Llama)
-- Use clear, simple language. Avoid complex nested sentences.
-- When returning structured data, follow this exact format:
-```json
-{"type": "anomaly_card", "severity": "high", "well_id": "AUH-01-003", ...}
-```
-- Always state findings before recommendations
-- List items with numbered steps: 1. Finding, 2. Cause, 3. Action
-- When uncertain, say "Based on available data..." rather than speculating
-- Keep responses under 200 words for simple queries
+        "gemini_flash": """## Response Style (Gemini Flash — Concise)
+- Be very concise: under 100 words for simple queries
+- Use bullet points, not paragraphs
+- Include well IDs and numeric values with units
+- For structured output, return exact JSON — no wrapping
 """,
     },
-    "pool-b": """## Response Style (Haiku — Analytical)
+    "pool-b": """## Response Style (Analytical)
 - Think step by step before concluding. Consider multiple hypotheses.
 - Structure your analysis:
 1. Observation: what the data shows
@@ -41,7 +36,7 @@
 - Compare with neighboring wells when relevant
 - Cite specific values and well IDs throughout
 """,
-    "pool-b-upgrade": """## Response Style (Sonnet — Comprehensive)
+    "pool-b-upgrade": """## Response Style (Comprehensive Analysis)
 - Provide comprehensive analysis with evidence and reasoning
 - You have full freedom to reason at length — use it for complex cases
 - Consider geological, operational, and seasonal factors holistically
@@ -63,7 +58,7 @@ def get_model_adaptor(model_pool: str, model_name: str = "") -> str:
     """Get the appropriate model adaptor text for a model pool."""
     adaptor = MODEL_ADAPTORS.get(model_pool, "")
     if isinstance(adaptor, dict):
-        if "cerebras" in model_name or "llama" in model_name:
-            return adaptor.get("cerebras_llama", list(adaptor.values())[0])
-        return adaptor.get("gemini_flash", list(adaptor.values())[0])
+        if "gemini" in model_name:
+            return adaptor.get("gemini_flash", list(adaptor.values())[0])
+        return adaptor.get("deepseek", list(adaptor.values())[0])
     return adaptor
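
Behavior of the updated selector, derived from the code above (pool-a keys on the model name, and anything that is not Gemini gets the DeepSeek style):

```python
# Selector behavior, derived from the updated get_model_adaptor.
print(get_model_adaptor("pool-a", "openrouter/deepseek/deepseek-chat-v3-0324").splitlines()[0])
# ## Response Style (DeepSeek — Efficient)
print(get_model_adaptor("pool-a", "gemini/gemini-2.5-flash").splitlines()[0])
# ## Response Style (Gemini Flash — Concise)
print(get_model_adaptor("pool-b").splitlines()[0])
# ## Response Style (Analytical)
```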

backend/services/llm_router.py

Lines changed: 36 additions & 52 deletions

@@ -23,59 +23,43 @@ def create_router() -> Router:
     """Create LiteLLM router with two model pools."""
     settings = get_settings()
 
+    or_key = settings.openrouter_api_key.get_secret_value()
+    gemini_key = settings.gemini_api_key.get_secret_value()
+
     model_list = [
-        # Pool A — simple/medium tasks (mutual fallback)
-        {
-            "model_name": "pool-a",
-            "litellm_params": {
-                "model": settings.model_pool_a_primary,
-                "api_key": settings.gemini_api_key.get_secret_value(),
-            },
-        },
-        {
-            "model_name": "pool-a",
-            "litellm_params": {
-                "model": settings.model_pool_a_fallback,
-                "api_key": settings.cerebras_api_key.get_secret_value(),
-            },
-        },
-        # Pool B — complex tasks (Anthropic → Gemini → Cerebras fallback chain)
-        {
-            "model_name": "pool-b",
-            "litellm_params": {
-                "model": settings.model_pool_b_default,
-                "api_key": settings.anthropic_api_key.get_secret_value(),
-            },
-        },
-        {
-            "model_name": "pool-b",
-            "litellm_params": {
-                "model": settings.model_pool_a_primary,
-                "api_key": settings.gemini_api_key.get_secret_value(),
-            },
-        },
-        {
-            "model_name": "pool-b",
-            "litellm_params": {
-                "model": settings.model_pool_a_fallback,
-                "api_key": settings.cerebras_api_key.get_secret_value(),
-            },
-        },
-        # Pool B upgrade — same fallback chain
-        {
-            "model_name": "pool-b-upgrade",
-            "litellm_params": {
-                "model": settings.model_pool_b_complex,
-                "api_key": settings.anthropic_api_key.get_secret_value(),
-            },
-        },
-        {
-            "model_name": "pool-b-upgrade",
-            "litellm_params": {
-                "model": settings.model_pool_a_primary,
-                "api_key": settings.gemini_api_key.get_secret_value(),
-            },
-        },
+        # Pool A — simple/medium tasks
+        # Primary: DeepSeek V3.2 via OpenRouter (cheap, stable, tool calling)
+        {"model_name": "pool-a", "litellm_params": {
+            "model": "openrouter/deepseek/deepseek-chat-v3-0324",
+            "api_key": or_key,
+        }},
+        # Fallback: Gemini Flash direct (free but sometimes unreliable)
+        {"model_name": "pool-a", "litellm_params": {
+            "model": "gemini/gemini-2.5-flash",
+            "api_key": gemini_key,
+        }},
+
+        # Pool B — complex tasks (reasoning, anomaly interpretation)
+        # Primary: DeepSeek V3.2 via OpenRouter
+        {"model_name": "pool-b", "litellm_params": {
+            "model": "openrouter/deepseek/deepseek-chat-v3-0324",
+            "api_key": or_key,
+        }},
+        # Fallback: free NVIDIA Nemotron via OpenRouter
+        {"model_name": "pool-b", "litellm_params": {
+            "model": "openrouter/nvidia/nemotron-3-super",
+            "api_key": or_key,
+        }},
+
+        # Pool B upgrade — deep reasoning
+        {"model_name": "pool-b-upgrade", "litellm_params": {
+            "model": "openrouter/anthropic/claude-haiku-4-5-20251001",
+            "api_key": or_key,
+        }},
+        {"model_name": "pool-b-upgrade", "litellm_params": {
+            "model": "openrouter/deepseek/deepseek-chat-v3-0324",
+            "api_key": or_key,
+        }},
     ]
 
     return Router(
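
Callers keep addressing pools by name; entries that share a `model_name` form one deployment group, and which entry serves a request (and how failover happens) is governed by the `Router(...)` kwargs truncated above. A usage sketch:

```python
# Usage sketch: requests go to a pool name, not to a specific provider.
router = create_router()

resp = router.completion(
    model="pool-b",  # both the DeepSeek and Nemotron entries above register under this name
    messages=[{"role": "user", "content": "Explain the debit drop on AUH-01-003."}],
)
print(resp.model)  # reports which underlying deployment handled the call
```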

frontend/src/components/Chat/ChatPanel.tsx

Lines changed: 5 additions & 9 deletions

@@ -64,23 +64,21 @@ export function ChatPanel() {
         </div>
       </div>
 
-      {/* View switcher */}
+      {/* Panel overlays — shown above messages, not instead of */}
       {view === "csv" && (
-        <div className="flex-1 overflow-y-auto border-b">
+        <div className="border-b max-h-[40%] overflow-y-auto shrink-0">
          <CSVUpload />
         </div>
       )}
 
       {view === "metrics" && (
-        <div className="flex-1 overflow-y-auto">
+        <div className="border-b max-h-[60%] overflow-y-auto shrink-0">
          <MetricsPanel />
         </div>
       )}
 
-      {view === "chat" && (
-        <>
-          {/* Messages */}
-          <div className="flex-1 overflow-y-auto px-4 py-3 space-y-1">
+      {/* Messages — always visible */}
+      <div className="flex-1 overflow-y-auto px-4 py-3 space-y-1">
         {messages.length === 0 && !streamingText && (
           <div className="px-2 py-4">
             {/* Welcome message styled as assistant bubble */}
@@ -151,8 +149,6 @@
 
         <div ref={messagesEndRef} />
       </div>
-        </>
-      )}
 
       {/* Input — always visible */}
       <form onSubmit={handleSubmit} className="border-t px-4 py-3">
