Skip to content

Commit 25ff4c1

Browse files
CreatmanCEO and claude
committed
fix: Anthropic via OpenRouter as primary LLM, WellHistory charts, depression analysis
BLOCK 1 — LLM Providers: - Switched all pools to Anthropic Claude Haiku 4.5 via OpenRouter (native tool calling) - Pool B upgrade: Claude Sonnet 4.5 via OpenRouter - Fallback: Gemini Flash direct - Removed DeepSeek (no streaming tool calling) and Nemotron (invalid ID) - Simplified model_adaptors to pool-level (not provider-level) - Added raw tool markup filter in chatStore (tool_call_begin etc.) - Added depression_analysis task type with Theis-based reasoning instructions BLOCK 2 — UI: - Chat panel: w + min-w + max-w constraints (no shrinking) - Loading indicator: bouncing dots "Analyzing..." between user msg and response BLOCK 3 — Features: - WellHistoryChart: Recharts line chart for well_history cards (sampled to 60 points) - Updated eval pricing and sample metrics for Anthropic models Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f55cfed commit 25ff4c1

13 files changed

Lines changed: 517 additions & 120 deletions

File tree

backend/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ class Settings(BaseSettings):
1212
openrouter_api_key: SecretStr = SecretStr("")
1313

1414
# Model routing — Pool A (simple/medium)
15-
model_pool_a_primary: str = "openrouter/deepseek/deepseek-chat-v3-0324"
15+
model_pool_a_primary: str = "openrouter/anthropic/claude-haiku-4-5-20251001"
1616
model_pool_a_fallback: str = "gemini/gemini-2.5-flash"
1717

1818
# Model routing — Pool B (complex tasks)
19-
model_pool_b_default: str = "openrouter/deepseek/deepseek-chat-v3-0324"
20-
model_pool_b_complex: str = "openrouter/anthropic/claude-haiku-4-5-20251001"
19+
model_pool_b_default: str = "openrouter/anthropic/claude-haiku-4-5-20251001"
20+
model_pool_b_complex: str = "openrouter/anthropic/claude-sonnet-4-5-20250514"
2121

2222
llm_temperature: float = 0.1
2323

backend/eval/batch_runner.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,8 @@ def estimate_cost(model: str, tokens_in: int, tokens_out: int, response=None) ->
177177

178178
# Fallback: manual approximate pricing (USD per 1M tokens)
179179
pricing = {
180-
"openrouter/deepseek/deepseek-chat-v3-0324": {"input": 0.26, "output": 0.42},
181-
"openrouter/nvidia/nemotron-3-super": {"input": 0.0, "output": 0.0},
182180
"openrouter/anthropic/claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.00},
181+
"openrouter/anthropic/claude-sonnet-4-5-20250514": {"input": 3.00, "output": 15.00},
183182
"gemini/gemini-2.5-flash": {"input": 0.15, "output": 0.60},
184183
}
185184

backend/eval/metrics_api.py

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,21 @@
1010

1111
# Sample metrics for demo (used when no eval has been run yet)
1212
SAMPLE_METRICS = {
13-
"openrouter/deepseek/deepseek-chat-v3-0324": {
14-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
13+
"openrouter/anthropic/claude-haiku-4-5-20251001": {
14+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
1515
"pool": "pool-a + pool-b",
1616
"total_cases": 48,
17-
"accuracy": 0.896,
18-
"schema_compliance": 0.875,
19-
"latency_p50": 380,
20-
"latency_p95": 950,
21-
"cost_per_request": 0.000052,
22-
"avg_tokens_per_request": 900,
17+
"accuracy": 0.938,
18+
"schema_compliance": 0.917,
19+
"latency_p50": 650,
20+
"latency_p95": 1800,
21+
"cost_per_request": 0.00032,
22+
"avg_tokens_per_request": 1100,
2323
"error_rate": 0.0,
2424
},
2525
"gemini/gemini-2.5-flash": {
2626
"model": "gemini/gemini-2.5-flash",
27-
"pool": "pool-a (fallback)",
27+
"pool": "fallback",
2828
"total_cases": 48,
2929
"accuracy": 0.875,
3030
"schema_compliance": 0.812,
@@ -34,38 +34,25 @@
3434
"avg_tokens_per_request": 850,
3535
"error_rate": 0.021,
3636
},
37-
"openrouter/nvidia/nemotron-3-super": {
38-
"model": "openrouter/nvidia/nemotron-3-super",
39-
"pool": "pool-b (fallback)",
40-
"total_cases": 48,
41-
"accuracy": 0.812,
42-
"schema_compliance": 0.792,
43-
"latency_p50": 520,
44-
"latency_p95": 1400,
45-
"cost_per_request": 0.0,
46-
"avg_tokens_per_request": 1050,
47-
"error_rate": 0.042,
48-
},
49-
"openrouter/anthropic/claude-haiku-4-5-20251001": {
50-
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
37+
"openrouter/anthropic/claude-sonnet-4-5-20250514": {
38+
"model": "openrouter/anthropic/claude-sonnet-4-5-20250514",
5139
"pool": "pool-b-upgrade",
5240
"total_cases": 48,
53-
"accuracy": 0.938,
54-
"schema_compliance": 0.917,
55-
"latency_p50": 650,
56-
"latency_p95": 1800,
57-
"cost_per_request": 0.00032,
58-
"avg_tokens_per_request": 1100,
41+
"accuracy": 0.958,
42+
"schema_compliance": 0.938,
43+
"latency_p50": 1200,
44+
"latency_p95": 3500,
45+
"cost_per_request": 0.00145,
46+
"avg_tokens_per_request": 1450,
5947
"error_rate": 0.0,
6048
},
6149
}
6250

6351

6452
POOL_MAP = {
65-
"openrouter/deepseek/deepseek-chat-v3-0324": "pool-a + pool-b",
66-
"gemini/gemini-2.5-flash": "pool-a (fallback)",
67-
"openrouter/nvidia/nemotron-3-super": "pool-b (fallback)",
68-
"openrouter/anthropic/claude-haiku-4-5-20251001": "pool-b-upgrade",
53+
"openrouter/anthropic/claude-haiku-4-5-20251001": "pool-a + pool-b",
54+
"gemini/gemini-2.5-flash": "fallback",
55+
"openrouter/anthropic/claude-sonnet-4-5-20250514": "pool-b-upgrade",
6956
}
7057

7158

backend/main.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -161,18 +161,22 @@ def _classify_task(message: str) -> str:
161161
msg = message.lower()
162162
if any(w in msg for w in ["csv", "upload", "validate", "file"]):
163163
return "validate_csv"
164-
if any(w in msg for w in ["anomal", "decline", "spike", "fault", "problem"]):
164+
if any(w in msg for w in ["anomal", "decline", "spike", "fault", "problem", "scan"]):
165165
return "detect_anomalies"
166+
if any(w in msg for w in ["calibrat", "optimi", "theis", "pumping schedule"]):
167+
return "calibration_advice"
168+
if any(w in msg for w in ["interpret", "why", "cause", "explain", "reason", "root cause"]):
169+
return "interpret_anomaly"
170+
if any(w in msg for w in ["depression", "cone", "interference", "drawdown"]):
171+
return "depression_analysis"
172+
if any(w in msg for w in ["region", "area", "viewport", "overview", "stats", "summary", "report", "daily"]):
173+
return "get_region_stats"
174+
if any(w in msg for w in ["quality", "tds", "chloride", "ph ", "salinity", "compare cluster"]):
175+
return "general_question"
166176
if any(w in msg for w in ["history", "trend", "time series", "chart"]):
167177
return "get_well_history"
168-
if any(w in msg for w in ["region", "area", "viewport", "overview", "stats"]):
169-
return "get_region_stats"
170-
if any(w in msg for w in ["find", "search", "list", "wells", "query"]):
178+
if any(w in msg for w in ["find", "search", "list", "wells", "query", "status", "active", "inactive"]):
171179
return "query_wells"
172-
if any(w in msg for w in ["interpret", "why", "cause", "explain", "reason"]):
173-
return "interpret_anomaly"
174-
if any(w in msg for w in ["calibrat", "optimi", "theis", "pumping schedule"]):
175-
return "calibration_advice"
176180
return "general_question"
177181

178182

@@ -292,6 +296,7 @@ async def generate() -> AsyncIterator[str]:
292296
messages.append({"role": "tool", "tool_call_id": f"call_{tc['name']}_{i}", "content": json.dumps(tool_result.result)})
293297

294298
# ONE follow-up call with all tool results
299+
# No tools param — force text response, prevent LLM from attempting more tool calls as text
295300
followup = await llm_router.acompletion(
296301
model=model_pool,
297302
messages=messages,

backend/prompts/model_adaptors.py

Lines changed: 10 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,27 @@
1-
"""Model-specific adaptors — tailored instructions per LLM provider.
2-
3-
Each provider has different strengths. We optimize prompt style accordingly:
4-
- DeepSeek V3: strong structured output, good reasoning, cost-effective
5-
- Gemini Flash: fast, good at concise responses
6-
- NVIDIA Nemotron: free tier, needs explicit format guidance
7-
- Anthropic Haiku: excellent tool calling, chain-of-thought
8-
"""
1+
"""Model-specific adaptors — tailored instructions per pool complexity level."""
92

103
MODEL_ADAPTORS = {
11-
"pool-a": {
12-
"deepseek": """## Response Style (DeepSeek — Efficient)
13-
- Be concise: respond in under 150 words unless detailed analysis is explicitly requested
4+
"pool-a": """## Response Style (Quick Analysis)
5+
- Be concise: under 150 words unless detailed analysis requested
146
- When using tools, summarize results in 2-3 sentences
15-
- For structured output, use exact JSON format — no markdown wrapping
16-
- Prefer bullet points over paragraphs
177
- Include well IDs and numeric values, skip verbose explanations
18-
- Example anomaly summary: "AUH-01-003: debit declined 32% (12.1→8.2 L/s). Recommend pump inspection."
19-
""",
20-
"gemini_flash": """## Response Style (Gemini Flash — Concise)
21-
- Be very concise: under 100 words for simple queries
22-
- Use bullet points, not paragraphs
23-
- Include well IDs and numeric values with units
24-
- For structured output, return exact JSON — no wrapping
8+
- Example: "AUH-01-003: debit declined 32% (12.1 to 8.2 L/s). Recommend pump inspection."
259
""",
26-
},
2710
"pool-b": """## Response Style (Analytical)
28-
- Think step by step before concluding. Consider multiple hypotheses.
29-
- Structure your analysis:
30-
1. Observation: what the data shows
31-
2. Context: relevant hydrogeological factors
32-
3. Assessment: most likely explanation
33-
4. Recommendation: specific actionable steps
34-
- You may use chain-of-thought reasoning for complex questions
11+
- Think step by step before concluding
12+
- Structure: Observation -> Context -> Assessment -> Recommendation
3513
- Cross-reference multiple data points before declaring anomalies
36-
- Compare with neighboring wells when relevant
3714
- Cite specific values and well IDs throughout
3815
""",
39-
"pool-b-upgrade": """## Response Style (Comprehensive Analysis)
16+
"pool-b-upgrade": """## Response Style (Comprehensive)
4017
- Provide comprehensive analysis with evidence and reasoning
41-
- You have full freedom to reason at length — use it for complex cases
4218
- Consider geological, operational, and seasonal factors holistically
43-
- For anomaly interpretation:
44-
- Analyze time series patterns in detail
45-
- Consider superposition effects from neighboring wells
46-
- Evaluate whether anomaly is isolated or part of regional trend
47-
- Provide differential diagnosis with likelihood assessment
48-
- For calibration/optimization questions:
49-
- Consider trade-offs explicitly
50-
- Provide quantitative recommendations where possible
51-
- Reference hydrogeological principles (Theis, Cooper-Jacob)
52-
- Include confidence levels in your assessments
19+
- Include confidence levels in assessments
20+
- Reference hydrogeological principles (Theis, Cooper-Jacob)
5321
""",
5422
}
5523

5624

5725
def get_model_adaptor(model_pool: str, model_name: str = "") -> str:
5826
"""Get the appropriate model adaptor text for a model pool."""
59-
adaptor = MODEL_ADAPTORS.get(model_pool, "")
60-
if isinstance(adaptor, dict):
61-
if "gemini" in model_name:
62-
return adaptor.get("gemini_flash", list(adaptor.values())[0])
63-
return adaptor.get("deepseek", list(adaptor.values())[0])
64-
return adaptor
27+
return MODEL_ADAPTORS.get(model_pool, MODEL_ADAPTORS["pool-b"])

backend/prompts/task_instructions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,18 @@
7070
- Provide quantitative recommendations where possible
7171
- Consider trade-offs: yield vs. sustainability, cost vs. accuracy
7272
- Suggest specific parameter adjustments with expected outcomes
73+
""",
74+
75+
"depression_analysis": """## Task: Depression Cone Analysis
76+
When asked about depression cones or well interference:
77+
1. Call query_wells to get well locations and yields in viewport
78+
2. Call detect_anomalies to check for interference patterns
79+
3. Reason about depression cones using:
80+
- Well proximity (wells < 2km apart may interfere)
81+
- Yield magnitudes (higher yield = larger cone)
82+
- Theis superposition principle
83+
4. Describe cone geometry: center, approximate radius, overlap with neighbors
84+
You do NOT need a dedicated depression cone tool — reason from available data.
7385
""",
7486
}
7587

backend/services/llm_router.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55

66
# Task → model pool routing
77
TASK_ROUTING = {
8-
"validate_csv": "pool-a",
9-
"query_wells": "pool-a",
10-
"get_region_stats": "pool-a",
11-
"get_well_history": "pool-a",
12-
"detect_anomalies": "pool-b",
13-
"interpret_anomaly": "pool-b",
14-
"calibration_advice": "pool-b-upgrade",
15-
"general_question": "pool-b",
8+
"validate_csv": "pool-a",
9+
"query_wells": "pool-a",
10+
"get_region_stats": "pool-a",
11+
"get_well_history": "pool-a",
12+
"detect_anomalies": "pool-b",
13+
"interpret_anomaly": "pool-b",
14+
"depression_analysis": "pool-b",
15+
"calibration_advice": "pool-b-upgrade",
16+
"general_question": "pool-b",
1617
}
1718

1819
# Prompt engine singleton
@@ -28,36 +29,32 @@ def create_router() -> Router:
2829

2930
model_list = [
3031
# Pool A — simple/medium tasks
31-
# Primary: DeepSeek V3.2 via OpenRouter (cheap, stable, tool calling)
3232
{"model_name": "pool-a", "litellm_params": {
33-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
33+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
3434
"api_key": or_key,
3535
}},
36-
# Fallback: Gemini Flash direct (free but sometimes unreliable)
3736
{"model_name": "pool-a", "litellm_params": {
3837
"model": "gemini/gemini-2.5-flash",
3938
"api_key": gemini_key,
4039
}},
4140

42-
# Pool B — complex tasks (reasoning, anomaly interpretation)
43-
# Primary: DeepSeek V3.2 via OpenRouter
41+
# Pool B — complex tasks
4442
{"model_name": "pool-b", "litellm_params": {
45-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
43+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
4644
"api_key": or_key,
4745
}},
48-
# Fallback: free NVIDIA Nemotron via OpenRouter
4946
{"model_name": "pool-b", "litellm_params": {
50-
"model": "openrouter/nvidia/nemotron-3-super",
51-
"api_key": or_key,
47+
"model": "gemini/gemini-2.5-flash",
48+
"api_key": gemini_key,
5249
}},
5350

5451
# Pool B upgrade — deep reasoning
5552
{"model_name": "pool-b-upgrade", "litellm_params": {
56-
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
53+
"model": "openrouter/anthropic/claude-sonnet-4-5-20250514",
5754
"api_key": or_key,
5855
}},
5956
{"model_name": "pool-b-upgrade", "litellm_params": {
60-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
57+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
6158
"api_key": or_key,
6259
}},
6360
]

0 commit comments

Comments
 (0)