Skip to content

Commit 25ff4c1

Browse files
CreatmanCEO and claude
committed
fix: Anthropic via OpenRouter as primary LLM, WellHistory charts, depression analysis
BLOCK 1 — LLM Providers: - Switched all pools to Anthropic Claude Haiku 4.5 via OpenRouter (native tool calling) - Pool B upgrade: Claude Sonnet 4.5 via OpenRouter - Fallback: Gemini Flash direct - Removed DeepSeek (no streaming tool calling) and Nemotron (invalid ID) - Simplified model_adaptors to pool-level (not provider-level) - Added raw tool markup filter in chatStore (tool_call_begin etc.) - Added depression_analysis task type with Theis-based reasoning instructions BLOCK 2 — UI: - Chat panel: w + min-w + max-w constraints (no shrinking) - Loading indicator: bouncing dots "Analyzing..." between user msg and response BLOCK 3 — Features: - WellHistoryChart: Recharts line chart for well_history cards (sampled to 60 points) - Updated eval pricing and sample metrics for Anthropic models Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f55cfed commit 25ff4c1

13 files changed

Lines changed: 517 additions & 120 deletions

File tree

backend/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ class Settings(BaseSettings):
1212
openrouter_api_key: SecretStr = SecretStr("")
1313

1414
# Model routing — Pool A (simple/medium)
15-
model_pool_a_primary: str = "openrouter/deepseek/deepseek-chat-v3-0324"
15+
model_pool_a_primary: str = "openrouter/anthropic/claude-haiku-4-5-20251001"
1616
model_pool_a_fallback: str = "gemini/gemini-2.5-flash"
1717

1818
# Model routing — Pool B (complex tasks)
19-
model_pool_b_default: str = "openrouter/deepseek/deepseek-chat-v3-0324"
20-
model_pool_b_complex: str = "openrouter/anthropic/claude-haiku-4-5-20251001"
19+
model_pool_b_default: str = "openrouter/anthropic/claude-haiku-4-5-20251001"
20+
model_pool_b_complex: str = "openrouter/anthropic/claude-sonnet-4-5-20250514"
2121

2222
llm_temperature: float = 0.1
2323

backend/eval/batch_runner.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,8 @@ def estimate_cost(model: str, tokens_in: int, tokens_out: int, response=None) ->
177177

178178
# Fallback: manual approximate pricing (USD per 1M tokens)
179179
pricing = {
180-
"openrouter/deepseek/deepseek-chat-v3-0324": {"input": 0.26, "output": 0.42},
181-
"openrouter/nvidia/nemotron-3-super": {"input": 0.0, "output": 0.0},
182180
"openrouter/anthropic/claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.00},
181+
"openrouter/anthropic/claude-sonnet-4-5-20250514": {"input": 3.00, "output": 15.00},
183182
"gemini/gemini-2.5-flash": {"input": 0.15, "output": 0.60},
184183
}
185184

backend/eval/metrics_api.py

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,21 @@
1010

1111
# Sample metrics for demo (used when no eval has been run yet)
1212
SAMPLE_METRICS = {
13-
"openrouter/deepseek/deepseek-chat-v3-0324": {
14-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
13+
"openrouter/anthropic/claude-haiku-4-5-20251001": {
14+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
1515
"pool": "pool-a + pool-b",
1616
"total_cases": 48,
17-
"accuracy": 0.896,
18-
"schema_compliance": 0.875,
19-
"latency_p50": 380,
20-
"latency_p95": 950,
21-
"cost_per_request": 0.000052,
22-
"avg_tokens_per_request": 900,
17+
"accuracy": 0.938,
18+
"schema_compliance": 0.917,
19+
"latency_p50": 650,
20+
"latency_p95": 1800,
21+
"cost_per_request": 0.00032,
22+
"avg_tokens_per_request": 1100,
2323
"error_rate": 0.0,
2424
},
2525
"gemini/gemini-2.5-flash": {
2626
"model": "gemini/gemini-2.5-flash",
27-
"pool": "pool-a (fallback)",
27+
"pool": "fallback",
2828
"total_cases": 48,
2929
"accuracy": 0.875,
3030
"schema_compliance": 0.812,
@@ -34,38 +34,25 @@
3434
"avg_tokens_per_request": 850,
3535
"error_rate": 0.021,
3636
},
37-
"openrouter/nvidia/nemotron-3-super": {
38-
"model": "openrouter/nvidia/nemotron-3-super",
39-
"pool": "pool-b (fallback)",
40-
"total_cases": 48,
41-
"accuracy": 0.812,
42-
"schema_compliance": 0.792,
43-
"latency_p50": 520,
44-
"latency_p95": 1400,
45-
"cost_per_request": 0.0,
46-
"avg_tokens_per_request": 1050,
47-
"error_rate": 0.042,
48-
},
49-
"openrouter/anthropic/claude-haiku-4-5-20251001": {
50-
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
37+
"openrouter/anthropic/claude-sonnet-4-5-20250514": {
38+
"model": "openrouter/anthropic/claude-sonnet-4-5-20250514",
5139
"pool": "pool-b-upgrade",
5240
"total_cases": 48,
53-
"accuracy": 0.938,
54-
"schema_compliance": 0.917,
55-
"latency_p50": 650,
56-
"latency_p95": 1800,
57-
"cost_per_request": 0.00032,
58-
"avg_tokens_per_request": 1100,
41+
"accuracy": 0.958,
42+
"schema_compliance": 0.938,
43+
"latency_p50": 1200,
44+
"latency_p95": 3500,
45+
"cost_per_request": 0.00145,
46+
"avg_tokens_per_request": 1450,
5947
"error_rate": 0.0,
6048
},
6149
}
6250

6351

6452
POOL_MAP = {
65-
"openrouter/deepseek/deepseek-chat-v3-0324": "pool-a + pool-b",
66-
"gemini/gemini-2.5-flash": "pool-a (fallback)",
67-
"openrouter/nvidia/nemotron-3-super": "pool-b (fallback)",
68-
"openrouter/anthropic/claude-haiku-4-5-20251001": "pool-b-upgrade",
53+
"openrouter/anthropic/claude-haiku-4-5-20251001": "pool-a + pool-b",
54+
"gemini/gemini-2.5-flash": "fallback",
55+
"openrouter/anthropic/claude-sonnet-4-5-20250514": "pool-b-upgrade",
6956
}
7057

7158

backend/main.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -161,18 +161,22 @@ def _classify_task(message: str) -> str:
161161
msg = message.lower()
162162
if any(w in msg for w in ["csv", "upload", "validate", "file"]):
163163
return "validate_csv"
164-
if any(w in msg for w in ["anomal", "decline", "spike", "fault", "problem"]):
164+
if any(w in msg for w in ["anomal", "decline", "spike", "fault", "problem", "scan"]):
165165
return "detect_anomalies"
166+
if any(w in msg for w in ["calibrat", "optimi", "theis", "pumping schedule"]):
167+
return "calibration_advice"
168+
if any(w in msg for w in ["interpret", "why", "cause", "explain", "reason", "root cause"]):
169+
return "interpret_anomaly"
170+
if any(w in msg for w in ["depression", "cone", "interference", "drawdown"]):
171+
return "depression_analysis"
172+
if any(w in msg for w in ["region", "area", "viewport", "overview", "stats", "summary", "report", "daily"]):
173+
return "get_region_stats"
174+
if any(w in msg for w in ["quality", "tds", "chloride", "ph ", "salinity", "compare cluster"]):
175+
return "general_question"
166176
if any(w in msg for w in ["history", "trend", "time series", "chart"]):
167177
return "get_well_history"
168-
if any(w in msg for w in ["region", "area", "viewport", "overview", "stats"]):
169-
return "get_region_stats"
170-
if any(w in msg for w in ["find", "search", "list", "wells", "query"]):
178+
if any(w in msg for w in ["find", "search", "list", "wells", "query", "status", "active", "inactive"]):
171179
return "query_wells"
172-
if any(w in msg for w in ["interpret", "why", "cause", "explain", "reason"]):
173-
return "interpret_anomaly"
174-
if any(w in msg for w in ["calibrat", "optimi", "theis", "pumping schedule"]):
175-
return "calibration_advice"
176180
return "general_question"
177181

178182

@@ -292,6 +296,7 @@ async def generate() -> AsyncIterator[str]:
292296
messages.append({"role": "tool", "tool_call_id": f"call_{tc['name']}_{i}", "content": json.dumps(tool_result.result)})
293297

294298
# ONE follow-up call with all tool results
299+
# No tools param — force text response, prevent LLM from attempting more tool calls as text
295300
followup = await llm_router.acompletion(
296301
model=model_pool,
297302
messages=messages,

backend/prompts/model_adaptors.py

Lines changed: 10 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,27 @@
1-
"""Model-specific adaptors — tailored instructions per LLM provider.
2-
3-
Each provider has different strengths. We optimize prompt style accordingly:
4-
- DeepSeek V3: strong structured output, good reasoning, cost-effective
5-
- Gemini Flash: fast, good at concise responses
6-
- NVIDIA Nemotron: free tier, needs explicit format guidance
7-
- Anthropic Haiku: excellent tool calling, chain-of-thought
8-
"""
1+
"""Model-specific adaptors — tailored instructions per pool complexity level."""
92

103
MODEL_ADAPTORS = {
11-
"pool-a": {
12-
"deepseek": """## Response Style (DeepSeek — Efficient)
13-
- Be concise: respond in under 150 words unless detailed analysis is explicitly requested
4+
"pool-a": """## Response Style (Quick Analysis)
5+
- Be concise: under 150 words unless detailed analysis requested
146
- When using tools, summarize results in 2-3 sentences
15-
- For structured output, use exact JSON format — no markdown wrapping
16-
- Prefer bullet points over paragraphs
177
- Include well IDs and numeric values, skip verbose explanations
18-
- Example anomaly summary: "AUH-01-003: debit declined 32% (12.1→8.2 L/s). Recommend pump inspection."
19-
""",
20-
"gemini_flash": """## Response Style (Gemini Flash — Concise)
21-
- Be very concise: under 100 words for simple queries
22-
- Use bullet points, not paragraphs
23-
- Include well IDs and numeric values with units
24-
- For structured output, return exact JSON — no wrapping
8+
- Example: "AUH-01-003: debit declined 32% (12.1 to 8.2 L/s). Recommend pump inspection."
259
""",
26-
},
2710
"pool-b": """## Response Style (Analytical)
28-
- Think step by step before concluding. Consider multiple hypotheses.
29-
- Structure your analysis:
30-
1. Observation: what the data shows
31-
2. Context: relevant hydrogeological factors
32-
3. Assessment: most likely explanation
33-
4. Recommendation: specific actionable steps
34-
- You may use chain-of-thought reasoning for complex questions
11+
- Think step by step before concluding
12+
- Structure: Observation -> Context -> Assessment -> Recommendation
3513
- Cross-reference multiple data points before declaring anomalies
36-
- Compare with neighboring wells when relevant
3714
- Cite specific values and well IDs throughout
3815
""",
39-
"pool-b-upgrade": """## Response Style (Comprehensive Analysis)
16+
"pool-b-upgrade": """## Response Style (Comprehensive)
4017
- Provide comprehensive analysis with evidence and reasoning
41-
- You have full freedom to reason at length — use it for complex cases
4218
- Consider geological, operational, and seasonal factors holistically
43-
- For anomaly interpretation:
44-
- Analyze time series patterns in detail
45-
- Consider superposition effects from neighboring wells
46-
- Evaluate whether anomaly is isolated or part of regional trend
47-
- Provide differential diagnosis with likelihood assessment
48-
- For calibration/optimization questions:
49-
- Consider trade-offs explicitly
50-
- Provide quantitative recommendations where possible
51-
- Reference hydrogeological principles (Theis, Cooper-Jacob)
52-
- Include confidence levels in your assessments
19+
- Include confidence levels in assessments
20+
- Reference hydrogeological principles (Theis, Cooper-Jacob)
5321
""",
5422
}
5523

5624

5725
def get_model_adaptor(model_pool: str, model_name: str = "") -> str:
5826
"""Get the appropriate model adaptor text for a model pool."""
59-
adaptor = MODEL_ADAPTORS.get(model_pool, "")
60-
if isinstance(adaptor, dict):
61-
if "gemini" in model_name:
62-
return adaptor.get("gemini_flash", list(adaptor.values())[0])
63-
return adaptor.get("deepseek", list(adaptor.values())[0])
64-
return adaptor
27+
return MODEL_ADAPTORS.get(model_pool, MODEL_ADAPTORS["pool-b"])

backend/prompts/task_instructions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,18 @@
7070
- Provide quantitative recommendations where possible
7171
- Consider trade-offs: yield vs. sustainability, cost vs. accuracy
7272
- Suggest specific parameter adjustments with expected outcomes
73+
""",
74+
75+
"depression_analysis": """## Task: Depression Cone Analysis
76+
When asked about depression cones or well interference:
77+
1. Call query_wells to get well locations and yields in viewport
78+
2. Call detect_anomalies to check for interference patterns
79+
3. Reason about depression cones using:
80+
- Well proximity (wells < 2km apart may interfere)
81+
- Yield magnitudes (higher yield = larger cone)
82+
- Theis superposition principle
83+
4. Describe cone geometry: center, approximate radius, overlap with neighbors
84+
You do NOT need a dedicated depression cone tool — reason from available data.
7385
""",
7486
}
7587

backend/services/llm_router.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55

66
# Task → model pool routing
77
TASK_ROUTING = {
8-
"validate_csv": "pool-a",
9-
"query_wells": "pool-a",
10-
"get_region_stats": "pool-a",
11-
"get_well_history": "pool-a",
12-
"detect_anomalies": "pool-b",
13-
"interpret_anomaly": "pool-b",
14-
"calibration_advice": "pool-b-upgrade",
15-
"general_question": "pool-b",
8+
"validate_csv": "pool-a",
9+
"query_wells": "pool-a",
10+
"get_region_stats": "pool-a",
11+
"get_well_history": "pool-a",
12+
"detect_anomalies": "pool-b",
13+
"interpret_anomaly": "pool-b",
14+
"depression_analysis": "pool-b",
15+
"calibration_advice": "pool-b-upgrade",
16+
"general_question": "pool-b",
1617
}
1718

1819
# Prompt engine singleton
@@ -28,36 +29,32 @@ def create_router() -> Router:
2829

2930
model_list = [
3031
# Pool A — simple/medium tasks
31-
# Primary: DeepSeek V3.2 via OpenRouter (cheap, stable, tool calling)
3232
{"model_name": "pool-a", "litellm_params": {
33-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
33+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
3434
"api_key": or_key,
3535
}},
36-
# Fallback: Gemini Flash direct (free but sometimes unreliable)
3736
{"model_name": "pool-a", "litellm_params": {
3837
"model": "gemini/gemini-2.5-flash",
3938
"api_key": gemini_key,
4039
}},
4140

42-
# Pool B — complex tasks (reasoning, anomaly interpretation)
43-
# Primary: DeepSeek V3.2 via OpenRouter
41+
# Pool B — complex tasks
4442
{"model_name": "pool-b", "litellm_params": {
45-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
43+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
4644
"api_key": or_key,
4745
}},
48-
# Fallback: free NVIDIA Nemotron via OpenRouter
4946
{"model_name": "pool-b", "litellm_params": {
50-
"model": "openrouter/nvidia/nemotron-3-super",
51-
"api_key": or_key,
47+
"model": "gemini/gemini-2.5-flash",
48+
"api_key": gemini_key,
5249
}},
5350

5451
# Pool B upgrade — deep reasoning
5552
{"model_name": "pool-b-upgrade", "litellm_params": {
56-
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
53+
"model": "openrouter/anthropic/claude-sonnet-4-5-20250514",
5754
"api_key": or_key,
5855
}},
5956
{"model_name": "pool-b-upgrade", "litellm_params": {
60-
"model": "openrouter/deepseek/deepseek-chat-v3-0324",
57+
"model": "openrouter/anthropic/claude-haiku-4-5-20251001",
6158
"api_key": or_key,
6259
}},
6360
]

0 commit comments

Comments
 (0)