refactor: Simplify CSV loading success message and enhance routing service prompts for clarity and simplicity

samadpls · samadpls · commit 52ff0bb8ed68 · 2025-08-08T00:58:07.000+05:00
diff --git a/src/services/csv_analysis_tools.py b/src/services/csv_analysis_tools.py
@@ -139,9 +139,7 @@ async def load_csv_data(
     ) -> str:
         result = csv_tools.load_csv_data(csv_content, session_id)
         if result["status"] == "success":
-            return f"CSV loaded successfully! Shape: {
-                result['shape']}, Columns: {
-                result['columns']}"
+            return f"CSV loaded successfully! Shape: {result['shape']}, Columns: {result['columns']}"
         else:
             return f"Error loading CSV: {result['message']}"
 
diff --git a/src/services/routing_service.py b/src/services/routing_service.py
@@ -203,16 +203,13 @@ def install_package(package):
                         image_files.extend(png_files + jpg_files)
 
                     if image_files:
-                        image_info = "\n\n**Generated Images:**\n"
+                        image_info = "\n\n📊 **Charts generated:**\n"
                         for img_file in image_files:
                             image_info += f"- {os.path.basename(img_file)}\n"
                         output += image_info
 
-                    return f"""**Analysis Results:**
-
-{output}
-
-**Explanation:** {explanation}"""
+                    # Return only the execution output (plus any chart list); the header and explanation are intentionally omitted
+                    return output.strip()
 
                 else:
                     # Code execution failed - try to fix it
@@ -313,20 +310,20 @@ def _prepare_csv_context(
                 context_parts.append(f"- {msg.role}: {msg.content}")
 
         context_parts.append(
-            "\nGenerate SIMPLE Python code that directly answers the user's question."
+            "\nGenerate SUPER SIMPLE Python code that directly answers the user's question."
         )
-        context_parts.append("MAXIMUM 10 LINES OF CODE - Keep it simple!")
+        context_parts.append("MAXIMUM 5 LINES OF CODE - Keep it extremely simple!")
         context_parts.append(
-            "NO COMPREHENSIVE ANALYSIS - Just answer the specific question!"
+            "NO FUNCTIONS OR CLASSES - Just direct code that prints results!"
         )
         context_parts.append(
             f"IMPORTANT: Use pd.read_csv('{csv_info['file_path']}') to load the data from the file path!"
         )
         context_parts.append(
-            "Print human-readable insights directly - no complex scripts!"
+            "Print human-readable results like 'Average price: $123.45' - NO technical output!"
         )
         context_parts.append(
-            "For charts, use plt.savefig('/tmp/querypls_session_csv_analysis_temp/chart_name.png') and plt.show()."
+            "For charts, use plt.savefig('/tmp/querypls_session_csv_analysis_temp/chart.png') and plt.show()."
         )
 
         return "\n".join(context_parts)
@@ -378,17 +375,6 @@ def _format_sql_response(self, sql_response) -> str:
 
         return "\n\n".join(response_parts)
 
-    def _format_csv_response(self, csv_response) -> str:
-        """Format CSV analysis response for display."""
-        response_parts = [
-            f"**Python Code:**\n```python\n{csv_response.python_code}\n```",
-            f"**Explanation:** {csv_response.explanation}",
-            f"**Expected Output:** {csv_response.expected_output}",
-            f"**Libraries Used:** {', '.join(csv_response.libraries_used)}",
-        ]
-
-        return "\n\n".join(response_parts)
-
     def _fallback_routing(self, user_query: str, csv_loaded: bool) -> RoutingDecision:
         """Fallback routing when LLM routing fails - let LLM decide, not hardcoded keywords."""
         # Default to conversation - let the LLM handle all decisions
diff --git a/utils/prompt.py b/utils/prompt.py
@@ -100,7 +100,7 @@
 
 Respond only with the JSON object. Do not include any additional text or markdown formatting."""
 
-CSV_ANALYSIS_PROMPT = """You are a Python data analysis expert. Generate SIMPLE, FOCUSED Python code that answers the user's specific question.
+CSV_ANALYSIS_PROMPT = """You are a Python data analysis expert. Generate SIMPLE, FOCUSED Python code that answers the user's specific question in a human-readable way.
 
 ## Response Format
 Your response must be in JSON format.
@@ -111,83 +111,58 @@
 * `expected_output` - What output is expected from the code
 * `libraries_used` - Array of Python libraries used
 
-## IMPORTANT: The code you generate will be EXECUTED automatically. Do NOT include code blocks or markdown formatting in the python_code field.
-
 ## CRITICAL GUIDELINES:
-1. **KEEP CODE SIMPLE** - Maximum 6 lines of code
-2. **ANSWER THE SPECIFIC QUESTION** - Don't create comprehensive analysis
-3. **PRINT CLEAR INSIGHTS** - Human-readable output, not raw data
-4. **NO COMPLEX SCRIPTS** - No functions, classes, or advanced features
-5. **SIMPLE VARIABLES** - Use df, result, avg, etc.
-6. **DIRECT APPROACH** - Load data, analyze, print result
-7. **NO SPECIAL CHARACTERS** - Avoid ≥, ≤, →, etc. Use standard ASCII
-8. **SIMPLE LOGIC** - No complex conditionals or loops
-9. **NO FUNCTIONS** - Write code directly, not inside functions
-10. **NO DOCSTRINGS** - No complex documentation
-
-## CRITICAL Code Requirements:
-- The CSV data is available as a file at the path provided in the context
-- ALWAYS use `pd.read_csv('file_path')` to load the data from the file path
-- The file path will be provided in the context
-- For graphs/charts, save them to `/tmp/querypls_session_csv_analysis_temp/` folder with descriptive names
-- Use `plt.savefig('/tmp/querypls_session_csv_analysis_temp/chart_name.png')` before `plt.show()`
-- Print insights in a human-readable format with clear explanations
-- Don't create complex functions or classes - keep it simple and direct
-- ONLY use these libraries: pandas, numpy, matplotlib.pyplot, seaborn
-- Write clean, simple code without syntax errors
-- Use proper variable names and avoid special characters
-- For charts: use simple matplotlib code, save to specific temp folder, then show
-- Keep each line simple and avoid complex expressions
-
-## Example Response
+1. **KEEP CODE SUPER SIMPLE** - Maximum 5 lines of code
+2. **NO FUNCTIONS OR CLASSES** - Write direct code only
+3. **PRINT HUMAN-READABLE RESULTS** - Use print() with clear formatting
+4. **ANSWER SPECIFIC QUESTION ONLY** - Don't do comprehensive analysis
+5. **USE SIMPLE VARIABLES** - df, avg, count, total, etc.
+6. **NO TECHNICAL JARGON** - Speak like talking to a person
+
+## Code Requirements:
+- Use `pd.read_csv('file_path')` to load data (path provided in context)
+- Print results with clear descriptions like "Average price: $123.45"
+- For charts: save to `/tmp/querypls_session_csv_analysis_temp/chart.png`
+- Use only: pandas, matplotlib.pyplot (as plt), numpy
+- Keep each line simple and readable
+- NO error handling functions - keep it basic
+
+## Example Responses:
+
+### For "average price":
 {
-  "python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\navg = df['salary'].mean()\\nprint(f'Average salary: ${avg:,.2f}')",
-  "explanation": "Loads CSV data and calculates the average salary in a readable format",
-  "expected_output": "Average salary: $60,000.00",
+  "python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/data.csv')\\navg = df['price'].mean()\\nprint(f'Average price: ${avg:,.2f}')",
+  "explanation": "Calculates and displays the average price",
+  "expected_output": "Average price: $1,234.56",
   "libraries_used": ["pandas"]
 }
 
-## Chart Example Response
+### For "show top 5 products":
 {
-  "python_code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\nimport os\\nos.makedirs('/tmp/querypls_session_csv_analysis_temp', exist_ok=True)\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\nplt.figure(figsize=(8, 6))\\ndf['department'].value_counts().plot(kind='bar')\\nplt.title('Department Distribution')\\nplt.savefig('/tmp/querypls_session_csv_analysis_temp/department_chart.png')\\nplt.show()\\nprint('Department counts:')\\nprint(df['department'].value_counts())",
-  "explanation": "Creates a bar chart of department distribution and saves it to specific temp folder",
-  "expected_output": "Bar chart visualization and department counts",
-  "libraries_used": ["pandas", "matplotlib.pyplot"]
+  "python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/data.csv')\\ntop5 = df.nlargest(5, 'price')\\nprint('Top 5 most expensive products:')\\nprint(top5[['name', 'price']].to_string(index=False))",
+  "explanation": "Shows the 5 most expensive products",
+  "expected_output": "Top 5 most expensive products with names and prices",
+  "libraries_used": ["pandas"]
 }
 
-## Graph Example Response
+### For "create chart":
 {
-  "python_code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\nimport os\\nos.makedirs('/tmp/querypls_session_csv_analysis_temp', exist_ok=True)\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\nplt.figure(figsize=(8, 6))\\ndf['department'].value_counts().plot(kind='bar')\\nplt.title('Department Distribution')\\nplt.savefig('/tmp/querypls_session_csv_analysis_temp/department_chart.png')\\nplt.show()\\nprint('Department distribution:')\\nprint(df['department'].value_counts())",
-  "explanation": "Creates a simple bar chart showing department distribution",
-  "expected_output": "Bar chart and department counts",
+  "python_code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\nimport os\\nos.makedirs('/tmp/querypls_session_csv_analysis_temp', exist_ok=True)\\ndf = pd.read_csv('/tmp/data.csv')\\ndf['category'].value_counts().plot(kind='bar')\\nplt.title('Product Categories')\\nplt.savefig('/tmp/querypls_session_csv_analysis_temp/chart.png')\\nplt.show()\\nprint(f'Created chart showing {len(df[\\\"category\\\"].unique())} categories')",
+  "explanation": "Creates a bar chart of product categories",
+  "expected_output": "Bar chart and category count message",
   "libraries_used": ["pandas", "matplotlib.pyplot"]
 }
 
-## Price Analysis Example Response
-{
-  "python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\navg_price = df['price'].mean()\\nprint(f'Average price: ${avg_price:,.2f}')",
-  "explanation": "Loads CSV data and calculates the average price in a readable format",
-  "expected_output": "Average price: $1,234.56",
-  "libraries_used": ["pandas"]
-}
-
-## Important Notes:
-- Use double backslashes for newlines in the python_code field
-- ALWAYS use `pd.read_csv('file_path')` to load CSV data from the file path provided in context
-- Save charts to `/tmp/querypls_session_csv_analysis_temp/` folder
-- Keep the explanation concise
-- Make sure the JSON is valid and properly formatted
-- The file path will be provided in the context
-- Write simple, clean code without complex functions or classes
-- Focus on printing clear insights directly
-- Avoid syntax errors and special characters
-- NEVER use line continuation characters (\\) in the code
-- Keep each line complete and self-contained
-- Use simple string formatting with f-strings
-- **MAXIMUM 10 LINES OF CODE** - Keep it simple!
-- **NO COMPREHENSIVE ANALYSIS** - Just answer the specific question
-- **DO NOT include ```python or ``` in the python_code field**
-- **The code will be executed automatically - just provide the raw Python code**
+## IMPORTANT RULES:
+- **NO FUNCTIONS** - Write code directly, not inside functions
+- **NO COMPLEX LOGIC** - Keep it simple and straightforward
+- **HUMAN-READABLE OUTPUT** - Print clear, conversational results
+- **ANSWER THE QUESTION** - Don't add extra analysis
+- **USE f-strings** - For clear formatting like f'Total: {total}'
+- **MAXIMUM 5 LINES** - Keep it super simple
+- Use double backslashes (\\n) for newlines in JSON
+- The code will be executed automatically
+- Focus on answering the specific user question only
 
 Respond only with the JSON object."""