Skip to content

Commit 52ff0bb

Browse files
committed
refactor: Simplify CSV loading success message and enhance routing service prompts for clarity and simplicity
1 parent b74960c commit 52ff0bb

3 files changed

Lines changed: 50 additions & 91 deletions

File tree

src/services/csv_analysis_tools.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -139,9 +139,7 @@ async def load_csv_data(
139139
) -> str:
140140
result = csv_tools.load_csv_data(csv_content, session_id)
141141
if result["status"] == "success":
142-
return f"CSV loaded successfully! Shape: {
143-
result['shape']}, Columns: {
144-
result['columns']}"
142+
return f"CSV loaded successfully! Shape: {result['shape']}, Columns: {result['columns']}"
145143
else:
146144
return f"Error loading CSV: {result['message']}"
147145

src/services/routing_service.py

Lines changed: 8 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -203,16 +203,13 @@ def install_package(package):
203203
image_files.extend(png_files + jpg_files)
204204

205205
if image_files:
206-
image_info = "\n\n**Generated Images:**\n"
206+
image_info = "\n\n📊 **Charts generated:**\n"
207207
for img_file in image_files:
208208
image_info += f"- {os.path.basename(img_file)}\n"
209209
output += image_info
210210

211-
return f"""**Analysis Results:**
212-
213-
{output}
214-
215-
**Explanation:** {explanation}"""
211+
# Return only the human-readable output, not technical details
212+
return output.strip()
216213

217214
else:
218215
# Code execution failed - try to fix it
@@ -313,20 +310,20 @@ def _prepare_csv_context(
313310
context_parts.append(f"- {msg.role}: {msg.content}")
314311

315312
context_parts.append(
316-
"\nGenerate SIMPLE Python code that directly answers the user's question."
313+
"\nGenerate SUPER SIMPLE Python code that directly answers the user's question."
317314
)
318-
context_parts.append("MAXIMUM 10 LINES OF CODE - Keep it simple!")
315+
context_parts.append("MAXIMUM 5 LINES OF CODE - Keep it extremely simple!")
319316
context_parts.append(
320-
"NO COMPREHENSIVE ANALYSIS - Just answer the specific question!"
317+
"NO FUNCTIONS OR CLASSES - Just direct code that prints results!"
321318
)
322319
context_parts.append(
323320
f"IMPORTANT: Use pd.read_csv('{csv_info['file_path']}') to load the data from the file path!"
324321
)
325322
context_parts.append(
326-
"Print human-readable insights directly - no complex scripts!"
323+
"Print human-readable results like 'Average price: $123.45' - NO technical output!"
327324
)
328325
context_parts.append(
329-
"For charts, use plt.savefig('/tmp/querypls_session_csv_analysis_temp/chart_name.png') and plt.show()."
326+
"For charts, use plt.savefig('/tmp/querypls_session_csv_analysis_temp/chart.png') and plt.show()."
330327
)
331328

332329
return "\n".join(context_parts)
@@ -378,17 +375,6 @@ def _format_sql_response(self, sql_response) -> str:
378375

379376
return "\n\n".join(response_parts)
380377

381-
def _format_csv_response(self, csv_response) -> str:
382-
"""Format CSV analysis response for display."""
383-
response_parts = [
384-
f"**Python Code:**\n```python\n{csv_response.python_code}\n```",
385-
f"**Explanation:** {csv_response.explanation}",
386-
f"**Expected Output:** {csv_response.expected_output}",
387-
f"**Libraries Used:** {', '.join(csv_response.libraries_used)}",
388-
]
389-
390-
return "\n\n".join(response_parts)
391-
392378
def _fallback_routing(self, user_query: str, csv_loaded: bool) -> RoutingDecision:
393379
"""Fallback routing when LLM routing fails - let LLM decide, not hardcoded keywords."""
394380
# Default to conversation - let the LLM handle all decisions

utils/prompt.py

Lines changed: 41 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@
100100
101101
Respond only with the JSON object. Do not include any additional text or markdown formatting."""
102102

103-
CSV_ANALYSIS_PROMPT = """You are a Python data analysis expert. Generate SIMPLE, FOCUSED Python code that answers the user's specific question.
103+
CSV_ANALYSIS_PROMPT = """You are a Python data analysis expert. Generate SIMPLE, FOCUSED Python code that answers the user's specific question in a human-readable way.
104104
105105
## Response Format
106106
Your response must be in JSON format.
@@ -111,83 +111,58 @@
111111
* `expected_output` - What output is expected from the code
112112
* `libraries_used` - Array of Python libraries used
113113
114-
## IMPORTANT: The code you generate will be EXECUTED automatically. Do NOT include code blocks or markdown formatting in the python_code field.
115-
116114
## CRITICAL GUIDELINES:
117-
1. **KEEP CODE SIMPLE** - Maximum 6 lines of code
118-
2. **ANSWER THE SPECIFIC QUESTION** - Don't create comprehensive analysis
119-
3. **PRINT CLEAR INSIGHTS** - Human-readable output, not raw data
120-
4. **NO COMPLEX SCRIPTS** - No functions, classes, or advanced features
121-
5. **SIMPLE VARIABLES** - Use df, result, avg, etc.
122-
6. **DIRECT APPROACH** - Load data, analyze, print result
123-
7. **NO SPECIAL CHARACTERS** - Avoid ≥, ≤, →, etc. Use standard ASCII
124-
8. **SIMPLE LOGIC** - No complex conditionals or loops
125-
9. **NO FUNCTIONS** - Write code directly, not inside functions
126-
10. **NO DOCSTRINGS** - No complex documentation
127-
128-
## CRITICAL Code Requirements:
129-
- The CSV data is available as a file at the path provided in the context
130-
- ALWAYS use `pd.read_csv('file_path')` to load the data from the file path
131-
- The file path will be provided in the context
132-
- For graphs/charts, save them to `/tmp/querypls_session_csv_analysis_temp/` folder with descriptive names
133-
- Use `plt.savefig('/tmp/querypls_session_csv_analysis_temp/chart_name.png')` before `plt.show()`
134-
- Print insights in a human-readable format with clear explanations
135-
- Don't create complex functions or classes - keep it simple and direct
136-
- ONLY use these libraries: pandas, numpy, matplotlib.pyplot, seaborn
137-
- Write clean, simple code without syntax errors
138-
- Use proper variable names and avoid special characters
139-
- For charts: use simple matplotlib code, save to specific temp folder, then show
140-
- Keep each line simple and avoid complex expressions
141-
142-
## Example Response
115+
1. **KEEP CODE SUPER SIMPLE** - Maximum 5 lines of code
116+
2. **NO FUNCTIONS OR CLASSES** - Write direct code only
117+
3. **PRINT HUMAN-READABLE RESULTS** - Use print() with clear formatting
118+
4. **ANSWER SPECIFIC QUESTION ONLY** - Don't do comprehensive analysis
119+
5. **USE SIMPLE VARIABLES** - df, avg, count, total, etc.
120+
6. **NO TECHNICAL JARGON** - Speak like talking to a person
121+
122+
## Code Requirements:
123+
- Use `pd.read_csv('file_path')` to load data (path provided in context)
124+
- Print results with clear descriptions like "Average price: $123.45"
125+
- For charts: save to `/tmp/querypls_session_csv_analysis_temp/chart.png`
126+
- Use only: pandas, matplotlib.pyplot (as plt), numpy
127+
- Keep each line simple and readable
128+
- NO error handling functions - keep it basic
129+
130+
## Example Responses:
131+
132+
### For "average price":
143133
{
144-
"python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\navg = df['salary'].mean()\\nprint(f'Average salary: ${avg:,.2f}')",
145-
"explanation": "Loads CSV data and calculates the average salary in a readable format",
146-
"expected_output": "Average salary: $60,000.00",
134+
"python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/data.csv')\\navg = df['price'].mean()\\nprint(f'Average price: ${avg:,.2f}')",
135+
"explanation": "Calculates and displays the average price",
136+
"expected_output": "Average price: $1,234.56",
147137
"libraries_used": ["pandas"]
148138
}
149139
150-
## Chart Example Response
140+
### For "show top 5 products":
151141
{
152-
"python_code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\nimport os\\nos.makedirs('/tmp/querypls_session_csv_analysis_temp', exist_ok=True)\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\nplt.figure(figsize=(8, 6))\\ndf['department'].value_counts().plot(kind='bar')\\nplt.title('Department Distribution')\\nplt.savefig('/tmp/querypls_session_csv_analysis_temp/department_chart.png')\\nplt.show()\\nprint('Department counts:')\\nprint(df['department'].value_counts())",
153-
"explanation": "Creates a bar chart of department distribution and saves it to specific temp folder",
154-
"expected_output": "Bar chart visualization and department counts",
155-
"libraries_used": ["pandas", "matplotlib.pyplot"]
142+
"python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/data.csv')\\ntop5 = df.nlargest(5, 'price')\\nprint('Top 5 most expensive products:')\\nprint(top5[['name', 'price']].to_string(index=False))",
143+
"explanation": "Shows the 5 most expensive products",
144+
"expected_output": "Top 5 most expensive products with names and prices",
145+
"libraries_used": ["pandas"]
156146
}
157147
158-
## Graph Example Response
148+
### For "create chart":
159149
{
160-
"python_code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\nimport os\\nos.makedirs('/tmp/querypls_session_csv_analysis_temp', exist_ok=True)\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\nplt.figure(figsize=(8, 6))\\ndf['department'].value_counts().plot(kind='bar')\\nplt.title('Department Distribution')\\nplt.savefig('/tmp/querypls_session_csv_analysis_temp/department_chart.png')\\nplt.show()\\nprint('Department distribution:')\\nprint(df['department'].value_counts())",
161-
"explanation": "Creates a simple bar chart showing department distribution",
162-
"expected_output": "Bar chart and department counts",
150+
"python_code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\nimport os\\nos.makedirs('/tmp/querypls_session_csv_analysis_temp', exist_ok=True)\\ndf = pd.read_csv('/tmp/data.csv')\\ndf['category'].value_counts().plot(kind='bar')\\nplt.title('Product Categories')\\nplt.savefig('/tmp/querypls_session_csv_analysis_temp/chart.png')\\nplt.show()\\nprint(f'Created chart showing {len(df[\"category\"].unique())} categories')",
151+
"explanation": "Creates a bar chart of product categories",
152+
"expected_output": "Bar chart and category count message",
163153
"libraries_used": ["pandas", "matplotlib.pyplot"]
164154
}
165155
166-
## Price Analysis Example Response
167-
{
168-
"python_code": "import pandas as pd\\ndf = pd.read_csv('/tmp/querypls_session_xxx/data.csv')\\navg_price = df['price'].mean()\\nprint(f'Average price: ${avg_price:,.2f}')",
169-
"explanation": "Loads CSV data and calculates the average price in a readable format",
170-
"expected_output": "Average price: $1,234.56",
171-
"libraries_used": ["pandas"]
172-
}
173-
174-
## Important Notes:
175-
- Use double backslashes for newlines in the python_code field
176-
- ALWAYS use `pd.read_csv('file_path')` to load CSV data from the file path provided in context
177-
- Save charts to `/tmp/querypls_session_csv_analysis_temp/` folder
178-
- Keep the explanation concise
179-
- Make sure the JSON is valid and properly formatted
180-
- The file path will be provided in the context
181-
- Write simple, clean code without complex functions or classes
182-
- Focus on printing clear insights directly
183-
- Avoid syntax errors and special characters
184-
- NEVER use line continuation characters (\\) in the code
185-
- Keep each line complete and self-contained
186-
- Use simple string formatting with f-strings
187-
- **MAXIMUM 10 LINES OF CODE** - Keep it simple!
188-
- **NO COMPREHENSIVE ANALYSIS** - Just answer the specific question
189-
- **DO NOT include ```python or ``` in the python_code field**
190-
- **The code will be executed automatically - just provide the raw Python code**
156+
## IMPORTANT RULES:
157+
- **NO FUNCTIONS** - Write code directly, not inside functions
158+
- **NO COMPLEX LOGIC** - Keep it simple and straightforward
159+
- **HUMAN-READABLE OUTPUT** - Print clear, conversational results
160+
- **ANSWER THE QUESTION** - Don't add extra analysis
161+
- **USE f-strings** - For clear formatting like f'Total: {total}'
162+
- **MAXIMUM 5 LINES** - Keep it super simple
163+
- Use double backslashes (\\n) for newlines in JSON
164+
- The code will be executed automatically
165+
- Focus on answering the specific user question only
191166
192167
Respond only with the JSON object."""
193168

0 commit comments

Comments
 (0)