-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModernChatbot.py
More file actions
345 lines (296 loc) · 16.2 KB
/
Copy pathModernChatbot.py
File metadata and controls
345 lines (296 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# ═══════════════════════════════════════════════════════════════════════════════
# ModernChatbot.py — Modern CSV Agent Chatbot
# A redesigned UI built on top of the same csv_agent and plotter backend.
# Run with: streamlit run ModernChatbot.py
# ═══════════════════════════════════════════════════════════════════════════════
# ── Imports ───────────────────────────────────────────────────────────────────
import os # file path operations and environment access
import atexit # register temp-file cleanup on process exit
import tempfile # write uploaded CSV to a temporary file on disk
import pandas as pd # read CSV for preview and stats in the sidebar
import streamlit as st
import csv_agent, plotter # backend: LangChain agent + DeepSeek formatter
# ── Page Configuration ────────────────────────────────────────────────────────
# Options handed to st.set_page_config:
#   page_title            → browser tab title
#   page_icon             → browser tab icon
#   layout                → "wide" uses the full browser width
#   initial_sidebar_state → "expanded" opens the sidebar by default
_PAGE_OPTIONS = {
    "page_title": "CSV Intelligence",
    "page_icon": "🤖",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_OPTIONS)
# ── Custom CSS ────────────────────────────────────────────────────────────────
# The stylesheet is kept in a named constant and injected once via st.markdown.
# unsafe_allow_html=True is required so the <style> tag is emitted as raw HTML
# instead of being escaped; the rules override Streamlit's default look.
_CUSTOM_CSS = """
<style>
/* ── Page background ── */
.stApp {
background: linear-gradient(135deg, #0f0f1a 0%, #1a1a2e 50%, #16213e 100%);
min-height: 100vh;
}
/* ── Header ── */
.modern-header {
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
padding: 1.5rem 2rem;
border-radius: 16px;
margin-bottom: 1.5rem;
box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);
}
.modern-header h1 {
color: white;
margin: 0;
font-size: 1.8rem;
font-weight: 700;
letter-spacing: -0.5px;
}
.modern-header p {
color: rgba(255,255,255,0.8);
margin: 0.3rem 0 0 0;
font-size: 0.9rem;
}
/* ── Chat messages ── */
.stChatMessage {
border-radius: 12px;
margin-bottom: 0.5rem;
backdrop-filter: blur(10px);
}
/* ── User bubble ── */
[data-testid="stChatMessageContent"] {
border-radius: 12px;
}
/* ── Sample question chips ── */
.stButton > button {
background: rgba(102, 126, 234, 0.15);
color: #a78bfa;
border: 1px solid rgba(102, 126, 234, 0.3);
border-radius: 20px;
padding: 0.3rem 0.9rem;
font-size: 0.78rem;
transition: all 0.2s ease;
width: 100%;
text-align: left;
}
.stButton > button:hover {
background: rgba(102, 126, 234, 0.35);
border-color: #667eea;
color: white;
transform: translateX(2px);
}
/* ── Metrics ── */
[data-testid="stMetric"] {
background: rgba(255,255,255,0.05);
border-radius: 10px;
padding: 0.5rem;
border: 1px solid rgba(255,255,255,0.08);
}
/* ── Status box ── */
[data-testid="stStatus"] {
border-radius: 12px;
border: 1px solid rgba(102, 126, 234, 0.3);
background: rgba(102, 126, 234, 0.05);
}
/* ── Sidebar ── */
[data-testid="stSidebar"] {
background: rgba(15, 15, 30, 0.95);
border-right: 1px solid rgba(102, 126, 234, 0.2);
}
/* ── Welcome card ── */
.welcome-card {
background: rgba(102, 126, 234, 0.08);
border: 1px solid rgba(102, 126, 234, 0.2);
border-radius: 16px;
padding: 2rem;
text-align: center;
margin-top: 3rem;
}
.welcome-card h2 { color: #a78bfa; margin-bottom: 0.5rem; }
.welcome-card p { color: rgba(255,255,255,0.6); font-size: 0.95rem; }
/* ── Divider ── */
hr { border-color: rgba(255,255,255,0.08); }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ── Header Banner ─────────────────────────────────────────────────────────────
# The banner is emitted as raw HTML so the .modern-header CSS class
# (gradient, shadow, typography) can style it.
_HEADER_HTML = """
<div class="modern-header">
<h1>🤖 CSV Intelligence</h1>
<p>Powered by DeepSeek · Ask anything about your data</p>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# ── Session State Initialisation ──────────────────────────────────────────────
# Streamlit re-executes this script on every interaction; st.session_state is
# the store that keeps values alive between those reruns. Each key below is
# seeded only when it is missing, so existing values are never overwritten.
_SESSION_DEFAULTS = {
    "chat_history": [],      # list of message dicts
    "temp_file_path": "",    # path to the current temp CSV file
    "csv_filename": "",      # original filename shown in the sidebar
    "df_preview": None,      # pandas DataFrame for the sidebar preview
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# ── Sidebar ───────────────────────────────────────────────────────────────────
def _remove_temp_file(path):
    """Delete *path* if it is still on disk.

    Used for best-effort cleanup (at interpreter exit and when an upload is
    replaced), so a file that is already gone is deliberately not an error.
    """
    try:
        os.unlink(path)
    except OSError:
        # Already removed or not removable — nothing useful to do for cleanup.
        pass


with st.sidebar:
    st.markdown("## 📁 Data Source")

    # File uploader — accepts only .csv files.
    # label_visibility="collapsed" hides the label text; a non-empty label
    # string is still supplied.
    uploaded_file = st.file_uploader(
        "Upload your CSV",
        type=["csv"],
        label_visibility="collapsed",
    )

    # ── Handle new file upload ─────────────────────────────────────────────
    if uploaded_file is not None:
        # The script reruns on every interaction and the uploader keeps
        # returning the same file, so the upload is fingerprinted and only
        # ingested when it actually changes. Writing a fresh temp file on
        # every rerun would leak files and change the path each time, which
        # defeats the path-keyed agent cache.
        upload_key = (
            getattr(uploaded_file, "file_id", None),
            uploaded_file.name,
            uploaded_file.size,
        )
        if st.session_state.get("uploaded_file_key") != upload_key:
            new_path = ""
            try:
                # The agent is built from a file path (see get_csv_agent), so
                # the upload is persisted to disk. delete=False keeps the file
                # after the with-block closes.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
                    new_path = tmp.name
                    # getvalue() returns the full content regardless of the
                    # buffer's current read position.
                    tmp.write(uploaded_file.getvalue())
                # Remove the temp file when the process exits.
                atexit.register(_remove_temp_file, new_path)
                # DataFrame used only for the sidebar preview/stats below.
                preview = pd.read_csv(new_path)
            except Exception as e:
                # Do not leave a half-ingested file behind, and keep the
                # previous (working) dataset in session state.
                if new_path:
                    _remove_temp_file(new_path)
                st.error(f"Upload failed: {e}")
            else:
                # Commit all state together, only after the CSV parsed.
                old_path = st.session_state.temp_file_path
                st.session_state.temp_file_path = new_path
                st.session_state.df_preview = preview
                st.session_state.csv_filename = uploaded_file.name
                st.session_state.uploaded_file_key = upload_key
                # The replaced file is no longer referenced by this session.
                if old_path and old_path != new_path:
                    _remove_temp_file(old_path)

    # ── CSV Stats & Preview ────────────────────────────────────────────────
    # Only shown after a file has been successfully loaded.
    if st.session_state.df_preview is not None:
        df = st.session_state.df_preview
        st.success(f"✅ {st.session_state.csv_filename}")
        st.markdown("---")

        # Two metric cards side by side: row count and column count.
        col1, col2 = st.columns(2)
        col1.metric("Rows", f"{df.shape[0]:,}")  # :, adds thousands separator
        col2.metric("Columns", df.shape[1])

        st.markdown("**Preview (first 5 rows)**")
        st.dataframe(df.head(5), use_container_width=True, height=180)

        st.markdown("**Column types**")
        # dtypes is a Series of column → dtype; reshape it into a two-column
        # table ("Column", "Type") for display.
        column_types = (
            df.dtypes.rename("Type")
            .astype(str)
            .reset_index()
            .rename(columns={"index": "Column"})
        )
        st.dataframe(
            column_types,
            use_container_width=True,
            hide_index=True,
            height=min(200, 35 * len(df.columns)),  # cap height at 200px
        )
        st.markdown("---")

    # ── Clear Chat Button ──────────────────────────────────────────────────
    # Resets the chat history so the conversation starts fresh.
    if st.button("🗑️ Clear Chat", use_container_width=True):
        st.session_state.chat_history = []
        st.rerun()  # rerun immediately so the cleared history takes effect

    # ── Message Counter ────────────────────────────────────────────────────
    # Count only user messages (not assistant messages) for the turn counter.
    user_msgs = sum(1 for m in st.session_state.chat_history if m["role"] == "User")
    st.caption(f"💬 {user_msgs} question{'s' if user_msgs != 1 else ''} asked this session")
# ── Agent Cache ───────────────────────────────────────────────────────────────
# Building the agent is treated as expensive, so the builder is wrapped in
# @st.cache_resource: the result is cached per file_path argument and reused
# for later queries against the same file.
@st.cache_resource
def get_csv_agent(file_path):
    """Return the CSV agent for *file_path*, building it via csv_agent on a cache miss."""
    agent = csv_agent.build_csv_agent(file_path)
    return agent
# ── Main Chat Area ────────────────────────────────────────────────────────────
def _show_chart(html_content):
    """Render chart/table HTML for a message inside a collapsed expander."""
    with st.expander("📊 View Chart / Table"):
        st.iframe(html_content, height=600)


if not st.session_state.temp_file_path:
    # No CSV has been uploaded yet — show the welcome screen only.
    st.markdown("""
<div class="welcome-card">
<h2>👋 Welcome to CSV Intelligence</h2>
<p>Upload a CSV file from the sidebar to get started.<br>
Then ask any question about your data in plain English.</p>
<br>
<p><b>Example questions you can ask:</b></p>
<p>• Show me a preview of the data<br>
• What are the top 10 records by value?<br>
• What percentage of rows match condition X?<br>
• Give me a distribution breakdown of column Y</p>
</div>
""", unsafe_allow_html=True)
else:
    # ── Sample Questions ───────────────────────────────────────────────────
    # Clicking a chip stores its text in session_state.prefill and reruns;
    # the stored text is then consumed as the question further below.
    if "prefill" not in st.session_state:
        st.session_state.prefill = ""

    with st.expander("💡 Sample questions", expanded=False):
        samples = [
            "Show me a preview of the data",
            "What are the top 10 records by highest value?",
            "Give me a count and percentage breakdown of each category",
            "What is the average, min, and max of numeric columns?",
        ]
        # Lay the chips out in a 2-column grid, alternating columns.
        grid = st.columns(2)
        for idx, question in enumerate(samples):
            target_col = grid[idx % 2]
            if target_col.button(question, key=f"sample_{idx}"):
                st.session_state.prefill = question  # remember the clicked chip
                st.rerun()

    st.markdown("---")

    # ── Render Chat History ────────────────────────────────────────────────
    # Replay every stored message on each rerun so the conversation stays
    # visible; assistant messages may carry chart/table HTML.
    for entry in st.session_state.chat_history:
        with st.chat_message(entry["role"]):
            st.markdown(entry["content"])
            if entry["role"] == "Assistant" and entry.get("html_content"):
                _show_chart(entry["html_content"])

    # ── Chat Input ─────────────────────────────────────────────────────────
    user_input = st.chat_input(
        "Ask anything about your data...",
        key="chat_input",
    )

    # Nothing typed this rerun but a sample chip was clicked: use the chip
    # text as the question and clear it so it does not fire again.
    if not user_input and st.session_state.prefill:
        user_input = st.session_state.prefill
        st.session_state.prefill = ""

    if user_input:
        # ── Display User Message ───────────────────────────────────────────
        with st.chat_message("User"):
            st.markdown(user_input)
        user_entry = {
            "role": "User",
            "content": user_input,
        }
        st.session_state.chat_history.append(user_entry)

        # ── Run Agent Pipeline with Live Status ────────────────────────────
        # The status panel lists each pipeline step as it starts, then is
        # marked complete (collapsed) or error (left expanded).
        with st.status("🧠 Thinking...", expanded=True) as status:
            try:
                st.write("📂 Loading CSV agent...")
                agent = get_csv_agent(st.session_state.temp_file_path)

                st.write("🔍 Running data analysis...")
                csv_agent_response = csv_agent.csv_agent_invoker(agent, user_input)

                st.write("🎨 Generating visualisation & summary...")
                html_content, response = plotter.output_formatter(user_input, csv_agent_response)

                status.update(label="✅ Done", state="complete", expanded=False)
            except Exception as e:
                # Any failure in the pipeline becomes a chat reply, not a crash.
                status.update(label="❌ Error", state="error", expanded=True)
                response = f"Sorry, something went wrong: {e}"
                html_content = ""

        # ── Display Assistant Response ─────────────────────────────────────
        with st.chat_message("Assistant"):
            st.markdown(response)
            if html_content:
                _show_chart(html_content)

        # ── Persist to Chat History ────────────────────────────────────────
        assistant_entry = {
            "role": "Assistant",
            "content": response,
            "html_content": html_content,
        }
        st.session_state.chat_history.append(assistant_entry)