-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp_streamlit.py.bak
More file actions
2356 lines (1942 loc) · 109 KB
/
app_streamlit.py.bak
File metadata and controls
2356 lines (1942 loc) · 109 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import streamlit as st
import asyncio
import tempfile
from pathlib import Path
import sys
import requests
import logging
import traceback
import time
from threading import Thread
from queue import Queue
import sqlite3
import hashlib
import json
import shutil
from datetime import datetime
import chromadb
# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Add the RAG-Agent source code to the Python path
# so `ollama_pure_chat` and `fileloader` below resolve.
rag_agent_path = os.path.join(os.path.dirname(__file__), "rag-agent/src")
sys.path.append(rag_agent_path)

# Single import indicator to prevent repeated import logging.
# Streamlit re-executes this whole script on every user interaction, so
# without the session-state guard the info/error lines would repeat on
# each rerun.  The modules themselves are cached by Python either way.
if "imports_done" not in st.session_state:
    try:
        # Import Ollama with error handling
        import ollama
        logger.info("Successfully imported ollama package")
    except ImportError as e:
        logger.error(f"Failed to import ollama package: {str(e)}")
        st.error(f"Failed to import ollama package: {str(e)}")
        sys.exit(1)
    try:
        # Import local modules from the rag-agent source
        from ollama_pure_chat import OllamaChat
        from fileloader import Loader_Local
        logger.info("Successfully imported modules from rag-agent")
    except ImportError as e:
        logger.error(f"Failed to import modules from rag-agent: {str(e)}")
        st.error(f"Failed to import modules from rag-agent: {str(e)}")
        sys.exit(1)
    # Mark imports as completed
    st.session_state.imports_done = True
else:
    # Silent imports when already done (no logging on subsequent reruns)
    import ollama
    from ollama_pure_chat import OllamaChat
    from fileloader import Loader_Local
# Check if Ollama API is available
def is_ollama_available():
"""Check if the Ollama server is running and accessible"""
# Only check once per session unless explicitly refreshed
if "ollama_available" not in st.session_state or "ollama_checked_time" not in st.session_state:
try:
response = requests.get("http://localhost:11434/api/tags", timeout=2)
status = response.status_code == 200
st.session_state.ollama_available = status
st.session_state.ollama_checked_time = time.time()
return status
except (requests.ConnectionError, requests.Timeout):
logger.warning("Ollama server is not running or not accessible")
st.session_state.ollama_available = False
st.session_state.ollama_checked_time = time.time()
return False
except Exception as e:
logger.error(f"Unexpected error checking Ollama availability: {str(e)}")
st.session_state.ollama_available = False
st.session_state.ollama_checked_time = time.time()
return False
else:
# Check if we need to refresh (every 60 seconds)
if time.time() - st.session_state.ollama_checked_time > 60:
try:
response = requests.get("http://localhost:11434/api/tags", timeout=2)
status = response.status_code == 200
st.session_state.ollama_available = status
st.session_state.ollama_checked_time = time.time()
return status
except Exception:
# Just keep existing status on error
return st.session_state.ollama_available
return st.session_state.ollama_available
# Default model to use if none is specified
default_model = "deepseek-r1:1.5b"

# Persistent storage paths - Use absolute path for consistency regardless of
# the working directory; overridable via the PERSISTENT_STORAGE_BASE env var.
PERSISTENT_STORAGE_BASE = os.getenv("PERSISTENT_STORAGE_BASE", os.path.join(os.path.dirname(os.path.abspath(__file__)), 'localdatabase'))
# SQLite database holding user accounts (see init_user_database for schema).
USERS_DB_PATH = os.path.join(PERSISTENT_STORAGE_BASE, "users.db")

# Ensure persistent storage directories exist
os.makedirs(PERSISTENT_STORAGE_BASE, exist_ok=True)
# Function to get user-specific persistent storage path
def get_user_storage_path(username):
    """Return the per-user storage directory, creating it if missing.

    The directory lives directly under PERSISTENT_STORAGE_BASE and is
    named after the user.
    """
    path = os.path.join(PERSISTENT_STORAGE_BASE, username)
    os.makedirs(path, exist_ok=True)
    return path
# Modify init_user_database to use persistent path
def init_user_database():
    """Initialize SQLite database for user management.

    Creates the ``users`` table if absent: username (primary key),
    password (SHA-256 hex digest), databases (JSON-encoded list,
    defaulting to ``["default"]``), and last_login timestamp.
    """
    schema = '''
    CREATE TABLE IF NOT EXISTS users (
        username TEXT PRIMARY KEY,
        password TEXT NOT NULL,
        databases TEXT DEFAULT '["default"]',
        last_login DATETIME DEFAULT CURRENT_TIMESTAMP
    )
    '''
    connection = sqlite3.connect(USERS_DB_PATH)
    connection.cursor().execute(schema)
    connection.commit()
    connection.close()
# Modify get_available_databases to use persistent storage
def get_available_databases():
"""Get available databases for the current user from ChromaDB"""
if not hasattr(st.session_state, 'username'):
return []
try:
# Get user-specific storage path
user_storage_path = get_user_storage_path(st.session_state.username)
# List all directories in the user's storage path
# Each directory represents a database
if os.path.exists(user_storage_path):
databases = [d for d in os.listdir(user_storage_path)
if os.path.isdir(os.path.join(user_storage_path, d))]
# If no databases found, return empty list
if not databases:
return []
return databases
else:
return []
except Exception as e:
logger.error(f"Error retrieving databases from ChromaDB: {str(e)}")
return []
# Modify get_context to improve retrieval
def get_context(question):
"""Improved context retrieval with better error handling and persistent storage support"""
if not init_loader():
st.error("Document loader could not be initialized.")
return "", []
context = ""
context_chunks = []
try:
# Get the current database
current_db = st.session_state.current_database
# Get user-specific storage path
user_storage_path = get_user_storage_path(st.session_state.username)
# Set absolute persist directory path for the loader
absolute_persist_dir = os.path.join(user_storage_path, current_db)
st.session_state.loader.persist_directory = absolute_persist_dir
# Debug output
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Getting context for question: {question}")
logger.info(f"Using database: {current_db}")
logger.info(f"Absolute persist directory: {absolute_persist_dir}")
logger.info(f"Loader persist directory set to: {st.session_state.loader.persist_directory}")
# Retrieve collections for the current database
database_collections = st.session_state.database_collections.get(current_db, {})
# Show debug info if enabled
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Found {len(database_collections)} collections in current database")
# Determine all collections to search in
collections_to_search = []
# First, add regular database collections
for collection_name, collection_info in database_collections.items():
collections_to_search.append({
"id": collection_name, # Use plain collection name without prefix
"name": collection_name,
"is_temporary": False
})
# Then, add temporary collections if any exist
if hasattr(st.session_state, 'temp_collections') and st.session_state.temp_collections:
for temp_coll_name, file_info in st.session_state.temp_collections.items():
collections_to_search.append({
"id": temp_coll_name,
"name": file_info.get('file_name', temp_coll_name),
"is_temporary": True
})
# If there are no collections, return empty context
if not collections_to_search:
st.info("No collections available to search.")
return "", []
# Show debug info about collections if debug mode is on
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Searching in {len(collections_to_search)} collections")
for coll in collections_to_search:
logger.info(f"Collection: {coll['id']} ({coll['name']}), Temporary: {coll['is_temporary']}")
# Check if the ChromaDB client is properly initialized with the correct directory
try:
if hasattr(st.session_state.loader, 'chroma_client'):
client_path = getattr(st.session_state.loader.chroma_client, '_path', 'unknown')
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"ChromaDB client path: {client_path}")
# Reinitialize client if path doesn't match
if client_path != absolute_persist_dir:
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Reinitializing ChromaDB client with path: {absolute_persist_dir}")
import chromadb
st.session_state.loader.chroma_client = chromadb.PersistentClient(path=absolute_persist_dir)
except Exception as client_error:
logger.error(f"Error checking ChromaDB client: {str(client_error)}")
# Search in all collections
for collection in collections_to_search:
try:
collection_id = collection["id"]
display_name = collection["name"]
is_temp = collection["is_temporary"]
# Try to retrieve context
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Querying collection: {collection_id}")
# Get collection results, handling errors gracefully
try:
results = st.session_state.loader(
question,
collection_id,
top_k=5 # Retrieve up to 5 most relevant chunks
)
except Exception as query_error:
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.error(f"Error querying collection {collection_id}: {str(query_error)}")
logger.error(traceback.format_exc())
continue # Skip this collection and try the next one
# Check if results are valid and contain documents
if results and 'documents' in results and results['documents'] and results['documents'][0]:
# Add documents to the context for the model
for i, doc in enumerate(results['documents'][0]):
# Truncate very long documents to prevent context overflow
truncated_doc = doc[:1000] + '...' if len(doc) > 1000 else doc
source_label = "Temporary File" if is_temp else "Database"
context += f"doc_{i} from {display_name} ({source_label}): {truncated_doc}\n"
context_chunks.append(f"**Document Chunk {i+1} from {display_name}:**\n{truncated_doc}")
except Exception as collection_error:
logger.error(f"Error retrieving from collection {collection['name']}: {str(collection_error)}")
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.error(f"Traceback: {traceback.format_exc()}")
# Log context retrieval details
logger.info(f"Retrieved {len(context_chunks)} context chunks for query: {question}")
# If no context found, return empty
if not context_chunks:
st.info("No relevant context found for the query.")
except Exception as e:
logger.error(f"Comprehensive error getting context: {str(e)}")
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.error(f"Traceback: {traceback.format_exc()}")
st.error(f"Error retrieving context: {str(e)}")
return context, context_chunks
# Modify show_create_database_modal to use persistent storage
def show_create_database_modal():
    """Create a new database for the current user.

    Renders a Streamlit form; on submit, registers the name in
    ``st.session_state.available_databases``, creates the backing
    directory under the user's persistent storage, switches
    ``current_database`` to it, and reruns the app.
    """
    if not hasattr(st.session_state, 'username'):
        st.error("Please log in first")
        return
    with st.form(key="create_database_form"):
        st.subheader("Create New Database")
        new_db_name = st.text_input("Database Name")
        submit_button = st.form_submit_button(label="Create")
        if submit_button and new_db_name:
            try:
                # Check if database already exists in session state
                current_databases = st.session_state.available_databases if hasattr(st.session_state, 'available_databases') else []
                # Check if database already exists
                if new_db_name in current_databases:
                    st.error(f"Database '{new_db_name}' already exists")
                else:
                    # Add new database to available databases list
                    current_databases.append(new_db_name)
                    st.session_state.available_databases = current_databases
                    # Create a directory for the new ChromaDB database in user's persistent storage
                    user_storage_path = get_user_storage_path(st.session_state.username)
                    database_path = os.path.join(user_storage_path, new_db_name)
                    os.makedirs(database_path, exist_ok=True)
                    # Update session state
                    st.session_state.current_database = new_db_name
                    st.success(f"ChromaDB database '{new_db_name}' created for file loading!")
                    # Initialize collections for this ChromaDB database
                    if "database_collections" not in st.session_state:
                        st.session_state.database_collections = {}
                    st.session_state.database_collections[new_db_name] = {}
                    # Refresh the page to update database list
                    st.rerun()
            except Exception as e:
                st.error(f"Error creating ChromaDB database: {str(e)}")
# Modify login_user to update last login and load persistent collections
def login_user(username, password):
    """Authenticate user and load persistent collections.

    Args:
        username: account name (must be at least 3 characters).
        password: plaintext password; compared as a SHA-256 hex digest.

    Returns:
        A 5-tuple ``(success, message, database_collections,
        temp_collections, file_collections)``; on failure the three
        mappings are empty dicts.

    Fixes vs. original:
    - the short-username path now returns the same 5-tuple shape as every
      other path (it previously returned a bare 2-tuple, which broke
      callers unpacking five values);
    - ``databases`` is initialized before use, so a user row with an
      empty ``databases`` column no longer raises NameError in the
      temporary-collection scan.
    """
    if len(username) < 3:
        return False, "Username must be at least 3 characters long", {}, {}, {}
    # Hash the password
    hashed_password = hashlib.sha256(password.encode()).hexdigest()
    try:
        conn = sqlite3.connect(USERS_DB_PATH)
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM users WHERE username = ? AND password = ?",
                       (username, hashed_password))
        user = cursor.fetchone()
        if user:
            # Update last login timestamp
            cursor.execute(
                "UPDATE users SET last_login = CURRENT_TIMESTAMP WHERE username = ?",
                (username,)
            )
            conn.commit()
            # Load persistent collections for the user
            user_storage_path = get_user_storage_path(username)
            # Initialize database collections from persistent storage
            database_collections = {}
            # Default: no databases recorded for this user (fixes NameError below)
            databases = []
            # Get user's databases
            cursor.execute("SELECT databases FROM users WHERE username = ?", (username,))
            result = cursor.fetchone()
            if result and result[0]:
                databases = json.loads(result[0])
                # Load collections for each database
                for db_name in databases:
                    db_path = os.path.join(user_storage_path, db_name)
                    if os.path.exists(db_path):
                        # Here you might want to add logic to load collection metadata
                        database_collections[db_name] = {}
            # Load temporary collections
            temp_collections = {}
            file_collections = {}
            # Load any persisted temporary collections from ChromaDB.
            # Temporary collections are named "temp_<username>_<file_id>".
            if init_loader():
                loader = Loader_Local()
                # Check each database directory for temporary collections
                for db_name in databases:
                    db_path = os.path.join(user_storage_path, db_name)
                    if os.path.exists(db_path):
                        # Set up the loader to check this database
                        loader.persist_directory = db_path
                        try:
                            # Get all collection names from ChromaDB
                            chroma_client = chromadb.PersistentClient(path=db_path)
                            all_collections = chroma_client.list_collections()
                            # Find temporary collections for this user
                            for collection in all_collections:
                                collection_name = collection.name
                                # Only load temp collections for this user
                                if not collection_name.startswith(f"temp_{username}_"):
                                    continue
                                # Extract file ID from collection name
                                file_id = collection_name.replace(f"temp_{username}_", "")
                                # Try to get collection metadata
                                try:
                                    collection_obj = chroma_client.get_collection(name=collection_name)
                                    # Get any file in this collection to extract metadata
                                    results = collection_obj.get(limit=1)
                                    if results and results['metadatas'] and results['metadatas'][0]:
                                        source = results['metadatas'][0].get('source', '')
                                        file_name = os.path.basename(source)
                                        file_extension = Path(file_name).suffix.lstrip(".").lower()
                                        # Store in temp_collections
                                        temp_collections[collection_name] = {
                                            "file_name": file_name,
                                            "file_type": file_extension,
                                            "added_date": "Previously uploaded",
                                            "is_temporary": True
                                        }
                                        # Add to file_collections for retrieval
                                        file_collections[file_id] = collection_name
                                except Exception as e:
                                    logger.error(f"Error loading temporary collection metadata: {str(e)}")
                        except Exception as e:
                            logger.error(f"Error listing collections in database {db_name}: {str(e)}")
            conn.close()
            return True, "Login successful", database_collections, temp_collections, file_collections
        else:
            conn.close()
            return False, "Invalid username or password", {}, {}, {}
    except Exception as e:
        # NOTE(review): `conn` leaks on this path if connect succeeded —
        # consider a try/finally close in a follow-up.
        return False, f"Login error: {str(e)}", {}, {}, {}
# Initialize the loader only once
def init_loader():
    """Create (once per session) the document loader in st.session_state.

    Idempotent: if ``st.session_state.loader`` already exists, returns
    True immediately.  Otherwise builds a ``Loader_Local`` and, when a
    user and current database are known, points it (and a fresh ChromaDB
    PersistentClient) at that database's directory.

    Returns:
        bool: True when a loader is available, False on initialization failure.
    """
    if not hasattr(st.session_state, 'loader') or st.session_state.loader is None:
        try:
            # Check if Ollama API is available (informational only — failure
            # does not abort loader creation)
            try:
                models = ollama.list()
                logger.info(f"Ollama API available with {len(models.get('models', []))} models")
            except Exception as e:
                logger.warning(f"Ollama API not available: {str(e)}")
            # Initialize loader with a default persist directory (will be updated later per user/database)
            st.session_state.loader = Loader_Local()
            # Set user-specific storage path if user is logged in
            if hasattr(st.session_state, 'username') and st.session_state.username:
                user_storage_path = get_user_storage_path(st.session_state.username)
                # If current database is set, use its path as the persist directory
                if hasattr(st.session_state, 'current_database') and st.session_state.current_database:
                    absolute_persist_dir = os.path.join(user_storage_path, st.session_state.current_database)
                    # Ensure directory exists
                    os.makedirs(absolute_persist_dir, exist_ok=True)
                    # Update loader with proper persist directory
                    st.session_state.loader.persist_directory = absolute_persist_dir
                    # Re-initialize ChromaDB client with the correct path
                    import chromadb
                    st.session_state.loader.chroma_client = chromadb.PersistentClient(path=absolute_persist_dir)
                    if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
                        logger.info(f"Initialized loader with persist directory: {absolute_persist_dir}")
            logger.info("Successfully initialized Loader_Local")
            return True
        except Exception as e:
            logger.error(f"Failed to initialize Loader_Local: {str(e)}")
            if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
                logger.error(traceback.format_exc())
            return False
    return True
# Modify main to handle persistent collections
def main():
# Set page config
st.set_page_config(
page_title="RAG-Agent Chat",
page_icon="🤖",
layout="wide",
initial_sidebar_state="expanded"
)
# Initialize session state variables
if "messages" not in st.session_state:
st.session_state.messages = []
if "initialized" not in st.session_state:
st.session_state.initialized = False
if "selected_model" not in st.session_state:
st.session_state.selected_model = default_model
if "enable_retrieval" not in st.session_state:
st.session_state.enable_retrieval = True
if "file_collections" not in st.session_state:
st.session_state.file_collections = {}
# Check Ollama status only once per session load
# Use session variable instead of calling API multiple times
if "ollama_available" not in st.session_state:
# Only make the API call once
ollama_status = is_ollama_available()
else:
ollama_status = st.session_state.ollama_available
# Initialize the document loader
loader_success = init_loader()
if not loader_success:
logger.warning("Failed to initialize document loader. Some features may not be available.")
# Initialize the database
init_user_database()
# Check if the user is authenticated
if "authenticated" not in st.session_state or not st.session_state.authenticated:
show_login_page()
return
# Ensure chat_instance exists and is initialized at the very beginning
if "chat_instance" not in st.session_state or st.session_state.chat_instance is None:
if ollama_status:
try:
# Create a new chat instance
model_name = st.session_state.selected_model
logger.info(f"Initializing chat instance with model: {model_name}")
st.session_state.chat_instance = OllamaChat(
model=model_name,
system_prompt="You are a helpful chatbot assistant designed to answer questions about the given context. <context>"
)
st.session_state.initialized = True
logger.info(f"Successfully initialized chat instance with model: {model_name}")
except Exception as e:
logger.error(f"Error initializing chat instance: {str(e)}\n{traceback.format_exc()}")
st.session_state.chat_instance = None
st.session_state.initialized = False
# We'll show an error to the user in the UI later
else:
logger.warning("Ollama server is not available. Chat functionality will be limited.")
st.session_state.chat_instance = None
st.session_state.initialized = False
# Set current database to first available database or default
available_dbs = get_available_databases()
if available_dbs:
st.session_state.current_database = available_dbs[0]
# Load persistent collections
if "database_collections" not in st.session_state:
user_storage_path = get_user_storage_path(st.session_state.username)
database_collections = {}
for db_name in available_dbs:
db_path = os.path.join(user_storage_path, db_name)
if os.path.exists(db_path):
# Load collections from files.db
files_db_path = os.path.join(db_path, "files.db")
if os.path.exists(files_db_path):
try:
files_conn = sqlite3.connect(files_db_path)
files_cursor = files_conn.cursor()
files_cursor.execute("SELECT * FROM files")
files = files_cursor.fetchall()
database_collections[db_name] = {}
for file in files:
file_id = file[0]
file_name = file[1]
file_type = file[2]
collection_name = file[3]
added_date = file[4]
description = file[5]
database_collections[db_name][file_id] = {
"collection_name": collection_name,
"file_name": file_name,
"file_type": file_type,
"added_date": added_date,
"description": description or ""
}
files_conn.close()
except Exception as e:
logger.error(f"Error loading files from database {db_name}: {str(e)}")
database_collections[db_name] = {}
else:
database_collections[db_name] = {}
st.session_state.database_collections = database_collections
# Sidebar Configuration
with st.sidebar:
st.title("RAG-Agent Chat")
# User info
st.markdown(f"**Logged in as:** {st.session_state.username}")
if st.button("Logout"):
st.session_state.authenticated = False
st.rerun()
st.divider()
# Add debug mode toggle
if "debug_mode" not in st.session_state:
st.session_state.debug_mode = False
debug_mode = st.toggle("Debug Mode", value=st.session_state.debug_mode, key="debug_toggle")
if debug_mode != st.session_state.debug_mode:
st.session_state.debug_mode = debug_mode
if debug_mode:
st.info("Debug mode enabled. Additional logging information will be shown.")
# Set logging level to DEBUG when debug mode is on
logger.setLevel(logging.DEBUG)
else:
# Reset to INFO level when debug mode is off
logger.setLevel(logging.INFO)
# Check if Ollama is running - show clear status
if not ollama_status:
st.error("⚠️ Ollama server is not running")
st.info("Please start Ollama by running 'ollama serve' in a terminal")
# Add manual check button
if st.button("Check Ollama Status"):
# Force refresh the Ollama status
if "ollama_available" in st.session_state:
del st.session_state.ollama_available
if "ollama_checked_time" in st.session_state:
del st.session_state.ollama_checked_time
if "cached_models" in st.session_state:
del st.session_state.cached_models
st.rerun()
else:
st.success("✅ LLM is running")
# Database Selection Section
st.markdown("### Database Selection")
# Get available databases
available_databases = get_available_databases()
# Create a column layout for the database controls
db_col1, db_col2 = st.columns([3, 1])
with db_col1:
# Dropdown to select database - only show if there are databases
if available_databases:
selected_db = st.selectbox(
"Select Database:",
available_databases,
index=available_databases.index(st.session_state.current_database) if st.session_state.current_database in available_databases else 0
)
if selected_db != st.session_state.current_database:
st.session_state.current_database = selected_db
# Update collections for the new database
st.session_state.file_collections = get_collections_for_database(selected_db)
st.success(f"Switched to database: {selected_db}")
else:
st.info("No databases available. Create a new one.")
with db_col2:
# Button to create new database
if st.button("New DB"):
show_create_database_modal()
# Database Editor/Viewer
with st.expander("Database Details", expanded=False):
show_database_editor()
st.divider()
# Retrieval Toggle
st.markdown("### Document Retrieval")
retrieval_status = "🔍 ON" if st.session_state.enable_retrieval else "🚫 OFF"
toggle_label = f"Database Retrieval: {retrieval_status}"
if st.toggle(toggle_label, value=st.session_state.enable_retrieval, key="retrieval_toggle"):
st.session_state.enable_retrieval = True
else:
st.session_state.enable_retrieval = False
st.divider()
# Model Selection Dropdown
st.markdown("### Model Selection")
# Only show model dropdown if Ollama is running
if ollama_status:
# Get available models
models = get_available_models()
# Create dropdown for model selection
if models:
selected_model = st.selectbox(
"Select a model:",
models,
index=models.index(st.session_state.selected_model) if st.session_state.selected_model in models else 0
)
# Apply model change
if selected_model != st.session_state.selected_model:
try:
# Update the session state
st.session_state.selected_model = selected_model
# Create new chat instance with selected model
st.session_state.chat_instance = OllamaChat(
model=selected_model,
system_prompt="You are a helpful chatbot assistant designed to answer questions about the given context. <context>"
)
st.session_state.initialized = True
st.success(f"Model changed to: {selected_model}")
logger.info(f"Model changed to: {selected_model}")
except Exception as e:
logger.error(f"Error changing model: {str(e)}\n{traceback.format_exc()}")
st.error(f"Error changing model: {str(e)}")
else:
st.warning("No models available. Please pull models using Ollama CLI.")
else:
st.warning("Model selection unavailable - Ollama is not running")
st.divider()
# ChromaDB Collections Section
with st.expander("ChromaDB Collections", expanded=True):
st.markdown("### Add Documents to ChromaDB")
# File Upload for ChromaDB
uploaded_files = st.file_uploader(
"Upload documents to add to ChromaDB:",
accept_multiple_files=True,
type=["pdf", "txt", "csv", "md", "doc", "docx"],
key="chromadb_uploader"
)
# Process files for ChromaDB
if uploaded_files:
# Get current database and existing collections
current_db = st.session_state.current_database
existing_collections = list(st.session_state.database_collections.get(current_db, {}).keys())
st.markdown("### Configure Collection for Uploaded Files")
# Collection selection
collection_choice = st.radio(
"Choose Collection Option",
["Create New Collection", "Use Existing Collection"],
key="upload_collection_choice"
)
if collection_choice == "Use Existing Collection":
# Dropdown for existing collections
if existing_collections:
selected_collection = st.selectbox(
"Select Existing Collection:",
existing_collections,
key="upload_existing_collection"
)
# No description needed for existing collections
collection_description = None
else:
st.warning("No existing collections. Please create a new one.")
collection_choice = "Create New Collection"
if collection_choice == "Create New Collection":
# Input for custom collection name
st.markdown("#### Create New Collection")
st.info("Enter a descriptive name for your collection. This name should reflect the content or topic of your documents.")
new_collection_name = st.text_input(
"Collection Name",
key="upload_new_collection_name",
help="Choose a descriptive name related to the document content. For example: 'machine_learning', 'research_papers', 'company_policies', etc."
)
# Validate collection name
if new_collection_name and new_collection_name in existing_collections:
st.warning(f"Collection '{new_collection_name}' already exists. Files will be added to this existing collection.")
# Optional: Add a description or tags
collection_description = st.text_area(
"Collection Description (Optional)",
key="upload_collection_description",
help="Provide additional context about what this collection contains or represents."
)
selected_collection = new_collection_name
# Process the files
if st.button("Process Files", key="upload_process_files_btn", disabled=not (selected_collection and selected_collection.strip())):
with st.spinner("Processing files..."):
success_count = 0
for file in uploaded_files:
success = process_file(file, selected_collection,
collection_description if collection_choice == "Create New Collection" else None)
if success:
success_count += 1
if success_count == len(uploaded_files):
st.success(f"Successfully processed all {success_count} files to collection '{selected_collection}'")
elif success_count > 0:
st.warning(f"Processed {success_count} out of {len(uploaded_files)} files to collection '{selected_collection}'")
else:
st.error("Failed to process any files. Please check the logs for details.")
# Show help text
# Static usage instructions rendered below the upload controls.
st.markdown("#### How to use file upload")
st.markdown("""
1. Upload one or more documents using the file uploader
2. Choose to create a new collection or use an existing one
3. If creating a new collection, enter a descriptive name for your documents
4. Click "Process Files" to add the documents to the collection
5. Use the chat interface to ask questions about your documents
""")
# --- Browse collections in the currently selected database ---
# Lets the user pick the active collection and inspect its contents.
collections = get_collections_for_database(st.session_state.current_database)
if collections:
    st.markdown("### Available Collections")
    collection_names = list(collections.keys())
    # Allow selecting a collection
    selected_collection = st.selectbox(
        "Select Collection to Use:",
        collection_names,
        key="selected_chromadb_collection"
    )
    if selected_collection:
        st.session_state.active_collection = selected_collection
        st.success(f"Using collection: {selected_collection}")
        # Show collection details
        if selected_collection in collections:
            coll_info = collections[selected_collection]
            st.markdown(f"**Collection:** {selected_collection}")
            # Hoist the file list once; `or []` also guards a None value so a
            # present-but-empty "files" entry falls through to the single-file view.
            coll_files = coll_info.get("files") or []
            if coll_files:
                st.markdown(f"**Files:** {len(coll_files)} document(s)")
                # Display at most the first 5 files directly.
                files_to_show = min(5, len(coll_files))
                for file_name in coll_files[:files_to_show]:
                    st.markdown(f"- {file_name}")
                # Offer to expand the remainder on demand.
                remaining_files = len(coll_files) - files_to_show
                if remaining_files > 0:
                    display_option = st.radio(
                        f"Show all files ({remaining_files} more)",
                        ["Hide Additional Files", "Show All Files"],
                        key=f"show_all_{selected_collection}",
                        horizontal=True
                    )
                    if display_option == "Show All Files":
                        for file_name in coll_files[files_to_show:]:
                            st.markdown(f"- {file_name}")
            else:
                # Legacy/single-file metadata layout; all fields are optional.
                st.markdown(f"**File:** {coll_info.get('file_name', 'Unknown')}")
                st.markdown(f"**Type:** {coll_info.get('file_type', 'Unknown')}")
                st.markdown(f"**Added:** {coll_info.get('added_date', 'Unknown')}")
        # Inspect button — dumps the raw collection data for debugging.
        if st.button("Inspect Collection Data", key="inspect_from_dropdown"):
            inspect_collection(selected_collection)
else:
    st.info("No collections available in the current database. Upload documents to create collections.")
st.divider()
# --- File upload section for temporary (in-memory) Q&A ---
st.markdown("### Upload Documents")
# Initialise the session-state dicts that track what has already been
# processed, so reruns do not reprocess the same uploads.
for _state_key in ("upload_state", "temp_collections"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = {}
# Expander listing the temporary collections currently held in memory.
with st.expander("Temporary Files in Memory", expanded=True):
    if not st.session_state.temp_collections:
        st.info("No temporary files loaded. Upload files below for quick Q&A without saving to your database.")
    else:
        for temp_coll_name, file_info in st.session_state.temp_collections.items():
            st.markdown(f"📄 **{file_info['file_name']}** - *Collection: {temp_coll_name}*")
        # One-click reset of every temporary collection.
        if st.button("Clear All Temporary Files"):
            st.session_state.temp_collections = {}
            st.success("All temporary files cleared from memory")
            st.rerun()
# Uploads here are for ad-hoc Q&A only and are not persisted to the database.
uploaded_files = st.file_uploader(
    "Upload documents for temporary QA (not saved to database):",
    accept_multiple_files=True,
    type=["pdf", "txt", "csv", "md", "doc", "docx"]
)
# Process newly uploaded files
# NOTE(review): this block continues beyond the visible region (the `for` loop
# and `process_with_timeout` are completed further down the file).
if uploaded_files:
with st.spinner("Processing files for quick Q&A..."):
# Add a progress bar
progress_bar = st.progress(0)
status_text = st.empty()
# Track processing status
# NOTE(review): `all_processed` is never updated within the visible span —
# confirm it is maintained (or dead) in the code that follows.
all_processed = True
total_files = len(uploaded_files)
for idx, file in enumerate(uploaded_files):
# Update progress
# NOTE(review): idx/total starts at 0.0 and peaks at (n-1)/n, so the bar
# never shows 100% from here — presumably completed after the loop; confirm.
progress = float(idx) / float(total_files)
progress_bar.progress(progress)
status_text.text(f"Processing file {idx+1}/{total_files}: {file.name}")
# Create a unique identifier for this file
# Spaces and dots are flattened to underscores so the id is safe to embed
# in a collection name.
file_id = file.name.replace(" ", "_").replace(".", "_")
# Only process if it's a new file we haven't seen before
# (upload_state persists across Streamlit reruns, preventing reprocessing).
if file_id not in st.session_state.upload_state:
# Create a temporary collection name specific to this user and file
temp_collection_name = f"temp_{st.session_state.username}_{file_id}"
# Mark as being processed
st.session_state.upload_state[file_id] = "processing"
# Create a temporary file
# NOTE(review): temp_dir cleanup is not visible in this span — verify the
# directory is removed after processing to avoid leaking temp files.
temp_dir = tempfile.mkdtemp()
temp_file_path = os.path.join(temp_dir, file.name)
try:
# Write file to temporary location
with open(temp_file_path, "wb") as f:
f.write(file.getbuffer())
# Get file extension
# Normalised: leading dot stripped, lower-cased (e.g. "PDF" -> "pdf").
file_extension = Path(file.name).suffix.lstrip(".").lower()
# Get user-specific storage path
user_storage_path = get_user_storage_path(st.session_state.username)
# Set persistent directory for the loader
current_db = st.session_state.current_database
current_db_path = os.path.join(user_storage_path, current_db)
st.session_state.loader.persist_directory = current_db_path
# Process the file with timeout handling
logger.info(f"Processing temporary file: {file.name} into collection: {temp_collection_name}")
status_text.text(f"Processing file {idx+1}/{total_files}: {file.name} - Generating chunks and embeddings...")
# Import the needed modules for timeout handling
import concurrent.futures
import time
# Define a function to process with timeout
# NOTE(review): defined inside the loop, it closes over the loop variables
# (temp_file_path, temp_collection_name, file_extension) — safe only because
# it is presumably called before the next iteration; confirm below.
def process_with_timeout(timeout=120): # 2-minute timeout
try:
# Store local references to avoid session state access in thread
# (st.session_state is not safe to touch from a worker thread).
local_loader = st.session_state.loader
# Define a completely isolated thread function - no Streamlit dependencies
def process_file_thread():
try:
# No session state access in this function
result = local_loader.create_or_insert_collection(
temp_file_path,
temp_collection_name,
file_extension,
'local_parser'
)
return result
except Exception as e:
# Don't use logger inside thread
print(f"Error in processing thread: {str(e)}")
return False
# Try to create the temporary collection in a separate thread
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(process_file_thread)