-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp_streamlit.py.bak
More file actions
2356 lines (1942 loc) · 109 KB
/
app_streamlit.py.bak
File metadata and controls
2356 lines (1942 loc) · 109 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import streamlit as st
import asyncio
import tempfile
from pathlib import Path
import sys
import requests
import logging
import traceback
import time
from threading import Thread
from queue import Queue
import sqlite3
import hashlib
import json
import shutil
from datetime import datetime
import chromadb
# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Add the RAG-Agent source code to the Python path
# so `ollama_pure_chat` and `fileloader` below resolve.
rag_agent_path = os.path.join(os.path.dirname(__file__), "rag-agent/src")
sys.path.append(rag_agent_path)

# Single import indicator to prevent repeated import logging.
# Streamlit re-executes this whole script on every user interaction, so
# without the session-state guard the info/error lines would repeat on
# each rerun.  The modules themselves are cached by Python either way.
if "imports_done" not in st.session_state:
    try:
        # Import Ollama with error handling
        import ollama
        logger.info("Successfully imported ollama package")
    except ImportError as e:
        logger.error(f"Failed to import ollama package: {str(e)}")
        st.error(f"Failed to import ollama package: {str(e)}")
        sys.exit(1)
    try:
        # Import local modules from the rag-agent source
        from ollama_pure_chat import OllamaChat
        from fileloader import Loader_Local
        logger.info("Successfully imported modules from rag-agent")
    except ImportError as e:
        logger.error(f"Failed to import modules from rag-agent: {str(e)}")
        st.error(f"Failed to import modules from rag-agent: {str(e)}")
        sys.exit(1)
    # Mark imports as completed
    st.session_state.imports_done = True
else:
    # Silent imports when already done (no logging on subsequent reruns)
    import ollama
    from ollama_pure_chat import OllamaChat
    from fileloader import Loader_Local
# Check if Ollama API is available
def is_ollama_available():
"""Check if the Ollama server is running and accessible"""
# Only check once per session unless explicitly refreshed
if "ollama_available" not in st.session_state or "ollama_checked_time" not in st.session_state:
try:
response = requests.get("http://localhost:11434/api/tags", timeout=2)
status = response.status_code == 200
st.session_state.ollama_available = status
st.session_state.ollama_checked_time = time.time()
return status
except (requests.ConnectionError, requests.Timeout):
logger.warning("Ollama server is not running or not accessible")
st.session_state.ollama_available = False
st.session_state.ollama_checked_time = time.time()
return False
except Exception as e:
logger.error(f"Unexpected error checking Ollama availability: {str(e)}")
st.session_state.ollama_available = False
st.session_state.ollama_checked_time = time.time()
return False
else:
# Check if we need to refresh (every 60 seconds)
if time.time() - st.session_state.ollama_checked_time > 60:
try:
response = requests.get("http://localhost:11434/api/tags", timeout=2)
status = response.status_code == 200
st.session_state.ollama_available = status
st.session_state.ollama_checked_time = time.time()
return status
except Exception:
# Just keep existing status on error
return st.session_state.ollama_available
return st.session_state.ollama_available
# Default model to use if none is specified
default_model = "deepseek-r1:1.5b"

# Persistent storage paths - Use absolute path for consistency regardless of
# the working directory; overridable via the PERSISTENT_STORAGE_BASE env var.
PERSISTENT_STORAGE_BASE = os.getenv("PERSISTENT_STORAGE_BASE", os.path.join(os.path.dirname(os.path.abspath(__file__)), 'localdatabase'))
# SQLite database holding user accounts (see init_user_database for schema).
USERS_DB_PATH = os.path.join(PERSISTENT_STORAGE_BASE, "users.db")

# Ensure persistent storage directories exist
os.makedirs(PERSISTENT_STORAGE_BASE, exist_ok=True)
# Function to get user-specific persistent storage path
def get_user_storage_path(username):
    """Return the per-user storage directory, creating it if missing.

    The directory lives directly under PERSISTENT_STORAGE_BASE and is
    named after the user.
    """
    path = os.path.join(PERSISTENT_STORAGE_BASE, username)
    os.makedirs(path, exist_ok=True)
    return path
# Modify init_user_database to use persistent path
def init_user_database():
    """Initialize SQLite database for user management.

    Creates the ``users`` table if absent: username (primary key),
    password (SHA-256 hex digest), databases (JSON-encoded list,
    defaulting to ``["default"]``), and last_login timestamp.
    """
    schema = '''
    CREATE TABLE IF NOT EXISTS users (
        username TEXT PRIMARY KEY,
        password TEXT NOT NULL,
        databases TEXT DEFAULT '["default"]',
        last_login DATETIME DEFAULT CURRENT_TIMESTAMP
    )
    '''
    connection = sqlite3.connect(USERS_DB_PATH)
    connection.cursor().execute(schema)
    connection.commit()
    connection.close()
# Modify get_available_databases to use persistent storage
def get_available_databases():
"""Get available databases for the current user from ChromaDB"""
if not hasattr(st.session_state, 'username'):
return []
try:
# Get user-specific storage path
user_storage_path = get_user_storage_path(st.session_state.username)
# List all directories in the user's storage path
# Each directory represents a database
if os.path.exists(user_storage_path):
databases = [d for d in os.listdir(user_storage_path)
if os.path.isdir(os.path.join(user_storage_path, d))]
# If no databases found, return empty list
if not databases:
return []
return databases
else:
return []
except Exception as e:
logger.error(f"Error retrieving databases from ChromaDB: {str(e)}")
return []
# Modify get_context to improve retrieval
def get_context(question):
"""Improved context retrieval with better error handling and persistent storage support"""
if not init_loader():
st.error("Document loader could not be initialized.")
return "", []
context = ""
context_chunks = []
try:
# Get the current database
current_db = st.session_state.current_database
# Get user-specific storage path
user_storage_path = get_user_storage_path(st.session_state.username)
# Set absolute persist directory path for the loader
absolute_persist_dir = os.path.join(user_storage_path, current_db)
st.session_state.loader.persist_directory = absolute_persist_dir
# Debug output
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Getting context for question: {question}")
logger.info(f"Using database: {current_db}")
logger.info(f"Absolute persist directory: {absolute_persist_dir}")
logger.info(f"Loader persist directory set to: {st.session_state.loader.persist_directory}")
# Retrieve collections for the current database
database_collections = st.session_state.database_collections.get(current_db, {})
# Show debug info if enabled
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Found {len(database_collections)} collections in current database")
# Determine all collections to search in
collections_to_search = []
# First, add regular database collections
for collection_name, collection_info in database_collections.items():
collections_to_search.append({
"id": collection_name, # Use plain collection name without prefix
"name": collection_name,
"is_temporary": False
})
# Then, add temporary collections if any exist
if hasattr(st.session_state, 'temp_collections') and st.session_state.temp_collections:
for temp_coll_name, file_info in st.session_state.temp_collections.items():
collections_to_search.append({
"id": temp_coll_name,
"name": file_info.get('file_name', temp_coll_name),
"is_temporary": True
})
# If there are no collections, return empty context
if not collections_to_search:
st.info("No collections available to search.")
return "", []
# Show debug info about collections if debug mode is on
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Searching in {len(collections_to_search)} collections")
for coll in collections_to_search:
logger.info(f"Collection: {coll['id']} ({coll['name']}), Temporary: {coll['is_temporary']}")
# Check if the ChromaDB client is properly initialized with the correct directory
try:
if hasattr(st.session_state.loader, 'chroma_client'):
client_path = getattr(st.session_state.loader.chroma_client, '_path', 'unknown')
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"ChromaDB client path: {client_path}")
# Reinitialize client if path doesn't match
if client_path != absolute_persist_dir:
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Reinitializing ChromaDB client with path: {absolute_persist_dir}")
import chromadb
st.session_state.loader.chroma_client = chromadb.PersistentClient(path=absolute_persist_dir)
except Exception as client_error:
logger.error(f"Error checking ChromaDB client: {str(client_error)}")
# Search in all collections
for collection in collections_to_search:
try:
collection_id = collection["id"]
display_name = collection["name"]
is_temp = collection["is_temporary"]
# Try to retrieve context
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.info(f"Querying collection: {collection_id}")
# Get collection results, handling errors gracefully
try:
results = st.session_state.loader(
question,
collection_id,
top_k=5 # Retrieve up to 5 most relevant chunks
)
except Exception as query_error:
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.error(f"Error querying collection {collection_id}: {str(query_error)}")
logger.error(traceback.format_exc())
continue # Skip this collection and try the next one
# Check if results are valid and contain documents
if results and 'documents' in results and results['documents'] and results['documents'][0]:
# Add documents to the context for the model
for i, doc in enumerate(results['documents'][0]):
# Truncate very long documents to prevent context overflow
truncated_doc = doc[:1000] + '...' if len(doc) > 1000 else doc
source_label = "Temporary File" if is_temp else "Database"
context += f"doc_{i} from {display_name} ({source_label}): {truncated_doc}\n"
context_chunks.append(f"**Document Chunk {i+1} from {display_name}:**\n{truncated_doc}")
except Exception as collection_error:
logger.error(f"Error retrieving from collection {collection['name']}: {str(collection_error)}")
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.error(f"Traceback: {traceback.format_exc()}")
# Log context retrieval details
logger.info(f"Retrieved {len(context_chunks)} context chunks for query: {question}")
# If no context found, return empty
if not context_chunks:
st.info("No relevant context found for the query.")
except Exception as e:
logger.error(f"Comprehensive error getting context: {str(e)}")
if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
logger.error(f"Traceback: {traceback.format_exc()}")
st.error(f"Error retrieving context: {str(e)}")
return context, context_chunks
# Modify show_create_database_modal to use persistent storage
def show_create_database_modal():
    """Create a new database for the current user.

    Renders a Streamlit form; on submit, registers the name in
    ``st.session_state.available_databases``, creates the backing
    directory under the user's persistent storage, switches
    ``current_database`` to it, and reruns the app.
    """
    if not hasattr(st.session_state, 'username'):
        st.error("Please log in first")
        return
    with st.form(key="create_database_form"):
        st.subheader("Create New Database")
        new_db_name = st.text_input("Database Name")
        submit_button = st.form_submit_button(label="Create")
        if submit_button and new_db_name:
            try:
                # Check if database already exists in session state
                current_databases = st.session_state.available_databases if hasattr(st.session_state, 'available_databases') else []
                # Check if database already exists
                if new_db_name in current_databases:
                    st.error(f"Database '{new_db_name}' already exists")
                else:
                    # Add new database to available databases list
                    current_databases.append(new_db_name)
                    st.session_state.available_databases = current_databases
                    # Create a directory for the new ChromaDB database in user's persistent storage
                    user_storage_path = get_user_storage_path(st.session_state.username)
                    database_path = os.path.join(user_storage_path, new_db_name)
                    os.makedirs(database_path, exist_ok=True)
                    # Update session state
                    st.session_state.current_database = new_db_name
                    st.success(f"ChromaDB database '{new_db_name}' created for file loading!")
                    # Initialize collections for this ChromaDB database
                    if "database_collections" not in st.session_state:
                        st.session_state.database_collections = {}
                    st.session_state.database_collections[new_db_name] = {}
                    # Refresh the page to update database list
                    st.rerun()
            except Exception as e:
                st.error(f"Error creating ChromaDB database: {str(e)}")
# Modify login_user to update last login and load persistent collections
def login_user(username, password):
    """Authenticate user and load persistent collections.

    Args:
        username: account name (must be at least 3 characters).
        password: plaintext password; compared as a SHA-256 hex digest.

    Returns:
        A 5-tuple ``(success, message, database_collections,
        temp_collections, file_collections)``; on failure the three
        mappings are empty dicts.

    Fixes vs. original:
    - the short-username path now returns the same 5-tuple shape as every
      other path (it previously returned a bare 2-tuple, which broke
      callers unpacking five values);
    - ``databases`` is initialized before use, so a user row with an
      empty ``databases`` column no longer raises NameError in the
      temporary-collection scan.
    """
    if len(username) < 3:
        return False, "Username must be at least 3 characters long", {}, {}, {}
    # Hash the password
    hashed_password = hashlib.sha256(password.encode()).hexdigest()
    try:
        conn = sqlite3.connect(USERS_DB_PATH)
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM users WHERE username = ? AND password = ?",
                       (username, hashed_password))
        user = cursor.fetchone()
        if user:
            # Update last login timestamp
            cursor.execute(
                "UPDATE users SET last_login = CURRENT_TIMESTAMP WHERE username = ?",
                (username,)
            )
            conn.commit()
            # Load persistent collections for the user
            user_storage_path = get_user_storage_path(username)
            # Initialize database collections from persistent storage
            database_collections = {}
            # Default: no databases recorded for this user (fixes NameError below)
            databases = []
            # Get user's databases
            cursor.execute("SELECT databases FROM users WHERE username = ?", (username,))
            result = cursor.fetchone()
            if result and result[0]:
                databases = json.loads(result[0])
                # Load collections for each database
                for db_name in databases:
                    db_path = os.path.join(user_storage_path, db_name)
                    if os.path.exists(db_path):
                        # Here you might want to add logic to load collection metadata
                        database_collections[db_name] = {}
            # Load temporary collections
            temp_collections = {}
            file_collections = {}
            # Load any persisted temporary collections from ChromaDB.
            # Temporary collections are named "temp_<username>_<file_id>".
            if init_loader():
                loader = Loader_Local()
                # Check each database directory for temporary collections
                for db_name in databases:
                    db_path = os.path.join(user_storage_path, db_name)
                    if os.path.exists(db_path):
                        # Set up the loader to check this database
                        loader.persist_directory = db_path
                        try:
                            # Get all collection names from ChromaDB
                            chroma_client = chromadb.PersistentClient(path=db_path)
                            all_collections = chroma_client.list_collections()
                            # Find temporary collections for this user
                            for collection in all_collections:
                                collection_name = collection.name
                                # Only load temp collections for this user
                                if not collection_name.startswith(f"temp_{username}_"):
                                    continue
                                # Extract file ID from collection name
                                file_id = collection_name.replace(f"temp_{username}_", "")
                                # Try to get collection metadata
                                try:
                                    collection_obj = chroma_client.get_collection(name=collection_name)
                                    # Get any file in this collection to extract metadata
                                    results = collection_obj.get(limit=1)
                                    if results and results['metadatas'] and results['metadatas'][0]:
                                        source = results['metadatas'][0].get('source', '')
                                        file_name = os.path.basename(source)
                                        file_extension = Path(file_name).suffix.lstrip(".").lower()
                                        # Store in temp_collections
                                        temp_collections[collection_name] = {
                                            "file_name": file_name,
                                            "file_type": file_extension,
                                            "added_date": "Previously uploaded",
                                            "is_temporary": True
                                        }
                                        # Add to file_collections for retrieval
                                        file_collections[file_id] = collection_name
                                except Exception as e:
                                    logger.error(f"Error loading temporary collection metadata: {str(e)}")
                        except Exception as e:
                            logger.error(f"Error listing collections in database {db_name}: {str(e)}")
            conn.close()
            return True, "Login successful", database_collections, temp_collections, file_collections
        else:
            conn.close()
            return False, "Invalid username or password", {}, {}, {}
    except Exception as e:
        # NOTE(review): `conn` leaks on this path if connect succeeded —
        # consider a try/finally close in a follow-up.
        return False, f"Login error: {str(e)}", {}, {}, {}
# Initialize the loader only once
def init_loader():
    """Create (once per session) the document loader in st.session_state.

    Idempotent: if ``st.session_state.loader`` already exists, returns
    True immediately.  Otherwise builds a ``Loader_Local`` and, when a
    user and current database are known, points it (and a fresh ChromaDB
    PersistentClient) at that database's directory.

    Returns:
        bool: True when a loader is available, False on initialization failure.
    """
    if not hasattr(st.session_state, 'loader') or st.session_state.loader is None:
        try:
            # Check if Ollama API is available (informational only — failure
            # does not abort loader creation)
            try:
                models = ollama.list()
                logger.info(f"Ollama API available with {len(models.get('models', []))} models")
            except Exception as e:
                logger.warning(f"Ollama API not available: {str(e)}")
            # Initialize loader with a default persist directory (will be updated later per user/database)
            st.session_state.loader = Loader_Local()
            # Set user-specific storage path if user is logged in
            if hasattr(st.session_state, 'username') and st.session_state.username:
                user_storage_path = get_user_storage_path(st.session_state.username)
                # If current database is set, use its path as the persist directory
                if hasattr(st.session_state, 'current_database') and st.session_state.current_database:
                    absolute_persist_dir = os.path.join(user_storage_path, st.session_state.current_database)
                    # Ensure directory exists
                    os.makedirs(absolute_persist_dir, exist_ok=True)
                    # Update loader with proper persist directory
                    st.session_state.loader.persist_directory = absolute_persist_dir
                    # Re-initialize ChromaDB client with the correct path
                    import chromadb
                    st.session_state.loader.chroma_client = chromadb.PersistentClient(path=absolute_persist_dir)
                    if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
                        logger.info(f"Initialized loader with persist directory: {absolute_persist_dir}")
            logger.info("Successfully initialized Loader_Local")
            return True
        except Exception as e:
            logger.error(f"Failed to initialize Loader_Local: {str(e)}")
            if hasattr(st.session_state, "debug_mode") and st.session_state.debug_mode:
                logger.error(traceback.format_exc())
            return False
    return True
# Modify main to handle persistent collections
def main():
# Set page config
st.set_page_config(
page_title="RAG-Agent Chat",
page_icon="🤖",
layout="wide",
initial_sidebar_state="expanded"
)
# Initialize session state variables
if "messages" not in st.session_state:
st.session_state.messages = []
if "initialized" not in st.session_state:
st.session_state.initialized = False
if "selected_model" not in st.session_state:
st.session_state.selected_model = default_model
if "enable_retrieval" not in st.session_state:
st.session_state.enable_retrieval = True
if "file_collections" not in st.session_state:
st.session_state.file_collections = {}
# Check Ollama status only once per session load
# Use session variable instead of calling API multiple times
if "ollama_available" not in st.session_state:
# Only make the API call once
ollama_status = is_ollama_available()
else:
ollama_status = st.session_state.ollama_available
# Initialize the document loader
loader_success = init_loader()
if not loader_success:
logger.warning("Failed to initialize document loader. Some features may not be available.")
# Initialize the database
init_user_database()
# Check if the user is authenticated
if "authenticated" not in st.session_state or not st.session_state.authenticated:
show_login_page()
return
# Ensure chat_instance exists and is initialized at the very beginning
if "chat_instance" not in st.session_state or st.session_state.chat_instance is None:
if ollama_status:
try:
# Create a new chat instance
model_name = st.session_state.selected_model
logger.info(f"Initializing chat instance with model: {model_name}")
st.session_state.chat_instance = OllamaChat(
model=model_name,
system_prompt="You are a helpful chatbot assistant designed to answer questions about the given context. <context>"
)
st.session_state.initialized = True
logger.info(f"Successfully initialized chat instance with model: {model_name}")
except Exception as e:
logger.error(f"Error initializing chat instance: {str(e)}\n{traceback.format_exc()}")
st.session_state.chat_instance = None
st.session_state.initialized = False
# We'll show an error to the user in the UI later
else:
logger.warning("Ollama server is not available. Chat functionality will be limited.")
st.session_state.chat_instance = None
st.session_state.initialized = False
# Set current database to first available database or default
available_dbs = get_available_databases()
if available_dbs:
st.session_state.current_database = available_dbs[0]
# Load persistent collections
if "database_collections" not in st.session_state:
user_storage_path = get_user_storage_path(st.session_state.username)
database_collections = {}
for db_name in available_dbs:
db_path = os.path.join(user_storage_path, db_name)
if os.path.exists(db_path):
# Load collections from files.db
files_db_path = os.path.join(db_path, "files.db")
if os.path.exists(files_db_path):
try:
files_conn = sqlite3.connect(files_db_path)
files_cursor = files_conn.cursor()
files_cursor.execute("SELECT * FROM files")
files = files_cursor.fetchall()
database_collections[db_name] = {}
for file in files:
file_id = file[0]
file_name = file[1]
file_type = file[2]
collection_name = file[3]
added_date = file[4]
description = file[5]
database_collections[db_name][file_id] = {
"collection_name": collection_name,
"file_name": file_name,
"file_type": file_type,
"added_date": added_date,
"description": description or ""
}
files_conn.close()
except Exception as e:
logger.error(f"Error loading files from database {db_name}: {str(e)}")
database_collections[db_name] = {}
else:
database_collections[db_name] = {}
st.session_state.database_collections = database_collections
# Sidebar Configuration
with st.sidebar:
st.title("RAG-Agent Chat")
# User info
st.markdown(f"**Logged in as:** {st.session_state.username}")
if st.button("Logout"):
st.session_state.authenticated = False
st.rerun()
st.divider()
# Add debug mode toggle
if "debug_mode" not in st.session_state:
st.session_state.debug_mode = False
debug_mode = st.toggle("Debug Mode", value=st.session_state.debug_mode, key="debug_toggle")
if debug_mode != st.session_state.debug_mode:
st.session_state.debug_mode = debug_mode
if debug_mode:
st.info("Debug mode enabled. Additional logging information will be shown.")
# Set logging level to DEBUG when debug mode is on
logger.setLevel(logging.DEBUG)
else:
# Reset to INFO level when debug mode is off
logger.setLevel(logging.INFO)
# Check if Ollama is running - show clear status
if not ollama_status:
st.error("⚠️ Ollama server is not running")
st.info("Please start Ollama by running 'ollama serve' in a terminal")
# Add manual check button
if st.button("Check Ollama Status"):
# Force refresh the Ollama status
if "ollama_available" in st.session_state:
del st.session_state.ollama_available
if "ollama_checked_time" in st.session_state:
del st.session_state.ollama_checked_time
if "cached_models" in st.session_state:
del st.session_state.cached_models
st.rerun()
else:
st.success("✅ LLM is running")
# Database Selection Section
st.markdown("### Database Selection")
# Get available databases
available_databases = get_available_databases()
# Create a column layout for the database controls
db_col1, db_col2 = st.columns([3, 1])
with db_col1:
# Dropdown to select database - only show if there are databases
if available_databases:
selected_db = st.selectbox(
"Select Database:",
available_databases,
index=available_databases.index(st.session_state.current_database) if st.session_state.current_database in available_databases else 0
)
if selected_db != st.session_state.current_database:
st.session_state.current_database = selected_db
# Update collections for the new database
st.session_state.file_collections = get_collections_for_database(selected_db)
st.success(f"Switched to database: {selected_db}")
else:
st.info("No databases available. Create a new one.")
with db_col2:
# Button to create new database
if st.button("New DB"):
show_create_database_modal()
# Database Editor/Viewer
with st.expander("Database Details", expanded=False):
show_database_editor()
st.divider()
# Retrieval Toggle
st.markdown("### Document Retrieval")
retrieval_status = "🔍 ON" if st.session_state.enable_retrieval else "🚫 OFF"
toggle_label = f"Database Retrieval: {retrieval_status}"
if st.toggle(toggle_label, value=st.session_state.enable_retrieval, key="retrieval_toggle"):
st.session_state.enable_retrieval = True
else:
st.session_state.enable_retrieval = False
st.divider()
# Model Selection Dropdown
st.markdown("### Model Selection")
# Only show model dropdown if Ollama is running
if ollama_status:
# Get available models
models = get_available_models()
# Create dropdown for model selection
if models:
selected_model = st.selectbox(
"Select a model:",
models,
index=models.index(st.session_state.selected_model) if st.session_state.selected_model in models else 0
)
# Apply model change
if selected_model != st.session_state.selected_model:
try:
# Update the session state
st.session_state.selected_model = selected_model
# Create new chat instance with selected model
st.session_state.chat_instance = OllamaChat(
model=selected_model,
system_prompt="You are a helpful chatbot assistant designed to answer questions about the given context. <context>"
)
st.session_state.initialized = True
st.success(f"Model changed to: {selected_model}")
logger.info(f"Model changed to: {selected_model}")
except Exception as e:
logger.error(f"Error changing model: {str(e)}\n{traceback.format_exc()}")
st.error(f"Error changing model: {str(e)}")
else:
st.warning("No models available. Please pull models using Ollama CLI.")
else:
st.warning("Model selection unavailable - Ollama is not running")
st.divider()
# ChromaDB Collections Section
with st.expander("ChromaDB Collections", expanded=True):
st.markdown("### Add Documents to ChromaDB")
# File Upload for ChromaDB
uploaded_files = st.file_uploader(
"Upload documents to add to ChromaDB:",
accept_multiple_files=True,
type=["pdf", "txt", "csv", "md", "doc", "docx"],
key="chromadb_uploader"
)
# Process files for ChromaDB
if uploaded_files:
# Get current database and existing collections
current_db = st.session_state.current_database
existing_collections = list(st.session_state.database_collections.get(current_db, {}).keys())
st.markdown("### Configure Collection for Uploaded Files")
# Collection selection
collection_choice = st.radio(
"Choose Collection Option",
["Create New Collection", "Use Existing Collection"],
key="upload_collection_choice"
)
if collection_choice == "Use Existing Collection":
# Dropdown for existing collections
if existing_collections:
selected_collection = st.selectbox(
"Select Existing Collection:",
existing_collections,
key="upload_existing_collection"
)
# No description needed for existing collections
collection_description = None
else:
st.warning("No existing collections. Please create a new one.")
collection_choice = "Create New Collection"
if collection_choice == "Create New Collection":
# Input for custom collection name
st.markdown("#### Create New Collection")
st.info("Enter a descriptive name for your collection. This name should reflect the content or topic of your documents.")
new_collection_name = st.text_input(
"Collection Name",
key="upload_new_collection_name",
help="Choose a descriptive name related to the document content. For example: 'machine_learning', 'research_papers', 'company_policies', etc."
)
# Validate collection name
if new_collection_name and new_collection_name in existing_collections:
st.warning(f"Collection '{new_collection_name}' already exists. Files will be added to this existing collection.")
# Optional: Add a description or tags
collection_description = st.text_area(
"Collection Description (Optional)",
key="upload_collection_description",
help="Provide additional context about what this collection contains or represents."
)
selected_collection = new_collection_name
# Process the files
if st.button("Process Files", key="upload_process_files_btn", disabled=not (selected_collection and selected_collection.strip())):
with st.spinner("Processing files..."):
success_count = 0
for file in uploaded_files:
success = process_file(file, selected_collection,
collection_description if collection_choice == "Create New Collection" else None)
if success:
success_count += 1
if success_count == len(uploaded_files):
st.success(f"Successfully processed all {success_count} files to collection '{selected_collection}'")
elif success_count > 0:
st.warning(f"Processed {success_count} out of {len(uploaded_files)} files to collection '{selected_collection}'")
else:
st.error("Failed to process any files. Please check the logs for details.")
# Show help text
# Static usage instructions rendered below the upload controls.
st.markdown("#### How to use file upload")
st.markdown("""
1. Upload one or more documents using the file uploader
2. Choose to create a new collection or use an existing one
3. If creating a new collection, enter a descriptive name for your documents
4. Click "Process Files" to add the documents to the collection
5. Use the chat interface to ask questions about your documents
""")
# --- Browse collections in the currently selected database ---
# Lets the user pick the active collection and inspect its contents.
collections = get_collections_for_database(st.session_state.current_database)
if collections:
    st.markdown("### Available Collections")
    collection_names = list(collections.keys())
    # Allow selecting a collection
    selected_collection = st.selectbox(
        "Select Collection to Use:",
        collection_names,
        key="selected_chromadb_collection"
    )
    if selected_collection:
        st.session_state.active_collection = selected_collection
        st.success(f"Using collection: {selected_collection}")
        # Show collection details
        if selected_collection in collections:
            coll_info = collections[selected_collection]
            st.markdown(f"**Collection:** {selected_collection}")
            # Hoist the file list once; `or []` also guards a None value so a
            # present-but-empty "files" entry falls through to the single-file view.
            coll_files = coll_info.get("files") or []
            if coll_files:
                st.markdown(f"**Files:** {len(coll_files)} document(s)")
                # Display at most the first 5 files directly.
                files_to_show = min(5, len(coll_files))
                for file_name in coll_files[:files_to_show]:
                    st.markdown(f"- {file_name}")
                # Offer to expand the remainder on demand.
                remaining_files = len(coll_files) - files_to_show
                if remaining_files > 0:
                    display_option = st.radio(
                        f"Show all files ({remaining_files} more)",
                        ["Hide Additional Files", "Show All Files"],
                        key=f"show_all_{selected_collection}",
                        horizontal=True
                    )
                    if display_option == "Show All Files":
                        for file_name in coll_files[files_to_show:]:
                            st.markdown(f"- {file_name}")
            else:
                # Legacy/single-file metadata layout; all fields are optional.
                st.markdown(f"**File:** {coll_info.get('file_name', 'Unknown')}")
                st.markdown(f"**Type:** {coll_info.get('file_type', 'Unknown')}")
                st.markdown(f"**Added:** {coll_info.get('added_date', 'Unknown')}")
        # Inspect button — dumps the raw collection data for debugging.
        if st.button("Inspect Collection Data", key="inspect_from_dropdown"):
            inspect_collection(selected_collection)
else:
    st.info("No collections available in the current database. Upload documents to create collections.")
st.divider()
# --- File upload section for temporary (in-memory) Q&A ---
st.markdown("### Upload Documents")
# Initialise the session-state dicts that track what has already been
# processed, so reruns do not reprocess the same uploads.
for _state_key in ("upload_state", "temp_collections"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = {}
# Expander listing the temporary collections currently held in memory.
with st.expander("Temporary Files in Memory", expanded=True):
    if not st.session_state.temp_collections:
        st.info("No temporary files loaded. Upload files below for quick Q&A without saving to your database.")
    else:
        for temp_coll_name, file_info in st.session_state.temp_collections.items():
            st.markdown(f"📄 **{file_info['file_name']}** - *Collection: {temp_coll_name}*")
        # One-click reset of every temporary collection.
        if st.button("Clear All Temporary Files"):
            st.session_state.temp_collections = {}
            st.success("All temporary files cleared from memory")
            st.rerun()
# Uploads here are for ad-hoc Q&A only and are not persisted to the database.
uploaded_files = st.file_uploader(
    "Upload documents for temporary QA (not saved to database):",
    accept_multiple_files=True,
    type=["pdf", "txt", "csv", "md", "doc", "docx"]
)
# Process newly uploaded files
# NOTE(review): this block continues beyond the visible region (the `for` loop
# and `process_with_timeout` are completed further down the file).
if uploaded_files:
with st.spinner("Processing files for quick Q&A..."):
# Add a progress bar
progress_bar = st.progress(0)
status_text = st.empty()
# Track processing status
# NOTE(review): `all_processed` is never updated within the visible span —
# confirm it is maintained (or dead) in the code that follows.
all_processed = True
total_files = len(uploaded_files)
for idx, file in enumerate(uploaded_files):
# Update progress
# NOTE(review): idx/total starts at 0.0 and peaks at (n-1)/n, so the bar
# never shows 100% from here — presumably completed after the loop; confirm.
progress = float(idx) / float(total_files)
progress_bar.progress(progress)
status_text.text(f"Processing file {idx+1}/{total_files}: {file.name}")
# Create a unique identifier for this file
# Spaces and dots are flattened to underscores so the id is safe to embed
# in a collection name.
file_id = file.name.replace(" ", "_").replace(".", "_")
# Only process if it's a new file we haven't seen before
# (upload_state persists across Streamlit reruns, preventing reprocessing).
if file_id not in st.session_state.upload_state:
# Create a temporary collection name specific to this user and file
temp_collection_name = f"temp_{st.session_state.username}_{file_id}"
# Mark as being processed
st.session_state.upload_state[file_id] = "processing"
# Create a temporary file
# NOTE(review): temp_dir cleanup is not visible in this span — verify the
# directory is removed after processing to avoid leaking temp files.
temp_dir = tempfile.mkdtemp()
temp_file_path = os.path.join(temp_dir, file.name)
try:
# Write file to temporary location
with open(temp_file_path, "wb") as f:
f.write(file.getbuffer())
# Get file extension
# Normalised: leading dot stripped, lower-cased (e.g. "PDF" -> "pdf").
file_extension = Path(file.name).suffix.lstrip(".").lower()
# Get user-specific storage path
user_storage_path = get_user_storage_path(st.session_state.username)
# Set persistent directory for the loader
current_db = st.session_state.current_database
current_db_path = os.path.join(user_storage_path, current_db)
st.session_state.loader.persist_directory = current_db_path
# Process the file with timeout handling
logger.info(f"Processing temporary file: {file.name} into collection: {temp_collection_name}")
status_text.text(f"Processing file {idx+1}/{total_files}: {file.name} - Generating chunks and embeddings...")
# Import the needed modules for timeout handling
import concurrent.futures
import time
# Define a function to process with timeout
# NOTE(review): defined inside the loop, it closes over the loop variables
# (temp_file_path, temp_collection_name, file_extension) — safe only because
# it is presumably called before the next iteration; confirm below.
def process_with_timeout(timeout=120): # 2-minute timeout
try:
# Store local references to avoid session state access in thread
# (st.session_state is not safe to touch from a worker thread).
local_loader = st.session_state.loader
# Define a completely isolated thread function - no Streamlit dependencies
def process_file_thread():
try:
# No session state access in this function
result = local_loader.create_or_insert_collection(
temp_file_path,
temp_collection_name,
file_extension,
'local_parser'
)
return result
except Exception as e:
# Don't use logger inside thread
print(f"Error in processing thread: {str(e)}")
return False
# Try to create the temporary collection in a separate thread
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(process_file_thread)