1010_PROJECT_ROOT = Path (__file__ ).resolve ().parents [2 ]
1111sys .path .insert (0 , str (_PROJECT_ROOT ))
1212
13- import config
1413from main_chat .rag_pipeline .rag_retrieval import load_vectordb
1514
1615
@@ -56,6 +55,17 @@ def inspect_vectordb():
5655 for dtype , count in sorted (doc_types .items ()):
5756 print (f" - { dtype } : { count } " )
5857
58+ print ("\n " + "=" * 80 )
59+ print ("CLIENT_UPLOAD METADATA CHECK" )
60+ print ("=" * 80 )
61+
62+ for meta in metadatas :
63+ if meta .get ("doc_type" ) == "client_upload" and meta .get ("chunk_id" ) == 0 :
64+ print (f"\n CLIENT_UPLOAD document:" )
65+ print (f" Source: { meta .get ('source' )} " )
66+ print (f" Full metadata: { meta } " )
67+ print (f" Has folder_category? { meta .get ('folder_category' )} " )
68+
5969 # Show ALL files/sources grouped by document type
6070 print ("\n " + "=" * 80 )
6171 print ("📁 ALL FILES IN VECTOR DATABASE (by type)" )
@@ -88,12 +98,12 @@ def inspect_vectordb():
8898 print ("Testing Policy Retrieval" )
8999 print ("=" * 80 )
90100
91- test_query = "anti-displacement"
92- print (f"\n Test query: '{ test_query } '" )
93-
94101 # Try retrieving policies
95102 from main_chat .rag_pipeline .rag_retrieval import retrieve_policies
96103
104+ test_query = "anti-displacement"
105+ print (f"\n Test query: '{ test_query } '" )
106+
97107 result = retrieve_policies (test_query , k = 5 )
98108 chunks = result .get ("chunks" , [])
99109 metadata = result .get ("metadata" , [])
@@ -105,10 +115,11 @@ def inspect_vectordb():
105115 for i , (chunk , meta ) in enumerate (zip (chunks [:2 ], metadata [:2 ]), 1 ):
106116 print (f"\n Result { i } :" )
107117 print (f" Source: { meta .get ('source' , 'unknown' )} " )
108- print (f" Type: { meta .get ('doc_type' , 'unknown' )} " )
118+ print (f" Doc Type: { meta .get ('doc_type' , 'unknown' )} " )
119+ print (f" Folder Category: { meta .get ('folder_category' , 'none' )} " )
109120 print (f" Preview: { chunk [:200 ]} ..." )
110121 else :
111- print ("✗ No chunks retrieved! This is the problem. " )
122+ print ("✗ No chunks retrieved!" )
112123 # Try a more specific source filter
113124 print ("\n Trying with specific source filter..." )
114125 result2 = retrieve_policies (test_query , k = 5 , source = "Boston Anti-Displacement Plan Analysis.txt" )
0 commit comments