@@ -38,7 +38,6 @@ def load_vectordb():
3838def retrieve (query , k = 5 , doc_type = None , tags = None , source = None , folder_category = None , min_score = None , vectordb = None ):
3939 """
4040 Universal retrieval with flexible metadata filtering.
41- Added 'folder_category' parameter to filter by Google Drive subfolder.
4241 """
4342 # Defensive clamp: Chroma requires k >= 1
4443 try :
@@ -52,45 +51,45 @@ def retrieve(query, k=5, doc_type=None, tags=None, source=None, folder_category=
5251 vectordb = load_vectordb ()
5352
5453 # Build filter dictionary
55- filter_conditions = []
54+ filter_dict = None
55+
56+ # Build list of filter conditions
57+ filters = []
5658
5759 # Doc type filter
58- doc_filter = None
59- if isinstance (doc_type , (list , tuple )):
60- doc_types = [dt for dt in doc_type if dt ]
61- if len (doc_types ) == 1 :
62- doc_filter = {"doc_type" : doc_types [0 ]}
63- elif len (doc_types ) > 1 :
64- doc_filter = {"$or" : [{"doc_type" : dt } for dt in doc_types ]}
65- elif doc_type :
66- doc_filter = {"doc_type" : doc_type }
67-
68- if doc_filter :
69- filter_conditions .append (doc_filter )
60+ if doc_type :
61+ if isinstance (doc_type , (list , tuple )):
62+ doc_types = [dt for dt in doc_type if dt ]
63+ if len (doc_types ) == 1 :
64+ filters .append ({"doc_type" : doc_types [0 ]})
65+ elif len (doc_types ) > 1 :
66+ filters .append ({"$or" : [{"doc_type" : dt } for dt in doc_types ]})
67+ else :
68+ filters .append ({"doc_type" : doc_type })
7069
7170 # Source filter
7271 if source :
73- filter_conditions .append ({"source" : source })
72+ filters .append ({"source" : source })
7473
75- # Folder category filter (NEW)
74+ # Folder category filter
7675 if folder_category :
7776 if isinstance (folder_category , (list , tuple )):
7877 if len (folder_category ) == 1 :
79- filter_conditions .append ({"folder_category" : folder_category [0 ]})
78+ filters .append ({"folder_category" : folder_category [0 ]})
8079 elif len (folder_category ) > 1 :
81- filter_conditions .append ({"$or" : [{"folder_category" : f } for f in folder_category ]})
80+ filters .append ({"$or" : [{"folder_category" : f } for f in folder_category ]})
8281 else :
83- filter_conditions .append ({"folder_category" : folder_category })
82+ filters .append ({"folder_category" : folder_category })
8483
85- # Combine all conditions
86- filter_dict = None
87- if len (filter_conditions ) == 1 :
88- filter_dict = filter_conditions [0 ]
89- elif len (filter_conditions ) > 1 :
90- filter_dict = {"$and" : filter_conditions }
84+ # Combine filters
85+ if len (filters ) == 1 :
86+ filter_dict = filters [0 ]
87+ elif len (filters ) > 1 :
88+ filter_dict = {"$and" : filters }
9189
90+ # Retrieve with or without min_score
9291 if min_score is not None :
93- results_with_scores = vectordb .similarity_search_with_score (query , k = k * 3 if tags else k , filter = filter_dict if filter_dict else None )
92+ results_with_scores = vectordb .similarity_search_with_score (query , k = k * 3 if tags else k , filter = filter_dict )
9493
9594 if tags :
9695 filtered_results = []
@@ -106,9 +105,14 @@ def retrieve(query, k=5, doc_type=None, tags=None, source=None, folder_category=
106105
107106 filtered_results = [(doc , score ) for doc , score in results_with_scores if score <= min_score ]
108107
109- return {"chunks" : [doc .page_content for doc , _ in filtered_results [:k ]], "metadata" : [doc .metadata for doc , _ in filtered_results [:k ]], "scores" : [score for _ , score in filtered_results [:k ]], "query" : query }
108+ return {
109+ "chunks" : [doc .page_content for doc , _ in filtered_results [:k ]],
110+ "metadata" : [doc .metadata for doc , _ in filtered_results [:k ]],
111+ "scores" : [score for _ , score in filtered_results [:k ]],
112+ "query" : query ,
113+ }
110114 else :
111- results = vectordb .similarity_search (query , k = k * 3 if tags else k , filter = filter_dict if filter_dict else None )
115+ results = vectordb .similarity_search (query , k = k * 3 if tags else k , filter = filter_dict )
112116
113117 if tags :
114118 filtered_results = []
@@ -122,12 +126,17 @@ def retrieve(query, k=5, doc_type=None, tags=None, source=None, folder_category=
122126 if filtered_results :
123127 results = filtered_results
124128
125- return {"chunks" : [doc .page_content for doc in results [:k ]], "metadata" : [doc .metadata for doc in results [:k ]], "scores" : None , "query" : query }
129+ return {
130+ "chunks" : [doc .page_content for doc in results [:k ]],
131+ "metadata" : [doc .metadata for doc in results [:k ]],
132+ "scores" : None ,
133+ "query" : query ,
134+ }
126135
127136
def retrieve_transcripts(query, tags=None, k=5):
    """Convenience function for transcript-only search.

    Delegates to retrieve() with doc_type pinned to "transcript"; every
    other filter keeps its retrieve() default.

    Args:
        query: Search text, forwarded unchanged to retrieve().
        tags: Optional tag filter, forwarded unchanged to retrieve().
        k: Maximum number of results to return (default 5).

    Returns:
        The result of retrieve() for the restricted search: a dict with
        "chunks", "metadata", "scores" and "query" keys.
    """
    # The doc_type value is the singular "transcript"; the plural
    # "transcripts" is the superseded value and must not be used here.
    return retrieve(query, k=k, doc_type="transcript", tags=tags)
131140
132141
133142def retrieve_policies (query , k = 5 , source = None ):
0 commit comments