|
50 | 50 | ProviderNotFoundError, |
51 | 51 | RerankError, |
52 | 52 | ) |
| 53 | +from aperag.observability import start_span |
53 | 54 | from aperag.platform.query.query import DocumentWithScore |
54 | 55 | from aperag.schema.utils import parseCollectionConfig |
55 | 56 | from aperag.utils.utils import generate_fulltext_index_name, generate_vector_db_collection_name |
@@ -142,92 +143,103 @@ async def execute_search( |
142 | 143 | search_user_id: str, |
143 | 144 | chat_id: Optional[str] = None, |
144 | 145 | ) -> Tuple[List[SearchResultItem], str]: |
145 | | - query = (data.query or "").strip() |
146 | | - if not query: |
147 | | - raise ValidationException("query is required") |
148 | | - |
149 | | - recall_tasks = [] |
150 | | - collection = await async_db_ops.query_collection(search_user_id, collection_id) |
151 | | - if not collection: |
152 | | - raise ValidationException(f"collection not found: {collection_id}") |
153 | | - |
154 | | - if data.vector_search: |
155 | | - recall_tasks.append( |
156 | | - self._vector_search( |
157 | | - collection=collection, |
158 | | - query=query, |
159 | | - top_k=data.vector_search.topk, |
160 | | - similarity_threshold=data.vector_search.similarity, |
161 | | - chat_id=chat_id, |
| 146 | + with start_span( |
| 147 | + "retrieval.search", |
| 148 | + tracer_name=__name__, |
| 149 | + **{ |
| 150 | + "aperag.domain": "retrieval", |
| 151 | + "aperag.operation": "retrieval.search", |
| 152 | + "aperag.collection.id": collection_id, |
| 153 | + "aperag.user.id": search_user_id, |
| 154 | + "aperag.chat.id": chat_id, |
| 155 | + }, |
| 156 | + ): |
| 157 | + query = (data.query or "").strip() |
| 158 | + if not query: |
| 159 | + raise ValidationException("query is required") |
| 160 | + |
| 161 | + recall_tasks = [] |
| 162 | + collection = await async_db_ops.query_collection(search_user_id, collection_id) |
| 163 | + if not collection: |
| 164 | + raise ValidationException(f"collection not found: {collection_id}") |
| 165 | + |
| 166 | + if data.vector_search: |
| 167 | + recall_tasks.append( |
| 168 | + self._vector_search( |
| 169 | + collection=collection, |
| 170 | + query=query, |
| 171 | + top_k=data.vector_search.topk, |
| 172 | + similarity_threshold=data.vector_search.similarity, |
| 173 | + chat_id=chat_id, |
| 174 | + ) |
162 | 175 | ) |
163 | | - ) |
164 | | - if data.fulltext_search: |
165 | | - recall_tasks.append( |
166 | | - self._fulltext_search( |
167 | | - collection=collection, |
168 | | - query=query, |
169 | | - top_k=data.fulltext_search.topk, |
170 | | - keywords=data.fulltext_search.keywords, |
171 | | - user_id=search_user_id, |
172 | | - chat_id=chat_id, |
| 176 | + if data.fulltext_search: |
| 177 | + recall_tasks.append( |
| 178 | + self._fulltext_search( |
| 179 | + collection=collection, |
| 180 | + query=query, |
| 181 | + top_k=data.fulltext_search.topk, |
| 182 | + keywords=data.fulltext_search.keywords, |
| 183 | + user_id=search_user_id, |
| 184 | + chat_id=chat_id, |
| 185 | + ) |
173 | 186 | ) |
174 | | - ) |
175 | | - if data.graph_search: |
176 | | - recall_tasks.append( |
177 | | - self._graph_search( |
178 | | - collection=collection, |
179 | | - query=query, |
180 | | - top_k=data.graph_search.topk, |
| 187 | + if data.graph_search: |
| 188 | + recall_tasks.append( |
| 189 | + self._graph_search( |
| 190 | + collection=collection, |
| 191 | + query=query, |
| 192 | + top_k=data.graph_search.topk, |
| 193 | + ) |
181 | 194 | ) |
182 | | - ) |
183 | | - if data.summary_search: |
184 | | - recall_tasks.append( |
185 | | - self._summary_search( |
186 | | - collection=collection, |
187 | | - query=query, |
188 | | - top_k=data.summary_search.topk, |
189 | | - similarity_threshold=data.summary_search.similarity, |
| 195 | + if data.summary_search: |
| 196 | + recall_tasks.append( |
| 197 | + self._summary_search( |
| 198 | + collection=collection, |
| 199 | + query=query, |
| 200 | + top_k=data.summary_search.topk, |
| 201 | + similarity_threshold=data.summary_search.similarity, |
| 202 | + ) |
190 | 203 | ) |
191 | | - ) |
192 | | - if data.vision_search: |
193 | | - recall_tasks.append( |
194 | | - self._vision_search( |
195 | | - collection=collection, |
196 | | - query=query, |
197 | | - top_k=data.vision_search.topk, |
198 | | - similarity_threshold=data.vision_search.similarity, |
| 204 | + if data.vision_search: |
| 205 | + recall_tasks.append( |
| 206 | + self._vision_search( |
| 207 | + collection=collection, |
| 208 | + query=query, |
| 209 | + top_k=data.vision_search.topk, |
| 210 | + similarity_threshold=data.vision_search.similarity, |
| 211 | + ) |
199 | 212 | ) |
200 | | - ) |
201 | 213 |
|
202 | | - if not recall_tasks: |
203 | | - raise ValidationException("At least one search strategy must be enabled") |
| 214 | + if not recall_tasks: |
| 215 | + raise ValidationException("At least one search strategy must be enabled") |
204 | 216 |
|
205 | | - recall_results = await asyncio.gather(*recall_tasks) |
206 | | - merged_docs = self._merge_results(recall_results) |
207 | | - reranked_docs = await self._rerank( |
208 | | - query=query, |
209 | | - docs=merged_docs, |
210 | | - user_id=search_user_id, |
211 | | - use_rerank=bool(data.rerank), |
212 | | - ) |
| 217 | + recall_results = await asyncio.gather(*recall_tasks) |
| 218 | + merged_docs = self._merge_results(recall_results) |
| 219 | + reranked_docs = await self._rerank( |
| 220 | + query=query, |
| 221 | + docs=merged_docs, |
| 222 | + user_id=search_user_id, |
| 223 | + use_rerank=bool(data.rerank), |
| 224 | + ) |
213 | 225 |
|
214 | | - items = [] |
215 | | - for idx, doc in enumerate(reranked_docs): |
216 | | - metadata = doc.metadata or {} |
217 | | - public_metadata = SearchResultMetadata.from_raw(metadata) |
218 | | - source = public_metadata.source if public_metadata and public_metadata.source else "" |
219 | | - items.append( |
220 | | - SearchResultItem( |
221 | | - rank=idx + 1, |
222 | | - score=doc.score, |
223 | | - content=doc.text, |
224 | | - source=source, |
225 | | - recall_type=metadata.get("recall_type", ""), |
226 | | - metadata=public_metadata, |
| 226 | + items = [] |
| 227 | + for idx, doc in enumerate(reranked_docs): |
| 228 | + metadata = doc.metadata or {} |
| 229 | + public_metadata = SearchResultMetadata.from_raw(metadata) |
| 230 | + source = public_metadata.source if public_metadata and public_metadata.source else "" |
| 231 | + items.append( |
| 232 | + SearchResultItem( |
| 233 | + rank=idx + 1, |
| 234 | + score=doc.score, |
| 235 | + content=doc.text, |
| 236 | + source=source, |
| 237 | + recall_type=metadata.get("recall_type", ""), |
| 238 | + metadata=public_metadata, |
| 239 | + ) |
227 | 240 | ) |
228 | | - ) |
229 | 241 |
|
230 | | - return items, "rerank" |
| 242 | + return items, "rerank" |
231 | 243 |
|
232 | 244 | async def _vector_search( |
233 | 245 | self, |
|
0 commit comments