55- File/URL retrieval via information_retrieval
66- Configurable collection_name and storage_path for flexibility
77
8- The toolkit is generic and portable - task isolation and other application-specific
9- concerns are handled at the orchestration layer (e.g., in get_toolkits()) .
8+ The toolkit is generic and portable - task isolation and other
9+ application-specific concerns are handled at the orchestration layer.
1010
1111# TODO: Support configurable embedding models (currently OpenAI only)
1212# TODO: Add support for other vector storage backends
1313"""
1414
1515import hashlib
16+ import logging
1617import os
1718from pathlib import Path
1819from typing import List , Optional , Union
2728from app .component .environment import env
2829from app .service .task import Agents
2930from app .utils .toolkit .abstract_toolkit import AbstractToolkit
30- from utils import traceroot_wrapper as traceroot
3131
32- logger = traceroot . get_logger ("rag_toolkit" )
32+ logger = logging . getLogger ("rag_toolkit" )
3333
3434# Default paths and constants
3535DEFAULT_RAG_STORAGE_PATH = "~/.eigent/rag_storage"
@@ -64,17 +64,21 @@ def __init__(
6464 Args:
6565 api_task_id (str): Task ID for eigent integration.
6666 agent_name (str | None): Optional agent name override.
67- collection_name (str | None): Name for the vector collection. If not provided,
68- defaults to a generic name.
69- storage_path (str | Path | None): Path for vector storage. If not provided, uses
70- a default path.
67+ collection_name (str | None): Name for the vector collection.
68+ If not provided, defaults to a generic name.
69+ storage_path (str | Path | None): Path for vector storage.
70+ If not provided, uses a default path.
7171 """
7272 self .api_task_id = api_task_id
7373 if agent_name is not None :
7474 self .agent_name = agent_name
7575
7676 # Use provided paths or defaults
77- self ._storage_path = Path (storage_path ) if storage_path else Path (os .path .expanduser (DEFAULT_RAG_STORAGE_PATH ))
77+ self ._storage_path = (
78+ Path (storage_path )
79+ if storage_path
80+ else Path (os .path .expanduser (DEFAULT_RAG_STORAGE_PATH ))
81+ )
7882 self ._storage_path .mkdir (parents = True , exist_ok = True )
7983
8084 self ._collection_name = collection_name or DEFAULT_COLLECTION_NAME
@@ -85,8 +89,10 @@ def __init__(
8589 storage_type = StorageType .QDRANT ,
8690 )
8791
88- # Wrap CAMEL's RetrievalToolkit using composition (for file/URL retrieval)
89- self ._retrieval_toolkit = RetrievalToolkit (auto_retriever = auto_retriever )
92+ # Wrap CAMEL's RetrievalToolkit using composition
93+ self ._retrieval_toolkit = RetrievalToolkit (
94+ auto_retriever = auto_retriever
95+ )
9096
9197 # Lazy-initialized components for raw text support
9298 self ._embedding_model = None
@@ -98,7 +104,9 @@ def _get_embedding_model(self):
98104 if self ._embedding_model is None :
99105 api_key = env ("OPENAI_API_KEY" )
100106 if not api_key :
101- raise ValueError ("OPENAI_API_KEY is required for RAG embeddings" )
107+ raise ValueError (
108+ "OPENAI_API_KEY is required for RAG embeddings"
109+ )
102110 self ._embedding_model = OpenAIEmbedding (api_key = api_key )
103111 return self ._embedding_model
104112
@@ -128,16 +136,16 @@ def information_retrieval(
128136 top_k : int = 5 ,
129137 similarity_threshold : float = 0.5 ,
130138 ) -> str :
131- """Retrieves information from a local vector storage based on the query .
139+ """Retrieves information from local vector storage.
132140
133141 This method connects to a task-isolated vector storage and retrieves
134142 relevant information. Content is automatically indexed on first use.
135143
136144 Args:
137145 query: The question or query for which an answer is required.
138- contents: Local file paths, remote URLs, or string contents to search .
146+ contents: Local file paths, remote URLs, or string contents.
139147 top_k: Number of top results to return (default: 5).
140- similarity_threshold: Minimum similarity score for results (default: 0.5).
148+ similarity_threshold: Min similarity score (default: 0.5).
141149
142150 Returns:
143151 The information retrieved in response to the query.
@@ -155,7 +163,9 @@ def information_retrieval(
155163 top_k = top_k ,
156164 similarity_threshold = similarity_threshold ,
157165 )
158- logger .info (f"Retrieved information for query in collection { self ._collection_name } " )
166+ logger .info (
167+ f"Retrieved info for query in { self ._collection_name } "
168+ )
159169 return result
160170 except Exception as e :
161171 logger .error (f"Failed to retrieve information: { e } " , exc_info = True )
@@ -169,8 +179,8 @@ def add_document(
169179 ) -> str :
170180 """Add a raw text document to the knowledge base.
171181
172- This method allows adding text content directly without requiring a file.
173- Useful for adding API responses, conversation snippets, or any text data.
182+ This method allows adding text content directly without a file.
183+ Useful for API responses, conversation snippets, or text data.
174184
175185 Args:
176186 content: The text content to add to the knowledge base.
@@ -206,8 +216,10 @@ def add_document(
206216 retriever = self ._get_vector_retriever ()
207217 retriever .process (content = content , extra_info = doc_metadata )
208218
209- logger .info (f"Added document { doc_id } to collection { self ._collection_name } " )
210- return f"Successfully added document (ID: { doc_id } ) to knowledge base"
219+ logger .info (f"Added doc { doc_id } to { self ._collection_name } " )
220+ return (
221+ f"Successfully added document (ID: { doc_id } ) to knowledge base"
222+ )
211223
212224 except Exception as e :
213225 logger .error (f"Failed to add document: { e } " , exc_info = True )
@@ -218,14 +230,14 @@ def query_knowledge_base(
218230 query : str ,
219231 top_k : int = 5 ,
220232 ) -> str :
221- """Query the knowledge base for relevant information from added documents .
233+ """Query knowledge base for relevant information.
222234
223235 This queries documents previously added via add_document().
224236 For querying files/URLs, use information_retrieval() instead.
225237
226238 Args:
227- query (str): The question or search query to find relevant documents .
228- top_k (int): Maximum number of relevant chunks to return (default: 5).
239+ query (str): The question or search query.
240+ top_k (int): Max relevant chunks to return (default: 5).
229241
230242 Returns:
231243 Retrieved relevant text chunks from the knowledge base,
@@ -257,7 +269,9 @@ def query_knowledge_base(
257269 result_text += f" (Source: { source } )"
258270 formatted_results .append (result_text )
259271
260- logger .info (f"Retrieved { len (results )} results for query in collection { self ._collection_name } " )
272+ logger .info (
273+ f"Retrieved { len (results )} results in { self ._collection_name } "
274+ )
261275 return "\n \n " .join (formatted_results )
262276
263277 except Exception as e :
@@ -278,9 +292,14 @@ def list_knowledge_bases(self) -> str:
278292 collections .append (item .name )
279293
280294 if not collections :
281- return "No knowledge bases found. Use add_document or information_retrieval to create one."
295+ return (
296+ "No knowledge bases found. Use add_document or "
297+ "information_retrieval to create one."
298+ )
282299
283- return "Available knowledge bases:\n " + "\n " .join (f"- { c } " for c in sorted (collections ))
300+ return "Available knowledge bases:\n " + "\n " .join (
301+ f"- { c } " for c in sorted (collections )
302+ )
284303
285304 except Exception as e :
286305 logger .error (f"Failed to list knowledge bases: { e } " , exc_info = True )
@@ -314,7 +333,7 @@ def get_can_use_tools(
314333 storage_path (str | Path | None): Path for vector storage.
315334
316335 Raises:
317- ValueError: If collection_name is None (must be explicitly specified) .
336+ ValueError: If collection_name is None.
318337 """
319338 # RAG requires OpenAI API key for embeddings
320339 if not env ("OPENAI_API_KEY" ):
@@ -323,7 +342,9 @@ def get_can_use_tools(
323342
324343 # Require explicit collection_name for task isolation
325344 if collection_name is None :
326- raise ValueError ("collection_name must be explicitly specified for RAG toolkit" )
345+ raise ValueError (
346+ "collection_name must be explicitly specified for RAG toolkit"
347+ )
327348
328349 toolkit = RAGToolkit (
329350 api_task_id = api_task_id ,
0 commit comments