Skip to content

Commit 29a1eb7

Browse files
committed
adding the new search_email_by_keyword tool for specific keyword search
1 parent 3a033e9 commit 29a1eb7

3 files changed

Lines changed: 132 additions & 4 deletions

File tree

n0mail/cli/chat_commands.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
# DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small" # Model for user query embedding
4343
DEFAULT_RAG_RESULTS = 5 # Number of chunks to retrieve
4444
DEFAULT_SENDER_SEARCH_RESULTS = 7
45+
DEFAULT_KEYWORD_SEARCH_RESULTS = 7 # New constant for keyword search limit
4546
MAX_HISTORY_TURNS = 10 # Max user/assistant turns to keep in context (20 messages total)
4647
MAX_CONSECUTIVE_TOOL_CALLS = 5 # Limit for consecutive tool calls
4748

@@ -433,6 +434,7 @@ def run_chat(
433434
embedding_model: Optional[str] = typer.Option(None, "--embedding-model", help="AI model for query embedding (default: config.EMBEDDING_DEFAULT_MODEL).", show_default=False),
434435
rag_results_count: int = typer.Option(DEFAULT_RAG_RESULTS, "--rag-results", help="Number of RAG chunks."),
435436
sender_search_count: int = typer.Option(DEFAULT_SENDER_SEARCH_RESULTS, "--sender-results", help="Max emails for sender search."),
437+
keyword_search_count: int = typer.Option(DEFAULT_KEYWORD_SEARCH_RESULTS, "--keyword-results", help="Max emails for keyword search."), # New option
436438
force_refresh_brief: bool = typer.Option(False, "--force-refresh-brief", help="Force regeneration of initial brief, ignoring cache.")
437439
):
438440
"""Start an interactive chat session with RAG and tool capabilities."""
@@ -554,6 +556,11 @@ def run_chat(
554556
max_results=sender_search_count,
555557
debug=debug
556558
),
559+
"search_email_by_keyword": lambda args: search_tool_handler.by_keyword( # New entry
560+
keywords=args.get('keywords', ''),
561+
max_results=keyword_search_count, # Use new option
562+
debug=debug
563+
),
557564
"find_email_address_based_on_name": lambda args: search_tool_handler.by_name(
558565
name=args.get('name', ''),
559566
# max_search uses default from method signature

n0mail/cli/tools/definitions.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
"type": "function",
44
"function": {
55
"name": "retrieve_rag_context",
6-
"description": "Performs a semantic search on the user's email archive based on the content of their query. Use this for general questions about email content.",
6+
"description": "Performs a semantic search on the user's email archive based on the meaning and context of the query. Best for general questions, topic exploration, or finding conceptually related emails even if exact keywords don't match.",
77
"parameters": {
88
"type": "object",
99
"properties": {
1010
"query": {
1111
"type": "string",
12-
"description": "The semantic query to search for relevant email content.",
12+
"description": "The natural language query describing the desired email content or topic.",
1313
}
1414
},
1515
"required": ["query"],
@@ -33,11 +33,28 @@
3333
},
3434
},
3535
},
36+
{
37+
"type": "function",
38+
"function": {
39+
"name": "search_email_by_keyword",
40+
"description": "Searches for emails containing *exact* keywords or phrases within the subject or body. Best for finding specific terms, IDs, error codes, or quoted text. Does NOT perform semantic understanding.",
41+
"parameters": {
42+
"type": "object",
43+
"properties": {
44+
"keywords": {
45+
"type": "string",
46+
"description": "The exact keyword(s) or phrase to search for in the email subject or body.",
47+
}
48+
},
49+
"required": ["keywords"],
50+
},
51+
},
52+
},
3653
{
3754
"type": "function",
3855
"function": {
3956
"name": "find_email_address_based_on_name",
40-
"description": "Looks up potential email addresses associated with a given person's name by searching recent emails. Use this *before* search_email_by_sender if the user only provides a name.",
57+
"description": "Looks up potential email addresses associated with a person's name by searching recent emails. Use this *before* search_email_by_sender if the user only provides a name.",
4158
"parameters": {
4259
"type": "object",
4360
"properties": {

n0mail/cli/tools/search_tools.py

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from rich.console import Console
88
from rich.markdown import Markdown
9-
from sqlalchemy import select, desc
9+
from sqlalchemy import select, desc, or_
1010

1111
# Import necessary components from the n0mail package
1212
# Adjust paths if your structure differs
@@ -57,6 +57,110 @@ def __init__(self, console: Console):
5757
# Example passing: self.db = database; self.models = models, etc.
5858
# For simplicity now, using module imports directly where needed
5959

60+
def by_keyword(self, keywords: str, max_results: int = 7, debug: bool = False) -> str:
    """Search email subjects and bodies for a literal keyword or phrase.

    The local database is queried first with a case-insensitive substring
    match on ``subject`` and ``body_md``. If that yields fewer than
    ``max_results`` rows, the Gmail API is queried for the remainder and
    any email whose id was already found locally is discarded.

    Args:
        keywords: The literal term or phrase to look for. ``%``, ``_`` and
            backslashes are matched literally in the local search. Double
            quotes are dropped from the Gmail query because the phrase is
            itself wrapped in double quotes there.
        max_results: Upper bound on the number of emails returned.
        debug: When true, progress details and the final result are
            printed to ``self.console``.

    Returns:
        The matching emails formatted by ``_format_emails_for_llm`` (with a
        trailing warning if the Gmail lookup failed), or a plain message /
        ``"Error: ..."`` string when nothing was found or the input was
        invalid. Errors are reported in the return value, not raised.
    """
    if debug:
        self.console.print(f"[cyan]Tool: SearchTools.by_keyword (Keywords: '{keywords}')[/cyan]")

    # Reject missing, non-string and whitespace-only input up front; a blank
    # term would otherwise turn into a meaningless "%   %" pattern.
    if not keywords or not isinstance(keywords, str) or not keywords.strip():
        return "Error: Invalid or missing keywords provided."

    # Escape LIKE metacharacters so the search stays literal: an unescaped
    # "%" or "_" in the keywords would act as a wildcard.
    escaped_keywords = (
        keywords.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
    search_term = f"%{escaped_keywords}%"

    local_emails = []
    local_query_error = None
    try:
        with database.get_db_connection() as conn:
            # Case-insensitive match in the subject OR the body, newest first.
            query = (
                select(
                    models.emails_table.c.id, models.emails_table.c.subject,
                    models.emails_table.c.sender, models.emails_table.c.date,
                    models.emails_table.c.body_md
                ).where(or_(
                    models.emails_table.c.subject.ilike(search_term, escape="\\"),
                    models.emails_table.c.body_md.ilike(search_term, escape="\\")
                ))
                .order_by(desc(models.emails_table.c.date))
                .limit(max_results)
            )
            results = conn.execute(query).mappings().fetchall()
            # Build into a temporary list so a failure part-way through
            # leaves local_emails empty rather than half-populated.
            temp_local_emails = []
            for row in results:
                email_dict = dict(row)
                email_dict['date'] = _normalize_date_for_sort(email_dict)
                # The key always exists in the row, so .get()'s default would
                # not cover a NULL body; coerce None to '' explicitly.
                email_dict['body_md'] = text_utils.clean_markdown_for_dense_information(
                    email_dict.get('body_md') or ''
                )
                temp_local_emails.append(email_dict)
            local_emails = temp_local_emails
            if debug:
                self.console.print(f"[grey]Found {len(local_emails)} emails locally containing keyword(s) '{keywords}'.[/grey]")
    except Exception as e:
        # Best effort: remember the failure and still try the Gmail API.
        local_query_error = e
        self.console.print(f"[red]Error querying local DB by keyword: {e}[/red]")

    found_ids = {e['id'] for e in local_emails}
    results_count = len(local_emails)

    # The Gmail query wraps the phrase in double quotes, so quotes inside the
    # keywords would terminate the phrase early; replace them with spaces.
    gmail_phrase = keywords.replace('"', ' ').strip()

    gmail_emails_parsed = []
    gmail_api_error = None
    if results_count < max_results and gmail_phrase:
        needed = max_results - results_count
        # The keywords are treated as one phrase, matched in the subject or
        # anywhere in the message.
        # NOTE(review): only `needed` results are requested and duplicates of
        # local hits are dropped afterwards, so fewer than `needed` new emails
        # may come back - confirm whether over-fetching is preferable.
        gmail_query = f'subject:"{gmail_phrase}" OR "{gmail_phrase}"'
        if debug:
            self.console.print(f"[grey]Searching Gmail API for up to {needed} more emails (Query: '{gmail_query}')...[/grey]")
        try:
            service = gmail_service.get_gmail_service()
            if service:
                fetched_data, _ = gmail_service.fetch_emails(
                    service, query_filter=gmail_query, max_results=needed, start_history_id=None
                )
                newly_fetched = [email for email in fetched_data if email.get('id') not in found_ids]
                for email in newly_fetched:
                    email['date'] = _normalize_date_for_sort(email)
                    email['body_md'] = text_utils.clean_markdown_for_dense_information(
                        email.get('body_md') or ''
                    )
                gmail_emails_parsed = newly_fetched
                if debug:
                    self.console.print(f"[grey]Fetched {len(gmail_emails_parsed)} new emails from Gmail API containing keyword(s).[/grey]")
            else:
                self.console.print("[yellow]Gmail service not available, cannot query API.[/yellow]")
                gmail_api_error = Exception("Gmail service not available.")
        except Exception as e:
            gmail_api_error = e
            self.console.print(f"[red]Error fetching from Gmail API by keyword: {e}[/red]")

    combined_emails = local_emails + gmail_emails_parsed

    if not combined_emails:
        # Nothing to return: say which source(s) failed, if any, so the
        # caller can tell "no matches" apart from "lookup broke".
        error_message = f"No emails found locally or via API containing keywords: '{keywords}'"
        if local_query_error and gmail_api_error:
            error_message = f"Error: Failed querying local DB ({local_query_error}) and Gmail API ({gmail_api_error}) for keywords '{keywords}'."
        elif local_query_error:
            error_message = f"Error: Failed querying local DB ({local_query_error}) and no results from Gmail API for keywords '{keywords}'."
        elif gmail_api_error:
            error_message = f"Error: Failed querying Gmail API ({gmail_api_error}) and no results from local DB for keywords '{keywords}'."
        return error_message

    try:
        combined_emails.sort(key=_normalize_date_for_sort, reverse=True)
    except Exception as sort_e:
        # A sorting failure is not fatal; the emails are returned unsorted.
        self.console.print(f"[yellow]Warning: Could not sort combined emails by keyword due to error: {sort_e}.[/yellow]")

    final_emails = combined_emails[:max_results]
    result_str = _format_emails_for_llm(final_emails)

    if gmail_api_error and local_emails:
        result_str += f"\n\n[Warning: Could not retrieve additional keyword-matching emails from Gmail due to an API error: {gmail_api_error}]"

    if debug:
        self.console.print("--- DEBUG: SearchTools.by_keyword Retrieved Full Emails --- ")
        self.console.print(Markdown(result_str))
        self.console.print("--- END DEBUG SearchTools.by_keyword --- ")
    return result_str
163+
60164
def by_sender(self, sender_email: str, max_results: int = 7, debug: bool = False) -> str:
61165
"""Searches emails from a specific sender locally and via Gmail API."""
62166
if debug:

0 commit comments

Comments
 (0)