Skip to content

Commit f2b7f97

Browse files
committed
Merge: merge into dev/zhouh
2 parents 7542d2b + 93a1ff1 commit f2b7f97

3 files changed

Lines changed: 127 additions & 0 deletions

File tree

agents/matmaster_agent/document_parser_agent/agent.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from agents.matmaster_agent.base_agents.job_agent import CalculationMCPLlmAgent
66
from agents.matmaster_agent.constant import MATMASTER_AGENT_NAME, BohriumStorge
77

8+
from .callback import validate_document_url
89
from .constant import DocumentParserAgentName, DocumentParserServerUrl
910
from .prompt import DocumentParserAgentDescription, DocumentParserAgentInstruction
1011

@@ -21,6 +22,7 @@ def __init__(self, llm_config):
2122
instruction=DocumentParserAgentInstruction,
2223
tools=[toolset],
2324
supervisor_agent=MATMASTER_AGENT_NAME,
25+
after_model_callback=validate_document_url,
2426
)
2527

2628

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
from typing import Optional
2+
from urllib.parse import urlparse
3+
4+
from google.adk.agents.callback_context import CallbackContext
5+
from google.adk.models import LlmResponse
6+
from google.genai import types
7+
8+
9+
def validate_document_url(
    callback_context: CallbackContext, llm_response: LlmResponse
) -> Optional[LlmResponse]:
    """
    after_model_callback that reroutes document-extraction tool calls by URL.

    Each function call to 'extract_material_data_from_pdf' or
    'extract_material_data_from_webpage' is checked against the URL in its
    arguments ('url', or the first entry of 'urls'). When
    ``_correct_tool_call`` names a different tool, the call is rebuilt with
    the corrected name and the same arguments.

    Args:
        callback_context: Callback context supplied by the framework; not
            used by this callback.
        llm_response: Model response whose parts are inspected.

    Returns:
        A new ``LlmResponse`` carrying the corrected content when at least one
        call was rerouted, otherwise ``None`` (the response is left as is).
    """
    if not llm_response or not llm_response.content or not llm_response.content.parts:
        return None

    routed_tools = (
        'extract_material_data_from_pdf',
        'extract_material_data_from_webpage',
    )

    # Walk *all* parts, not only the function calls, so that text or other
    # parts emitted alongside a rerouted call keep their place in the rebuilt
    # content instead of being dropped.
    rebuilt_parts = []
    has_modifications = False

    for part in llm_response.content.parts:
        function_call = part.function_call
        if (
            not function_call
            or function_call.name not in routed_tools
            or not function_call.args
        ):
            rebuilt_parts.append(part)
            continue

        args = function_call.args
        if 'url' in args:
            url = args['url']
        elif 'urls' in args:
            # Only the first URL decides the routing for a multi-URL call.
            urls = args['urls']
            url = urls[0] if isinstance(urls, list) and urls else ''
        else:
            url = ''

        # Arguments are model-generated; anything that is not a non-empty
        # string gives no basis for a correction.
        if not isinstance(url, str) or not url:
            rebuilt_parts.append(part)
            continue

        corrected_name = _correct_tool_call(function_call.name, url)
        if corrected_name == function_call.name:
            rebuilt_parts.append(part)
            continue

        # Carry the original call id over so the rerouted call can still be
        # matched with its function response.
        new_function_call = types.FunctionCall(
            id=getattr(function_call, 'id', None),
            name=corrected_name,
            args=dict(args),
        )
        rebuilt_parts.append(types.Part(function_call=new_function_call))
        has_modifications = True

    if not has_modifications:
        return None

    new_content = types.Content(parts=rebuilt_parts, role=llm_response.content.role)
    # NOTE(review): only `content` is carried over; any other fields set on
    # the original response are not copied into the replacement - confirm
    # that this is intended.
    return LlmResponse(
        content=new_content,
    )
85+
86+
87+
def _correct_tool_call(current_tool: str, url: str) -> str:
88+
"""
89+
Determine the correct tool based on URL characteristics.
90+
91+
Args:
92+
current_tool: Current tool name
93+
url: URL to analyze
94+
95+
Returns:
96+
Corrected tool name
97+
"""
98+
if not url:
99+
return current_tool
100+
101+
# Parse URL to check its characteristics
102+
parsed_url = urlparse(url)
103+
104+
# Check if it's clearly a web URL (no file extension or html-based)
105+
if parsed_url.scheme in ['http', 'https']:
106+
path = parsed_url.path.lower()
107+
108+
# If it has a PDF extension, it should use document parser
109+
if path.endswith('.pdf'):
110+
return 'extract_material_data_from_pdf'
111+
112+
# If it doesn't have a file extension or has HTML-related patterns, use web parser
113+
if '.' not in path or path.endswith(
114+
('.html', '.htm', '.asp', '.aspx', '.jsp', '.php')
115+
):
116+
return 'extract_material_data_from_webpage'
117+
118+
# For URLs without clear extensions, check if it looks like a web page
119+
# (this is a heuristic, could be refined)
120+
if any(keyword in url.lower() for keyword in ['www', 'http', 'web', 'site']):
121+
return 'extract_material_data_from_webpage'
122+
123+
# Default to current tool if we can't determine
124+
return current_tool

agents/matmaster_agent/prompt.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@
265265
266266
5. **{DPACalulator_AGENT_NAME}** - **Deep potential simulations**
267267
- Purpose: Perform simulations based on deep potential (深度学习势函数) for materials.
268+
- Note that DPA2.4-7M and DPA3.1-3M are both default options. DPA2.4-7M is faster; while DPA3.1-3M is more accurate. Ask the user to choose if they don't specify. If the user requires continuous calculation, use DPA2.4-7M as default and inform the user about the difference.
268269
- Capabilities:
269270
- Structure building (bulk, interface, molecule, adsorbates) and optimization
270271
- Molecular dynamics for alloys

0 commit comments

Comments
 (0)