Skip to content

Commit ba0f8fc

Browse files
authored
Merge pull request #730 from AnguseZhang/new_db_search
feat: add fetch_structures_from_db, rm other db search tools
2 parents f9cce74 + ca793a6 commit ba0f8fc

6 files changed

Lines changed: 286 additions & 113 deletions

File tree

agents/matmaster_agent/sub_agents/mapping.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,28 +112,24 @@
112112
from agents.matmaster_agent.sub_agents.LAMMPS_agent.constant import LAMMPS_AGENT_NAME
113113
from agents.matmaster_agent.sub_agents.MrDice_agent.bohriumpublic_agent.agent import (
114114
Bohriumpublic_AgentBase,
115-
bohriumpublic_toolset,
116115
)
117116
from agents.matmaster_agent.sub_agents.MrDice_agent.bohriumpublic_agent.constant import (
118117
BOHRIUMPUBLIC_DATABASE_AGENT_NAME,
119118
)
120119
from agents.matmaster_agent.sub_agents.MrDice_agent.mofdb_agent.agent import (
121120
Mofdb_AgentBase,
122-
mofdb_toolset,
123121
)
124122
from agents.matmaster_agent.sub_agents.MrDice_agent.mofdb_agent.constant import (
125123
MOFDB_DATABASE_AGENT_NAME,
126124
)
127125
from agents.matmaster_agent.sub_agents.MrDice_agent.openlam_agent.agent import (
128126
Openlam_AgentBase,
129-
openlam_toolset,
130127
)
131128
from agents.matmaster_agent.sub_agents.MrDice_agent.openlam_agent.constant import (
132129
OPENLAM_DATABASE_AGENT_NAME,
133130
)
134131
from agents.matmaster_agent.sub_agents.MrDice_agent.optimade_agent.agent import (
135132
Optimade_AgentBase,
136-
optimade_toolset,
137133
)
138134
from agents.matmaster_agent.sub_agents.MrDice_agent.optimade_agent.constant import (
139135
OPTIMADE_DATABASE_AGENT_NAME,
@@ -219,6 +215,13 @@
219215
from agents.matmaster_agent.sub_agents.structure_generate_agent.constant import (
220216
StructureGenerateAgentName,
221217
)
218+
from agents.matmaster_agent.sub_agents.structure_search_agent.agent import (
219+
StructureSearchAgentBase,
220+
structure_search_toolset,
221+
)
222+
from agents.matmaster_agent.sub_agents.structure_search_agent.constant import (
223+
STRUCTURE_SEARCH_AGENT_NAME,
224+
)
222225
from agents.matmaster_agent.sub_agents.superconductor_agent.agent import (
223226
SuperconductorAgent,
224227
superconductor_toolset,
@@ -293,10 +296,7 @@
293296
'polymer_kb_toolset': polymer_kb_toolset,
294297
'steel_kb_toolset': steel_kb_toolset,
295298
'steel_predict_toolset': steel_predict_toolset,
296-
'optimade_toolset': optimade_toolset,
297-
'bohriumpublic_toolset': bohriumpublic_toolset,
298-
'openlam_toolset': openlam_toolset,
299-
'mofdb_toolset': mofdb_toolset,
299+
'structure_search_toolset': structure_search_toolset,
300300
'organic_reaction_toolset': organic_reaction_toolset,
301301
'perovskite_toolset': perovskite_toolset,
302302
'piloteye_electro_toolset': piloteye_electro_toolset,
@@ -337,6 +337,7 @@
337337
BOHRIUMPUBLIC_DATABASE_AGENT_NAME: Bohriumpublic_AgentBase,
338338
MOFDB_DATABASE_AGENT_NAME: Mofdb_AgentBase,
339339
OPENLAM_DATABASE_AGENT_NAME: Openlam_AgentBase,
340+
STRUCTURE_SEARCH_AGENT_NAME: StructureSearchAgentBase,
340341
ORGANIC_REACTION_AGENT_NAME: OragnicReactionAgent,
341342
PerovskiteAgentName: PerovskiteAgent,
342343
PILOTEYE_ELECTRO_AGENT_NAME: PiloteyeElectroAgent,
@@ -380,6 +381,7 @@ class MatMasterSubAgentsEnum(str, Enum):
380381
BohriumPublicDatabaseAgent = BOHRIUMPUBLIC_DATABASE_AGENT_NAME
381382
MOFDBDatabaseAgent = MOFDB_DATABASE_AGENT_NAME
382383
OpenLAMDatabaseAgent = OPENLAM_DATABASE_AGENT_NAME
384+
StructureSearchAgent = STRUCTURE_SEARCH_AGENT_NAME
383385
OrganicReactionAgent = ORGANIC_REACTION_AGENT_NAME
384386
PerovskiteAgent = PerovskiteAgentName
385387
PiloteyeElectroAgent = PILOTEYE_ELECTRO_AGENT_NAME

agents/matmaster_agent/sub_agents/structure_search_agent/__init__.py

Whitespace-only changes.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from dp.agent.adapter.adk import CalculationMCPToolset
2+
from google.adk.agents import BaseAgent
3+
from google.adk.tools.mcp_tool.mcp_session_manager import SseServerParams
4+
5+
from agents.matmaster_agent.constant import LOCAL_EXECUTOR, BohriumStorge
6+
from agents.matmaster_agent.core_agents.public_agents.sync_agent import (
7+
BaseSyncAgentWithToolValidator,
8+
)
9+
from agents.matmaster_agent.sub_agents.MrDice_agent.constant import MrDice_Agent_Name
10+
from agents.matmaster_agent.sub_agents.structure_search_agent.constant import (
11+
STRUCTURE_SEARCH_AGENT_NAME,
12+
STRUCTURE_SEARCH_URL,
13+
)
14+
15+
# MCP toolset shared by the structure-search agent: it connects over SSE to
# STRUCTURE_SEARCH_URL and is configured with the project's Bohrium storage
# settings and the local executor.
structure_search_toolset = CalculationMCPToolset(
    connection_params=SseServerParams(url=STRUCTURE_SEARCH_URL),
    storage=BohriumStorge,
    executor=LOCAL_EXECUTOR,
)
20+
21+
22+
class StructureSearchAgentBase(BaseSyncAgentWithToolValidator):
    """Sync sub-agent whose only tool is ``structure_search_toolset``.

    The agent is registered under ``STRUCTURE_SEARCH_AGENT_NAME`` (plus an
    optional suffix) and reports to the MrDice supervisor agent.
    """

    def __init__(self, llm_config, name_suffix=''):
        """Configure the agent.

        Args:
            llm_config: Configuration object; its ``default_litellm_model``
                attribute is used as the agent's model.
            name_suffix: Optional string appended to
                ``STRUCTURE_SEARCH_AGENT_NAME``. Defaults to ``''`` so that
                existing ``StructureSearchAgentBase(llm_config)`` callers get
                the same agent name as before.
        """
        super().__init__(
            model=llm_config.default_litellm_model,
            # Accepting the suffix here keeps factories that pass
            # ``name_suffix=...`` from failing with a TypeError.
            name=STRUCTURE_SEARCH_AGENT_NAME + name_suffix,
            description='',
            instruction='',
            tools=[structure_search_toolset],
            render_tool_response=True,
            supervisor_agent=MrDice_Agent_Name,
        )
33+
34+
35+
def init_structure_search_agent(llm_config, name_suffix='') -> BaseAgent:
    """Build and return the structure-search sub-agent.

    Args:
        llm_config: Configuration object passed through to
            ``StructureSearchAgentBase``.
        name_suffix: Forwarded unchanged to ``StructureSearchAgentBase`` as
            the ``name_suffix`` keyword argument.

    Returns:
        The constructed ``StructureSearchAgentBase`` instance.
    """
    agent = StructureSearchAgentBase(llm_config, name_suffix=name_suffix)
    return agent
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from agents.matmaster_agent.constant import CURRENT_ENV
2+
3+
# Name under which the structure-search sub-agent is registered.
STRUCTURE_SEARCH_AGENT_NAME = 'structure_search_agent'

# SSE endpoint of the structure-search MCP server: the 'test' and 'uat'
# environments share port 50001, every other environment uses port 50002.
STRUCTURE_SEARCH_URL = (
    'http://chvz1424099.bohrium.tech:50001/sse'
    if CURRENT_ENV in ('test', 'uat')
    else 'http://chvz1424099.bohrium.tech:50002/sse'
)
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
# One-paragraph-per-line description of the structure-search tool: what it
# does, when to use it, its inputs, outputs, limits and cost notes.
StructureSearchAgentToolDescription = '\n'.join(
    (
        'What it does: Retrieve structures across multiple sources (BohriumPublic / OpenLAM / OPTIMADE providers) and run advanced SQL queries on MOFdb.',
        'When to use: Any "find crystal structures" request, including formula/elements/space group/band gap filters, time/energy filters (OpenLAM), cross-provider searches (OPTIMADE), or MOF-specific analytics (MOFdb SQL).',
        'Prerequisites / Inputs: Provide either structured filters (formula/elements/space group ranges), OpenLAM filters (energy/time), OPTIMADE filter strings, or MOFdb SQL.',
        'Outputs: Structures or metadata in CIF/JSON; MOFdb returns SQL rows and optional structure file links.',
        'Cannot do / Limits: OPTIMADE filters must follow the standard grammar; MOFdb is MOF-specific; OpenLAM does not provide space group/band gap.',
        'Cost / Notes: Default to BohriumPublic for speed; use OPTIMADE for flexible/cross-provider retrieval; use MOFdb for complex MOF analytics.',
    )
)
9+
10+
StructureSearchAgentArgsSetting = """
11+
## PARAMETER CONSTRUCTION GUIDE
12+
13+
## Do not ask the user for confirmation; directly start retrieval when a query is made.
14+
15+
## 0) ROUTING: WHICH TOOL TO CALL
16+
You have access to multiple retrieval tools. Choose ONE based on the user intent:
17+
18+
### A) BohriumPublic (fast structured filters)
19+
when the user asks for:
20+
- formula / elements / space group / atom count / predicted formation energy / band gap
21+
- and they do NOT require cross-provider search or complex boolean logic
22+
23+
### B) OpenLAM (energy / time filters)
24+
when the user asks for:
25+
- OpenLAM specifically, OR upload/submission time filters, OR OpenLAM energy range filters
26+
Limits: OpenLAM does NOT support space group, band gap, or "elements list" filters.
27+
28+
### C) OPTIMADE (cross-provider, flexible composition filters)
29+
when the user needs:
30+
- Raw filter: alexandria, cmr, cod, mcloud, mcloudarchive, mp, mpdd, mpds, nmd, odbx, omdb, oqmd, tcod, twodmatpedia
31+
- cross-provider search (e.g., "search in mp/cod/oqmd..."), OR
32+
- flexible logical composition constraints, OR
33+
- structure-type family queries (anonymous formula like AB2C4), OR
34+
- 2D/1D/0D constraints (nperiodic_dimensions)
35+
36+
### D) MOFdb (MOF-only, complex analytics)
37+
when the user asks for:
38+
- MOF-specific properties (surface area, pore metrics, adsorption/isotherms, heats), OR
39+
- advanced analysis requiring multi-table joins / ranking / statistics
40+
41+
## 1) BOHRIUMPUBLIC PARAMETERS (fetch_bohrium_crystals)
42+
### FILTER OPTIONS
43+
- **Formula**: chemical formula string (e.g., `"CoH12(BrO3)2"`)
44+
- **Elements**: list of required elements (e.g., `["Co","O"]`)
45+
- **Match mode** (applies to both `formula` and `elements`):
46+
- `0` = contains (e.g., formula `"Co"` matches `"CoO"`, `"CoH12(BrO3)2"`; elements `["Co"]` matches materials containing Co + anything else)
47+
- `1` = exact-only match (formula must match exactly; elements list must match **exactly and only** those elements)
48+
- **Space group**: use the space group number (e.g., `14` for P2₁/c)
49+
- **Atom count range**: filter by number of atoms in the unit cell, e.g. `["10","100"]`
50+
- **Predicted formation energy**: range filter in eV, e.g. `["-2","0"]`
51+
- **Band gap**: eV range [lo, hi] (omitted bound defaults to 0/100), e.g. ["0","3"], ["1","100"]
52+
- **Result limit**: maximum number of results (`n_results`)
53+
- **Output formats**:
54+
- `"cif"` → crystallographic structure files
55+
- `"json"` → complete metadata
56+
57+
## HOW TO CHOOSE PARAMETERS
58+
- If user specifies a **formula** → set `formula` and choose `match_mode`:
59+
- `0` if the user means "contains fragment"
60+
- `1` if the user means "exact formula"
61+
- If user specifies **elements** → set `elements` and choose `match_mode`:
62+
- `0` if the user means "must include these elements"
63+
- `1` if the user means "must have exactly these elements and nothing else"
64+
- If user specifies a **space group number** → set `spacegroup_number`
65+
- If user specifies an **atom count range** → set `atom_count_range`
66+
- If user specifies **formation energy or band gap ranges** → set the corresponding ranges
67+
- If the user requests **metadata only** → use `output_formats=['json']`
68+
- If the user requests **downloadable crystal files** → use `output_formats=['cif']`
69+
70+
## PARAMETER EXAMPLES
71+
1) 用户:检索 SrTiO₃ 的晶体结构,并以JSON格式返回
72+
→ Tool: fetch_structures_from_db
73+
formula: "SrTiO3"
74+
match_mode: 1
75+
output_formats: ["json"]
76+
77+
2) 用户:在Materials Project中检索并返回3个带隙大于2 eV的氧化物结构
78+
→ Tool: fetch_structures_from_db
79+
elements: ["O"]
80+
match_mode: 0
81+
band_gap_range: ["2","100"]
82+
n_results: 3
83+
84+
3) 用户:找出空间群编号 14,原子数 50–100 的晶体
85+
→ Tool: fetch_structures_from_db
86+
spacegroup_number: 14
87+
atom_count_range: ["50","100"]
88+
89+
4) 用户:检索 FeNi 合金的结构
90+
→ Tool: fetch_structures_from_db
91+
elements: ["Fe","Ni"] # 合金只含有Fe和Ni元素,不能含有其他元素
92+
match_mode: 1 # 合金需要精确匹配
93+
94+
5) 用户:找所有化学式中包含 SiO3 的材料
95+
→ Tool: fetch_structures_from_db
96+
formula: "SiO3"
97+
match_mode: 0
98+
99+
## 2) OPENLAM PARAMETERS (fetch_structures_from_db)
100+
### FILTER OPTIONS
101+
- **Formula**: chemical formula string (e.g., `"Fe2O3"`)
102+
- **Energy**: `min_energy` and/or `max_energy` in eV
103+
- **Submission time**: ISO UTC date-time (`min_submission_time`, `max_submission_time`)
104+
- **Result limit**: `n_results`
105+
- **Output formats**: `"cif"` or `"json"`
106+
107+
### EXAMPLES
108+
1) 用户:查找 Fe2O3 的 5 个晶体结构,导出为 CIF
109+
→ Tool: fetch_structures_from_db
110+
formula: "Fe2O3"
111+
n_results: 5
112+
output_formats: ["cif"]
113+
114+
2) 用户:查找能量在 -10 到 20 eV 之间,2024 年后上传的材料
115+
→ Tool: fetch_structures_from_db
116+
min_energy: -10.0
117+
max_energy: 20.0
118+
min_submission_time: "2024-01-01T00:00:00Z"
119+
120+
## 3) MOFDB PARAMETERS (fetch_structures_from_db)
121+
### INPUT
122+
- **sql**: SQL query string (use CTEs, window functions, joins as needed)
123+
- **n_results**: controls SQL LIMIT (when applicable) and returned structures
124+
125+
### EXAMPLE
126+
用户:统计各数据库的 MOF 数量
127+
→ Tool: fetch_structures_from_db
128+
sql: "SELECT database, COUNT(*) AS count FROM mofs GROUP BY database ORDER BY count DESC"
129+
130+
## 4) OPTIMADE PARAMETERS (fetch_structures_from_db)
131+
### MINIMUM SAFE OPTIMADE SYNTAX RULES (DO NOT VIOLATE)
132+
- Allowed operators ONLY: =, !=, <, <=, >, >=, AND, OR, NOT, HAS, HAS ALL, HAS ANY, IS KNOWN, IS UNKNOWN
133+
- All strings MUST be in double quotes: "Fe", "SiO2"
134+
- Do NOT use CONTAINS / LIKE / IN / regex / invented fields
135+
- To express "only these elements": use `elements HAS ALL ... AND nelements = N`
136+
137+
### TOOL CHOICE
138+
- If user gives space group number → `fetch_structures_from_db(base_filter, spg_number, ...)`
139+
- If user gives band gap range → `fetch_structures_from_db(base_filter, min_bg, max_bg, ...)`
140+
- Else → `fetch_structures_from_db(filter, ...)`
141+
142+
### EXAMPLES
143+
1) 用户:找空间群 225 的 MgO(rocksalt),返回 CIF
144+
→ Tool: fetch_structures_from_db
145+
base_filter: chemical_formula_reduced="MgO"
146+
spg_number: 225
147+
as_format: "cif"
148+
149+
2) 用户:找含 Al 且带隙 1–2 eV 的材料,返回 JSON
150+
→ Tool: fetch_structures_from_db
151+
base_filter: elements HAS "Al"
152+
min_bg: 1.0
153+
max_bg: 2.0
154+
as_format: "json"
155+
"""
156+
157+
# Prompt fragment describing how the LLM must format its reply, keyed on the
# `by_source` value in the tool response (mofdb / optimade / openlam / other).
StructureSearchAgentSummaryPrompt = """
## RESPONSE FORMAT

**If the tool response indicates `by_source` = "mofdb"** (MOFdb results):
1. Brief explanation of the SQL query used
2. Markdown table of retrieved MOFs with relevant columns
3. Output directory path for download/archive
4. Key findings from results (if applicable)

**If the tool response indicates `by_source` = "optimade"** (OPTIMADE results):
The response must always have three parts in order:
1. A brief explanation of the applied filters and providers.
2. A Markdown table listing all retrieved results (NO omissions/truncation; number of rows must exactly equal `n_found`).
3. A download link for an archive (.tgz) if provided by the tool.
Each table must always include the following nine columns in this fixed order:
(1) Formula (`attributes.chemical_formula_reduced`)
(2) Elements (infer from formula)
(3) Atom count (if available; else **Not Provided**)
(4) Space group (`Symbol(Number)` if possible; else **Not Provided**)
(5) Energy / Formation energy (if available; else **Not Provided**)
(6) Band gap (if available; else **Not Provided**)
(7) Download link (CIF or JSON file)
(8) Provider (infer from provider URL)
(9) ID (`id`)
Missing values must be exactly **Not Provided**. If `n_found = 0`, do not generate an empty table.

**If the tool response indicates `by_source` = "openlam"** (OpenLAM results):
The response must always include:
1. ✅ A brief explanation of the filters applied
2. 📊 A Markdown table of the retrieved structures
- Columns (fixed order):
(1) Formula (`formula`)
(2) Elements (deduced from `formula`)
(3) Atom count → **Not Provided**
(4) Download link (CIF/JSON, based on requested output)
(5) Source database → always `"OpenLAM"`
(6) ID (`id`)
- Fill missing values with exactly **Not Provided**
- Number of rows **must exactly equal** `n_found`
3. 📦 The `output_dir` path returned by the tool (for download/archive)
If `n_found = 0`, clearly state no matches were found, repeat the applied filters, and suggest loosening criteria. Do **not** generate an empty table.

**Otherwise** (BohriumPublic or other sources):
The response must always include:
1. ✅ A brief explanation of the filters applied
2. 📊 A Markdown table of the retrieved structures
- Columns (fixed order):
(1) Formula (`formula`)
(2) Elements (deduced from `formula`)
(3) Atom count (`crystal_ext.number_of_atoms` if available; else **Not Provided**)
(4) Space group (`Symbol(Number)` if `crystal_ext.symbol` is available and number can be mapped; else **Not Provided**)
(5) Energy / Formation energy (`crystal_ext.predicted_formation_energy` if available; else **Not Provided**)
(6) Band gap (`crystal_ext.band_gap` if available; else **Not Provided**)
(7) Download link (CIF/JSON, based on `output_formats`)
(8) Source database → always `"BohriumPublic"`
(9) ID (`id`)
- Fill missing values with exactly **Not Provided**
- Number of rows **must exactly equal** `n_found`
3. 📦 The `output_dir` path returned by the tool (for download/archive)

If `n_found = 0`, clearly state that no matches were found, repeat the applied filters, and suggest loosening criteria. Do **not** generate an empty table.
"""

0 commit comments

Comments
 (0)