Skip to content

Commit 172e933

Browse files
committed
refactor:重构了流程代理创建指令及步骤验证以增强灵活性和可扩展性
1 parent ce379f8 commit 172e933

6 files changed

Lines changed: 111 additions & 61 deletions

File tree

agents/matmaster_agent/core_agents/public_agents/job_agents/submit_core_agent/agent.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,13 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
100100
yield tool_response_failed_event
101101

102102
# Mark the current plan step as failed
103-
update_plan = copy.deepcopy(ctx.session.state['plan'])
104-
update_plan['steps'][ctx.session.state['plan_index']][
105-
'status'
106-
] = 'failed'
107-
yield update_state_event(ctx, state_delta={'plan': update_plan})
103+
post_execution_step = copy.deepcopy(get_current_step(ctx))
104+
post_execution_step[CURRENT_STEP_STATUS] = (
105+
PlanStepStatusEnum.FAILED
106+
)
107+
yield update_state_event(
108+
ctx, state_delta={CURRENT_STEP: post_execution_step}
109+
)
108110

109111
raise RuntimeError('Tool Execution Failed')
110112
dict_result = load_tool_response(first_part)

agents/matmaster_agent/flow_agents/agent.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898
is_job_submitted_step,
9999
)
100100
from agents.matmaster_agent.flow_agents.step_validation_agent.prompt import (
101-
STEP_VALIDATION_INSTRUCTION,
101+
create_step_validation_instruction,
102102
)
103103
from agents.matmaster_agent.flow_agents.step_validation_agent.schema import (
104104
StepValidationSchema,
@@ -109,6 +109,7 @@
109109
)
110110
from agents.matmaster_agent.flow_agents.thinking_agent.constant import THINKING_AGENT
111111
from agents.matmaster_agent.flow_agents.utils import (
112+
find_alternative_tool,
112113
get_tools_list,
113114
scenes_contain_query_job_status,
114115
should_bypass_confirmation,
@@ -140,6 +141,7 @@
140141
from agents.matmaster_agent.services.session_files import get_session_files
141142
from agents.matmaster_agent.state import (
142143
CURRENT_STEP,
144+
CURRENT_STEP_TOOL_NAME,
143145
EXPAND,
144146
FINISHED_STATE,
145147
HISTORY_STEPS,
@@ -336,11 +338,19 @@ def all_finished_agent(self) -> DisallowTransferAndContentLimitSchemaAgent:
336338
def _build_execution_agent_for_plan(
337339
self, ctx: InvocationContext
338340
) -> MatMasterSupervisorAgent:
341+
current_step = get_current_step(ctx)
342+
current_step_tool_name = current_step.get(CURRENT_STEP_TOOL_NAME)
343+
belonging_agent = ALL_TOOLS.get(current_step_tool_name, {}).get(
344+
'belonging_agent'
345+
)
346+
339347
step_validation_agent = DisallowTransferAndContentLimitSchemaAgent(
340348
name='step_validation_agent',
341349
model=MatMasterLlmConfig.tool_schema_model,
342350
description='校验步骤执行结果是否合理',
343-
instruction=STEP_VALIDATION_INSTRUCTION,
351+
instruction=create_step_validation_instruction(
352+
find_alternative_tool(current_step_tool_name)
353+
),
344354
output_schema=StepValidationSchema,
345355
state_key='step_validation',
346356
after_model_callback=MatMasterLlmConfig.opik_tracer.after_model_callback,
@@ -356,10 +366,6 @@ def _build_execution_agent_for_plan(
356366
before_model_callback=filter_llm_contents,
357367
after_model_callback=MatMasterLlmConfig.opik_tracer.after_model_callback,
358368
)
359-
current_step = get_current_step(ctx)
360-
tool_name = current_step.get('tool_name')
361-
belonging_agent = ALL_TOOLS.get(tool_name, {}).get('belonging_agent')
362-
363369
execution_agent = MatMasterSupervisorAgent(
364370
name='execution_agent',
365371
model=MatMasterLlmConfig.default_litellm_model,
@@ -872,9 +878,10 @@ async def _run_research_flow(
872878
):
873879
yield _scene_event
874880

881+
execution_count = 0
875882
while True:
876883
if not is_job_submitted_step(ctx):
877-
skip_thinking = scenes_contain_query_job_status(ctx)
884+
skip_thinking = scenes_contain_query_job_status(ctx) or execution_count
878885
async for _step_make_event in self._run_step_make_agent(
879886
ctx,
880887
UPDATE_USER_CONTENT,
@@ -885,6 +892,7 @@ async def _run_research_flow(
885892

886893
async for _plan_execute_event in self._run_plan_execute_agent(ctx):
887894
yield _plan_execute_event
895+
execution_count += 1
888896

889897
# Check whether we are in the phase of waiting for an asynchronous job to finish
890898
if not is_job_submitted_step(ctx):

agents/matmaster_agent/flow_agents/all_finished_agent/prompt.py

Lines changed: 67 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -11,53 +11,80 @@ def create_all_finished_instruction(user_request, history_steps, session_files):
1111
history_text = json.dumps(history_steps, ensure_ascii=False, indent=2)
1212
session_files_text = json.dumps(session_files, ensure_ascii=False, indent=2)
1313
return f"""
14-
You are a "Goal Completion Judge" agent. Your task is to determine whether the user's
15-
overall final objective/task has been completed *as of now*, based solely on the provided
16-
tool-call history: history_steps and the provided session_files list.
17-
IMPORTANT: The user's goal may be "content in chat" (e.g., a researched tutorial/summary),
18-
not necessarily a file deliverable. Only require session_files evidence when the user
19-
explicitly asked for a file or a file is clearly the expected final deliverable.
20-
IMPORTANT: If the user_request asks for multiple distinct items/entities/examples (e.g., "caffeine and adenosine",
21-
"A and B", "compare X vs Y", "generate N variants"), the goal is finished ONLY when *all* requested items are completed.
22-
Do NOT mark finished=true when only one of the requested items has been produced.
14+
You are a "Goal Completion Judge" agent. Decide whether the user's overall final objective
15+
has been completed *as of now*, based ONLY on history_steps and session_files.
16+
Key principle: "finished" indicates whether the session should STOP now.
17+
- If the goal is completed: finished=true.
18+
- If the goal is NOT completed but still achievable with further actions: finished=false.
19+
- If the goal is NOT completed AND is blocked/unachievable given the evidence: finished=true (Termination/Unachievable), and the reason MUST explicitly say it is not completed but cannot be completed.
20+
21+
IMPORTANT: The user's goal may be "content in chat" (e.g., a tutorial/summary), not necessarily a file.
22+
Only require session_files evidence when the user explicitly asked for a file or a file is clearly the expected deliverable.
23+
24+
IMPORTANT: If user_request asks for multiple items (A and B / compare X vs Y / generate N variants), finished=true ONLY when ALL are done.
25+
26+
IMPORTANT: Treat explicit numeric/parameter constraints (layers, vacuum thickness, slab orientation/cut, supercell expansion like 5×5×1, etc.) as mandatory. finished=true ONLY if history_steps explicitly confirms EACH constraint was applied.
27+
28+
IMPORTANT (NEW, HIGH PRIORITY): history_steps[*].suggestion is PRIMARY evidence for whether the task is still achievable.
29+
- If ANY actionable suggestion exists (even if earlier), and it has NOT been explicitly attempted and exhausted in later history_steps, you MUST set finished=false (unless the goal is already completed).
30+
- Actionable suggestions include: retrying with modified parameters, switching tools/providers, requesting missing inputs, rerunning with fixes, alternative workflows, etc.
31+
- You MUST NOT output finished=true (Termination/Unachievable) when there exists any untried actionable suggestion.
32+
- Only consider Termination/Unachievable when (a) NOT completed, AND (b) all actionable suggestions have been tried (and are evidenced as tried) with continued failure, AND (c) no remaining viable next action is suggested anywhere in history_steps.
33+
34+
CRITICAL: Do NOT treat "suggestion was not acted upon" as evidence of unachievability.
35+
If there exists any actionable history_steps[*].suggestion that has not been tried, the task is still achievable => finished=false.
36+
2337
# Input
24-
history_steps is a list. Each element is a past tool invocation record, typically including
25-
(but not limited to):
26-
- tool_name: the tool name
27-
- step_description: what this step attempted to do
28-
- status: the step status (e.g., success/failed/running/cancelled/unknown, etc.)
29-
- other fields: such as result/output/error/args/time, etc.
30-
session_files is a list of file links (OSS URLs). Only files that were actually generated
31-
and persisted for this session will appear here. Use session_files as verifiable evidence
32-
that a file deliverable truly exists (only when a file deliverable is required).
33-
Below in the raw user_request:
38+
user_request:
3439
{user_request}
35-
Below is the raw history_steps data (JSON):
40+
history_steps (JSON):
3641
{history_text}
37-
Below is the raw session_files data (JSON):
42+
session_files (JSON):
3843
{session_files_text}
44+
3945
# Decision Rules (must follow)
40-
1) Use "whether the user's final goal is achieved" as the ONLY criterion, not whether all steps were executed.
41-
2) Consider the expected deliverable type based on user_request:
42-
- If the user asked for a file/output artifact (e.g., PDF/DOCX/ZIP/code project), you MUST verify the file exists by checking
43-
that an appropriate OSS link is present in session_files; otherwise finished=false.
44-
- If the user asked for "in-chat content" (e.g., search + summarize + tutorial), you should judge completion by whether the final
45-
requested content is already present/produced in history_steps outputs (e.g., the assistant/tool produced a complete tutorial/summary).
46-
3) If any critical step failed, is missing, is still running, or the outputs are insufficient to prove goal completion, set finished=false.
47-
4) If the information in history_steps and session_files is insufficient to confirm completion (e.g., no final summary/tutorial text,
48-
only partial logs; or a required output file link is not present in session_files),
49-
you MUST return finished=false and explain what information is missing in reason.
50-
5) If there are contradictions in history_steps, prefer the later entries. If you still cannot decide, return finished=false
51-
and explain the contradiction in reason.
52-
6) Do NOT assume results that are not explicitly supported by history_steps or session_files. Judge only from verifiable evidence.
53-
7) Termination/Unachievable rule: If the goal is clearly unachievable given the current context (e.g., repeated critical failures with no viable next action, missing required inputs that cannot be obtained from history_steps/session_files, or hard constraints prevent completion), you MUST return finished=true to terminate, and set reason to explicitly state that the task is not completed but cannot be completed (include the key blocking evidence).
54-
# Output Format (very important)
55-
You must output ONLY ONE JSON object that strictly matches this schema:
46+
1) Judge ONLY the user's final goal completion / stop condition, not whether all intermediate steps ran.
47+
48+
2) Deliverable type:
49+
- If a file artifact is required (PDF/DOCX/ZIP/code project/structure file, etc.), you MUST verify an appropriate OSS link exists in session_files; otherwise finished=false (unless Termination/Unachievable applies).
50+
- If in-chat content is required, verify the complete requested content already exists in history_steps outputs; otherwise finished=false (unless Termination/Unachievable applies).
51+
52+
3) If any critical step is failed/missing/running OR outputs are insufficient to prove completion, set finished=false (unless Termination/Unachievable applies).
53+
54+
4) Insufficient evidence => finished=false and state exactly what is missing (unless Termination/Unachievable applies).
55+
56+
5) Contradictions: prefer later entries; if still unclear => finished=false and explain contradiction (unless Termination/Unachievable applies).
57+
58+
6) Do NOT assume results not explicitly supported by history_steps/session_files.
59+
60+
6.1) For explicit parameter constraints, if ANY constraint is not explicitly evidenced, finished=false (unless Termination/Unachievable applies).
61+
62+
7) Suggestion-first achievability check (MUST APPLY BEFORE declaring finished=true for Termination/Unachievable):
63+
- Scan ALL history_steps for actionable suggestions.
64+
- If any actionable suggestion is not explicitly shown as attempted and exhausted, output finished=false.
65+
66+
8) Termination/Unachievable (STOP even though not done):
67+
You may output finished=true for Termination/Unachievable ONLY if:
68+
- The goal is NOT completed, AND
69+
- history_steps provide concrete evidence that no viable next action exists, AND
70+
- EVERY actionable history_steps[*].suggestion has been explicitly tried in later history_steps and still failed, leaving no remaining options.
71+
If ANY unresolved suggestion proposes a viable next action (e.g., change parameters, switch provider/tool, request missing info),
72+
you MUST output finished=false (the session should continue), unless the goal is already completed.
73+
74+
If you output finished=true (Termination/Unachievable), the reason MUST include:
75+
- "NOT completed" and
76+
- "cannot be completed / unachievable" and
77+
- the blocking evidence (specific failed steps / missing inputs).
78+
79+
You MUST NOT output finished=true (Termination/Unachievable) when the only blocking evidence is that a tool failed once and the agent has not yet tried actionable suggestions (e.g., switching provider/tool, changing parameters). In that case, output finished=false.
80+
81+
# Output Format
82+
Output ONLY ONE JSON object exactly:
5683
{{
5784
"finished": true|false,
58-
"reason": "A brief, specific explanation in English that cites key evidence from history_steps and/or session_files (e.g., a tool_name status/output; or the presence/absence of an OSS link when a file is required). If not finished, state the critical blocking reason(s) or missing info. If finished=true due to the Termination/Unachievable rule, explicitly say it is NOT completed but is impossible/unachievable to complete given the evidence."
85+
"reason": "Brief, specific English explanation citing concrete evidence from history_steps and/or session_files. If using Termination/Unachievable, explicitly state: NOT completed but cannot be completed, and cite the blocking evidence."
5986
}}
87+
6088
# Output Constraints
61-
- Output ONLY valid JSON (no Markdown, no code fences, no extra commentary).
62-
- reason must be an English string and should reference concrete evidence from history_steps and/or session_files.
89+
- Output ONLY valid JSON (no Markdown / code fences / extra text).
6390
""".strip()

agents/matmaster_agent/flow_agents/execution_agent/agent.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@
2323
is_job_submitted_step,
2424
)
2525
from agents.matmaster_agent.flow_agents.step_validation_agent.prompt import (
26-
STEP_VALIDATION_INSTRUCTION,
26+
create_step_validation_instruction,
2727
)
2828
from agents.matmaster_agent.flow_agents.style import separate_card
2929
from agents.matmaster_agent.flow_agents.utils import (
3030
check_plan,
31+
find_alternative_tool,
3132
get_agent_for_tool,
3233
)
3334
from agents.matmaster_agent.llm_config import MatMasterLlmConfig
@@ -36,6 +37,7 @@
3637
from agents.matmaster_agent.state import (
3738
CURRENT_STEP,
3839
CURRENT_STEP_DESCRIPTION,
40+
CURRENT_STEP_STATUS,
3941
CURRENT_STEP_TOOL_NAME,
4042
HISTORY_STEPS,
4143
PLAN,
@@ -209,7 +211,10 @@ async def _tool_result_validation(
209211
)
210212
validation_instruction = '\n'.join(lines)
211213
self.validation_agent.instruction = (
212-
STEP_VALIDATION_INSTRUCTION + validation_instruction
214+
create_step_validation_instruction(
215+
find_alternative_tool(current_step_tool_name)
216+
)
217+
+ validation_instruction
213218
)
214219

215220
async for validation_event in self.validation_agent.run_async(ctx):
@@ -323,15 +328,14 @@ async def _run_events(self, ctx: InvocationContext) -> AsyncGenerator[Event, Non
323328
yield _core_execution_event
324329

325330
post_execution_step = get_current_step(ctx)
326-
# 工具调用结果返回【成功】
327-
if post_execution_step['status'] == PlanStepStatusEnum.SUCCESS:
331+
if post_execution_step[CURRENT_STEP_STATUS] != PlanStepStatusEnum.SUBMITTED:
328332
# Validate the tool result
329333
async for _tool_result_validation_event in self._tool_result_validation(
330334
ctx
331335
):
332336
yield _tool_result_validation_event
333337
# Asynchronous job: return from the current function immediately
334-
elif post_execution_step['status'] == PlanStepStatusEnum.SUBMITTED:
338+
else:
335339
return
336340

337341
update_history_steps = copy.deepcopy(ctx.session.state[HISTORY_STEPS])

agents/matmaster_agent/flow_agents/step_validation_agent/prompt.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
STEP_VALIDATION_INSTRUCTION = """
1+
from typing import List
2+
3+
4+
def create_step_validation_instruction(alternative_tools: List[str]):
5+
return f"""
26
You are a validation agent responsible for checking if the execution result of a step matches the user's requirements and basic chemical/materials science knowledge.
37
48
Your task is to analyze:
@@ -9,6 +13,9 @@
913
1014
Based on this analysis, determine if the result is reasonable and matches expectations.
1115
16+
Backup tools you may suggest using if the result is invalid or uncertain:
17+
{alternative_tools}
18+
1219
# Validation Criteria:
1320
1. **Relevance**: Does the result address the step's intended purpose?
1421
2. **Accuracy**: Is the result consistent with basic chemical/materials science knowledge?
@@ -17,11 +24,12 @@
1724
1825
# Output Format:
1926
You must respond with a JSON object containing:
20-
{
27+
{{
2128
"is_valid": boolean, // true if result matches requirements and knowledge, false otherwise
2229
"reason": "string", // brief explanation of validation result
23-
"confidence": "high|medium|low" // confidence level in the validation
24-
}
30+
"confidence": "high|medium|low", // confidence level in the validation
31+
"suggestion": "string" // actionable suggestion; if invalid/uncertain, suggest fixes or using one of the backup tools above
32+
}}
2533
2634
# Important Rules:
2735
- If the result contains obvious errors (wrong chemical formulas, impossible physical properties, etc.), mark as invalid

agents/matmaster_agent/flow_agents/step_validation_agent/schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ class StepValidationSchema(BaseModel):
55
is_valid: bool
66
reason: str
77
confidence: str # "high", "medium", "low"
8+
suggestion: str

0 commit comments

Comments
 (0)