"""
This example shows the LLM as a judge pattern. The first agent generates a stock summary
from the research notes and the second agent evaluates the summary. The first agent is asked
to continually improve the summary until the evaluator gives a pass.
`3_research_notes.txt` is the text file generated by our previous section where our multi-agent
orchestration pattern is demonstrated.
Usage:
python 4_judge_critic.py
🤖: What company are you interested in?
👧: bbca
"""
import asyncio
import json
from dataclasses import dataclass
from typing import Literal

from dotenv import load_dotenv

from agents import Agent, ItemHelpers, Runner, TResponseInputItem, function_tool, trace

load_dotenv()
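

# Tool for the summarizer agent: @function_tool exposes this function (its name,
# signature, and docstring) to the LLM so it can fetch the raw research notes.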
@function_tool
def read_company_data_from_txt() -> str:
    """
    Read company data from the text file 3_research_notes.txt.
    """
    try:
        with open("3_research_notes.txt", "r") as file:
            return file.read()
    except FileNotFoundError:
        return "File not found. Please ensure the file exists."
    except Exception as e:
        return str(e)
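

# The summarizer ("generator") agent: it reads the research notes via the tool
# above and drafts a 2-3 paragraph report for the requested ticker. It is named
# distinctly from the tool function it wraps so the two don't shadow each other.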
summarizer_agent = Agent(
    name="summarizer_agent",
    instructions=(
        "Given a company name or ticker by the user, read the company data from the text file 3_research_notes.txt. "
        "Find the specific company section that matches the user's request (e.g., if the user asks for 'BBCA', find the BBCA section only). "
        "Summarize ONLY that specific company into 2-3 paragraphs and be informative so it reads like a professional report. "
        "If there is any feedback, incorporate it to improve the report. If the ticker is not found, say so. "
        "DO NOT summarize other companies - focus only on the requested company."
    ),
    tools=[read_company_data_from_txt],
)
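

# Structured output for the judge: a free-text critique plus a discrete verdict.
# Setting output_type=EvaluationFeedback below makes the evaluator return this
# dataclass instead of plain text.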
@dataclass
class EvaluationFeedback:
    feedback: str
    score: Literal["pass", "expect_improvement", "fail"]
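

# The judge ("critic") agent. It is told never to pass the first draft, which
# forces at least one refinement round, and to grow more lenient over time.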
evaluator = Agent[None](
    name="evaluator",
    instructions=(
        "You evaluate a stock overview summary and decide if it's good enough. "
        "Evaluate based on these criteria:\n"
        "1. **Content Quality**: Contains key financial metrics, business model, and market position from source data\n"
        "2. **Clarity**: Well-structured, professional language, easy to understand\n"
        "3. **Completeness**: Covers company overview, financial performance, and market position using available data\n"
        "4. **Accuracy**: Information aligns with source data from research notes\n"
        "5. **Length**: 2-3 paragraphs as requested, not too brief or verbose\n"
        "6. **Professional Tone**: Reads like a professional investment summary\n"
        "\n"
        "Give 'pass' only if ALL criteria are met. Focus on what's available in the source data. "
        "Never give a pass on the first try, but be increasingly generous so the chance of passing increases over time."
    ),
    output_type=EvaluationFeedback,
)


async def main() -> None:
    """
    Runs the summarizer agent and evaluates the output iteratively.

    The summarizer generates a company summary and the evaluator scores it. If the
    score is not 'pass', the feedback is appended to the conversation and the
    summarizer tries again. This repeats until the summary passes or max_attempts
    is reached.
    """
    msg = input("🤖: What company are you interested in? \n👧: ")
    input_items: list[TResponseInputItem] = [{"content": msg, "role": "user"}]
    summary: str | None = None

    # We'll run the entire workflow in a single trace
    with trace("LLM as a judge"):
        max_attempts = 5
        attempt = 0
        while True:
            attempt += 1
            if attempt > max_attempts:
                print(f"Reached maximum attempts ({max_attempts}). Exiting with current summary.")
                break

            # Adjust judge strictness based on attempt number
            if attempt >= 3:
                evaluator.instructions = (
                    "You evaluate a stock overview summary and decide if it's good enough. "
                    "Be more generous now - if the summary covers key metrics, is well-structured, "
                    "and reads professionally, give it a pass. "
                    "Focus on overall quality rather than minor imperfections."
                )

            # ── Step 1: Run the Summarizer Agent ──────────────────────────
            summarized_results = await Runner.run(
                summarizer_agent,
                input_items,
            )
            # Update input_items to include the summarizer's response (for chaining)
            input_items = summarized_results.to_input_list()
            # Extract the plain-text summary from the summarizer output
            summary = ItemHelpers.text_message_outputs(summarized_results.new_items)

            print(f"\n📝 ITERATION {attempt} SUMMARY:")
            print("-" * 50)
            print(summary)
            print("-" * 50)

            # ── Step 2: Run the Evaluator Agent ───────────────────────────
            evaluator_result = await Runner.run(evaluator, input_items)
            result: EvaluationFeedback = evaluator_result.final_output
            print(f"Evaluator score: {result.score}")
            print(f"Evaluator feedback: {result.feedback}")

            # ── Step 3: Check Evaluation Result ───────────────────────────
            if result.score == "pass":
                print("The stock summary is 💡 good enough, exiting.")
                break

            # If not passing and attempts remain, attach feedback for next round
            print("Re-running with feedback")
            input_items.append({
                "content": f"Feedback: {result.feedback}",
                "role": "user",
            })

    # ── Final Output ──────────────────────────────────────────────────────
    print("\n" + "=" * 60)
    print("FINAL INVESTMENT SUMMARY")
    print("=" * 60)
    print(summary)
    print("=" * 60)

    # Optional: save the full conversation history for debugging. default=str
    # guards against any non-JSON-serializable items in the history.
    with open("debug_output.json", "w") as f:
        json.dump(input_items, f, indent=2, ensure_ascii=False, default=str)
    print("Debug info saved to debug_output.json")


if __name__ == "__main__":
    asyncio.run(main())