Skip to content

Commit d8a36f6

Browse files
authored
Merge branch 'microsoft:main' into zhangdd/feat/batch_mode
2 parents 0704650 + aa97c90 commit d8a36f6

3 files changed

Lines changed: 104 additions & 18 deletions

File tree

aiopslab/orchestrator/orchestrator.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020

2121
class Orchestrator:
22-
def __init__(self):
22+
def __init__(self, results_dir=None):
2323
self.agent = None
2424
self.session = None
2525
self.parser = ResponseParser()
@@ -29,6 +29,7 @@ def __init__(self):
2929
self.execution_end_time = None
3030
self.kubectl = KubeCtl()
3131
self.use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"
32+
self.results_dir = results_dir
3233

3334
def init_problem(self, problem_id: str):
3435
"""Initialize a problem instance for the agent to solve.
@@ -42,7 +43,7 @@ def init_problem(self, problem_id: str):
4243
# Start timer
4344
self.execution_start_time = time.time()
4445

45-
self.session = Session()
46+
self.session = Session(results_dir=self.results_dir)
4647
print(f"Session ID: {self.session.session_id}")
4748
prob = self.probs.get_problem_instance(problem_id)
4849
deployment = self.probs.get_problem_deployment(problem_id)

aiopslab/session.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class SessionItem(BaseModel):
1818

1919

2020
class Session:
21-
def __init__(self) -> None:
21+
def __init__(self, results_dir=None) -> None:
2222
self.session_id = uuid.uuid4()
2323
self.pid = None
2424
self.problem = None
@@ -28,6 +28,7 @@ def __init__(self) -> None:
2828
self.start_time = None
2929
self.end_time = None
3030
self.agent_name = None
31+
self.results_dir = results_dir
3132

3233
def set_problem(self, problem, pid=None):
3334
"""Set the problem instance for the session.
@@ -115,19 +116,21 @@ def to_dict(self):
115116

116117
def to_json(self):
117118
"""Save the session to a JSON file."""
118-
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
119+
results_dir = self.results_dir if self.results_dir else RESULTS_DIR
120+
results_dir.mkdir(parents=True, exist_ok=True)
119121

120-
with open(RESULTS_DIR / f"{self.session_id}_{self.start_time}.json", "w") as f:
122+
with open(results_dir / f"{self.session_id}_{self.start_time}.json", "w") as f:
121123
json.dump(self.to_dict(), f, indent=4)
122-
124+
123125
def to_wandb(self):
124126
"""Log the session to Weights & Biases."""
125127
wandb.log(self.to_dict())
126128

127129
def from_json(self, filename: str):
128130
"""Load a session from a JSON file."""
131+
results_dir = self.results_dir if self.results_dir else RESULTS_DIR
129132

130-
with open(RESULTS_DIR / filename, "r") as f:
133+
with open(results_dir / filename, "r") as f:
131134
data = json.load(f)
132135

133136
self.session_id = data.get("session_id")

clients/openrouter.py

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
import asyncio
99
import tiktoken
1010
import wandb
11+
import argparse
12+
import json
13+
from pathlib import Path
1114
from aiopslab.orchestrator import Orchestrator
1215
from aiopslab.orchestrator.problems.registry import ProblemRegistry
1316
from clients.utils.llm import OpenRouterClient
@@ -107,33 +110,112 @@ def _filter_dict(self, dictionary, filter_func):
107110
return {k: v for k, v in dictionary.items() if filter_func(k, v)}
108111

109112

113+
def _load_result_record(result_file: Path):
    """Return the JSON object stored in ``result_file``, or ``None`` if unusable.

    A file is unusable when it cannot be read, cannot be decoded or parsed as
    JSON, or parses to something other than a JSON object (dict).
    """
    try:
        with open(result_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    # Valid JSON such as `null`, `7` or `[]` is not a result record.
    return data if isinstance(data, dict) else None


def get_completed_problems(results_dir: Path, agent_name: str, model: str) -> set:
    """Get set of completed problem IDs from existing result files.

    Two layouts are scanned:

    * organized: ``results_dir/<agent_name>/<model with "/" replaced by "_">/*.json``;
      every record that has a ``problem_id`` key counts.
    * legacy flat: ``results_dir/*.json``; a record counts only when it has a
      ``problem_id`` key, its ``agent`` value equals ``agent_name``, and the
      last ``/``-separated component of ``model`` occurs in the file's path.

    Unreadable, malformed, or non-object JSON files are skipped silently.

    Args:
        results_dir: Base results directory (a ``Path`` or path string).
        agent_name: Agent name used for the sub-directory and the ``agent`` check.
        model: Model identifier, e.g. ``"openai/gpt-4o-mini"``.

    Returns:
        The set of ``problem_id`` values found in matching result files.
    """
    results_dir = Path(results_dir)
    completed = set()

    # Look in organized directory structure first
    organized_dir = results_dir / agent_name / model.replace("/", "_")
    if organized_dir.exists():
        for result_file in organized_dir.glob("*.json"):
            data = _load_result_record(result_file)
            if data is not None and 'problem_id' in data:
                completed.add(data['problem_id'])

    # Also check legacy flat structure
    model_short = model.split('/')[-1]
    for result_file in results_dir.glob("*.json"):
        data = _load_result_record(result_file)
        if data is None or 'problem_id' not in data:
            continue
        # NOTE: the model name is matched against the whole path string,
        # so a parent directory containing it also satisfies the check.
        if data.get('agent') == agent_name and model_short in str(result_file):
            completed.add(data['problem_id'])

    return completed
142+
143+
def setup_results_directory(model: str, agent_name: str = "openrouter",
                            results_base=None) -> Path:
    """Setup organized results directory structure.

    Creates (if missing) and returns ``<results_base>/<agent_name>/<model_safe>``,
    where ``model_safe`` is ``model`` with every ``/`` replaced by ``_`` so the
    model identifier maps to a single directory level.

    Args:
        model: Model identifier, e.g. ``"openai/gpt-4o-mini"``.
        agent_name: Name of the agent sub-directory.
        results_base: Optional base directory (``Path`` or path string).
            Defaults to ``aiopslab/data/results``, resolved relative to the
            current working directory.

    Returns:
        Path of the per-agent, per-model results directory.
    """
    if results_base is None:
        results_base = Path("aiopslab/data/results")
    else:
        results_base = Path(results_base)

    # Create organized structure: results/{agent}/{model_safe}/
    model_safe = model.replace("/", "_")
    results_dir = results_base / agent_name / model_safe
    results_dir.mkdir(parents=True, exist_ok=True)

    return results_dir
153+
110154
if __name__ == "__main__":
155+
parser = argparse.ArgumentParser(description='Run OpenRouter agent on AIOpsLab problems')
156+
parser.add_argument('--skip-completed', action='store_true',
157+
help='Skip problems that have already been completed')
158+
parser.add_argument('--problem-ids', nargs='+',
159+
help='Run only specific problem IDs')
160+
parser.add_argument('--max-steps', type=int, default=30,
161+
help='Maximum steps per problem (default: 30)')
162+
parser.add_argument('--model', type=str,
163+
default=os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini"),
164+
help='OpenRouter model to use')
165+
166+
args = parser.parse_args()
167+
111168
# Load use_wandb from environment variable with a default of False
112169
use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"
113170

114171
if use_wandb:
115172
# Initialize wandb running
116173
wandb.init(project="AIOpsLab", entity="AIOpsLab")
117174

118-
# You can specify different models supported by OpenRouter
119-
# Popular models:
120-
# - "anthropic/claude-3.5-sonnet"
121-
# - "openai/gpt-4-turbo"
122-
# - "meta-llama/llama-3.1-8b-instruct"
123-
# - "google/gemini-pro"
124-
# - "mistralai/mixtral-8x7b-instruct"
125-
model = os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini")
175+
model = args.model
176+
agent_name = "openrouter"
126177

178+
# Setup organized results directory
179+
results_dir = setup_results_directory(model, agent_name)
180+
print(f"Results will be saved to: {results_dir}")
181+
182+
# Get all problems
127183
problems = ProblemRegistry().PROBLEM_REGISTRY
184+
185+
# Filter problems if specific IDs requested
186+
if args.problem_ids:
187+
problems = {pid: problems[pid] for pid in args.problem_ids if pid in problems}
188+
if not problems:
189+
print("No valid problem IDs found")
190+
exit(1)
191+
192+
# Skip completed problems if requested
193+
if args.skip_completed:
194+
completed_problems = get_completed_problems(
195+
Path("aiopslab/data/results"), agent_name, model
196+
)
197+
problems = {pid: prob for pid, prob in problems.items()
198+
if pid not in completed_problems}
199+
200+
print(f"Found {len(completed_problems)} completed problems")
201+
print(f"Running {len(problems)} remaining problems")
202+
203+
if not problems:
204+
print("All problems have been completed!")
205+
exit(0)
206+
207+
print(f"Running {len(problems)} problems with model: {model}")
208+
128209
for pid in problems:
210+
print(f"\n=== Starting problem: {pid} ===")
129211
agent = OpenRouterAgent(model=model)
130212

131-
orchestrator = Orchestrator()
132-
orchestrator.register_agent(agent, name="openrouter")
213+
orchestrator = Orchestrator(results_dir=results_dir)
214+
orchestrator.register_agent(agent, name=agent_name)
133215

134216
problem_desc, instructs, apis = orchestrator.init_problem(pid)
135217
agent.init_context(problem_desc, instructs, apis)
136-
asyncio.run(orchestrator.start_problem(max_steps=30))
218+
asyncio.run(orchestrator.start_problem(max_steps=args.max_steps))
137219

138220
if use_wandb:
139221
# Finish the wandb run

0 commit comments

Comments
 (0)