Skip to content

Commit d742071

Browse files
committed
Add better error handling to Lliam
1 parent 7f84eb2 commit d742071

5 files changed

Lines changed: 296 additions & 10 deletions

File tree

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from dataclasses import dataclass
2+
3+
4+
@dataclass
class DomainError:
    """Base type for errors reported by Lliam domain commands.

    Carries no fields of its own; concrete error types subclass it and add
    the data they need.
    """
7+
8+
9+
@dataclass
class CommandExecutionError(DomainError):
    """Error describing a command that failed while executing."""

    # Name identifying the command that failed.
    command_name: str
    # Human-readable description of the failure.
    message: str

aws_doc_sdk_examples_tools/lliam/entry_points/lliam_app.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import typer
77

88
from aws_doc_sdk_examples_tools.lliam.config import AILLY_DIR, BATCH_PREFIX
9-
from aws_doc_sdk_examples_tools.lliam.domain import commands
9+
from aws_doc_sdk_examples_tools.lliam.domain import commands, errors
1010
from aws_doc_sdk_examples_tools.lliam.service_layer import messagebus, unit_of_work
1111

1212
logging.basicConfig(
@@ -28,7 +28,8 @@ def create_prompts(iam_tributary_root: str, system_prompts: List[str] = []):
2828
out_dir=AILLY_DIR,
2929
)
3030
uow = unit_of_work.FsUnitOfWork()
31-
messagebus.handle(cmd, uow)
31+
errors = messagebus.handle(cmd, uow)
32+
handle_domain_errors(errors)
3233

3334

3435
@app.command()
@@ -50,7 +51,8 @@ def run_ailly(
5051
requested_batches = parse_batch_names(batches)
5152
package_names = parse_package_names(packages)
5253
cmd = commands.RunAilly(batches=requested_batches, packages=package_names)
53-
messagebus.handle(cmd)
54+
errors = messagebus.handle(cmd)
55+
handle_domain_errors(errors)
5456

5557

5658
@app.command()
@@ -75,7 +77,15 @@ def update_reservoir(
7577
cmd = commands.UpdateReservoir(
7678
root=doc_gen_root, batches=batch_names, packages=package_names
7779
)
78-
messagebus.handle(cmd)
80+
errors = messagebus.handle(cmd)
81+
handle_domain_errors(errors)
82+
83+
84+
def handle_domain_errors(errors: List[errors.DomainError]):
    """
    Log any domain errors and terminate the CLI with a failing exit code.

    Args:
        errors: Errors returned by the message bus for the executed command.
            A falsy value (empty list or None) means nothing went wrong.

    Raises:
        typer.Exit: With exit code 1 when at least one error was reported.
    """
    if errors:
        for error in errors:
            logger.error(error)
        # typer.Exit is an exception: it only ends the program when raised.
        # Merely constructing it leaves the process exit status at 0.
        raise typer.Exit(code=1)
7989

8090

8191
def parse_batch_names(batch_names_str: Optional[str]) -> List[str]:

aws_doc_sdk_examples_tools/lliam/service_layer/messagebus.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@ def handle(message: commands.Command, uow: Optional[unit_of_work.FsUnitOfWork] =
1818
while queue:
1919
message = queue.pop(0)
2020
if isinstance(message, commands.Command):
21-
handle_command(message, uow)
21+
return handle_command(message, uow)
2222
else:
2323
raise Exception(f"{message} was not a Command")
2424

2525

2626
def handle_command(command: commands.Command, uow: Optional[unit_of_work.FsUnitOfWork]):
2727
handler = COMMAND_HANDLERS[type(command)]
28-
handler(command, uow)
28+
errors = handler(command, uow)
29+
return errors
2930

3031

3132
COMMAND_HANDLERS: Dict[Type[commands.Command], Callable] = {

aws_doc_sdk_examples_tools/lliam/service_layer/run_ailly.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
import json
22
import logging
3-
import sys
43
import time
54
from collections import defaultdict
5+
from dataclasses import dataclass
66
from datetime import timedelta
77
from pathlib import Path
88
from subprocess import run
99
from typing import Any, Dict, List, Optional, Set
1010

1111
from aws_doc_sdk_examples_tools.lliam.domain.commands import RunAilly
12+
from aws_doc_sdk_examples_tools.lliam.domain.errors import (
13+
CommandExecutionError,
14+
DomainError,
15+
)
1216
from aws_doc_sdk_examples_tools.lliam.config import (
1317
AILLY_DIR_PATH,
1418
BATCH_PREFIX,
@@ -28,11 +32,20 @@
2832
def handle_run_ailly(cmd: RunAilly, uow: None):
2933
resolved_batches = resolve_requested_batches(cmd.batches)
3034

35+
errors: List[DomainError] = []
36+
3137
if resolved_batches:
3238
total_start_time = time.time()
3339

3440
for batch in resolved_batches:
35-
run_ailly_single_batch(batch, cmd.packages)
41+
try:
42+
run_ailly_single_batch(batch, cmd.packages)
43+
except FileNotFoundError as e:
44+
errors.append(
45+
CommandExecutionError(
46+
command_name=cmd.__class__.__name__, message=str(e)
47+
)
48+
)
3649

3750
total_end_time = time.time()
3851
total_duration = total_end_time - total_start_time
@@ -41,6 +54,8 @@ def handle_run_ailly(cmd: RunAilly, uow: None):
4154
f"[TIMECHECK] {num_batches} batches took {format_duration(total_duration)} to run"
4255
)
4356

57+
return errors
58+
4459

4560
def resolve_requested_batches(batch_names: List[str]) -> List[Path]:
4661
if not batch_names:
@@ -79,8 +94,7 @@ def run_ailly_single_batch(batch: Path, packages: List[str] = []) -> None:
7994
paths.extend(package_files)
8095

8196
if not paths:
82-
logger.error(f"No matching files found for packages: {packages}")
83-
sys.exit(1)
97+
raise FileNotFoundError(f"No matching files found for packages: {packages}")
8498

8599
cmd = AILLY_CMD_BASE + paths
86100
else:

process_ailly_files.py

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to process .md and .ailly.md file pairs and generate JSONL output.
4+
5+
This script:
6+
1. Collects file data from .md and .ailly.md pairs
7+
2. Extracts content without front matter
8+
3. Generates JSONL output with prompt and model responses
9+
"""
10+
11+
import os
12+
import json
13+
import re
14+
import argparse
15+
from pathlib import Path
16+
from typing import Dict, List, Tuple, Optional
17+
18+
19+
def collect_file_pairs(
    directory: str, limit: Optional[int] = None
) -> List[Tuple[str, str]]:
    """
    Collect pairs of .md and .ailly.md files from the specified directory.

    A prompt file ``<name>.md`` is paired with the response file
    ``<name>.md.ailly.md`` located in the same directory. Matching is done on
    the full path rather than the bare file name, so files that share a name
    in different subdirectories each keep their own pair instead of
    overwriting one another.

    Args:
        directory: Path to the directory containing the files
        limit: Optional limit on the number of pairs to process; None or a
            non-positive value means no limit

    Returns:
        List of tuples containing (md_file_path, ailly_md_file_path)
    """
    ailly_suffix = ".ailly.md"
    md_paths: List[str] = []
    # Maps the path of the prompt a response belongs to -> response path.
    ailly_by_prompt: Dict[str, str] = {}

    # Walk through the directory and collect all .md and .ailly.md files
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(ailly_suffix):
                # Stripping the suffix yields the path of the paired prompt.
                ailly_by_prompt[file_path[: -len(ailly_suffix)]] = file_path
            elif file.endswith(".md"):
                md_paths.append(file_path)

    # Match the pairs, keeping the order in which prompts were discovered
    pairs = [
        (md_path, ailly_by_prompt[md_path])
        for md_path in md_paths
        if md_path in ailly_by_prompt
    ]

    # Apply limit if specified
    if limit is not None and limit > 0:
        pairs = pairs[:limit]

    return pairs
56+
57+
58+
def extract_content(file_path: str) -> str:
    """
    Read a UTF-8 text file and return its body with leading front matter removed.

    Args:
        file_path: Path to the file

    Returns:
        The file text, minus a front matter block (delimited by --- lines)
        at the very start of the file, with surrounding whitespace stripped
    """
    # Without re.MULTILINE, ^ anchors only at the start of the text, so only a
    # block that opens the file is removed.
    front_matter = re.compile(r"^---\n.*?\n---\n", re.DOTALL)

    with open(file_path, "r", encoding="utf-8") as handle:
        raw_text = handle.read()

    body = front_matter.sub("", raw_text)
    return body.strip()
76+
77+
78+
def get_aillyrc_content(directory: str) -> str:
    """
    Get the content of the .aillyrc file without the front matter.

    The search starts in ``directory`` and moves up one parent at a time
    until a .aillyrc file is found or the top of the path is reached.

    Args:
        directory: Directory containing the .aillyrc file (or a descendant
            of the directory that contains it)

    Returns:
        Content of the .aillyrc file without front matter

    Raises:
        FileNotFoundError: If no .aillyrc file exists in ``directory`` or any
            of its ancestors
    """
    current_dir = directory
    aillyrc_path = None

    while current_dir:
        potential_path = os.path.join(current_dir, ".aillyrc")
        if os.path.exists(potential_path):
            aillyrc_path = potential_path
            break

        parent_dir = os.path.dirname(current_dir)
        # At a filesystem root dirname() returns its argument unchanged
        # ("/" on POSIX, "C:\\" on Windows). Comparing against the parent,
        # rather than the literal "/", ends the walk on every platform.
        if parent_dir == current_dir:
            break
        current_dir = parent_dir

    if not aillyrc_path:
        raise FileNotFoundError("Could not find .aillyrc file")

    return extract_content(aillyrc_path)
103+
104+
105+
def extract_model_identifier(ailly_file_path: str) -> Dict:
    """
    Read the model identifier recorded in an .ailly.md file's front matter.

    Args:
        ailly_file_path: Path to the .ailly.md file

    Returns:
        Dictionary with a "model" and/or "region" entry taken from the
        front matter's debug section; empty when the file has no front
        matter or no debug section
    """
    with open(ailly_file_path, "r", encoding="utf-8") as handle:
        text = handle.read()

    # Front matter must open the file and be delimited by --- lines.
    header = re.search(r"^---\n(.*?)\n---\n", text, re.DOTALL)
    if header is None:
        return {}

    # The debug section runs until the next line starting with a word
    # character, or to the end of the front matter.
    debug_section = re.search(
        r"debug:\s*\n(.*?)(\n\w|$)", header.group(1), re.DOTALL
    )
    if debug_section is None:
        return {}

    debug_body = debug_section.group(1)
    field_patterns = (
        ("model", r"model:\s*(.*?)$"),
        ("region", r"region:\s*(.*?)$"),
    )

    identifier = {}
    for key, pattern in field_patterns:
        found = re.search(pattern, debug_body, re.MULTILINE)
        if found is not None:
            identifier[key] = found.group(1).strip()

    return identifier
143+
144+
145+
def convert_to_jsonl_format(
    file_pairs: List[Tuple[str, str]], aillyrc_content: str
) -> List[Dict]:
    """
    Build one JSONL-ready record per prompt/response file pair.

    Args:
        file_pairs: List of (md_file_path, ailly_md_file_path) tuples
        aillyrc_content: Content of the .aillyrc file, prepended to each prompt

    Returns:
        List of dictionaries, each holding a "prompt" string and a
        one-element "modelResponses" list
    """

    def build_entry(prompt_path: str, response_path: str) -> Dict:
        # Read both bodies first, then the identifier from the response file.
        prompt_body = extract_content(prompt_path)
        response_body = extract_content(response_path)
        identifier = extract_model_identifier(response_path)

        model_response = {"response": response_body, "modelIdentifier": identifier}
        return {
            "prompt": aillyrc_content + "\n\n" + prompt_body,
            "modelResponses": [model_response],
        }

    return [
        build_entry(prompt_path, response_path)
        for prompt_path, response_path in file_pairs
    ]
179+
180+
181+
def write_jsonl_file(entries: List[Dict], output_path: str) -> None:
    """
    Serialize entries to a file as JSON Lines (one JSON document per line).

    Args:
        entries: List of dictionaries to write
        output_path: Path to the output file; an existing file is overwritten
    """
    with open(output_path, "w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(record) + "\n" for record in entries)
192+
193+
194+
def main():
    """Parse command-line options, then collect, convert and write the JSONL output."""
    cli = argparse.ArgumentParser(
        description="Process .md and .ailly.md file pairs and generate JSONL output."
    )
    # (long flag, short flag, value type, default, help text)
    option_specs = (
        (
            "--directory",
            "-d",
            str,
            ".ailly_iam_policy/batch_01",
            "Directory containing the file pairs",
        ),
        ("--output", "-o", str, "output.jsonl", "Path to the output JSONL file"),
        ("--limit", "-n", int, None, "Limit the number of file pairs to process"),
    )
    for long_flag, short_flag, value_type, default, help_text in option_specs:
        cli.add_argument(
            long_flag, short_flag, type=value_type, default=default, help=help_text
        )

    options = cli.parse_args()

    # Both paths are joined onto the folder that contains this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    directory = os.path.join(script_dir, options.directory)
    output_path = os.path.join(script_dir, options.output)

    # Step 1: gather the prompt/response pairs
    print(f"Collecting file pairs from {directory}...")
    file_pairs = collect_file_pairs(directory, options.limit)
    print(f"Found {len(file_pairs)} file pairs.")

    # Step 2: load the shared .aillyrc text
    print("Reading .aillyrc content...")
    aillyrc_content = get_aillyrc_content(directory)

    # Step 3: build the records
    print("Converting to JSONL format...")
    jsonl_entries = convert_to_jsonl_format(file_pairs, aillyrc_content)

    # Step 4: persist them
    print(f"Writing {len(jsonl_entries)} entries to {output_path}...")
    write_jsonl_file(jsonl_entries, output_path)

    print("Done!")
246+
247+
248+
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)