Skip to content

Commit 0e096a7

Browse files
committed
Batch Ailly calls from Lliam
Rather than making a single Ailly call, this batches the policies into groups of 150 and calls Ailly once per batch. It also changes the default prefix from [DEFAULT] to DEFAULT; the bracketed form was causing build failures on the TCA side.
1 parent 52402b2 commit 0e096a7

5 files changed

Lines changed: 105 additions & 25 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ __pycache__
77
build/
88
dist/
99
.ailly_iam_policy
10+
*.log

aws_doc_sdk_examples_tools/agent/bin/main.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,37 @@
11
from pathlib import Path
22
from subprocess import run
33
from typing import List
4+
import time
5+
from datetime import timedelta, datetime
46

7+
import logging
58
import typer
69

710
from aws_doc_sdk_examples_tools.agent.make_prompts import make_prompts
811
from aws_doc_sdk_examples_tools.agent.process_ailly_files import process_ailly_files
912
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_doc_gen
1013
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many
1114

15+
# Write a fresh log file for every run. The timestamp is formatted explicitly
# because the default str(datetime) contains a space and colons, and colons
# are not valid in Windows file names.
logging.basicConfig(
    level=logging.INFO,
    filename=f"lliam-run-{datetime.now():%Y-%m-%dT%H-%M-%S}.log",
    filemode="w",
)
18+
logger = logging.getLogger(__name__)
19+
1220
app = typer.Typer()
1321

1422
AILLY_DIR = ".ailly_iam_policy"
1523
AILLY_DIR_PATH = Path(AILLY_DIR)
1624
IAM_UPDATES_PATH = AILLY_DIR_PATH / "iam_updates.json"
1725

1826

27+
def format_duration(seconds: float) -> str:
    """Format a duration given in seconds as ``HH:MM:SS``.

    Fractional seconds are truncated. Hours are not wrapped at 24, so a
    duration of 90000 seconds is rendered as ``25:00:00``. Negative durations
    are rendered as the magnitude with a leading ``-``.

    Args:
        seconds: Duration in seconds.

    Returns:
        The duration as a zero-padded ``HH:MM:SS`` string.
    """
    # Going through timedelta keeps the same microsecond rounding the
    # function has always applied to its input.
    total = timedelta(seconds=seconds).total_seconds()
    whole = int(abs(total))
    # Only show a sign when at least one whole second is displayed, so a tiny
    # negative value does not render as "-00:00:00".
    sign = "-" if total < 0 and whole else ""
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}"
33+
34+
1935
@app.command()
2036
def update(
2137
iam_tributary_root: str,
@@ -34,8 +50,45 @@ def update(
3450
out_dir=AILLY_DIR_PATH,
3551
language="IAMPolicyGrammar",
3652
)
37-
run(["npx @ailly/cli@1.7.0-rc1", "--root", AILLY_DIR])
3853

54+
batch_dirs = [
55+
d.name
56+
for d in AILLY_DIR_PATH.iterdir()
57+
if d.is_dir() and d.name.startswith("batch_")
58+
]
59+
60+
if batch_dirs:
61+
total_start_time = time.time()
62+
63+
for batch_dir in sorted(batch_dirs):
64+
batch_start_time = time.time()
65+
66+
cmd = [
67+
"ailly",
68+
"--max-depth",
69+
"10",
70+
"--root",
71+
AILLY_DIR,
72+
str(batch_dir),
73+
]
74+
logger.info(f"Running {cmd}")
75+
run(cmd)
76+
77+
batch_end_time = time.time()
78+
batch_duration = batch_end_time - batch_start_time
79+
batch_num = batch_dir.replace("batch_", "")
80+
logger.info(
81+
f"[TIMECHECK] Batch {batch_num} took {format_duration(batch_duration)} to run"
82+
)
83+
84+
total_end_time = time.time()
85+
total_duration = total_end_time - total_start_time
86+
num_batches = len(batch_dirs)
87+
logger.info(
88+
f"[TIMECHECK] {num_batches} batches took {format_duration(total_duration)} to run"
89+
)
90+
91+
logger.info("Processing generated content")
3992
process_ailly_files(
4093
input_dir=str(AILLY_DIR_PATH), output_file=str(IAM_UPDATES_PATH)
4194
)

aws_doc_sdk_examples_tools/agent/make_prompts.py

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,14 @@
22

33
import logging
44
import os
5+
import yaml
56
from pathlib import Path
67
from typing import List
7-
import yaml
8-
9-
from aws_doc_sdk_examples_tools.doc_gen import DocGen, Snippet
108

11-
DEFAULT_METADATA_PREFIX = "[DEFAULT]"
9+
from aws_doc_sdk_examples_tools.doc_gen import DocGen
1210

11+
DEFAULT_METADATA_PREFIX = "DEFAULT"
1312

14-
# Setup logging
15-
logging.basicConfig(level=logging.INFO)
1613
logger = logging.getLogger(__name__)
1714

1815

@@ -26,6 +23,8 @@ def make_doc_gen(root: Path) -> DocGen:
2623
def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
2724
examples = doc_gen.examples
2825
snippets = doc_gen.snippets
26+
27+
filtered_examples = []
2928
for example_id, example in examples.items():
3029
# TCXContentAnalyzer prefixes new metadata title/title_abbrev entries with
3130
# the DEFAULT_METADATA_PREFIX. Checking this here to make sure we're only
@@ -35,30 +34,56 @@ def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
3534
if title.startswith(DEFAULT_METADATA_PREFIX) and title_abbrev.startswith(
3635
DEFAULT_METADATA_PREFIX
3736
):
38-
prompt_path = out_dir / f"{example_id}.md"
39-
snippet_key = (
40-
example.languages[language]
41-
.versions[0]
42-
.excerpts[0]
43-
.snippet_files[0]
44-
.replace("/", ".")
45-
)
46-
snippet = snippets[snippet_key]
47-
prompt_path.write_text(snippet.code, encoding="utf-8")
37+
filtered_examples.append((example_id, example))
38+
39+
batch_size = 150
40+
total_examples = len(filtered_examples)
41+
num_batches = (total_examples + batch_size - 1) // batch_size
42+
43+
logger.info(
44+
f"Splitting {total_examples} examples into {num_batches} batches of {batch_size}"
45+
)
46+
47+
for batch_num in range(num_batches):
48+
batch_dir = out_dir / f"batch_{(batch_num + 1):03}"
49+
batch_dir.mkdir(exist_ok=True)
50+
51+
start_idx = batch_num * batch_size
52+
end_idx = min((batch_num + 1) * batch_size, total_examples)
53+
54+
for i in range(start_idx, end_idx):
55+
example_id, example = filtered_examples[i]
56+
prompt_path = batch_dir / f"{example_id}.md"
57+
58+
try:
59+
snippet_key = (
60+
example.languages[language]
61+
.versions[0]
62+
.excerpts[0]
63+
.snippet_files[0]
64+
.replace("/", ".")
65+
)
66+
snippet = snippets[snippet_key]
67+
prompt_path.write_text(snippet.code, encoding="utf-8")
68+
except (KeyError, IndexError, AttributeError) as e:
69+
logger.warning(f"Error processing example {example_id}: {e}")
4870

4971

5072
def setup_ailly(system_prompts: List[str], out_dir: Path) -> None:
5173
"""Create the .aillyrc configuration file."""
5274
fence = "---"
5375
options = {
5476
"isolated": "true",
55-
"mcp": {
56-
"awslabs.aws-documentation-mcp-server": {
57-
"type": "stdio",
58-
"command": "uvx",
59-
"args": ["awslabs.aws-documentation-mcp-server@latest"],
60-
}
61-
},
77+
"overwrite": "true",
78+
# MCP assistance did not produce noticeably different results, but it was
79+
# slowing things down by 10x. Disabled for now.
80+
# "mcp": {
81+
# "awslabs.aws-documentation-mcp-server": {
82+
# "type": "stdio",
83+
# "command": "uvx",
84+
# "args": ["awslabs.aws-documentation-mcp-server@latest"],
85+
# }
86+
# },
6287
}
6388
options_block = yaml.dump(options).strip()
6489
prompts_block = "\n".join(system_prompts)

aws_doc_sdk_examples_tools/agent/process_ailly_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def process_ailly_files(
106106
input_path = Path(input_dir)
107107

108108
try:
109-
for file_path in input_path.glob(file_pattern):
109+
for file_path in input_path.rglob(file_pattern):
110110
logger.info(f"Processing file: {file_path}")
111111
parsed_data = parse_ailly_file(str(file_path))
112112
if parsed_data:

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@
1919
"pathspec==0.11.2",
2020
"PyYAML==6.0.1",
2121
"yamale==4.0.4",
22+
"typer==0.16.0",
2223
],
2324
)

0 commit comments

Comments
 (0)